1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 /* 3 * Copyright (c) 2014-2017 Oracle. All rights reserved. 4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the BSD-type 10 * license below: 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 16 * Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 19 * Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials provided 22 * with the distribution. 23 * 24 * Neither the name of the Network Appliance, Inc. nor the names of 25 * its contributors may be used to endorse or promote products 26 * derived from this software without specific prior written 27 * permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 40 */ 41 42 /* 43 * verbs.c 44 * 45 * Encapsulates the major functions managing: 46 * o adapters 47 * o endpoints 48 * o connections 49 * o buffer memory 50 */ 51 52 #include <linux/interrupt.h> 53 #include <linux/slab.h> 54 #include <linux/sunrpc/addr.h> 55 #include <linux/sunrpc/svc_rdma.h> 56 #include <linux/log2.h> 57 58 #include <asm-generic/barrier.h> 59 #include <asm/bitops.h> 60 61 #include <rdma/ib_cm.h> 62 63 #include "xprt_rdma.h" 64 #include <trace/events/rpcrdma.h> 65 66 /* 67 * Globals/Macros 68 */ 69 70 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 71 # define RPCDBG_FACILITY RPCDBG_TRANS 72 #endif 73 74 /* 75 * internal functions 76 */ 77 static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt); 78 static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt); 79 static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, 80 struct rpcrdma_sendctx *sc); 81 static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt); 82 static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt); 83 static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep); 84 static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt); 85 static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); 86 static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt); 87 static void rpcrdma_ep_get(struct rpcrdma_ep *ep); 88 static int rpcrdma_ep_put(struct rpcrdma_ep *ep); 89 static struct rpcrdma_regbuf * 90 rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, 91 gfp_t flags); 92 static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb); 93 static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb); 94 95 /* Wait for outstanding transport work to finish. ib_drain_qp 96 * handles the drains in the wrong order for us, so open code 97 * them here. 98 */ 99 static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) 100 { 101 struct rpcrdma_ep *ep = r_xprt->rx_ep; 102 struct rdma_cm_id *id = ep->re_id; 103 104 /* Flush Receives, then wait for deferred Reply work 105 * to complete. 106 */ 107 ib_drain_rq(id->qp); 108 109 /* Deferred Reply processing might have scheduled 110 * local invalidations. 111 */ 112 ib_drain_sq(id->qp); 113 114 rpcrdma_ep_put(ep); 115 } 116 117 /** 118 * rpcrdma_qp_event_handler - Handle one QP event (error notification) 119 * @event: details of the event 120 * @context: ep that owns QP where event occurred 121 * 122 * Called from the RDMA provider (device driver) possibly in an interrupt 123 * context. The QP is always destroyed before the ID, so the ID will be 124 * reliably available when this handler is invoked. 125 */ 126 static void rpcrdma_qp_event_handler(struct ib_event *event, void *context) 127 { 128 struct rpcrdma_ep *ep = context; 129 130 trace_xprtrdma_qp_event(ep, event); 131 } 132 133 /* Ensure xprt_force_disconnect() is invoked exactly once when a 134 * connection is closed or lost. (The important thing is it needs 135 * to be invoked "at least" once). 136 */ 137 static void rpcrdma_force_disconnect(struct rpcrdma_ep *ep) 138 { 139 if (atomic_add_unless(&ep->re_force_disconnect, 1, 1)) 140 xprt_force_disconnect(ep->re_xprt); 141 } 142 143 /** 144 * rpcrdma_flush_disconnect - Disconnect on flushed completion 145 * @r_xprt: transport to disconnect 146 * @wc: work completion entry 147 * 148 * Must be called in process context. 149 */ 150 void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc) 151 { 152 if (wc->status != IB_WC_SUCCESS) 153 rpcrdma_force_disconnect(r_xprt->rx_ep); 154 } 155 156 /** 157 * rpcrdma_wc_send - Invoked by RDMA provider for each polled Send WC 158 * @cq: completion queue 159 * @wc: WCE for a completed Send WR 160 * 161 */ 162 static void rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) 163 { 164 struct ib_cqe *cqe = wc->wr_cqe; 165 struct rpcrdma_sendctx *sc = 166 container_of(cqe, struct rpcrdma_sendctx, sc_cqe); 167 struct rpcrdma_xprt *r_xprt = cq->cq_context; 168 169 /* WARNING: Only wr_cqe and status are reliable at this point */ 170 trace_xprtrdma_wc_send(sc, wc); 171 rpcrdma_sendctx_put_locked(r_xprt, sc); 172 rpcrdma_flush_disconnect(r_xprt, wc); 173 } 174 175 /** 176 * rpcrdma_wc_receive - Invoked by RDMA provider for each polled Receive WC 177 * @cq: completion queue 178 * @wc: WCE for a completed Receive WR 179 * 180 */ 181 static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) 182 { 183 struct ib_cqe *cqe = wc->wr_cqe; 184 struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep, 185 rr_cqe); 186 struct rpcrdma_xprt *r_xprt = cq->cq_context; 187 188 /* WARNING: Only wr_cqe and status are reliable at this point */ 189 trace_xprtrdma_wc_receive(wc); 190 --r_xprt->rx_ep->re_receive_count; 191 if (wc->status != IB_WC_SUCCESS) 192 goto out_flushed; 193 194 /* status == SUCCESS means all fields in wc are trustworthy */ 195 rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len); 196 rep->rr_wc_flags = wc->wc_flags; 197 rep->rr_inv_rkey = wc->ex.invalidate_rkey; 198 199 ib_dma_sync_single_for_cpu(rdmab_device(rep->rr_rdmabuf), 200 rdmab_addr(rep->rr_rdmabuf), 201 wc->byte_len, DMA_FROM_DEVICE); 202 203 rpcrdma_reply_handler(rep); 204 return; 205 206 out_flushed: 207 rpcrdma_flush_disconnect(r_xprt, wc); 208 rpcrdma_rep_destroy(rep); 209 } 210 211 static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep, 212 struct rdma_conn_param *param) 213 { 214 const struct rpcrdma_connect_private *pmsg = param->private_data; 215 unsigned int rsize, wsize; 216 217 /* Default settings for RPC-over-RDMA Version One */ 218 ep->re_implicit_roundup = xprt_rdma_pad_optimize; 219 rsize = RPCRDMA_V1_DEF_INLINE_SIZE; 220 wsize = RPCRDMA_V1_DEF_INLINE_SIZE; 221 222 if (pmsg && 223 pmsg->cp_magic == rpcrdma_cmp_magic && 224 pmsg->cp_version == RPCRDMA_CMP_VERSION) { 225 ep->re_implicit_roundup = true; 226 rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); 227 wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); 228 } 229 230 if (rsize < ep->re_inline_recv) 231 ep->re_inline_recv = rsize; 232 if (wsize < ep->re_inline_send) 233 ep->re_inline_send = wsize; 234 235 rpcrdma_set_max_header_sizes(ep); 236 } 237 238 /** 239 * rpcrdma_cm_event_handler - Handle RDMA CM events 240 * @id: rdma_cm_id on which an event has occurred 241 * @event: details of the event 242 * 243 * Called with @id's mutex held. Returns 1 if caller should 244 * destroy @id, otherwise 0. 245 */ 246 static int 247 rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) 248 { 249 struct sockaddr *sap = (struct sockaddr *)&id->route.addr.dst_addr; 250 struct rpcrdma_ep *ep = id->context; 251 252 might_sleep(); 253 254 switch (event->event) { 255 case RDMA_CM_EVENT_ADDR_RESOLVED: 256 case RDMA_CM_EVENT_ROUTE_RESOLVED: 257 ep->re_async_rc = 0; 258 complete(&ep->re_done); 259 return 0; 260 case RDMA_CM_EVENT_ADDR_ERROR: 261 ep->re_async_rc = -EPROTO; 262 complete(&ep->re_done); 263 return 0; 264 case RDMA_CM_EVENT_ROUTE_ERROR: 265 ep->re_async_rc = -ENETUNREACH; 266 complete(&ep->re_done); 267 return 0; 268 case RDMA_CM_EVENT_DEVICE_REMOVAL: 269 pr_info("rpcrdma: removing device %s for %pISpc\n", 270 ep->re_id->device->name, sap); 271 /* fall through */ 272 case RDMA_CM_EVENT_ADDR_CHANGE: 273 ep->re_connect_status = -ENODEV; 274 goto disconnected; 275 case RDMA_CM_EVENT_ESTABLISHED: 276 rpcrdma_ep_get(ep); 277 ep->re_connect_status = 1; 278 rpcrdma_update_cm_private(ep, &event->param.conn); 279 trace_xprtrdma_inline_thresh(ep); 280 wake_up_all(&ep->re_connect_wait); 281 break; 282 case RDMA_CM_EVENT_CONNECT_ERROR: 283 ep->re_connect_status = -ENOTCONN; 284 goto disconnected; 285 case RDMA_CM_EVENT_UNREACHABLE: 286 ep->re_connect_status = -ENETUNREACH; 287 goto disconnected; 288 case RDMA_CM_EVENT_REJECTED: 289 dprintk("rpcrdma: connection to %pISpc rejected: %s\n", 290 sap, rdma_reject_msg(id, event->status)); 291 ep->re_connect_status = -ECONNREFUSED; 292 if (event->status == IB_CM_REJ_STALE_CONN) 293 ep->re_connect_status = -EAGAIN; 294 goto disconnected; 295 case RDMA_CM_EVENT_DISCONNECTED: 296 ep->re_connect_status = -ECONNABORTED; 297 disconnected: 298 rpcrdma_force_disconnect(ep); 299 return rpcrdma_ep_put(ep); 300 default: 301 break; 302 } 303 304 dprintk("RPC: %s: %pISpc on %s/frwr: %s\n", __func__, sap, 305 ep->re_id->device->name, rdma_event_msg(event->event)); 306 return 0; 307 } 308 309 static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt, 310 struct rpcrdma_ep *ep) 311 { 312 unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1; 313 struct rpc_xprt *xprt = &r_xprt->rx_xprt; 314 struct rdma_cm_id *id; 315 int rc; 316 317 init_completion(&ep->re_done); 318 319 id = rdma_create_id(xprt->xprt_net, rpcrdma_cm_event_handler, ep, 320 RDMA_PS_TCP, IB_QPT_RC); 321 if (IS_ERR(id)) 322 return id; 323 324 ep->re_async_rc = -ETIMEDOUT; 325 rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)&xprt->addr, 326 RDMA_RESOLVE_TIMEOUT); 327 if (rc) 328 goto out; 329 rc = wait_for_completion_interruptible_timeout(&ep->re_done, wtimeout); 330 if (rc < 0) 331 goto out; 332 333 rc = ep->re_async_rc; 334 if (rc) 335 goto out; 336 337 ep->re_async_rc = -ETIMEDOUT; 338 rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); 339 if (rc) 340 goto out; 341 rc = wait_for_completion_interruptible_timeout(&ep->re_done, wtimeout); 342 if (rc < 0) 343 goto out; 344 rc = ep->re_async_rc; 345 if (rc) 346 goto out; 347 348 return id; 349 350 out: 351 rdma_destroy_id(id); 352 return ERR_PTR(rc); 353 } 354 355 static void rpcrdma_ep_destroy(struct kref *kref) 356 { 357 struct rpcrdma_ep *ep = container_of(kref, struct rpcrdma_ep, re_kref); 358 359 if (ep->re_id->qp) { 360 rdma_destroy_qp(ep->re_id); 361 ep->re_id->qp = NULL; 362 } 363 364 if (ep->re_attr.recv_cq) 365 ib_free_cq(ep->re_attr.recv_cq); 366 ep->re_attr.recv_cq = NULL; 367 if (ep->re_attr.send_cq) 368 ib_free_cq(ep->re_attr.send_cq); 369 ep->re_attr.send_cq = NULL; 370 371 if (ep->re_pd) 372 ib_dealloc_pd(ep->re_pd); 373 ep->re_pd = NULL; 374 375 kfree(ep); 376 module_put(THIS_MODULE); 377 } 378 379 static noinline void rpcrdma_ep_get(struct rpcrdma_ep *ep) 380 { 381 kref_get(&ep->re_kref); 382 } 383 384 /* Returns: 385 * %0 if @ep still has a positive kref count, or 386 * %1 if @ep was destroyed successfully. 387 */ 388 static noinline int rpcrdma_ep_put(struct rpcrdma_ep *ep) 389 { 390 return kref_put(&ep->re_kref, rpcrdma_ep_destroy); 391 } 392 393 static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) 394 { 395 struct rpcrdma_connect_private *pmsg; 396 struct ib_device *device; 397 struct rdma_cm_id *id; 398 struct rpcrdma_ep *ep; 399 int rc; 400 401 ep = kzalloc(sizeof(*ep), GFP_NOFS); 402 if (!ep) 403 return -EAGAIN; 404 ep->re_xprt = &r_xprt->rx_xprt; 405 kref_init(&ep->re_kref); 406 407 id = rpcrdma_create_id(r_xprt, ep); 408 if (IS_ERR(id)) { 409 rc = PTR_ERR(id); 410 goto out_free; 411 } 412 __module_get(THIS_MODULE); 413 device = id->device; 414 ep->re_id = id; 415 416 ep->re_max_requests = r_xprt->rx_xprt.max_reqs; 417 ep->re_inline_send = xprt_rdma_max_inline_write; 418 ep->re_inline_recv = xprt_rdma_max_inline_read; 419 rc = frwr_query_device(ep, device); 420 if (rc) 421 goto out_destroy; 422 423 r_xprt->rx_buf.rb_max_requests = cpu_to_be32(ep->re_max_requests); 424 425 ep->re_attr.event_handler = rpcrdma_qp_event_handler; 426 ep->re_attr.qp_context = ep; 427 ep->re_attr.srq = NULL; 428 ep->re_attr.cap.max_inline_data = 0; 429 ep->re_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 430 ep->re_attr.qp_type = IB_QPT_RC; 431 ep->re_attr.port_num = ~0; 432 433 dprintk("RPC: %s: requested max: dtos: send %d recv %d; " 434 "iovs: send %d recv %d\n", 435 __func__, 436 ep->re_attr.cap.max_send_wr, 437 ep->re_attr.cap.max_recv_wr, 438 ep->re_attr.cap.max_send_sge, 439 ep->re_attr.cap.max_recv_sge); 440 441 ep->re_send_batch = ep->re_max_requests >> 3; 442 ep->re_send_count = ep->re_send_batch; 443 init_waitqueue_head(&ep->re_connect_wait); 444 445 ep->re_attr.send_cq = ib_alloc_cq_any(device, r_xprt, 446 ep->re_attr.cap.max_send_wr, 447 IB_POLL_WORKQUEUE); 448 if (IS_ERR(ep->re_attr.send_cq)) { 449 rc = PTR_ERR(ep->re_attr.send_cq); 450 goto out_destroy; 451 } 452 453 ep->re_attr.recv_cq = ib_alloc_cq_any(device, r_xprt, 454 ep->re_attr.cap.max_recv_wr, 455 IB_POLL_WORKQUEUE); 456 if (IS_ERR(ep->re_attr.recv_cq)) { 457 rc = PTR_ERR(ep->re_attr.recv_cq); 458 goto out_destroy; 459 } 460 ep->re_receive_count = 0; 461 462 /* Initialize cma parameters */ 463 memset(&ep->re_remote_cma, 0, sizeof(ep->re_remote_cma)); 464 465 /* Prepare RDMA-CM private message */ 466 pmsg = &ep->re_cm_private; 467 pmsg->cp_magic = rpcrdma_cmp_magic; 468 pmsg->cp_version = RPCRDMA_CMP_VERSION; 469 pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK; 470 pmsg->cp_send_size = rpcrdma_encode_buffer_size(ep->re_inline_send); 471 pmsg->cp_recv_size = rpcrdma_encode_buffer_size(ep->re_inline_recv); 472 ep->re_remote_cma.private_data = pmsg; 473 ep->re_remote_cma.private_data_len = sizeof(*pmsg); 474 475 /* Client offers RDMA Read but does not initiate */ 476 ep->re_remote_cma.initiator_depth = 0; 477 ep->re_remote_cma.responder_resources = 478 min_t(int, U8_MAX, device->attrs.max_qp_rd_atom); 479 480 /* Limit transport retries so client can detect server 481 * GID changes quickly. RPC layer handles re-establishing 482 * transport connection and retransmission. 483 */ 484 ep->re_remote_cma.retry_count = 6; 485 486 /* RPC-over-RDMA handles its own flow control. In addition, 487 * make all RNR NAKs visible so we know that RPC-over-RDMA 488 * flow control is working correctly (no NAKs should be seen). 489 */ 490 ep->re_remote_cma.flow_control = 0; 491 ep->re_remote_cma.rnr_retry_count = 0; 492 493 ep->re_pd = ib_alloc_pd(device, 0); 494 if (IS_ERR(ep->re_pd)) { 495 rc = PTR_ERR(ep->re_pd); 496 goto out_destroy; 497 } 498 499 rc = rdma_create_qp(id, ep->re_pd, &ep->re_attr); 500 if (rc) 501 goto out_destroy; 502 503 r_xprt->rx_ep = ep; 504 return 0; 505 506 out_destroy: 507 rpcrdma_ep_put(ep); 508 rdma_destroy_id(id); 509 out_free: 510 kfree(ep); 511 r_xprt->rx_ep = NULL; 512 return rc; 513 } 514 515 /** 516 * rpcrdma_xprt_connect - Connect an unconnected transport 517 * @r_xprt: controlling transport instance 518 * 519 * Returns 0 on success or a negative errno. 520 */ 521 int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt) 522 { 523 struct rpc_xprt *xprt = &r_xprt->rx_xprt; 524 struct rpcrdma_ep *ep; 525 int rc; 526 527 retry: 528 rpcrdma_xprt_disconnect(r_xprt); 529 rc = rpcrdma_ep_create(r_xprt); 530 if (rc) 531 return rc; 532 ep = r_xprt->rx_ep; 533 534 xprt_clear_connected(xprt); 535 rpcrdma_reset_cwnd(r_xprt); 536 537 /* Bump the ep's reference count while there are 538 * outstanding Receives. 539 */ 540 rpcrdma_ep_get(ep); 541 rpcrdma_post_recvs(r_xprt, true); 542 543 rc = rpcrdma_sendctxs_create(r_xprt); 544 if (rc) 545 goto out; 546 547 rc = rdma_connect(ep->re_id, &ep->re_remote_cma); 548 if (rc) 549 goto out; 550 551 if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO) 552 xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; 553 wait_event_interruptible(ep->re_connect_wait, 554 ep->re_connect_status != 0); 555 if (ep->re_connect_status <= 0) { 556 if (ep->re_connect_status == -EAGAIN) 557 goto retry; 558 rc = ep->re_connect_status; 559 goto out; 560 } 561 562 rc = rpcrdma_reqs_setup(r_xprt); 563 if (rc) { 564 rpcrdma_xprt_disconnect(r_xprt); 565 goto out; 566 } 567 rpcrdma_mrs_create(r_xprt); 568 569 out: 570 trace_xprtrdma_connect(r_xprt, rc); 571 return rc; 572 } 573 574 /** 575 * rpcrdma_xprt_disconnect - Disconnect underlying transport 576 * @r_xprt: controlling transport instance 577 * 578 * Caller serializes. Either the transport send lock is held, 579 * or we're being called to destroy the transport. 580 * 581 * On return, @r_xprt is completely divested of all hardware 582 * resources and prepared for the next ->connect operation. 583 */ 584 void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt) 585 { 586 struct rpcrdma_ep *ep = r_xprt->rx_ep; 587 struct rdma_cm_id *id; 588 int rc; 589 590 if (!ep) 591 return; 592 593 id = ep->re_id; 594 rc = rdma_disconnect(id); 595 trace_xprtrdma_disconnect(r_xprt, rc); 596 597 rpcrdma_xprt_drain(r_xprt); 598 rpcrdma_reps_unmap(r_xprt); 599 rpcrdma_reqs_reset(r_xprt); 600 rpcrdma_mrs_destroy(r_xprt); 601 rpcrdma_sendctxs_destroy(r_xprt); 602 603 if (rpcrdma_ep_put(ep)) 604 rdma_destroy_id(id); 605 606 r_xprt->rx_ep = NULL; 607 } 608 609 /* Fixed-size circular FIFO queue. This implementation is wait-free and 610 * lock-free. 611 * 612 * Consumer is the code path that posts Sends. This path dequeues a 613 * sendctx for use by a Send operation. Multiple consumer threads 614 * are serialized by the RPC transport lock, which allows only one 615 * ->send_request call at a time. 616 * 617 * Producer is the code path that handles Send completions. This path 618 * enqueues a sendctx that has been completed. Multiple producer 619 * threads are serialized by the ib_poll_cq() function. 620 */ 621 622 /* rpcrdma_sendctxs_destroy() assumes caller has already quiesced 623 * queue activity, and rpcrdma_xprt_drain has flushed all remaining 624 * Send requests. 625 */ 626 static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt) 627 { 628 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 629 unsigned long i; 630 631 if (!buf->rb_sc_ctxs) 632 return; 633 for (i = 0; i <= buf->rb_sc_last; i++) 634 kfree(buf->rb_sc_ctxs[i]); 635 kfree(buf->rb_sc_ctxs); 636 buf->rb_sc_ctxs = NULL; 637 } 638 639 static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ep *ep) 640 { 641 struct rpcrdma_sendctx *sc; 642 643 sc = kzalloc(struct_size(sc, sc_sges, ep->re_attr.cap.max_send_sge), 644 GFP_KERNEL); 645 if (!sc) 646 return NULL; 647 648 sc->sc_cqe.done = rpcrdma_wc_send; 649 return sc; 650 } 651 652 static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt) 653 { 654 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 655 struct rpcrdma_sendctx *sc; 656 unsigned long i; 657 658 /* Maximum number of concurrent outstanding Send WRs. Capping 659 * the circular queue size stops Send Queue overflow by causing 660 * the ->send_request call to fail temporarily before too many 661 * Sends are posted. 662 */ 663 i = r_xprt->rx_ep->re_max_requests + RPCRDMA_MAX_BC_REQUESTS; 664 buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL); 665 if (!buf->rb_sc_ctxs) 666 return -ENOMEM; 667 668 buf->rb_sc_last = i - 1; 669 for (i = 0; i <= buf->rb_sc_last; i++) { 670 sc = rpcrdma_sendctx_create(r_xprt->rx_ep); 671 if (!sc) 672 return -ENOMEM; 673 674 buf->rb_sc_ctxs[i] = sc; 675 } 676 677 buf->rb_sc_head = 0; 678 buf->rb_sc_tail = 0; 679 return 0; 680 } 681 682 /* The sendctx queue is not guaranteed to have a size that is a 683 * power of two, thus the helpers in circ_buf.h cannot be used. 684 * The other option is to use modulus (%), which can be expensive. 685 */ 686 static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf, 687 unsigned long item) 688 { 689 return likely(item < buf->rb_sc_last) ? item + 1 : 0; 690 } 691 692 /** 693 * rpcrdma_sendctx_get_locked - Acquire a send context 694 * @r_xprt: controlling transport instance 695 * 696 * Returns pointer to a free send completion context; or NULL if 697 * the queue is empty. 698 * 699 * Usage: Called to acquire an SGE array before preparing a Send WR. 700 * 701 * The caller serializes calls to this function (per transport), and 702 * provides an effective memory barrier that flushes the new value 703 * of rb_sc_head. 704 */ 705 struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt) 706 { 707 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 708 struct rpcrdma_sendctx *sc; 709 unsigned long next_head; 710 711 next_head = rpcrdma_sendctx_next(buf, buf->rb_sc_head); 712 713 if (next_head == READ_ONCE(buf->rb_sc_tail)) 714 goto out_emptyq; 715 716 /* ORDER: item must be accessed _before_ head is updated */ 717 sc = buf->rb_sc_ctxs[next_head]; 718 719 /* Releasing the lock in the caller acts as a memory 720 * barrier that flushes rb_sc_head. 721 */ 722 buf->rb_sc_head = next_head; 723 724 return sc; 725 726 out_emptyq: 727 /* The queue is "empty" if there have not been enough Send 728 * completions recently. This is a sign the Send Queue is 729 * backing up. Cause the caller to pause and try again. 730 */ 731 xprt_wait_for_buffer_space(&r_xprt->rx_xprt); 732 r_xprt->rx_stats.empty_sendctx_q++; 733 return NULL; 734 } 735 736 /** 737 * rpcrdma_sendctx_put_locked - Release a send context 738 * @r_xprt: controlling transport instance 739 * @sc: send context to release 740 * 741 * Usage: Called from Send completion to return a sendctxt 742 * to the queue. 743 * 744 * The caller serializes calls to this function (per transport). 745 */ 746 static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, 747 struct rpcrdma_sendctx *sc) 748 { 749 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 750 unsigned long next_tail; 751 752 /* Unmap SGEs of previously completed but unsignaled 753 * Sends by walking up the queue until @sc is found. 754 */ 755 next_tail = buf->rb_sc_tail; 756 do { 757 next_tail = rpcrdma_sendctx_next(buf, next_tail); 758 759 /* ORDER: item must be accessed _before_ tail is updated */ 760 rpcrdma_sendctx_unmap(buf->rb_sc_ctxs[next_tail]); 761 762 } while (buf->rb_sc_ctxs[next_tail] != sc); 763 764 /* Paired with READ_ONCE */ 765 smp_store_release(&buf->rb_sc_tail, next_tail); 766 767 xprt_write_space(&r_xprt->rx_xprt); 768 } 769 770 static void 771 rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) 772 { 773 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 774 struct rpcrdma_ep *ep = r_xprt->rx_ep; 775 unsigned int count; 776 777 for (count = 0; count < ep->re_max_rdma_segs; count++) { 778 struct rpcrdma_mr *mr; 779 int rc; 780 781 mr = kzalloc(sizeof(*mr), GFP_NOFS); 782 if (!mr) 783 break; 784 785 rc = frwr_mr_init(r_xprt, mr); 786 if (rc) { 787 kfree(mr); 788 break; 789 } 790 791 spin_lock(&buf->rb_lock); 792 rpcrdma_mr_push(mr, &buf->rb_mrs); 793 list_add(&mr->mr_all, &buf->rb_all_mrs); 794 spin_unlock(&buf->rb_lock); 795 } 796 797 r_xprt->rx_stats.mrs_allocated += count; 798 trace_xprtrdma_createmrs(r_xprt, count); 799 } 800 801 static void 802 rpcrdma_mr_refresh_worker(struct work_struct *work) 803 { 804 struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer, 805 rb_refresh_worker); 806 struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, 807 rx_buf); 808 809 rpcrdma_mrs_create(r_xprt); 810 xprt_write_space(&r_xprt->rx_xprt); 811 } 812 813 /** 814 * rpcrdma_mrs_refresh - Wake the MR refresh worker 815 * @r_xprt: controlling transport instance 816 * 817 */ 818 void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt) 819 { 820 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 821 struct rpcrdma_ep *ep = r_xprt->rx_ep; 822 823 /* If there is no underlying connection, it's no use 824 * to wake the refresh worker. 825 */ 826 if (ep->re_connect_status == 1) { 827 /* The work is scheduled on a WQ_MEM_RECLAIM 828 * workqueue in order to prevent MR allocation 829 * from recursing into NFS during direct reclaim. 830 */ 831 queue_work(xprtiod_workqueue, &buf->rb_refresh_worker); 832 } 833 } 834 835 /** 836 * rpcrdma_req_create - Allocate an rpcrdma_req object 837 * @r_xprt: controlling r_xprt 838 * @size: initial size, in bytes, of send and receive buffers 839 * @flags: GFP flags passed to memory allocators 840 * 841 * Returns an allocated and fully initialized rpcrdma_req or NULL. 842 */ 843 struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, 844 gfp_t flags) 845 { 846 struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; 847 struct rpcrdma_req *req; 848 849 req = kzalloc(sizeof(*req), flags); 850 if (req == NULL) 851 goto out1; 852 853 req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE, flags); 854 if (!req->rl_sendbuf) 855 goto out2; 856 857 req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE, flags); 858 if (!req->rl_recvbuf) 859 goto out3; 860 861 INIT_LIST_HEAD(&req->rl_free_mrs); 862 INIT_LIST_HEAD(&req->rl_registered); 863 spin_lock(&buffer->rb_lock); 864 list_add(&req->rl_all, &buffer->rb_allreqs); 865 spin_unlock(&buffer->rb_lock); 866 return req; 867 868 out3: 869 kfree(req->rl_sendbuf); 870 out2: 871 kfree(req); 872 out1: 873 return NULL; 874 } 875 876 /** 877 * rpcrdma_req_setup - Per-connection instance setup of an rpcrdma_req object 878 * @r_xprt: controlling transport instance 879 * @req: rpcrdma_req object to set up 880 * 881 * Returns zero on success, and a negative errno on failure. 882 */ 883 int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) 884 { 885 struct rpcrdma_regbuf *rb; 886 size_t maxhdrsize; 887 888 /* Compute maximum header buffer size in bytes */ 889 maxhdrsize = rpcrdma_fixed_maxsz + 3 + 890 r_xprt->rx_ep->re_max_rdma_segs * rpcrdma_readchunk_maxsz; 891 maxhdrsize *= sizeof(__be32); 892 rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize), 893 DMA_TO_DEVICE, GFP_KERNEL); 894 if (!rb) 895 goto out; 896 897 if (!__rpcrdma_regbuf_dma_map(r_xprt, rb)) 898 goto out_free; 899 900 req->rl_rdmabuf = rb; 901 xdr_buf_init(&req->rl_hdrbuf, rdmab_data(rb), rdmab_length(rb)); 902 return 0; 903 904 out_free: 905 rpcrdma_regbuf_free(rb); 906 out: 907 return -ENOMEM; 908 } 909 910 /* ASSUMPTION: the rb_allreqs list is stable for the duration, 911 * and thus can be walked without holding rb_lock. Eg. the 912 * caller is holding the transport send lock to exclude 913 * device removal or disconnection. 914 */ 915 static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt) 916 { 917 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 918 struct rpcrdma_req *req; 919 int rc; 920 921 list_for_each_entry(req, &buf->rb_allreqs, rl_all) { 922 rc = rpcrdma_req_setup(r_xprt, req); 923 if (rc) 924 return rc; 925 } 926 return 0; 927 } 928 929 static void rpcrdma_req_reset(struct rpcrdma_req *req) 930 { 931 /* Credits are valid for only one connection */ 932 req->rl_slot.rq_cong = 0; 933 934 rpcrdma_regbuf_free(req->rl_rdmabuf); 935 req->rl_rdmabuf = NULL; 936 937 rpcrdma_regbuf_dma_unmap(req->rl_sendbuf); 938 rpcrdma_regbuf_dma_unmap(req->rl_recvbuf); 939 } 940 941 /* ASSUMPTION: the rb_allreqs list is stable for the duration, 942 * and thus can be walked without holding rb_lock. Eg. the 943 * caller is holding the transport send lock to exclude 944 * device removal or disconnection. 945 */ 946 static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt) 947 { 948 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 949 struct rpcrdma_req *req; 950 951 list_for_each_entry(req, &buf->rb_allreqs, rl_all) 952 rpcrdma_req_reset(req); 953 } 954 955 /* No locking needed here. This function is called only by the 956 * Receive completion handler. 957 */ 958 static noinline 959 struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, 960 bool temp) 961 { 962 struct rpcrdma_rep *rep; 963 964 rep = kzalloc(sizeof(*rep), GFP_KERNEL); 965 if (rep == NULL) 966 goto out; 967 968 rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep->re_inline_recv, 969 DMA_FROM_DEVICE, GFP_KERNEL); 970 if (!rep->rr_rdmabuf) 971 goto out_free; 972 973 if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) 974 goto out_free_regbuf; 975 976 xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf), 977 rdmab_length(rep->rr_rdmabuf)); 978 rep->rr_cqe.done = rpcrdma_wc_receive; 979 rep->rr_rxprt = r_xprt; 980 rep->rr_recv_wr.next = NULL; 981 rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; 982 rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; 983 rep->rr_recv_wr.num_sge = 1; 984 rep->rr_temp = temp; 985 list_add(&rep->rr_all, &r_xprt->rx_buf.rb_all_reps); 986 return rep; 987 988 out_free_regbuf: 989 rpcrdma_regbuf_free(rep->rr_rdmabuf); 990 out_free: 991 kfree(rep); 992 out: 993 return NULL; 994 } 995 996 /* No locking needed here. This function is invoked only by the 997 * Receive completion handler, or during transport shutdown. 998 */ 999 static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep) 1000 { 1001 list_del(&rep->rr_all); 1002 rpcrdma_regbuf_free(rep->rr_rdmabuf); 1003 kfree(rep); 1004 } 1005 1006 static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf) 1007 { 1008 struct llist_node *node; 1009 1010 /* Calls to llist_del_first are required to be serialized */ 1011 node = llist_del_first(&buf->rb_free_reps); 1012 if (!node) 1013 return NULL; 1014 return llist_entry(node, struct rpcrdma_rep, rr_node); 1015 } 1016 1017 static void rpcrdma_rep_put(struct rpcrdma_buffer *buf, 1018 struct rpcrdma_rep *rep) 1019 { 1020 llist_add(&rep->rr_node, &buf->rb_free_reps); 1021 } 1022 1023 static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt) 1024 { 1025 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1026 struct rpcrdma_rep *rep; 1027 1028 list_for_each_entry(rep, &buf->rb_all_reps, rr_all) { 1029 rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf); 1030 rep->rr_temp = true; 1031 } 1032 } 1033 1034 static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf) 1035 { 1036 struct rpcrdma_rep *rep; 1037 1038 while ((rep = rpcrdma_rep_get_locked(buf)) != NULL) 1039 rpcrdma_rep_destroy(rep); 1040 } 1041 1042 /** 1043 * rpcrdma_buffer_create - Create initial set of req/rep objects 1044 * @r_xprt: transport instance to (re)initialize 1045 * 1046 * Returns zero on success, otherwise a negative errno. 1047 */ 1048 int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) 1049 { 1050 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1051 int i, rc; 1052 1053 buf->rb_bc_srv_max_requests = 0; 1054 spin_lock_init(&buf->rb_lock); 1055 INIT_LIST_HEAD(&buf->rb_mrs); 1056 INIT_LIST_HEAD(&buf->rb_all_mrs); 1057 INIT_WORK(&buf->rb_refresh_worker, rpcrdma_mr_refresh_worker); 1058 1059 INIT_LIST_HEAD(&buf->rb_send_bufs); 1060 INIT_LIST_HEAD(&buf->rb_allreqs); 1061 INIT_LIST_HEAD(&buf->rb_all_reps); 1062 1063 rc = -ENOMEM; 1064 for (i = 0; i < r_xprt->rx_xprt.max_reqs; i++) { 1065 struct rpcrdma_req *req; 1066 1067 req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE * 2, 1068 GFP_KERNEL); 1069 if (!req) 1070 goto out; 1071 list_add(&req->rl_list, &buf->rb_send_bufs); 1072 } 1073 1074 init_llist_head(&buf->rb_free_reps); 1075 1076 return 0; 1077 out: 1078 rpcrdma_buffer_destroy(buf); 1079 return rc; 1080 } 1081 1082 /** 1083 * rpcrdma_req_destroy - Destroy an rpcrdma_req object 1084 * @req: unused object to be destroyed 1085 * 1086 * Relies on caller holding the transport send lock to protect 1087 * removing req->rl_all from buf->rb_all_reqs safely. 1088 */ 1089 void rpcrdma_req_destroy(struct rpcrdma_req *req) 1090 { 1091 struct rpcrdma_mr *mr; 1092 1093 list_del(&req->rl_all); 1094 1095 while ((mr = rpcrdma_mr_pop(&req->rl_free_mrs))) { 1096 struct rpcrdma_buffer *buf = &mr->mr_xprt->rx_buf; 1097 1098 spin_lock(&buf->rb_lock); 1099 list_del(&mr->mr_all); 1100 spin_unlock(&buf->rb_lock); 1101 1102 frwr_release_mr(mr); 1103 } 1104 1105 rpcrdma_regbuf_free(req->rl_recvbuf); 1106 rpcrdma_regbuf_free(req->rl_sendbuf); 1107 rpcrdma_regbuf_free(req->rl_rdmabuf); 1108 kfree(req); 1109 } 1110 1111 /** 1112 * rpcrdma_mrs_destroy - Release all of a transport's MRs 1113 * @r_xprt: controlling transport instance 1114 * 1115 * Relies on caller holding the transport send lock to protect 1116 * removing mr->mr_list from req->rl_free_mrs safely. 1117 */ 1118 static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt) 1119 { 1120 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1121 struct rpcrdma_mr *mr; 1122 1123 cancel_work_sync(&buf->rb_refresh_worker); 1124 1125 spin_lock(&buf->rb_lock); 1126 while ((mr = list_first_entry_or_null(&buf->rb_all_mrs, 1127 struct rpcrdma_mr, 1128 mr_all)) != NULL) { 1129 list_del(&mr->mr_list); 1130 list_del(&mr->mr_all); 1131 spin_unlock(&buf->rb_lock); 1132 1133 frwr_release_mr(mr); 1134 1135 spin_lock(&buf->rb_lock); 1136 } 1137 spin_unlock(&buf->rb_lock); 1138 } 1139 1140 /** 1141 * rpcrdma_buffer_destroy - Release all hw resources 1142 * @buf: root control block for resources 1143 * 1144 * ORDERING: relies on a prior rpcrdma_xprt_drain : 1145 * - No more Send or Receive completions can occur 1146 * - All MRs, reps, and reqs are returned to their free lists 1147 */ 1148 void 1149 rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) 1150 { 1151 rpcrdma_reps_destroy(buf); 1152 1153 while (!list_empty(&buf->rb_send_bufs)) { 1154 struct rpcrdma_req *req; 1155 1156 req = list_first_entry(&buf->rb_send_bufs, 1157 struct rpcrdma_req, rl_list); 1158 list_del(&req->rl_list); 1159 rpcrdma_req_destroy(req); 1160 } 1161 } 1162 1163 /** 1164 * rpcrdma_mr_get - Allocate an rpcrdma_mr object 1165 * @r_xprt: controlling transport 1166 * 1167 * Returns an initialized rpcrdma_mr or NULL if no free 1168 * rpcrdma_mr objects are available. 1169 */ 1170 struct rpcrdma_mr * 1171 rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt) 1172 { 1173 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1174 struct rpcrdma_mr *mr; 1175 1176 spin_lock(&buf->rb_lock); 1177 mr = rpcrdma_mr_pop(&buf->rb_mrs); 1178 spin_unlock(&buf->rb_lock); 1179 return mr; 1180 } 1181 1182 /** 1183 * rpcrdma_mr_put - DMA unmap an MR and release it 1184 * @mr: MR to release 1185 * 1186 */ 1187 void rpcrdma_mr_put(struct rpcrdma_mr *mr) 1188 { 1189 struct rpcrdma_xprt *r_xprt = mr->mr_xprt; 1190 1191 if (mr->mr_dir != DMA_NONE) { 1192 trace_xprtrdma_mr_unmap(mr); 1193 ib_dma_unmap_sg(r_xprt->rx_ep->re_id->device, 1194 mr->mr_sg, mr->mr_nents, mr->mr_dir); 1195 mr->mr_dir = DMA_NONE; 1196 } 1197 1198 rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs); 1199 } 1200 1201 /** 1202 * rpcrdma_buffer_get - Get a request buffer 1203 * @buffers: Buffer pool from which to obtain a buffer 1204 * 1205 * Returns a fresh rpcrdma_req, or NULL if none are available. 1206 */ 1207 struct rpcrdma_req * 1208 rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) 1209 { 1210 struct rpcrdma_req *req; 1211 1212 spin_lock(&buffers->rb_lock); 1213 req = list_first_entry_or_null(&buffers->rb_send_bufs, 1214 struct rpcrdma_req, rl_list); 1215 if (req) 1216 list_del_init(&req->rl_list); 1217 spin_unlock(&buffers->rb_lock); 1218 return req; 1219 } 1220 1221 /** 1222 * rpcrdma_buffer_put - Put request/reply buffers back into pool 1223 * @buffers: buffer pool 1224 * @req: object to return 1225 * 1226 */ 1227 void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) 1228 { 1229 if (req->rl_reply) 1230 rpcrdma_rep_put(buffers, req->rl_reply); 1231 req->rl_reply = NULL; 1232 1233 spin_lock(&buffers->rb_lock); 1234 list_add(&req->rl_list, &buffers->rb_send_bufs); 1235 spin_unlock(&buffers->rb_lock); 1236 } 1237 1238 /** 1239 * rpcrdma_recv_buffer_put - Release rpcrdma_rep back to free list 1240 * @rep: rep to release 1241 * 1242 * Used after error conditions. 1243 */ 1244 void rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) 1245 { 1246 rpcrdma_rep_put(&rep->rr_rxprt->rx_buf, rep); 1247 } 1248 1249 /* Returns a pointer to a rpcrdma_regbuf object, or NULL. 1250 * 1251 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for 1252 * receiving the payload of RDMA RECV operations. During Long Calls 1253 * or Replies they may be registered externally via frwr_map. 1254 */ 1255 static struct rpcrdma_regbuf * 1256 rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, 1257 gfp_t flags) 1258 { 1259 struct rpcrdma_regbuf *rb; 1260 1261 rb = kmalloc(sizeof(*rb), flags); 1262 if (!rb) 1263 return NULL; 1264 rb->rg_data = kmalloc(size, flags); 1265 if (!rb->rg_data) { 1266 kfree(rb); 1267 return NULL; 1268 } 1269 1270 rb->rg_device = NULL; 1271 rb->rg_direction = direction; 1272 rb->rg_iov.length = size; 1273 return rb; 1274 } 1275 1276 /** 1277 * rpcrdma_regbuf_realloc - re-allocate a SEND/RECV buffer 1278 * @rb: regbuf to reallocate 1279 * @size: size of buffer to be allocated, in bytes 1280 * @flags: GFP flags 1281 * 1282 * Returns true if reallocation was successful. If false is 1283 * returned, @rb is left untouched. 1284 */ 1285 bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags) 1286 { 1287 void *buf; 1288 1289 buf = kmalloc(size, flags); 1290 if (!buf) 1291 return false; 1292 1293 rpcrdma_regbuf_dma_unmap(rb); 1294 kfree(rb->rg_data); 1295 1296 rb->rg_data = buf; 1297 rb->rg_iov.length = size; 1298 return true; 1299 } 1300 1301 /** 1302 * __rpcrdma_regbuf_dma_map - DMA-map a regbuf 1303 * @r_xprt: controlling transport instance 1304 * @rb: regbuf to be mapped 1305 * 1306 * Returns true if the buffer is now DMA mapped to @r_xprt's device 1307 */ 1308 bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt, 1309 struct rpcrdma_regbuf *rb) 1310 { 1311 struct ib_device *device = r_xprt->rx_ep->re_id->device; 1312 1313 if (rb->rg_direction == DMA_NONE) 1314 return false; 1315 1316 rb->rg_iov.addr = ib_dma_map_single(device, rdmab_data(rb), 1317 rdmab_length(rb), rb->rg_direction); 1318 if (ib_dma_mapping_error(device, rdmab_addr(rb))) { 1319 trace_xprtrdma_dma_maperr(rdmab_addr(rb)); 1320 return false; 1321 } 1322 1323 rb->rg_device = device; 1324 rb->rg_iov.lkey = r_xprt->rx_ep->re_pd->local_dma_lkey; 1325 return true; 1326 } 1327 1328 static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb) 1329 { 1330 if (!rb) 1331 return; 1332 1333 if (!rpcrdma_regbuf_is_mapped(rb)) 1334 return; 1335 1336 ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb), rdmab_length(rb), 1337 rb->rg_direction); 1338 rb->rg_device = NULL; 1339 } 1340 1341 static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb) 1342 { 1343 rpcrdma_regbuf_dma_unmap(rb); 1344 if (rb) 1345 kfree(rb->rg_data); 1346 kfree(rb); 1347 } 1348 1349 /** 1350 * rpcrdma_post_sends - Post WRs to a transport's Send Queue 1351 * @r_xprt: controlling transport instance 1352 * @req: rpcrdma_req containing the Send WR to post 1353 * 1354 * Returns 0 if the post was successful, otherwise -ENOTCONN 1355 * is returned. 1356 */ 1357 int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) 1358 { 1359 struct ib_send_wr *send_wr = &req->rl_wr; 1360 struct rpcrdma_ep *ep = r_xprt->rx_ep; 1361 int rc; 1362 1363 if (!ep->re_send_count || kref_read(&req->rl_kref) > 1) { 1364 send_wr->send_flags |= IB_SEND_SIGNALED; 1365 ep->re_send_count = ep->re_send_batch; 1366 } else { 1367 send_wr->send_flags &= ~IB_SEND_SIGNALED; 1368 --ep->re_send_count; 1369 } 1370 1371 trace_xprtrdma_post_send(req); 1372 rc = frwr_send(r_xprt, req); 1373 if (rc) 1374 return -ENOTCONN; 1375 return 0; 1376 } 1377 1378 /** 1379 * rpcrdma_post_recvs - Refill the Receive Queue 1380 * @r_xprt: controlling transport instance 1381 * @temp: mark Receive buffers to be deleted after use 1382 * 1383 */ 1384 void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) 1385 { 1386 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1387 struct rpcrdma_ep *ep = r_xprt->rx_ep; 1388 struct ib_recv_wr *wr, *bad_wr; 1389 struct rpcrdma_rep *rep; 1390 int needed, count, rc; 1391 1392 rc = 0; 1393 count = 0; 1394 1395 needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1); 1396 if (likely(ep->re_receive_count > needed)) 1397 goto out; 1398 needed -= ep->re_receive_count; 1399 if (!temp) 1400 needed += RPCRDMA_MAX_RECV_BATCH; 1401 1402 /* fast path: all needed reps can be found on the free list */ 1403 wr = NULL; 1404 while (needed) { 1405 rep = rpcrdma_rep_get_locked(buf); 1406 if (rep && rep->rr_temp) { 1407 rpcrdma_rep_destroy(rep); 1408 continue; 1409 } 1410 if (!rep) 1411 rep = rpcrdma_rep_create(r_xprt, temp); 1412 if (!rep) 1413 break; 1414 1415 trace_xprtrdma_post_recv(rep); 1416 rep->rr_recv_wr.next = wr; 1417 wr = &rep->rr_recv_wr; 1418 --needed; 1419 ++count; 1420 } 1421 if (!wr) 1422 goto out; 1423 1424 rc = ib_post_recv(ep->re_id->qp, wr, 1425 (const struct ib_recv_wr **)&bad_wr); 1426 out: 1427 trace_xprtrdma_post_recvs(r_xprt, count, rc); 1428 if (rc) { 1429 for (wr = bad_wr; wr;) { 1430 struct rpcrdma_rep *rep; 1431 1432 rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr); 1433 wr = wr->next; 1434 rpcrdma_recv_buffer_put(rep); 1435 --count; 1436 } 1437 } 1438 ep->re_receive_count += count; 1439 return; 1440 } 1441