/*
 * Copyright (c) 2016 Oracle. All rights reserved.
 * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Tom Tucker <tom@opengridcomputing.com>
 */

/* Operation
 *
 * The main entry point is svc_rdma_sendto. This is called by the
 * RPC server when an RPC Reply is ready to be transmitted to a client.
 *
 * The passed-in svc_rqst contains a struct xdr_buf which holds an
 * XDR-encoded RPC Reply message. sendto must construct the RPC-over-RDMA
 * transport header, post all Write WRs needed for this Reply, then post
 * a Send WR conveying the transport header and the RPC message itself to
 * the client.
 *
 * svc_rdma_sendto must fully transmit the Reply before returning, as
 * the svc_rqst will be recycled as soon as sendto returns. Remaining
 * resources referred to by the svc_rqst are also recycled at that time.
 * Therefore any resources that must remain longer must be detached
 * from the svc_rqst and released later.
 *
 * Page Management
 *
 * The I/O that performs Reply transmission is asynchronous, and may
 * complete well after sendto returns. Thus pages under I/O must be
 * removed from the svc_rqst before sendto returns.
 *
 * The logic here depends on Send Queue and completion ordering. Since
 * the Send WR is always posted last, it will always complete last. Thus
 * when it completes, it is guaranteed that all previous Write WRs have
 * also completed.
 *
 * Write WRs are constructed and posted. Each Write segment gets its own
 * svc_rdma_rw_ctxt, allowing the Write completion handler to find and
 * DMA-unmap the pages under I/O for that Write segment. The Write
 * completion handler does not release any pages.
 *
 * When the Send WR is constructed, it also gets its own svc_rdma_op_ctxt.
 * The ownership of all of the Reply's pages is transferred into that
 * ctxt, the Send WR is posted, and sendto returns.
 *
 * The svc_rdma_op_ctxt is presented when the Send WR completes. The
 * Send completion handler finally releases the Reply's pages.
 *
 * This mechanism also assumes that completions on the transport's Send
 * Completion Queue do not run in parallel. Otherwise a Write completion
 * and Send completion running at the same time could release pages that
 * are still DMA-mapped.
 *
 * Error Handling
 *
 * - If the Send WR is posted successfully, it will either complete
 *   successfully, or get flushed. Either way, the Send completion
 *   handler releases the Reply's pages.
 * - If the Send WR cannot be posted, the forward path releases
 *   the Reply's pages.
 *
 * This handles the case, without the use of page reference counting,
 * where two different Write segments send portions of the same page.
 */
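
/* For orientation, an informal sketch of the RPC-over-RDMA version 1
 * transport header that the helpers below parse and construct (RFC 8166
 * is the normative reference; the word counts follow the rpcrdma_*_maxsz
 * constants used in this file):
 *
 *   - Fixed portion (rpcrdma_fixed_maxsz words): rdma_xid, rdma_vers,
 *     rdma_credit, rdma_proc.
 *   - Read list: zero or more entries, each a non-zero discriminator,
 *     a position word, and one segment; terminated by xdr_zero.
 *   - Write list: zero or more Write chunks, each a non-zero
 *     discriminator, a segment count, and that many segments;
 *     terminated by xdr_zero.
 *   - Reply chunk: either xdr_zero, or one Write chunk.
 *
 * A segment is rpcrdma_segment_maxsz words: an R_key (handle), a byte
 * length, and a 64-bit offset.
 */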

#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/spinlock.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <linux/sunrpc/svc_rdma.h>

#define RPCDBG_FACILITY RPCDBG_SVCXPRT

/* Returns the number of pad bytes needed to bring @len up to the
 * next XDR quad (4-byte) boundary.
 */
static u32 xdr_padsize(u32 len)
{
        return (len & 3) ? (4 - (len & 3)) : 0;
}

/* Returns length of transport header, in bytes.
 */
static unsigned int svc_rdma_reply_hdr_len(__be32 *rdma_resp)
{
        unsigned int nsegs;
        __be32 *p;

        p = rdma_resp;

        /* RPC-over-RDMA V1 replies never have a Read list. */
        p += rpcrdma_fixed_maxsz + 1;

        /* Skip Write list. */
        while (*p++ != xdr_zero) {
                nsegs = be32_to_cpup(p++);
                p += nsegs * rpcrdma_segment_maxsz;
        }

        /* Skip Reply chunk. */
        if (*p++ != xdr_zero) {
                nsegs = be32_to_cpup(p++);
                p += nsegs * rpcrdma_segment_maxsz;
        }

        return (unsigned long)p - (unsigned long)rdma_resp;
}

/* One Write chunk is copied from Call transport header to Reply
 * transport header. Each segment's length field is updated to
 * reflect the number of bytes consumed in that segment.
 *
 * Returns number of segments in this chunk.
 */
static unsigned int xdr_encode_write_chunk(__be32 *dst, __be32 *src,
                                           unsigned int remaining)
{
        unsigned int i, nsegs;
        u32 seg_len;

        /* Write list discriminator */
        *dst++ = *src++;

        /* number of segments in this chunk */
        nsegs = be32_to_cpup(src);
        *dst++ = *src++;

        for (i = nsegs; i; i--) {
                /* segment's RDMA handle */
                *dst++ = *src++;

                /* bytes returned in this segment */
                seg_len = be32_to_cpu(*src);
                if (remaining >= seg_len) {
                        /* entire segment was consumed */
                        *dst = *src;
                        remaining -= seg_len;
                } else {
                        /* segment only partly filled */
                        *dst = cpu_to_be32(remaining);
                        remaining = 0;
                }
                dst++; src++;

                /* segment's RDMA offset */
                *dst++ = *src++;
                *dst++ = *src++;
        }

        return nsegs;
}
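
/* An illustrative example of the length rewriting above: if a chunk
 * has two 8192-byte segments and the Reply consumed 10000 bytes of
 * payload, the first segment's length is copied through as 8192, the
 * second is rewritten to 1808, and any further segments would be
 * rewritten to zero.
 */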

/* The client provided a Write list in the Call message. Fill in
 * the segments in the first Write chunk in the Reply's transport
 * header with the number of bytes consumed in each segment.
 * Remaining chunks are returned unused.
 *
 * Assumptions:
 * - Client has provided only one Write chunk
 */
static void svc_rdma_xdr_encode_write_list(__be32 *rdma_resp, __be32 *wr_ch,
                                           unsigned int consumed)
{
        unsigned int nsegs;
        __be32 *p, *q;

        /* RPC-over-RDMA V1 replies never have a Read list. */
        p = rdma_resp + rpcrdma_fixed_maxsz + 1;

        q = wr_ch;
        while (*q != xdr_zero) {
                nsegs = xdr_encode_write_chunk(p, q, consumed);
                q += 2 + nsegs * rpcrdma_segment_maxsz;
                p += 2 + nsegs * rpcrdma_segment_maxsz;
                consumed = 0;
        }

        /* Terminate Write list */
        *p++ = xdr_zero;

        /* Reply chunk discriminator; may be replaced later */
        *p = xdr_zero;
}

/* The client provided a Reply chunk in the Call message. Fill in
 * the segments in the Reply chunk in the Reply message with the
 * number of bytes consumed in each segment.
 *
 * Assumptions:
 * - Reply can always fit in the provided Reply chunk
 */
static void svc_rdma_xdr_encode_reply_chunk(__be32 *rdma_resp, __be32 *rp_ch,
                                            unsigned int consumed)
{
        __be32 *p;

        /* Find the Reply chunk in the Reply's xprt header.
         * RPC-over-RDMA V1 replies never have a Read list.
         */
        p = rdma_resp + rpcrdma_fixed_maxsz + 1;

        /* Skip past Write list */
        while (*p++ != xdr_zero)
                p += 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;

        xdr_encode_write_chunk(p, rp_ch, consumed);
}

int svc_rdma_map_xdr(struct svcxprt_rdma *xprt,
                     struct xdr_buf *xdr,
                     struct svc_rdma_req_map *vec,
                     bool write_chunk_present)
{
        int sge_no;
        u32 sge_bytes;
        u32 page_bytes;
        u32 page_off;
        int page_no;

        if (xdr->len !=
            (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) {
                pr_err("svcrdma: %s: XDR buffer length error\n", __func__);
                return -EIO;
        }

        /* Skip the first sge, this is for the RPCRDMA header */
        sge_no = 1;

        /* Head SGE */
        vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
        vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
        sge_no++;

        /* pages SGE */
        page_no = 0;
        page_bytes = xdr->page_len;
        page_off = xdr->page_base;
        while (page_bytes) {
                vec->sge[sge_no].iov_base =
                        page_address(xdr->pages[page_no]) + page_off;
                sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
                page_bytes -= sge_bytes;
                vec->sge[sge_no].iov_len = sge_bytes;

                sge_no++;
                page_no++;
                page_off = 0; /* reset for next time through loop */
        }

        /* Tail SGE */
        if (xdr->tail[0].iov_len) {
                unsigned char *base = xdr->tail[0].iov_base;
                size_t len = xdr->tail[0].iov_len;
                u32 xdr_pad = xdr_padsize(xdr->page_len);

                if (write_chunk_present && xdr_pad) {
                        base += xdr_pad;
                        len -= xdr_pad;
                }

                if (len) {
                        vec->sge[sge_no].iov_base = base;
                        vec->sge[sge_no].iov_len = len;
                        sge_no++;
                }
        }

        dprintk("svcrdma: %s: sge_no %d page_no %d "
                "page_base %u page_len %u head_len %zu tail_len %zu\n",
                __func__, sge_no, page_no, xdr->page_base, xdr->page_len,
                xdr->head[0].iov_len, xdr->tail[0].iov_len);

        vec->count = sge_no;
        return 0;
}
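
/* As an illustration of the mapping above (assuming 4KB pages): an
 * xdr_buf with a 120-byte head, an 8192-byte page list starting at
 * page_base 0, and a 4-byte tail yields sge[1] = head, sge[2] and
 * sge[3] = one full page each, sge[4] = tail, and vec->count = 5.
 * sge[0] remains reserved for the transport header.
 */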

/* Parse the RPC Call's transport header to find the client-provided
 * Write list and Reply chunk. On return, *write and *reply point to
 * those chunk lists, or are set to NULL if the Call did not provide
 * them.
 */
static void svc_rdma_get_write_arrays(__be32 *rdma_argp,
                                      __be32 **write, __be32 **reply)
{
        __be32 *p;

        p = rdma_argp + rpcrdma_fixed_maxsz;

        /* Read list */
        while (*p++ != xdr_zero)
                p += 5;

        /* Write list */
        if (*p != xdr_zero) {
                *write = p;
                while (*p++ != xdr_zero)
                        p += 1 + be32_to_cpu(*p) * 4;
        } else {
                *write = NULL;
                p++;
        }

        /* Reply chunk */
        if (*p != xdr_zero)
                *reply = p;
        else
                *reply = NULL;
}

/* RPC-over-RDMA Version One private extension: Remote Invalidation.
 * Responder's choice: requester signals it can handle Send With
 * Invalidate, and responder chooses one rkey to invalidate.
 *
 * Find a candidate rkey to invalidate when sending a reply. Picks the
 * first R_key it finds in the chunk lists.
 *
 * Returns zero if RPC's chunk lists are empty.
 */
static u32 svc_rdma_get_inv_rkey(__be32 *rdma_argp,
                                 __be32 *wr_lst, __be32 *rp_ch)
{
        __be32 *p;

        p = rdma_argp + rpcrdma_fixed_maxsz;
        if (*p != xdr_zero)
                p += 2;
        else if (wr_lst && be32_to_cpup(wr_lst + 1))
                p = wr_lst + 2;
        else if (rp_ch && be32_to_cpup(rp_ch + 1))
                p = rp_ch + 2;
        else
                return 0;
        return be32_to_cpup(p);
}

/* ib_dma_map_page() is used here because svc_rdma_dma_unmap()
 * is used during completion to DMA-unmap this memory, and
 * it uses ib_dma_unmap_page() exclusively.
 */
static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma,
                                struct svc_rdma_op_ctxt *ctxt,
                                unsigned int sge_no,
                                unsigned char *base,
                                unsigned int len)
{
        unsigned long offset = (unsigned long)base & ~PAGE_MASK;
        struct ib_device *dev = rdma->sc_cm_id->device;
        dma_addr_t dma_addr;

        dma_addr = ib_dma_map_page(dev, virt_to_page(base),
                                   offset, len, DMA_TO_DEVICE);
        if (ib_dma_mapping_error(dev, dma_addr))
                return -EIO;

        ctxt->sge[sge_no].addr = dma_addr;
        ctxt->sge[sge_no].length = len;
        ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
        svc_rdma_count_mappings(rdma, ctxt);
        return 0;
}

static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
                                 struct svc_rdma_op_ctxt *ctxt,
                                 unsigned int sge_no,
                                 struct page *page,
                                 unsigned int offset,
                                 unsigned int len)
{
        struct ib_device *dev = rdma->sc_cm_id->device;
        dma_addr_t dma_addr;

        dma_addr = ib_dma_map_page(dev, page, offset, len, DMA_TO_DEVICE);
        if (ib_dma_mapping_error(dev, dma_addr))
                return -EIO;

        ctxt->sge[sge_no].addr = dma_addr;
        ctxt->sge[sge_no].length = len;
        ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
        svc_rdma_count_mappings(rdma, ctxt);
        return 0;
}

/**
 * svc_rdma_map_reply_hdr - DMA map the transport header buffer
 * @rdma: controlling transport
 * @ctxt: op_ctxt for the Send WR
 * @rdma_resp: buffer containing transport header
 * @len: length of transport header
 *
 * Returns:
 *      %0 if the header is DMA mapped,
 *      %-EIO if DMA mapping failed.
 */
int svc_rdma_map_reply_hdr(struct svcxprt_rdma *rdma,
                           struct svc_rdma_op_ctxt *ctxt,
                           __be32 *rdma_resp,
                           unsigned int len)
{
        ctxt->direction = DMA_TO_DEVICE;
        ctxt->pages[0] = virt_to_page(rdma_resp);
        ctxt->count = 1;
        return svc_rdma_dma_map_page(rdma, ctxt, 0, ctxt->pages[0], 0, len);
}
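
/* A minimal usage sketch for the header-only path (svc_rdma_send_error()
 * at the bottom of this file follows the same pattern; the error label
 * below is illustrative):
 *
 *      ctxt = svc_rdma_get_context(rdma);
 *      ret = svc_rdma_map_reply_hdr(rdma, ctxt, rdma_resp, len);
 *      if (ret)
 *              goto out_err;
 *      ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0);
 *
 * num_sge is 1 because only the transport header is being sent, and a
 * zero inv_rkey selects a plain Send. On failure after the header has
 * been mapped, callers in this file undo the work with
 * svc_rdma_unmap_dma() and svc_rdma_put_context().
 */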

/* Load the xdr_buf into the ctxt's sge array, and DMA map each
 * element as it is added.
 *
 * Returns the number of sge elements loaded on success, or
 * a negative errno on failure.
 */
static int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
                                  struct svc_rdma_op_ctxt *ctxt,
                                  struct xdr_buf *xdr, __be32 *wr_lst)
{
        unsigned int len, sge_no, remaining, page_off;
        struct page **ppages;
        unsigned char *base;
        u32 xdr_pad;
        int ret;

        sge_no = 1;

        ret = svc_rdma_dma_map_buf(rdma, ctxt, sge_no++,
                                   xdr->head[0].iov_base,
                                   xdr->head[0].iov_len);
        if (ret < 0)
                return ret;

        /* If a Write chunk is present, the xdr_buf's page list
         * is not included inline. However the Upper Layer may
         * have added XDR padding in the tail buffer, and that
         * should not be included inline.
         */
        if (wr_lst) {
                base = xdr->tail[0].iov_base;
                len = xdr->tail[0].iov_len;
                xdr_pad = xdr_padsize(xdr->page_len);

                if (len && xdr_pad) {
                        base += xdr_pad;
                        len -= xdr_pad;
                }

                goto tail;
        }

        ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
        page_off = xdr->page_base & ~PAGE_MASK;
        remaining = xdr->page_len;
        while (remaining) {
                len = min_t(u32, PAGE_SIZE - page_off, remaining);

                ret = svc_rdma_dma_map_page(rdma, ctxt, sge_no++,
                                            *ppages++, page_off, len);
                if (ret < 0)
                        return ret;

                remaining -= len;
                page_off = 0;
        }

        base = xdr->tail[0].iov_base;
        len = xdr->tail[0].iov_len;
tail:
        if (len) {
                ret = svc_rdma_dma_map_buf(rdma, ctxt, sge_no++, base, len);
                if (ret < 0)
                        return ret;
        }

        return sge_no - 1;
}

/* The svc_rqst and all resources it owns are released as soon as
 * svc_rdma_sendto returns. Transfer pages under I/O to the ctxt
 * so they are released by the Send completion handler.
 */
static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
                                   struct svc_rdma_op_ctxt *ctxt)
{
        int i, pages = rqstp->rq_next_page - rqstp->rq_respages;

        ctxt->count += pages;
        for (i = 0; i < pages; i++) {
                ctxt->pages[i + 1] = rqstp->rq_respages[i];
                rqstp->rq_respages[i] = NULL;
        }
        rqstp->rq_next_page = rqstp->rq_respages + 1;
}

/**
 * svc_rdma_post_send_wr - Set up and post one Send Work Request
 * @rdma: controlling transport
 * @ctxt: op_ctxt for transmitting the Send WR
 * @num_sge: number of SGEs to send
 * @inv_rkey: R_key argument to Send With Invalidate, or zero
 *
 * Returns:
 *      %0 if the Send* was posted successfully,
 *      %-ENOTCONN if the connection was lost or dropped,
 *      %-EINVAL if there was a problem with the Send we built,
 *      %-ENOMEM if ib_post_send failed.
 */
int svc_rdma_post_send_wr(struct svcxprt_rdma *rdma,
                          struct svc_rdma_op_ctxt *ctxt, int num_sge,
                          u32 inv_rkey)
{
        struct ib_send_wr *send_wr = &ctxt->send_wr;

        dprintk("svcrdma: posting Send WR with %u sge(s)\n", num_sge);

        send_wr->next = NULL;
        ctxt->cqe.done = svc_rdma_wc_send;
        send_wr->wr_cqe = &ctxt->cqe;
        send_wr->sg_list = ctxt->sge;
        send_wr->num_sge = num_sge;
        send_wr->send_flags = IB_SEND_SIGNALED;
        if (inv_rkey) {
                send_wr->opcode = IB_WR_SEND_WITH_INV;
                send_wr->ex.invalidate_rkey = inv_rkey;
        } else {
                send_wr->opcode = IB_WR_SEND;
        }

        return svc_rdma_send(rdma, send_wr);
}
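
/* For example, in svc_rdma_send_reply_msg() below, num_sge is 1
 * (transport header in sge[0]) plus the count returned by
 * svc_rdma_map_reply_msg(); a typical RDMA_MSG reply with a one-page
 * payload and a non-empty tail therefore posts a Send with
 * num_sge == 4 (header, head, one page, tail).
 */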

/* Prepare the portion of the RPC Reply that will be transmitted
 * via RDMA Send. The RPC-over-RDMA transport header is prepared
 * in sge[0], and the RPC xdr_buf is prepared in following sges.
 *
 * Depending on whether a Write list or Reply chunk is present,
 * the server may send all, a portion of, or none of the xdr_buf.
 * In the latter case, only the transport header (sge[0]) is
 * transmitted.
 *
 * RDMA Send is the last step of transmitting an RPC reply. Pages
 * involved in the earlier RDMA Writes are here transferred out
 * of the rqstp and into the ctxt's page array. These pages are
 * DMA unmapped by each Write completion, but the subsequent Send
 * completion finally releases these pages.
 *
 * Assumptions:
 * - The Reply's transport header will never be larger than a page.
 */
static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
                                   __be32 *rdma_argp, __be32 *rdma_resp,
                                   struct svc_rqst *rqstp,
                                   __be32 *wr_lst, __be32 *rp_ch)
{
        struct svc_rdma_op_ctxt *ctxt;
        u32 inv_rkey;
        int ret;

        dprintk("svcrdma: sending %s reply: head=%zu, pagelen=%u, tail=%zu\n",
                (rp_ch ? "RDMA_NOMSG" : "RDMA_MSG"),
                rqstp->rq_res.head[0].iov_len,
                rqstp->rq_res.page_len,
                rqstp->rq_res.tail[0].iov_len);

        ctxt = svc_rdma_get_context(rdma);

        ret = svc_rdma_map_reply_hdr(rdma, ctxt, rdma_resp,
                                     svc_rdma_reply_hdr_len(rdma_resp));
        if (ret < 0)
                goto err;

        if (!rp_ch) {
                ret = svc_rdma_map_reply_msg(rdma, ctxt,
                                             &rqstp->rq_res, wr_lst);
                if (ret < 0)
                        goto err;
        }

        svc_rdma_save_io_pages(rqstp, ctxt);

        inv_rkey = 0;
        if (rdma->sc_snd_w_inv)
                inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_lst, rp_ch);
        ret = svc_rdma_post_send_wr(rdma, ctxt, 1 + ret, inv_rkey);
        if (ret)
                goto err;

        return 0;

err:
        pr_err("svcrdma: failed to post Send WR (%d)\n", ret);
        svc_rdma_unmap_dma(ctxt);
        svc_rdma_put_context(ctxt, 1);
        return ret;
}

/* Nothing to do here: the RPC-over-RDMA transport header is built
 * separately in svc_rdma_sendto(), not in the rq_res head.
 */
void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
{
}

/**
 * svc_rdma_sendto - Transmit an RPC reply
 * @rqstp: processed RPC request, reply XDR already in ::rq_res
 *
 * Any resources still associated with @rqstp are released upon return.
 * If no reply message was possible, the connection is closed.
 *
 * Returns:
 *      %0 if an RPC reply has been successfully posted,
 *      %-ENOMEM if a resource shortage occurred (connection is lost),
 *      %-ENOTCONN if posting failed (connection is lost).
 */
int svc_rdma_sendto(struct svc_rqst *rqstp)
{
        struct svc_xprt *xprt = rqstp->rq_xprt;
        struct svcxprt_rdma *rdma =
                container_of(xprt, struct svcxprt_rdma, sc_xprt);
        __be32 *p, *rdma_argp, *rdma_resp, *wr_lst, *rp_ch;
        struct xdr_buf *xdr = &rqstp->rq_res;
        struct page *res_page;
        int ret;

        /* Find the call's chunk lists to decide how to send the reply.
         * Receive places the Call's xprt header at the start of page 0.
         */
        rdma_argp = page_address(rqstp->rq_pages[0]);
        svc_rdma_get_write_arrays(rdma_argp, &wr_lst, &rp_ch);

        dprintk("svcrdma: preparing response for XID 0x%08x\n",
                be32_to_cpup(rdma_argp));

        /* Create the RDMA response header. xprt->xpt_mutex,
         * acquired in svc_send(), serializes RPC replies. The
         * code path below that inserts the credit grant value
         * into each transport header runs only inside this
         * critical section.
         */
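
        /* The fixed portion written below echoes the Call's XID and
         * version, inserts this connection's credit grant, and selects
         * rdma_nomsg when the whole Reply goes via the Reply chunk
         * (rdma_msg otherwise). The three xdr_zero words start the
         * Read list, Write list, and Reply chunk as empty; the encoders
         * below overwrite the latter two when the Call provided chunks.
         */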
        ret = -ENOMEM;
        res_page = alloc_page(GFP_KERNEL);
        if (!res_page)
                goto err0;
        rdma_resp = page_address(res_page);

        p = rdma_resp;
        *p++ = *rdma_argp;
        *p++ = *(rdma_argp + 1);
        *p++ = rdma->sc_fc_credits;
        *p++ = rp_ch ? rdma_nomsg : rdma_msg;

        /* Start with empty chunks */
        *p++ = xdr_zero;
        *p++ = xdr_zero;
        *p = xdr_zero;

        if (wr_lst) {
                /* XXX: Presume the client sent only one Write chunk */
                ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr);
                if (ret < 0)
                        goto err1;
                svc_rdma_xdr_encode_write_list(rdma_resp, wr_lst, ret);
        }
        if (rp_ch) {
                ret = svc_rdma_send_reply_chunk(rdma, rp_ch, wr_lst, xdr);
                if (ret < 0)
                        goto err1;
                svc_rdma_xdr_encode_reply_chunk(rdma_resp, rp_ch, ret);
        }

        ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
        if (ret)
                goto err1;
        ret = svc_rdma_send_reply_msg(rdma, rdma_argp, rdma_resp, rqstp,
                                      wr_lst, rp_ch);
        if (ret < 0)
                goto err0;
        return 0;

err1:
        put_page(res_page);
err0:
        pr_err("svcrdma: Could not send reply, err=%d. Closing transport.\n",
               ret);
        set_bit(XPT_CLOSE, &xprt->xpt_flags);
        return -ENOTCONN;
}

void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
                         int status)
{
        struct page *p;
        struct svc_rdma_op_ctxt *ctxt;
        enum rpcrdma_errcode err;
        __be32 *va;
        int length;
        int ret;

        ret = svc_rdma_repost_recv(xprt, GFP_KERNEL);
        if (ret)
                return;

        p = alloc_page(GFP_KERNEL);
        if (!p)
                return;
        va = page_address(p);

        /* XDR encode an error reply */
        err = ERR_CHUNK;
        if (status == -EPROTONOSUPPORT)
                err = ERR_VERS;
        length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);

        /* Map transport header; no RPC message payload */
        ctxt = svc_rdma_get_context(xprt);
        ret = svc_rdma_map_reply_hdr(xprt, ctxt, &rmsgp->rm_xid, length);
        if (ret) {
                dprintk("svcrdma: Error %d mapping send for protocol error\n",
                        ret);
                return;
        }

        ret = svc_rdma_post_send_wr(xprt, ctxt, 1, 0);
        if (ret) {
                dprintk("svcrdma: Error %d posting send for protocol error\n",
                        ret);
                svc_rdma_unmap_dma(ctxt);
                svc_rdma_put_context(ctxt, 1);
        }
}