1c06b540aSTom Tucker /* 2c06b540aSTom Tucker * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. 3c06b540aSTom Tucker * 4c06b540aSTom Tucker * This software is available to you under a choice of one of two 5c06b540aSTom Tucker * licenses. You may choose to be licensed under the terms of the GNU 6c06b540aSTom Tucker * General Public License (GPL) Version 2, available from the file 7c06b540aSTom Tucker * COPYING in the main directory of this source tree, or the BSD-type 8c06b540aSTom Tucker * license below: 9c06b540aSTom Tucker * 10c06b540aSTom Tucker * Redistribution and use in source and binary forms, with or without 11c06b540aSTom Tucker * modification, are permitted provided that the following conditions 12c06b540aSTom Tucker * are met: 13c06b540aSTom Tucker * 14c06b540aSTom Tucker * Redistributions of source code must retain the above copyright 15c06b540aSTom Tucker * notice, this list of conditions and the following disclaimer. 16c06b540aSTom Tucker * 17c06b540aSTom Tucker * Redistributions in binary form must reproduce the above 18c06b540aSTom Tucker * copyright notice, this list of conditions and the following 19c06b540aSTom Tucker * disclaimer in the documentation and/or other materials provided 20c06b540aSTom Tucker * with the distribution. 21c06b540aSTom Tucker * 22c06b540aSTom Tucker * Neither the name of the Network Appliance, Inc. nor the names of 23c06b540aSTom Tucker * its contributors may be used to endorse or promote products 24c06b540aSTom Tucker * derived from this software without specific prior written 25c06b540aSTom Tucker * permission. 26c06b540aSTom Tucker * 27c06b540aSTom Tucker * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 28c06b540aSTom Tucker * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 29c06b540aSTom Tucker * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 30c06b540aSTom Tucker * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 31c06b540aSTom Tucker * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 32c06b540aSTom Tucker * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 33c06b540aSTom Tucker * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 34c06b540aSTom Tucker * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 35c06b540aSTom Tucker * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 36c06b540aSTom Tucker * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 37c06b540aSTom Tucker * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 38c06b540aSTom Tucker * 39c06b540aSTom Tucker * Author: Tom Tucker <tom@opengridcomputing.com> 40c06b540aSTom Tucker */ 41c06b540aSTom Tucker 42c06b540aSTom Tucker #include <linux/sunrpc/debug.h> 43c06b540aSTom Tucker #include <linux/sunrpc/rpc_rdma.h> 44c06b540aSTom Tucker #include <linux/spinlock.h> 45c06b540aSTom Tucker #include <asm/unaligned.h> 46c06b540aSTom Tucker #include <rdma/ib_verbs.h> 47c06b540aSTom Tucker #include <rdma/rdma_cm.h> 48c06b540aSTom Tucker #include <linux/sunrpc/svc_rdma.h> 49c06b540aSTom Tucker 50c06b540aSTom Tucker #define RPCDBG_FACILITY RPCDBG_SVCXPRT 51c06b540aSTom Tucker 52c06b540aSTom Tucker /* Encode an XDR as an array of IB SGE 53c06b540aSTom Tucker * 54c06b540aSTom Tucker * Assumptions: 55c06b540aSTom Tucker * - head[0] is physically contiguous. 56c06b540aSTom Tucker * - tail[0] is physically contiguous. 57c06b540aSTom Tucker * - pages[] is not physically or virtually contigous and consists of 58c06b540aSTom Tucker * PAGE_SIZE elements. 59c06b540aSTom Tucker * 60c06b540aSTom Tucker * Output: 61c06b540aSTom Tucker * SGE[0] reserved for RCPRDMA header 62c06b540aSTom Tucker * SGE[1] data from xdr->head[] 63c06b540aSTom Tucker * SGE[2..sge_count-2] data from xdr->pages[] 64c06b540aSTom Tucker * SGE[sge_count-1] data from xdr->tail. 65c06b540aSTom Tucker * 66c06b540aSTom Tucker */ 67c06b540aSTom Tucker static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt, 68c06b540aSTom Tucker struct xdr_buf *xdr, 69c06b540aSTom Tucker struct ib_sge *sge, 70c06b540aSTom Tucker int *sge_count) 71c06b540aSTom Tucker { 72c06b540aSTom Tucker /* Max we need is the length of the XDR / pagesize + one for 73c06b540aSTom Tucker * head + one for tail + one for RPCRDMA header 74c06b540aSTom Tucker */ 75c06b540aSTom Tucker int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3; 76c06b540aSTom Tucker int sge_no; 77c06b540aSTom Tucker u32 byte_count = xdr->len; 78c06b540aSTom Tucker u32 sge_bytes; 79c06b540aSTom Tucker u32 page_bytes; 80c06b540aSTom Tucker int page_off; 81c06b540aSTom Tucker int page_no; 82c06b540aSTom Tucker 83c06b540aSTom Tucker /* Skip the first sge, this is for the RPCRDMA header */ 84c06b540aSTom Tucker sge_no = 1; 85c06b540aSTom Tucker 86c06b540aSTom Tucker /* Head SGE */ 87c06b540aSTom Tucker sge[sge_no].addr = ib_dma_map_single(xprt->sc_cm_id->device, 88c06b540aSTom Tucker xdr->head[0].iov_base, 89c06b540aSTom Tucker xdr->head[0].iov_len, 90c06b540aSTom Tucker DMA_TO_DEVICE); 91c06b540aSTom Tucker sge_bytes = min_t(u32, byte_count, xdr->head[0].iov_len); 92c06b540aSTom Tucker byte_count -= sge_bytes; 93c06b540aSTom Tucker sge[sge_no].length = sge_bytes; 94c06b540aSTom Tucker sge[sge_no].lkey = xprt->sc_phys_mr->lkey; 95c06b540aSTom Tucker sge_no++; 96c06b540aSTom Tucker 97c06b540aSTom Tucker /* pages SGE */ 98c06b540aSTom Tucker page_no = 0; 99c06b540aSTom Tucker page_bytes = xdr->page_len; 100c06b540aSTom Tucker page_off = xdr->page_base; 101c06b540aSTom Tucker while (byte_count && page_bytes) { 102c06b540aSTom Tucker sge_bytes = min_t(u32, byte_count, (PAGE_SIZE-page_off)); 103c06b540aSTom Tucker sge[sge_no].addr = 104c06b540aSTom Tucker ib_dma_map_page(xprt->sc_cm_id->device, 105c06b540aSTom Tucker xdr->pages[page_no], page_off, 106c06b540aSTom Tucker sge_bytes, DMA_TO_DEVICE); 107c06b540aSTom Tucker sge_bytes = min(sge_bytes, page_bytes); 108c06b540aSTom Tucker byte_count -= sge_bytes; 109c06b540aSTom Tucker page_bytes -= sge_bytes; 110c06b540aSTom Tucker sge[sge_no].length = sge_bytes; 111c06b540aSTom Tucker sge[sge_no].lkey = xprt->sc_phys_mr->lkey; 112c06b540aSTom Tucker 113c06b540aSTom Tucker sge_no++; 114c06b540aSTom Tucker page_no++; 115c06b540aSTom Tucker page_off = 0; /* reset for next time through loop */ 116c06b540aSTom Tucker } 117c06b540aSTom Tucker 118c06b540aSTom Tucker /* Tail SGE */ 119c06b540aSTom Tucker if (byte_count && xdr->tail[0].iov_len) { 120c06b540aSTom Tucker sge[sge_no].addr = 121c06b540aSTom Tucker ib_dma_map_single(xprt->sc_cm_id->device, 122c06b540aSTom Tucker xdr->tail[0].iov_base, 123c06b540aSTom Tucker xdr->tail[0].iov_len, 124c06b540aSTom Tucker DMA_TO_DEVICE); 125c06b540aSTom Tucker sge_bytes = min_t(u32, byte_count, xdr->tail[0].iov_len); 126c06b540aSTom Tucker byte_count -= sge_bytes; 127c06b540aSTom Tucker sge[sge_no].length = sge_bytes; 128c06b540aSTom Tucker sge[sge_no].lkey = xprt->sc_phys_mr->lkey; 129c06b540aSTom Tucker sge_no++; 130c06b540aSTom Tucker } 131c06b540aSTom Tucker 132c06b540aSTom Tucker BUG_ON(sge_no > sge_max); 133c06b540aSTom Tucker BUG_ON(byte_count != 0); 134c06b540aSTom Tucker 135c06b540aSTom Tucker *sge_count = sge_no; 136c06b540aSTom Tucker return sge; 137c06b540aSTom Tucker } 138c06b540aSTom Tucker 139c06b540aSTom Tucker 140c06b540aSTom Tucker /* Assumptions: 141c06b540aSTom Tucker * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE 142c06b540aSTom Tucker */ 143c06b540aSTom Tucker static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, 144c06b540aSTom Tucker u32 rmr, u64 to, 145c06b540aSTom Tucker u32 xdr_off, int write_len, 146c06b540aSTom Tucker struct ib_sge *xdr_sge, int sge_count) 147c06b540aSTom Tucker { 148c06b540aSTom Tucker struct svc_rdma_op_ctxt *tmp_sge_ctxt; 149c06b540aSTom Tucker struct ib_send_wr write_wr; 150c06b540aSTom Tucker struct ib_sge *sge; 151c06b540aSTom Tucker int xdr_sge_no; 152c06b540aSTom Tucker int sge_no; 153c06b540aSTom Tucker int sge_bytes; 154c06b540aSTom Tucker int sge_off; 155c06b540aSTom Tucker int bc; 156c06b540aSTom Tucker struct svc_rdma_op_ctxt *ctxt; 157c06b540aSTom Tucker int ret = 0; 158c06b540aSTom Tucker 1593fedb3c5STom Tucker BUG_ON(sge_count > RPCSVC_MAXPAGES); 160c06b540aSTom Tucker dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, " 161c06b540aSTom Tucker "write_len=%d, xdr_sge=%p, sge_count=%d\n", 162bb50c801SRoland Dreier rmr, (unsigned long long)to, xdr_off, 163bb50c801SRoland Dreier write_len, xdr_sge, sge_count); 164c06b540aSTom Tucker 165c06b540aSTom Tucker ctxt = svc_rdma_get_context(xprt); 166c06b540aSTom Tucker ctxt->count = 0; 167c06b540aSTom Tucker tmp_sge_ctxt = svc_rdma_get_context(xprt); 168c06b540aSTom Tucker sge = tmp_sge_ctxt->sge; 169c06b540aSTom Tucker 170c06b540aSTom Tucker /* Find the SGE associated with xdr_off */ 171c06b540aSTom Tucker for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < sge_count; 172c06b540aSTom Tucker xdr_sge_no++) { 173c06b540aSTom Tucker if (xdr_sge[xdr_sge_no].length > bc) 174c06b540aSTom Tucker break; 175c06b540aSTom Tucker bc -= xdr_sge[xdr_sge_no].length; 176c06b540aSTom Tucker } 177c06b540aSTom Tucker 178c06b540aSTom Tucker sge_off = bc; 179c06b540aSTom Tucker bc = write_len; 180c06b540aSTom Tucker sge_no = 0; 181c06b540aSTom Tucker 182c06b540aSTom Tucker /* Copy the remaining SGE */ 183c06b540aSTom Tucker while (bc != 0 && xdr_sge_no < sge_count) { 184c06b540aSTom Tucker sge[sge_no].addr = xdr_sge[xdr_sge_no].addr + sge_off; 185c06b540aSTom Tucker sge[sge_no].lkey = xdr_sge[xdr_sge_no].lkey; 186c06b540aSTom Tucker sge_bytes = min((size_t)bc, 187c06b540aSTom Tucker (size_t)(xdr_sge[xdr_sge_no].length-sge_off)); 188c06b540aSTom Tucker sge[sge_no].length = sge_bytes; 189c06b540aSTom Tucker 190c06b540aSTom Tucker sge_off = 0; 191c06b540aSTom Tucker sge_no++; 192c06b540aSTom Tucker xdr_sge_no++; 193c06b540aSTom Tucker bc -= sge_bytes; 194c06b540aSTom Tucker } 195c06b540aSTom Tucker 196c06b540aSTom Tucker BUG_ON(bc != 0); 197c06b540aSTom Tucker BUG_ON(xdr_sge_no > sge_count); 198c06b540aSTom Tucker 199c06b540aSTom Tucker /* Prepare WRITE WR */ 200c06b540aSTom Tucker memset(&write_wr, 0, sizeof write_wr); 201c06b540aSTom Tucker ctxt->wr_op = IB_WR_RDMA_WRITE; 202c06b540aSTom Tucker write_wr.wr_id = (unsigned long)ctxt; 203c06b540aSTom Tucker write_wr.sg_list = &sge[0]; 204c06b540aSTom Tucker write_wr.num_sge = sge_no; 205c06b540aSTom Tucker write_wr.opcode = IB_WR_RDMA_WRITE; 206c06b540aSTom Tucker write_wr.send_flags = IB_SEND_SIGNALED; 207c06b540aSTom Tucker write_wr.wr.rdma.rkey = rmr; 208c06b540aSTom Tucker write_wr.wr.rdma.remote_addr = to; 209c06b540aSTom Tucker 210c06b540aSTom Tucker /* Post It */ 211c06b540aSTom Tucker atomic_inc(&rdma_stat_write); 212c06b540aSTom Tucker if (svc_rdma_send(xprt, &write_wr)) { 213c06b540aSTom Tucker svc_rdma_put_context(ctxt, 1); 214c06b540aSTom Tucker /* Fatal error, close transport */ 215c06b540aSTom Tucker ret = -EIO; 216c06b540aSTom Tucker } 217c06b540aSTom Tucker svc_rdma_put_context(tmp_sge_ctxt, 0); 218c06b540aSTom Tucker return ret; 219c06b540aSTom Tucker } 220c06b540aSTom Tucker 221c06b540aSTom Tucker static int send_write_chunks(struct svcxprt_rdma *xprt, 222c06b540aSTom Tucker struct rpcrdma_msg *rdma_argp, 223c06b540aSTom Tucker struct rpcrdma_msg *rdma_resp, 224c06b540aSTom Tucker struct svc_rqst *rqstp, 225c06b540aSTom Tucker struct ib_sge *sge, 226c06b540aSTom Tucker int sge_count) 227c06b540aSTom Tucker { 228c06b540aSTom Tucker u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len; 229c06b540aSTom Tucker int write_len; 230c06b540aSTom Tucker int max_write; 231c06b540aSTom Tucker u32 xdr_off; 232c06b540aSTom Tucker int chunk_off; 233c06b540aSTom Tucker int chunk_no; 234c06b540aSTom Tucker struct rpcrdma_write_array *arg_ary; 235c06b540aSTom Tucker struct rpcrdma_write_array *res_ary; 236c06b540aSTom Tucker int ret; 237c06b540aSTom Tucker 238c06b540aSTom Tucker arg_ary = svc_rdma_get_write_array(rdma_argp); 239c06b540aSTom Tucker if (!arg_ary) 240c06b540aSTom Tucker return 0; 241c06b540aSTom Tucker res_ary = (struct rpcrdma_write_array *) 242c06b540aSTom Tucker &rdma_resp->rm_body.rm_chunks[1]; 243c06b540aSTom Tucker 244c06b540aSTom Tucker max_write = xprt->sc_max_sge * PAGE_SIZE; 245c06b540aSTom Tucker 246c06b540aSTom Tucker /* Write chunks start at the pagelist */ 247c06b540aSTom Tucker for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; 248c06b540aSTom Tucker xfer_len && chunk_no < arg_ary->wc_nchunks; 249c06b540aSTom Tucker chunk_no++) { 250c06b540aSTom Tucker struct rpcrdma_segment *arg_ch; 251c06b540aSTom Tucker u64 rs_offset; 252c06b540aSTom Tucker 253c06b540aSTom Tucker arg_ch = &arg_ary->wc_array[chunk_no].wc_target; 254c06b540aSTom Tucker write_len = min(xfer_len, arg_ch->rs_length); 255c06b540aSTom Tucker 256c06b540aSTom Tucker /* Prepare the response chunk given the length actually 257c06b540aSTom Tucker * written */ 258c06b540aSTom Tucker rs_offset = get_unaligned(&(arg_ch->rs_offset)); 259c06b540aSTom Tucker svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no, 260c06b540aSTom Tucker arg_ch->rs_handle, 261c06b540aSTom Tucker rs_offset, 262c06b540aSTom Tucker write_len); 263c06b540aSTom Tucker chunk_off = 0; 264c06b540aSTom Tucker while (write_len) { 265c06b540aSTom Tucker int this_write; 266c06b540aSTom Tucker this_write = min(write_len, max_write); 267c06b540aSTom Tucker ret = send_write(xprt, rqstp, 268c06b540aSTom Tucker arg_ch->rs_handle, 269c06b540aSTom Tucker rs_offset + chunk_off, 270c06b540aSTom Tucker xdr_off, 271c06b540aSTom Tucker this_write, 272c06b540aSTom Tucker sge, 273c06b540aSTom Tucker sge_count); 274c06b540aSTom Tucker if (ret) { 275c06b540aSTom Tucker dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", 276c06b540aSTom Tucker ret); 277c06b540aSTom Tucker return -EIO; 278c06b540aSTom Tucker } 279c06b540aSTom Tucker chunk_off += this_write; 280c06b540aSTom Tucker xdr_off += this_write; 281c06b540aSTom Tucker xfer_len -= this_write; 282c06b540aSTom Tucker write_len -= this_write; 283c06b540aSTom Tucker } 284c06b540aSTom Tucker } 285c06b540aSTom Tucker /* Update the req with the number of chunks actually used */ 286c06b540aSTom Tucker svc_rdma_xdr_encode_write_list(rdma_resp, chunk_no); 287c06b540aSTom Tucker 288c06b540aSTom Tucker return rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len; 289c06b540aSTom Tucker } 290c06b540aSTom Tucker 291c06b540aSTom Tucker static int send_reply_chunks(struct svcxprt_rdma *xprt, 292c06b540aSTom Tucker struct rpcrdma_msg *rdma_argp, 293c06b540aSTom Tucker struct rpcrdma_msg *rdma_resp, 294c06b540aSTom Tucker struct svc_rqst *rqstp, 295c06b540aSTom Tucker struct ib_sge *sge, 296c06b540aSTom Tucker int sge_count) 297c06b540aSTom Tucker { 298c06b540aSTom Tucker u32 xfer_len = rqstp->rq_res.len; 299c06b540aSTom Tucker int write_len; 300c06b540aSTom Tucker int max_write; 301c06b540aSTom Tucker u32 xdr_off; 302c06b540aSTom Tucker int chunk_no; 303c06b540aSTom Tucker int chunk_off; 304c06b540aSTom Tucker struct rpcrdma_segment *ch; 305c06b540aSTom Tucker struct rpcrdma_write_array *arg_ary; 306c06b540aSTom Tucker struct rpcrdma_write_array *res_ary; 307c06b540aSTom Tucker int ret; 308c06b540aSTom Tucker 309c06b540aSTom Tucker arg_ary = svc_rdma_get_reply_array(rdma_argp); 310c06b540aSTom Tucker if (!arg_ary) 311c06b540aSTom Tucker return 0; 312c06b540aSTom Tucker /* XXX: need to fix when reply lists occur with read-list and or 313c06b540aSTom Tucker * write-list */ 314c06b540aSTom Tucker res_ary = (struct rpcrdma_write_array *) 315c06b540aSTom Tucker &rdma_resp->rm_body.rm_chunks[2]; 316c06b540aSTom Tucker 317c06b540aSTom Tucker max_write = xprt->sc_max_sge * PAGE_SIZE; 318c06b540aSTom Tucker 319c06b540aSTom Tucker /* xdr offset starts at RPC message */ 320c06b540aSTom Tucker for (xdr_off = 0, chunk_no = 0; 321c06b540aSTom Tucker xfer_len && chunk_no < arg_ary->wc_nchunks; 322c06b540aSTom Tucker chunk_no++) { 323c06b540aSTom Tucker u64 rs_offset; 324c06b540aSTom Tucker ch = &arg_ary->wc_array[chunk_no].wc_target; 325c06b540aSTom Tucker write_len = min(xfer_len, ch->rs_length); 326c06b540aSTom Tucker 327c06b540aSTom Tucker 328c06b540aSTom Tucker /* Prepare the reply chunk given the length actually 329c06b540aSTom Tucker * written */ 330c06b540aSTom Tucker rs_offset = get_unaligned(&(ch->rs_offset)); 331c06b540aSTom Tucker svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no, 332c06b540aSTom Tucker ch->rs_handle, rs_offset, 333c06b540aSTom Tucker write_len); 334c06b540aSTom Tucker chunk_off = 0; 335c06b540aSTom Tucker while (write_len) { 336c06b540aSTom Tucker int this_write; 337c06b540aSTom Tucker 338c06b540aSTom Tucker this_write = min(write_len, max_write); 339c06b540aSTom Tucker ret = send_write(xprt, rqstp, 340c06b540aSTom Tucker ch->rs_handle, 341c06b540aSTom Tucker rs_offset + chunk_off, 342c06b540aSTom Tucker xdr_off, 343c06b540aSTom Tucker this_write, 344c06b540aSTom Tucker sge, 345c06b540aSTom Tucker sge_count); 346c06b540aSTom Tucker if (ret) { 347c06b540aSTom Tucker dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", 348c06b540aSTom Tucker ret); 349c06b540aSTom Tucker return -EIO; 350c06b540aSTom Tucker } 351c06b540aSTom Tucker chunk_off += this_write; 352c06b540aSTom Tucker xdr_off += this_write; 353c06b540aSTom Tucker xfer_len -= this_write; 354c06b540aSTom Tucker write_len -= this_write; 355c06b540aSTom Tucker } 356c06b540aSTom Tucker } 357c06b540aSTom Tucker /* Update the req with the number of chunks actually used */ 358c06b540aSTom Tucker svc_rdma_xdr_encode_reply_array(res_ary, chunk_no); 359c06b540aSTom Tucker 360c06b540aSTom Tucker return rqstp->rq_res.len; 361c06b540aSTom Tucker } 362c06b540aSTom Tucker 363c06b540aSTom Tucker /* This function prepares the portion of the RPCRDMA message to be 364c06b540aSTom Tucker * sent in the RDMA_SEND. This function is called after data sent via 365c06b540aSTom Tucker * RDMA has already been transmitted. There are three cases: 366c06b540aSTom Tucker * - The RPCRDMA header, RPC header, and payload are all sent in a 367c06b540aSTom Tucker * single RDMA_SEND. This is the "inline" case. 368c06b540aSTom Tucker * - The RPCRDMA header and some portion of the RPC header and data 369c06b540aSTom Tucker * are sent via this RDMA_SEND and another portion of the data is 370c06b540aSTom Tucker * sent via RDMA. 371c06b540aSTom Tucker * - The RPCRDMA header [NOMSG] is sent in this RDMA_SEND and the RPC 372c06b540aSTom Tucker * header and data are all transmitted via RDMA. 373c06b540aSTom Tucker * In all three cases, this function prepares the RPCRDMA header in 374c06b540aSTom Tucker * sge[0], the 'type' parameter indicates the type to place in the 375c06b540aSTom Tucker * RPCRDMA header, and the 'byte_count' field indicates how much of 376c06b540aSTom Tucker * the XDR to include in this RDMA_SEND. 377c06b540aSTom Tucker */ 378c06b540aSTom Tucker static int send_reply(struct svcxprt_rdma *rdma, 379c06b540aSTom Tucker struct svc_rqst *rqstp, 380c06b540aSTom Tucker struct page *page, 381c06b540aSTom Tucker struct rpcrdma_msg *rdma_resp, 382c06b540aSTom Tucker struct svc_rdma_op_ctxt *ctxt, 383c06b540aSTom Tucker int sge_count, 384c06b540aSTom Tucker int byte_count) 385c06b540aSTom Tucker { 386c06b540aSTom Tucker struct ib_send_wr send_wr; 387c06b540aSTom Tucker int sge_no; 388c06b540aSTom Tucker int sge_bytes; 389c06b540aSTom Tucker int page_no; 390c06b540aSTom Tucker int ret; 391c06b540aSTom Tucker 3920e7f011aSTom Tucker /* Post a recv buffer to handle another request. */ 3930e7f011aSTom Tucker ret = svc_rdma_post_recv(rdma); 3940e7f011aSTom Tucker if (ret) { 3950e7f011aSTom Tucker printk(KERN_INFO 3960e7f011aSTom Tucker "svcrdma: could not post a receive buffer, err=%d." 3970e7f011aSTom Tucker "Closing transport %p.\n", ret, rdma); 3980e7f011aSTom Tucker set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); 3990e7f011aSTom Tucker return 0; 4000e7f011aSTom Tucker } 4010e7f011aSTom Tucker 402c06b540aSTom Tucker /* Prepare the context */ 403c06b540aSTom Tucker ctxt->pages[0] = page; 404c06b540aSTom Tucker ctxt->count = 1; 405c06b540aSTom Tucker 406c06b540aSTom Tucker /* Prepare the SGE for the RPCRDMA Header */ 407c06b540aSTom Tucker ctxt->sge[0].addr = 408c06b540aSTom Tucker ib_dma_map_page(rdma->sc_cm_id->device, 409c06b540aSTom Tucker page, 0, PAGE_SIZE, DMA_TO_DEVICE); 410c06b540aSTom Tucker ctxt->direction = DMA_TO_DEVICE; 411c06b540aSTom Tucker ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); 412c06b540aSTom Tucker ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey; 413c06b540aSTom Tucker 414c06b540aSTom Tucker /* Determine how many of our SGE are to be transmitted */ 415c06b540aSTom Tucker for (sge_no = 1; byte_count && sge_no < sge_count; sge_no++) { 416c06b540aSTom Tucker sge_bytes = min((size_t)ctxt->sge[sge_no].length, 417c06b540aSTom Tucker (size_t)byte_count); 418c06b540aSTom Tucker byte_count -= sge_bytes; 419c06b540aSTom Tucker } 420c06b540aSTom Tucker BUG_ON(byte_count != 0); 421c06b540aSTom Tucker 422c06b540aSTom Tucker /* Save all respages in the ctxt and remove them from the 423c06b540aSTom Tucker * respages array. They are our pages until the I/O 424c06b540aSTom Tucker * completes. 425c06b540aSTom Tucker */ 426c06b540aSTom Tucker for (page_no = 0; page_no < rqstp->rq_resused; page_no++) { 427c06b540aSTom Tucker ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; 428c06b540aSTom Tucker ctxt->count++; 429c06b540aSTom Tucker rqstp->rq_respages[page_no] = NULL; 430c06b540aSTom Tucker } 431c06b540aSTom Tucker 432c06b540aSTom Tucker BUG_ON(sge_no > rdma->sc_max_sge); 433c06b540aSTom Tucker memset(&send_wr, 0, sizeof send_wr); 434c06b540aSTom Tucker ctxt->wr_op = IB_WR_SEND; 435c06b540aSTom Tucker send_wr.wr_id = (unsigned long)ctxt; 436c06b540aSTom Tucker send_wr.sg_list = ctxt->sge; 437c06b540aSTom Tucker send_wr.num_sge = sge_no; 438c06b540aSTom Tucker send_wr.opcode = IB_WR_SEND; 439c06b540aSTom Tucker send_wr.send_flags = IB_SEND_SIGNALED; 440c06b540aSTom Tucker 441c06b540aSTom Tucker ret = svc_rdma_send(rdma, &send_wr); 442c06b540aSTom Tucker if (ret) 443c06b540aSTom Tucker svc_rdma_put_context(ctxt, 1); 444c06b540aSTom Tucker 445c06b540aSTom Tucker return ret; 446c06b540aSTom Tucker } 447c06b540aSTom Tucker 448c06b540aSTom Tucker void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp) 449c06b540aSTom Tucker { 450c06b540aSTom Tucker } 451c06b540aSTom Tucker 452c06b540aSTom Tucker /* 453c06b540aSTom Tucker * Return the start of an xdr buffer. 454c06b540aSTom Tucker */ 455c06b540aSTom Tucker static void *xdr_start(struct xdr_buf *xdr) 456c06b540aSTom Tucker { 457c06b540aSTom Tucker return xdr->head[0].iov_base - 458c06b540aSTom Tucker (xdr->len - 459c06b540aSTom Tucker xdr->page_len - 460c06b540aSTom Tucker xdr->tail[0].iov_len - 461c06b540aSTom Tucker xdr->head[0].iov_len); 462c06b540aSTom Tucker } 463c06b540aSTom Tucker 464c06b540aSTom Tucker int svc_rdma_sendto(struct svc_rqst *rqstp) 465c06b540aSTom Tucker { 466c06b540aSTom Tucker struct svc_xprt *xprt = rqstp->rq_xprt; 467c06b540aSTom Tucker struct svcxprt_rdma *rdma = 468c06b540aSTom Tucker container_of(xprt, struct svcxprt_rdma, sc_xprt); 469c06b540aSTom Tucker struct rpcrdma_msg *rdma_argp; 470c06b540aSTom Tucker struct rpcrdma_msg *rdma_resp; 471c06b540aSTom Tucker struct rpcrdma_write_array *reply_ary; 472c06b540aSTom Tucker enum rpcrdma_proc reply_type; 473c06b540aSTom Tucker int ret; 474c06b540aSTom Tucker int inline_bytes; 475c06b540aSTom Tucker struct ib_sge *sge; 476c06b540aSTom Tucker int sge_count = 0; 477c06b540aSTom Tucker struct page *res_page; 478c06b540aSTom Tucker struct svc_rdma_op_ctxt *ctxt; 479c06b540aSTom Tucker 480c06b540aSTom Tucker dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); 481c06b540aSTom Tucker 482c06b540aSTom Tucker /* Get the RDMA request header. */ 483c06b540aSTom Tucker rdma_argp = xdr_start(&rqstp->rq_arg); 484c06b540aSTom Tucker 485c06b540aSTom Tucker /* Build an SGE for the XDR */ 486c06b540aSTom Tucker ctxt = svc_rdma_get_context(rdma); 487c06b540aSTom Tucker ctxt->direction = DMA_TO_DEVICE; 488c06b540aSTom Tucker sge = xdr_to_sge(rdma, &rqstp->rq_res, ctxt->sge, &sge_count); 489c06b540aSTom Tucker 490c06b540aSTom Tucker inline_bytes = rqstp->rq_res.len; 491c06b540aSTom Tucker 492c06b540aSTom Tucker /* Create the RDMA response header */ 493c06b540aSTom Tucker res_page = svc_rdma_get_page(); 494c06b540aSTom Tucker rdma_resp = page_address(res_page); 495c06b540aSTom Tucker reply_ary = svc_rdma_get_reply_array(rdma_argp); 496c06b540aSTom Tucker if (reply_ary) 497c06b540aSTom Tucker reply_type = RDMA_NOMSG; 498c06b540aSTom Tucker else 499c06b540aSTom Tucker reply_type = RDMA_MSG; 500c06b540aSTom Tucker svc_rdma_xdr_encode_reply_header(rdma, rdma_argp, 501c06b540aSTom Tucker rdma_resp, reply_type); 502c06b540aSTom Tucker 503c06b540aSTom Tucker /* Send any write-chunk data and build resp write-list */ 504c06b540aSTom Tucker ret = send_write_chunks(rdma, rdma_argp, rdma_resp, 505c06b540aSTom Tucker rqstp, sge, sge_count); 506c06b540aSTom Tucker if (ret < 0) { 507c06b540aSTom Tucker printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n", 508c06b540aSTom Tucker ret); 509c06b540aSTom Tucker goto error; 510c06b540aSTom Tucker } 511c06b540aSTom Tucker inline_bytes -= ret; 512c06b540aSTom Tucker 513c06b540aSTom Tucker /* Send any reply-list data and update resp reply-list */ 514c06b540aSTom Tucker ret = send_reply_chunks(rdma, rdma_argp, rdma_resp, 515c06b540aSTom Tucker rqstp, sge, sge_count); 516c06b540aSTom Tucker if (ret < 0) { 517c06b540aSTom Tucker printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n", 518c06b540aSTom Tucker ret); 519c06b540aSTom Tucker goto error; 520c06b540aSTom Tucker } 521c06b540aSTom Tucker inline_bytes -= ret; 522c06b540aSTom Tucker 523c06b540aSTom Tucker ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, sge_count, 524c06b540aSTom Tucker inline_bytes); 525c06b540aSTom Tucker dprintk("svcrdma: send_reply returns %d\n", ret); 526c06b540aSTom Tucker return ret; 527c06b540aSTom Tucker error: 528c06b540aSTom Tucker svc_rdma_put_context(ctxt, 0); 529c06b540aSTom Tucker put_page(res_page); 530c06b540aSTom Tucker return ret; 531c06b540aSTom Tucker } 532