/*
 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Tom Tucker <tom@opengridcomputing.com>
 */

#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/spinlock.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <linux/sunrpc/svc_rdma.h>

#define RPCDBG_FACILITY	RPCDBG_SVCXPRT

/* Encode an XDR as an array of IB SGE
 *
 * Assumptions:
 * - head[0] is physically contiguous.
 * - tail[0] is physically contiguous.
 * - pages[] is not physically or virtually contiguous and consists of
 *   PAGE_SIZE elements.
 *
 * Output:
 * SGE[0]              reserved for RPCRDMA header
 * SGE[1]              data from xdr->head[]
 * SGE[2..sge_count-2] data from xdr->pages[]
 * SGE[sge_count-1]    data from xdr->tail.
 *
 */
static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt,
				 struct xdr_buf *xdr,
				 struct ib_sge *sge,
				 int *sge_count)
{
	/* Max we need is the length of the XDR / pagesize + one for
	 * head + one for tail + one for RPCRDMA header
	 */
	int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
	int sge_no;
	u32 byte_count = xdr->len;
	u32 sge_bytes;
	u32 page_bytes;
	int page_off;
	int page_no;

	/* Skip the first sge, this is for the RPCRDMA header */
	sge_no = 1;

	/* Head SGE */
	sge[sge_no].addr = ib_dma_map_single(xprt->sc_cm_id->device,
					     xdr->head[0].iov_base,
					     xdr->head[0].iov_len,
					     DMA_TO_DEVICE);
	sge_bytes = min_t(u32, byte_count, xdr->head[0].iov_len);
	byte_count -= sge_bytes;
	sge[sge_no].length = sge_bytes;
	sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
	sge_no++;

	/* pages SGE */
	page_no = 0;
	page_bytes = xdr->page_len;
	page_off = xdr->page_base;
	while (byte_count && page_bytes) {
		sge_bytes = min_t(u32, byte_count, (PAGE_SIZE-page_off));
		sge[sge_no].addr =
			ib_dma_map_page(xprt->sc_cm_id->device,
					xdr->pages[page_no], page_off,
					sge_bytes, DMA_TO_DEVICE);
		sge_bytes = min(sge_bytes, page_bytes);
		byte_count -= sge_bytes;
		page_bytes -= sge_bytes;
		sge[sge_no].length = sge_bytes;
		sge[sge_no].lkey = xprt->sc_phys_mr->lkey;

		sge_no++;
		page_no++;
		page_off = 0; /* reset for next time through loop */
	}

	/* Tail SGE */
	if (byte_count && xdr->tail[0].iov_len) {
		sge[sge_no].addr =
			ib_dma_map_single(xprt->sc_cm_id->device,
					  xdr->tail[0].iov_base,
					  xdr->tail[0].iov_len,
					  DMA_TO_DEVICE);
		sge_bytes = min_t(u32, byte_count, xdr->tail[0].iov_len);
		byte_count -= sge_bytes;
		sge[sge_no].length = sge_bytes;
		sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
		sge_no++;
	}

	BUG_ON(sge_no > sge_max);
	BUG_ON(byte_count != 0);

	*sge_count = sge_no;
	return sge;
}
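
/* For example (illustrative only, assuming 4 KB pages): a 9000-byte
 * rq_res with a 104-byte head, 8192 bytes of page data at page_base 0,
 * and a 704-byte tail maps to sge[1] = 104 bytes, sge[2] = 4096,
 * sge[3] = 4096 and sge[4] = 704, with *sge_count set to 5 and sge[0]
 * left free for the RPCRDMA header.  send_write() below walks an array
 * built this way to find the SGEs backing a given XDR offset.
 */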

/* Assumptions:
 * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
 */
static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
		      u32 rmr, u64 to,
		      u32 xdr_off, int write_len,
		      struct ib_sge *xdr_sge, int sge_count)
{
	struct svc_rdma_op_ctxt *tmp_sge_ctxt;
	struct ib_send_wr write_wr;
	struct ib_sge *sge;
	int xdr_sge_no;
	int sge_no;
	int sge_bytes;
	int sge_off;
	int bc;
	struct svc_rdma_op_ctxt *ctxt;
	int ret = 0;

	BUG_ON(sge_count >= 32);
	dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
		"write_len=%d, xdr_sge=%p, sge_count=%d\n",
		rmr, to, xdr_off, write_len, xdr_sge, sge_count);

	ctxt = svc_rdma_get_context(xprt);
	ctxt->count = 0;
	tmp_sge_ctxt = svc_rdma_get_context(xprt);
	sge = tmp_sge_ctxt->sge;

	/* Find the SGE associated with xdr_off */
	for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < sge_count;
	     xdr_sge_no++) {
		if (xdr_sge[xdr_sge_no].length > bc)
			break;
		bc -= xdr_sge[xdr_sge_no].length;
	}

	sge_off = bc;
	bc = write_len;
	sge_no = 0;

	/* Copy the remaining SGE */
	while (bc != 0 && xdr_sge_no < sge_count) {
		sge[sge_no].addr = xdr_sge[xdr_sge_no].addr + sge_off;
		sge[sge_no].lkey = xdr_sge[xdr_sge_no].lkey;
		sge_bytes = min((size_t)bc,
				(size_t)(xdr_sge[xdr_sge_no].length-sge_off));
		sge[sge_no].length = sge_bytes;

		sge_off = 0;
		sge_no++;
		xdr_sge_no++;
		bc -= sge_bytes;
	}

	BUG_ON(bc != 0);
	BUG_ON(xdr_sge_no > sge_count);

	/* Prepare WRITE WR */
	memset(&write_wr, 0, sizeof write_wr);
	ctxt->wr_op = IB_WR_RDMA_WRITE;
	write_wr.wr_id = (unsigned long)ctxt;
	write_wr.sg_list = &sge[0];
	write_wr.num_sge = sge_no;
	write_wr.opcode = IB_WR_RDMA_WRITE;
	write_wr.send_flags = IB_SEND_SIGNALED;
	write_wr.wr.rdma.rkey = rmr;
	write_wr.wr.rdma.remote_addr = to;

	/* Post It */
	atomic_inc(&rdma_stat_write);
	if (svc_rdma_send(xprt, &write_wr)) {
		svc_rdma_put_context(ctxt, 1);
		/* Fatal error, close transport */
		ret = -EIO;
	}
	svc_rdma_put_context(tmp_sge_ctxt, 0);
	return ret;
}
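
/* Send the pagelist and tail portions of the response to the client using
 * the segments in the request's write chunk list, and encode the lengths
 * actually written into the write list of the RPCRDMA response header.
 * Each chunk is pushed in at most sc_max_sge * PAGE_SIZE pieces via
 * send_write().  Returns the number of bytes handled this way
 * (rq_res.page_len plus the tail length), 0 if the request carried no
 * write list, or -EIO if an RDMA_WRITE could not be posted.
 */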
static int send_write_chunks(struct svcxprt_rdma *xprt,
			     struct rpcrdma_msg *rdma_argp,
			     struct rpcrdma_msg *rdma_resp,
			     struct svc_rqst *rqstp,
			     struct ib_sge *sge,
			     int sge_count)
{
	u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
	int write_len;
	int max_write;
	u32 xdr_off;
	int chunk_off;
	int chunk_no;
	struct rpcrdma_write_array *arg_ary;
	struct rpcrdma_write_array *res_ary;
	int ret;

	arg_ary = svc_rdma_get_write_array(rdma_argp);
	if (!arg_ary)
		return 0;
	res_ary = (struct rpcrdma_write_array *)
		&rdma_resp->rm_body.rm_chunks[1];

	max_write = xprt->sc_max_sge * PAGE_SIZE;

	/* Write chunks start at the pagelist */
	for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
	     xfer_len && chunk_no < arg_ary->wc_nchunks;
	     chunk_no++) {
		struct rpcrdma_segment *arg_ch;
		u64 rs_offset;

		arg_ch = &arg_ary->wc_array[chunk_no].wc_target;
		write_len = min(xfer_len, arg_ch->rs_length);

		/* Prepare the response chunk given the length actually
		 * written */
		rs_offset = get_unaligned(&(arg_ch->rs_offset));
		svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no,
						arg_ch->rs_handle,
						rs_offset,
						write_len);
		chunk_off = 0;
		while (write_len) {
			int this_write;
			this_write = min(write_len, max_write);
			ret = send_write(xprt, rqstp,
					 arg_ch->rs_handle,
					 rs_offset + chunk_off,
					 xdr_off,
					 this_write,
					 sge,
					 sge_count);
			if (ret) {
				dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
					ret);
				return -EIO;
			}
			chunk_off += this_write;
			xdr_off += this_write;
			xfer_len -= this_write;
			write_len -= this_write;
		}
	}
	/* Update the req with the number of chunks actually used */
	svc_rdma_xdr_encode_write_list(rdma_resp, chunk_no);

	return rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
}
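
/* Send the RPC reply via the reply chunk the client provided in the
 * request.  The whole of rq_res, starting at XDR offset zero, is pushed
 * with RDMA_WRITEs and the reply array in the response header is updated
 * with the lengths actually written.  Returns rq_res.len on success, 0 if
 * the request had no reply array, or -EIO if an RDMA_WRITE could not be
 * posted.
 */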
static int send_reply_chunks(struct svcxprt_rdma *xprt,
			     struct rpcrdma_msg *rdma_argp,
			     struct rpcrdma_msg *rdma_resp,
			     struct svc_rqst *rqstp,
			     struct ib_sge *sge,
			     int sge_count)
{
	u32 xfer_len = rqstp->rq_res.len;
	int write_len;
	int max_write;
	u32 xdr_off;
	int chunk_no;
	int chunk_off;
	struct rpcrdma_segment *ch;
	struct rpcrdma_write_array *arg_ary;
	struct rpcrdma_write_array *res_ary;
	int ret;

	arg_ary = svc_rdma_get_reply_array(rdma_argp);
	if (!arg_ary)
		return 0;
	/* XXX: need to fix when reply lists occur with read-list and or
	 * write-list */
	res_ary = (struct rpcrdma_write_array *)
		&rdma_resp->rm_body.rm_chunks[2];

	max_write = xprt->sc_max_sge * PAGE_SIZE;

	/* xdr offset starts at RPC message */
	for (xdr_off = 0, chunk_no = 0;
	     xfer_len && chunk_no < arg_ary->wc_nchunks;
	     chunk_no++) {
		u64 rs_offset;
		ch = &arg_ary->wc_array[chunk_no].wc_target;
		write_len = min(xfer_len, ch->rs_length);

		/* Prepare the reply chunk given the length actually
		 * written */
		rs_offset = get_unaligned(&(ch->rs_offset));
		svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no,
						ch->rs_handle, rs_offset,
						write_len);
		chunk_off = 0;
		while (write_len) {
			int this_write;

			this_write = min(write_len, max_write);
			ret = send_write(xprt, rqstp,
					 ch->rs_handle,
					 rs_offset + chunk_off,
					 xdr_off,
					 this_write,
					 sge,
					 sge_count);
			if (ret) {
				dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
					ret);
				return -EIO;
			}
			chunk_off += this_write;
			xdr_off += this_write;
			xfer_len -= this_write;
			write_len -= this_write;
		}
	}
	/* Update the req with the number of chunks actually used */
	svc_rdma_xdr_encode_reply_array(res_ary, chunk_no);

	return rqstp->rq_res.len;
}
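
/* When a reply array is present, svc_rdma_sendto() marks the response
 * RDMA_NOMSG and the RPC reply itself travels via the writes above, so
 * the RDMA_SEND built below carries only the RPCRDMA header.  Otherwise
 * the reply (less any data covered by write chunks) is sent inline by
 * send_reply().
 */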

/* This function prepares the portion of the RPCRDMA message to be
 * sent in the RDMA_SEND. This function is called after data sent via
 * RDMA has already been transmitted. There are three cases:
 * - The RPCRDMA header, RPC header, and payload are all sent in a
 *   single RDMA_SEND. This is the "inline" case.
 * - The RPCRDMA header and some portion of the RPC header and data
 *   are sent via this RDMA_SEND and another portion of the data is
 *   sent via RDMA.
 * - The RPCRDMA header [NOMSG] is sent in this RDMA_SEND and the RPC
 *   header and data are all transmitted via RDMA.
 * In all three cases, this function prepares the RPCRDMA header in
 * sge[0], the 'type' parameter indicates the type to place in the
 * RPCRDMA header, and the 'byte_count' field indicates how much of
 * the XDR to include in this RDMA_SEND.
 */
static int send_reply(struct svcxprt_rdma *rdma,
		      struct svc_rqst *rqstp,
		      struct page *page,
		      struct rpcrdma_msg *rdma_resp,
		      struct svc_rdma_op_ctxt *ctxt,
		      int sge_count,
		      int byte_count)
{
	struct ib_send_wr send_wr;
	int sge_no;
	int sge_bytes;
	int page_no;
	int ret;

	/* Prepare the context */
	ctxt->pages[0] = page;
	ctxt->count = 1;

	/* Prepare the SGE for the RPCRDMA Header */
	ctxt->sge[0].addr =
		ib_dma_map_page(rdma->sc_cm_id->device,
				page, 0, PAGE_SIZE, DMA_TO_DEVICE);
	ctxt->direction = DMA_TO_DEVICE;
	ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
	ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey;

	/* Determine how many of our SGE are to be transmitted */
	for (sge_no = 1; byte_count && sge_no < sge_count; sge_no++) {
		sge_bytes = min((size_t)ctxt->sge[sge_no].length,
				(size_t)byte_count);
		byte_count -= sge_bytes;
	}
	BUG_ON(byte_count != 0);

	/* Save all respages in the ctxt and remove them from the
	 * respages array. They are our pages until the I/O
	 * completes.
	 */
	for (page_no = 0; page_no < rqstp->rq_resused; page_no++) {
		ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
		ctxt->count++;
		rqstp->rq_respages[page_no] = NULL;
	}

	BUG_ON(sge_no > rdma->sc_max_sge);
	memset(&send_wr, 0, sizeof send_wr);
	ctxt->wr_op = IB_WR_SEND;
	send_wr.wr_id = (unsigned long)ctxt;
	send_wr.sg_list = ctxt->sge;
	send_wr.num_sge = sge_no;
	send_wr.opcode = IB_WR_SEND;
	send_wr.send_flags = IB_SEND_SIGNALED;

	ret = svc_rdma_send(rdma, &send_wr);
	if (ret)
		svc_rdma_put_context(ctxt, 1);

	return ret;
}
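
/* svcrdma does not need to reserve transport header space in the reply
 * buffer here; the RPCRDMA reply header is built in its own page by
 * svc_rdma_sendto() below, so there is nothing to prepare.
 */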
void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
{
}

/*
 * Return the start of an xdr buffer.
 */
static void *xdr_start(struct xdr_buf *xdr)
{
	return xdr->head[0].iov_base -
		(xdr->len -
		 xdr->page_len -
		 xdr->tail[0].iov_len -
		 xdr->head[0].iov_len);
}
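
/* Build and send the RPC reply for one request: map rq_res into an SGE
 * array, construct the RPCRDMA reply header (RDMA_MSG, or RDMA_NOMSG when
 * the client supplied a reply array), push any write-chunk and reply-chunk
 * data with RDMA_WRITEs, and finally transmit the header and any remaining
 * inline bytes with an RDMA_SEND via send_reply().
 */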
int svc_rdma_sendto(struct svc_rqst *rqstp)
{
	struct svc_xprt *xprt = rqstp->rq_xprt;
	struct svcxprt_rdma *rdma =
		container_of(xprt, struct svcxprt_rdma, sc_xprt);
	struct rpcrdma_msg *rdma_argp;
	struct rpcrdma_msg *rdma_resp;
	struct rpcrdma_write_array *reply_ary;
	enum rpcrdma_proc reply_type;
	int ret;
	int inline_bytes;
	struct ib_sge *sge;
	int sge_count = 0;
	struct page *res_page;
	struct svc_rdma_op_ctxt *ctxt;

	dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);

	/* Get the RDMA request header. */
	rdma_argp = xdr_start(&rqstp->rq_arg);

	/* Build an SGE for the XDR */
	ctxt = svc_rdma_get_context(rdma);
	ctxt->direction = DMA_TO_DEVICE;
	sge = xdr_to_sge(rdma, &rqstp->rq_res, ctxt->sge, &sge_count);

	inline_bytes = rqstp->rq_res.len;

	/* Create the RDMA response header */
	res_page = svc_rdma_get_page();
	rdma_resp = page_address(res_page);
	reply_ary = svc_rdma_get_reply_array(rdma_argp);
	if (reply_ary)
		reply_type = RDMA_NOMSG;
	else
		reply_type = RDMA_MSG;
	svc_rdma_xdr_encode_reply_header(rdma, rdma_argp,
					 rdma_resp, reply_type);

	/* Send any write-chunk data and build resp write-list */
	ret = send_write_chunks(rdma, rdma_argp, rdma_resp,
				rqstp, sge, sge_count);
	if (ret < 0) {
		printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
		       ret);
		goto error;
	}
	inline_bytes -= ret;

	/* Send any reply-list data and update resp reply-list */
	ret = send_reply_chunks(rdma, rdma_argp, rdma_resp,
				rqstp, sge, sge_count);
	if (ret < 0) {
		printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
		       ret);
		goto error;
	}
	inline_bytes -= ret;

	ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, sge_count,
			 inline_bytes);
	dprintk("svcrdma: send_reply returns %d\n", ret);
	return ret;
 error:
	svc_rdma_put_context(ctxt, 0);
	put_page(res_page);
	return ret;
}