1bcf3ffd4SChuck Lever // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2c06b540aSTom Tucker /* 3ecf85b23SChuck Lever * Copyright (c) 2016-2018 Oracle. All rights reserved. 40bf48289SSteve Wise * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. 5c06b540aSTom Tucker * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. 6c06b540aSTom Tucker * 7c06b540aSTom Tucker * This software is available to you under a choice of one of two 8c06b540aSTom Tucker * licenses. You may choose to be licensed under the terms of the GNU 9c06b540aSTom Tucker * General Public License (GPL) Version 2, available from the file 10c06b540aSTom Tucker * COPYING in the main directory of this source tree, or the BSD-type 11c06b540aSTom Tucker * license below: 12c06b540aSTom Tucker * 13c06b540aSTom Tucker * Redistribution and use in source and binary forms, with or without 14c06b540aSTom Tucker * modification, are permitted provided that the following conditions 15c06b540aSTom Tucker * are met: 16c06b540aSTom Tucker * 17c06b540aSTom Tucker * Redistributions of source code must retain the above copyright 18c06b540aSTom Tucker * notice, this list of conditions and the following disclaimer. 19c06b540aSTom Tucker * 20c06b540aSTom Tucker * Redistributions in binary form must reproduce the above 21c06b540aSTom Tucker * copyright notice, this list of conditions and the following 22c06b540aSTom Tucker * disclaimer in the documentation and/or other materials provided 23c06b540aSTom Tucker * with the distribution. 24c06b540aSTom Tucker * 25c06b540aSTom Tucker * Neither the name of the Network Appliance, Inc. nor the names of 26c06b540aSTom Tucker * its contributors may be used to endorse or promote products 27c06b540aSTom Tucker * derived from this software without specific prior written 28c06b540aSTom Tucker * permission. 
29c06b540aSTom Tucker * 30c06b540aSTom Tucker * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 31c06b540aSTom Tucker * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 32c06b540aSTom Tucker * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 33c06b540aSTom Tucker * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 34c06b540aSTom Tucker * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 35c06b540aSTom Tucker * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 36c06b540aSTom Tucker * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 37c06b540aSTom Tucker * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 38c06b540aSTom Tucker * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 39c06b540aSTom Tucker * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 40c06b540aSTom Tucker * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 41c06b540aSTom Tucker * 42c06b540aSTom Tucker * Author: Tom Tucker <tom@opengridcomputing.com> 43c06b540aSTom Tucker */ 44c06b540aSTom Tucker 459a6a180bSChuck Lever /* Operation 469a6a180bSChuck Lever * 479a6a180bSChuck Lever * The main entry point is svc_rdma_sendto. This is called by the 489a6a180bSChuck Lever * RPC server when an RPC Reply is ready to be transmitted to a client. 499a6a180bSChuck Lever * 509a6a180bSChuck Lever * The passed-in svc_rqst contains a struct xdr_buf which holds an 519a6a180bSChuck Lever * XDR-encoded RPC Reply message. sendto must construct the RPC-over-RDMA 529a6a180bSChuck Lever * transport header, post all Write WRs needed for this Reply, then post 539a6a180bSChuck Lever * a Send WR conveying the transport header and the RPC message itself to 549a6a180bSChuck Lever * the client. 
559a6a180bSChuck Lever * 569a6a180bSChuck Lever * svc_rdma_sendto must fully transmit the Reply before returning, as 579a6a180bSChuck Lever * the svc_rqst will be recycled as soon as sendto returns. Remaining 589a6a180bSChuck Lever * resources referred to by the svc_rqst are also recycled at that time. 599a6a180bSChuck Lever * Therefore any resources that must remain longer must be detached 609a6a180bSChuck Lever * from the svc_rqst and released later. 619a6a180bSChuck Lever * 629a6a180bSChuck Lever * Page Management 639a6a180bSChuck Lever * 649a6a180bSChuck Lever * The I/O that performs Reply transmission is asynchronous, and may 659a6a180bSChuck Lever * complete well after sendto returns. Thus pages under I/O must be 669a6a180bSChuck Lever * removed from the svc_rqst before sendto returns. 679a6a180bSChuck Lever * 689a6a180bSChuck Lever * The logic here depends on Send Queue and completion ordering. Since 699a6a180bSChuck Lever * the Send WR is always posted last, it will always complete last. Thus 709a6a180bSChuck Lever * when it completes, it is guaranteed that all previous Write WRs have 719a6a180bSChuck Lever * also completed. 729a6a180bSChuck Lever * 739a6a180bSChuck Lever * Write WRs are constructed and posted. Each Write segment gets its own 749a6a180bSChuck Lever * svc_rdma_rw_ctxt, allowing the Write completion handler to find and 759a6a180bSChuck Lever * DMA-unmap the pages under I/O for that Write segment. The Write 769a6a180bSChuck Lever * completion handler does not release any pages. 779a6a180bSChuck Lever * 784201c746SChuck Lever * When the Send WR is constructed, it also gets its own svc_rdma_send_ctxt. 799a6a180bSChuck Lever * The ownership of all of the Reply's pages are transferred into that 809a6a180bSChuck Lever * ctxt, the Send WR is posted, and sendto returns. 819a6a180bSChuck Lever * 824201c746SChuck Lever * The svc_rdma_send_ctxt is presented when the Send WR completes. 
The
 * Send completion handler finally releases the Reply's pages.
 *
 * This mechanism also assumes that completions on the transport's Send
 * Completion Queue do not run in parallel. Otherwise a Write completion
 * and Send completion running at the same time could release pages that
 * are still DMA-mapped.
 *
 * Error Handling
 *
 * - If the Send WR is posted successfully, it will either complete
 *   successfully, or get flushed. Either way, the Send completion
 *   handler releases the Reply's pages.
 * - If the Send WR cannot be posted, the forward path releases
 *   the Reply's pages.
 *
 * This handles the case, without the use of page reference counting,
 * where two different Write segments send portions of the same page.
1009a6a180bSChuck Lever */ 1019a6a180bSChuck Lever 102c06b540aSTom Tucker #include <linux/spinlock.h> 103c06b540aSTom Tucker #include <asm/unaligned.h> 10498895edbSChuck Lever 105c06b540aSTom Tucker #include <rdma/ib_verbs.h> 106c06b540aSTom Tucker #include <rdma/rdma_cm.h> 10798895edbSChuck Lever 10898895edbSChuck Lever #include <linux/sunrpc/debug.h> 10998895edbSChuck Lever #include <linux/sunrpc/rpc_rdma.h> 110c06b540aSTom Tucker #include <linux/sunrpc/svc_rdma.h> 111c06b540aSTom Tucker 11298895edbSChuck Lever #include "xprt_rdma.h" 11398895edbSChuck Lever #include <trace/events/rpcrdma.h> 11498895edbSChuck Lever 115c06b540aSTom Tucker #define RPCDBG_FACILITY RPCDBG_SVCXPRT 116c06b540aSTom Tucker 1174201c746SChuck Lever static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc); 1184201c746SChuck Lever 1194201c746SChuck Lever static inline struct svc_rdma_send_ctxt * 1204201c746SChuck Lever svc_rdma_next_send_ctxt(struct list_head *list) 1214201c746SChuck Lever { 1224201c746SChuck Lever return list_first_entry_or_null(list, struct svc_rdma_send_ctxt, 1234201c746SChuck Lever sc_list); 1244201c746SChuck Lever } 1254201c746SChuck Lever 1264201c746SChuck Lever static struct svc_rdma_send_ctxt * 1274201c746SChuck Lever svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma) 1284201c746SChuck Lever { 1294201c746SChuck Lever struct svc_rdma_send_ctxt *ctxt; 13025fd86ecSChuck Lever size_t size; 1314201c746SChuck Lever int i; 1324201c746SChuck Lever 13325fd86ecSChuck Lever size = sizeof(*ctxt); 13425fd86ecSChuck Lever size += rdma->sc_max_send_sges * sizeof(struct ib_sge); 13525fd86ecSChuck Lever ctxt = kmalloc(size, GFP_KERNEL); 1364201c746SChuck Lever if (!ctxt) 1374201c746SChuck Lever return NULL; 1384201c746SChuck Lever 1394201c746SChuck Lever ctxt->sc_cqe.done = svc_rdma_wc_send; 1404201c746SChuck Lever ctxt->sc_send_wr.next = NULL; 1414201c746SChuck Lever ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe; 1424201c746SChuck Lever ctxt->sc_send_wr.sg_list = ctxt->sc_sges; 
1434201c746SChuck Lever ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED; 14425fd86ecSChuck Lever for (i = 0; i < rdma->sc_max_send_sges; i++) 1454201c746SChuck Lever ctxt->sc_sges[i].lkey = rdma->sc_pd->local_dma_lkey; 1464201c746SChuck Lever return ctxt; 1474201c746SChuck Lever } 1484201c746SChuck Lever 1494201c746SChuck Lever /** 1504201c746SChuck Lever * svc_rdma_send_ctxts_destroy - Release all send_ctxt's for an xprt 1514201c746SChuck Lever * @rdma: svcxprt_rdma being torn down 1524201c746SChuck Lever * 1534201c746SChuck Lever */ 1544201c746SChuck Lever void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma) 1554201c746SChuck Lever { 1564201c746SChuck Lever struct svc_rdma_send_ctxt *ctxt; 1574201c746SChuck Lever 1584201c746SChuck Lever while ((ctxt = svc_rdma_next_send_ctxt(&rdma->sc_send_ctxts))) { 1594201c746SChuck Lever list_del(&ctxt->sc_list); 1604201c746SChuck Lever kfree(ctxt); 1614201c746SChuck Lever } 1624201c746SChuck Lever } 1634201c746SChuck Lever 1644201c746SChuck Lever /** 1654201c746SChuck Lever * svc_rdma_send_ctxt_get - Get a free send_ctxt 1664201c746SChuck Lever * @rdma: controlling svcxprt_rdma 1674201c746SChuck Lever * 1684201c746SChuck Lever * Returns a ready-to-use send_ctxt, or NULL if none are 1694201c746SChuck Lever * available and a fresh one cannot be allocated. 
1704201c746SChuck Lever */ 1714201c746SChuck Lever struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma) 1724201c746SChuck Lever { 1734201c746SChuck Lever struct svc_rdma_send_ctxt *ctxt; 1744201c746SChuck Lever 1754201c746SChuck Lever spin_lock(&rdma->sc_send_lock); 1764201c746SChuck Lever ctxt = svc_rdma_next_send_ctxt(&rdma->sc_send_ctxts); 1774201c746SChuck Lever if (!ctxt) 1784201c746SChuck Lever goto out_empty; 1794201c746SChuck Lever list_del(&ctxt->sc_list); 1804201c746SChuck Lever spin_unlock(&rdma->sc_send_lock); 1814201c746SChuck Lever 1824201c746SChuck Lever out: 1834201c746SChuck Lever ctxt->sc_send_wr.num_sge = 0; 1844201c746SChuck Lever ctxt->sc_page_count = 0; 1854201c746SChuck Lever return ctxt; 1864201c746SChuck Lever 1874201c746SChuck Lever out_empty: 1884201c746SChuck Lever spin_unlock(&rdma->sc_send_lock); 1894201c746SChuck Lever ctxt = svc_rdma_send_ctxt_alloc(rdma); 1904201c746SChuck Lever if (!ctxt) 1914201c746SChuck Lever return NULL; 1924201c746SChuck Lever goto out; 1934201c746SChuck Lever } 1944201c746SChuck Lever 1954201c746SChuck Lever /** 1964201c746SChuck Lever * svc_rdma_send_ctxt_put - Return send_ctxt to free list 1974201c746SChuck Lever * @rdma: controlling svcxprt_rdma 1984201c746SChuck Lever * @ctxt: object to return to the free list 1994201c746SChuck Lever * 2004201c746SChuck Lever * Pages left in sc_pages are DMA unmapped and released. 
2014201c746SChuck Lever */ 2024201c746SChuck Lever void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, 2034201c746SChuck Lever struct svc_rdma_send_ctxt *ctxt) 2044201c746SChuck Lever { 2054201c746SChuck Lever struct ib_device *device = rdma->sc_cm_id->device; 2064201c746SChuck Lever unsigned int i; 2074201c746SChuck Lever 2084201c746SChuck Lever for (i = 0; i < ctxt->sc_send_wr.num_sge; i++) 2094201c746SChuck Lever ib_dma_unmap_page(device, 2104201c746SChuck Lever ctxt->sc_sges[i].addr, 2114201c746SChuck Lever ctxt->sc_sges[i].length, 2124201c746SChuck Lever DMA_TO_DEVICE); 2134201c746SChuck Lever 2144201c746SChuck Lever for (i = 0; i < ctxt->sc_page_count; ++i) 2154201c746SChuck Lever put_page(ctxt->sc_pages[i]); 2164201c746SChuck Lever 2174201c746SChuck Lever spin_lock(&rdma->sc_send_lock); 2184201c746SChuck Lever list_add(&ctxt->sc_list, &rdma->sc_send_ctxts); 2194201c746SChuck Lever spin_unlock(&rdma->sc_send_lock); 2204201c746SChuck Lever } 2214201c746SChuck Lever 2224201c746SChuck Lever /** 2234201c746SChuck Lever * svc_rdma_wc_send - Invoked by RDMA provider for each polled Send WC 2244201c746SChuck Lever * @cq: Completion Queue context 2254201c746SChuck Lever * @wc: Work Completion object 2264201c746SChuck Lever * 2274201c746SChuck Lever * NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that 2284201c746SChuck Lever * the Send completion handler could be running. 
2294201c746SChuck Lever */ 2304201c746SChuck Lever static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) 2314201c746SChuck Lever { 2324201c746SChuck Lever struct svcxprt_rdma *rdma = cq->cq_context; 2334201c746SChuck Lever struct ib_cqe *cqe = wc->wr_cqe; 2344201c746SChuck Lever struct svc_rdma_send_ctxt *ctxt; 2354201c746SChuck Lever 2364201c746SChuck Lever trace_svcrdma_wc_send(wc); 2374201c746SChuck Lever 2384201c746SChuck Lever atomic_inc(&rdma->sc_sq_avail); 2394201c746SChuck Lever wake_up(&rdma->sc_send_wait); 2404201c746SChuck Lever 2414201c746SChuck Lever ctxt = container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe); 2424201c746SChuck Lever svc_rdma_send_ctxt_put(rdma, ctxt); 2434201c746SChuck Lever 2444201c746SChuck Lever if (unlikely(wc->status != IB_WC_SUCCESS)) { 2454201c746SChuck Lever set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); 2464201c746SChuck Lever svc_xprt_enqueue(&rdma->sc_xprt); 2474201c746SChuck Lever if (wc->status != IB_WC_WR_FLUSH_ERR) 2484201c746SChuck Lever pr_err("svcrdma: Send: %s (%u/0x%x)\n", 2494201c746SChuck Lever ib_wc_status_msg(wc->status), 2504201c746SChuck Lever wc->status, wc->vendor_err); 2514201c746SChuck Lever } 2524201c746SChuck Lever 2534201c746SChuck Lever svc_xprt_put(&rdma->sc_xprt); 2544201c746SChuck Lever } 2554201c746SChuck Lever 2563abb03faSChuck Lever /** 2573abb03faSChuck Lever * svc_rdma_send - Post a single Send WR 2583abb03faSChuck Lever * @rdma: transport on which to post the WR 2593abb03faSChuck Lever * @wr: prepared Send WR to post 2603abb03faSChuck Lever * 2613abb03faSChuck Lever * Returns zero the Send WR was posted successfully. Otherwise, a 2623abb03faSChuck Lever * negative errno is returned. 
2633abb03faSChuck Lever */ 2644201c746SChuck Lever int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr) 2654201c746SChuck Lever { 2663abb03faSChuck Lever struct ib_send_wr *bad_wr; 2674201c746SChuck Lever int ret; 2684201c746SChuck Lever 2693abb03faSChuck Lever might_sleep(); 2704201c746SChuck Lever 2714201c746SChuck Lever /* If the SQ is full, wait until an SQ entry is available */ 2724201c746SChuck Lever while (1) { 2733abb03faSChuck Lever if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) { 2744201c746SChuck Lever atomic_inc(&rdma_stat_sq_starve); 2754201c746SChuck Lever trace_svcrdma_sq_full(rdma); 2763abb03faSChuck Lever atomic_inc(&rdma->sc_sq_avail); 2774201c746SChuck Lever wait_event(rdma->sc_send_wait, 2783abb03faSChuck Lever atomic_read(&rdma->sc_sq_avail) > 1); 2794201c746SChuck Lever if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) 2804201c746SChuck Lever return -ENOTCONN; 2814201c746SChuck Lever trace_svcrdma_sq_retry(rdma); 2824201c746SChuck Lever continue; 2834201c746SChuck Lever } 2844201c746SChuck Lever 2853abb03faSChuck Lever svc_xprt_get(&rdma->sc_xprt); 2864201c746SChuck Lever ret = ib_post_send(rdma->sc_qp, wr, &bad_wr); 2874201c746SChuck Lever trace_svcrdma_post_send(wr, ret); 2884201c746SChuck Lever if (ret) { 2894201c746SChuck Lever set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); 2904201c746SChuck Lever svc_xprt_put(&rdma->sc_xprt); 2914201c746SChuck Lever wake_up(&rdma->sc_send_wait); 2924201c746SChuck Lever } 2934201c746SChuck Lever break; 2944201c746SChuck Lever } 2954201c746SChuck Lever return ret; 2964201c746SChuck Lever } 2974201c746SChuck Lever 298cf570a93SChuck Lever static u32 xdr_padsize(u32 len) 299cf570a93SChuck Lever { 300cf570a93SChuck Lever return (len & 3) ? (4 - (len & 3)) : 0; 301cf570a93SChuck Lever } 302cf570a93SChuck Lever 3039a6a180bSChuck Lever /* Returns length of transport header, in bytes. 
3049a6a180bSChuck Lever */ 3059a6a180bSChuck Lever static unsigned int svc_rdma_reply_hdr_len(__be32 *rdma_resp) 3069a6a180bSChuck Lever { 3079a6a180bSChuck Lever unsigned int nsegs; 3089a6a180bSChuck Lever __be32 *p; 3099a6a180bSChuck Lever 3109a6a180bSChuck Lever p = rdma_resp; 3119a6a180bSChuck Lever 3129a6a180bSChuck Lever /* RPC-over-RDMA V1 replies never have a Read list. */ 3139a6a180bSChuck Lever p += rpcrdma_fixed_maxsz + 1; 3149a6a180bSChuck Lever 3159a6a180bSChuck Lever /* Skip Write list. */ 3169a6a180bSChuck Lever while (*p++ != xdr_zero) { 3179a6a180bSChuck Lever nsegs = be32_to_cpup(p++); 3189a6a180bSChuck Lever p += nsegs * rpcrdma_segment_maxsz; 3199a6a180bSChuck Lever } 3209a6a180bSChuck Lever 3219a6a180bSChuck Lever /* Skip Reply chunk. */ 3229a6a180bSChuck Lever if (*p++ != xdr_zero) { 3239a6a180bSChuck Lever nsegs = be32_to_cpup(p++); 3249a6a180bSChuck Lever p += nsegs * rpcrdma_segment_maxsz; 3259a6a180bSChuck Lever } 3269a6a180bSChuck Lever 3279a6a180bSChuck Lever return (unsigned long)p - (unsigned long)rdma_resp; 3289a6a180bSChuck Lever } 3299a6a180bSChuck Lever 3309a6a180bSChuck Lever /* One Write chunk is copied from Call transport header to Reply 3319a6a180bSChuck Lever * transport header. Each segment's length field is updated to 3329a6a180bSChuck Lever * reflect number of bytes consumed in the segment. 3339a6a180bSChuck Lever * 3349a6a180bSChuck Lever * Returns number of segments in this chunk. 
3359a6a180bSChuck Lever */ 3369a6a180bSChuck Lever static unsigned int xdr_encode_write_chunk(__be32 *dst, __be32 *src, 3379a6a180bSChuck Lever unsigned int remaining) 3389a6a180bSChuck Lever { 3399a6a180bSChuck Lever unsigned int i, nsegs; 3409a6a180bSChuck Lever u32 seg_len; 3419a6a180bSChuck Lever 3429a6a180bSChuck Lever /* Write list discriminator */ 3439a6a180bSChuck Lever *dst++ = *src++; 3449a6a180bSChuck Lever 3459a6a180bSChuck Lever /* number of segments in this chunk */ 3469a6a180bSChuck Lever nsegs = be32_to_cpup(src); 3479a6a180bSChuck Lever *dst++ = *src++; 3489a6a180bSChuck Lever 3499a6a180bSChuck Lever for (i = nsegs; i; i--) { 3509a6a180bSChuck Lever /* segment's RDMA handle */ 3519a6a180bSChuck Lever *dst++ = *src++; 3529a6a180bSChuck Lever 3539a6a180bSChuck Lever /* bytes returned in this segment */ 3549a6a180bSChuck Lever seg_len = be32_to_cpu(*src); 3559a6a180bSChuck Lever if (remaining >= seg_len) { 3569a6a180bSChuck Lever /* entire segment was consumed */ 3579a6a180bSChuck Lever *dst = *src; 3589a6a180bSChuck Lever remaining -= seg_len; 3599a6a180bSChuck Lever } else { 3609a6a180bSChuck Lever /* segment only partly filled */ 3619a6a180bSChuck Lever *dst = cpu_to_be32(remaining); 3629a6a180bSChuck Lever remaining = 0; 3639a6a180bSChuck Lever } 3649a6a180bSChuck Lever dst++; src++; 3659a6a180bSChuck Lever 3669a6a180bSChuck Lever /* segment's RDMA offset */ 3679a6a180bSChuck Lever *dst++ = *src++; 3689a6a180bSChuck Lever *dst++ = *src++; 3699a6a180bSChuck Lever } 3709a6a180bSChuck Lever 3719a6a180bSChuck Lever return nsegs; 3729a6a180bSChuck Lever } 3739a6a180bSChuck Lever 3749a6a180bSChuck Lever /* The client provided a Write list in the Call message. Fill in 3759a6a180bSChuck Lever * the segments in the first Write chunk in the Reply's transport 3769a6a180bSChuck Lever * header with the number of bytes consumed in each segment. 3779a6a180bSChuck Lever * Remaining chunks are returned unused. 
3789a6a180bSChuck Lever * 3799a6a180bSChuck Lever * Assumptions: 3809a6a180bSChuck Lever * - Client has provided only one Write chunk 3819a6a180bSChuck Lever */ 3829a6a180bSChuck Lever static void svc_rdma_xdr_encode_write_list(__be32 *rdma_resp, __be32 *wr_ch, 3839a6a180bSChuck Lever unsigned int consumed) 3849a6a180bSChuck Lever { 3859a6a180bSChuck Lever unsigned int nsegs; 3869a6a180bSChuck Lever __be32 *p, *q; 3879a6a180bSChuck Lever 3889a6a180bSChuck Lever /* RPC-over-RDMA V1 replies never have a Read list. */ 3899a6a180bSChuck Lever p = rdma_resp + rpcrdma_fixed_maxsz + 1; 3909a6a180bSChuck Lever 3919a6a180bSChuck Lever q = wr_ch; 3929a6a180bSChuck Lever while (*q != xdr_zero) { 3939a6a180bSChuck Lever nsegs = xdr_encode_write_chunk(p, q, consumed); 3949a6a180bSChuck Lever q += 2 + nsegs * rpcrdma_segment_maxsz; 3959a6a180bSChuck Lever p += 2 + nsegs * rpcrdma_segment_maxsz; 3969a6a180bSChuck Lever consumed = 0; 3979a6a180bSChuck Lever } 3989a6a180bSChuck Lever 3999a6a180bSChuck Lever /* Terminate Write list */ 4009a6a180bSChuck Lever *p++ = xdr_zero; 4019a6a180bSChuck Lever 4029a6a180bSChuck Lever /* Reply chunk discriminator; may be replaced later */ 4039a6a180bSChuck Lever *p = xdr_zero; 4049a6a180bSChuck Lever } 4059a6a180bSChuck Lever 4069a6a180bSChuck Lever /* The client provided a Reply chunk in the Call message. Fill in 4079a6a180bSChuck Lever * the segments in the Reply chunk in the Reply message with the 4089a6a180bSChuck Lever * number of bytes consumed in each segment. 4099a6a180bSChuck Lever * 4109a6a180bSChuck Lever * Assumptions: 4119a6a180bSChuck Lever * - Reply can always fit in the provided Reply chunk 4129a6a180bSChuck Lever */ 4139a6a180bSChuck Lever static void svc_rdma_xdr_encode_reply_chunk(__be32 *rdma_resp, __be32 *rp_ch, 4149a6a180bSChuck Lever unsigned int consumed) 4159a6a180bSChuck Lever { 4169a6a180bSChuck Lever __be32 *p; 4179a6a180bSChuck Lever 4189a6a180bSChuck Lever /* Find the Reply chunk in the Reply's xprt header. 
4199a6a180bSChuck Lever * RPC-over-RDMA V1 replies never have a Read list. 4209a6a180bSChuck Lever */ 4219a6a180bSChuck Lever p = rdma_resp + rpcrdma_fixed_maxsz + 1; 4229a6a180bSChuck Lever 4239a6a180bSChuck Lever /* Skip past Write list */ 4249a6a180bSChuck Lever while (*p++ != xdr_zero) 4259a6a180bSChuck Lever p += 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz; 4269a6a180bSChuck Lever 4279a6a180bSChuck Lever xdr_encode_write_chunk(p, rp_ch, consumed); 4289a6a180bSChuck Lever } 4299a6a180bSChuck Lever 4305fdca653SChuck Lever /* Parse the RPC Call's transport header. 43110dc4512SChuck Lever */ 4329a6a180bSChuck Lever static void svc_rdma_get_write_arrays(__be32 *rdma_argp, 4339a6a180bSChuck Lever __be32 **write, __be32 **reply) 43410dc4512SChuck Lever { 4355fdca653SChuck Lever __be32 *p; 43610dc4512SChuck Lever 4379a6a180bSChuck Lever p = rdma_argp + rpcrdma_fixed_maxsz; 4385fdca653SChuck Lever 4395fdca653SChuck Lever /* Read list */ 4405fdca653SChuck Lever while (*p++ != xdr_zero) 4415fdca653SChuck Lever p += 5; 4425fdca653SChuck Lever 4435fdca653SChuck Lever /* Write list */ 4445fdca653SChuck Lever if (*p != xdr_zero) { 4459a6a180bSChuck Lever *write = p; 4465fdca653SChuck Lever while (*p++ != xdr_zero) 4475fdca653SChuck Lever p += 1 + be32_to_cpu(*p) * 4; 4485fdca653SChuck Lever } else { 4495fdca653SChuck Lever *write = NULL; 4505fdca653SChuck Lever p++; 45110dc4512SChuck Lever } 45210dc4512SChuck Lever 4535fdca653SChuck Lever /* Reply chunk */ 4545fdca653SChuck Lever if (*p != xdr_zero) 4559a6a180bSChuck Lever *reply = p; 4565fdca653SChuck Lever else 4575fdca653SChuck Lever *reply = NULL; 45810dc4512SChuck Lever } 45910dc4512SChuck Lever 46025d55296SChuck Lever /* RPC-over-RDMA Version One private extension: Remote Invalidation. 46125d55296SChuck Lever * Responder's choice: requester signals it can handle Send With 46225d55296SChuck Lever * Invalidate, and responder chooses one rkey to invalidate. 
46325d55296SChuck Lever * 46425d55296SChuck Lever * Find a candidate rkey to invalidate when sending a reply. Picks the 465c238c4c0SChuck Lever * first R_key it finds in the chunk lists. 46625d55296SChuck Lever * 46725d55296SChuck Lever * Returns zero if RPC's chunk lists are empty. 46825d55296SChuck Lever */ 469c238c4c0SChuck Lever static u32 svc_rdma_get_inv_rkey(__be32 *rdma_argp, 470c238c4c0SChuck Lever __be32 *wr_lst, __be32 *rp_ch) 47125d55296SChuck Lever { 472c238c4c0SChuck Lever __be32 *p; 47325d55296SChuck Lever 474c238c4c0SChuck Lever p = rdma_argp + rpcrdma_fixed_maxsz; 475c238c4c0SChuck Lever if (*p != xdr_zero) 476c238c4c0SChuck Lever p += 2; 477c238c4c0SChuck Lever else if (wr_lst && be32_to_cpup(wr_lst + 1)) 478c238c4c0SChuck Lever p = wr_lst + 2; 479c238c4c0SChuck Lever else if (rp_ch && be32_to_cpup(rp_ch + 1)) 480c238c4c0SChuck Lever p = rp_ch + 2; 481c238c4c0SChuck Lever else 482fafedf81SChuck Lever return 0; 483c238c4c0SChuck Lever return be32_to_cpup(p); 48425d55296SChuck Lever } 48525d55296SChuck Lever 4866e6092caSChuck Lever static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma, 4874201c746SChuck Lever struct svc_rdma_send_ctxt *ctxt, 4886e6092caSChuck Lever struct page *page, 489f016f305SChuck Lever unsigned long offset, 4906e6092caSChuck Lever unsigned int len) 4916e6092caSChuck Lever { 4926e6092caSChuck Lever struct ib_device *dev = rdma->sc_cm_id->device; 4936e6092caSChuck Lever dma_addr_t dma_addr; 4946e6092caSChuck Lever 4956e6092caSChuck Lever dma_addr = ib_dma_map_page(dev, page, offset, len, DMA_TO_DEVICE); 4966e6092caSChuck Lever if (ib_dma_mapping_error(dev, dma_addr)) 49791a08eaeSChuck Lever goto out_maperr; 4986e6092caSChuck Lever 49925fd86ecSChuck Lever ctxt->sc_sges[ctxt->sc_cur_sge_no].addr = dma_addr; 50025fd86ecSChuck Lever ctxt->sc_sges[ctxt->sc_cur_sge_no].length = len; 5014201c746SChuck Lever ctxt->sc_send_wr.num_sge++; 5026e6092caSChuck Lever return 0; 50391a08eaeSChuck Lever 50491a08eaeSChuck Lever out_maperr: 
505bd2abef3SChuck Lever trace_svcrdma_dma_map_page(rdma, page); 50691a08eaeSChuck Lever return -EIO; 5076e6092caSChuck Lever } 5086e6092caSChuck Lever 509f016f305SChuck Lever /* ib_dma_map_page() is used here because svc_rdma_dma_unmap() 510f016f305SChuck Lever * handles DMA-unmap and it uses ib_dma_unmap_page() exclusively. 511f016f305SChuck Lever */ 512f016f305SChuck Lever static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma, 5134201c746SChuck Lever struct svc_rdma_send_ctxt *ctxt, 514f016f305SChuck Lever unsigned char *base, 515f016f305SChuck Lever unsigned int len) 516f016f305SChuck Lever { 51725fd86ecSChuck Lever return svc_rdma_dma_map_page(rdma, ctxt, virt_to_page(base), 518f016f305SChuck Lever offset_in_page(base), len); 519f016f305SChuck Lever } 520f016f305SChuck Lever 5216e6092caSChuck Lever /** 5226e6092caSChuck Lever * svc_rdma_map_reply_hdr - DMA map the transport header buffer 5236e6092caSChuck Lever * @rdma: controlling transport 5246e6092caSChuck Lever * @ctxt: op_ctxt for the Send WR 5256e6092caSChuck Lever * @rdma_resp: buffer containing transport header 5266e6092caSChuck Lever * @len: length of transport header 5276e6092caSChuck Lever * 5286e6092caSChuck Lever * Returns: 5296e6092caSChuck Lever * %0 if the header is DMA mapped, 5306e6092caSChuck Lever * %-EIO if DMA mapping failed. 
5316e6092caSChuck Lever */ 5326e6092caSChuck Lever int svc_rdma_map_reply_hdr(struct svcxprt_rdma *rdma, 5334201c746SChuck Lever struct svc_rdma_send_ctxt *ctxt, 5346e6092caSChuck Lever __be32 *rdma_resp, 5356e6092caSChuck Lever unsigned int len) 5366e6092caSChuck Lever { 5374201c746SChuck Lever ctxt->sc_pages[0] = virt_to_page(rdma_resp); 5384201c746SChuck Lever ctxt->sc_page_count++; 53925fd86ecSChuck Lever ctxt->sc_cur_sge_no = 0; 54025fd86ecSChuck Lever return svc_rdma_dma_map_page(rdma, ctxt, ctxt->sc_pages[0], 0, len); 5416e6092caSChuck Lever } 5426e6092caSChuck Lever 5439a6a180bSChuck Lever /* Load the xdr_buf into the ctxt's sge array, and DMA map each 5449a6a180bSChuck Lever * element as it is added. 5459a6a180bSChuck Lever * 54623262790SChuck Lever * Returns zero on success, or a negative errno on failure. 547c06b540aSTom Tucker */ 5489a6a180bSChuck Lever static int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, 5494201c746SChuck Lever struct svc_rdma_send_ctxt *ctxt, 5509a6a180bSChuck Lever struct xdr_buf *xdr, __be32 *wr_lst) 551c06b540aSTom Tucker { 55225fd86ecSChuck Lever unsigned int len, remaining; 553f016f305SChuck Lever unsigned long page_off; 5549a6a180bSChuck Lever struct page **ppages; 5559a6a180bSChuck Lever unsigned char *base; 5569a6a180bSChuck Lever u32 xdr_pad; 557c06b540aSTom Tucker int ret; 558c06b540aSTom Tucker 55925fd86ecSChuck Lever if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges) 56025fd86ecSChuck Lever return -EIO; 56125fd86ecSChuck Lever ret = svc_rdma_dma_map_buf(rdma, ctxt, 5629a6a180bSChuck Lever xdr->head[0].iov_base, 5639a6a180bSChuck Lever xdr->head[0].iov_len); 5649a6a180bSChuck Lever if (ret < 0) 5659a6a180bSChuck Lever return ret; 566c06b540aSTom Tucker 5679a6a180bSChuck Lever /* If a Write chunk is present, the xdr_buf's page list 5689a6a180bSChuck Lever * is not included inline. 
However the Upper Layer may 5699a6a180bSChuck Lever * have added XDR padding in the tail buffer, and that 5709a6a180bSChuck Lever * should not be included inline. 5719a6a180bSChuck Lever */ 5729a6a180bSChuck Lever if (wr_lst) { 5739a6a180bSChuck Lever base = xdr->tail[0].iov_base; 5749a6a180bSChuck Lever len = xdr->tail[0].iov_len; 5759a6a180bSChuck Lever xdr_pad = xdr_padsize(xdr->page_len); 576c06b540aSTom Tucker 5779a6a180bSChuck Lever if (len && xdr_pad) { 5789a6a180bSChuck Lever base += xdr_pad; 5799a6a180bSChuck Lever len -= xdr_pad; 580c06b540aSTom Tucker } 581c06b540aSTom Tucker 5829a6a180bSChuck Lever goto tail; 583c06b540aSTom Tucker } 5849a6a180bSChuck Lever 5859a6a180bSChuck Lever ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT); 5869a6a180bSChuck Lever page_off = xdr->page_base & ~PAGE_MASK; 5879a6a180bSChuck Lever remaining = xdr->page_len; 5889a6a180bSChuck Lever while (remaining) { 5899a6a180bSChuck Lever len = min_t(u32, PAGE_SIZE - page_off, remaining); 5909a6a180bSChuck Lever 59125fd86ecSChuck Lever if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges) 59225fd86ecSChuck Lever return -EIO; 59325fd86ecSChuck Lever ret = svc_rdma_dma_map_page(rdma, ctxt, *ppages++, 59425fd86ecSChuck Lever page_off, len); 5959a6a180bSChuck Lever if (ret < 0) 5969a6a180bSChuck Lever return ret; 5979a6a180bSChuck Lever 5989a6a180bSChuck Lever remaining -= len; 5999a6a180bSChuck Lever page_off = 0; 600c06b540aSTom Tucker } 601c06b540aSTom Tucker 6029a6a180bSChuck Lever base = xdr->tail[0].iov_base; 6039a6a180bSChuck Lever len = xdr->tail[0].iov_len; 6049a6a180bSChuck Lever tail: 6059a6a180bSChuck Lever if (len) { 60625fd86ecSChuck Lever if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges) 60725fd86ecSChuck Lever return -EIO; 60825fd86ecSChuck Lever ret = svc_rdma_dma_map_buf(rdma, ctxt, base, len); 6099a6a180bSChuck Lever if (ret < 0) 6109a6a180bSChuck Lever return ret; 6119a6a180bSChuck Lever } 61208ae4e7fSChuck Lever 61323262790SChuck Lever return 0; 
614c06b540aSTom Tucker } 615c06b540aSTom Tucker 616c55ab070SChuck Lever /* The svc_rqst and all resources it owns are released as soon as 617c55ab070SChuck Lever * svc_rdma_sendto returns. Transfer pages under I/O to the ctxt 618c55ab070SChuck Lever * so they are released by the Send completion handler. 619c55ab070SChuck Lever */ 620c55ab070SChuck Lever static void svc_rdma_save_io_pages(struct svc_rqst *rqstp, 6214201c746SChuck Lever struct svc_rdma_send_ctxt *ctxt) 622c55ab070SChuck Lever { 623c55ab070SChuck Lever int i, pages = rqstp->rq_next_page - rqstp->rq_respages; 624c55ab070SChuck Lever 6254201c746SChuck Lever ctxt->sc_page_count += pages; 626c55ab070SChuck Lever for (i = 0; i < pages; i++) { 6274201c746SChuck Lever ctxt->sc_pages[i + 1] = rqstp->rq_respages[i]; 628c55ab070SChuck Lever rqstp->rq_respages[i] = NULL; 629c55ab070SChuck Lever } 630c55ab070SChuck Lever rqstp->rq_next_page = rqstp->rq_respages + 1; 631c55ab070SChuck Lever } 632c55ab070SChuck Lever 6339a6a180bSChuck Lever /* Prepare the portion of the RPC Reply that will be transmitted 6349a6a180bSChuck Lever * via RDMA Send. The RPC-over-RDMA transport header is prepared 6354201c746SChuck Lever * in sc_sges[0], and the RPC xdr_buf is prepared in following sges. 6369a6a180bSChuck Lever * 6379a6a180bSChuck Lever * Depending on whether a Write list or Reply chunk is present, 6389a6a180bSChuck Lever * the server may send all, a portion of, or none of the xdr_buf. 6394201c746SChuck Lever * In the latter case, only the transport header (sc_sges[0]) is 6409a6a180bSChuck Lever * transmitted. 6419a6a180bSChuck Lever * 6429a6a180bSChuck Lever * RDMA Send is the last step of transmitting an RPC reply. Pages 6439a6a180bSChuck Lever * involved in the earlier RDMA Writes are here transferred out 6449a6a180bSChuck Lever * of the rqstp and into the ctxt's page array. 
These pages are 6459a6a180bSChuck Lever * DMA unmapped by each Write completion, but the subsequent Send 6469a6a180bSChuck Lever * completion finally releases these pages. 6479a6a180bSChuck Lever * 6489a6a180bSChuck Lever * Assumptions: 6499a6a180bSChuck Lever * - The Reply's transport header will never be larger than a page. 650c06b540aSTom Tucker */ 6519a6a180bSChuck Lever static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma, 6529a6a180bSChuck Lever __be32 *rdma_argp, __be32 *rdma_resp, 653c06b540aSTom Tucker struct svc_rqst *rqstp, 6549a6a180bSChuck Lever __be32 *wr_lst, __be32 *rp_ch) 655c06b540aSTom Tucker { 6564201c746SChuck Lever struct svc_rdma_send_ctxt *ctxt; 6579a6a180bSChuck Lever int ret; 6580e7f011aSTom Tucker 6594201c746SChuck Lever ctxt = svc_rdma_send_ctxt_get(rdma); 6604201c746SChuck Lever if (!ctxt) 6614201c746SChuck Lever return -ENOMEM; 662c06b540aSTom Tucker 6639a6a180bSChuck Lever ret = svc_rdma_map_reply_hdr(rdma, ctxt, rdma_resp, 6649a6a180bSChuck Lever svc_rdma_reply_hdr_len(rdma_resp)); 6659a6a180bSChuck Lever if (ret < 0) 666afd566eaSTom Tucker goto err; 667afd566eaSTom Tucker 6689a6a180bSChuck Lever if (!rp_ch) { 6699a6a180bSChuck Lever ret = svc_rdma_map_reply_msg(rdma, ctxt, 6709a6a180bSChuck Lever &rqstp->rq_res, wr_lst); 6719a6a180bSChuck Lever if (ret < 0) 6723fe04ee9SChuck Lever goto err; 6733fe04ee9SChuck Lever } 674c06b540aSTom Tucker 675c55ab070SChuck Lever svc_rdma_save_io_pages(rqstp, ctxt); 6760bf48289SSteve Wise 677986b7889SChuck Lever ctxt->sc_send_wr.opcode = IB_WR_SEND; 678986b7889SChuck Lever if (rdma->sc_snd_w_inv) { 679986b7889SChuck Lever ctxt->sc_send_wr.ex.invalidate_rkey = 680986b7889SChuck Lever svc_rdma_get_inv_rkey(rdma_argp, wr_lst, rp_ch); 681986b7889SChuck Lever if (ctxt->sc_send_wr.ex.invalidate_rkey) 682986b7889SChuck Lever ctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV; 683986b7889SChuck Lever } 684986b7889SChuck Lever dprintk("svcrdma: posting Send WR with %u sge(s)\n", 685986b7889SChuck Lever 
ctxt->sc_send_wr.num_sge); 686986b7889SChuck Lever ret = svc_rdma_send(rdma, &ctxt->sc_send_wr); 687c06b540aSTom Tucker if (ret) 688afd566eaSTom Tucker goto err; 689c06b540aSTom Tucker 690afd566eaSTom Tucker return 0; 691afd566eaSTom Tucker 692afd566eaSTom Tucker err: 6934201c746SChuck Lever svc_rdma_send_ctxt_put(rdma, ctxt); 6949ec64052SChuck Lever return ret; 695c06b540aSTom Tucker } 696c06b540aSTom Tucker 6974757d90bSChuck Lever /* Given the client-provided Write and Reply chunks, the server was not 6984757d90bSChuck Lever * able to form a complete reply. Return an RDMA_ERROR message so the 6994757d90bSChuck Lever * client can retire this RPC transaction. As above, the Send completion 7004757d90bSChuck Lever * routine releases payload pages that were part of a previous RDMA Write. 7014757d90bSChuck Lever * 7024757d90bSChuck Lever * Remote Invalidation is skipped for simplicity. 7034757d90bSChuck Lever */ 7044757d90bSChuck Lever static int svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, 7054757d90bSChuck Lever __be32 *rdma_resp, struct svc_rqst *rqstp) 7064757d90bSChuck Lever { 7074201c746SChuck Lever struct svc_rdma_send_ctxt *ctxt; 7084757d90bSChuck Lever __be32 *p; 7094757d90bSChuck Lever int ret; 7104757d90bSChuck Lever 7114201c746SChuck Lever ctxt = svc_rdma_send_ctxt_get(rdma); 7124201c746SChuck Lever if (!ctxt) 7134201c746SChuck Lever return -ENOMEM; 7144757d90bSChuck Lever 7154757d90bSChuck Lever /* Replace the original transport header with an 7164757d90bSChuck Lever * RDMA_ERROR response. XID etc are preserved. 
7174757d90bSChuck Lever */ 71898895edbSChuck Lever trace_svcrdma_err_chunk(*rdma_resp); 7194757d90bSChuck Lever p = rdma_resp + 3; 7204757d90bSChuck Lever *p++ = rdma_error; 7214757d90bSChuck Lever *p = err_chunk; 7224757d90bSChuck Lever 7234757d90bSChuck Lever ret = svc_rdma_map_reply_hdr(rdma, ctxt, rdma_resp, 20); 7244757d90bSChuck Lever if (ret < 0) 7254757d90bSChuck Lever goto err; 7264757d90bSChuck Lever 7274757d90bSChuck Lever svc_rdma_save_io_pages(rqstp, ctxt); 7284757d90bSChuck Lever 729986b7889SChuck Lever ctxt->sc_send_wr.opcode = IB_WR_SEND; 730986b7889SChuck Lever ret = svc_rdma_send(rdma, &ctxt->sc_send_wr); 7314757d90bSChuck Lever if (ret) 7324757d90bSChuck Lever goto err; 7334757d90bSChuck Lever 7344757d90bSChuck Lever return 0; 7354757d90bSChuck Lever 7364757d90bSChuck Lever err: 7374201c746SChuck Lever svc_rdma_send_ctxt_put(rdma, ctxt); 7384757d90bSChuck Lever return ret; 7394757d90bSChuck Lever } 7404757d90bSChuck Lever 741c06b540aSTom Tucker void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp) 742c06b540aSTom Tucker { 743c06b540aSTom Tucker } 744c06b540aSTom Tucker 7459a6a180bSChuck Lever /** 7469a6a180bSChuck Lever * svc_rdma_sendto - Transmit an RPC reply 7479a6a180bSChuck Lever * @rqstp: processed RPC request, reply XDR already in ::rq_res 7489a6a180bSChuck Lever * 7499a6a180bSChuck Lever * Any resources still associated with @rqstp are released upon return. 7509a6a180bSChuck Lever * If no reply message was possible, the connection is closed. 7519a6a180bSChuck Lever * 7529a6a180bSChuck Lever * Returns: 7539a6a180bSChuck Lever * %0 if an RPC reply has been successfully posted, 7549a6a180bSChuck Lever * %-ENOMEM if a resource shortage occurred (connection is lost), 7559a6a180bSChuck Lever * %-ENOTCONN if posting failed (connection is lost). 
7569a6a180bSChuck Lever */ 757c06b540aSTom Tucker int svc_rdma_sendto(struct svc_rqst *rqstp) 758c06b540aSTom Tucker { 759c06b540aSTom Tucker struct svc_xprt *xprt = rqstp->rq_xprt; 760c06b540aSTom Tucker struct svcxprt_rdma *rdma = 761c06b540aSTom Tucker container_of(xprt, struct svcxprt_rdma, sc_xprt); 7623a88092eSChuck Lever struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt; 7639a6a180bSChuck Lever __be32 *p, *rdma_argp, *rdma_resp, *wr_lst, *rp_ch; 7649a6a180bSChuck Lever struct xdr_buf *xdr = &rqstp->rq_res; 765c06b540aSTom Tucker struct page *res_page; 7669a6a180bSChuck Lever int ret; 767c06b540aSTom Tucker 7683316f063SChuck Lever rdma_argp = rctxt->rc_recv_buf; 7699a6a180bSChuck Lever svc_rdma_get_write_arrays(rdma_argp, &wr_lst, &rp_ch); 770c06b540aSTom Tucker 771e4eb42ceSChuck Lever /* Create the RDMA response header. xprt->xpt_mutex, 772e4eb42ceSChuck Lever * acquired in svc_send(), serializes RPC replies. The 773e4eb42ceSChuck Lever * code path below that inserts the credit grant value 774e4eb42ceSChuck Lever * into each transport header runs only inside this 775e4eb42ceSChuck Lever * critical section. 776e4eb42ceSChuck Lever */ 77778da2b3cSChuck Lever ret = -ENOMEM; 77878da2b3cSChuck Lever res_page = alloc_page(GFP_KERNEL); 77978da2b3cSChuck Lever if (!res_page) 78078da2b3cSChuck Lever goto err0; 781c06b540aSTom Tucker rdma_resp = page_address(res_page); 78298fc21d3SChuck Lever 7839a6a180bSChuck Lever p = rdma_resp; 7849a6a180bSChuck Lever *p++ = *rdma_argp; 7859a6a180bSChuck Lever *p++ = *(rdma_argp + 1); 78698fc21d3SChuck Lever *p++ = rdma->sc_fc_credits; 7879a6a180bSChuck Lever *p++ = rp_ch ? 
rdma_nomsg : rdma_msg; 78898fc21d3SChuck Lever 78998fc21d3SChuck Lever /* Start with empty chunks */ 79098fc21d3SChuck Lever *p++ = xdr_zero; 79198fc21d3SChuck Lever *p++ = xdr_zero; 79298fc21d3SChuck Lever *p = xdr_zero; 793c06b540aSTom Tucker 7949a6a180bSChuck Lever if (wr_lst) { 7959a6a180bSChuck Lever /* XXX: Presume the client sent only one Write chunk */ 7969a6a180bSChuck Lever ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr); 79708ae4e7fSChuck Lever if (ret < 0) 7984757d90bSChuck Lever goto err2; 7999a6a180bSChuck Lever svc_rdma_xdr_encode_write_list(rdma_resp, wr_lst, ret); 80008ae4e7fSChuck Lever } 8019a6a180bSChuck Lever if (rp_ch) { 8029a6a180bSChuck Lever ret = svc_rdma_send_reply_chunk(rdma, rp_ch, wr_lst, xdr); 80308ae4e7fSChuck Lever if (ret < 0) 8044757d90bSChuck Lever goto err2; 8059a6a180bSChuck Lever svc_rdma_xdr_encode_reply_chunk(rdma_resp, rp_ch, ret); 80608ae4e7fSChuck Lever } 807c06b540aSTom Tucker 8089a6a180bSChuck Lever ret = svc_rdma_send_reply_msg(rdma, rdma_argp, rdma_resp, rqstp, 8099a6a180bSChuck Lever wr_lst, rp_ch); 8103e1eeb98SChuck Lever if (ret < 0) 8119995237bSChuck Lever goto err0; 8123a88092eSChuck Lever ret = 0; 8133a88092eSChuck Lever 8143a88092eSChuck Lever out: 8153a88092eSChuck Lever rqstp->rq_xprt_ctxt = NULL; 8163a88092eSChuck Lever svc_rdma_recv_ctxt_put(rdma, rctxt); 8173a88092eSChuck Lever return ret; 818afd566eaSTom Tucker 8194757d90bSChuck Lever err2: 820b20dae70SColin Ian King if (ret != -E2BIG && ret != -EINVAL) 8214757d90bSChuck Lever goto err1; 8224757d90bSChuck Lever 8234757d90bSChuck Lever ret = svc_rdma_send_error_msg(rdma, rdma_resp, rqstp); 8244757d90bSChuck Lever if (ret < 0) 8254757d90bSChuck Lever goto err0; 8263a88092eSChuck Lever ret = 0; 8273a88092eSChuck Lever goto out; 8284757d90bSChuck Lever 829afd566eaSTom Tucker err1: 830afd566eaSTom Tucker put_page(res_page); 831afd566eaSTom Tucker err0: 832bd2abef3SChuck Lever trace_svcrdma_send_failed(rqstp, ret); 8339a6a180bSChuck Lever set_bit(XPT_CLOSE, 
&xprt->xpt_flags); 8343a88092eSChuck Lever ret = -ENOTCONN; 8353a88092eSChuck Lever goto out; 836c06b540aSTom Tucker } 837