// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright (c) 2014-2020, Oracle and/or its affiliates.
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * rpc_rdma.c
 *
 * This file contains the guts of the RPC RDMA protocol, and
 * does marshaling/unmarshaling, etc. It is also where interfacing
 * to the Linux RPC framework lives.
 */

#include <linux/highmem.h>

#include <linux/sunrpc/svc_rdma.h>

#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>

/* Returns size of largest RPC-over-RDMA header in a Call message
 *
 * The largest Call header contains a full-size Read list and a
 * minimal Reply chunk.
 */
static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs)
{
	unsigned int size;

	/* Fixed header fields and list discriminators */
	size = RPCRDMA_HDRLEN_MIN;

	/* Maximum Read list size */
	size += maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);

	/* Minimal Reply chunk size */
	size += sizeof(__be32);	/* segment count */
	size += rpcrdma_segment_maxsz * sizeof(__be32);
	size += sizeof(__be32);	/* list discriminator */

	return size;
}

/* Returns size of largest RPC-over-RDMA header in a Reply message
 *
 * There is only one Write list or one Reply chunk per Reply
 * message. The larger list is the Write list.
 */
static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
{
	unsigned int size;

	/* Fixed header fields and list discriminators */
	size = RPCRDMA_HDRLEN_MIN;

	/* Maximum Write list size */
	size += sizeof(__be32);	/* segment count */
	size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32);
	size += sizeof(__be32);	/* list discriminator */

	return size;
}
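
/* Worked example of the two calculations above (illustrative only;
 * assumes RPCRDMA_HDRLEN_MIN is 7 XDR words (28 octets),
 * rpcrdma_segment_maxsz is 4 words, and rpcrdma_readchunk_maxsz is
 * 6 words -- see rpc_rdma.h for the authoritative values):
 *
 *	maxsegs = 8
 *	Call:  28 + 8 * 6 * 4 + (4 + 4 * 4 + 4) = 244 octets
 *	Reply: 28 + 4 + 8 * 4 * 4 + 4           = 164 octets
 *
 * rpcrdma_set_max_header_sizes() below subtracts these worst-case
 * header sizes from the connection's inline thresholds.
 */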

/**
 * rpcrdma_set_max_header_sizes - Initialize inline payload sizes
 * @ep: endpoint to initialize
 *
 * The max_inline fields contain the maximum size of an RPC message
 * so the marshaling code doesn't have to repeat this calculation
 * for every RPC.
 */
void rpcrdma_set_max_header_sizes(struct rpcrdma_ep *ep)
{
	unsigned int maxsegs = ep->re_max_rdma_segs;

	ep->re_max_inline_send =
		ep->re_inline_send - rpcrdma_max_call_header_size(maxsegs);
	ep->re_max_inline_recv =
		ep->re_inline_recv - rpcrdma_max_reply_header_size(maxsegs);
}

/* The client can send a request inline as long as the RPCRDMA header
 * plus the RPC call fit under the transport's inline limit. If the
 * combined call message size exceeds that limit, the client must use
 * a Read chunk for this operation.
 *
 * A Read chunk is also required if sending the RPC call inline would
 * exceed this device's max_sge limit.
 */
static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
				struct rpc_rqst *rqst)
{
	struct xdr_buf *xdr = &rqst->rq_snd_buf;
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	unsigned int count, remaining, offset;

	if (xdr->len > ep->re_max_inline_send)
		return false;

	if (xdr->page_len) {
		remaining = xdr->page_len;
		offset = offset_in_page(xdr->page_base);
		count = RPCRDMA_MIN_SEND_SGES;
		while (remaining) {
			remaining -= min_t(unsigned int,
					   PAGE_SIZE - offset, remaining);
			offset = 0;
			if (++count > ep->re_attr.cap.max_send_sge)
				return false;
		}
	}

	return true;
}
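
/* Example of the SGE accounting above (illustrative): a Call whose
 * page list is 3 * PAGE_SIZE long but begins 100 bytes into its
 * first page touches four pages, so the loop counts four payload
 * SGEs on top of the RPCRDMA_MIN_SEND_SGES baseline. If the total
 * exceeds the device's max_send_sge, the Call is sent with a Read
 * chunk even though it fits under the inline threshold.
 */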

/* The client can't know how large the actual reply will be. Thus it
 * plans for the largest possible reply for that particular ULP
 * operation. If the maximum combined reply message size exceeds the
 * inline threshold, the client must provide a write list or a reply
 * chunk for this request.
 */
static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
				   struct rpc_rqst *rqst)
{
	return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep->re_max_inline_recv;
}

/* The client is required to provide a Reply chunk if the maximum
 * size of the non-payload part of the RPC Reply is larger than
 * the inline threshold.
 */
static bool
rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt,
			  const struct rpc_rqst *rqst)
{
	const struct xdr_buf *buf = &rqst->rq_rcv_buf;

	return (buf->head[0].iov_len + buf->tail[0].iov_len) <
		r_xprt->rx_ep->re_max_inline_recv;
}

/* The NFS ACL code likes to be lazy in allocating pages. For TCP,
 * these pages can be allocated during receive processing. Not true
 * for RDMA, which must always provision receive buffers
 * up front.
 */
static noinline int
rpcrdma_alloc_sparse_pages(struct xdr_buf *buf)
{
	struct page **ppages;
	int len;

	len = buf->page_len;
	ppages = buf->pages + (buf->page_base >> PAGE_SHIFT);
	while (len > 0) {
		if (!*ppages)
			*ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
		if (!*ppages)
			return -ENOBUFS;
		ppages++;
		len -= PAGE_SIZE;
	}

	return 0;
}

/* Convert @vec to a single SGL element.
 *
 * Returns pointer to next available SGE, and bumps the total number
 * of SGEs consumed.
 */
static struct rpcrdma_mr_seg *
rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg,
		     unsigned int *n)
{
	seg->mr_page = virt_to_page(vec->iov_base);
	seg->mr_offset = offset_in_page(vec->iov_base);
	seg->mr_len = vec->iov_len;
	++seg;
	++(*n);
	return seg;
}

/* Convert @xdrbuf into SGEs no larger than a page each. As they
 * are registered, these SGEs are then coalesced into RDMA segments
 * when the selected memreg mode supports it.
 *
 * Returns positive number of SGEs consumed, or a negative errno.
 */

static int
rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
		     unsigned int pos, enum rpcrdma_chunktype type,
		     struct rpcrdma_mr_seg *seg)
{
	unsigned long page_base;
	unsigned int len, n;
	struct page **ppages;

	n = 0;
	if (pos == 0)
		seg = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, &n);

	len = xdrbuf->page_len;
	ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
	page_base = offset_in_page(xdrbuf->page_base);
	while (len) {
		seg->mr_page = *ppages;
		seg->mr_offset = page_base;
		seg->mr_len = min_t(u32, PAGE_SIZE - page_base, len);
		len -= seg->mr_len;
		++ppages;
		++seg;
		++n;
		page_base = 0;
	}

	if (type == rpcrdma_readch || type == rpcrdma_writech)
		goto out;

	if (xdrbuf->tail[0].iov_len)
		rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n);

out:
	if (unlikely(n > RPCRDMA_MAX_SEGS))
		return -EIO;
	return n;
}
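
/* Illustration of the conversion above (not part of the protocol):
 * an xdr_buf with a 128-byte head, a page list of PAGE_SIZE + 200
 * bytes starting at page offset 0, and a 4-byte tail becomes:
 *
 *	seg[0]: head   -- offset_in_page(head.iov_base), len 128
 *	seg[1]: page 0 -- offset 0, len PAGE_SIZE
 *	seg[2]: page 1 -- offset 0, len 200
 *	seg[3]: tail   -- skipped for Read and Write chunks, which
 *		move only the data payload via explicit RDMA
 *
 * Registration may later coalesce these page-sized segments into
 * fewer RDMA segments when the memreg mode supports it.
 */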

static int
encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, 4 * sizeof(*p));
	if (unlikely(!p))
		return -EMSGSIZE;

	xdr_encode_rdma_segment(p, mr->mr_handle, mr->mr_length, mr->mr_offset);
	return 0;
}

static int
encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
		    u32 position)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, 6 * sizeof(*p));
	if (unlikely(!p))
		return -EMSGSIZE;

	*p++ = xdr_one;	/* Item present */
	xdr_encode_read_segment(p, position, mr->mr_handle, mr->mr_length,
				mr->mr_offset);
	return 0;
}
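
/* On-the-wire XDR layout of the two segment encodings above, for
 * reference (one box per 32-bit XDR word; offset is 64 bits):
 *
 *   plain segment (4 words):
 *	| handle | length | offset (hi) | offset (lo) |
 *
 *   read segment (6 words):
 *	| 1 | position | handle | length | offset (hi) | offset (lo) |
 *
 * The leading 1 is the list discriminator marking another item
 * present; a 0 word terminates the Read list.
 */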

static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt,
						 struct rpcrdma_req *req,
						 struct rpcrdma_mr_seg *seg,
						 int nsegs, bool writing,
						 struct rpcrdma_mr **mr)
{
	*mr = rpcrdma_mr_pop(&req->rl_free_mrs);
	if (!*mr) {
		*mr = rpcrdma_mr_get(r_xprt);
		if (!*mr)
			goto out_getmr_err;
		(*mr)->mr_req = req;
	}

	rpcrdma_mr_push(*mr, &req->rl_registered);
	return frwr_map(r_xprt, seg, nsegs, writing, req->rl_slot.rq_xid, *mr);

out_getmr_err:
	trace_xprtrdma_nomrs_err(r_xprt, req);
	xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
	rpcrdma_mrs_refresh(r_xprt);
	return ERR_PTR(-EAGAIN);
}

/* Register and XDR encode the Read list. Supports encoding a list of read
 * segments that belong to a single read chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Read chunklist (a linked list):
 *   N elements, position P (same P for all chunks of same arg!):
 *    1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 *
 * Only a single @pos value is currently supported.
 */
static int rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req,
				    struct rpc_rqst *rqst,
				    enum rpcrdma_chunktype rtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	unsigned int pos;
	int nsegs;

	if (rtype == rpcrdma_noch_pullup || rtype == rpcrdma_noch_mapped)
		goto done;

	pos = rqst->rq_snd_buf.head[0].iov_len;
	if (rtype == rpcrdma_areadch)
		pos = 0;
	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
				     rtype, seg);
	if (nsegs < 0)
		return nsegs;

	do {
		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, false, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);

		if (encode_read_segment(xdr, mr, pos) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_read(rqst->rq_task, pos, mr, nsegs);
		r_xprt->rx_stats.read_chunk_count++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

done:
	if (xdr_stream_encode_item_absent(xdr) < 0)
		return -EMSGSIZE;
	return 0;
}

/* Register and XDR encode the Write list. Supports encoding a list
 * containing one array of plain segments that belong to a single
 * write chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Write chunklist (a list of (one) counted array):
 *   N elements:
 *    1 - N - HLOO - HLOO - ... - HLOO - 0
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 *
 * Only a single Write chunk is currently supported.
 */
static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt,
				     struct rpcrdma_req *req,
				     struct rpc_rqst *rqst,
				     enum rpcrdma_chunktype wtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	int nsegs, nchunks;
	__be32 *segcount;

	if (wtype != rpcrdma_writech)
		goto done;

	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
				     rqst->rq_rcv_buf.head[0].iov_len,
				     wtype, seg);
	if (nsegs < 0)
		return nsegs;

	if (xdr_stream_encode_item_present(xdr) < 0)
		return -EMSGSIZE;
	segcount = xdr_reserve_space(xdr, sizeof(*segcount));
	if (unlikely(!segcount))
		return -EMSGSIZE;
	/* Actual value encoded below */

	nchunks = 0;
	do {
		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);

		if (encode_rdma_segment(xdr, mr) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_write(rqst->rq_task, mr, nsegs);
		r_xprt->rx_stats.write_chunk_count++;
		r_xprt->rx_stats.total_rdma_request += mr->mr_length;
		nchunks++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

	if (xdr_pad_size(rqst->rq_rcv_buf.page_len)) {
		if (encode_rdma_segment(xdr, ep->re_write_pad_mr) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_wp(rqst->rq_task, ep->re_write_pad_mr,
					nsegs);
		r_xprt->rx_stats.write_chunk_count++;
		r_xprt->rx_stats.total_rdma_request += mr->mr_length;
		nchunks++;
		nsegs -= mr->mr_nents;
	}

	/* Update count of segments in this Write chunk */
	*segcount = cpu_to_be32(nchunks);

done:
	if (xdr_stream_encode_item_absent(xdr) < 0)
		return -EMSGSIZE;
	return 0;
}

/* Register and XDR encode the Reply chunk. Supports encoding an array
 * of plain segments that belong to a single write (reply) chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Reply chunk (a counted array):
 *   N elements:
 *    1 - N - HLOO - HLOO - ... - HLOO
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 */
static int rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
				      struct rpcrdma_req *req,
				      struct rpc_rqst *rqst,
				      enum rpcrdma_chunktype wtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	int nsegs, nchunks;
	__be32 *segcount;

	if (wtype != rpcrdma_replych) {
		if (xdr_stream_encode_item_absent(xdr) < 0)
			return -EMSGSIZE;
		return 0;
	}

	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
	if (nsegs < 0)
		return nsegs;

	if (xdr_stream_encode_item_present(xdr) < 0)
		return -EMSGSIZE;
	segcount = xdr_reserve_space(xdr, sizeof(*segcount));
	if (unlikely(!segcount))
		return -EMSGSIZE;
	/* Actual value encoded below */

	nchunks = 0;
	do {
		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);

		if (encode_rdma_segment(xdr, mr) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_reply(rqst->rq_task, mr, nsegs);
		r_xprt->rx_stats.reply_chunk_count++;
		r_xprt->rx_stats.total_rdma_request += mr->mr_length;
		nchunks++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

	/* Update count of segments in the Reply chunk */
	*segcount = cpu_to_be32(nchunks);

	return 0;
}

static void rpcrdma_sendctx_done(struct kref *kref)
{
	struct rpcrdma_req *req =
		container_of(kref, struct rpcrdma_req, rl_kref);
	struct rpcrdma_rep *rep = req->rl_reply;

	rpcrdma_complete_rqst(rep);
	rep->rr_rxprt->rx_stats.reply_waits_for_send++;
}

/**
 * rpcrdma_sendctx_unmap - DMA-unmap Send buffer
 * @sc: sendctx containing SGEs to unmap
 *
 */
void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
{
	struct rpcrdma_regbuf *rb = sc->sc_req->rl_sendbuf;
	struct ib_sge *sge;

	if (!sc->sc_unmap_count)
		return;

	/* The first two SGEs contain the transport header and
	 * the inline buffer. These are always left mapped so
	 * they can be cheaply re-used.
	 */
	for (sge = &sc->sc_sges[2]; sc->sc_unmap_count;
	     ++sge, --sc->sc_unmap_count)
		ib_dma_unmap_page(rdmab_device(rb), sge->addr, sge->length,
				  DMA_TO_DEVICE);

	kref_put(&sc->sc_req->rl_kref, rpcrdma_sendctx_done);
}
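
/* Typical layout of a sendctx's SGE array, for reference
 * (illustrative; some chunk types use fewer entries):
 *
 *	sc_sges[0]:   transport header (rl_rdmabuf), always mapped
 *	sc_sges[1]:   head iovec (rl_sendbuf), always mapped
 *	sc_sges[2..]: page list and tail pages, DMA-mapped per Send
 *		      and counted in sc_unmap_count
 *
 * Only the entries from sc_sges[2] onward are unmapped above; the
 * first two are retained for cheap re-use.
 */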

/* Prepare an SGE for the RPC-over-RDMA transport header.
 */
static void rpcrdma_prepare_hdr_sge(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req, u32 len)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];

	sge->addr = rdmab_addr(rb);
	sge->length = len;
	sge->lkey = rdmab_lkey(rb);

	ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
				      DMA_TO_DEVICE);
}

/* The head iovec is straightforward, as it is usually already
 * DMA-mapped. Sync the content that has changed.
 */
static bool rpcrdma_prepare_head_iov(struct rpcrdma_xprt *r_xprt,
				     struct rpcrdma_req *req, unsigned int len)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];
	struct rpcrdma_regbuf *rb = req->rl_sendbuf;

	if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
		return false;

	sge->addr = rdmab_addr(rb);
	sge->length = len;
	sge->lkey = rdmab_lkey(rb);

	ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
				      DMA_TO_DEVICE);
	return true;
}

/* If there is a page list present, DMA map and prepare an
 * SGE for each page to be sent.
 */
static bool rpcrdma_prepare_pagelist(struct rpcrdma_req *req,
				     struct xdr_buf *xdr)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct rpcrdma_regbuf *rb = req->rl_sendbuf;
	unsigned int page_base, len, remaining;
	struct page **ppages;
	struct ib_sge *sge;

	ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
	page_base = offset_in_page(xdr->page_base);
	remaining = xdr->page_len;
	while (remaining) {
		sge = &sc->sc_sges[req->rl_wr.num_sge++];
		len = min_t(unsigned int, PAGE_SIZE - page_base, remaining);
		sge->addr = ib_dma_map_page(rdmab_device(rb), *ppages,
					    page_base, len, DMA_TO_DEVICE);
		if (ib_dma_mapping_error(rdmab_device(rb), sge->addr))
			goto out_mapping_err;

		sge->length = len;
		sge->lkey = rdmab_lkey(rb);

		sc->sc_unmap_count++;
		ppages++;
		remaining -= len;
		page_base = 0;
	}

	return true;

out_mapping_err:
	trace_xprtrdma_dma_maperr(sge->addr);
	return false;
}

/* The tail iovec may include an XDR pad for the page list,
 * as well as additional content, and may not reside in the
 * same page as the head iovec.
 */
static bool rpcrdma_prepare_tail_iov(struct rpcrdma_req *req,
				     struct xdr_buf *xdr,
				     unsigned int page_base, unsigned int len)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];
	struct rpcrdma_regbuf *rb = req->rl_sendbuf;
	struct page *page = virt_to_page(xdr->tail[0].iov_base);

	sge->addr = ib_dma_map_page(rdmab_device(rb), page, page_base, len,
				    DMA_TO_DEVICE);
	if (ib_dma_mapping_error(rdmab_device(rb), sge->addr))
		goto out_mapping_err;

	sge->length = len;
	sge->lkey = rdmab_lkey(rb);
	++sc->sc_unmap_count;
	return true;

out_mapping_err:
	trace_xprtrdma_dma_maperr(sge->addr);
	return false;
}

/* Copy the tail to the end of the head buffer.
 */
static void rpcrdma_pullup_tail_iov(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req,
				    struct xdr_buf *xdr)
{
	unsigned char *dst;

	dst = (unsigned char *)xdr->head[0].iov_base;
	dst += xdr->head[0].iov_len + xdr->page_len;
	memmove(dst, xdr->tail[0].iov_base, xdr->tail[0].iov_len);
	r_xprt->rx_stats.pullup_copy_count += xdr->tail[0].iov_len;
}

/* Copy pagelist content into the head buffer.
 */
static void rpcrdma_pullup_pagelist(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req,
				    struct xdr_buf *xdr)
{
	unsigned int len, page_base, remaining;
	struct page **ppages;
	unsigned char *src, *dst;

	dst = (unsigned char *)xdr->head[0].iov_base;
	dst += xdr->head[0].iov_len;
	ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
	page_base = offset_in_page(xdr->page_base);
	remaining = xdr->page_len;
	while (remaining) {
		src = page_address(*ppages);
		src += page_base;
		len = min_t(unsigned int, PAGE_SIZE - page_base, remaining);
		memcpy(dst, src, len);
		r_xprt->rx_stats.pullup_copy_count += len;

		ppages++;
		dst += len;
		remaining -= len;
		page_base = 0;
	}
}

/* Copy the contents of @xdr into @rl_sendbuf and DMA sync it.
 * When the head, pagelist, and tail are small, a pull-up copy
 * is considerably less costly than DMA mapping the components
 * of @xdr.
 *
 * Assumptions:
 *  - the caller has already verified that the total length
 *    of the RPC Call body will fit into @rl_sendbuf.
 */
static bool rpcrdma_prepare_noch_pullup(struct rpcrdma_xprt *r_xprt,
					struct rpcrdma_req *req,
					struct xdr_buf *xdr)
{
	if (unlikely(xdr->tail[0].iov_len))
		rpcrdma_pullup_tail_iov(r_xprt, req, xdr);

	if (unlikely(xdr->page_len))
		rpcrdma_pullup_pagelist(r_xprt, req, xdr);

	/* The whole RPC message resides in the head iovec now */
	return rpcrdma_prepare_head_iov(r_xprt, req, xdr->len);
}
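
/* After the pull-up above, the Send buffer is laid out contiguously
 * (illustrative):
 *
 *	[ head bytes | pagelist bytes | tail bytes ]
 *	  ^-- rl_sendbuf, described by a single head SGE of xdr->len
 *
 * Note the tail is copied before the page list: its source bytes
 * may sit immediately after the head, exactly where page list data
 * is about to land, and memmove tolerates any overlap.
 */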

static bool rpcrdma_prepare_noch_mapped(struct rpcrdma_xprt *r_xprt,
					struct rpcrdma_req *req,
					struct xdr_buf *xdr)
{
	struct kvec *tail = &xdr->tail[0];

	if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
		return false;
	if (xdr->page_len)
		if (!rpcrdma_prepare_pagelist(req, xdr))
			return false;
	if (tail->iov_len)
		if (!rpcrdma_prepare_tail_iov(req, xdr,
					      offset_in_page(tail->iov_base),
					      tail->iov_len))
			return false;

	if (req->rl_sendctx->sc_unmap_count)
		kref_get(&req->rl_kref);
	return true;
}

static bool rpcrdma_prepare_readch(struct rpcrdma_xprt *r_xprt,
				   struct rpcrdma_req *req,
				   struct xdr_buf *xdr)
{
	if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
		return false;

	/* If there is a Read chunk, the page list is being handled
	 * via explicit RDMA, and thus is skipped here.
	 */

	/* Do not include the tail if it is only an XDR pad */
	if (xdr->tail[0].iov_len > 3) {
		unsigned int page_base, len;

		/* If the content in the page list is an odd length,
		 * xdr_write_pages() adds a pad at the beginning of
		 * the tail iovec. Force the tail's non-pad content to
		 * land at the next XDR position in the Send message.
		 */
		page_base = offset_in_page(xdr->tail[0].iov_base);
		len = xdr->tail[0].iov_len;
		page_base += len & 3;
		len -= len & 3;
		if (!rpcrdma_prepare_tail_iov(req, xdr, page_base, len))
			return false;
		kref_get(&req->rl_kref);
	}

	return true;
}
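
/* Worked example of the pad arithmetic above (illustrative): a tail
 * iovec of 7 bytes that begins with a 3-byte XDR pad has
 *
 *	len & 3 = 3
 *
 * so page_base advances 3 bytes past the pad and len drops to 4:
 * only the 4 bytes of real tail content are added to the Send.
 */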

/**
 * rpcrdma_prepare_send_sges - Construct SGEs for a Send WR
 * @r_xprt: controlling transport
 * @req: context of RPC Call being marshalled
 * @hdrlen: size of transport header, in bytes
 * @xdr: xdr_buf containing RPC Call
 * @rtype: chunk type being encoded
 *
 * Returns 0 on success; otherwise a negative errno is returned.
 */
inline int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
				     struct rpcrdma_req *req, u32 hdrlen,
				     struct xdr_buf *xdr,
				     enum rpcrdma_chunktype rtype)
{
	int ret;

	ret = -EAGAIN;
	req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt);
	if (!req->rl_sendctx)
		goto out_nosc;
	req->rl_sendctx->sc_unmap_count = 0;
	req->rl_sendctx->sc_req = req;
	kref_init(&req->rl_kref);
	req->rl_wr.wr_cqe = &req->rl_sendctx->sc_cqe;
	req->rl_wr.sg_list = req->rl_sendctx->sc_sges;
	req->rl_wr.num_sge = 0;
	req->rl_wr.opcode = IB_WR_SEND;

	rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen);

	ret = -EIO;
	switch (rtype) {
	case rpcrdma_noch_pullup:
		if (!rpcrdma_prepare_noch_pullup(r_xprt, req, xdr))
			goto out_unmap;
		break;
	case rpcrdma_noch_mapped:
		if (!rpcrdma_prepare_noch_mapped(r_xprt, req, xdr))
			goto out_unmap;
		break;
	case rpcrdma_readch:
		if (!rpcrdma_prepare_readch(r_xprt, req, xdr))
			goto out_unmap;
		break;
	case rpcrdma_areadch:
		break;
	default:
		goto out_unmap;
	}

	return 0;

out_unmap:
	rpcrdma_sendctx_unmap(req->rl_sendctx);
out_nosc:
	trace_xprtrdma_prepsend_failed(&req->rl_slot, ret);
	return ret;
}

/**
 * rpcrdma_marshal_req - Marshal and send one RPC request
 * @r_xprt: controlling transport
 * @rqst: RPC request to be marshaled
 *
 * For the RPC in "rqst", this function:
 *  - Chooses the transfer mode (e.g., RDMA_MSG or RDMA_NOMSG)
 *  - Registers Read, Write, and Reply chunks
 *  - Constructs the transport header
 *  - Posts a Send WR to send the transport header and request
 *
 * Returns:
 *	%0 if the RPC was sent successfully,
 *	%-ENOTCONN if the connection was lost,
 *	%-EAGAIN if the caller should call again with the same arguments,
 *	%-ENOBUFS if the caller should call again after a delay,
 *	%-EMSGSIZE if the transport header is too small,
 *	%-EIO if a permanent problem occurred while marshaling.
 */
int
rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
{
	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
	struct xdr_stream *xdr = &req->rl_stream;
	enum rpcrdma_chunktype rtype, wtype;
	struct xdr_buf *buf = &rqst->rq_snd_buf;
	bool ddp_allowed;
	__be32 *p;
	int ret;

	if (unlikely(rqst->rq_rcv_buf.flags & XDRBUF_SPARSE_PAGES)) {
		ret = rpcrdma_alloc_sparse_pages(&rqst->rq_rcv_buf);
		if (ret)
			return ret;
	}

	rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
	xdr_init_encode(xdr, &req->rl_hdrbuf, rdmab_data(req->rl_rdmabuf),
			rqst);

	/* Fixed header fields */
	ret = -EMSGSIZE;
	p = xdr_reserve_space(xdr, 4 * sizeof(*p));
	if (!p)
		goto out_err;
	*p++ = rqst->rq_xid;
	*p++ = rpcrdma_version;
	*p++ = r_xprt->rx_buf.rb_max_requests;

	/* When the ULP employs a GSS flavor that guarantees integrity
	 * or privacy, direct data placement of individual data items
	 * is not allowed.
	 */
	ddp_allowed = !test_bit(RPCAUTH_AUTH_DATATOUCH,
				&rqst->rq_cred->cr_auth->au_flags);

	/*
	 * Chunks needed for results?
	 *
	 * o If the expected result is under the inline threshold, all ops
	 *   return as inline.
	 * o Large read ops return data as write chunk(s), header as
	 *   inline.
	 * o Large non-read ops return as a single reply chunk.
	 */
	if (rpcrdma_results_inline(r_xprt, rqst))
		wtype = rpcrdma_noch;
	else if ((ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ) &&
		 rpcrdma_nonpayload_inline(r_xprt, rqst))
		wtype = rpcrdma_writech;
	else
		wtype = rpcrdma_replych;

	/*
	 * Chunks needed for arguments?
	 *
	 * o If the total request is under the inline threshold, all ops
	 *   are sent as inline.
	 * o Large write ops transmit data as read chunk(s), header as
	 *   inline.
	 * o Large non-write ops are sent with the entire message as a
	 *   single read chunk (protocol 0-position special case).
	 *
	 * This assumes that the upper layer does not present a request
	 * that both has a data payload, and whose non-data arguments
	 * by themselves are larger than the inline threshold.
	 */
	if (rpcrdma_args_inline(r_xprt, rqst)) {
		*p++ = rdma_msg;
		rtype = buf->len < rdmab_length(req->rl_sendbuf) ?
			rpcrdma_noch_pullup : rpcrdma_noch_mapped;
	} else if (ddp_allowed && buf->flags & XDRBUF_WRITE) {
		*p++ = rdma_msg;
		rtype = rpcrdma_readch;
	} else {
		r_xprt->rx_stats.nomsg_call_count++;
		*p++ = rdma_nomsg;
		rtype = rpcrdma_areadch;
	}

	/* This implementation supports the following combinations
	 * of chunk lists in one RPC-over-RDMA Call message:
	 *
	 *   - Read list
	 *   - Write list
	 *   - Reply chunk
	 *   - Read list + Reply chunk
	 *
	 * It might not yet support the following combinations:
	 *
	 *   - Read list + Write list
	 *
	 * It does not support the following combinations:
	 *
	 *   - Write list + Reply chunk
	 *   - Read list + Write list + Reply chunk
	 *
	 * This implementation supports only a single chunk in each
	 * Read or Write list. Thus for example the client cannot
	 * send a Call message with a Position Zero Read chunk and a
	 * regular Read chunk at the same time.
	 */
	ret = rpcrdma_encode_read_list(r_xprt, req, rqst, rtype);
	if (ret)
		goto out_err;
	ret = rpcrdma_encode_write_list(r_xprt, req, rqst, wtype);
	if (ret)
		goto out_err;
	ret = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, wtype);
	if (ret)
		goto out_err;

	ret = rpcrdma_prepare_send_sges(r_xprt, req, req->rl_hdrbuf.len,
					buf, rtype);
	if (ret)
		goto out_err;

	trace_xprtrdma_marshal(req, rtype, wtype);
	return 0;

out_err:
	trace_xprtrdma_marshal_failed(rqst, ret);
	r_xprt->rx_stats.failed_marshal_count++;
	frwr_reset(req);
	return ret;
}
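
/* Putting the pieces together, a fully inline Call marshaled above
 * carries this minimal transport header (illustrative; one box per
 * XDR word):
 *
 *	| xid | vers=1 | credits | RDMA_MSG | 0 | 0 | 0 |
 *
 * The three trailing zero words are the empty Read list, Write
 * list, and Reply chunk emitted by the three encode helpers. The
 * RPC message itself follows immediately in the same Send.
 */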

static void __rpcrdma_update_cwnd_locked(struct rpc_xprt *xprt,
					 struct rpcrdma_buffer *buf,
					 u32 grant)
{
	buf->rb_credits = grant;
	xprt->cwnd = grant << RPC_CWNDSHIFT;
}

static void rpcrdma_update_cwnd(struct rpcrdma_xprt *r_xprt, u32 grant)
{
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;

	spin_lock(&xprt->transport_lock);
	__rpcrdma_update_cwnd_locked(xprt, &r_xprt->rx_buf, grant);
	spin_unlock(&xprt->transport_lock);
}

/**
 * rpcrdma_reset_cwnd - Reset the xprt's congestion window
 * @r_xprt: controlling transport instance
 *
 * Prepare @r_xprt for the next connection by reinitializing
 * its credit grant to one (see RFC 8166, Section 3.3.3).
 */
void rpcrdma_reset_cwnd(struct rpcrdma_xprt *r_xprt)
{
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;

	spin_lock(&xprt->transport_lock);
	xprt->cong = 0;
	__rpcrdma_update_cwnd_locked(xprt, &r_xprt->rx_buf, 1);
	spin_unlock(&xprt->transport_lock);
}

/**
 * rpcrdma_inline_fixup - Scatter inline received data into rqst's iovecs
 * @rqst: controlling RPC request
 * @srcp: points to RPC message payload in receive buffer
 * @copy_len: remaining length of receive buffer content
 * @pad: Write chunk pad bytes needed (zero for pure inline)
 *
 * The upper layer has set the maximum number of bytes it can
 * receive in each component of rq_rcv_buf. These values are set in
 * the head.iov_len, page_len, tail.iov_len, and buflen fields.
 *
 * Unlike the TCP equivalent (xdr_partial_copy_from_skb), in
 * many cases this function simply updates iov_base pointers in
 * rq_rcv_buf to point directly to the received reply data, to
 * avoid copying reply data.
 *
 * Returns the count of bytes which had to be memcopied.
 */
static unsigned long
rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
{
	unsigned long fixup_copy_count;
	int i, npages, curlen;
	char *destp;
	struct page **ppages;
	int page_base;

	/* The head iovec is redirected to the RPC reply message
	 * in the receive buffer, to avoid a memcopy.
	 */
	rqst->rq_rcv_buf.head[0].iov_base = srcp;
	rqst->rq_private_buf.head[0].iov_base = srcp;

	/* The contents of the receive buffer that follow
	 * head.iov_len bytes are copied into the page list.
	 */
	curlen = rqst->rq_rcv_buf.head[0].iov_len;
	if (curlen > copy_len)
		curlen = copy_len;
	srcp += curlen;
	copy_len -= curlen;

	ppages = rqst->rq_rcv_buf.pages +
		(rqst->rq_rcv_buf.page_base >> PAGE_SHIFT);
	page_base = offset_in_page(rqst->rq_rcv_buf.page_base);
	fixup_copy_count = 0;
	if (copy_len && rqst->rq_rcv_buf.page_len) {
		int pagelist_len;

		pagelist_len = rqst->rq_rcv_buf.page_len;
		if (pagelist_len > copy_len)
			pagelist_len = copy_len;
		npages = PAGE_ALIGN(page_base + pagelist_len) >> PAGE_SHIFT;
		for (i = 0; i < npages; i++) {
			curlen = PAGE_SIZE - page_base;
			if (curlen > pagelist_len)
				curlen = pagelist_len;

			destp = kmap_atomic(ppages[i]);
			memcpy(destp + page_base, srcp, curlen);
			flush_dcache_page(ppages[i]);
			kunmap_atomic(destp);
			srcp += curlen;
			copy_len -= curlen;
			fixup_copy_count += curlen;
			pagelist_len -= curlen;
			if (!pagelist_len)
				break;
			page_base = 0;
		}

		/* Implicit padding for the last segment in a Write
		 * chunk is inserted inline at the front of the tail
		 * iovec. The upper layer ignores the content of
		 * the pad. Simply ensure inline content in the tail
		 * that follows the Write chunk is properly aligned.
		 */
		if (pad)
			srcp -= pad;
	}

	/* The tail iovec is redirected to the remaining data
	 * in the receive buffer, to avoid a memcopy.
	 */
	if (copy_len || pad) {
		rqst->rq_rcv_buf.tail[0].iov_base = srcp;
		rqst->rq_private_buf.tail[0].iov_base = srcp;
	}

	if (fixup_copy_count)
		trace_xprtrdma_fixup(rqst, fixup_copy_count);
	return fixup_copy_count;
}

/* By convention, backchannel calls arrive via rdma_msg type
 * messages, and never populate the chunk lists. This makes
 * the RPC/RDMA header small and fixed in size, so it is
 * straightforward to check the RPC header's direction field.
111963cae470SChuck Lever */
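/* Note: the config guard below sits between the function signature
 * and its body, so the backchannel and non-backchannel builds share
 * one signature and differ only in the body that follows.
 */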
112063cae470SChuck Lever static bool
11215381e0ecSChuck Lever rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
112241c8f70fSChuck Lever #if defined(CONFIG_SUNRPC_BACKCHANNEL)
112363cae470SChuck Lever {
1124*11270e7cSKinglong Mee struct rpc_xprt *xprt = &r_xprt->rx_xprt;
112541c8f70fSChuck Lever struct xdr_stream *xdr = &rep->rr_stream;
112641c8f70fSChuck Lever __be32 *p;
112763cae470SChuck Lever
11285381e0ecSChuck Lever if (rep->rr_proc != rdma_msg)
112963cae470SChuck Lever return false;
113063cae470SChuck Lever
113141c8f70fSChuck Lever /* Peek at stream contents without advancing. */
113241c8f70fSChuck Lever p = xdr_inline_decode(xdr, 0);
113341c8f70fSChuck Lever
113441c8f70fSChuck Lever /* Chunk lists */
113507e9a632SChuck Lever if (xdr_item_is_present(p++))
113663cae470SChuck Lever return false;
113707e9a632SChuck Lever if (xdr_item_is_present(p++))
113841c8f70fSChuck Lever return false;
113907e9a632SChuck Lever if (xdr_item_is_present(p++))
114063cae470SChuck Lever return false;
114163cae470SChuck Lever
114241c8f70fSChuck Lever /* RPC header */
11435381e0ecSChuck Lever if (*p++ != rep->rr_xid)
114441c8f70fSChuck Lever return false;
114541c8f70fSChuck Lever if (*p != cpu_to_be32(RPC_CALL))
114641c8f70fSChuck Lever return false;
114741c8f70fSChuck Lever
1148*11270e7cSKinglong Mee /* No backchannel service is registered. */
1149*11270e7cSKinglong Mee if (xprt->bc_serv == NULL)
1150*11270e7cSKinglong Mee return false;
1151*11270e7cSKinglong Mee
115241c8f70fSChuck Lever /* Now that we are sure this is a backchannel call,
115341c8f70fSChuck Lever * advance to the RPC header.
115441c8f70fSChuck Lever */
115541c8f70fSChuck Lever p = xdr_inline_decode(xdr, 3 * sizeof(*p));
115641c8f70fSChuck Lever if (unlikely(!p))
115763cae470SChuck Lever return true;
115841c8f70fSChuck Lever
115984dff5ebSChuck Lever rpcrdma_bc_receive_call(r_xprt, rep);
116063cae470SChuck Lever return true;
116163cae470SChuck Lever }
116241c8f70fSChuck Lever #else /* CONFIG_SUNRPC_BACKCHANNEL */
116341c8f70fSChuck Lever {
116441c8f70fSChuck Lever return false;
116563cae470SChuck Lever }
116663cae470SChuck Lever #endif /* CONFIG_SUNRPC_BACKCHANNEL */
116763cae470SChuck Lever
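/* Decode one RDMA segment. Per RFC 8166 (Section 4.1.2), a segment
 * is a 16-byte XDR triple, roughly:
 *
 *	struct xdr_rdma_segment {
 *		uint32 handle;	(RKey of the registered memory)
 *		uint32 length;	(segment length, in bytes)
 *		uint64 offset;	(segment's starting RDMA offset)
 *	};
 *
 * The decoded length is returned via @length.
 */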
1168264b0cdbSChuck Lever static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length)
1169264b0cdbSChuck Lever {
1170e11b7c96SChuck Lever u32 handle;
1171e11b7c96SChuck Lever u64 offset;
1172264b0cdbSChuck Lever __be32 *p;
1173264b0cdbSChuck Lever
1174264b0cdbSChuck Lever p = xdr_inline_decode(xdr, 4 * sizeof(*p));
1175264b0cdbSChuck Lever if (unlikely(!p))
1176264b0cdbSChuck Lever return -EIO;
1177264b0cdbSChuck Lever
1178f60a0869SChuck Lever xdr_decode_rdma_segment(p, &handle, length, &offset);
1179e11b7c96SChuck Lever trace_xprtrdma_decode_seg(handle, *length, offset);
1180264b0cdbSChuck Lever return 0;
1181264b0cdbSChuck Lever }
1182264b0cdbSChuck Lever
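/* Decode one Write chunk. Per RFC 8166, a Write chunk is a counted
 * array of RDMA segments, roughly:
 *
 *	struct xdr_write_chunk {
 *		struct xdr_rdma_segment target<>;
 *	};
 *
 * The sum of the segment lengths is returned via @length.
 */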
1183264b0cdbSChuck Lever static int decode_write_chunk(struct xdr_stream *xdr, u32 *length)
1184264b0cdbSChuck Lever {
1185264b0cdbSChuck Lever u32 segcount, seglength;
1186264b0cdbSChuck Lever __be32 *p;
1187264b0cdbSChuck Lever
1188264b0cdbSChuck Lever p = xdr_inline_decode(xdr, sizeof(*p));
1189264b0cdbSChuck Lever if (unlikely(!p))
1190264b0cdbSChuck Lever return -EIO;
1191264b0cdbSChuck Lever
1192264b0cdbSChuck Lever *length = 0;
1193264b0cdbSChuck Lever segcount = be32_to_cpup(p);
1194264b0cdbSChuck Lever while (segcount--) {
1195264b0cdbSChuck Lever if (decode_rdma_segment(xdr, &seglength))
1196264b0cdbSChuck Lever return -EIO;
1197264b0cdbSChuck Lever *length += seglength;
1198264b0cdbSChuck Lever }
1199264b0cdbSChuck Lever
1200264b0cdbSChuck Lever return 0;
1201264b0cdbSChuck Lever }
1202264b0cdbSChuck Lever
1203264b0cdbSChuck Lever /* In RPC-over-RDMA Version One replies, a Read list is never
1204264b0cdbSChuck Lever * expected. This decoder is a stub that returns an error if
1205264b0cdbSChuck Lever * a Read list is present.
1206264b0cdbSChuck Lever */
1207264b0cdbSChuck Lever static int decode_read_list(struct xdr_stream *xdr)
1208264b0cdbSChuck Lever {
1209264b0cdbSChuck Lever __be32 *p;
1210264b0cdbSChuck Lever
1211264b0cdbSChuck Lever p = xdr_inline_decode(xdr, sizeof(*p));
1212264b0cdbSChuck Lever if (unlikely(!p))
1213264b0cdbSChuck Lever return -EIO;
121407e9a632SChuck Lever if (unlikely(xdr_item_is_present(p)))
1215264b0cdbSChuck Lever return -EIO;
1216264b0cdbSChuck Lever return 0;
1217264b0cdbSChuck Lever }
1218264b0cdbSChuck Lever
1219264b0cdbSChuck Lever /* Supports only one Write chunk in the Write list; any further
1220264b0cdbSChuck Lever * present entry is rejected with -EIO. */
1221264b0cdbSChuck Lever static int decode_write_list(struct xdr_stream *xdr, u32 *length)
1222264b0cdbSChuck Lever {
1223264b0cdbSChuck Lever u32 chunklen;
1224264b0cdbSChuck Lever bool first;
1225264b0cdbSChuck Lever __be32 *p;
1226264b0cdbSChuck Lever
1227264b0cdbSChuck Lever *length = 0;
1228264b0cdbSChuck Lever first = true;
1229264b0cdbSChuck Lever do {
1230264b0cdbSChuck Lever p = xdr_inline_decode(xdr, sizeof(*p));
1231264b0cdbSChuck Lever if (unlikely(!p))
1232264b0cdbSChuck Lever return -EIO;
123307e9a632SChuck Lever if (xdr_item_is_absent(p))
1234264b0cdbSChuck Lever break;
1235264b0cdbSChuck Lever if (!first)
1236264b0cdbSChuck Lever return -EIO;
1237264b0cdbSChuck Lever
1238264b0cdbSChuck Lever if (decode_write_chunk(xdr, &chunklen))
1239264b0cdbSChuck Lever return -EIO;
1240264b0cdbSChuck Lever *length += chunklen;
1241264b0cdbSChuck Lever first = false;
1242264b0cdbSChuck Lever } while (true);
1243264b0cdbSChuck Lever return 0;
1244264b0cdbSChuck Lever }
1245264b0cdbSChuck Lever
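/* The Reply chunk is encoded as an optional Write chunk: a
 * present/absent discriminator, followed by a segment array
 * only when the discriminator indicates presence.
 */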
1246264b0cdbSChuck Lever static int decode_reply_chunk(struct xdr_stream *xdr, u32 *length)
1247264b0cdbSChuck Lever {
1248264b0cdbSChuck Lever __be32 *p;
1249264b0cdbSChuck Lever
1250264b0cdbSChuck Lever p = xdr_inline_decode(xdr, sizeof(*p));
1251264b0cdbSChuck Lever if (unlikely(!p))
1252264b0cdbSChuck Lever return -EIO;
1253264b0cdbSChuck Lever
1254264b0cdbSChuck Lever *length = 0;
125507e9a632SChuck Lever if (xdr_item_is_present(p))
1256264b0cdbSChuck Lever if (decode_write_chunk(xdr, length))
1257264b0cdbSChuck Lever return -EIO;
1258264b0cdbSChuck Lever return 0;
1259264b0cdbSChuck Lever }
1260264b0cdbSChuck Lever
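/* An RDMA_MSG-type Reply carries the RPC reply message inline,
 * immediately after the chunk lists. Bulk payload, if any, has
 * already been placed by the responder directly into the Write
 * chunk's pages, so only inline content needs fixing up. A Reply
 * chunk is not permitted. Returns the total count of reply bytes,
 * or a negative errno.
 */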
126107ff2dd5SChuck Lever static int
126207ff2dd5SChuck Lever rpcrdma_decode_msg(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
126307ff2dd5SChuck Lever struct rpc_rqst *rqst)
126407ff2dd5SChuck Lever {
126507ff2dd5SChuck Lever struct xdr_stream *xdr = &rep->rr_stream;
1266264b0cdbSChuck Lever u32 writelist, replychunk, rpclen;
1267264b0cdbSChuck Lever char *base;
126807ff2dd5SChuck Lever
1269264b0cdbSChuck Lever /* Decode the chunk lists */
1270264b0cdbSChuck Lever if (decode_read_list(xdr))
1271264b0cdbSChuck Lever return -EIO;
1272264b0cdbSChuck Lever if (decode_write_list(xdr, &writelist))
1273264b0cdbSChuck Lever return -EIO;
1274264b0cdbSChuck Lever if (decode_reply_chunk(xdr, &replychunk))
127507ff2dd5SChuck Lever return -EIO;
127607ff2dd5SChuck Lever
1277264b0cdbSChuck Lever /* RDMA_MSG sanity checks */
1278264b0cdbSChuck Lever if (unlikely(replychunk))
127907ff2dd5SChuck Lever return -EIO;
128007ff2dd5SChuck Lever
1281264b0cdbSChuck Lever /* Build the RPC reply's Payload stream in rqst->rq_rcv_buf */
1282264b0cdbSChuck Lever base = (char *)xdr_inline_decode(xdr, 0);
1283264b0cdbSChuck Lever rpclen = xdr_stream_remaining(xdr);
128407ff2dd5SChuck Lever r_xprt->rx_stats.fixup_copy_count +=
1285264b0cdbSChuck Lever rpcrdma_inline_fixup(rqst, base, rpclen, writelist & 3);
128607ff2dd5SChuck Lever
1287264b0cdbSChuck Lever r_xprt->rx_stats.total_rdma_reply += writelist;
1288264b0cdbSChuck Lever return rpclen + xdr_align_size(writelist);
128907ff2dd5SChuck Lever }
129007ff2dd5SChuck Lever
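/* An RDMA_NOMSG-type Reply has no inline RPC message: the entire
 * reply arrived via the Reply chunk, and a Write list is not
 * permitted. No fixup is needed because the Reply chunk's buffer
 * already serves as the reply vector.
 */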
129107ff2dd5SChuck Lever static noinline int
129207ff2dd5SChuck Lever rpcrdma_decode_nomsg(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
129307ff2dd5SChuck Lever {
129407ff2dd5SChuck Lever struct xdr_stream *xdr = &rep->rr_stream;
1295264b0cdbSChuck Lever u32 writelist, replychunk;
129607ff2dd5SChuck Lever
1297264b0cdbSChuck Lever /* Decode the chunk lists */
1298264b0cdbSChuck Lever if (decode_read_list(xdr))
1299264b0cdbSChuck Lever return -EIO;
1300264b0cdbSChuck Lever if (decode_write_list(xdr, &writelist))
1301264b0cdbSChuck Lever return -EIO;
1302264b0cdbSChuck Lever if (decode_reply_chunk(xdr, &replychunk))
130307ff2dd5SChuck Lever return -EIO;
130407ff2dd5SChuck Lever
1305264b0cdbSChuck Lever /* RDMA_NOMSG sanity checks */
1306264b0cdbSChuck Lever if (unlikely(writelist))
130707ff2dd5SChuck Lever return -EIO;
1308264b0cdbSChuck Lever if (unlikely(!replychunk))
130907ff2dd5SChuck Lever return -EIO;
131007ff2dd5SChuck Lever
1311264b0cdbSChuck Lever /* The Reply chunk buffer is already the reply vector */
1312264b0cdbSChuck Lever r_xprt->rx_stats.total_rdma_reply += replychunk;
1313264b0cdbSChuck Lever return replychunk;
131407ff2dd5SChuck Lever }
131507ff2dd5SChuck Lever
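/* An RDMA_ERROR-type Reply carries an error code instead of an
 * RPC message: err_vers reports the range of RPC/RDMA versions
 * the responder supports, and err_chunk reports a chunk that
 * could not be processed. In all cases the Call terminates
 * with -EIO once the error has been traced.
 */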
131607ff2dd5SChuck Lever static noinline int
131707ff2dd5SChuck Lever rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
131807ff2dd5SChuck Lever struct rpc_rqst *rqst)
131907ff2dd5SChuck Lever {
132007ff2dd5SChuck Lever struct xdr_stream *xdr = &rep->rr_stream;
132107ff2dd5SChuck Lever __be32 *p;
132207ff2dd5SChuck Lever
132307ff2dd5SChuck Lever p = xdr_inline_decode(xdr, sizeof(*p));
132407ff2dd5SChuck Lever if (unlikely(!p))
132507ff2dd5SChuck Lever return -EIO;
132607ff2dd5SChuck Lever
132707ff2dd5SChuck Lever switch (*p) {
132807ff2dd5SChuck Lever case err_vers:
132907ff2dd5SChuck Lever p = xdr_inline_decode(xdr, 2 * sizeof(*p));
133007ff2dd5SChuck Lever if (!p)
133107ff2dd5SChuck Lever break;
13323821e232SChuck Lever trace_xprtrdma_err_vers(rqst, p, p + 1);
133307ff2dd5SChuck Lever break;
133407ff2dd5SChuck Lever case err_chunk:
13353821e232SChuck Lever trace_xprtrdma_err_chunk(rqst);
133607ff2dd5SChuck Lever break;
133707ff2dd5SChuck Lever default:
13383821e232SChuck Lever trace_xprtrdma_err_unrecognized(rqst, p);
133907ff2dd5SChuck Lever }
134007ff2dd5SChuck Lever
13417b2182ecSChuck Lever return -EIO;
134207ff2dd5SChuck Lever }
134307ff2dd5SChuck Lever
13448a053433SChuck Lever /**
13458a053433SChuck Lever * rpcrdma_unpin_rqst - Release rqst without completing it
13468a053433SChuck Lever * @rep: RPC/RDMA Receive context
13478a053433SChuck Lever *
13488a053433SChuck Lever * This is done when a connection is lost so that a Reply
13498a053433SChuck Lever * can be dropped and its matching Call can be subsequently
13508a053433SChuck Lever * retransmitted on a new connection.
13518a053433SChuck Lever */
13528a053433SChuck Lever void rpcrdma_unpin_rqst(struct rpcrdma_rep *rep)
13538a053433SChuck Lever {
13548a053433SChuck Lever struct rpc_xprt *xprt = &rep->rr_rxprt->rx_xprt;
13558a053433SChuck Lever struct rpc_rqst *rqst = rep->rr_rqst;
13568a053433SChuck Lever struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
13578a053433SChuck Lever
13588a053433SChuck Lever req->rl_reply = NULL;
13598a053433SChuck Lever rep->rr_rqst = NULL;
13608a053433SChuck Lever
13618a053433SChuck Lever spin_lock(&xprt->queue_lock);
13628a053433SChuck Lever xprt_unpin_rqst(rqst);
13638a053433SChuck Lever spin_unlock(&xprt->queue_lock);
13648a053433SChuck Lever }
13658a053433SChuck Lever
13668a053433SChuck Lever /**
13678a053433SChuck Lever * rpcrdma_complete_rqst - Pass completed rqst back to RPC
13688a053433SChuck Lever * @rep: RPC/RDMA Receive context
13698a053433SChuck Lever *
13708a053433SChuck Lever * Reconstruct the RPC reply and complete the transaction
13718a053433SChuck Lever * while @rqst is still pinned to ensure the rep, rqst, and
13728a053433SChuck Lever * rq_task pointers remain stable.
1373e1352c96SChuck Lever */
1374e1352c96SChuck Lever void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
1375e1352c96SChuck Lever {
1376e1352c96SChuck Lever struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
1377e1352c96SChuck Lever struct rpc_xprt *xprt = &r_xprt->rx_xprt;
1378e1352c96SChuck Lever struct rpc_rqst *rqst = rep->rr_rqst;
1379e1352c96SChuck Lever int status;
1380e1352c96SChuck Lever
1381e1352c96SChuck Lever switch (rep->rr_proc) {
1382e1352c96SChuck Lever case rdma_msg:
1383e1352c96SChuck Lever status = rpcrdma_decode_msg(r_xprt, rep, rqst);
1384e1352c96SChuck Lever break;
1385e1352c96SChuck Lever case rdma_nomsg:
1386e1352c96SChuck Lever status = rpcrdma_decode_nomsg(r_xprt, rep);
1387e1352c96SChuck Lever break;
1388e1352c96SChuck Lever case rdma_error:
1389e1352c96SChuck Lever status = rpcrdma_decode_error(r_xprt, rep, rqst);
1390e1352c96SChuck Lever break;
1391e1352c96SChuck Lever default:
1392e1352c96SChuck Lever status = -EIO;
1393e1352c96SChuck Lever }
1394e1352c96SChuck Lever if (status < 0)
1395e1352c96SChuck Lever goto out_badheader;
1396e1352c96SChuck Lever
1397e1352c96SChuck Lever out:
139875c84151STrond Myklebust spin_lock(&xprt->queue_lock);
1399e1352c96SChuck Lever xprt_complete_rqst(rqst->rq_task, status);
1400e1352c96SChuck Lever xprt_unpin_rqst(rqst);
140175c84151STrond Myklebust spin_unlock(&xprt->queue_lock);
1402e1352c96SChuck Lever return;
1403e1352c96SChuck Lever
1404e1352c96SChuck Lever out_badheader:
14053a9568feSChuck Lever trace_xprtrdma_reply_hdr_err(rep);
1406e1352c96SChuck Lever r_xprt->rx_stats.bad_reply_count++;
14077b2182ecSChuck Lever rqst->rq_task->tk_status = status;
14087b2182ecSChuck Lever status = 0;
1409e1352c96SChuck Lever goto out;
1410e1352c96SChuck Lever }
1411e1352c96SChuck Lever
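/* kref release callback: runs once every completion path holding
 * a reference to the rpcrdma_req (Receive, and where applicable
 * Send or LocalInv) has dropped it, at which point the Reply can
 * safely be passed up to the RPC layer.
 */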
14120ab11523SChuck Lever static void rpcrdma_reply_done(struct kref *kref)
14130ba6f370SChuck Lever {
14140ab11523SChuck Lever struct rpcrdma_req *req =
14150ab11523SChuck Lever container_of(kref, struct rpcrdma_req, rl_kref);
141601bb35c8SChuck Lever
14170ab11523SChuck Lever rpcrdma_complete_rqst(req->rl_reply);
14180ba6f370SChuck Lever }
14190ba6f370SChuck Lever
1420d8099fedSChuck Lever /**
1421d8099fedSChuck Lever * rpcrdma_reply_handler - Process received RPC/RDMA messages
1422d8099fedSChuck Lever * @rep: Incoming rpcrdma_rep object to process
1423fe97b47cSChuck Lever *
1424e9601828S\"Talpey, Thomas\ * Errors must result in the RPC task either being awakened, or
1425e9601828S\"Talpey, Thomas\ * allowed to timeout, to discover the errors at that time.
1426e9601828S\"Talpey, Thomas\ */
1427d8f532d2SChuck Lever void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
1428e9601828S\"Talpey, Thomas\ {
1429431af645SChuck Lever struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
1430431af645SChuck Lever struct rpc_xprt *xprt = &r_xprt->rx_xprt;
1431be798f90SChuck Lever struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1432e9601828S\"Talpey, Thomas\ struct rpcrdma_req *req;
1433e9601828S\"Talpey, Thomas\ struct rpc_rqst *rqst;
1434be798f90SChuck Lever u32 credits;
14355381e0ecSChuck Lever __be32 *p;
1436e9601828S\"Talpey, Thomas\
1437f9e1afe0SChuck Lever /* Any data means we had a useful conversation, so
1438f9e1afe0SChuck Lever * we don't need to delay the next reconnect.
1439f9e1afe0SChuck Lever */
1440f9e1afe0SChuck Lever if (xprt->reestablish_timeout)
1441f9e1afe0SChuck Lever xprt->reestablish_timeout = 0;
1442f9e1afe0SChuck Lever
14437c8d9e7cSChuck Lever /* Fixed transport header fields: XID, version, credit grant, and header type */
14445381e0ecSChuck Lever xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
14450ccc61b1SChuck Lever rep->rr_hdrbuf.head[0].iov_base, NULL);
14465381e0ecSChuck Lever p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
144796f8778fSChuck Lever if (unlikely(!p))
1448b0e178a2SChuck Lever goto out_shortreply;
14495381e0ecSChuck Lever rep->rr_xid = *p++;
14505381e0ecSChuck Lever rep->rr_vers = *p++;
1451be798f90SChuck Lever credits = be32_to_cpu(*p++);
14525381e0ecSChuck Lever rep->rr_proc = *p++;
1453b0e178a2SChuck Lever
14545381e0ecSChuck Lever if (rep->rr_vers != rpcrdma_version)
145561433af5SChuck Lever goto out_badversion;
145661433af5SChuck Lever
14575381e0ecSChuck Lever if (rpcrdma_is_bcall(r_xprt, rep))
145841c8f70fSChuck Lever return;
1459e9601828S\"Talpey, Thomas\
1460fe97b47cSChuck Lever /* Match incoming rpcrdma_rep to an rpcrdma_req to
1461fe97b47cSChuck Lever * get context for handling any incoming chunks.
1462fe97b47cSChuck Lever */
146375c84151STrond Myklebust spin_lock(&xprt->queue_lock);
14645381e0ecSChuck Lever rqst = xprt_lookup_rqst(xprt, rep->rr_xid);
14659590d083SChuck Lever if (!rqst)
14669590d083SChuck Lever goto out_norqst;
14679590d083SChuck Lever xprt_pin_rqst(rqst);
146893bdcf9fSTrond Myklebust spin_unlock(&xprt->queue_lock);
1469be798f90SChuck Lever
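/* Clamp the credit grant: a grant of zero would deadlock the
 * transport, and a grant larger than this client's receive
 * limit could overrun its posted Receive buffers.
 */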
1470be798f90SChuck Lever if (credits == 0)
1471be798f90SChuck Lever credits = 1; /* don't deadlock */
1472e28ce900SChuck Lever else if (credits > r_xprt->rx_ep->re_max_requests)
1473e28ce900SChuck Lever credits = r_xprt->rx_ep->re_max_requests;
147435d8b10aSChuck Lever rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1),
147535d8b10aSChuck Lever false);
1476eea63ca7SChuck Lever if (buf->rb_credits != credits)
1477eea63ca7SChuck Lever rpcrdma_update_cwnd(r_xprt, credits);
1478be798f90SChuck Lever
14799590d083SChuck Lever req = rpcr_to_rdmar(rqst);
148003ffd924SChuck Lever if (unlikely(req->rl_reply))
1481c35ca60dSChuck Lever rpcrdma_rep_put(buf, req->rl_reply);
14824b196dc6SChuck Lever req->rl_reply = rep;
1483e1352c96SChuck Lever rep->rr_rqst = rqst;
1484431af645SChuck Lever
148503ffd924SChuck Lever trace_xprtrdma_reply(rqst->rq_task, rep, credits);
1486d8099fedSChuck Lever
1487d8099fedSChuck Lever if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
1488d8099fedSChuck Lever frwr_reminv(rep, &req->rl_registered);
14890ab11523SChuck Lever if (!list_empty(&req->rl_registered))
1490d8099fedSChuck Lever frwr_unmap_async(r_xprt, req);
1491d8099fedSChuck Lever /* LocalInv completion will complete the RPC */
14920ab11523SChuck Lever else
14930ab11523SChuck Lever kref_put(&req->rl_kref, rpcrdma_reply_done);
1494b0e178a2SChuck Lever return;
1495b0e178a2SChuck Lever
149661433af5SChuck Lever out_badversion:
14973a9568feSChuck Lever trace_xprtrdma_reply_vers_err(rep);
14986ceea368SChuck Lever goto out;
149961433af5SChuck Lever
1500431af645SChuck Lever out_norqst:
150175c84151STrond Myklebust spin_unlock(&xprt->queue_lock);
15023a9568feSChuck Lever trace_xprtrdma_reply_rqst_err(rep);
15036ceea368SChuck Lever goto out;
1504b0e178a2SChuck Lever
15059590d083SChuck Lever out_shortreply:
15063a9568feSChuck Lever trace_xprtrdma_reply_short_err(rep);
1507b0e178a2SChuck Lever
15086ceea368SChuck Lever out:
1509c35ca60dSChuck Lever rpcrdma_rep_put(buf, rep);
1510e9601828S\"Talpey, Thomas\ }
1511