// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright (c) 2014-2020, Oracle and/or its affiliates.
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * rpc_rdma.c
 *
 * This file contains the guts of the RPC RDMA protocol, and
 * does marshaling/unmarshaling, etc. It is also where interfacing
 * to the Linux RPC framework lives.
 */

#include <linux/highmem.h>

#include <linux/sunrpc/svc_rdma.h>

#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>

/* Returns size of largest RPC-over-RDMA header in a Call message
 *
 * The largest Call header contains a full-size Read list and a
 * minimal Reply chunk.
 */
static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs)
{
	unsigned int size;

	/* Fixed header fields and list discriminators */
	size = RPCRDMA_HDRLEN_MIN;

	/* Maximum Read list size */
	size += maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);

	/* Minimal Reply chunk size */
	size += sizeof(__be32);	/* segment count */
	size += rpcrdma_segment_maxsz * sizeof(__be32);
	size += sizeof(__be32);	/* list discriminator */

	return size;
}
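
/* Worked example for rpcrdma_max_call_header_size(), using assumed
 * constant values (the authoritative ones live in rpc_rdma.h): if
 * RPCRDMA_HDRLEN_MIN is 7 XDR words (28 bytes), rpcrdma_readchunk_maxsz
 * is 6 words, and rpcrdma_segment_maxsz is 4 words, then maxsegs = 8
 * yields 28 + (8 * 6 * 4) + (1 + 4 + 1) * 4 = 244 bytes.
 */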

/* Returns size of largest RPC-over-RDMA header in a Reply message
 *
 * There is only one Write list or one Reply chunk per Reply
 * message.  The larger list is the Write list.
 */
static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
{
	unsigned int size;

	/* Fixed header fields and list discriminators */
	size = RPCRDMA_HDRLEN_MIN;

	/* Maximum Write list size */
	size += sizeof(__be32);		/* segment count */
	size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32);
	size += sizeof(__be32);	/* list discriminator */

	return size;
}

/**
 * rpcrdma_set_max_header_sizes - Initialize inline payload sizes
 * @ep: endpoint to initialize
 *
 * The max_inline fields contain the maximum size of an RPC message
 * so the marshaling code doesn't have to repeat this calculation
 * for every RPC.
 */
void rpcrdma_set_max_header_sizes(struct rpcrdma_ep *ep)
{
	unsigned int maxsegs = ep->re_max_rdma_segs;

	ep->re_max_inline_send =
		ep->re_inline_send - rpcrdma_max_call_header_size(maxsegs);
	ep->re_max_inline_recv =
		ep->re_inline_recv - rpcrdma_max_reply_header_size(maxsegs);
}
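
/* Continuing the illustrative numbers above: an endpoint with a
 * 4096-byte inline send threshold and re_max_rdma_segs = 8 would end
 * up with re_max_inline_send = 4096 - 244 = 3852 bytes of RPC payload
 * per inline Call. (Example only; actual values depend on the
 * negotiated thresholds and segment limits.)
 */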

/* The client can send a request inline as long as the RPCRDMA header
 * plus the RPC call fit under the transport's inline limit. If the
 * combined call message size exceeds that limit, the client must use
 * a Read chunk for this operation.
 *
 * A Read chunk is also required if sending the RPC call inline would
 * exceed this device's max_sge limit.
 */
static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
				struct rpc_rqst *rqst)
{
	struct xdr_buf *xdr = &rqst->rq_snd_buf;
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	unsigned int count, remaining, offset;

	if (xdr->len > ep->re_max_inline_send)
		return false;

	if (xdr->page_len) {
		remaining = xdr->page_len;
		offset = offset_in_page(xdr->page_base);
		count = RPCRDMA_MIN_SEND_SGES;
		while (remaining) {
			remaining -= min_t(unsigned int,
					   PAGE_SIZE - offset, remaining);
			offset = 0;
			if (++count > ep->re_attr.cap.max_send_sge)
				return false;
		}
	}

	return true;
}
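
/* Example of the SGE accounting above, assuming RPCRDMA_MIN_SEND_SGES
 * is 3 (reserved for the transport header and inline buffers): on a
 * device with re_attr.cap.max_send_sge = 5, a page list spanning three
 * pages pushes the count to 6, so the Call cannot go inline even if
 * it fits under re_max_inline_send.
 */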

/* The client can't know how large the actual reply will be. Thus it
 * plans for the largest possible reply for that particular ULP
 * operation. If the maximum combined reply message size exceeds that
 * limit, the client must provide a write list or a reply chunk for
 * this request.
 */
static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
				   struct rpc_rqst *rqst)
{
	return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep->re_max_inline_recv;
}

/* The client is required to provide a Reply chunk if the maximum
 * size of the non-payload part of the RPC Reply is larger than
 * the inline threshold.
 */
static bool
rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt,
			  const struct rpc_rqst *rqst)
{
	const struct xdr_buf *buf = &rqst->rq_rcv_buf;

	return (buf->head[0].iov_len + buf->tail[0].iov_len) <
		r_xprt->rx_ep->re_max_inline_recv;
}

/* ACL likes to be lazy in allocating pages. For TCP, these
 * pages can be allocated during receive processing. Not true
 * for RDMA, which must always provision receive buffers
 * up front.
 */
static noinline int
rpcrdma_alloc_sparse_pages(struct xdr_buf *buf)
{
	struct page **ppages;
	int len;

	len = buf->page_len;
	ppages = buf->pages + (buf->page_base >> PAGE_SHIFT);
	while (len > 0) {
		if (!*ppages)
			*ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
		if (!*ppages)
			return -ENOBUFS;
		ppages++;
		len -= PAGE_SIZE;
	}

	return 0;
}

/* Convert @vec to a single SGL element.
 *
 * Returns pointer to next available SGE, and bumps the total number
 * of SGEs consumed.
 */
static struct rpcrdma_mr_seg *
rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg,
		     unsigned int *n)
{
	seg->mr_page = virt_to_page(vec->iov_base);
	seg->mr_offset = offset_in_page(vec->iov_base);
	seg->mr_len = vec->iov_len;
	++seg;
	++(*n);
	return seg;
}

/* Convert @xdrbuf into SGEs no larger than a page each. As they
 * are registered, these SGEs are then coalesced into RDMA segments
 * when the selected memreg mode supports it.
 *
 * Returns positive number of SGEs consumed, or a negative errno.
 */

static int
rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
		     unsigned int pos, enum rpcrdma_chunktype type,
		     struct rpcrdma_mr_seg *seg)
{
	unsigned long page_base;
	unsigned int len, n;
	struct page **ppages;

	n = 0;
	if (pos == 0)
		seg = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, &n);

	len = xdrbuf->page_len;
	ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
	page_base = offset_in_page(xdrbuf->page_base);
	while (len) {
		seg->mr_page = *ppages;
		seg->mr_offset = page_base;
		seg->mr_len = min_t(u32, PAGE_SIZE - page_base, len);
		len -= seg->mr_len;
		++ppages;
		++seg;
		++n;
		page_base = 0;
	}

	if (type == rpcrdma_readch || type == rpcrdma_writech)
		goto out;

	if (xdrbuf->tail[0].iov_len)
		rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n);

out:
	if (unlikely(n > RPCRDMA_MAX_SEGS))
		return -EIO;
	return n;
}
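
/* Illustration: for a Reply chunk (type == rpcrdma_replych) built from
 * an xdr_buf with a 512-byte head, a 6000-byte page list starting at
 * page offset 0, and a 4-byte tail, the loop above yields four
 * segments on 4KB pages: head (512), page 0 (4096), page 1 (the
 * 1904-byte remainder), and tail (4). For rpcrdma_readch and
 * rpcrdma_writech the tail kvec is deliberately skipped.
 */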

static int
encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, 4 * sizeof(*p));
	if (unlikely(!p))
		return -EMSGSIZE;

	xdr_encode_rdma_segment(p, mr->mr_handle, mr->mr_length, mr->mr_offset);
	return 0;
}

static int
encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
		    u32 position)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, 6 * sizeof(*p));
	if (unlikely(!p))
		return -EMSGSIZE;

	*p++ = xdr_one;			/* Item present */
	xdr_encode_read_segment(p, position, mr->mr_handle, mr->mr_length,
				mr->mr_offset);
	return 0;
}
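
/* On the wire, a plain segment is four XDR words (handle, length,
 * 64-bit offset), while each Read list item is six words: an
 * item-present discriminator and a position word in front of the same
 * HLOO triplet. That is why the two encoders above reserve
 * 4 * sizeof(__be32) and 6 * sizeof(__be32) respectively.
 */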

static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt,
						 struct rpcrdma_req *req,
						 struct rpcrdma_mr_seg *seg,
						 int nsegs, bool writing,
						 struct rpcrdma_mr **mr)
{
	*mr = rpcrdma_mr_pop(&req->rl_free_mrs);
	if (!*mr) {
		*mr = rpcrdma_mr_get(r_xprt);
		if (!*mr)
			goto out_getmr_err;
		(*mr)->mr_req = req;
	}

	rpcrdma_mr_push(*mr, &req->rl_registered);
	return frwr_map(r_xprt, seg, nsegs, writing, req->rl_slot.rq_xid, *mr);

out_getmr_err:
	trace_xprtrdma_nomrs_err(r_xprt, req);
	xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
	rpcrdma_mrs_refresh(r_xprt);
	return ERR_PTR(-EAGAIN);
}

/* Register and XDR encode the Read list. Supports encoding a list of read
 * segments that belong to a single read chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Read chunklist (a linked list):
 *   N elements, position P (same P for all chunks of same arg!):
 *    1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 *
 * Only a single @pos value is currently supported.
 */
static int rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req,
				    struct rpc_rqst *rqst,
				    enum rpcrdma_chunktype rtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	unsigned int pos;
	int nsegs;

	if (rtype == rpcrdma_noch_pullup || rtype == rpcrdma_noch_mapped)
		goto done;

	pos = rqst->rq_snd_buf.head[0].iov_len;
	if (rtype == rpcrdma_areadch)
		pos = 0;
	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
				     rtype, seg);
	if (nsegs < 0)
		return nsegs;

	do {
		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, false, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);

		if (encode_read_segment(xdr, mr, pos) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_read(rqst->rq_task, pos, mr, nsegs);
		r_xprt->rx_stats.read_chunk_count++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

done:
	if (xdr_stream_encode_item_absent(xdr) < 0)
		return -EMSGSIZE;
	return 0;
}

/* Register and XDR encode the Write list. Supports encoding a list
 * containing one array of plain segments that belong to a single
 * write chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Write chunklist (a list of (one) counted array):
 *   N elements:
 *    1 - N - HLOO - HLOO - ... - HLOO - 0
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 *
 * Only a single Write chunk is currently supported.
 */
static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt,
				     struct rpcrdma_req *req,
				     struct rpc_rqst *rqst,
				     enum rpcrdma_chunktype wtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	int nsegs, nchunks;
	__be32 *segcount;

	if (wtype != rpcrdma_writech)
		goto done;

	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
				     rqst->rq_rcv_buf.head[0].iov_len,
				     wtype, seg);
	if (nsegs < 0)
		return nsegs;

	if (xdr_stream_encode_item_present(xdr) < 0)
		return -EMSGSIZE;
	segcount = xdr_reserve_space(xdr, sizeof(*segcount));
	if (unlikely(!segcount))
		return -EMSGSIZE;
	/* Actual value encoded below */

	nchunks = 0;
	do {
		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);

		if (encode_rdma_segment(xdr, mr) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_write(rqst->rq_task, mr, nsegs);
		r_xprt->rx_stats.write_chunk_count++;
		r_xprt->rx_stats.total_rdma_request += mr->mr_length;
		nchunks++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

	if (xdr_pad_size(rqst->rq_rcv_buf.page_len)) {
		if (encode_rdma_segment(xdr, ep->re_write_pad_mr) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_wp(rqst->rq_task, ep->re_write_pad_mr,
					nsegs);
		r_xprt->rx_stats.write_chunk_count++;
		r_xprt->rx_stats.total_rdma_request += mr->mr_length;
		nchunks++;
		nsegs -= mr->mr_nents;
	}

	/* Update count of segments in this Write chunk */
	*segcount = cpu_to_be32(nchunks);

done:
	if (xdr_stream_encode_item_absent(xdr) < 0)
		return -EMSGSIZE;
	return 0;
}

/* Register and XDR encode the Reply chunk. Supports encoding an array
 * of plain segments that belong to a single write (reply) chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Reply chunk (a counted array):
 *   N elements:
 *    1 - N - HLOO - HLOO - ... - HLOO
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 */
static int rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
				      struct rpcrdma_req *req,
				      struct rpc_rqst *rqst,
				      enum rpcrdma_chunktype wtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	int nsegs, nchunks;
	__be32 *segcount;

	if (wtype != rpcrdma_replych) {
		if (xdr_stream_encode_item_absent(xdr) < 0)
			return -EMSGSIZE;
		return 0;
	}

	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
	if (nsegs < 0)
		return nsegs;

	if (xdr_stream_encode_item_present(xdr) < 0)
		return -EMSGSIZE;
	segcount = xdr_reserve_space(xdr, sizeof(*segcount));
	if (unlikely(!segcount))
		return -EMSGSIZE;
	/* Actual value encoded below */

	nchunks = 0;
	do {
		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);

		if (encode_rdma_segment(xdr, mr) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_reply(rqst->rq_task, mr, nsegs);
		r_xprt->rx_stats.reply_chunk_count++;
		r_xprt->rx_stats.total_rdma_request += mr->mr_length;
		nchunks++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

	/* Update count of segments in the Reply chunk */
	*segcount = cpu_to_be32(nchunks);

	return 0;
}

static void rpcrdma_sendctx_done(struct kref *kref)
{
	struct rpcrdma_req *req =
		container_of(kref, struct rpcrdma_req, rl_kref);
	struct rpcrdma_rep *rep = req->rl_reply;

	rpcrdma_complete_rqst(rep);
	rep->rr_rxprt->rx_stats.reply_waits_for_send++;
}
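
/* Editorial note on rl_kref: rpcrdma_prepare_send_sges() initializes
 * it to one, and an extra reference is taken whenever Send SGEs stay
 * DMA-mapped. When the Send-side reference is the last one dropped,
 * this callback runs: the Reply has already arrived, so the rqst is
 * completed here and the wait is counted in reply_waits_for_send.
 */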

/**
 * rpcrdma_sendctx_unmap - DMA-unmap Send buffer
 * @sc: sendctx containing SGEs to unmap
 *
 */
void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
{
	struct rpcrdma_regbuf *rb = sc->sc_req->rl_sendbuf;
	struct ib_sge *sge;

	if (!sc->sc_unmap_count)
		return;

	/* The first two SGEs contain the transport header and
	 * the inline buffer. These are always left mapped so
	 * they can be cheaply re-used.
	 */
	for (sge = &sc->sc_sges[2]; sc->sc_unmap_count;
	     ++sge, --sc->sc_unmap_count)
		ib_dma_unmap_page(rdmab_device(rb), sge->addr, sge->length,
				  DMA_TO_DEVICE);

	kref_put(&sc->sc_req->rl_kref, rpcrdma_sendctx_done);
}

/* Prepare an SGE for the RPC-over-RDMA transport header.
 */
static void rpcrdma_prepare_hdr_sge(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req, u32 len)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];

	sge->addr = rdmab_addr(rb);
	sge->length = len;
	sge->lkey = rdmab_lkey(rb);

	ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
				      DMA_TO_DEVICE);
}

/* The head iovec is straightforward, as it is usually already
 * DMA-mapped. Sync the content that has changed.
 */
static bool rpcrdma_prepare_head_iov(struct rpcrdma_xprt *r_xprt,
				     struct rpcrdma_req *req, unsigned int len)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];
	struct rpcrdma_regbuf *rb = req->rl_sendbuf;

	if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
		return false;

	sge->addr = rdmab_addr(rb);
	sge->length = len;
	sge->lkey = rdmab_lkey(rb);

	ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
				      DMA_TO_DEVICE);
	return true;
}

/* If there is a page list present, DMA map and prepare an
 * SGE for each page to be sent.
 */
static bool rpcrdma_prepare_pagelist(struct rpcrdma_req *req,
				     struct xdr_buf *xdr)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct rpcrdma_regbuf *rb = req->rl_sendbuf;
	unsigned int page_base, len, remaining;
	struct page **ppages;
	struct ib_sge *sge;

	ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
	page_base = offset_in_page(xdr->page_base);
	remaining = xdr->page_len;
	while (remaining) {
		sge = &sc->sc_sges[req->rl_wr.num_sge++];
		len = min_t(unsigned int, PAGE_SIZE - page_base, remaining);
		sge->addr = ib_dma_map_page(rdmab_device(rb), *ppages,
					    page_base, len, DMA_TO_DEVICE);
		if (ib_dma_mapping_error(rdmab_device(rb), sge->addr))
			goto out_mapping_err;

		sge->length = len;
		sge->lkey = rdmab_lkey(rb);

		sc->sc_unmap_count++;
		ppages++;
		remaining -= len;
		page_base = 0;
	}

	return true;

out_mapping_err:
	trace_xprtrdma_dma_maperr(sge->addr);
	return false;
}

/* The tail iovec may include an XDR pad for the page list,
 * as well as additional content, and may not reside in the
 * same page as the head iovec.
 */
static bool rpcrdma_prepare_tail_iov(struct rpcrdma_req *req,
				     struct xdr_buf *xdr,
				     unsigned int page_base, unsigned int len)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];
	struct rpcrdma_regbuf *rb = req->rl_sendbuf;
	struct page *page = virt_to_page(xdr->tail[0].iov_base);

	sge->addr = ib_dma_map_page(rdmab_device(rb), page, page_base, len,
				    DMA_TO_DEVICE);
	if (ib_dma_mapping_error(rdmab_device(rb), sge->addr))
		goto out_mapping_err;

	sge->length = len;
	sge->lkey = rdmab_lkey(rb);
	++sc->sc_unmap_count;
	return true;

out_mapping_err:
	trace_xprtrdma_dma_maperr(sge->addr);
	return false;
}

/* Copy the tail to the end of the head buffer.
 */
static void rpcrdma_pullup_tail_iov(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req,
				    struct xdr_buf *xdr)
{
	unsigned char *dst;

	dst = (unsigned char *)xdr->head[0].iov_base;
	dst += xdr->head[0].iov_len + xdr->page_len;
	memmove(dst, xdr->tail[0].iov_base, xdr->tail[0].iov_len);
	r_xprt->rx_stats.pullup_copy_count += xdr->tail[0].iov_len;
}

/* Copy pagelist content into the head buffer.
 */
static void rpcrdma_pullup_pagelist(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req,
				    struct xdr_buf *xdr)
{
	unsigned int len, page_base, remaining;
	struct page **ppages;
	unsigned char *src, *dst;

	dst = (unsigned char *)xdr->head[0].iov_base;
	dst += xdr->head[0].iov_len;
	ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
	page_base = offset_in_page(xdr->page_base);
	remaining = xdr->page_len;
	while (remaining) {
		src = page_address(*ppages);
		src += page_base;
		len = min_t(unsigned int, PAGE_SIZE - page_base, remaining);
		memcpy(dst, src, len);
		r_xprt->rx_stats.pullup_copy_count += len;

		ppages++;
		dst += len;
		remaining -= len;
		page_base = 0;
	}
}

/* Copy the contents of @xdr into @rl_sendbuf and DMA sync it.
 * When the head, pagelist, and tail are small, a pull-up copy
 * is considerably less costly than DMA mapping the components
 * of @xdr.
 *
 * Assumptions:
 *  - the caller has already verified that the total length
 *    of the RPC Call body will fit into @rl_sendbuf.
 */
static bool rpcrdma_prepare_noch_pullup(struct rpcrdma_xprt *r_xprt,
					struct rpcrdma_req *req,
					struct xdr_buf *xdr)
{
	if (unlikely(xdr->tail[0].iov_len))
		rpcrdma_pullup_tail_iov(r_xprt, req, xdr);

	if (unlikely(xdr->page_len))
		rpcrdma_pullup_pagelist(r_xprt, req, xdr);

	/* The whole RPC message resides in the head iovec now */
	return rpcrdma_prepare_head_iov(r_xprt, req, xdr->len);
}

static bool rpcrdma_prepare_noch_mapped(struct rpcrdma_xprt *r_xprt,
					struct rpcrdma_req *req,
					struct xdr_buf *xdr)
{
	struct kvec *tail = &xdr->tail[0];

	if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
		return false;
	if (xdr->page_len)
		if (!rpcrdma_prepare_pagelist(req, xdr))
			return false;
	if (tail->iov_len)
		if (!rpcrdma_prepare_tail_iov(req, xdr,
					      offset_in_page(tail->iov_base),
					      tail->iov_len))
			return false;

	if (req->rl_sendctx->sc_unmap_count)
		kref_get(&req->rl_kref);
	return true;
}

static bool rpcrdma_prepare_readch(struct rpcrdma_xprt *r_xprt,
				   struct rpcrdma_req *req,
				   struct xdr_buf *xdr)
{
	if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
		return false;

	/* If there is a Read chunk, the page list is being handled
	 * via explicit RDMA, and thus is skipped here.
	 */

	/* Do not include the tail if it is only an XDR pad */
	if (xdr->tail[0].iov_len > 3) {
		unsigned int page_base, len;

		/* If the content in the page list is an odd length,
		 * xdr_write_pages() adds a pad at the beginning of
		 * the tail iovec. Force the tail's non-pad content to
		 * land at the next XDR position in the Send message.
		 */
		page_base = offset_in_page(xdr->tail[0].iov_base);
		len = xdr->tail[0].iov_len;
		page_base += len & 3;
		len -= len & 3;
		if (!rpcrdma_prepare_tail_iov(req, xdr, page_base, len))
			return false;
		kref_get(&req->rl_kref);
	}

	return true;
}
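
/* Example of the pad trimming above: if the Read chunk payload is
 * 1021 bytes, xdr_write_pages() leaves a 3-byte XDR pad at the start
 * of the tail. With a 7-byte tail (3 pad + 4 content), len & 3 == 3,
 * so the mapping starts 3 bytes later and sends only the 4 content
 * bytes, keeping the receiver's XDR stream aligned.
 */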

/**
 * rpcrdma_prepare_send_sges - Construct SGEs for a Send WR
 * @r_xprt: controlling transport
 * @req: context of RPC Call being marshalled
 * @hdrlen: size of transport header, in bytes
 * @xdr: xdr_buf containing RPC Call
 * @rtype: chunk type being encoded
 *
 * Returns 0 on success; otherwise a negative errno is returned.
 */
inline int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
				     struct rpcrdma_req *req, u32 hdrlen,
				     struct xdr_buf *xdr,
				     enum rpcrdma_chunktype rtype)
{
	int ret;

	ret = -EAGAIN;
	req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt);
	if (!req->rl_sendctx)
		goto out_nosc;
	req->rl_sendctx->sc_unmap_count = 0;
	req->rl_sendctx->sc_req = req;
	kref_init(&req->rl_kref);
	req->rl_wr.wr_cqe = &req->rl_sendctx->sc_cqe;
	req->rl_wr.sg_list = req->rl_sendctx->sc_sges;
	req->rl_wr.num_sge = 0;
	req->rl_wr.opcode = IB_WR_SEND;

	rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen);

	ret = -EIO;
	switch (rtype) {
	case rpcrdma_noch_pullup:
		if (!rpcrdma_prepare_noch_pullup(r_xprt, req, xdr))
			goto out_unmap;
		break;
	case rpcrdma_noch_mapped:
		if (!rpcrdma_prepare_noch_mapped(r_xprt, req, xdr))
			goto out_unmap;
		break;
	case rpcrdma_readch:
		if (!rpcrdma_prepare_readch(r_xprt, req, xdr))
			goto out_unmap;
		break;
	case rpcrdma_areadch:
		break;
	default:
		goto out_unmap;
	}

	return 0;

out_unmap:
	rpcrdma_sendctx_unmap(req->rl_sendctx);
out_nosc:
	trace_xprtrdma_prepsend_failed(&req->rl_slot, ret);
	return ret;
}
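
/* Resulting SGE layout for the common rpcrdma_noch_mapped case:
 * sc_sges[0] covers the transport header, sc_sges[1] the head iovec,
 * then one SGE per page of the page list, and finally one for any
 * tail content. Only SGEs from index 2 onward are unmapped by
 * rpcrdma_sendctx_unmap(); the first two stay mapped for re-use.
 */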

/**
 * rpcrdma_marshal_req - Marshal and send one RPC request
 * @r_xprt: controlling transport
 * @rqst: RPC request to be marshaled
 *
 * For the RPC in "rqst", this function:
 *  - Chooses the transfer mode (e.g., RDMA_MSG or RDMA_NOMSG)
 *  - Registers Read, Write, and Reply chunks
 *  - Constructs the transport header
 *  - Posts a Send WR to send the transport header and request
 *
 * Returns:
 *	%0 if the RPC was sent successfully,
 *	%-ENOTCONN if the connection was lost,
 *	%-EAGAIN if the caller should call again with the same arguments,
 *	%-ENOBUFS if the caller should call again after a delay,
 *	%-EMSGSIZE if the transport header is too small,
 *	%-EIO if a permanent problem occurred while marshaling.
 */
int
rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
{
	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
	struct xdr_stream *xdr = &req->rl_stream;
	enum rpcrdma_chunktype rtype, wtype;
	struct xdr_buf *buf = &rqst->rq_snd_buf;
	bool ddp_allowed;
	__be32 *p;
	int ret;

	if (unlikely(rqst->rq_rcv_buf.flags & XDRBUF_SPARSE_PAGES)) {
		ret = rpcrdma_alloc_sparse_pages(&rqst->rq_rcv_buf);
		if (ret)
			return ret;
	}

	rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
	xdr_init_encode(xdr, &req->rl_hdrbuf, rdmab_data(req->rl_rdmabuf),
			rqst);

	/* Fixed header fields */
	ret = -EMSGSIZE;
	p = xdr_reserve_space(xdr, 4 * sizeof(*p));
	if (!p)
		goto out_err;
	*p++ = rqst->rq_xid;
	*p++ = rpcrdma_version;
	*p++ = r_xprt->rx_buf.rb_max_requests;

	/* When the ULP employs a GSS flavor that guarantees integrity
	 * or privacy, direct data placement of individual data items
	 * is not allowed.
	 */
	ddp_allowed = !test_bit(RPCAUTH_AUTH_DATATOUCH,
				&rqst->rq_cred->cr_auth->au_flags);

	/*
	 * Chunks needed for results?
	 *
	 * o If the expected result is under the inline threshold, all ops
	 *   return as inline.
	 * o Large read ops return data as write chunk(s), header as
	 *   inline.
	 * o Large non-read ops return as a single reply chunk.
	 */
	if (rpcrdma_results_inline(r_xprt, rqst))
		wtype = rpcrdma_noch;
	else if ((ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ) &&
		 rpcrdma_nonpayload_inline(r_xprt, rqst))
		wtype = rpcrdma_writech;
	else
		wtype = rpcrdma_replych;

	/*
	 * Chunks needed for arguments?
	 *
	 * o If the total request is under the inline threshold, all ops
	 *   are sent as inline.
	 * o Large write ops transmit data as read chunk(s), header as
	 *   inline.
	 * o Large non-write ops are sent with the entire message as a
	 *   single read chunk (protocol 0-position special case).
	 *
	 * This assumes that the upper layer does not present a request
	 * that both has a data payload, and whose non-data arguments
	 * by themselves are larger than the inline threshold.
	 */
	if (rpcrdma_args_inline(r_xprt, rqst)) {
		*p++ = rdma_msg;
		rtype = buf->len < rdmab_length(req->rl_sendbuf) ?
			rpcrdma_noch_pullup : rpcrdma_noch_mapped;
	} else if (ddp_allowed && buf->flags & XDRBUF_WRITE) {
		*p++ = rdma_msg;
		rtype = rpcrdma_readch;
	} else {
		r_xprt->rx_stats.nomsg_call_count++;
		*p++ = rdma_nomsg;
		rtype = rpcrdma_areadch;
	}

	/* This implementation supports the following combinations
	 * of chunk lists in one RPC-over-RDMA Call message:
	 *
	 *   - Read list
	 *   - Write list
	 *   - Reply chunk
	 *   - Read list + Reply chunk
	 *
	 * It might not yet support the following combinations:
	 *
	 *   - Read list + Write list
	 *
	 * It does not support the following combinations:
	 *
	 *   - Write list + Reply chunk
	 *   - Read list + Write list + Reply chunk
	 *
	 * This implementation supports only a single chunk in each
	 * Read or Write list. Thus for example the client cannot
	 * send a Call message with a Position Zero Read chunk and a
	 * regular Read chunk at the same time.
	 */
	ret = rpcrdma_encode_read_list(r_xprt, req, rqst, rtype);
	if (ret)
		goto out_err;
	ret = rpcrdma_encode_write_list(r_xprt, req, rqst, wtype);
	if (ret)
		goto out_err;
	ret = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, wtype);
	if (ret)
		goto out_err;

	ret = rpcrdma_prepare_send_sges(r_xprt, req, req->rl_hdrbuf.len,
					buf, rtype);
	if (ret)
		goto out_err;

	trace_xprtrdma_marshal(req, rtype, wtype);
	return 0;

out_err:
	trace_xprtrdma_marshal_failed(rqst, ret);
	r_xprt->rx_stats.failed_marshal_count++;
	frwr_reset(req);
	return ret;
}
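
/* Putting the policy together (an illustrative sketch, not extra
 * logic): a large NFS WRITE marks its rq_snd_buf with XDRBUF_WRITE,
 * so its payload moves via a Read chunk (rtype = rpcrdma_readch)
 * while its small reply stays inline (wtype = rpcrdma_noch). A large
 * NFS READ is the mirror image: small arguments go inline
 * (rtype = rpcrdma_noch_pullup or _mapped) and the payload returns
 * via a Write chunk (wtype = rpcrdma_writech), ddp_allowed permitting.
 */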

static void __rpcrdma_update_cwnd_locked(struct rpc_xprt *xprt,
					 struct rpcrdma_buffer *buf,
					 u32 grant)
{
	buf->rb_credits = grant;
	xprt->cwnd = grant << RPC_CWNDSHIFT;
}

static void rpcrdma_update_cwnd(struct rpcrdma_xprt *r_xprt, u32 grant)
{
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;

	spin_lock(&xprt->transport_lock);
	__rpcrdma_update_cwnd_locked(xprt, &r_xprt->rx_buf, grant);
	spin_unlock(&xprt->transport_lock);
}
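
/* The RPC layer accounts for congestion in units of RPC_CWNDSCALE
 * (1 << RPC_CWNDSHIFT) per in-flight request, so shifting the
 * server's credit grant by RPC_CWNDSHIFT converts "number of credits"
 * into a cwnd that admits exactly that many concurrent Calls. E.g.,
 * a grant of 128 credits allows 128 outstanding RPCs.
 */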

/**
 * rpcrdma_reset_cwnd - Reset the xprt's congestion window
 * @r_xprt: controlling transport instance
 *
 * Prepare @r_xprt for the next connection by reinitializing
 * its credit grant to one (see RFC 8166, Section 3.3.3).
 */
void rpcrdma_reset_cwnd(struct rpcrdma_xprt *r_xprt)
{
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;

	spin_lock(&xprt->transport_lock);
	xprt->cong = 0;
	__rpcrdma_update_cwnd_locked(xprt, &r_xprt->rx_buf, 1);
	spin_unlock(&xprt->transport_lock);
}
1020eea63ca7SChuck Lever 
1021cb0ae1fbSChuck Lever /**
1022cb0ae1fbSChuck Lever  * rpcrdma_inline_fixup - Scatter inline received data into rqst's iovecs
1023cb0ae1fbSChuck Lever  * @rqst: controlling RPC request
1024cb0ae1fbSChuck Lever  * @srcp: points to RPC message payload in receive buffer
1025cb0ae1fbSChuck Lever  * @copy_len: remaining length of receive buffer content
1026cb0ae1fbSChuck Lever  * @pad: Write chunk pad bytes needed (zero for pure inline)
1027cb0ae1fbSChuck Lever  *
1028cb0ae1fbSChuck Lever  * The upper layer has set the maximum number of bytes it can
1029cb0ae1fbSChuck Lever  * receive in each component of rq_rcv_buf. These values are set in
1030cb0ae1fbSChuck Lever  * the head.iov_len, page_len, tail.iov_len, and buflen fields.
1031cfabe2c6SChuck Lever  *
1032cfabe2c6SChuck Lever  * Unlike the TCP equivalent (xdr_partial_copy_from_skb), in
1033cfabe2c6SChuck Lever  * many cases this function simply updates iov_base pointers in
1034cfabe2c6SChuck Lever  * rq_rcv_buf to point directly to the received reply data, to
1035cfabe2c6SChuck Lever  * avoid copying reply data.
103664695bdeSChuck Lever  *
103764695bdeSChuck Lever  * Returns the count of bytes which had to be memcopied.
1038e9601828S\"Talpey, Thomas\  */
103964695bdeSChuck Lever static unsigned long
10409191ca3bSTom Talpey rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
1041e9601828S\"Talpey, Thomas\ {
104264695bdeSChuck Lever 	unsigned long fixup_copy_count;
104364695bdeSChuck Lever 	int i, npages, curlen;
1044e9601828S\"Talpey, Thomas\ 	char *destp;
1045bd7ea31bSTom Tucker 	struct page **ppages;
1046bd7ea31bSTom Tucker 	int page_base;
1047e9601828S\"Talpey, Thomas\ 
1048cb0ae1fbSChuck Lever 	/* The head iovec is redirected to the RPC reply message
1049cb0ae1fbSChuck Lever 	 * in the receive buffer, to avoid a memcopy.
1050cb0ae1fbSChuck Lever 	 */
1051cb0ae1fbSChuck Lever 	rqst->rq_rcv_buf.head[0].iov_base = srcp;
1052cfabe2c6SChuck Lever 	rqst->rq_private_buf.head[0].iov_base = srcp;
1053e9601828S\"Talpey, Thomas\ 
1054cb0ae1fbSChuck Lever 	/* The contents of the receive buffer that follow
1055cb0ae1fbSChuck Lever 	 * head.iov_len bytes are copied into the page list.
1056cb0ae1fbSChuck Lever 	 */
1057cb0ae1fbSChuck Lever 	curlen = rqst->rq_rcv_buf.head[0].iov_len;
1058cb0ae1fbSChuck Lever 	if (curlen > copy_len)
1059cb0ae1fbSChuck Lever 		curlen = copy_len;
1060e9601828S\"Talpey, Thomas\ 	srcp += curlen;
1061e9601828S\"Talpey, Thomas\ 	copy_len -= curlen;
1062e9601828S\"Talpey, Thomas\ 
1063d933cc32SChuck Lever 	ppages = rqst->rq_rcv_buf.pages +
1064d933cc32SChuck Lever 		(rqst->rq_rcv_buf.page_base >> PAGE_SHIFT);
1065d933cc32SChuck Lever 	page_base = offset_in_page(rqst->rq_rcv_buf.page_base);
106664695bdeSChuck Lever 	fixup_copy_count = 0;
1067e9601828S\"Talpey, Thomas\ 	if (copy_len && rqst->rq_rcv_buf.page_len) {
106880414abcSChuck Lever 		int pagelist_len;
106980414abcSChuck Lever 
107080414abcSChuck Lever 		pagelist_len = rqst->rq_rcv_buf.page_len;
107180414abcSChuck Lever 		if (pagelist_len > copy_len)
107280414abcSChuck Lever 			pagelist_len = copy_len;
107380414abcSChuck Lever 		npages = PAGE_ALIGN(page_base + pagelist_len) >> PAGE_SHIFT;
107464695bdeSChuck Lever 		for (i = 0; i < npages; i++) {
1075bd7ea31bSTom Tucker 			curlen = PAGE_SIZE - page_base;
107680414abcSChuck Lever 			if (curlen > pagelist_len)
107780414abcSChuck Lever 				curlen = pagelist_len;
107880414abcSChuck Lever 
1079b8541786SCong Wang 			destp = kmap_atomic(ppages[i]);
1080bd7ea31bSTom Tucker 			memcpy(destp + page_base, srcp, curlen);
1081bd7ea31bSTom Tucker 			flush_dcache_page(ppages[i]);
1082b8541786SCong Wang 			kunmap_atomic(destp);
1083e9601828S\"Talpey, Thomas\ 			srcp += curlen;
1084e9601828S\"Talpey, Thomas\ 			copy_len -= curlen;
108564695bdeSChuck Lever 			fixup_copy_count += curlen;
108680414abcSChuck Lever 			pagelist_len -= curlen;
108780414abcSChuck Lever 			if (!pagelist_len)
1088e9601828S\"Talpey, Thomas\ 				break;
1089bd7ea31bSTom Tucker 			page_base = 0;
1090e9601828S\"Talpey, Thomas\ 		}
1091cb0ae1fbSChuck Lever 
1092cb0ae1fbSChuck Lever 		/* Implicit padding for the last segment in a Write
1093cb0ae1fbSChuck Lever 		 * chunk is inserted inline at the front of the tail
1094cb0ae1fbSChuck Lever 		 * iovec. The upper layer ignores the content of
1095cb0ae1fbSChuck Lever 		 * the pad. Simply ensure inline content in the tail
1096cb0ae1fbSChuck Lever 		 * that follows the Write chunk is properly aligned.
1097cb0ae1fbSChuck Lever 		 */
1098cb0ae1fbSChuck Lever 		if (pad)
1099cb0ae1fbSChuck Lever 			srcp -= pad;
11002b7bbc96SChuck Lever 	}
1101e9601828S\"Talpey, Thomas\ 
1102cb0ae1fbSChuck Lever 	/* The tail iovec is redirected to the remaining data
1103cb0ae1fbSChuck Lever 	 * in the receive buffer, to avoid a memcopy.
1104cb0ae1fbSChuck Lever 	 */
1105cfabe2c6SChuck Lever 	if (copy_len || pad) {
1106cb0ae1fbSChuck Lever 		rqst->rq_rcv_buf.tail[0].iov_base = srcp;
1107cfabe2c6SChuck Lever 		rqst->rq_private_buf.tail[0].iov_base = srcp;
1108cfabe2c6SChuck Lever 	}
11099191ca3bSTom Talpey 
1110d4957f01SChuck Lever 	if (fixup_copy_count)
1111d4957f01SChuck Lever 		trace_xprtrdma_fixup(rqst, fixup_copy_count);
111264695bdeSChuck Lever 	return fixup_copy_count;
1113e9601828S\"Talpey, Thomas\ }
1114e9601828S\"Talpey, Thomas\ 
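/* A self-contained userspace sketch of the fixup strategy above:
 * the head and tail iovecs are simply re-pointed into the receive
 * buffer (zero-copy), and only bytes destined for the page list are
 * memcpy'd; the return value is the memcopied byte count.
 * struct sketch_xdr_buf is a hypothetical stand-in for the kernel's
 * struct xdr_buf.
 */
#include <stdio.h>
#include <string.h>

struct sketch_iovec { char *base; size_t len; };
struct sketch_xdr_buf {
	struct sketch_iovec head, tail;
	char *pages;		/* flattened page list for this sketch */
	size_t page_len;
};

static size_t inline_fixup(struct sketch_xdr_buf *buf, char *src, size_t len)
{
	size_t copied = 0, cur;

	buf->head.base = src;			/* redirect, no copy */
	cur = buf->head.len < len ? buf->head.len : len;
	src += cur;
	len -= cur;

	if (len && buf->page_len) {		/* page list must be copied */
		cur = buf->page_len < len ? buf->page_len : len;
		memcpy(buf->pages, src, cur);
		copied += cur;
		src += cur;
		len -= cur;
	}

	if (len)
		buf->tail.base = src;		/* redirect, no copy */
	return copied;
}

int main(void)
{
	char rcv[] = "HEADERpagedataTAIL";
	char pages[8];
	struct sketch_xdr_buf buf = {
		.head = { NULL, 6 }, .tail = { NULL, 4 },
		.pages = pages, .page_len = 8,
	};

	printf("memcopied %zu bytes\n",
	       inline_fixup(&buf, rcv, sizeof(rcv) - 1));
	printf("head -> %.6s, tail -> %.4s\n", buf.head.base, buf.tail.base);
	return 0;
}
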
111563cae470SChuck Lever /* By convention, backchannel calls arrive via rdma_msg type
111663cae470SChuck Lever  * messages, and never populate the chunk lists. This makes
111763cae470SChuck Lever  * the RPC/RDMA header small and fixed in size, so it is
111863cae470SChuck Lever  * straightforward to check the RPC header's direction field.
111963cae470SChuck Lever  */
112063cae470SChuck Lever static bool
11215381e0ecSChuck Lever rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
112241c8f70fSChuck Lever #if defined(CONFIG_SUNRPC_BACKCHANNEL)
112363cae470SChuck Lever {
1124*11270e7cSKinglong Mee 	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
112541c8f70fSChuck Lever 	struct xdr_stream *xdr = &rep->rr_stream;
112641c8f70fSChuck Lever 	__be32 *p;
112763cae470SChuck Lever 
11285381e0ecSChuck Lever 	if (rep->rr_proc != rdma_msg)
112963cae470SChuck Lever 		return false;
113063cae470SChuck Lever 
113141c8f70fSChuck Lever 	/* Peek at stream contents without advancing. */
113241c8f70fSChuck Lever 	p = xdr_inline_decode(xdr, 0);
113341c8f70fSChuck Lever 
113441c8f70fSChuck Lever 	/* Chunk lists */
113507e9a632SChuck Lever 	if (xdr_item_is_present(p++))
113663cae470SChuck Lever 		return false;
113707e9a632SChuck Lever 	if (xdr_item_is_present(p++))
113841c8f70fSChuck Lever 		return false;
113907e9a632SChuck Lever 	if (xdr_item_is_present(p++))
114063cae470SChuck Lever 		return false;
114163cae470SChuck Lever 
114241c8f70fSChuck Lever 	/* RPC header */
11435381e0ecSChuck Lever 	if (*p++ != rep->rr_xid)
114441c8f70fSChuck Lever 		return false;
114541c8f70fSChuck Lever 	if (*p != cpu_to_be32(RPC_CALL))
114641c8f70fSChuck Lever 		return false;
114741c8f70fSChuck Lever 
1148*11270e7cSKinglong Mee 	/* No backchannel service registered. */
1149*11270e7cSKinglong Mee 	if (xprt->bc_serv == NULL)
1150*11270e7cSKinglong Mee 		return false;
1151*11270e7cSKinglong Mee 
115241c8f70fSChuck Lever 	/* Now that we are sure this is a backchannel call,
115341c8f70fSChuck Lever 	 * advance to the RPC header.
115441c8f70fSChuck Lever 	 */
115541c8f70fSChuck Lever 	p = xdr_inline_decode(xdr, 3 * sizeof(*p));
115641c8f70fSChuck Lever 	if (unlikely(!p))
115763cae470SChuck Lever 		return true;
115841c8f70fSChuck Lever 
115984dff5ebSChuck Lever 	rpcrdma_bc_receive_call(r_xprt, rep);
116063cae470SChuck Lever 	return true;
116163cae470SChuck Lever }
116241c8f70fSChuck Lever #else	/* CONFIG_SUNRPC_BACKCHANNEL */
116341c8f70fSChuck Lever {
116441c8f70fSChuck Lever 	return false;
116563cae470SChuck Lever }
116663cae470SChuck Lever #endif	/* CONFIG_SUNRPC_BACKCHANNEL */
116763cae470SChuck Lever 
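/* A userspace sketch of the backchannel test above, assuming the
 * RFC 8166 layout: after the four fixed header words come three XDR
 * list discriminators (zero means absent), then the embedded RPC
 * header whose first word is the XID and whose second is the
 * call/reply direction. looks_like_bcall() is a hypothetical
 * analogue of rpcrdma_is_bcall()'s peek logic.
 */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>	/* htonl/ntohl */

#define RPC_CALL 0	/* msg_type CALL per the RPC protocol */

static int looks_like_bcall(const uint32_t *p, uint32_t xid)
{
	if (p[0] || p[1] || p[2])	/* any chunk list present? */
		return 0;
	if (ntohl(p[3]) != xid)		/* embedded RPC XID must match */
		return 0;
	return ntohl(p[4]) == RPC_CALL;	/* direction field */
}

int main(void)
{
	uint32_t words[5] = { 0, 0, 0, htonl(7), htonl(RPC_CALL) };

	printf("bcall? %d\n", looks_like_bcall(words, 7));
	return 0;
}
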
1168264b0cdbSChuck Lever static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length)
1169264b0cdbSChuck Lever {
1170e11b7c96SChuck Lever 	u32 handle;
1171e11b7c96SChuck Lever 	u64 offset;
1172264b0cdbSChuck Lever 	__be32 *p;
1173264b0cdbSChuck Lever 
1174264b0cdbSChuck Lever 	p = xdr_inline_decode(xdr, 4 * sizeof(*p));
1175264b0cdbSChuck Lever 	if (unlikely(!p))
1176264b0cdbSChuck Lever 		return -EIO;
1177264b0cdbSChuck Lever 
1178f60a0869SChuck Lever 	xdr_decode_rdma_segment(p, &handle, length, &offset);
1179e11b7c96SChuck Lever 	trace_xprtrdma_decode_seg(handle, *length, offset);
1180264b0cdbSChuck Lever 	return 0;
1181264b0cdbSChuck Lever }
1182264b0cdbSChuck Lever 
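/* A sketch of the on-the-wire segment decoded above: four XDR words
 * carrying a 32-bit handle, a 32-bit length, and a 64-bit offset,
 * per RFC 8166. decode_segment() is a hypothetical userspace
 * analogue of xdr_decode_rdma_segment().
 */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

static void decode_segment(const uint32_t *p, uint32_t *handle,
			   uint32_t *length, uint64_t *offset)
{
	*handle = ntohl(p[0]);
	*length = ntohl(p[1]);
	*offset = ((uint64_t)ntohl(p[2]) << 32) | ntohl(p[3]);
}

int main(void)
{
	uint32_t seg[4] = { htonl(0x42), htonl(4096), htonl(0), htonl(0x1000) };
	uint32_t handle, length;
	uint64_t offset;

	decode_segment(seg, &handle, &length, &offset);
	printf("handle 0x%x length %u offset 0x%llx\n",
	       handle, length, (unsigned long long)offset);
	return 0;
}
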
1183264b0cdbSChuck Lever static int decode_write_chunk(struct xdr_stream *xdr, u32 *length)
1184264b0cdbSChuck Lever {
1185264b0cdbSChuck Lever 	u32 segcount, seglength;
1186264b0cdbSChuck Lever 	__be32 *p;
1187264b0cdbSChuck Lever 
1188264b0cdbSChuck Lever 	p = xdr_inline_decode(xdr, sizeof(*p));
1189264b0cdbSChuck Lever 	if (unlikely(!p))
1190264b0cdbSChuck Lever 		return -EIO;
1191264b0cdbSChuck Lever 
1192264b0cdbSChuck Lever 	*length = 0;
1193264b0cdbSChuck Lever 	segcount = be32_to_cpup(p);
1194264b0cdbSChuck Lever 	while (segcount--) {
1195264b0cdbSChuck Lever 		if (decode_rdma_segment(xdr, &seglength))
1196264b0cdbSChuck Lever 			return -EIO;
1197264b0cdbSChuck Lever 		*length += seglength;
1198264b0cdbSChuck Lever 	}
1199264b0cdbSChuck Lever 
1200264b0cdbSChuck Lever 	return 0;
1201264b0cdbSChuck Lever }
1202264b0cdbSChuck Lever 
1203264b0cdbSChuck Lever /* In RPC-over-RDMA Version One replies, a Read list is never
1204264b0cdbSChuck Lever  * expected. This decoder is a stub that returns an error if
1205264b0cdbSChuck Lever  * a Read list is present.
1206264b0cdbSChuck Lever  */
1207264b0cdbSChuck Lever static int decode_read_list(struct xdr_stream *xdr)
1208264b0cdbSChuck Lever {
1209264b0cdbSChuck Lever 	__be32 *p;
1210264b0cdbSChuck Lever 
1211264b0cdbSChuck Lever 	p = xdr_inline_decode(xdr, sizeof(*p));
1212264b0cdbSChuck Lever 	if (unlikely(!p))
1213264b0cdbSChuck Lever 		return -EIO;
121407e9a632SChuck Lever 	if (unlikely(xdr_item_is_present(p)))
1215264b0cdbSChuck Lever 		return -EIO;
1216264b0cdbSChuck Lever 	return 0;
1217264b0cdbSChuck Lever }
1218264b0cdbSChuck Lever 
1219264b0cdbSChuck Lever /* Supports only one Write chunk in the Write list
1220264b0cdbSChuck Lever  */
1221264b0cdbSChuck Lever static int decode_write_list(struct xdr_stream *xdr, u32 *length)
1222264b0cdbSChuck Lever {
1223264b0cdbSChuck Lever 	u32 chunklen;
1224264b0cdbSChuck Lever 	bool first;
1225264b0cdbSChuck Lever 	__be32 *p;
1226264b0cdbSChuck Lever 
1227264b0cdbSChuck Lever 	*length = 0;
1228264b0cdbSChuck Lever 	first = true;
1229264b0cdbSChuck Lever 	do {
1230264b0cdbSChuck Lever 		p = xdr_inline_decode(xdr, sizeof(*p));
1231264b0cdbSChuck Lever 		if (unlikely(!p))
1232264b0cdbSChuck Lever 			return -EIO;
123307e9a632SChuck Lever 		if (xdr_item_is_absent(p))
1234264b0cdbSChuck Lever 			break;
1235264b0cdbSChuck Lever 		if (!first)
1236264b0cdbSChuck Lever 			return -EIO;
1237264b0cdbSChuck Lever 
1238264b0cdbSChuck Lever 		if (decode_write_chunk(xdr, &chunklen))
1239264b0cdbSChuck Lever 			return -EIO;
1240264b0cdbSChuck Lever 		*length += chunklen;
1241264b0cdbSChuck Lever 		first = false;
1242264b0cdbSChuck Lever 	} while (true);
1243264b0cdbSChuck Lever 	return 0;
1244264b0cdbSChuck Lever }
1245264b0cdbSChuck Lever 
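/* A userspace sketch of the list walk above: the Write list is a
 * chain of optional items, each introduced by a nonzero
 * discriminator word and terminated by a zero word, and this
 * decoder accepts at most one chunk. walk_write_list() and its
 * flat word-array input are hypothetical simplifications.
 */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

/* returns 0 on success, -1 on a malformed or over-long list */
static int walk_write_list(const uint32_t *p, size_t nwords)
{
	int first = 1;
	size_t i = 0;

	while (i < nwords && p[i] != 0) {	/* nonzero: item present */
		if (!first)
			return -1;		/* only one chunk allowed */
		/* skip discriminator + segcount + 4 words per segment */
		i += 1 + 1 + 4 * ntohl(p[i + 1]);
		first = 0;
	}
	return (i < nwords) ? 0 : -1;		/* need a terminator word */
}

int main(void)
{
	/* one chunk with one 4-word segment, then the list terminator */
	uint32_t wire[] = { htonl(1), htonl(1), 0, 0, 0, 0, 0 };

	printf("write list ok? %s\n",
	       walk_write_list(wire, 7) == 0 ? "yes" : "no");
	return 0;
}
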
1246264b0cdbSChuck Lever static int decode_reply_chunk(struct xdr_stream *xdr, u32 *length)
1247264b0cdbSChuck Lever {
1248264b0cdbSChuck Lever 	__be32 *p;
1249264b0cdbSChuck Lever 
1250264b0cdbSChuck Lever 	p = xdr_inline_decode(xdr, sizeof(*p));
1251264b0cdbSChuck Lever 	if (unlikely(!p))
1252264b0cdbSChuck Lever 		return -EIO;
1253264b0cdbSChuck Lever 
1254264b0cdbSChuck Lever 	*length = 0;
125507e9a632SChuck Lever 	if (xdr_item_is_present(p))
1256264b0cdbSChuck Lever 		if (decode_write_chunk(xdr, length))
1257264b0cdbSChuck Lever 			return -EIO;
1258264b0cdbSChuck Lever 	return 0;
1259264b0cdbSChuck Lever }
1260264b0cdbSChuck Lever 
126107ff2dd5SChuck Lever static int
126207ff2dd5SChuck Lever rpcrdma_decode_msg(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
126307ff2dd5SChuck Lever 		   struct rpc_rqst *rqst)
126407ff2dd5SChuck Lever {
126507ff2dd5SChuck Lever 	struct xdr_stream *xdr = &rep->rr_stream;
1266264b0cdbSChuck Lever 	u32 writelist, replychunk, rpclen;
1267264b0cdbSChuck Lever 	char *base;
126807ff2dd5SChuck Lever 
1269264b0cdbSChuck Lever 	/* Decode the chunk lists */
1270264b0cdbSChuck Lever 	if (decode_read_list(xdr))
1271264b0cdbSChuck Lever 		return -EIO;
1272264b0cdbSChuck Lever 	if (decode_write_list(xdr, &writelist))
1273264b0cdbSChuck Lever 		return -EIO;
1274264b0cdbSChuck Lever 	if (decode_reply_chunk(xdr, &replychunk))
127507ff2dd5SChuck Lever 		return -EIO;
127607ff2dd5SChuck Lever 
1277264b0cdbSChuck Lever 	/* RDMA_MSG sanity checks */
1278264b0cdbSChuck Lever 	if (unlikely(replychunk))
127907ff2dd5SChuck Lever 		return -EIO;
128007ff2dd5SChuck Lever 
1281264b0cdbSChuck Lever 	/* Build the RPC reply's Payload stream in rqst->rq_rcv_buf */
1282264b0cdbSChuck Lever 	base = (char *)xdr_inline_decode(xdr, 0);
1283264b0cdbSChuck Lever 	rpclen = xdr_stream_remaining(xdr);
128407ff2dd5SChuck Lever 	r_xprt->rx_stats.fixup_copy_count +=
1285264b0cdbSChuck Lever 		rpcrdma_inline_fixup(rqst, base, rpclen, writelist & 3);
128607ff2dd5SChuck Lever 
1287264b0cdbSChuck Lever 	r_xprt->rx_stats.total_rdma_reply += writelist;
1288264b0cdbSChuck Lever 	return rpclen + xdr_align_size(writelist);
128907ff2dd5SChuck Lever }
129007ff2dd5SChuck Lever 
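/* A userspace mirror of the size arithmetic above, assuming the
 * 4-byte XDR unit: xdr_align_size() rounds the Write list payload
 * up to full XDR units, and "writelist & 3" is the remainder that
 * rpcrdma_inline_fixup() receives as its pad argument.
 */
#include <stdio.h>

static size_t xdr_align_size(size_t n)	/* mirrors the kernel inline */
{
	return (n + 3) & ~(size_t)3;
}

int main(void)
{
	for (size_t len = 5; len <= 8; len++)
		printf("writelist %zu -> aligned %zu, writelist & 3 = %zu\n",
		       len, xdr_align_size(len), len & 3);
	return 0;
}
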
129107ff2dd5SChuck Lever static noinline int
129207ff2dd5SChuck Lever rpcrdma_decode_nomsg(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
129307ff2dd5SChuck Lever {
129407ff2dd5SChuck Lever 	struct xdr_stream *xdr = &rep->rr_stream;
1295264b0cdbSChuck Lever 	u32 writelist, replychunk;
129607ff2dd5SChuck Lever 
1297264b0cdbSChuck Lever 	/* Decode the chunk lists */
1298264b0cdbSChuck Lever 	if (decode_read_list(xdr))
1299264b0cdbSChuck Lever 		return -EIO;
1300264b0cdbSChuck Lever 	if (decode_write_list(xdr, &writelist))
1301264b0cdbSChuck Lever 		return -EIO;
1302264b0cdbSChuck Lever 	if (decode_reply_chunk(xdr, &replychunk))
130307ff2dd5SChuck Lever 		return -EIO;
130407ff2dd5SChuck Lever 
1305264b0cdbSChuck Lever 	/* RDMA_NOMSG sanity checks */
1306264b0cdbSChuck Lever 	if (unlikely(writelist))
130707ff2dd5SChuck Lever 		return -EIO;
1308264b0cdbSChuck Lever 	if (unlikely(!replychunk))
130907ff2dd5SChuck Lever 		return -EIO;
131007ff2dd5SChuck Lever 
1311264b0cdbSChuck Lever 	/* Reply chunk buffer already is the reply vector */
1312264b0cdbSChuck Lever 	r_xprt->rx_stats.total_rdma_reply += replychunk;
1313264b0cdbSChuck Lever 	return replychunk;
131407ff2dd5SChuck Lever }
131507ff2dd5SChuck Lever 
131607ff2dd5SChuck Lever static noinline int
131707ff2dd5SChuck Lever rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
131807ff2dd5SChuck Lever 		     struct rpc_rqst *rqst)
131907ff2dd5SChuck Lever {
132007ff2dd5SChuck Lever 	struct xdr_stream *xdr = &rep->rr_stream;
132107ff2dd5SChuck Lever 	__be32 *p;
132207ff2dd5SChuck Lever 
132307ff2dd5SChuck Lever 	p = xdr_inline_decode(xdr, sizeof(*p));
132407ff2dd5SChuck Lever 	if (unlikely(!p))
132507ff2dd5SChuck Lever 		return -EIO;
132607ff2dd5SChuck Lever 
132707ff2dd5SChuck Lever 	switch (*p) {
132807ff2dd5SChuck Lever 	case err_vers:
132907ff2dd5SChuck Lever 		p = xdr_inline_decode(xdr, 2 * sizeof(*p));
133007ff2dd5SChuck Lever 		if (!p)
133107ff2dd5SChuck Lever 			break;
13323821e232SChuck Lever 		trace_xprtrdma_err_vers(rqst, p, p + 1);
133307ff2dd5SChuck Lever 		break;
133407ff2dd5SChuck Lever 	case err_chunk:
13353821e232SChuck Lever 		trace_xprtrdma_err_chunk(rqst);
133607ff2dd5SChuck Lever 		break;
133707ff2dd5SChuck Lever 	default:
13383821e232SChuck Lever 		trace_xprtrdma_err_unrecognized(rqst, p);
133907ff2dd5SChuck Lever 	}
134007ff2dd5SChuck Lever 
13417b2182ecSChuck Lever 	return -EIO;
134207ff2dd5SChuck Lever }
134307ff2dd5SChuck Lever 
13448a053433SChuck Lever /**
13458a053433SChuck Lever  * rpcrdma_unpin_rqst - Release rqst without completing it
13468a053433SChuck Lever  * @rep: RPC/RDMA Receive context
13478a053433SChuck Lever  *
13488a053433SChuck Lever  * This is done when a connection is lost so that a Reply
13498a053433SChuck Lever  * can be dropped and its matching Call can be subsequently
13508a053433SChuck Lever  * retransmitted on a new connection.
13518a053433SChuck Lever  */
13528a053433SChuck Lever void rpcrdma_unpin_rqst(struct rpcrdma_rep *rep)
13538a053433SChuck Lever {
13548a053433SChuck Lever 	struct rpc_xprt *xprt = &rep->rr_rxprt->rx_xprt;
13558a053433SChuck Lever 	struct rpc_rqst *rqst = rep->rr_rqst;
13568a053433SChuck Lever 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
13578a053433SChuck Lever 
13588a053433SChuck Lever 	req->rl_reply = NULL;
13598a053433SChuck Lever 	rep->rr_rqst = NULL;
13608a053433SChuck Lever 
13618a053433SChuck Lever 	spin_lock(&xprt->queue_lock);
13628a053433SChuck Lever 	xprt_unpin_rqst(rqst);
13638a053433SChuck Lever 	spin_unlock(&xprt->queue_lock);
13648a053433SChuck Lever }
13658a053433SChuck Lever 
13668a053433SChuck Lever /**
13678a053433SChuck Lever  * rpcrdma_complete_rqst - Pass completed rqst back to RPC
13688a053433SChuck Lever  * @rep: RPC/RDMA Receive context
13698a053433SChuck Lever  *
13708a053433SChuck Lever  * Reconstruct the RPC reply and complete the transaction
13718a053433SChuck Lever  * while @rqst is still pinned to ensure the rep, rqst, and
13728a053433SChuck Lever  * rq_task pointers remain stable.
1373e1352c96SChuck Lever  */
1374e1352c96SChuck Lever void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
1375e1352c96SChuck Lever {
1376e1352c96SChuck Lever 	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
1377e1352c96SChuck Lever 	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
1378e1352c96SChuck Lever 	struct rpc_rqst *rqst = rep->rr_rqst;
1379e1352c96SChuck Lever 	int status;
1380e1352c96SChuck Lever 
1381e1352c96SChuck Lever 	switch (rep->rr_proc) {
1382e1352c96SChuck Lever 	case rdma_msg:
1383e1352c96SChuck Lever 		status = rpcrdma_decode_msg(r_xprt, rep, rqst);
1384e1352c96SChuck Lever 		break;
1385e1352c96SChuck Lever 	case rdma_nomsg:
1386e1352c96SChuck Lever 		status = rpcrdma_decode_nomsg(r_xprt, rep);
1387e1352c96SChuck Lever 		break;
1388e1352c96SChuck Lever 	case rdma_error:
1389e1352c96SChuck Lever 		status = rpcrdma_decode_error(r_xprt, rep, rqst);
1390e1352c96SChuck Lever 		break;
1391e1352c96SChuck Lever 	default:
1392e1352c96SChuck Lever 		status = -EIO;
1393e1352c96SChuck Lever 	}
1394e1352c96SChuck Lever 	if (status < 0)
1395e1352c96SChuck Lever 		goto out_badheader;
1396e1352c96SChuck Lever 
1397e1352c96SChuck Lever out:
139875c84151STrond Myklebust 	spin_lock(&xprt->queue_lock);
1399e1352c96SChuck Lever 	xprt_complete_rqst(rqst->rq_task, status);
1400e1352c96SChuck Lever 	xprt_unpin_rqst(rqst);
140175c84151STrond Myklebust 	spin_unlock(&xprt->queue_lock);
1402e1352c96SChuck Lever 	return;
1403e1352c96SChuck Lever 
1404e1352c96SChuck Lever out_badheader:
14053a9568feSChuck Lever 	trace_xprtrdma_reply_hdr_err(rep);
1406e1352c96SChuck Lever 	r_xprt->rx_stats.bad_reply_count++;
14077b2182ecSChuck Lever 	rqst->rq_task->tk_status = status;
14087b2182ecSChuck Lever 	status = 0;
1409e1352c96SChuck Lever 	goto out;
1410e1352c96SChuck Lever }
1411e1352c96SChuck Lever 
14120ab11523SChuck Lever static void rpcrdma_reply_done(struct kref *kref)
14130ba6f370SChuck Lever {
14140ab11523SChuck Lever 	struct rpcrdma_req *req =
14150ab11523SChuck Lever 		container_of(kref, struct rpcrdma_req, rl_kref);
141601bb35c8SChuck Lever 
14170ab11523SChuck Lever 	rpcrdma_complete_rqst(req->rl_reply);
14180ba6f370SChuck Lever }
14190ba6f370SChuck Lever 
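/* A sketch of the reference-gated completion above: the request
 * holds a reference for each outstanding event, and only the final
 * put hands the reply up to RPC. A plain int stands in for the
 * kernel's struct kref, so this sketch is not thread-safe.
 */
#include <stdio.h>

struct sketch_req { int refs; };

static void reply_done(struct sketch_req *req)
{
	printf("completing RPC reply\n");
}

static void req_put(struct sketch_req *req)
{
	if (--req->refs == 0)	/* last reference completes the RPC */
		reply_done(req);
}

int main(void)
{
	struct sketch_req req = { .refs = 2 };	/* two pending events */

	req_put(&req);		/* e.g. Send completion */
	req_put(&req);		/* e.g. Reply processed: fires reply_done */
	return 0;
}
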
1420d8099fedSChuck Lever /**
1421d8099fedSChuck Lever  * rpcrdma_reply_handler - Process received RPC/RDMA messages
1422d8099fedSChuck Lever  * @rep: Incoming rpcrdma_rep object to process
1423fe97b47cSChuck Lever  *
1424e9601828S\"Talpey, Thomas\  * Errors must result in the RPC task either being awakened, or
1425e9601828S\"Talpey, Thomas\  * allowed to timeout, to discover the errors at that time.
1426e9601828S\"Talpey, Thomas\  */
1427d8f532d2SChuck Lever void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
1428e9601828S\"Talpey, Thomas\ {
1429431af645SChuck Lever 	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
1430431af645SChuck Lever 	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
1431be798f90SChuck Lever 	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1432e9601828S\"Talpey, Thomas\ 	struct rpcrdma_req *req;
1433e9601828S\"Talpey, Thomas\ 	struct rpc_rqst *rqst;
1434be798f90SChuck Lever 	u32 credits;
14355381e0ecSChuck Lever 	__be32 *p;
1436e9601828S\"Talpey, Thomas\ 
1437f9e1afe0SChuck Lever 	/* Any data means we had a useful conversation, so
1438f9e1afe0SChuck Lever 	 * we don't need to delay the next reconnect.
1439f9e1afe0SChuck Lever 	 */
1440f9e1afe0SChuck Lever 	if (xprt->reestablish_timeout)
1441f9e1afe0SChuck Lever 		xprt->reestablish_timeout = 0;
1442f9e1afe0SChuck Lever 
14437c8d9e7cSChuck Lever 	/* Fixed transport header fields */
14445381e0ecSChuck Lever 	xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
14450ccc61b1SChuck Lever 			rep->rr_hdrbuf.head[0].iov_base, NULL);
14465381e0ecSChuck Lever 	p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
144796f8778fSChuck Lever 	if (unlikely(!p))
1448b0e178a2SChuck Lever 		goto out_shortreply;
14495381e0ecSChuck Lever 	rep->rr_xid = *p++;
14505381e0ecSChuck Lever 	rep->rr_vers = *p++;
1451be798f90SChuck Lever 	credits = be32_to_cpu(*p++);
14525381e0ecSChuck Lever 	rep->rr_proc = *p++;
1453b0e178a2SChuck Lever 
14545381e0ecSChuck Lever 	if (rep->rr_vers != rpcrdma_version)
145561433af5SChuck Lever 		goto out_badversion;
145661433af5SChuck Lever 
14575381e0ecSChuck Lever 	if (rpcrdma_is_bcall(r_xprt, rep))
145841c8f70fSChuck Lever 		return;
1459e9601828S\"Talpey, Thomas\ 
1460fe97b47cSChuck Lever 	/* Match incoming rpcrdma_rep to an rpcrdma_req to
1461fe97b47cSChuck Lever 	 * get context for handling any incoming chunks.
1462fe97b47cSChuck Lever 	 */
146375c84151STrond Myklebust 	spin_lock(&xprt->queue_lock);
14645381e0ecSChuck Lever 	rqst = xprt_lookup_rqst(xprt, rep->rr_xid);
14659590d083SChuck Lever 	if (!rqst)
14669590d083SChuck Lever 		goto out_norqst;
14679590d083SChuck Lever 	xprt_pin_rqst(rqst);
146893bdcf9fSTrond Myklebust 	spin_unlock(&xprt->queue_lock);
1469be798f90SChuck Lever 
1470be798f90SChuck Lever 	if (credits == 0)
1471be798f90SChuck Lever 		credits = 1;	/* don't deadlock */
1472e28ce900SChuck Lever 	else if (credits > r_xprt->rx_ep->re_max_requests)
1473e28ce900SChuck Lever 		credits = r_xprt->rx_ep->re_max_requests;
147435d8b10aSChuck Lever 	rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1),
147535d8b10aSChuck Lever 			   false);
1476eea63ca7SChuck Lever 	if (buf->rb_credits != credits)
1477eea63ca7SChuck Lever 		rpcrdma_update_cwnd(r_xprt, credits);
1478be798f90SChuck Lever 
14799590d083SChuck Lever 	req = rpcr_to_rdmar(rqst);
148003ffd924SChuck Lever 	if (unlikely(req->rl_reply))
1481c35ca60dSChuck Lever 		rpcrdma_rep_put(buf, req->rl_reply);
14824b196dc6SChuck Lever 	req->rl_reply = rep;
1483e1352c96SChuck Lever 	rep->rr_rqst = rqst;
1484431af645SChuck Lever 
148503ffd924SChuck Lever 	trace_xprtrdma_reply(rqst->rq_task, rep, credits);
1486d8099fedSChuck Lever 
1487d8099fedSChuck Lever 	if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
1488d8099fedSChuck Lever 		frwr_reminv(rep, &req->rl_registered);
14890ab11523SChuck Lever 	if (!list_empty(&req->rl_registered))
1490d8099fedSChuck Lever 		frwr_unmap_async(r_xprt, req);
1491d8099fedSChuck Lever 		/* LocalInv completion will complete the RPC */
14920ab11523SChuck Lever 	else
14930ab11523SChuck Lever 		kref_put(&req->rl_kref, rpcrdma_reply_done);
1494b0e178a2SChuck Lever 	return;
1495b0e178a2SChuck Lever 
149661433af5SChuck Lever out_badversion:
14973a9568feSChuck Lever 	trace_xprtrdma_reply_vers_err(rep);
14986ceea368SChuck Lever 	goto out;
149961433af5SChuck Lever 
1500431af645SChuck Lever out_norqst:
150175c84151STrond Myklebust 	spin_unlock(&xprt->queue_lock);
15023a9568feSChuck Lever 	trace_xprtrdma_reply_rqst_err(rep);
15036ceea368SChuck Lever 	goto out;
1504b0e178a2SChuck Lever 
15059590d083SChuck Lever out_shortreply:
15063a9568feSChuck Lever 	trace_xprtrdma_reply_short_err(rep);
1507b0e178a2SChuck Lever 
15086ceea368SChuck Lever out:
1509c35ca60dSChuck Lever 	rpcrdma_rep_put(buf, rep);
1510e9601828S\"Talpey, Thomas\ }
1511
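/* A sketch of the credit clamping in rpcrdma_reply_handler() above:
 * a zero grant would deadlock the transport, so it is raised to
 * one, and a grant beyond the negotiated maximum is capped.
 * re_max_requests below is a hypothetical constant standing in for
 * the endpoint's field.
 */
#include <stdio.h>
#include <stdint.h>

static uint32_t clamp_credits(uint32_t credits, uint32_t max_requests)
{
	if (credits == 0)
		return 1;		/* don't deadlock */
	if (credits > max_requests)
		return max_requests;
	return credits;
}

int main(void)
{
	const uint32_t re_max_requests = 128;
	uint32_t samples[] = { 0, 64, 500 };

	for (int i = 0; i < 3; i++)
		printf("grant %3u -> credits %u\n", (unsigned)samples[i],
		       (unsigned)clamp_credits(samples[i], re_max_requests));
	return 0;
}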