xref: /openbmc/linux/net/sunrpc/socklib.c (revision de4eda9d)
1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2094bb20bSChuck Lever /*
3094bb20bSChuck Lever  * linux/net/sunrpc/socklib.c
4094bb20bSChuck Lever  *
5094bb20bSChuck Lever  * Common socket helper routines for RPC client and server
6094bb20bSChuck Lever  *
7094bb20bSChuck Lever  * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
8094bb20bSChuck Lever  */
9094bb20bSChuck Lever 
10fb286bb2SHerbert Xu #include <linux/compiler.h>
11fb286bb2SHerbert Xu #include <linux/netdevice.h>
125a0e3ad6STejun Heo #include <linux/gfp.h>
13fb286bb2SHerbert Xu #include <linux/skbuff.h>
14094bb20bSChuck Lever #include <linux/types.h>
15094bb20bSChuck Lever #include <linux/pagemap.h>
16094bb20bSChuck Lever #include <linux/udp.h>
179e55eef4SChuck Lever #include <linux/sunrpc/msg_prot.h>
18b2648015STrond Myklebust #include <linux/sunrpc/sched.h>
19094bb20bSChuck Lever #include <linux/sunrpc/xdr.h>
20bc3b2d7fSPaul Gortmaker #include <linux/export.h>
21094bb20bSChuck Lever 
229e55eef4SChuck Lever #include "socklib.h"
239e55eef4SChuck Lever 
249e55eef4SChuck Lever /*
259e55eef4SChuck Lever  * Helper structure for copying from an sk_buff.
269e55eef4SChuck Lever  */
279e55eef4SChuck Lever struct xdr_skb_reader {
289e55eef4SChuck Lever 	struct sk_buff	*skb;
299e55eef4SChuck Lever 	unsigned int	offset;
309e55eef4SChuck Lever 	size_t		count;
319e55eef4SChuck Lever 	__wsum		csum;
329e55eef4SChuck Lever };
339e55eef4SChuck Lever 
349e55eef4SChuck Lever typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to,
359e55eef4SChuck Lever 				     size_t len);
36094bb20bSChuck Lever 
37094bb20bSChuck Lever /**
389d292316SChuck Lever  * xdr_skb_read_bits - copy some data bits from skb to internal buffer
39094bb20bSChuck Lever  * @desc: sk_buff copy helper
40094bb20bSChuck Lever  * @to: copy destination
41094bb20bSChuck Lever  * @len: number of bytes to copy
42094bb20bSChuck Lever  *
43094bb20bSChuck Lever  * Possibly called several times to iterate over an sk_buff and copy
44094bb20bSChuck Lever  * data out of it.
45094bb20bSChuck Lever  */
46550aebfeSTrond Myklebust static size_t
xdr_skb_read_bits(struct xdr_skb_reader * desc,void * to,size_t len)47550aebfeSTrond Myklebust xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
48094bb20bSChuck Lever {
49094bb20bSChuck Lever 	if (len > desc->count)
50094bb20bSChuck Lever 		len = desc->count;
519d292316SChuck Lever 	if (unlikely(skb_copy_bits(desc->skb, desc->offset, to, len)))
52094bb20bSChuck Lever 		return 0;
53094bb20bSChuck Lever 	desc->count -= len;
54094bb20bSChuck Lever 	desc->offset += len;
55094bb20bSChuck Lever 	return len;
56094bb20bSChuck Lever }
57094bb20bSChuck Lever 
58094bb20bSChuck Lever /**
599d292316SChuck Lever  * xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer
60094bb20bSChuck Lever  * @desc: sk_buff copy helper
61094bb20bSChuck Lever  * @to: copy destination
62094bb20bSChuck Lever  * @len: number of bytes to copy
63094bb20bSChuck Lever  *
64094bb20bSChuck Lever  * Same as skb_read_bits, but calculate a checksum at the same time.
65094bb20bSChuck Lever  */
xdr_skb_read_and_csum_bits(struct xdr_skb_reader * desc,void * to,size_t len)66dd456471SChuck Lever static size_t xdr_skb_read_and_csum_bits(struct xdr_skb_reader *desc, void *to, size_t len)
67094bb20bSChuck Lever {
685f92a738SAl Viro 	unsigned int pos;
695f92a738SAl Viro 	__wsum csum2;
70094bb20bSChuck Lever 
71094bb20bSChuck Lever 	if (len > desc->count)
72094bb20bSChuck Lever 		len = desc->count;
73094bb20bSChuck Lever 	pos = desc->offset;
748d5930dfSAl Viro 	csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len);
75094bb20bSChuck Lever 	desc->csum = csum_block_add(desc->csum, csum2, pos);
76094bb20bSChuck Lever 	desc->count -= len;
77094bb20bSChuck Lever 	desc->offset += len;
78094bb20bSChuck Lever 	return len;
79094bb20bSChuck Lever }
80094bb20bSChuck Lever 
81094bb20bSChuck Lever /**
82094bb20bSChuck Lever  * xdr_partial_copy_from_skb - copy data out of an skb
83094bb20bSChuck Lever  * @xdr: target XDR buffer
84094bb20bSChuck Lever  * @base: starting offset
85094bb20bSChuck Lever  * @desc: sk_buff copy helper
86094bb20bSChuck Lever  * @copy_actor: virtual method for copying data
87094bb20bSChuck Lever  *
88094bb20bSChuck Lever  */
89ec846469STrond Myklebust static ssize_t
xdr_partial_copy_from_skb(struct xdr_buf * xdr,unsigned int base,struct xdr_skb_reader * desc,xdr_skb_read_actor copy_actor)90ec846469STrond Myklebust xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor)
91094bb20bSChuck Lever {
92094bb20bSChuck Lever 	struct page	**ppage = xdr->pages;
93094bb20bSChuck Lever 	unsigned int	len, pglen = xdr->page_len;
94094bb20bSChuck Lever 	ssize_t		copied = 0;
95322e2efeSChuck Lever 	size_t		ret;
96094bb20bSChuck Lever 
97094bb20bSChuck Lever 	len = xdr->head[0].iov_len;
98094bb20bSChuck Lever 	if (base < len) {
99094bb20bSChuck Lever 		len -= base;
100094bb20bSChuck Lever 		ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len);
101094bb20bSChuck Lever 		copied += ret;
102094bb20bSChuck Lever 		if (ret != len || !desc->count)
103094bb20bSChuck Lever 			goto out;
104094bb20bSChuck Lever 		base = 0;
105094bb20bSChuck Lever 	} else
106094bb20bSChuck Lever 		base -= len;
107094bb20bSChuck Lever 
108094bb20bSChuck Lever 	if (unlikely(pglen == 0))
109094bb20bSChuck Lever 		goto copy_tail;
110094bb20bSChuck Lever 	if (unlikely(base >= pglen)) {
111094bb20bSChuck Lever 		base -= pglen;
112094bb20bSChuck Lever 		goto copy_tail;
113094bb20bSChuck Lever 	}
114094bb20bSChuck Lever 	if (base || xdr->page_base) {
115094bb20bSChuck Lever 		pglen -= base;
116094bb20bSChuck Lever 		base += xdr->page_base;
11709cbfeafSKirill A. Shutemov 		ppage += base >> PAGE_SHIFT;
11809cbfeafSKirill A. Shutemov 		base &= ~PAGE_MASK;
119094bb20bSChuck Lever 	}
120094bb20bSChuck Lever 	do {
121094bb20bSChuck Lever 		char *kaddr;
122094bb20bSChuck Lever 
123094bb20bSChuck Lever 		/* ACL likes to be lazy in allocating pages - ACLs
124094bb20bSChuck Lever 		 * are small by default but can get huge. */
125431f6eb3STrond Myklebust 		if ((xdr->flags & XDRBUF_SPARSE_PAGES) && *ppage == NULL) {
12652db6f9aSChuck Lever 			*ppage = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
127094bb20bSChuck Lever 			if (unlikely(*ppage == NULL)) {
128094bb20bSChuck Lever 				if (copied == 0)
129094bb20bSChuck Lever 					copied = -ENOMEM;
130094bb20bSChuck Lever 				goto out;
131094bb20bSChuck Lever 			}
132094bb20bSChuck Lever 		}
133094bb20bSChuck Lever 
13409cbfeafSKirill A. Shutemov 		len = PAGE_SIZE;
135b8541786SCong Wang 		kaddr = kmap_atomic(*ppage);
136094bb20bSChuck Lever 		if (base) {
137094bb20bSChuck Lever 			len -= base;
138094bb20bSChuck Lever 			if (pglen < len)
139094bb20bSChuck Lever 				len = pglen;
140094bb20bSChuck Lever 			ret = copy_actor(desc, kaddr + base, len);
141094bb20bSChuck Lever 			base = 0;
142094bb20bSChuck Lever 		} else {
143094bb20bSChuck Lever 			if (pglen < len)
144094bb20bSChuck Lever 				len = pglen;
145094bb20bSChuck Lever 			ret = copy_actor(desc, kaddr, len);
146094bb20bSChuck Lever 		}
147094bb20bSChuck Lever 		flush_dcache_page(*ppage);
148b8541786SCong Wang 		kunmap_atomic(kaddr);
149094bb20bSChuck Lever 		copied += ret;
150094bb20bSChuck Lever 		if (ret != len || !desc->count)
151094bb20bSChuck Lever 			goto out;
152094bb20bSChuck Lever 		ppage++;
153094bb20bSChuck Lever 	} while ((pglen -= len) != 0);
154094bb20bSChuck Lever copy_tail:
155094bb20bSChuck Lever 	len = xdr->tail[0].iov_len;
156094bb20bSChuck Lever 	if (base < len)
157094bb20bSChuck Lever 		copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base);
158094bb20bSChuck Lever out:
159094bb20bSChuck Lever 	return copied;
160094bb20bSChuck Lever }
161094bb20bSChuck Lever 
162094bb20bSChuck Lever /**
163094bb20bSChuck Lever  * csum_partial_copy_to_xdr - checksum and copy data
164094bb20bSChuck Lever  * @xdr: target XDR buffer
165094bb20bSChuck Lever  * @skb: source skb
166094bb20bSChuck Lever  *
167094bb20bSChuck Lever  * We have set things up such that we perform the checksum of the UDP
168094bb20bSChuck Lever  * packet in parallel with the copies into the RPC client iovec.  -DaveM
169094bb20bSChuck Lever  */
csum_partial_copy_to_xdr(struct xdr_buf * xdr,struct sk_buff * skb)170094bb20bSChuck Lever int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
171094bb20bSChuck Lever {
172dd456471SChuck Lever 	struct xdr_skb_reader	desc;
173094bb20bSChuck Lever 
174094bb20bSChuck Lever 	desc.skb = skb;
1751da8c681SWillem de Bruijn 	desc.offset = 0;
176094bb20bSChuck Lever 	desc.count = skb->len - desc.offset;
177094bb20bSChuck Lever 
17860476372SHerbert Xu 	if (skb_csum_unnecessary(skb))
179094bb20bSChuck Lever 		goto no_checksum;
180094bb20bSChuck Lever 
181094bb20bSChuck Lever 	desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
1829d292316SChuck Lever 	if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_and_csum_bits) < 0)
183094bb20bSChuck Lever 		return -1;
184094bb20bSChuck Lever 	if (desc.offset != skb->len) {
1855f92a738SAl Viro 		__wsum csum2;
186094bb20bSChuck Lever 		csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0);
187094bb20bSChuck Lever 		desc.csum = csum_block_add(desc.csum, csum2, desc.offset);
188094bb20bSChuck Lever 	}
189094bb20bSChuck Lever 	if (desc.count)
190094bb20bSChuck Lever 		return -1;
191d3bc23e7SAl Viro 	if (csum_fold(desc.csum))
192094bb20bSChuck Lever 		return -1;
1937e3cead5STom Herbert 	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
1947e3cead5STom Herbert 	    !skb->csum_complete_sw)
1957fe50ac8SCong Wang 		netdev_rx_csum_fault(skb->dev, skb);
196094bb20bSChuck Lever 	return 0;
197094bb20bSChuck Lever no_checksum:
1989d292316SChuck Lever 	if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0)
199094bb20bSChuck Lever 		return -1;
200094bb20bSChuck Lever 	if (desc.count)
201094bb20bSChuck Lever 		return -1;
202094bb20bSChuck Lever 	return 0;
203094bb20bSChuck Lever }
20412444809S\"Talpey, Thomas\ EXPORT_SYMBOL_GPL(csum_partial_copy_to_xdr);
2059e55eef4SChuck Lever 
/*
 * Skip the first @seek bytes of the iterator already set up in @msg,
 * then pass the message to sock_sendmsg() and return its result.
 */
static inline int xprt_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t seek)
{
	if (seek != 0)
		iov_iter_advance(&msg->msg_iter, seek);

	return sock_sendmsg(sock, msg);
}
2139e55eef4SChuck Lever 
/*
 * Send the single kvec @vec on @sock, starting @seek bytes into it.
 * Returns whatever xprt_sendmsg() returns.
 */
static int xprt_send_kvec(struct socket *sock, struct msghdr *msg,
			  struct kvec *vec, size_t seek)
{
	const size_t total = vec->iov_len;

	iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, 1, total);

	return xprt_sendmsg(sock, msg, seek);
}
2209e55eef4SChuck Lever 
/*
 * Send the page data of @xdr on @sock, starting @base bytes into it.
 *
 * The iterator is built over @xdr's bvec array and spans page_base +
 * page_len bytes, so the seek distance includes page_base as well.
 * Returns whatever xprt_sendmsg() returns.
 */
static int xprt_send_pagedata(struct socket *sock, struct msghdr *msg,
			      struct xdr_buf *xdr, size_t base)
{
	const size_t span = xdr->page_len + xdr->page_base;

	iov_iter_bvec(&msg->msg_iter, ITER_SOURCE, xdr->bvec,
		      xdr_buf_pagecount(xdr), span);

	return xprt_sendmsg(sock, msg, base + xdr->page_base);
}
2289e55eef4SChuck Lever 
2299e55eef4SChuck Lever /* Common case:
2309e55eef4SChuck Lever  *  - stream transport
2319e55eef4SChuck Lever  *  - sending from byte 0 of the message
2329e55eef4SChuck Lever  *  - the message is wholly contained in @xdr's head iovec
2339e55eef4SChuck Lever  */
xprt_send_rm_and_kvec(struct socket * sock,struct msghdr * msg,rpc_fraghdr marker,struct kvec * vec,size_t base)2349e55eef4SChuck Lever static int xprt_send_rm_and_kvec(struct socket *sock, struct msghdr *msg,
2359e55eef4SChuck Lever 				 rpc_fraghdr marker, struct kvec *vec,
2369e55eef4SChuck Lever 				 size_t base)
2379e55eef4SChuck Lever {
2389e55eef4SChuck Lever 	struct kvec iov[2] = {
2399e55eef4SChuck Lever 		[0] = {
2409e55eef4SChuck Lever 			.iov_base	= &marker,
2419e55eef4SChuck Lever 			.iov_len	= sizeof(marker)
2429e55eef4SChuck Lever 		},
2439e55eef4SChuck Lever 		[1] = *vec,
2449e55eef4SChuck Lever 	};
2459e55eef4SChuck Lever 	size_t len = iov[0].iov_len + iov[1].iov_len;
2469e55eef4SChuck Lever 
247*de4eda9dSAl Viro 	iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, iov, 2, len);
2489e55eef4SChuck Lever 	return xprt_sendmsg(sock, msg, base);
2499e55eef4SChuck Lever }
2509e55eef4SChuck Lever 
2519e55eef4SChuck Lever /**
2529e55eef4SChuck Lever  * xprt_sock_sendmsg - write an xdr_buf directly to a socket
2539e55eef4SChuck Lever  * @sock: open socket to send on
2549e55eef4SChuck Lever  * @msg: socket message metadata
2559e55eef4SChuck Lever  * @xdr: xdr_buf containing this request
2569e55eef4SChuck Lever  * @base: starting position in the buffer
2579e55eef4SChuck Lever  * @marker: stream record marker field
2589e55eef4SChuck Lever  * @sent_p: return the total number of bytes successfully queued for sending
2599e55eef4SChuck Lever  *
2609e55eef4SChuck Lever  * Return values:
2619e55eef4SChuck Lever  *   On success, returns zero and fills in @sent_p.
2629e55eef4SChuck Lever  *   %-ENOTSOCK if  @sock is not a struct socket.
2639e55eef4SChuck Lever  */
xprt_sock_sendmsg(struct socket * sock,struct msghdr * msg,struct xdr_buf * xdr,unsigned int base,rpc_fraghdr marker,unsigned int * sent_p)2649e55eef4SChuck Lever int xprt_sock_sendmsg(struct socket *sock, struct msghdr *msg,
2659e55eef4SChuck Lever 		      struct xdr_buf *xdr, unsigned int base,
2669e55eef4SChuck Lever 		      rpc_fraghdr marker, unsigned int *sent_p)
2679e55eef4SChuck Lever {
2689e55eef4SChuck Lever 	unsigned int rmsize = marker ? sizeof(marker) : 0;
2699e55eef4SChuck Lever 	unsigned int remainder = rmsize + xdr->len - base;
2709e55eef4SChuck Lever 	unsigned int want;
2719e55eef4SChuck Lever 	int err = 0;
2729e55eef4SChuck Lever 
2739e55eef4SChuck Lever 	*sent_p = 0;
2749e55eef4SChuck Lever 
2759e55eef4SChuck Lever 	if (unlikely(!sock))
2769e55eef4SChuck Lever 		return -ENOTSOCK;
2779e55eef4SChuck Lever 
2789e55eef4SChuck Lever 	msg->msg_flags |= MSG_MORE;
2799e55eef4SChuck Lever 	want = xdr->head[0].iov_len + rmsize;
2809e55eef4SChuck Lever 	if (base < want) {
2819e55eef4SChuck Lever 		unsigned int len = want - base;
2829e55eef4SChuck Lever 
2839e55eef4SChuck Lever 		remainder -= len;
2849e55eef4SChuck Lever 		if (remainder == 0)
2859e55eef4SChuck Lever 			msg->msg_flags &= ~MSG_MORE;
2869e55eef4SChuck Lever 		if (rmsize)
2879e55eef4SChuck Lever 			err = xprt_send_rm_and_kvec(sock, msg, marker,
2889e55eef4SChuck Lever 						    &xdr->head[0], base);
2899e55eef4SChuck Lever 		else
2909e55eef4SChuck Lever 			err = xprt_send_kvec(sock, msg, &xdr->head[0], base);
2919e55eef4SChuck Lever 		if (remainder == 0 || err != len)
2929e55eef4SChuck Lever 			goto out;
2939e55eef4SChuck Lever 		*sent_p += err;
2949e55eef4SChuck Lever 		base = 0;
2959e55eef4SChuck Lever 	} else {
2969e55eef4SChuck Lever 		base -= want;
2979e55eef4SChuck Lever 	}
2989e55eef4SChuck Lever 
2999e55eef4SChuck Lever 	if (base < xdr->page_len) {
3009e55eef4SChuck Lever 		unsigned int len = xdr->page_len - base;
3019e55eef4SChuck Lever 
3029e55eef4SChuck Lever 		remainder -= len;
3039e55eef4SChuck Lever 		if (remainder == 0)
3049e55eef4SChuck Lever 			msg->msg_flags &= ~MSG_MORE;
3059e55eef4SChuck Lever 		err = xprt_send_pagedata(sock, msg, xdr, base);
3069e55eef4SChuck Lever 		if (remainder == 0 || err != len)
3079e55eef4SChuck Lever 			goto out;
3089e55eef4SChuck Lever 		*sent_p += err;
3099e55eef4SChuck Lever 		base = 0;
3109e55eef4SChuck Lever 	} else {
3119e55eef4SChuck Lever 		base -= xdr->page_len;
3129e55eef4SChuck Lever 	}
3139e55eef4SChuck Lever 
3149e55eef4SChuck Lever 	if (base >= xdr->tail[0].iov_len)
3159e55eef4SChuck Lever 		return 0;
3169e55eef4SChuck Lever 	msg->msg_flags &= ~MSG_MORE;
3179e55eef4SChuck Lever 	err = xprt_send_kvec(sock, msg, &xdr->tail[0], base);
3189e55eef4SChuck Lever out:
3199e55eef4SChuck Lever 	if (err > 0) {
3209e55eef4SChuck Lever 		*sent_p += err;
3219e55eef4SChuck Lever 		err = 0;
3229e55eef4SChuck Lever 	}
3239e55eef4SChuck Lever 	return err;
3249e55eef4SChuck Lever }
325