// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/net/sunrpc/socklib.c
 *
 * Common socket helper routines for RPC client and server
 *
 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
 */
9094bb20bSChuck Lever
10fb286bb2SHerbert Xu #include <linux/compiler.h>
11fb286bb2SHerbert Xu #include <linux/netdevice.h>
125a0e3ad6STejun Heo #include <linux/gfp.h>
13fb286bb2SHerbert Xu #include <linux/skbuff.h>
14094bb20bSChuck Lever #include <linux/types.h>
15094bb20bSChuck Lever #include <linux/pagemap.h>
16094bb20bSChuck Lever #include <linux/udp.h>
179e55eef4SChuck Lever #include <linux/sunrpc/msg_prot.h>
18b2648015STrond Myklebust #include <linux/sunrpc/sched.h>
19094bb20bSChuck Lever #include <linux/sunrpc/xdr.h>
20bc3b2d7fSPaul Gortmaker #include <linux/export.h>
21094bb20bSChuck Lever
229e55eef4SChuck Lever #include "socklib.h"
239e55eef4SChuck Lever
249e55eef4SChuck Lever /*
259e55eef4SChuck Lever * Helper structure for copying from an sk_buff.
269e55eef4SChuck Lever */
279e55eef4SChuck Lever struct xdr_skb_reader {
289e55eef4SChuck Lever struct sk_buff *skb;
299e55eef4SChuck Lever unsigned int offset;
309e55eef4SChuck Lever size_t count;
319e55eef4SChuck Lever __wsum csum;
329e55eef4SChuck Lever };
339e55eef4SChuck Lever
349e55eef4SChuck Lever typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to,
359e55eef4SChuck Lever size_t len);
36094bb20bSChuck Lever
37094bb20bSChuck Lever /**
389d292316SChuck Lever * xdr_skb_read_bits - copy some data bits from skb to internal buffer
39094bb20bSChuck Lever * @desc: sk_buff copy helper
40094bb20bSChuck Lever * @to: copy destination
41094bb20bSChuck Lever * @len: number of bytes to copy
42094bb20bSChuck Lever *
43094bb20bSChuck Lever * Possibly called several times to iterate over an sk_buff and copy
44094bb20bSChuck Lever * data out of it.
45094bb20bSChuck Lever */
46550aebfeSTrond Myklebust static size_t
xdr_skb_read_bits(struct xdr_skb_reader * desc,void * to,size_t len)47550aebfeSTrond Myklebust xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
48094bb20bSChuck Lever {
49094bb20bSChuck Lever if (len > desc->count)
50094bb20bSChuck Lever len = desc->count;
519d292316SChuck Lever if (unlikely(skb_copy_bits(desc->skb, desc->offset, to, len)))
52094bb20bSChuck Lever return 0;
53094bb20bSChuck Lever desc->count -= len;
54094bb20bSChuck Lever desc->offset += len;
55094bb20bSChuck Lever return len;
56094bb20bSChuck Lever }
57094bb20bSChuck Lever
58094bb20bSChuck Lever /**
599d292316SChuck Lever * xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer
60094bb20bSChuck Lever * @desc: sk_buff copy helper
61094bb20bSChuck Lever * @to: copy destination
62094bb20bSChuck Lever * @len: number of bytes to copy
63094bb20bSChuck Lever *
64094bb20bSChuck Lever * Same as skb_read_bits, but calculate a checksum at the same time.
65094bb20bSChuck Lever */
xdr_skb_read_and_csum_bits(struct xdr_skb_reader * desc,void * to,size_t len)66dd456471SChuck Lever static size_t xdr_skb_read_and_csum_bits(struct xdr_skb_reader *desc, void *to, size_t len)
67094bb20bSChuck Lever {
685f92a738SAl Viro unsigned int pos;
695f92a738SAl Viro __wsum csum2;
70094bb20bSChuck Lever
71094bb20bSChuck Lever if (len > desc->count)
72094bb20bSChuck Lever len = desc->count;
73094bb20bSChuck Lever pos = desc->offset;
748d5930dfSAl Viro csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len);
75094bb20bSChuck Lever desc->csum = csum_block_add(desc->csum, csum2, pos);
76094bb20bSChuck Lever desc->count -= len;
77094bb20bSChuck Lever desc->offset += len;
78094bb20bSChuck Lever return len;
79094bb20bSChuck Lever }
80094bb20bSChuck Lever
81094bb20bSChuck Lever /**
82094bb20bSChuck Lever * xdr_partial_copy_from_skb - copy data out of an skb
83094bb20bSChuck Lever * @xdr: target XDR buffer
84094bb20bSChuck Lever * @base: starting offset
85094bb20bSChuck Lever * @desc: sk_buff copy helper
86094bb20bSChuck Lever * @copy_actor: virtual method for copying data
87094bb20bSChuck Lever *
88094bb20bSChuck Lever */
89ec846469STrond Myklebust static ssize_t
xdr_partial_copy_from_skb(struct xdr_buf * xdr,unsigned int base,struct xdr_skb_reader * desc,xdr_skb_read_actor copy_actor)90ec846469STrond Myklebust xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor)
91094bb20bSChuck Lever {
92094bb20bSChuck Lever struct page **ppage = xdr->pages;
93094bb20bSChuck Lever unsigned int len, pglen = xdr->page_len;
94094bb20bSChuck Lever ssize_t copied = 0;
95322e2efeSChuck Lever size_t ret;
96094bb20bSChuck Lever
97094bb20bSChuck Lever len = xdr->head[0].iov_len;
98094bb20bSChuck Lever if (base < len) {
99094bb20bSChuck Lever len -= base;
100094bb20bSChuck Lever ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len);
101094bb20bSChuck Lever copied += ret;
102094bb20bSChuck Lever if (ret != len || !desc->count)
103094bb20bSChuck Lever goto out;
104094bb20bSChuck Lever base = 0;
105094bb20bSChuck Lever } else
106094bb20bSChuck Lever base -= len;
107094bb20bSChuck Lever
108094bb20bSChuck Lever if (unlikely(pglen == 0))
109094bb20bSChuck Lever goto copy_tail;
110094bb20bSChuck Lever if (unlikely(base >= pglen)) {
111094bb20bSChuck Lever base -= pglen;
112094bb20bSChuck Lever goto copy_tail;
113094bb20bSChuck Lever }
114094bb20bSChuck Lever if (base || xdr->page_base) {
115094bb20bSChuck Lever pglen -= base;
116094bb20bSChuck Lever base += xdr->page_base;
11709cbfeafSKirill A. Shutemov ppage += base >> PAGE_SHIFT;
11809cbfeafSKirill A. Shutemov base &= ~PAGE_MASK;
119094bb20bSChuck Lever }
120094bb20bSChuck Lever do {
121094bb20bSChuck Lever char *kaddr;
122094bb20bSChuck Lever
123094bb20bSChuck Lever /* ACL likes to be lazy in allocating pages - ACLs
124094bb20bSChuck Lever * are small by default but can get huge. */
125431f6eb3STrond Myklebust if ((xdr->flags & XDRBUF_SPARSE_PAGES) && *ppage == NULL) {
12652db6f9aSChuck Lever *ppage = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
127094bb20bSChuck Lever if (unlikely(*ppage == NULL)) {
128094bb20bSChuck Lever if (copied == 0)
129094bb20bSChuck Lever copied = -ENOMEM;
130094bb20bSChuck Lever goto out;
131094bb20bSChuck Lever }
132094bb20bSChuck Lever }
133094bb20bSChuck Lever
13409cbfeafSKirill A. Shutemov len = PAGE_SIZE;
135b8541786SCong Wang kaddr = kmap_atomic(*ppage);
136094bb20bSChuck Lever if (base) {
137094bb20bSChuck Lever len -= base;
138094bb20bSChuck Lever if (pglen < len)
139094bb20bSChuck Lever len = pglen;
140094bb20bSChuck Lever ret = copy_actor(desc, kaddr + base, len);
141094bb20bSChuck Lever base = 0;
142094bb20bSChuck Lever } else {
143094bb20bSChuck Lever if (pglen < len)
144094bb20bSChuck Lever len = pglen;
145094bb20bSChuck Lever ret = copy_actor(desc, kaddr, len);
146094bb20bSChuck Lever }
147094bb20bSChuck Lever flush_dcache_page(*ppage);
148b8541786SCong Wang kunmap_atomic(kaddr);
149094bb20bSChuck Lever copied += ret;
150094bb20bSChuck Lever if (ret != len || !desc->count)
151094bb20bSChuck Lever goto out;
152094bb20bSChuck Lever ppage++;
153094bb20bSChuck Lever } while ((pglen -= len) != 0);
154094bb20bSChuck Lever copy_tail:
155094bb20bSChuck Lever len = xdr->tail[0].iov_len;
156094bb20bSChuck Lever if (base < len)
157094bb20bSChuck Lever copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base);
158094bb20bSChuck Lever out:
159094bb20bSChuck Lever return copied;
160094bb20bSChuck Lever }
161094bb20bSChuck Lever
162094bb20bSChuck Lever /**
163094bb20bSChuck Lever * csum_partial_copy_to_xdr - checksum and copy data
164094bb20bSChuck Lever * @xdr: target XDR buffer
165094bb20bSChuck Lever * @skb: source skb
166094bb20bSChuck Lever *
167094bb20bSChuck Lever * We have set things up such that we perform the checksum of the UDP
168094bb20bSChuck Lever * packet in parallel with the copies into the RPC client iovec. -DaveM
169094bb20bSChuck Lever */
csum_partial_copy_to_xdr(struct xdr_buf * xdr,struct sk_buff * skb)170094bb20bSChuck Lever int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
171094bb20bSChuck Lever {
172dd456471SChuck Lever struct xdr_skb_reader desc;
173094bb20bSChuck Lever
174094bb20bSChuck Lever desc.skb = skb;
1751da8c681SWillem de Bruijn desc.offset = 0;
176094bb20bSChuck Lever desc.count = skb->len - desc.offset;
177094bb20bSChuck Lever
17860476372SHerbert Xu if (skb_csum_unnecessary(skb))
179094bb20bSChuck Lever goto no_checksum;
180094bb20bSChuck Lever
181094bb20bSChuck Lever desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
1829d292316SChuck Lever if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_and_csum_bits) < 0)
183094bb20bSChuck Lever return -1;
184094bb20bSChuck Lever if (desc.offset != skb->len) {
1855f92a738SAl Viro __wsum csum2;
186094bb20bSChuck Lever csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0);
187094bb20bSChuck Lever desc.csum = csum_block_add(desc.csum, csum2, desc.offset);
188094bb20bSChuck Lever }
189094bb20bSChuck Lever if (desc.count)
190094bb20bSChuck Lever return -1;
191d3bc23e7SAl Viro if (csum_fold(desc.csum))
192094bb20bSChuck Lever return -1;
1937e3cead5STom Herbert if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
1947e3cead5STom Herbert !skb->csum_complete_sw)
1957fe50ac8SCong Wang netdev_rx_csum_fault(skb->dev, skb);
196094bb20bSChuck Lever return 0;
197094bb20bSChuck Lever no_checksum:
1989d292316SChuck Lever if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0)
199094bb20bSChuck Lever return -1;
200094bb20bSChuck Lever if (desc.count)
201094bb20bSChuck Lever return -1;
202094bb20bSChuck Lever return 0;
203094bb20bSChuck Lever }
20412444809S\"Talpey, Thomas\ EXPORT_SYMBOL_GPL(csum_partial_copy_to_xdr);
2059e55eef4SChuck Lever
/*
 * Skip the first @seek bytes of the iterator already set up in @msg
 * (if any), then hand the message to sock_sendmsg() and return its
 * result.
 */
static inline int xprt_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t seek)
{
	if (seek != 0)
		iov_iter_advance(&msg->msg_iter, seek);

	return sock_sendmsg(sock, msg);
}
2139e55eef4SChuck Lever
/*
 * Send the contents of a single kvec, starting @seek bytes into it.
 * Returns whatever xprt_sendmsg() returns.
 */
static int xprt_send_kvec(struct socket *sock, struct msghdr *msg,
			  struct kvec *vec, size_t seek)
{
	const size_t total = vec->iov_len;

	iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, 1, total);

	return xprt_sendmsg(sock, msg, seek);
}
2209e55eef4SChuck Lever
/*
 * Send the page data of @xdr, starting @base bytes into it.
 *
 * The iterator is built over @xdr->bvec and spans page_base + page_len
 * bytes; the leading page_base bytes are skipped together with @base.
 * Returns whatever xprt_sendmsg() returns.
 */
static int xprt_send_pagedata(struct socket *sock, struct msghdr *msg,
			      struct xdr_buf *xdr, size_t base)
{
	const size_t span = xdr->page_len + xdr->page_base;
	const size_t skip = base + xdr->page_base;

	iov_iter_bvec(&msg->msg_iter, ITER_SOURCE, xdr->bvec,
		      xdr_buf_pagecount(xdr), span);

	return xprt_sendmsg(sock, msg, skip);
}
2289e55eef4SChuck Lever
2299e55eef4SChuck Lever /* Common case:
2309e55eef4SChuck Lever * - stream transport
2319e55eef4SChuck Lever * - sending from byte 0 of the message
2329e55eef4SChuck Lever * - the message is wholly contained in @xdr's head iovec
2339e55eef4SChuck Lever */
xprt_send_rm_and_kvec(struct socket * sock,struct msghdr * msg,rpc_fraghdr marker,struct kvec * vec,size_t base)2349e55eef4SChuck Lever static int xprt_send_rm_and_kvec(struct socket *sock, struct msghdr *msg,
2359e55eef4SChuck Lever rpc_fraghdr marker, struct kvec *vec,
2369e55eef4SChuck Lever size_t base)
2379e55eef4SChuck Lever {
2389e55eef4SChuck Lever struct kvec iov[2] = {
2399e55eef4SChuck Lever [0] = {
2409e55eef4SChuck Lever .iov_base = &marker,
2419e55eef4SChuck Lever .iov_len = sizeof(marker)
2429e55eef4SChuck Lever },
2439e55eef4SChuck Lever [1] = *vec,
2449e55eef4SChuck Lever };
2459e55eef4SChuck Lever size_t len = iov[0].iov_len + iov[1].iov_len;
2469e55eef4SChuck Lever
247*de4eda9dSAl Viro iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, iov, 2, len);
2489e55eef4SChuck Lever return xprt_sendmsg(sock, msg, base);
2499e55eef4SChuck Lever }
2509e55eef4SChuck Lever
2519e55eef4SChuck Lever /**
2529e55eef4SChuck Lever * xprt_sock_sendmsg - write an xdr_buf directly to a socket
2539e55eef4SChuck Lever * @sock: open socket to send on
2549e55eef4SChuck Lever * @msg: socket message metadata
2559e55eef4SChuck Lever * @xdr: xdr_buf containing this request
2569e55eef4SChuck Lever * @base: starting position in the buffer
2579e55eef4SChuck Lever * @marker: stream record marker field
2589e55eef4SChuck Lever * @sent_p: return the total number of bytes successfully queued for sending
2599e55eef4SChuck Lever *
2609e55eef4SChuck Lever * Return values:
2619e55eef4SChuck Lever * On success, returns zero and fills in @sent_p.
2629e55eef4SChuck Lever * %-ENOTSOCK if @sock is not a struct socket.
2639e55eef4SChuck Lever */
xprt_sock_sendmsg(struct socket * sock,struct msghdr * msg,struct xdr_buf * xdr,unsigned int base,rpc_fraghdr marker,unsigned int * sent_p)2649e55eef4SChuck Lever int xprt_sock_sendmsg(struct socket *sock, struct msghdr *msg,
2659e55eef4SChuck Lever struct xdr_buf *xdr, unsigned int base,
2669e55eef4SChuck Lever rpc_fraghdr marker, unsigned int *sent_p)
2679e55eef4SChuck Lever {
2689e55eef4SChuck Lever unsigned int rmsize = marker ? sizeof(marker) : 0;
2699e55eef4SChuck Lever unsigned int remainder = rmsize + xdr->len - base;
2709e55eef4SChuck Lever unsigned int want;
2719e55eef4SChuck Lever int err = 0;
2729e55eef4SChuck Lever
2739e55eef4SChuck Lever *sent_p = 0;
2749e55eef4SChuck Lever
2759e55eef4SChuck Lever if (unlikely(!sock))
2769e55eef4SChuck Lever return -ENOTSOCK;
2779e55eef4SChuck Lever
2789e55eef4SChuck Lever msg->msg_flags |= MSG_MORE;
2799e55eef4SChuck Lever want = xdr->head[0].iov_len + rmsize;
2809e55eef4SChuck Lever if (base < want) {
2819e55eef4SChuck Lever unsigned int len = want - base;
2829e55eef4SChuck Lever
2839e55eef4SChuck Lever remainder -= len;
2849e55eef4SChuck Lever if (remainder == 0)
2859e55eef4SChuck Lever msg->msg_flags &= ~MSG_MORE;
2869e55eef4SChuck Lever if (rmsize)
2879e55eef4SChuck Lever err = xprt_send_rm_and_kvec(sock, msg, marker,
2889e55eef4SChuck Lever &xdr->head[0], base);
2899e55eef4SChuck Lever else
2909e55eef4SChuck Lever err = xprt_send_kvec(sock, msg, &xdr->head[0], base);
2919e55eef4SChuck Lever if (remainder == 0 || err != len)
2929e55eef4SChuck Lever goto out;
2939e55eef4SChuck Lever *sent_p += err;
2949e55eef4SChuck Lever base = 0;
2959e55eef4SChuck Lever } else {
2969e55eef4SChuck Lever base -= want;
2979e55eef4SChuck Lever }
2989e55eef4SChuck Lever
2999e55eef4SChuck Lever if (base < xdr->page_len) {
3009e55eef4SChuck Lever unsigned int len = xdr->page_len - base;
3019e55eef4SChuck Lever
3029e55eef4SChuck Lever remainder -= len;
3039e55eef4SChuck Lever if (remainder == 0)
3049e55eef4SChuck Lever msg->msg_flags &= ~MSG_MORE;
3059e55eef4SChuck Lever err = xprt_send_pagedata(sock, msg, xdr, base);
3069e55eef4SChuck Lever if (remainder == 0 || err != len)
3079e55eef4SChuck Lever goto out;
3089e55eef4SChuck Lever *sent_p += err;
3099e55eef4SChuck Lever base = 0;
3109e55eef4SChuck Lever } else {
3119e55eef4SChuck Lever base -= xdr->page_len;
3129e55eef4SChuck Lever }
3139e55eef4SChuck Lever
3149e55eef4SChuck Lever if (base >= xdr->tail[0].iov_len)
3159e55eef4SChuck Lever return 0;
3169e55eef4SChuck Lever msg->msg_flags &= ~MSG_MORE;
3179e55eef4SChuck Lever err = xprt_send_kvec(sock, msg, &xdr->tail[0], base);
3189e55eef4SChuck Lever out:
3199e55eef4SChuck Lever if (err > 0) {
3209e55eef4SChuck Lever *sent_p += err;
3219e55eef4SChuck Lever err = 0;
3229e55eef4SChuck Lever }
3239e55eef4SChuck Lever return err;
3249e55eef4SChuck Lever }
325