12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX
41da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket
51da177e4SLinus Torvalds * interface as the means of communication with the user level.
61da177e4SLinus Torvalds *
71da177e4SLinus Torvalds * Generic socket support routines. Memory allocators, socket lock/release
81da177e4SLinus Torvalds * handler for protocols to use and generic option handler.
91da177e4SLinus Torvalds *
1002c30a84SJesper Juhl * Authors: Ross Biro
111da177e4SLinus Torvalds * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
121da177e4SLinus Torvalds * Florian La Roche, <flla@stud.uni-sb.de>
131da177e4SLinus Torvalds * Alan Cox, <A.Cox@swansea.ac.uk>
141da177e4SLinus Torvalds *
151da177e4SLinus Torvalds * Fixes:
161da177e4SLinus Torvalds * Alan Cox : Numerous verify_area() problems
171da177e4SLinus Torvalds * Alan Cox : Connecting on a connecting socket
181da177e4SLinus Torvalds * now returns an error for tcp.
191da177e4SLinus Torvalds * Alan Cox : sock->protocol is set correctly.
201da177e4SLinus Torvalds * and is not sometimes left as 0.
211da177e4SLinus Torvalds * Alan Cox : connect handles icmp errors on a
221da177e4SLinus Torvalds * connect properly. Unfortunately there
231da177e4SLinus Torvalds * is a restart syscall nasty there. I
241da177e4SLinus Torvalds * can't match BSD without hacking the C
251da177e4SLinus Torvalds * library. Ideas urgently sought!
261da177e4SLinus Torvalds * Alan Cox : Disallow bind() to addresses that are
271da177e4SLinus Torvalds * not ours - especially broadcast ones!!
281da177e4SLinus Torvalds * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost)
291da177e4SLinus Torvalds * Alan Cox : sock_wfree/sock_rfree don't destroy sockets,
301da177e4SLinus Torvalds * instead they leave that for the DESTROY timer.
311da177e4SLinus Torvalds * Alan Cox : Clean up error flag in accept
321da177e4SLinus Torvalds * Alan Cox : TCP ack handling is buggy, the DESTROY timer
331da177e4SLinus Torvalds * was buggy. Put a remove_sock() in the handler
341da177e4SLinus Torvalds * for memory when we hit 0. Also altered the timer
351da177e4SLinus Torvalds * code. The ACK stuff can wait and needs major
361da177e4SLinus Torvalds * TCP layer surgery.
371da177e4SLinus Torvalds * Alan Cox : Fixed TCP ack bug, removed remove sock
381da177e4SLinus Torvalds * and fixed timer/inet_bh race.
391da177e4SLinus Torvalds * Alan Cox : Added zapped flag for TCP
401da177e4SLinus Torvalds * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code
411da177e4SLinus Torvalds * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
421da177e4SLinus Torvalds * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources
431da177e4SLinus Torvalds * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing.
441da177e4SLinus Torvalds * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
451da177e4SLinus Torvalds * Rick Sladkey : Relaxed UDP rules for matching packets.
461da177e4SLinus Torvalds * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support
471da177e4SLinus Torvalds * Pauline Middelink : identd support
481da177e4SLinus Torvalds * Alan Cox : Fixed connect() taking signals I think.
491da177e4SLinus Torvalds * Alan Cox : SO_LINGER supported
501da177e4SLinus Torvalds * Alan Cox : Error reporting fixes
511da177e4SLinus Torvalds * Anonymous : inet_create tidied up (sk->reuse setting)
521da177e4SLinus Torvalds * Alan Cox : inet sockets don't set sk->type!
531da177e4SLinus Torvalds * Alan Cox : Split socket option code
541da177e4SLinus Torvalds * Alan Cox : Callbacks
551da177e4SLinus Torvalds * Alan Cox : Nagle flag for Charles & Johannes stuff
561da177e4SLinus Torvalds * Alex : Removed restriction on inet fioctl
571da177e4SLinus Torvalds * Alan Cox : Splitting INET from NET core
581da177e4SLinus Torvalds * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt()
591da177e4SLinus Torvalds * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code
601da177e4SLinus Torvalds * Alan Cox : Split IP from generic code
611da177e4SLinus Torvalds * Alan Cox : New kfree_skbmem()
621da177e4SLinus Torvalds * Alan Cox : Make SO_DEBUG superuser only.
631da177e4SLinus Torvalds * Alan Cox : Allow anyone to clear SO_DEBUG
641da177e4SLinus Torvalds * (compatibility fix)
651da177e4SLinus Torvalds * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput.
661da177e4SLinus Torvalds * Alan Cox : Allocator for a socket is settable.
671da177e4SLinus Torvalds * Alan Cox : SO_ERROR includes soft errors.
681da177e4SLinus Torvalds * Alan Cox : Allow NULL arguments on some SO_ opts
691da177e4SLinus Torvalds * Alan Cox : Generic socket allocation to make hooks
701da177e4SLinus Torvalds * easier (suggested by Craig Metz).
711da177e4SLinus Torvalds * Michael Pall : SO_ERROR returns positive errno again
721da177e4SLinus Torvalds * Steve Whitehouse: Added default destructor to free
731da177e4SLinus Torvalds * protocol private data.
741da177e4SLinus Torvalds * Steve Whitehouse: Added various other default routines
751da177e4SLinus Torvalds * common to several socket families.
761da177e4SLinus Torvalds * Chris Evans : Call suser() check last on F_SETOWN
771da177e4SLinus Torvalds * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
781da177e4SLinus Torvalds * Andi Kleen : Add sock_kmalloc()/sock_kfree_s()
791da177e4SLinus Torvalds * Andi Kleen : Fix write_space callback
801da177e4SLinus Torvalds * Chris Evans : Security fixes - signedness again
811da177e4SLinus Torvalds * Arnaldo C. Melo : cleanups, use skb_queue_purge
821da177e4SLinus Torvalds *
831da177e4SLinus Torvalds * To Fix:
841da177e4SLinus Torvalds */
851da177e4SLinus Torvalds
86e005d193SJoe Perches #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
87e005d193SJoe Perches
8880b14deeSRichard Cochran #include <asm/unaligned.h>
894fc268d2SRandy Dunlap #include <linux/capability.h>
901da177e4SLinus Torvalds #include <linux/errno.h>
91cb820f8eSRichard Cochran #include <linux/errqueue.h>
921da177e4SLinus Torvalds #include <linux/types.h>
931da177e4SLinus Torvalds #include <linux/socket.h>
941da177e4SLinus Torvalds #include <linux/in.h>
951da177e4SLinus Torvalds #include <linux/kernel.h>
961da177e4SLinus Torvalds #include <linux/module.h>
971da177e4SLinus Torvalds #include <linux/proc_fs.h>
981da177e4SLinus Torvalds #include <linux/seq_file.h>
991da177e4SLinus Torvalds #include <linux/sched.h>
100f1083048SVlastimil Babka #include <linux/sched/mm.h>
1011da177e4SLinus Torvalds #include <linux/timer.h>
1021da177e4SLinus Torvalds #include <linux/string.h>
1031da177e4SLinus Torvalds #include <linux/sockios.h>
1041da177e4SLinus Torvalds #include <linux/net.h>
1051da177e4SLinus Torvalds #include <linux/mm.h>
1061da177e4SLinus Torvalds #include <linux/slab.h>
1071da177e4SLinus Torvalds #include <linux/interrupt.h>
1081da177e4SLinus Torvalds #include <linux/poll.h>
1091da177e4SLinus Torvalds #include <linux/tcp.h>
110ef8ad307SEric Dumazet #include <linux/udp.h>
1111da177e4SLinus Torvalds #include <linux/init.h>
112a1f8e7f7SAl Viro #include <linux/highmem.h>
1133f551f94SEric W. Biederman #include <linux/user_namespace.h>
114c5905afbSIngo Molnar #include <linux/static_key.h>
1153969eb38SDavid S. Miller #include <linux/memcontrol.h>
1168c1ae10dSDavid S. Miller #include <linux/prefetch.h>
117a6c0d093SChristoph Hellwig #include <linux/compat.h>
118e1d001faSBreno Leitao #include <linux/mroute.h>
119e1d001faSBreno Leitao #include <linux/mroute6.h>
120e1d001faSBreno Leitao #include <linux/icmpv6.h>
1211da177e4SLinus Torvalds
1227c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
1231da177e4SLinus Torvalds
1241da177e4SLinus Torvalds #include <linux/netdevice.h>
1251da177e4SLinus Torvalds #include <net/protocol.h>
1261da177e4SLinus Torvalds #include <linux/skbuff.h>
127457c4cbcSEric W. Biederman #include <net/net_namespace.h>
1282e6599cbSArnaldo Carvalho de Melo #include <net/request_sock.h>
1291da177e4SLinus Torvalds #include <net/sock.h>
13020d49473SPatrick Ohly #include <linux/net_tstamp.h>
1311da177e4SLinus Torvalds #include <net/xfrm.h>
1321da177e4SLinus Torvalds #include <linux/ipsec.h>
133f8451725SHerbert Xu #include <net/cls_cgroup.h>
1345bc1421eSNeil Horman #include <net/netprio_cgroup.h>
135eb4cb008SCraig Gallek #include <linux/sock_diag.h>
1361da177e4SLinus Torvalds
1371da177e4SLinus Torvalds #include <linux/filter.h>
138538950a1SCraig Gallek #include <net/sock_reuseport.h>
1396ac99e8fSMartin KaFai Lau #include <net/bpf_sk_storage.h>
1401da177e4SLinus Torvalds
1413847ce32SSatoru Moriya #include <trace/events/sock.h>
1423847ce32SSatoru Moriya
1431da177e4SLinus Torvalds #include <net/tcp.h>
144076bb0c8SEliezer Tamir #include <net/busy_poll.h>
145e1d001faSBreno Leitao #include <net/phonet/phonet.h>
14606021292SEliezer Tamir
147d463126eSYangbo Lu #include <linux/ethtool.h>
148d463126eSYangbo Lu
1496264f58cSJakub Kicinski #include "dev.h"
1506264f58cSJakub Kicinski
15136b77a52SGlauber Costa static DEFINE_MUTEX(proto_list_mutex);
152d1a4c0b3SGlauber Costa static LIST_HEAD(proto_list);
153d1a4c0b3SGlauber Costa
1540a8afd9fSPavel Begunkov static void sock_def_write_space_wfree(struct sock *sk);
155052ada09SPavel Begunkov static void sock_def_write_space(struct sock *sk);
156052ada09SPavel Begunkov
/**
 * sk_ns_capable - General socket capability test
 * @sk: Socket to use a capability on or through
 * @user_ns: The user namespace of the capability to use
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket had the capability @cap when
 * the socket was created and the current process has the capability
 * @cap in the user namespace @user_ns.
 */
bool sk_ns_capable(const struct sock *sk,
		   struct user_namespace *user_ns, int cap)
{
	/* The opener's capability is recorded on the socket's struct file;
	 * both the opener and the current task must hold @cap in @user_ns.
	 */
	return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
		ns_capable(user_ns, cap);
}
EXPORT_SYMBOL(sk_ns_capable);
174a3b299daSEric W. Biederman
/**
 * sk_capable - Socket global capability test
 * @sk: Socket to use a capability on or through
 * @cap: The global capability to use
 *
 * Test to see if the opener of the socket had the capability @cap when
 * the socket was created and the current process has the capability
 * @cap in all user namespaces (i.e. in &init_user_ns).
 */
bool sk_capable(const struct sock *sk, int cap)
{
	return sk_ns_capable(sk, &init_user_ns, cap);
}
EXPORT_SYMBOL(sk_capable);
189a3b299daSEric W. Biederman
/**
 * sk_net_capable - Network namespace socket capability test
 * @sk: Socket to use a capability on or through
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket had the capability @cap when
 * the socket was created and the current process has the capability
 * @cap over the network namespace the socket is a member of.
 */
bool sk_net_capable(const struct sock *sk, int cap)
{
	return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
}
EXPORT_SYMBOL(sk_net_capable);
204a3b299daSEric W. Biederman
205da21f24dSIngo Molnar /*
206da21f24dSIngo Molnar * Each address family might have different locking rules, so we have
207cdfbabfbSDavid Howells * one slock key per address family and separate keys for internal and
208cdfbabfbSDavid Howells * userspace sockets.
209da21f24dSIngo Molnar */
210a5b5bb9aSIngo Molnar static struct lock_class_key af_family_keys[AF_MAX];
211cdfbabfbSDavid Howells static struct lock_class_key af_family_kern_keys[AF_MAX];
212a5b5bb9aSIngo Molnar static struct lock_class_key af_family_slock_keys[AF_MAX];
213cdfbabfbSDavid Howells static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
214a5b5bb9aSIngo Molnar
215a5b5bb9aSIngo Molnar /*
216a5b5bb9aSIngo Molnar * Make lock validator output more readable. (we pre-construct these
217a5b5bb9aSIngo Molnar * strings build-time, so that runtime initialization of socket
218a5b5bb9aSIngo Molnar * locks is fast):
219a5b5bb9aSIngo Molnar */
220cdfbabfbSDavid Howells
221cdfbabfbSDavid Howells #define _sock_locks(x) \
222cdfbabfbSDavid Howells x "AF_UNSPEC", x "AF_UNIX" , x "AF_INET" , \
223cdfbabfbSDavid Howells x "AF_AX25" , x "AF_IPX" , x "AF_APPLETALK", \
224cdfbabfbSDavid Howells x "AF_NETROM", x "AF_BRIDGE" , x "AF_ATMPVC" , \
225cdfbabfbSDavid Howells x "AF_X25" , x "AF_INET6" , x "AF_ROSE" , \
226cdfbabfbSDavid Howells x "AF_DECnet", x "AF_NETBEUI" , x "AF_SECURITY" , \
227cdfbabfbSDavid Howells x "AF_KEY" , x "AF_NETLINK" , x "AF_PACKET" , \
228cdfbabfbSDavid Howells x "AF_ASH" , x "AF_ECONET" , x "AF_ATMSVC" , \
229cdfbabfbSDavid Howells x "AF_RDS" , x "AF_SNA" , x "AF_IRDA" , \
230cdfbabfbSDavid Howells x "AF_PPPOX" , x "AF_WANPIPE" , x "AF_LLC" , \
231cdfbabfbSDavid Howells x "27" , x "28" , x "AF_CAN" , \
232cdfbabfbSDavid Howells x "AF_TIPC" , x "AF_BLUETOOTH", x "IUCV" , \
233cdfbabfbSDavid Howells x "AF_RXRPC" , x "AF_ISDN" , x "AF_PHONET" , \
234cdfbabfbSDavid Howells x "AF_IEEE802154", x "AF_CAIF" , x "AF_ALG" , \
235cdfbabfbSDavid Howells x "AF_NFC" , x "AF_VSOCK" , x "AF_KCM" , \
23668e8b849SBjörn Töpel x "AF_QIPCRTR", x "AF_SMC" , x "AF_XDP" , \
237bc49d816SJeremy Kerr x "AF_MCTP" , \
23868e8b849SBjörn Töpel x "AF_MAX"
239cdfbabfbSDavid Howells
24036cbd3dcSJan Engelhardt static const char *const af_family_key_strings[AF_MAX+1] = {
241cdfbabfbSDavid Howells _sock_locks("sk_lock-")
242a5b5bb9aSIngo Molnar };
24336cbd3dcSJan Engelhardt static const char *const af_family_slock_key_strings[AF_MAX+1] = {
244cdfbabfbSDavid Howells _sock_locks("slock-")
245a5b5bb9aSIngo Molnar };
24636cbd3dcSJan Engelhardt static const char *const af_family_clock_key_strings[AF_MAX+1] = {
247cdfbabfbSDavid Howells _sock_locks("clock-")
248cdfbabfbSDavid Howells };
249cdfbabfbSDavid Howells
250cdfbabfbSDavid Howells static const char *const af_family_kern_key_strings[AF_MAX+1] = {
251cdfbabfbSDavid Howells _sock_locks("k-sk_lock-")
252cdfbabfbSDavid Howells };
253cdfbabfbSDavid Howells static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
254cdfbabfbSDavid Howells _sock_locks("k-slock-")
255cdfbabfbSDavid Howells };
256cdfbabfbSDavid Howells static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
257cdfbabfbSDavid Howells _sock_locks("k-clock-")
258443aef0eSPeter Zijlstra };
259581319c5SPaolo Abeni static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
2606b431d50SMatthieu Baerts _sock_locks("rlock-")
261581319c5SPaolo Abeni };
262581319c5SPaolo Abeni static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
2636b431d50SMatthieu Baerts _sock_locks("wlock-")
264581319c5SPaolo Abeni };
265581319c5SPaolo Abeni static const char *const af_family_elock_key_strings[AF_MAX+1] = {
2666b431d50SMatthieu Baerts _sock_locks("elock-")
267581319c5SPaolo Abeni };
268da21f24dSIngo Molnar
269da21f24dSIngo Molnar /*
270581319c5SPaolo Abeni * sk_callback_lock and sk queues locking rules are per-address-family,
271da21f24dSIngo Molnar * so split the lock classes by using a per-AF key:
272da21f24dSIngo Molnar */
273da21f24dSIngo Molnar static struct lock_class_key af_callback_keys[AF_MAX];
274581319c5SPaolo Abeni static struct lock_class_key af_rlock_keys[AF_MAX];
275581319c5SPaolo Abeni static struct lock_class_key af_wlock_keys[AF_MAX];
276581319c5SPaolo Abeni static struct lock_class_key af_elock_keys[AF_MAX];
277cdfbabfbSDavid Howells static struct lock_class_key af_kern_callback_keys[AF_MAX];
278da21f24dSIngo Molnar
2791da177e4SLinus Torvalds /* Run time adjustable parameters. */
280ab32ea5dSBrian Haley __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
2816d8ebc8aSHans Schillstrom EXPORT_SYMBOL(sysctl_wmem_max);
282ab32ea5dSBrian Haley __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
2836d8ebc8aSHans Schillstrom EXPORT_SYMBOL(sysctl_rmem_max);
284ab32ea5dSBrian Haley __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
285ab32ea5dSBrian Haley __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
286fe1e8381SAdam Li int sysctl_mem_pcpu_rsv __read_mostly = SK_MEMORY_PCPU_RESERVE;
2871da177e4SLinus Torvalds
28825985edcSLucas De Marchi /* Maximal space eaten by iovec or ancillary data plus some space */
289ab32ea5dSBrian Haley int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
2902a91525cSEric Dumazet EXPORT_SYMBOL(sysctl_optmem_max);
2911da177e4SLinus Torvalds
292b245be1fSWillem de Bruijn int sysctl_tstamp_allow_data __read_mostly = 1;
293b245be1fSWillem de Bruijn
294a7950ae8SDavidlohr Bueso DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
295a7950ae8SDavidlohr Bueso EXPORT_SYMBOL_GPL(memalloc_socks_key);
296c93bdd0eSMel Gorman
/**
 * sk_set_memalloc - sets %SOCK_MEMALLOC
 * @sk: socket to set it on
 *
 * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
 * It's the responsibility of the admin to adjust min_free_kbytes
 * to meet the requirements
 */
void sk_set_memalloc(struct sock *sk)
{
	sock_set_flag(sk, SOCK_MEMALLOC);
	/* Allocations on behalf of this socket may dip into reserves. */
	sk->sk_allocation |= __GFP_MEMALLOC;
	/* Keep the global static key in sync: it stays enabled while at
	 * least one socket has SOCK_MEMALLOC set.  Pairs with
	 * static_branch_dec() in sk_clear_memalloc().
	 */
	static_branch_inc(&memalloc_socks_key);
}
EXPORT_SYMBOL_GPL(sk_set_memalloc);
3127cb02404SMel Gorman
/* Undo sk_set_memalloc(): clear %SOCK_MEMALLOC and drop the socket's
 * access to emergency memory reserves.
 */
void sk_clear_memalloc(struct sock *sk)
{
	sock_reset_flag(sk, SOCK_MEMALLOC);
	sk->sk_allocation &= ~__GFP_MEMALLOC;
	/* Pairs with static_branch_inc() in sk_set_memalloc(). */
	static_branch_dec(&memalloc_socks_key);

	/*
	 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
	 * progress of swapping. SOCK_MEMALLOC may be cleared while
	 * it has rmem allocations due to the last swapfile being deactivated
	 * but there is a risk that the socket is unusable due to exceeding
	 * the rmem limits. Reclaim the reserves and obey rmem limits again.
	 */
	sk_mem_reclaim(sk);
}
EXPORT_SYMBOL_GPL(sk_clear_memalloc);
3297cb02404SMel Gorman
/* Process a backlogged skb for a %SOCK_MEMALLOC socket.  The protocol
 * receive handler runs with reclaim disabled so that packet processing
 * cannot recurse into direct reclaim (which may itself depend on this
 * socket making progress).
 */
int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	int ret;
	unsigned int noreclaim_flag;

	/* these should have been dropped before queueing */
	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));

	noreclaim_flag = memalloc_noreclaim_save();
	/* Direct-call the common TCP handlers to avoid an indirect call. */
	ret = INDIRECT_CALL_INET(sk->sk_backlog_rcv,
				 tcp_v6_do_rcv,
				 tcp_v4_do_rcv,
				 sk, skb);
	memalloc_noreclaim_restore(noreclaim_flag);

	return ret;
}
EXPORT_SYMBOL(__sk_backlog_rcv);
348b4b9e355SMel Gorman
sk_error_report(struct sock * sk)349e3ae2365SAlexander Aring void sk_error_report(struct sock *sk)
350e3ae2365SAlexander Aring {
351e3ae2365SAlexander Aring sk->sk_error_report(sk);
352e6a3e443SAlexander Aring
353e6a3e443SAlexander Aring switch (sk->sk_family) {
354e6a3e443SAlexander Aring case AF_INET:
355e6a3e443SAlexander Aring fallthrough;
356e6a3e443SAlexander Aring case AF_INET6:
357e6a3e443SAlexander Aring trace_inet_sk_error_report(sk);
358e6a3e443SAlexander Aring break;
359e6a3e443SAlexander Aring default:
360e6a3e443SAlexander Aring break;
361e6a3e443SAlexander Aring }
362e3ae2365SAlexander Aring }
363e3ae2365SAlexander Aring EXPORT_SYMBOL(sk_error_report);
364e3ae2365SAlexander Aring
/* Convert a socket timeout in jiffies (@timeo) into the timeval layout
 * expected by userspace and store it at @optval.
 * @old_timeval selects the legacy struct timeval ABI (including the
 * 32-bit compat layout when running a compat syscall); otherwise the
 * 64-bit __kernel_sock_timeval layout is used.
 * Returns the number of bytes written to @optval.
 * MAX_SCHEDULE_TIMEOUT (i.e. "no timeout") is reported as {0, 0}.
 */
int sock_get_timeout(long timeo, void *optval, bool old_timeval)
{
	struct __kernel_sock_timeval tv = { .tv_sec = 0, .tv_usec = 0 };

	if (timeo != MAX_SCHEDULE_TIMEOUT) {
		tv.tv_sec = timeo / HZ;
		tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ;
	}

	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
		struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec };

		*(struct old_timeval32 *)optval = tv32;
		return sizeof(tv32);
	}

	if (old_timeval) {
		struct __kernel_old_timeval old_tv = {
			.tv_sec = tv.tv_sec,
			.tv_usec = tv.tv_usec,
		};

		*(struct __kernel_old_timeval *)optval = old_tv;
		return sizeof(old_tv);
	}

	*(struct __kernel_sock_timeval *)optval = tv;
	return sizeof(tv);
}
EXPORT_SYMBOL(sock_get_timeout);
395a9beb86aSDeepa Dinamani
/* Copy a timeout value from userspace into *@tv, accepting whichever
 * timeval ABI the caller used: the 32-bit compat layout, the legacy
 * struct timeval, or the 64-bit __kernel_sock_timeval layout.
 * Returns 0 on success, -EINVAL if @optlen is too small for the
 * selected layout, or -EFAULT on a failed userspace copy.
 */
int sock_copy_user_timeval(struct __kernel_sock_timeval *tv,
			   sockptr_t optval, int optlen, bool old_timeval)
{
	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
		struct old_timeval32 tv32;

		if (optlen < sizeof(tv32))
			return -EINVAL;
		if (copy_from_sockptr(&tv32, optval, sizeof(tv32)))
			return -EFAULT;
		tv->tv_sec = tv32.tv_sec;
		tv->tv_usec = tv32.tv_usec;
		return 0;
	}

	if (old_timeval) {
		struct __kernel_old_timeval old_tv;

		if (optlen < sizeof(old_tv))
			return -EINVAL;
		if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv)))
			return -EFAULT;
		tv->tv_sec = old_tv.tv_sec;
		tv->tv_usec = old_tv.tv_usec;
		return 0;
	}

	if (optlen < sizeof(*tv))
		return -EINVAL;
	if (copy_from_sockptr(tv, optval, sizeof(*tv)))
		return -EFAULT;

	return 0;
}
EXPORT_SYMBOL(sock_copy_user_timeval);
4284c1e34c0SRichard Palethorpe
/* Parse an SO_SNDTIMEO/SO_RCVTIMEO value from userspace and store the
 * resulting timeout, in jiffies, at *@timeo_p.
 * Returns 0 on success or a negative errno (-EINVAL/-EFAULT from the
 * copy, -EDOM for an out-of-range tv_usec).
 */
static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
			    bool old_timeval)
{
	struct __kernel_sock_timeval tv;
	int err = sock_copy_user_timeval(&tv, optval, optlen, old_timeval);
	long val;

	if (err)
		return err;

	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
		return -EDOM;

	if (tv.tv_sec < 0) {
		static int warned __read_mostly;

		/* A negative timeout is treated as "no timeout" (0);
		 * warn at most 10 times, rate limited.
		 * WRITE_ONCE() pairs with lockless readers of the timeout.
		 */
		WRITE_ONCE(*timeo_p, 0);
		if (warned < 10 && net_ratelimit()) {
			warned++;
			pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
				__func__, current->comm, task_pid_nr(current));
		}
		return 0;
	}
	/* A zero timeval, or one too large to convert without overflow,
	 * maps to MAX_SCHEDULE_TIMEOUT (infinite).
	 */
	val = MAX_SCHEDULE_TIMEOUT;
	if ((tv.tv_sec || tv.tv_usec) &&
	    (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)))
		val = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec,
						    USEC_PER_SEC / HZ);
	WRITE_ONCE(*timeo_p, val);
	return 0;
}
4611da177e4SLinus Torvalds
sock_needs_netstamp(const struct sock * sk)462080a270fSHannes Frederic Sowa static bool sock_needs_netstamp(const struct sock *sk)
463080a270fSHannes Frederic Sowa {
464080a270fSHannes Frederic Sowa switch (sk->sk_family) {
465080a270fSHannes Frederic Sowa case AF_UNSPEC:
466080a270fSHannes Frederic Sowa case AF_UNIX:
467080a270fSHannes Frederic Sowa return false;
468080a270fSHannes Frederic Sowa default:
469080a270fSHannes Frederic Sowa return true;
470080a270fSHannes Frederic Sowa }
471080a270fSHannes Frederic Sowa }
472080a270fSHannes Frederic Sowa
sock_disable_timestamp(struct sock * sk,unsigned long flags)47308e29af3SEric Dumazet static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
4741da177e4SLinus Torvalds {
47508e29af3SEric Dumazet if (sk->sk_flags & flags) {
47608e29af3SEric Dumazet sk->sk_flags &= ~flags;
477080a270fSHannes Frederic Sowa if (sock_needs_netstamp(sk) &&
478080a270fSHannes Frederic Sowa !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
4791da177e4SLinus Torvalds net_disable_timestamp();
4801da177e4SLinus Torvalds }
4811da177e4SLinus Torvalds }
4821da177e4SLinus Torvalds
4831da177e4SLinus Torvalds
/* Queue @skb onto @sk's receive queue without running the socket filter.
 * Returns 0 on success, -ENOMEM if the receive buffer limit is already
 * exceeded, or -ENOBUFS if protocol memory accounting fails.  On error
 * the caller still owns @skb; sk_drops is incremented.
 */
int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	unsigned long flags;
	struct sk_buff_head *list = &sk->sk_receive_queue;

	/* Lockless limit check; sk_rcvbuf may change concurrently,
	 * hence READ_ONCE().
	 */
	if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) {
		atomic_inc(&sk->sk_drops);
		trace_sock_rcvqueue_full(sk, skb);
		return -ENOMEM;
	}

	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
		atomic_inc(&sk->sk_drops);
		return -ENOBUFS;
	}

	/* Ownership passes to the socket; charge rmem via destructor. */
	skb->dev = NULL;
	skb_set_owner_r(skb, sk);

	/* we escape from rcu protected region, make sure we dont leak
	 * a norefcounted dst
	 */
	skb_dst_force(skb);

	spin_lock_irqsave(&list->lock, flags);
	sock_skb_set_dropcount(sk, skb);
	__skb_queue_tail(list, skb);
	spin_unlock_irqrestore(&list->lock, flags);

	/* Wake readers unless the socket is already orphaned. */
	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk);
	return 0;
}
EXPORT_SYMBOL(__sock_queue_rcv_skb);
518e6afc8acSsamanthakumar
sock_queue_rcv_skb_reason(struct sock * sk,struct sk_buff * skb,enum skb_drop_reason * reason)519c1b8a567SMenglong Dong int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb,
520c1b8a567SMenglong Dong enum skb_drop_reason *reason)
521e6afc8acSsamanthakumar {
522c1b8a567SMenglong Dong enum skb_drop_reason drop_reason;
523e6afc8acSsamanthakumar int err;
524e6afc8acSsamanthakumar
525e6afc8acSsamanthakumar err = sk_filter(sk, skb);
526c1b8a567SMenglong Dong if (err) {
527c1b8a567SMenglong Dong drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
528c1b8a567SMenglong Dong goto out;
529e6afc8acSsamanthakumar }
530c1b8a567SMenglong Dong err = __sock_queue_rcv_skb(sk, skb);
531c1b8a567SMenglong Dong switch (err) {
532c1b8a567SMenglong Dong case -ENOMEM:
533c1b8a567SMenglong Dong drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
534c1b8a567SMenglong Dong break;
535c1b8a567SMenglong Dong case -ENOBUFS:
536c1b8a567SMenglong Dong drop_reason = SKB_DROP_REASON_PROTO_MEM;
537c1b8a567SMenglong Dong break;
538c1b8a567SMenglong Dong default:
539c1b8a567SMenglong Dong drop_reason = SKB_NOT_DROPPED_YET;
540c1b8a567SMenglong Dong break;
541c1b8a567SMenglong Dong }
542c1b8a567SMenglong Dong out:
543c1b8a567SMenglong Dong if (reason)
544c1b8a567SMenglong Dong *reason = drop_reason;
545c1b8a567SMenglong Dong return err;
546c1b8a567SMenglong Dong }
547c1b8a567SMenglong Dong EXPORT_SYMBOL(sock_queue_rcv_skb_reason);
548f0088a50SDenis Vlasenko
/* Deliver @skb to @sk: run the socket filter (which may trim the skb
 * down to @trim_cap bytes), then either process the packet immediately
 * via sk_backlog_rcv() when no user context owns the socket, or append
 * it to the socket backlog.  Packets exceeding the rmem/backlog limits
 * are dropped and counted in sk_drops.
 * @nested: take bh_lock_sock() with a nested lockdep subclass (the
 *          caller already holds a lock of the same class).
 * @refcounted: if true, a reference on @sk is released before return.
 * Returns NET_RX_SUCCESS or the backlog handler's verdict.
 */
int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
		     const int nested, unsigned int trim_cap, bool refcounted)
{
	int rc = NET_RX_SUCCESS;

	if (sk_filter_trim_cap(sk, skb, trim_cap))
		goto discard_and_relse;

	skb->dev = NULL;

	/* Lockless check against the combined rmem + backlog budget. */
	if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) {
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}
	if (nested)
		bh_lock_sock_nested(sk);
	else
		bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		/*
		 * trylock + unlock semantics:
		 */
		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

		rc = sk_backlog_rcv(sk, skb);

		mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
	} else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
		/* Backlog full: drop and count. */
		bh_unlock_sock(sk);
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}

	bh_unlock_sock(sk);
out:
	if (refcounted)
		sock_put(sk);
	return rc;
discard_and_relse:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(__sk_receive_skb);
592f0088a50SDenis Vlasenko
/* Declarations enabling direct (retpoline-avoiding) calls to dst->ops->check. */
593bbd807dfSBrian Vazquez INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *,
594bbd807dfSBrian Vazquez u32));
595bbd807dfSBrian Vazquez INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
596bbd807dfSBrian Vazquez u32));
/*
 * __sk_dst_check - return the socket's cached dst if still valid.
 * Caller must hold the socket lock (uses __sk_dst_get()). When the dst is
 * obsolete and fails its ->check(), the cache is cleared (tx queue mapping,
 * pending-confirm flag, RCU pointer) and NULL is returned.
 */
__sk_dst_check(struct sock * sk,u32 cookie)597f0088a50SDenis Vlasenko struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
598f0088a50SDenis Vlasenko {
599b6c6712aSEric Dumazet struct dst_entry *dst = __sk_dst_get(sk);
600f0088a50SDenis Vlasenko
601bbd807dfSBrian Vazquez if (dst && dst->obsolete &&
602bbd807dfSBrian Vazquez INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
603bbd807dfSBrian Vazquez dst, cookie) == NULL) {
604e022f0b4SKrishna Kumar sk_tx_queue_clear(sk);
60587324a50SEric Dumazet WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
606a9b3cd7fSStephen Hemminger RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
607f0088a50SDenis Vlasenko dst_release(dst);
608f0088a50SDenis Vlasenko return NULL;
609f0088a50SDenis Vlasenko }
610f0088a50SDenis Vlasenko
611f0088a50SDenis Vlasenko return dst;
612f0088a50SDenis Vlasenko }
613f0088a50SDenis Vlasenko EXPORT_SYMBOL(__sk_dst_check);
614f0088a50SDenis Vlasenko
/*
 * sk_dst_check - lockless variant of __sk_dst_check().
 * Takes its own reference via sk_dst_get(); on an obsolete/failed dst it
 * resets the cache with sk_dst_reset() and drops the reference.
 */
sk_dst_check(struct sock * sk,u32 cookie)615f0088a50SDenis Vlasenko struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
616f0088a50SDenis Vlasenko {
617f0088a50SDenis Vlasenko struct dst_entry *dst = sk_dst_get(sk);
618f0088a50SDenis Vlasenko
619bbd807dfSBrian Vazquez if (dst && dst->obsolete &&
620bbd807dfSBrian Vazquez INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
621bbd807dfSBrian Vazquez dst, cookie) == NULL) {
622f0088a50SDenis Vlasenko sk_dst_reset(sk);
623f0088a50SDenis Vlasenko dst_release(dst);
624f0088a50SDenis Vlasenko return NULL;
625f0088a50SDenis Vlasenko }
626f0088a50SDenis Vlasenko
627f0088a50SDenis Vlasenko return dst;
628f0088a50SDenis Vlasenko }
629f0088a50SDenis Vlasenko EXPORT_SYMBOL(sk_dst_check);
630f0088a50SDenis Vlasenko
/*
 * sock_bindtoindex_locked - bind socket to device @ifindex, socket lock held.
 * Returns -ENOPROTOOPT without CONFIG_NETDEVICES, -EPERM when rebinding
 * without CAP_NET_RAW, -EINVAL for negative ifindex; 0 on success.
 * Rehashes the socket and resets the dst cache after the change.
 */
sock_bindtoindex_locked(struct sock * sk,int ifindex)6317594888cSChristoph Hellwig static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
632f5dd3d0cSDavid Herrmann {
633f5dd3d0cSDavid Herrmann int ret = -ENOPROTOOPT;
634f5dd3d0cSDavid Herrmann #ifdef CONFIG_NETDEVICES
635f5dd3d0cSDavid Herrmann struct net *net = sock_net(sk);
636f5dd3d0cSDavid Herrmann
637f5dd3d0cSDavid Herrmann /* Sorry... */
638f5dd3d0cSDavid Herrmann ret = -EPERM;
/* Only privileged callers may change an existing binding. */
639c427bfecSVincent Bernat if (sk->sk_bound_dev_if && !ns_capable(net->user_ns, CAP_NET_RAW))
640f5dd3d0cSDavid Herrmann goto out;
641f5dd3d0cSDavid Herrmann
642f5dd3d0cSDavid Herrmann ret = -EINVAL;
643f5dd3d0cSDavid Herrmann if (ifindex < 0)
644f5dd3d0cSDavid Herrmann goto out;
645f5dd3d0cSDavid Herrmann
646e5fccaa1SEric Dumazet /* Paired with all READ_ONCE() done locklessly. */
647e5fccaa1SEric Dumazet WRITE_ONCE(sk->sk_bound_dev_if, ifindex);
648e5fccaa1SEric Dumazet
649f5dd3d0cSDavid Herrmann if (sk->sk_prot->rehash)
650f5dd3d0cSDavid Herrmann sk->sk_prot->rehash(sk);
651f5dd3d0cSDavid Herrmann sk_dst_reset(sk);
652f5dd3d0cSDavid Herrmann
653f5dd3d0cSDavid Herrmann ret = 0;
654f5dd3d0cSDavid Herrmann
655f5dd3d0cSDavid Herrmann out:
656f5dd3d0cSDavid Herrmann #endif
657f5dd3d0cSDavid Herrmann
658f5dd3d0cSDavid Herrmann return ret;
659f5dd3d0cSDavid Herrmann }
660f5dd3d0cSDavid Herrmann
/*
 * sock_bindtoindex - bind socket to a device index.
 * @lock_sk: take/release the socket lock around the operation; pass false
 * when the caller already holds it.
 */
sock_bindtoindex(struct sock * sk,int ifindex,bool lock_sk)6618ea204c2SFerenc Fejes int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk)
6627594888cSChristoph Hellwig {
6637594888cSChristoph Hellwig int ret;
6647594888cSChristoph Hellwig
6658ea204c2SFerenc Fejes if (lock_sk)
6667594888cSChristoph Hellwig lock_sock(sk);
6677594888cSChristoph Hellwig ret = sock_bindtoindex_locked(sk, ifindex);
6688ea204c2SFerenc Fejes if (lock_sk)
6697594888cSChristoph Hellwig release_sock(sk);
6707594888cSChristoph Hellwig
6717594888cSChristoph Hellwig return ret;
6727594888cSChristoph Hellwig }
6737594888cSChristoph Hellwig EXPORT_SYMBOL(sock_bindtoindex);
6747594888cSChristoph Hellwig
/*
 * sock_setbindtodevice - SO_BINDTODEVICE setsockopt handler.
 * Resolves the user-supplied interface name to an ifindex (an empty name
 * unbinds, index 0) and applies it under the sockopt lock.
 * Returns -ENOPROTOOPT without CONFIG_NETDEVICES, -EINVAL/-EFAULT/-ENODEV
 * for bad input, else the result of sock_bindtoindex_locked().
 */
sock_setbindtodevice(struct sock * sk,sockptr_t optval,int optlen)6755790642bSChristoph Hellwig static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
6764878809fSDavid S. Miller {
6774878809fSDavid S. Miller int ret = -ENOPROTOOPT;
6784878809fSDavid S. Miller #ifdef CONFIG_NETDEVICES
6793b1e0a65SYOSHIFUJI Hideaki struct net *net = sock_net(sk);
6804878809fSDavid S. Miller char devname[IFNAMSIZ];
6814878809fSDavid S. Miller int index;
6824878809fSDavid S. Miller
6834878809fSDavid S. Miller ret = -EINVAL;
6844878809fSDavid S. Miller if (optlen < 0)
6854878809fSDavid S. Miller goto out;
6864878809fSDavid S. Miller
6874878809fSDavid S. Miller /* Bind this socket to a particular device like "eth0",
6884878809fSDavid S. Miller * as specified in the passed interface name. If the
6894878809fSDavid S. Miller * name is "" or the option length is zero the socket
6904878809fSDavid S. Miller * is not bound.
6914878809fSDavid S. Miller */
6924878809fSDavid S. Miller if (optlen > IFNAMSIZ - 1)
6934878809fSDavid S. Miller optlen = IFNAMSIZ - 1;
6944878809fSDavid S. Miller memset(devname, 0, sizeof(devname));
6954878809fSDavid S. Miller
6964878809fSDavid S. Miller ret = -EFAULT;
6975790642bSChristoph Hellwig if (copy_from_sockptr(devname, optval, optlen))
6984878809fSDavid S. Miller goto out;
6994878809fSDavid S. Miller
7004878809fSDavid S. Miller index = 0;
701000ba2e4SDavid S. Miller if (devname[0] != '\0') {
702bf8e56bfSEric Dumazet struct net_device *dev;
7034878809fSDavid S. Miller
/* Lookup under RCU; only the ifindex is kept, not a device reference. */
704bf8e56bfSEric Dumazet rcu_read_lock();
705bf8e56bfSEric Dumazet dev = dev_get_by_name_rcu(net, devname);
706bf8e56bfSEric Dumazet if (dev)
707bf8e56bfSEric Dumazet index = dev->ifindex;
708bf8e56bfSEric Dumazet rcu_read_unlock();
7094878809fSDavid S. Miller ret = -ENODEV;
7104878809fSDavid S. Miller if (!dev)
7114878809fSDavid S. Miller goto out;
7124878809fSDavid S. Miller }
7134878809fSDavid S. Miller
71424426654SMartin KaFai Lau sockopt_lock_sock(sk);
71524426654SMartin KaFai Lau ret = sock_bindtoindex_locked(sk, index);
71624426654SMartin KaFai Lau sockopt_release_sock(sk);
7174878809fSDavid S. Miller out:
7184878809fSDavid S. Miller #endif
7194878809fSDavid S. Miller
7204878809fSDavid S. Miller return ret;
7214878809fSDavid S. Miller }
7224878809fSDavid S. Miller
/*
 * sock_getbindtodevice - SO_BINDTODEVICE getsockopt handler.
 * Copies the name of the bound device (or a zero length when unbound) to
 * userspace. @len is the user buffer size; it must be at least IFNAMSIZ
 * when a device is bound. Returns 0 on success or a negative errno.
 */
sock_getbindtodevice(struct sock * sk,sockptr_t optval,sockptr_t optlen,int len)7234ff09db1SMartin KaFai Lau static int sock_getbindtodevice(struct sock *sk, sockptr_t optval,
7244ff09db1SMartin KaFai Lau sockptr_t optlen, int len)
725c91f6df2SBrian Haley {
726c91f6df2SBrian Haley int ret = -ENOPROTOOPT;
727c91f6df2SBrian Haley #ifdef CONFIG_NETDEVICES
/* Lockless read; paired with WRITE_ONCE() in sock_bindtoindex_locked(). */
728e5fccaa1SEric Dumazet int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
729c91f6df2SBrian Haley struct net *net = sock_net(sk);
730c91f6df2SBrian Haley char devname[IFNAMSIZ];
731c91f6df2SBrian Haley
732e5fccaa1SEric Dumazet if (bound_dev_if == 0) {
733c91f6df2SBrian Haley len = 0;
734c91f6df2SBrian Haley goto zero;
735c91f6df2SBrian Haley }
736c91f6df2SBrian Haley
737c91f6df2SBrian Haley ret = -EINVAL;
738c91f6df2SBrian Haley if (len < IFNAMSIZ)
739c91f6df2SBrian Haley goto out;
740c91f6df2SBrian Haley
741e5fccaa1SEric Dumazet ret = netdev_get_name(net, devname, bound_dev_if);
7425dbe7c17SNicolas Schichan if (ret)
743c91f6df2SBrian Haley goto out;
744c91f6df2SBrian Haley
745c91f6df2SBrian Haley len = strlen(devname) + 1;
746c91f6df2SBrian Haley
747c91f6df2SBrian Haley ret = -EFAULT;
7484ff09db1SMartin KaFai Lau if (copy_to_sockptr(optval, devname, len))
749c91f6df2SBrian Haley goto out;
750c91f6df2SBrian Haley
751c91f6df2SBrian Haley zero:
752c91f6df2SBrian Haley ret = -EFAULT;
7534ff09db1SMartin KaFai Lau if (copy_to_sockptr(optlen, &len, sizeof(int)))
754c91f6df2SBrian Haley goto out;
755c91f6df2SBrian Haley
756c91f6df2SBrian Haley ret = 0;
757c91f6df2SBrian Haley
758c91f6df2SBrian Haley out:
759c91f6df2SBrian Haley #endif
760c91f6df2SBrian Haley
761c91f6df2SBrian Haley return ret;
762c91f6df2SBrian Haley }
763c91f6df2SBrian Haley
/*
 * sk_mc_loop - should multicast packets from this socket loop back locally?
 * Returns false when the device recursion limit is hit, true for a NULL
 * socket, otherwise the per-family multicast-loop setting.
 */
sk_mc_loop(struct sock * sk)764f60e5990Shannes@stressinduktion.org bool sk_mc_loop(struct sock *sk)
765f60e5990Shannes@stressinduktion.org {
766f60e5990Shannes@stressinduktion.org if (dev_recursion_level())
767f60e5990Shannes@stressinduktion.org return false;
768f60e5990Shannes@stressinduktion.org if (!sk)
769f60e5990Shannes@stressinduktion.org return true;
770a3e0fdf7SEric Dumazet /* IPV6_ADDRFORM can change sk->sk_family under us. */
771a3e0fdf7SEric Dumazet switch (READ_ONCE(sk->sk_family)) {
772f60e5990Shannes@stressinduktion.org case AF_INET:
773b09bde5cSEric Dumazet return inet_test_bit(MC_LOOP, sk);
774f60e5990Shannes@stressinduktion.org #if IS_ENABLED(CONFIG_IPV6)
775f60e5990Shannes@stressinduktion.org case AF_INET6:
776f60e5990Shannes@stressinduktion.org return inet6_sk(sk)->mc_loop;
777f60e5990Shannes@stressinduktion.org #endif
778f60e5990Shannes@stressinduktion.org }
/* Unexpected address family: warn once and default to looping back. */
7790ad6f6e7SEric Dumazet WARN_ON_ONCE(1);
780f60e5990Shannes@stressinduktion.org return true;
781f60e5990Shannes@stressinduktion.org }
782f60e5990Shannes@stressinduktion.org EXPORT_SYMBOL(sk_mc_loop);
783f60e5990Shannes@stressinduktion.org
/* In-kernel helper equivalent to setsockopt(SO_REUSEADDR, 1). */
sock_set_reuseaddr(struct sock * sk)784b58f0e8fSChristoph Hellwig void sock_set_reuseaddr(struct sock *sk)
785b58f0e8fSChristoph Hellwig {
786b58f0e8fSChristoph Hellwig lock_sock(sk);
787b58f0e8fSChristoph Hellwig sk->sk_reuse = SK_CAN_REUSE;
788b58f0e8fSChristoph Hellwig release_sock(sk);
789b58f0e8fSChristoph Hellwig }
790b58f0e8fSChristoph Hellwig EXPORT_SYMBOL(sock_set_reuseaddr);
791b58f0e8fSChristoph Hellwig
/* In-kernel helper equivalent to setsockopt(SO_REUSEPORT, 1). */
sock_set_reuseport(struct sock * sk)792fe31a326SChristoph Hellwig void sock_set_reuseport(struct sock *sk)
793fe31a326SChristoph Hellwig {
794fe31a326SChristoph Hellwig lock_sock(sk);
795fe31a326SChristoph Hellwig sk->sk_reuseport = true;
796fe31a326SChristoph Hellwig release_sock(sk);
797fe31a326SChristoph Hellwig }
798fe31a326SChristoph Hellwig EXPORT_SYMBOL(sock_set_reuseport);
799fe31a326SChristoph Hellwig
/* Enable SO_LINGER with a zero timeout (abortive close on sock release). */
sock_no_linger(struct sock * sk)800c433594cSChristoph Hellwig void sock_no_linger(struct sock *sk)
801c433594cSChristoph Hellwig {
802c433594cSChristoph Hellwig lock_sock(sk);
/* WRITE_ONCE pairs with lockless readers of sk_lingertime. */
803bc1fb82aSEric Dumazet WRITE_ONCE(sk->sk_lingertime, 0);
804c433594cSChristoph Hellwig sock_set_flag(sk, SOCK_LINGER);
805c433594cSChristoph Hellwig release_sock(sk);
806c433594cSChristoph Hellwig }
807c433594cSChristoph Hellwig EXPORT_SYMBOL(sock_no_linger);
808c433594cSChristoph Hellwig
/* In-kernel helper equivalent to setsockopt(SO_PRIORITY); no cap check. */
sock_set_priority(struct sock * sk,u32 priority)8096e434967SChristoph Hellwig void sock_set_priority(struct sock *sk, u32 priority)
8106e434967SChristoph Hellwig {
8116e434967SChristoph Hellwig lock_sock(sk);
8128bf43be7SEric Dumazet WRITE_ONCE(sk->sk_priority, priority);
8136e434967SChristoph Hellwig release_sock(sk);
8146e434967SChristoph Hellwig }
8156e434967SChristoph Hellwig EXPORT_SYMBOL(sock_set_priority);
8166e434967SChristoph Hellwig
/*
 * sock_set_sndtimeo - set the send timeout in whole seconds.
 * @secs == 0 or an out-of-range value yields MAX_SCHEDULE_TIMEOUT
 * (i.e. wait forever).
 */
sock_set_sndtimeo(struct sock * sk,s64 secs)81776ee0785SChristoph Hellwig void sock_set_sndtimeo(struct sock *sk, s64 secs)
81876ee0785SChristoph Hellwig {
81976ee0785SChristoph Hellwig lock_sock(sk);
82076ee0785SChristoph Hellwig if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
821285975ddSEric Dumazet WRITE_ONCE(sk->sk_sndtimeo, secs * HZ);
82276ee0785SChristoph Hellwig else
823285975ddSEric Dumazet WRITE_ONCE(sk->sk_sndtimeo, MAX_SCHEDULE_TIMEOUT);
82476ee0785SChristoph Hellwig release_sock(sk);
82576ee0785SChristoph Hellwig }
82676ee0785SChristoph Hellwig EXPORT_SYMBOL(sock_set_sndtimeo);
82776ee0785SChristoph Hellwig
/*
 * __sock_set_timestamps - common backend for the SO_TIMESTAMP* options.
 * @val: enable/disable receive timestamping.
 * @new: use the y2038-safe "new" timespec layout (SOCK_TSTAMP_NEW).
 * @ns:  report nanosecond timestamps (SOCK_RCVTSTAMPNS).
 */
__sock_set_timestamps(struct sock * sk,bool val,bool new,bool ns)828783da70eSChristoph Hellwig static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
829783da70eSChristoph Hellwig {
830783da70eSChristoph Hellwig if (val) {
831783da70eSChristoph Hellwig sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new);
832783da70eSChristoph Hellwig sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns);
833783da70eSChristoph Hellwig sock_set_flag(sk, SOCK_RCVTSTAMP);
834783da70eSChristoph Hellwig sock_enable_timestamp(sk, SOCK_TIMESTAMP);
835783da70eSChristoph Hellwig } else {
836783da70eSChristoph Hellwig sock_reset_flag(sk, SOCK_RCVTSTAMP);
837783da70eSChristoph Hellwig sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
838783da70eSChristoph Hellwig }
839783da70eSChristoph Hellwig }
840783da70eSChristoph Hellwig
/* Enable nanosecond receive timestamps (old timespec layout) on @sk. */
sock_enable_timestamps(struct sock * sk)841783da70eSChristoph Hellwig void sock_enable_timestamps(struct sock *sk)
842783da70eSChristoph Hellwig {
843783da70eSChristoph Hellwig lock_sock(sk);
844783da70eSChristoph Hellwig __sock_set_timestamps(sk, true, false, true);
845783da70eSChristoph Hellwig release_sock(sk);
846783da70eSChristoph Hellwig }
847783da70eSChristoph Hellwig EXPORT_SYMBOL(sock_enable_timestamps);
848783da70eSChristoph Hellwig
/*
 * sock_set_timestamp - dispatch the four SO_TIMESTAMP{,NS}_{OLD,NEW}
 * options to __sock_set_timestamps() with the matching (new, ns) pair.
 */
sock_set_timestamp(struct sock * sk,int optname,bool valbool)849371087aaSFlorian Westphal void sock_set_timestamp(struct sock *sk, int optname, bool valbool)
850371087aaSFlorian Westphal {
851371087aaSFlorian Westphal switch (optname) {
852371087aaSFlorian Westphal case SO_TIMESTAMP_OLD:
853371087aaSFlorian Westphal __sock_set_timestamps(sk, valbool, false, false);
854371087aaSFlorian Westphal break;
855371087aaSFlorian Westphal case SO_TIMESTAMP_NEW:
856371087aaSFlorian Westphal __sock_set_timestamps(sk, valbool, true, false);
857371087aaSFlorian Westphal break;
858371087aaSFlorian Westphal case SO_TIMESTAMPNS_OLD:
859371087aaSFlorian Westphal __sock_set_timestamps(sk, valbool, false, true);
860371087aaSFlorian Westphal break;
861371087aaSFlorian Westphal case SO_TIMESTAMPNS_NEW:
862371087aaSFlorian Westphal __sock_set_timestamps(sk, valbool, true, true);
863371087aaSFlorian Westphal break;
864371087aaSFlorian Westphal }
865371087aaSFlorian Westphal }
866371087aaSFlorian Westphal
/*
 * sock_timestamping_bind_phc - bind timestamping to a PHC virtual clock.
 * Requires the socket to be bound to a device; validates @phc_index
 * against the device's list of PTP virtual clocks before recording it.
 * Returns 0 on success, -EOPNOTSUPP when no bound device, -EINVAL when
 * the index is not one of the device's vclocks.
 */
sock_timestamping_bind_phc(struct sock * sk,int phc_index)867d463126eSYangbo Lu static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
868ced122d9SFlorian Westphal {
869d463126eSYangbo Lu struct net *net = sock_net(sk);
870d463126eSYangbo Lu struct net_device *dev = NULL;
871d463126eSYangbo Lu bool match = false;
872d463126eSYangbo Lu int *vclock_index;
873d463126eSYangbo Lu int i, num;
874d463126eSYangbo Lu
875d463126eSYangbo Lu if (sk->sk_bound_dev_if)
876d463126eSYangbo Lu dev = dev_get_by_index(net, sk->sk_bound_dev_if);
877d463126eSYangbo Lu
878d463126eSYangbo Lu if (!dev) {
879d463126eSYangbo Lu pr_err("%s: sock not bind to device\n", __func__);
880d463126eSYangbo Lu return -EOPNOTSUPP;
881d463126eSYangbo Lu }
882d463126eSYangbo Lu
/* vclock_index is kmalloc'ed by ethtool when num > 0; freed below. */
883d463126eSYangbo Lu num = ethtool_get_phc_vclocks(dev, &vclock_index);
8842a4d75bfSMiroslav Lichvar dev_put(dev);
8852a4d75bfSMiroslav Lichvar
886d463126eSYangbo Lu for (i = 0; i < num; i++) {
887d463126eSYangbo Lu if (*(vclock_index + i) == phc_index) {
888d463126eSYangbo Lu match = true;
889d463126eSYangbo Lu break;
890d463126eSYangbo Lu }
891d463126eSYangbo Lu }
892d463126eSYangbo Lu
893d463126eSYangbo Lu if (num > 0)
894d463126eSYangbo Lu kfree(vclock_index);
895d463126eSYangbo Lu
896d463126eSYangbo Lu if (!match)
897d463126eSYangbo Lu return -EINVAL;
898d463126eSYangbo Lu
899251cd405SEric Dumazet WRITE_ONCE(sk->sk_bind_phc, phc_index);
900d463126eSYangbo Lu
901d463126eSYangbo Lu return 0;
902d463126eSYangbo Lu }
903d463126eSYangbo Lu
/*
 * sock_set_timestamping - SO_TIMESTAMPING{_OLD,_NEW} setsockopt backend.
 * Validates the flag combination, initializes sk_tskey for OPT_ID on TCP
 * (write_seq with OPT_ID_TCP, else snd_una), optionally binds a PHC
 * vclock, then commits sk_tsflags and toggles RX software timestamping.
 * Returns 0 or a negative errno.
 */
sock_set_timestamping(struct sock * sk,int optname,struct so_timestamping timestamping)904d463126eSYangbo Lu int sock_set_timestamping(struct sock *sk, int optname,
905d463126eSYangbo Lu struct so_timestamping timestamping)
906d463126eSYangbo Lu {
907d463126eSYangbo Lu int val = timestamping.flags;
908d463126eSYangbo Lu int ret;
909d463126eSYangbo Lu
910ced122d9SFlorian Westphal if (val & ~SOF_TIMESTAMPING_MASK)
911ced122d9SFlorian Westphal return -EINVAL;
912ced122d9SFlorian Westphal
/* OPT_ID_TCP is a modifier of OPT_ID and is invalid on its own. */
913b534dc46SWillem de Bruijn if (val & SOF_TIMESTAMPING_OPT_ID_TCP &&
914b534dc46SWillem de Bruijn !(val & SOF_TIMESTAMPING_OPT_ID))
915b534dc46SWillem de Bruijn return -EINVAL;
916b534dc46SWillem de Bruijn
/* Only (re)initialize sk_tskey when OPT_ID transitions from off to on. */
917ced122d9SFlorian Westphal if (val & SOF_TIMESTAMPING_OPT_ID &&
918ced122d9SFlorian Westphal !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
91942f67eeaSEric Dumazet if (sk_is_tcp(sk)) {
920ced122d9SFlorian Westphal if ((1 << sk->sk_state) &
921ced122d9SFlorian Westphal (TCPF_CLOSE | TCPF_LISTEN))
922ced122d9SFlorian Westphal return -EINVAL;
923b534dc46SWillem de Bruijn if (val & SOF_TIMESTAMPING_OPT_ID_TCP)
924b534dc46SWillem de Bruijn atomic_set(&sk->sk_tskey, tcp_sk(sk)->write_seq);
925b534dc46SWillem de Bruijn else
926a1cdec57SEric Dumazet atomic_set(&sk->sk_tskey, tcp_sk(sk)->snd_una);
927ced122d9SFlorian Westphal } else {
928a1cdec57SEric Dumazet atomic_set(&sk->sk_tskey, 0);
929ced122d9SFlorian Westphal }
930ced122d9SFlorian Westphal }
931ced122d9SFlorian Westphal
932ced122d9SFlorian Westphal if (val & SOF_TIMESTAMPING_OPT_STATS &&
933ced122d9SFlorian Westphal !(val & SOF_TIMESTAMPING_OPT_TSONLY))
934ced122d9SFlorian Westphal return -EINVAL;
935ced122d9SFlorian Westphal
936d463126eSYangbo Lu if (val & SOF_TIMESTAMPING_BIND_PHC) {
937d463126eSYangbo Lu ret = sock_timestamping_bind_phc(sk, timestamping.bind_phc);
938d463126eSYangbo Lu if (ret)
939d463126eSYangbo Lu return ret;
940d463126eSYangbo Lu }
941d463126eSYangbo Lu
942e3390b30SEric Dumazet WRITE_ONCE(sk->sk_tsflags, val);
943ced122d9SFlorian Westphal sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
944ced122d9SFlorian Westphal
945ced122d9SFlorian Westphal if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
946ced122d9SFlorian Westphal sock_enable_timestamp(sk,
947ced122d9SFlorian Westphal SOCK_TIMESTAMPING_RX_SOFTWARE);
948ced122d9SFlorian Westphal else
949ced122d9SFlorian Westphal sock_disable_timestamp(sk,
950ced122d9SFlorian Westphal (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
951ced122d9SFlorian Westphal return 0;
952ced122d9SFlorian Westphal }
953ced122d9SFlorian Westphal
/* In-kernel helper equivalent to setsockopt(SO_KEEPALIVE, 1); also
 * invokes the protocol's keepalive hook (e.g. to start TCP timers).
 */
sock_set_keepalive(struct sock * sk)954ce3d9544SChristoph Hellwig void sock_set_keepalive(struct sock *sk)
955ce3d9544SChristoph Hellwig {
956ce3d9544SChristoph Hellwig lock_sock(sk);
957ce3d9544SChristoph Hellwig if (sk->sk_prot->keepalive)
958ce3d9544SChristoph Hellwig sk->sk_prot->keepalive(sk, true);
959ce3d9544SChristoph Hellwig sock_valbool_flag(sk, SOCK_KEEPOPEN, true);
960ce3d9544SChristoph Hellwig release_sock(sk);
961ce3d9544SChristoph Hellwig }
962ce3d9544SChristoph Hellwig EXPORT_SYMBOL(sock_set_keepalive);
963ce3d9544SChristoph Hellwig
/*
 * __sock_set_rcvbuf - apply an SO_RCVBUF value; caller holds the
 * socket (or sockopt) lock. Locks the buffer size against auto-tuning,
 * doubles @val to account for skb overhead, and clamps to at least
 * SOCK_MIN_RCVBUF.
 */
__sock_set_rcvbuf(struct sock * sk,int val)96426cfabf9SChristoph Hellwig static void __sock_set_rcvbuf(struct sock *sk, int val)
96526cfabf9SChristoph Hellwig {
96626cfabf9SChristoph Hellwig /* Ensure val * 2 fits into an int, to prevent max_t() from treating it
96726cfabf9SChristoph Hellwig * as a negative value.
96826cfabf9SChristoph Hellwig */
96926cfabf9SChristoph Hellwig val = min_t(int, val, INT_MAX / 2);
97026cfabf9SChristoph Hellwig sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
97126cfabf9SChristoph Hellwig
97226cfabf9SChristoph Hellwig /* We double it on the way in to account for "struct sk_buff" etc.
97326cfabf9SChristoph Hellwig * overhead. Applications assume that the SO_RCVBUF setting they make
97426cfabf9SChristoph Hellwig * will allow that much actual data to be received on that socket.
97526cfabf9SChristoph Hellwig *
97626cfabf9SChristoph Hellwig * Applications are unaware that "struct sk_buff" and other overheads
97726cfabf9SChristoph Hellwig * allocate from the receive buffer during socket buffer allocation.
97826cfabf9SChristoph Hellwig *
97926cfabf9SChristoph Hellwig * And after considering the possible alternatives, returning the value
98026cfabf9SChristoph Hellwig * we actually used in getsockopt is the most desirable behavior.
98126cfabf9SChristoph Hellwig */
98226cfabf9SChristoph Hellwig WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF));
98326cfabf9SChristoph Hellwig }
98426cfabf9SChristoph Hellwig
/* Locked wrapper around __sock_set_rcvbuf() for in-kernel callers. */
sock_set_rcvbuf(struct sock * sk,int val)98526cfabf9SChristoph Hellwig void sock_set_rcvbuf(struct sock *sk, int val)
98626cfabf9SChristoph Hellwig {
98726cfabf9SChristoph Hellwig lock_sock(sk);
98826cfabf9SChristoph Hellwig __sock_set_rcvbuf(sk, val);
98926cfabf9SChristoph Hellwig release_sock(sk);
99026cfabf9SChristoph Hellwig }
99126cfabf9SChristoph Hellwig EXPORT_SYMBOL(sock_set_rcvbuf);
99226cfabf9SChristoph Hellwig
/* Set sk_mark and invalidate the cached route only when the value changes. */
__sock_set_mark(struct sock * sk,u32 val)993dd9082f4SAlexander Aring static void __sock_set_mark(struct sock *sk, u32 val)
994dd9082f4SAlexander Aring {
995dd9082f4SAlexander Aring if (val != sk->sk_mark) {
9963c5b4d69SEric Dumazet WRITE_ONCE(sk->sk_mark, val);
997dd9082f4SAlexander Aring sk_dst_reset(sk);
998dd9082f4SAlexander Aring }
999dd9082f4SAlexander Aring }
1000dd9082f4SAlexander Aring
/* Locked wrapper around __sock_set_mark() (SO_MARK) for in-kernel callers. */
sock_set_mark(struct sock * sk,u32 val)100184d1c617SAlexander Aring void sock_set_mark(struct sock *sk, u32 val)
100284d1c617SAlexander Aring {
100384d1c617SAlexander Aring lock_sock(sk);
1004dd9082f4SAlexander Aring __sock_set_mark(sk, val);
100584d1c617SAlexander Aring release_sock(sk);
100684d1c617SAlexander Aring }
100784d1c617SAlexander Aring EXPORT_SYMBOL(sock_set_mark);
100884d1c617SAlexander Aring
/*
 * sock_release_reserved_memory - give back part of the memory reserved
 * via SO_RESERVE_MEM. @bytes is rounded down to a page multiple; the
 * remainder is reclaimed from forward_alloc.
 */
sock_release_reserved_memory(struct sock * sk,int bytes)10092bb2f5fbSWei Wang static void sock_release_reserved_memory(struct sock *sk, int bytes)
10102bb2f5fbSWei Wang {
10112bb2f5fbSWei Wang /* Round down bytes to multiple of pages */
1012100fdd1fSEric Dumazet bytes = round_down(bytes, PAGE_SIZE);
10132bb2f5fbSWei Wang
10142bb2f5fbSWei Wang WARN_ON(bytes > sk->sk_reserved_mem);
/* WRITE_ONCE pairs with lockless readers of sk_reserved_mem. */
1015fe11fdcbSEric Dumazet WRITE_ONCE(sk->sk_reserved_mem, sk->sk_reserved_mem - bytes);
10162bb2f5fbSWei Wang sk_mem_reclaim(sk);
10172bb2f5fbSWei Wang }
10182bb2f5fbSWei Wang
/*
 * sock_reserve_memory - SO_RESERVE_MEM backend: pre-charge @bytes (rounded
 * up to pages) to both the memcg and the protocol memory accounting, and
 * credit the result to sk_forward_alloc / sk_reserved_mem.
 * Returns 0 on success; -EOPNOTSUPP when memcg accounting is unavailable,
 * -ENOMEM when either charge fails or would push the protocol over its
 * pressure limit (in which case the charges are rolled back).
 */
sock_reserve_memory(struct sock * sk,int bytes)10192bb2f5fbSWei Wang static int sock_reserve_memory(struct sock *sk, int bytes)
10202bb2f5fbSWei Wang {
10212bb2f5fbSWei Wang long allocated;
10222bb2f5fbSWei Wang bool charged;
10232bb2f5fbSWei Wang int pages;
10242bb2f5fbSWei Wang
1025d00c8ee3SEric Dumazet if (!mem_cgroup_sockets_enabled || !sk->sk_memcg || !sk_has_account(sk))
10262bb2f5fbSWei Wang return -EOPNOTSUPP;
10272bb2f5fbSWei Wang
10282bb2f5fbSWei Wang if (!bytes)
10292bb2f5fbSWei Wang return 0;
10302bb2f5fbSWei Wang
10312bb2f5fbSWei Wang pages = sk_mem_pages(bytes);
10322bb2f5fbSWei Wang
10332bb2f5fbSWei Wang /* pre-charge to memcg */
10342bb2f5fbSWei Wang charged = mem_cgroup_charge_skmem(sk->sk_memcg, pages,
10352bb2f5fbSWei Wang GFP_KERNEL | __GFP_RETRY_MAYFAIL);
10362bb2f5fbSWei Wang if (!charged)
10372bb2f5fbSWei Wang return -ENOMEM;
10382bb2f5fbSWei Wang
10392bb2f5fbSWei Wang /* pre-charge to forward_alloc */
1040219160beSEric Dumazet sk_memory_allocated_add(sk, pages);
1041219160beSEric Dumazet allocated = sk_memory_allocated(sk);
10422bb2f5fbSWei Wang /* If the system goes into memory pressure with this
10432bb2f5fbSWei Wang * precharge, give up and return error.
10442bb2f5fbSWei Wang */
10452bb2f5fbSWei Wang if (allocated > sk_prot_mem_limits(sk, 1)) {
10462bb2f5fbSWei Wang sk_memory_allocated_sub(sk, pages);
10472bb2f5fbSWei Wang mem_cgroup_uncharge_skmem(sk->sk_memcg, pages);
10482bb2f5fbSWei Wang return -ENOMEM;
10492bb2f5fbSWei Wang }
10505e6300e7SEric Dumazet sk_forward_alloc_add(sk, pages << PAGE_SHIFT);
10512bb2f5fbSWei Wang
1052fe11fdcbSEric Dumazet WRITE_ONCE(sk->sk_reserved_mem,
1053fe11fdcbSEric Dumazet sk->sk_reserved_mem + (pages << PAGE_SHIFT));
10542bb2f5fbSWei Wang
10552bb2f5fbSWei Wang return 0;
10562bb2f5fbSWei Wang }
10572bb2f5fbSWei Wang
/*
 * sockopt_lock_sock - lock_sock() unless running from a BPF program,
 * which already holds the socket lock. Pairs with sockopt_release_sock().
 */
sockopt_lock_sock(struct sock * sk)105824426654SMartin KaFai Lau void sockopt_lock_sock(struct sock *sk)
105924426654SMartin KaFai Lau {
106024426654SMartin KaFai Lau /* When current->bpf_ctx is set, the setsockopt is called from
106124426654SMartin KaFai Lau * a bpf prog. bpf has ensured the sk lock has been
106224426654SMartin KaFai Lau * acquired before calling setsockopt().
106324426654SMartin KaFai Lau */
106424426654SMartin KaFai Lau if (has_current_bpf_ctx())
106524426654SMartin KaFai Lau return;
106624426654SMartin KaFai Lau
106724426654SMartin KaFai Lau lock_sock(sk);
106824426654SMartin KaFai Lau }
106924426654SMartin KaFai Lau EXPORT_SYMBOL(sockopt_lock_sock);
107024426654SMartin KaFai Lau
/* Counterpart of sockopt_lock_sock(): no-op under a BPF program context. */
sockopt_release_sock(struct sock * sk)107124426654SMartin KaFai Lau void sockopt_release_sock(struct sock *sk)
107224426654SMartin KaFai Lau {
107324426654SMartin KaFai Lau if (has_current_bpf_ctx())
107424426654SMartin KaFai Lau return;
107524426654SMartin KaFai Lau
107624426654SMartin KaFai Lau release_sock(sk);
107724426654SMartin KaFai Lau }
107824426654SMartin KaFai Lau EXPORT_SYMBOL(sockopt_release_sock);
107924426654SMartin KaFai Lau
/* ns_capable() that always succeeds from BPF context (bpf already
 * verified privileges before calling the sockopt hook).
 */
sockopt_ns_capable(struct user_namespace * ns,int cap)1080e42c7beeSMartin KaFai Lau bool sockopt_ns_capable(struct user_namespace *ns, int cap)
1081e42c7beeSMartin KaFai Lau {
1082e42c7beeSMartin KaFai Lau return has_current_bpf_ctx() || ns_capable(ns, cap);
1083e42c7beeSMartin KaFai Lau }
1084e42c7beeSMartin KaFai Lau EXPORT_SYMBOL(sockopt_ns_capable);
1085e42c7beeSMartin KaFai Lau
/* capable() that always succeeds from BPF context; see sockopt_ns_capable(). */
sockopt_capable(int cap)1086e42c7beeSMartin KaFai Lau bool sockopt_capable(int cap)
1087e42c7beeSMartin KaFai Lau {
1088e42c7beeSMartin KaFai Lau return has_current_bpf_ctx() || capable(cap);
1089e42c7beeSMartin KaFai Lau }
1090e42c7beeSMartin KaFai Lau EXPORT_SYMBOL(sockopt_capable);
1091e42c7beeSMartin KaFai Lau
10921da177e4SLinus Torvalds /*
10931da177e4SLinus Torvalds * This is meant for all protocols to use and covers goings on
10941da177e4SLinus Torvalds * at the socket level. Everything here is generic.
10951da177e4SLinus Torvalds */
10961da177e4SLinus Torvalds
sk_setsockopt(struct sock * sk,int level,int optname,sockptr_t optval,unsigned int optlen)109729003875SMartin KaFai Lau int sk_setsockopt(struct sock *sk, int level, int optname,
1098c8c1bbb6SChristoph Hellwig sockptr_t optval, unsigned int optlen)
10991da177e4SLinus Torvalds {
1100d463126eSYangbo Lu struct so_timestamping timestamping;
11014d748f99SMartin KaFai Lau struct socket *sock = sk->sk_socket;
110280b14deeSRichard Cochran struct sock_txtime sk_txtime;
11031da177e4SLinus Torvalds int val;
11041da177e4SLinus Torvalds int valbool;
11051da177e4SLinus Torvalds struct linger ling;
11061da177e4SLinus Torvalds int ret = 0;
11071da177e4SLinus Torvalds
11081da177e4SLinus Torvalds /*
11091da177e4SLinus Torvalds * Options without arguments
11101da177e4SLinus Torvalds */
11111da177e4SLinus Torvalds
11124878809fSDavid S. Miller if (optname == SO_BINDTODEVICE)
1113c91f6df2SBrian Haley return sock_setbindtodevice(sk, optval, optlen);
11144878809fSDavid S. Miller
11151da177e4SLinus Torvalds if (optlen < sizeof(int))
1116e71a4783SStephen Hemminger return -EINVAL;
11171da177e4SLinus Torvalds
1118c8c1bbb6SChristoph Hellwig if (copy_from_sockptr(&val, optval, sizeof(val)))
11191da177e4SLinus Torvalds return -EFAULT;
11201da177e4SLinus Torvalds
11211da177e4SLinus Torvalds valbool = val ? 1 : 0;
11221da177e4SLinus Torvalds
112324426654SMartin KaFai Lau sockopt_lock_sock(sk);
11241da177e4SLinus Torvalds
1125e71a4783SStephen Hemminger switch (optname) {
11261da177e4SLinus Torvalds case SO_DEBUG:
1127e42c7beeSMartin KaFai Lau if (val && !sockopt_capable(CAP_NET_ADMIN))
11281da177e4SLinus Torvalds ret = -EACCES;
11292a91525cSEric Dumazet else
1130c0ef877bSPavel Emelyanov sock_valbool_flag(sk, SOCK_DBG, valbool);
11311da177e4SLinus Torvalds break;
11321da177e4SLinus Torvalds case SO_REUSEADDR:
1133cdb8744dSBart Van Assche sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
11341da177e4SLinus Torvalds break;
1135055dc21aSTom Herbert case SO_REUSEPORT:
1136*ad91a2daSEric Dumazet if (valbool && !sk_is_inet(sk))
1137*ad91a2daSEric Dumazet ret = -EOPNOTSUPP;
1138*ad91a2daSEric Dumazet else
1139055dc21aSTom Herbert sk->sk_reuseport = valbool;
1140055dc21aSTom Herbert break;
11411da177e4SLinus Torvalds case SO_TYPE:
114249c794e9SJan Engelhardt case SO_PROTOCOL:
11430d6038eeSJan Engelhardt case SO_DOMAIN:
11441da177e4SLinus Torvalds case SO_ERROR:
11451da177e4SLinus Torvalds ret = -ENOPROTOOPT;
11461da177e4SLinus Torvalds break;
11471da177e4SLinus Torvalds case SO_DONTROUTE:
1148c0ef877bSPavel Emelyanov sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
11490fbe82e6Syupeng sk_dst_reset(sk);
11501da177e4SLinus Torvalds break;
11511da177e4SLinus Torvalds case SO_BROADCAST:
11521da177e4SLinus Torvalds sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
11531da177e4SLinus Torvalds break;
11541da177e4SLinus Torvalds case SO_SNDBUF:
11551da177e4SLinus Torvalds /* Don't error on this BSD doesn't and if you think
115682981930SEric Dumazet * about it this is right. Otherwise apps have to
115782981930SEric Dumazet * play 'guess the biggest size' games. RCVBUF/SNDBUF
115882981930SEric Dumazet * are treated in BSD as hints
115982981930SEric Dumazet */
11601227c177SKuniyuki Iwashima val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
1161b0573deaSPatrick McHardy set_sndbuf:
11624057765fSGuillaume Nault /* Ensure val * 2 fits into an int, to prevent max_t()
11634057765fSGuillaume Nault * from treating it as a negative value.
11644057765fSGuillaume Nault */
11654057765fSGuillaume Nault val = min_t(int, val, INT_MAX / 2);
11661da177e4SLinus Torvalds sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
1167e292f05eSEric Dumazet WRITE_ONCE(sk->sk_sndbuf,
1168e292f05eSEric Dumazet max_t(int, val * 2, SOCK_MIN_SNDBUF));
116982981930SEric Dumazet /* Wake up sending tasks if we upped the value. */
11701da177e4SLinus Torvalds sk->sk_write_space(sk);
11711da177e4SLinus Torvalds break;
11721da177e4SLinus Torvalds
1173b0573deaSPatrick McHardy case SO_SNDBUFFORCE:
1174e42c7beeSMartin KaFai Lau if (!sockopt_capable(CAP_NET_ADMIN)) {
1175b0573deaSPatrick McHardy ret = -EPERM;
1176b0573deaSPatrick McHardy break;
1177b0573deaSPatrick McHardy }
11784057765fSGuillaume Nault
11794057765fSGuillaume Nault /* No negative values (to prevent underflow, as val will be
11804057765fSGuillaume Nault * multiplied by 2).
11814057765fSGuillaume Nault */
11824057765fSGuillaume Nault if (val < 0)
11834057765fSGuillaume Nault val = 0;
1184b0573deaSPatrick McHardy goto set_sndbuf;
1185b0573deaSPatrick McHardy
11861da177e4SLinus Torvalds case SO_RCVBUF:
11871da177e4SLinus Torvalds /* Don't error on this BSD doesn't and if you think
118882981930SEric Dumazet * about it this is right. Otherwise apps have to
118982981930SEric Dumazet * play 'guess the biggest size' games. RCVBUF/SNDBUF
119082981930SEric Dumazet * are treated in BSD as hints
119182981930SEric Dumazet */
11921227c177SKuniyuki Iwashima __sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max)));
11931da177e4SLinus Torvalds break;
11941da177e4SLinus Torvalds
1195b0573deaSPatrick McHardy case SO_RCVBUFFORCE:
1196e42c7beeSMartin KaFai Lau if (!sockopt_capable(CAP_NET_ADMIN)) {
1197b0573deaSPatrick McHardy ret = -EPERM;
1198b0573deaSPatrick McHardy break;
1199b0573deaSPatrick McHardy }
12004057765fSGuillaume Nault
12014057765fSGuillaume Nault /* No negative values (to prevent underflow, as val will be
12024057765fSGuillaume Nault * multiplied by 2).
12034057765fSGuillaume Nault */
120426cfabf9SChristoph Hellwig __sock_set_rcvbuf(sk, max(val, 0));
120526cfabf9SChristoph Hellwig break;
1206b0573deaSPatrick McHardy
12071da177e4SLinus Torvalds case SO_KEEPALIVE:
12084b9d07a4SUrsula Braun if (sk->sk_prot->keepalive)
12094b9d07a4SUrsula Braun sk->sk_prot->keepalive(sk, valbool);
12101da177e4SLinus Torvalds sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
12111da177e4SLinus Torvalds break;
12121da177e4SLinus Torvalds
12131da177e4SLinus Torvalds case SO_OOBINLINE:
12141da177e4SLinus Torvalds sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
12151da177e4SLinus Torvalds break;
12161da177e4SLinus Torvalds
12171da177e4SLinus Torvalds case SO_NO_CHECK:
121828448b80STom Herbert sk->sk_no_check_tx = valbool;
12191da177e4SLinus Torvalds break;
12201da177e4SLinus Torvalds
12211da177e4SLinus Torvalds case SO_PRIORITY:
12225e1fccc0SEric W. Biederman if ((val >= 0 && val <= 6) ||
1223e42c7beeSMartin KaFai Lau sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
1224e42c7beeSMartin KaFai Lau sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
12258bf43be7SEric Dumazet WRITE_ONCE(sk->sk_priority, val);
12261da177e4SLinus Torvalds else
12271da177e4SLinus Torvalds ret = -EPERM;
12281da177e4SLinus Torvalds break;
12291da177e4SLinus Torvalds
12301da177e4SLinus Torvalds case SO_LINGER:
12311da177e4SLinus Torvalds if (optlen < sizeof(ling)) {
12321da177e4SLinus Torvalds ret = -EINVAL; /* 1003.1g */
12331da177e4SLinus Torvalds break;
12341da177e4SLinus Torvalds }
1235c8c1bbb6SChristoph Hellwig if (copy_from_sockptr(&ling, optval, sizeof(ling))) {
12361da177e4SLinus Torvalds ret = -EFAULT;
12371da177e4SLinus Torvalds break;
12381da177e4SLinus Torvalds }
1239bc1fb82aSEric Dumazet if (!ling.l_onoff) {
12401da177e4SLinus Torvalds sock_reset_flag(sk, SOCK_LINGER);
1241bc1fb82aSEric Dumazet } else {
1242bc1fb82aSEric Dumazet unsigned long t_sec = ling.l_linger;
1243bc1fb82aSEric Dumazet
1244bc1fb82aSEric Dumazet if (t_sec >= MAX_SCHEDULE_TIMEOUT / HZ)
1245bc1fb82aSEric Dumazet WRITE_ONCE(sk->sk_lingertime, MAX_SCHEDULE_TIMEOUT);
12461da177e4SLinus Torvalds else
1247bc1fb82aSEric Dumazet WRITE_ONCE(sk->sk_lingertime, t_sec * HZ);
12481da177e4SLinus Torvalds sock_set_flag(sk, SOCK_LINGER);
12491da177e4SLinus Torvalds }
12501da177e4SLinus Torvalds break;
12511da177e4SLinus Torvalds
12521da177e4SLinus Torvalds case SO_BSDCOMPAT:
12531da177e4SLinus Torvalds break;
12541da177e4SLinus Torvalds
12551da177e4SLinus Torvalds case SO_PASSCRED:
1256274c4a6dSAndy Shevchenko assign_bit(SOCK_PASSCRED, &sock->flags, valbool);
12571da177e4SLinus Torvalds break;
12581da177e4SLinus Torvalds
12595e2ff670SAlexander Mikhalitsyn case SO_PASSPIDFD:
1260274c4a6dSAndy Shevchenko assign_bit(SOCK_PASSPIDFD, &sock->flags, valbool);
12615e2ff670SAlexander Mikhalitsyn break;
12625e2ff670SAlexander Mikhalitsyn
12637f1bc6e9SDeepa Dinamani case SO_TIMESTAMP_OLD:
1264783da70eSChristoph Hellwig case SO_TIMESTAMP_NEW:
1265783da70eSChristoph Hellwig case SO_TIMESTAMPNS_OLD:
1266783da70eSChristoph Hellwig case SO_TIMESTAMPNS_NEW:
126781b4a0ccSEric Dumazet sock_set_timestamp(sk, optname, valbool);
1268783da70eSChristoph Hellwig break;
1269ced122d9SFlorian Westphal
12709718475eSDeepa Dinamani case SO_TIMESTAMPING_NEW:
12717f1bc6e9SDeepa Dinamani case SO_TIMESTAMPING_OLD:
1272d463126eSYangbo Lu if (optlen == sizeof(timestamping)) {
1273d463126eSYangbo Lu 			if (copy_from_sockptr(&timestamping, optval,
1274271dbc31SDan Carpenter sizeof(timestamping))) {
1275271dbc31SDan Carpenter ret = -EFAULT;
1276271dbc31SDan Carpenter break;
1277271dbc31SDan Carpenter }
1278d463126eSYangbo Lu } else {
1279d463126eSYangbo Lu 			memset(&timestamping, 0, sizeof(timestamping));
1280d463126eSYangbo Lu timestamping.flags = val;
1281d463126eSYangbo Lu }
1282d463126eSYangbo Lu ret = sock_set_timestamping(sk, optname, timestamping);
128320d49473SPatrick Ohly break;
128420d49473SPatrick Ohly
12851da177e4SLinus Torvalds case SO_RCVLOWAT:
12861ded5e5aSEric Dumazet {
12871ded5e5aSEric Dumazet int (*set_rcvlowat)(struct sock *sk, int val) = NULL;
12881ded5e5aSEric Dumazet
12891da177e4SLinus Torvalds if (val < 0)
12901da177e4SLinus Torvalds val = INT_MAX;
12911ded5e5aSEric Dumazet if (sock)
12921ded5e5aSEric Dumazet set_rcvlowat = READ_ONCE(sock->ops)->set_rcvlowat;
12931ded5e5aSEric Dumazet if (set_rcvlowat)
12941ded5e5aSEric Dumazet ret = set_rcvlowat(sk, val);
1295d1361840SEric Dumazet else
1296eac66402SEric Dumazet WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
12971da177e4SLinus Torvalds break;
12981ded5e5aSEric Dumazet }
129945bdc661SDeepa Dinamani case SO_RCVTIMEO_OLD:
1300a9beb86aSDeepa Dinamani case SO_RCVTIMEO_NEW:
1301c8c1bbb6SChristoph Hellwig ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
1302c34645acSChristoph Hellwig optlen, optname == SO_RCVTIMEO_OLD);
13031da177e4SLinus Torvalds break;
13041da177e4SLinus Torvalds
130545bdc661SDeepa Dinamani case SO_SNDTIMEO_OLD:
1306a9beb86aSDeepa Dinamani case SO_SNDTIMEO_NEW:
1307c8c1bbb6SChristoph Hellwig ret = sock_set_timeout(&sk->sk_sndtimeo, optval,
1308c34645acSChristoph Hellwig optlen, optname == SO_SNDTIMEO_OLD);
13091da177e4SLinus Torvalds break;
13101da177e4SLinus Torvalds
13114d295e54SChristoph Hellwig case SO_ATTACH_FILTER: {
13121da177e4SLinus Torvalds struct sock_fprog fprog;
13131da177e4SLinus Torvalds
1314c8c1bbb6SChristoph Hellwig ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
13154d295e54SChristoph Hellwig if (!ret)
13161da177e4SLinus Torvalds ret = sk_attach_filter(&fprog, sk);
13171da177e4SLinus Torvalds break;
13184d295e54SChristoph Hellwig }
131989aa0758SAlexei Starovoitov case SO_ATTACH_BPF:
132089aa0758SAlexei Starovoitov ret = -EINVAL;
132189aa0758SAlexei Starovoitov if (optlen == sizeof(u32)) {
132289aa0758SAlexei Starovoitov u32 ufd;
132389aa0758SAlexei Starovoitov
132489aa0758SAlexei Starovoitov ret = -EFAULT;
1325c8c1bbb6SChristoph Hellwig if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
132689aa0758SAlexei Starovoitov break;
132789aa0758SAlexei Starovoitov
132889aa0758SAlexei Starovoitov ret = sk_attach_bpf(ufd, sk);
132989aa0758SAlexei Starovoitov }
133089aa0758SAlexei Starovoitov break;
133189aa0758SAlexei Starovoitov
13324d295e54SChristoph Hellwig case SO_ATTACH_REUSEPORT_CBPF: {
1333538950a1SCraig Gallek struct sock_fprog fprog;
1334538950a1SCraig Gallek
1335c8c1bbb6SChristoph Hellwig ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
13364d295e54SChristoph Hellwig if (!ret)
1337538950a1SCraig Gallek ret = sk_reuseport_attach_filter(&fprog, sk);
1338538950a1SCraig Gallek break;
13394d295e54SChristoph Hellwig }
1340538950a1SCraig Gallek case SO_ATTACH_REUSEPORT_EBPF:
1341538950a1SCraig Gallek ret = -EINVAL;
1342538950a1SCraig Gallek if (optlen == sizeof(u32)) {
1343538950a1SCraig Gallek u32 ufd;
1344538950a1SCraig Gallek
1345538950a1SCraig Gallek ret = -EFAULT;
1346c8c1bbb6SChristoph Hellwig if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
1347538950a1SCraig Gallek break;
1348538950a1SCraig Gallek
1349538950a1SCraig Gallek ret = sk_reuseport_attach_bpf(ufd, sk);
1350538950a1SCraig Gallek }
1351538950a1SCraig Gallek break;
1352538950a1SCraig Gallek
135399f3a064SMartin KaFai Lau case SO_DETACH_REUSEPORT_BPF:
135499f3a064SMartin KaFai Lau ret = reuseport_detach_prog(sk);
135599f3a064SMartin KaFai Lau break;
135699f3a064SMartin KaFai Lau
13571da177e4SLinus Torvalds case SO_DETACH_FILTER:
135855b33325SPavel Emelyanov ret = sk_detach_filter(sk);
13591da177e4SLinus Torvalds break;
13601da177e4SLinus Torvalds
1361d59577b6SVincent Bernat case SO_LOCK_FILTER:
1362d59577b6SVincent Bernat if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
1363d59577b6SVincent Bernat ret = -EPERM;
1364d59577b6SVincent Bernat else
1365d59577b6SVincent Bernat sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
1366d59577b6SVincent Bernat break;
1367d59577b6SVincent Bernat
1368877ce7c1SCatherine Zhang case SO_PASSSEC:
1369274c4a6dSAndy Shevchenko assign_bit(SOCK_PASSSEC, &sock->flags, valbool);
1370877ce7c1SCatherine Zhang break;
13714a19ec58SLaszlo Attila Toth case SO_MARK:
1372e42c7beeSMartin KaFai Lau if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
1373e42c7beeSMartin KaFai Lau !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
13744a19ec58SLaszlo Attila Toth ret = -EPERM;
1375dd9082f4SAlexander Aring break;
137650254256SDavid Barmann }
1377dd9082f4SAlexander Aring
1378dd9082f4SAlexander Aring __sock_set_mark(sk, val);
13794a19ec58SLaszlo Attila Toth break;
13806fd1d51cSErin MacNeil case SO_RCVMARK:
13816fd1d51cSErin MacNeil sock_valbool_flag(sk, SOCK_RCVMARK, valbool);
13826fd1d51cSErin MacNeil break;
1383877ce7c1SCatherine Zhang
13843b885787SNeil Horman case SO_RXQ_OVFL:
13858083f0fcSJohannes Berg sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
13863b885787SNeil Horman break;
13876e3e939fSJohannes Berg
13886e3e939fSJohannes Berg case SO_WIFI_STATUS:
13896e3e939fSJohannes Berg sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
13906e3e939fSJohannes Berg break;
13916e3e939fSJohannes Berg
1392ef64a54fSPavel Emelyanov case SO_PEEK_OFF:
13931ded5e5aSEric Dumazet {
13941ded5e5aSEric Dumazet int (*set_peek_off)(struct sock *sk, int val);
13951ded5e5aSEric Dumazet
13961ded5e5aSEric Dumazet set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
13971ded5e5aSEric Dumazet if (set_peek_off)
13981ded5e5aSEric Dumazet ret = set_peek_off(sk, val);
1399ef64a54fSPavel Emelyanov else
1400ef64a54fSPavel Emelyanov ret = -EOPNOTSUPP;
1401ef64a54fSPavel Emelyanov break;
14021ded5e5aSEric Dumazet }
14033bdc0ebaSBen Greear
14043bdc0ebaSBen Greear case SO_NOFCS:
14053bdc0ebaSBen Greear sock_valbool_flag(sk, SOCK_NOFCS, valbool);
14063bdc0ebaSBen Greear break;
14073bdc0ebaSBen Greear
14087d4c04fcSKeller, Jacob E case SO_SELECT_ERR_QUEUE:
14097d4c04fcSKeller, Jacob E sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
14107d4c04fcSKeller, Jacob E break;
14117d4c04fcSKeller, Jacob E
1412e0d1095aSCong Wang #ifdef CONFIG_NET_RX_BUSY_POLL
141364b0dc51SEliezer Tamir case SO_BUSY_POLL:
1414dafcc438SEliezer Tamir if (val < 0)
1415dafcc438SEliezer Tamir ret = -EINVAL;
1416dafcc438SEliezer Tamir else
14170dbffbb5SEric Dumazet WRITE_ONCE(sk->sk_ll_usec, val);
1418dafcc438SEliezer Tamir break;
14197fd3253aSBjörn Töpel case SO_PREFER_BUSY_POLL:
1420e42c7beeSMartin KaFai Lau if (valbool && !sockopt_capable(CAP_NET_ADMIN))
14217fd3253aSBjörn Töpel ret = -EPERM;
14227fd3253aSBjörn Töpel else
14237fd3253aSBjörn Töpel WRITE_ONCE(sk->sk_prefer_busy_poll, valbool);
14247fd3253aSBjörn Töpel break;
14257c951cafSBjörn Töpel case SO_BUSY_POLL_BUDGET:
1426e42c7beeSMartin KaFai Lau if (val > READ_ONCE(sk->sk_busy_poll_budget) && !sockopt_capable(CAP_NET_ADMIN)) {
14277c951cafSBjörn Töpel ret = -EPERM;
14287c951cafSBjörn Töpel } else {
14297c951cafSBjörn Töpel if (val < 0 || val > U16_MAX)
14307c951cafSBjörn Töpel ret = -EINVAL;
14317c951cafSBjörn Töpel else
14327c951cafSBjörn Töpel WRITE_ONCE(sk->sk_busy_poll_budget, val);
14337c951cafSBjörn Töpel }
14347c951cafSBjörn Töpel break;
1435dafcc438SEliezer Tamir #endif
143662748f32SEric Dumazet
143762748f32SEric Dumazet case SO_MAX_PACING_RATE:
14386bdef102SEric Dumazet {
1439700465fdSKe Li unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val;
14406bdef102SEric Dumazet
14416bdef102SEric Dumazet if (sizeof(ulval) != sizeof(val) &&
14426bdef102SEric Dumazet optlen >= sizeof(ulval) &&
1443c8c1bbb6SChristoph Hellwig copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
14446bdef102SEric Dumazet ret = -EFAULT;
14456bdef102SEric Dumazet break;
14466bdef102SEric Dumazet }
14476bdef102SEric Dumazet if (ulval != ~0UL)
1448218af599SEric Dumazet cmpxchg(&sk->sk_pacing_status,
1449218af599SEric Dumazet SK_PACING_NONE,
1450218af599SEric Dumazet SK_PACING_NEEDED);
1451ea7f45efSEric Dumazet /* Pairs with READ_ONCE() from sk_getsockopt() */
1452ea7f45efSEric Dumazet WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
14536bdef102SEric Dumazet sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
145462748f32SEric Dumazet break;
14556bdef102SEric Dumazet }
145670da268bSEric Dumazet case SO_INCOMING_CPU:
1457b261eda8SKuniyuki Iwashima reuseport_update_incoming_cpu(sk, val);
145870da268bSEric Dumazet break;
145970da268bSEric Dumazet
1460a87cb3e4STom Herbert case SO_CNX_ADVICE:
1461a87cb3e4STom Herbert if (val == 1)
1462a87cb3e4STom Herbert dst_negative_advice(sk);
1463a87cb3e4STom Herbert break;
146476851d12SWillem de Bruijn
146576851d12SWillem de Bruijn case SO_ZEROCOPY:
146628190752SSowmini Varadhan if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
146742f67eeaSEric Dumazet if (!(sk_is_tcp(sk) ||
1468b5947e5dSWillem de Bruijn (sk->sk_type == SOCK_DGRAM &&
1469b5947e5dSWillem de Bruijn sk->sk_protocol == IPPROTO_UDP)))
1470869420a8SSamuel Thibault ret = -EOPNOTSUPP;
147128190752SSowmini Varadhan } else if (sk->sk_family != PF_RDS) {
1472869420a8SSamuel Thibault ret = -EOPNOTSUPP;
147328190752SSowmini Varadhan }
147428190752SSowmini Varadhan if (!ret) {
147528190752SSowmini Varadhan if (val < 0 || val > 1)
147676851d12SWillem de Bruijn ret = -EINVAL;
147776851d12SWillem de Bruijn else
147876851d12SWillem de Bruijn sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
147928190752SSowmini Varadhan }
1480334e6413SJesus Sanchez-Palencia break;
1481334e6413SJesus Sanchez-Palencia
148280b14deeSRichard Cochran case SO_TXTIME:
1483790709f2SEric Dumazet if (optlen != sizeof(struct sock_txtime)) {
148480b14deeSRichard Cochran ret = -EINVAL;
1485790709f2SEric Dumazet break;
1486c8c1bbb6SChristoph Hellwig } else if (copy_from_sockptr(&sk_txtime, optval,
148780b14deeSRichard Cochran sizeof(struct sock_txtime))) {
148880b14deeSRichard Cochran ret = -EFAULT;
1489790709f2SEric Dumazet break;
149080b14deeSRichard Cochran } else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
149180b14deeSRichard Cochran ret = -EINVAL;
1492790709f2SEric Dumazet break;
1493790709f2SEric Dumazet }
1494790709f2SEric Dumazet /* CLOCK_MONOTONIC is only used by sch_fq, and this packet
1495790709f2SEric Dumazet * scheduler has enough safe guards.
1496790709f2SEric Dumazet */
1497790709f2SEric Dumazet if (sk_txtime.clockid != CLOCK_MONOTONIC &&
1498e42c7beeSMartin KaFai Lau !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1499790709f2SEric Dumazet ret = -EPERM;
1500790709f2SEric Dumazet break;
1501790709f2SEric Dumazet }
150280b14deeSRichard Cochran sock_valbool_flag(sk, SOCK_TXTIME, true);
150380b14deeSRichard Cochran sk->sk_clockid = sk_txtime.clockid;
150480b14deeSRichard Cochran sk->sk_txtime_deadline_mode =
150580b14deeSRichard Cochran !!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
15064b15c707SJesus Sanchez-Palencia sk->sk_txtime_report_errors =
15074b15c707SJesus Sanchez-Palencia !!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
150880b14deeSRichard Cochran break;
150980b14deeSRichard Cochran
1510f5dd3d0cSDavid Herrmann case SO_BINDTOIFINDEX:
15117594888cSChristoph Hellwig ret = sock_bindtoindex_locked(sk, val);
1512f5dd3d0cSDavid Herrmann break;
1513f5dd3d0cSDavid Herrmann
151404190bf8SPavel Tikhomirov case SO_BUF_LOCK:
151504190bf8SPavel Tikhomirov if (val & ~SOCK_BUF_LOCK_MASK) {
151604190bf8SPavel Tikhomirov ret = -EINVAL;
151704190bf8SPavel Tikhomirov break;
151804190bf8SPavel Tikhomirov }
151904190bf8SPavel Tikhomirov sk->sk_userlocks = val | (sk->sk_userlocks &
152004190bf8SPavel Tikhomirov ~SOCK_BUF_LOCK_MASK);
152104190bf8SPavel Tikhomirov break;
152204190bf8SPavel Tikhomirov
15232bb2f5fbSWei Wang case SO_RESERVE_MEM:
15242bb2f5fbSWei Wang {
15252bb2f5fbSWei Wang int delta;
15262bb2f5fbSWei Wang
15272bb2f5fbSWei Wang if (val < 0) {
15282bb2f5fbSWei Wang ret = -EINVAL;
15292bb2f5fbSWei Wang break;
15302bb2f5fbSWei Wang }
15312bb2f5fbSWei Wang
15322bb2f5fbSWei Wang delta = val - sk->sk_reserved_mem;
15332bb2f5fbSWei Wang if (delta < 0)
15342bb2f5fbSWei Wang sock_release_reserved_memory(sk, -delta);
15352bb2f5fbSWei Wang else
15362bb2f5fbSWei Wang ret = sock_reserve_memory(sk, delta);
15372bb2f5fbSWei Wang break;
15382bb2f5fbSWei Wang }
15392bb2f5fbSWei Wang
154026859240SAkhmat Karakotov case SO_TXREHASH:
154126859240SAkhmat Karakotov if (val < -1 || val > 1) {
154226859240SAkhmat Karakotov ret = -EINVAL;
154326859240SAkhmat Karakotov break;
154426859240SAkhmat Karakotov }
1545c11204c7SKevin Yang if ((u8)val == SOCK_TXREHASH_DEFAULT)
1546c11204c7SKevin Yang val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
1547c76a0328SEric Dumazet /* Paired with READ_ONCE() in tcp_rtx_synack()
1548c76a0328SEric Dumazet * and sk_getsockopt().
1549c76a0328SEric Dumazet */
1550cb6cd2ceSAkhmat Karakotov WRITE_ONCE(sk->sk_txrehash, (u8)val);
155126859240SAkhmat Karakotov break;
155226859240SAkhmat Karakotov
15531da177e4SLinus Torvalds default:
15541da177e4SLinus Torvalds ret = -ENOPROTOOPT;
15551da177e4SLinus Torvalds break;
15561da177e4SLinus Torvalds }
155724426654SMartin KaFai Lau sockopt_release_sock(sk);
15581da177e4SLinus Torvalds return ret;
15591da177e4SLinus Torvalds }
15604d748f99SMartin KaFai Lau
/* Set a socket option.
 *
 * Thin convenience wrapper around sk_setsockopt(): callers holding a
 * struct socket forward straight to the underlying struct sock.
 */
int sock_setsockopt(struct socket *sock, int level, int optname,
		    sockptr_t optval, unsigned int optlen)
{
	return sk_setsockopt(sock->sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_setsockopt);
15681da177e4SLinus Torvalds
sk_get_peer_cred(struct sock * sk)156935306eb2SEric Dumazet static const struct cred *sk_get_peer_cred(struct sock *sk)
157035306eb2SEric Dumazet {
157135306eb2SEric Dumazet const struct cred *cred;
157235306eb2SEric Dumazet
157335306eb2SEric Dumazet spin_lock(&sk->sk_peer_lock);
157435306eb2SEric Dumazet cred = get_cred(sk->sk_peer_cred);
157535306eb2SEric Dumazet spin_unlock(&sk->sk_peer_lock);
157635306eb2SEric Dumazet
157735306eb2SEric Dumazet return cred;
157835306eb2SEric Dumazet }
15791da177e4SLinus Torvalds
cred_to_ucred(struct pid * pid,const struct cred * cred,struct ucred * ucred)15808f09898bSstephen hemminger static void cred_to_ucred(struct pid *pid, const struct cred *cred,
15813f551f94SEric W. Biederman struct ucred *ucred)
15823f551f94SEric W. Biederman {
15833f551f94SEric W. Biederman ucred->pid = pid_vnr(pid);
15843f551f94SEric W. Biederman ucred->uid = ucred->gid = -1;
15853f551f94SEric W. Biederman if (cred) {
15863f551f94SEric W. Biederman struct user_namespace *current_ns = current_user_ns();
15873f551f94SEric W. Biederman
1588b2e4f544SEric W. Biederman ucred->uid = from_kuid_munged(current_ns, cred->euid);
1589b2e4f544SEric W. Biederman ucred->gid = from_kgid_munged(current_ns, cred->egid);
15903f551f94SEric W. Biederman }
15913f551f94SEric W. Biederman }
15923f551f94SEric W. Biederman
/* Copy the supplementary group list @src out to user memory at @dst.
 *
 * Each kernel gid is mapped through the current user namespace with
 * from_kgid_munged() before being written.  Returns 0 on success or
 * -EFAULT if any copy to the destination fails.
 */
static int groups_to_user(sockptr_t dst, const struct group_info *src)
{
	struct user_namespace *ns = current_user_ns();
	int i, count = src->ngroups;

	for (i = 0; i < count; i++) {
		gid_t gid = from_kgid_munged(ns, src->gid[i]);

		if (copy_to_sockptr_offset(dst, i * sizeof(gid),
					   &gid, sizeof(gid)))
			return -EFAULT;
	}

	return 0;
}
160728b5ba2aSDavid Herrmann
sk_getsockopt(struct sock * sk,int level,int optname,sockptr_t optval,sockptr_t optlen)160865ddc82dSMartin KaFai Lau int sk_getsockopt(struct sock *sk, int level, int optname,
16094ff09db1SMartin KaFai Lau sockptr_t optval, sockptr_t optlen)
16101da177e4SLinus Torvalds {
1611ba74a760SMartin KaFai Lau struct socket *sock = sk->sk_socket;
16121da177e4SLinus Torvalds
1613e71a4783SStephen Hemminger union {
16141da177e4SLinus Torvalds int val;
16155daab9dbSChenbo Feng u64 val64;
1616677f136cSEric Dumazet unsigned long ulval;
16171da177e4SLinus Torvalds struct linger ling;
1618fe0c72f3SArnd Bergmann struct old_timeval32 tm32;
1619fe0c72f3SArnd Bergmann struct __kernel_old_timeval tm;
1620a9beb86aSDeepa Dinamani struct __kernel_sock_timeval stm;
162180b14deeSRichard Cochran struct sock_txtime txtime;
1622d463126eSYangbo Lu struct so_timestamping timestamping;
16231da177e4SLinus Torvalds } v;
16241da177e4SLinus Torvalds
16254d0392beSH Hartley Sweeten int lv = sizeof(int);
16261da177e4SLinus Torvalds int len;
16271da177e4SLinus Torvalds
16284ff09db1SMartin KaFai Lau if (copy_from_sockptr(&len, optlen, sizeof(int)))
16291da177e4SLinus Torvalds return -EFAULT;
16301da177e4SLinus Torvalds if (len < 0)
16311da177e4SLinus Torvalds return -EINVAL;
16321da177e4SLinus Torvalds
163350fee1deSEugene Teo memset(&v, 0, sizeof(v));
1634df0bca04SClément Lecigne
1635e71a4783SStephen Hemminger switch (optname) {
16361da177e4SLinus Torvalds case SO_DEBUG:
16371da177e4SLinus Torvalds v.val = sock_flag(sk, SOCK_DBG);
16381da177e4SLinus Torvalds break;
16391da177e4SLinus Torvalds
16401da177e4SLinus Torvalds case SO_DONTROUTE:
16411da177e4SLinus Torvalds v.val = sock_flag(sk, SOCK_LOCALROUTE);
16421da177e4SLinus Torvalds break;
16431da177e4SLinus Torvalds
16441da177e4SLinus Torvalds case SO_BROADCAST:
16451b23a5dfSEric Dumazet v.val = sock_flag(sk, SOCK_BROADCAST);
16461da177e4SLinus Torvalds break;
16471da177e4SLinus Torvalds
16481da177e4SLinus Torvalds case SO_SNDBUF:
164974bc0843SEric Dumazet v.val = READ_ONCE(sk->sk_sndbuf);
16501da177e4SLinus Torvalds break;
16511da177e4SLinus Torvalds
16521da177e4SLinus Torvalds case SO_RCVBUF:
1653b4b55325SEric Dumazet v.val = READ_ONCE(sk->sk_rcvbuf);
16541da177e4SLinus Torvalds break;
16551da177e4SLinus Torvalds
16561da177e4SLinus Torvalds case SO_REUSEADDR:
16571da177e4SLinus Torvalds v.val = sk->sk_reuse;
16581da177e4SLinus Torvalds break;
16591da177e4SLinus Torvalds
1660055dc21aSTom Herbert case SO_REUSEPORT:
1661055dc21aSTom Herbert v.val = sk->sk_reuseport;
1662055dc21aSTom Herbert break;
1663055dc21aSTom Herbert
16641da177e4SLinus Torvalds case SO_KEEPALIVE:
16651b23a5dfSEric Dumazet v.val = sock_flag(sk, SOCK_KEEPOPEN);
16661da177e4SLinus Torvalds break;
16671da177e4SLinus Torvalds
16681da177e4SLinus Torvalds case SO_TYPE:
16691da177e4SLinus Torvalds v.val = sk->sk_type;
16701da177e4SLinus Torvalds break;
16711da177e4SLinus Torvalds
167249c794e9SJan Engelhardt case SO_PROTOCOL:
167349c794e9SJan Engelhardt v.val = sk->sk_protocol;
167449c794e9SJan Engelhardt break;
167549c794e9SJan Engelhardt
16760d6038eeSJan Engelhardt case SO_DOMAIN:
16770d6038eeSJan Engelhardt v.val = sk->sk_family;
16780d6038eeSJan Engelhardt break;
16790d6038eeSJan Engelhardt
16801da177e4SLinus Torvalds case SO_ERROR:
16811da177e4SLinus Torvalds v.val = -sock_error(sk);
16821da177e4SLinus Torvalds if (v.val == 0)
16831da177e4SLinus Torvalds v.val = xchg(&sk->sk_err_soft, 0);
16841da177e4SLinus Torvalds break;
16851da177e4SLinus Torvalds
16861da177e4SLinus Torvalds case SO_OOBINLINE:
16871b23a5dfSEric Dumazet v.val = sock_flag(sk, SOCK_URGINLINE);
16881da177e4SLinus Torvalds break;
16891da177e4SLinus Torvalds
16901da177e4SLinus Torvalds case SO_NO_CHECK:
169128448b80STom Herbert v.val = sk->sk_no_check_tx;
16921da177e4SLinus Torvalds break;
16931da177e4SLinus Torvalds
16941da177e4SLinus Torvalds case SO_PRIORITY:
16958bf43be7SEric Dumazet v.val = READ_ONCE(sk->sk_priority);
16961da177e4SLinus Torvalds break;
16971da177e4SLinus Torvalds
16981da177e4SLinus Torvalds case SO_LINGER:
16991da177e4SLinus Torvalds lv = sizeof(v.ling);
17001b23a5dfSEric Dumazet v.ling.l_onoff = sock_flag(sk, SOCK_LINGER);
1701bc1fb82aSEric Dumazet v.ling.l_linger = READ_ONCE(sk->sk_lingertime) / HZ;
17021da177e4SLinus Torvalds break;
17031da177e4SLinus Torvalds
17041da177e4SLinus Torvalds case SO_BSDCOMPAT:
17051da177e4SLinus Torvalds break;
17061da177e4SLinus Torvalds
17077f1bc6e9SDeepa Dinamani case SO_TIMESTAMP_OLD:
170892f37fd2SEric Dumazet v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1709887feae3SDeepa Dinamani !sock_flag(sk, SOCK_TSTAMP_NEW) &&
171092f37fd2SEric Dumazet !sock_flag(sk, SOCK_RCVTSTAMPNS);
171192f37fd2SEric Dumazet break;
171292f37fd2SEric Dumazet
17137f1bc6e9SDeepa Dinamani case SO_TIMESTAMPNS_OLD:
1714887feae3SDeepa Dinamani v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW);
1715887feae3SDeepa Dinamani break;
1716887feae3SDeepa Dinamani
1717887feae3SDeepa Dinamani case SO_TIMESTAMP_NEW:
1718887feae3SDeepa Dinamani v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW);
1719887feae3SDeepa Dinamani break;
1720887feae3SDeepa Dinamani
1721887feae3SDeepa Dinamani case SO_TIMESTAMPNS_NEW:
1722887feae3SDeepa Dinamani v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW);
17231da177e4SLinus Torvalds break;
17241da177e4SLinus Torvalds
17257f1bc6e9SDeepa Dinamani case SO_TIMESTAMPING_OLD:
1726742e4af3SJörn-Thorben Hinz case SO_TIMESTAMPING_NEW:
1727d463126eSYangbo Lu lv = sizeof(v.timestamping);
1728742e4af3SJörn-Thorben Hinz /* For the later-added case SO_TIMESTAMPING_NEW: Be strict about only
1729742e4af3SJörn-Thorben Hinz * returning the flags when they were set through the same option.
1730742e4af3SJörn-Thorben Hinz * Don't change the beviour for the old case SO_TIMESTAMPING_OLD.
1731742e4af3SJörn-Thorben Hinz */
1732742e4af3SJörn-Thorben Hinz if (optname == SO_TIMESTAMPING_OLD || sock_flag(sk, SOCK_TSTAMP_NEW)) {
1733e3390b30SEric Dumazet v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
1734251cd405SEric Dumazet v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
1735742e4af3SJörn-Thorben Hinz }
173620d49473SPatrick Ohly break;
173720d49473SPatrick Ohly
1738a9beb86aSDeepa Dinamani case SO_RCVTIMEO_OLD:
1739a9beb86aSDeepa Dinamani case SO_RCVTIMEO_NEW:
1740285975ddSEric Dumazet lv = sock_get_timeout(READ_ONCE(sk->sk_rcvtimeo), &v,
1741285975ddSEric Dumazet SO_RCVTIMEO_OLD == optname);
17421da177e4SLinus Torvalds break;
17431da177e4SLinus Torvalds
1744a9beb86aSDeepa Dinamani case SO_SNDTIMEO_OLD:
1745a9beb86aSDeepa Dinamani case SO_SNDTIMEO_NEW:
1746285975ddSEric Dumazet lv = sock_get_timeout(READ_ONCE(sk->sk_sndtimeo), &v,
1747285975ddSEric Dumazet SO_SNDTIMEO_OLD == optname);
17481da177e4SLinus Torvalds break;
17491da177e4SLinus Torvalds
17501da177e4SLinus Torvalds case SO_RCVLOWAT:
1751e6d12bdbSEric Dumazet v.val = READ_ONCE(sk->sk_rcvlowat);
17521da177e4SLinus Torvalds break;
17531da177e4SLinus Torvalds
17541da177e4SLinus Torvalds case SO_SNDLOWAT:
17551da177e4SLinus Torvalds v.val = 1;
17561da177e4SLinus Torvalds break;
17571da177e4SLinus Torvalds
17581da177e4SLinus Torvalds case SO_PASSCRED:
175982981930SEric Dumazet v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
17601da177e4SLinus Torvalds break;
17611da177e4SLinus Torvalds
17625e2ff670SAlexander Mikhalitsyn case SO_PASSPIDFD:
17635e2ff670SAlexander Mikhalitsyn v.val = !!test_bit(SOCK_PASSPIDFD, &sock->flags);
17645e2ff670SAlexander Mikhalitsyn break;
17655e2ff670SAlexander Mikhalitsyn
17661da177e4SLinus Torvalds case SO_PEERCRED:
1767109f6e39SEric W. Biederman {
1768109f6e39SEric W. Biederman struct ucred peercred;
1769109f6e39SEric W. Biederman if (len > sizeof(peercred))
1770109f6e39SEric W. Biederman len = sizeof(peercred);
177135306eb2SEric Dumazet
177235306eb2SEric Dumazet spin_lock(&sk->sk_peer_lock);
1773109f6e39SEric W. Biederman cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
177435306eb2SEric Dumazet spin_unlock(&sk->sk_peer_lock);
177535306eb2SEric Dumazet
17764ff09db1SMartin KaFai Lau if (copy_to_sockptr(optval, &peercred, len))
17771da177e4SLinus Torvalds return -EFAULT;
17781da177e4SLinus Torvalds goto lenout;
1779109f6e39SEric W. Biederman }
17801da177e4SLinus Torvalds
17817b26952aSAlexander Mikhalitsyn case SO_PEERPIDFD:
17827b26952aSAlexander Mikhalitsyn {
17837b26952aSAlexander Mikhalitsyn struct pid *peer_pid;
17847b26952aSAlexander Mikhalitsyn struct file *pidfd_file = NULL;
17857b26952aSAlexander Mikhalitsyn int pidfd;
17867b26952aSAlexander Mikhalitsyn
17877b26952aSAlexander Mikhalitsyn if (len > sizeof(pidfd))
17887b26952aSAlexander Mikhalitsyn len = sizeof(pidfd);
17897b26952aSAlexander Mikhalitsyn
17907b26952aSAlexander Mikhalitsyn spin_lock(&sk->sk_peer_lock);
17917b26952aSAlexander Mikhalitsyn peer_pid = get_pid(sk->sk_peer_pid);
17927b26952aSAlexander Mikhalitsyn spin_unlock(&sk->sk_peer_lock);
17937b26952aSAlexander Mikhalitsyn
17947b26952aSAlexander Mikhalitsyn if (!peer_pid)
1795b6f79e82SDavid Rheinsberg return -ENODATA;
17967b26952aSAlexander Mikhalitsyn
17977b26952aSAlexander Mikhalitsyn pidfd = pidfd_prepare(peer_pid, 0, &pidfd_file);
17987b26952aSAlexander Mikhalitsyn put_pid(peer_pid);
17997b26952aSAlexander Mikhalitsyn if (pidfd < 0)
18007b26952aSAlexander Mikhalitsyn return pidfd;
18017b26952aSAlexander Mikhalitsyn
18027b26952aSAlexander Mikhalitsyn if (copy_to_sockptr(optval, &pidfd, len) ||
18037b26952aSAlexander Mikhalitsyn copy_to_sockptr(optlen, &len, sizeof(int))) {
18047b26952aSAlexander Mikhalitsyn put_unused_fd(pidfd);
18057b26952aSAlexander Mikhalitsyn fput(pidfd_file);
18067b26952aSAlexander Mikhalitsyn
18077b26952aSAlexander Mikhalitsyn return -EFAULT;
18087b26952aSAlexander Mikhalitsyn }
18097b26952aSAlexander Mikhalitsyn
18107b26952aSAlexander Mikhalitsyn fd_install(pidfd, pidfd_file);
18117b26952aSAlexander Mikhalitsyn return 0;
18127b26952aSAlexander Mikhalitsyn }
18137b26952aSAlexander Mikhalitsyn
181428b5ba2aSDavid Herrmann case SO_PEERGROUPS:
181528b5ba2aSDavid Herrmann {
181635306eb2SEric Dumazet const struct cred *cred;
181728b5ba2aSDavid Herrmann int ret, n;
181828b5ba2aSDavid Herrmann
181935306eb2SEric Dumazet cred = sk_get_peer_cred(sk);
182035306eb2SEric Dumazet if (!cred)
182128b5ba2aSDavid Herrmann return -ENODATA;
182228b5ba2aSDavid Herrmann
182335306eb2SEric Dumazet n = cred->group_info->ngroups;
182428b5ba2aSDavid Herrmann if (len < n * sizeof(gid_t)) {
182528b5ba2aSDavid Herrmann len = n * sizeof(gid_t);
182635306eb2SEric Dumazet put_cred(cred);
18274ff09db1SMartin KaFai Lau return copy_to_sockptr(optlen, &len, sizeof(int)) ? -EFAULT : -ERANGE;
182828b5ba2aSDavid Herrmann }
182928b5ba2aSDavid Herrmann len = n * sizeof(gid_t);
183028b5ba2aSDavid Herrmann
18314ff09db1SMartin KaFai Lau ret = groups_to_user(optval, cred->group_info);
183235306eb2SEric Dumazet put_cred(cred);
183328b5ba2aSDavid Herrmann if (ret)
183428b5ba2aSDavid Herrmann return ret;
183528b5ba2aSDavid Herrmann goto lenout;
183628b5ba2aSDavid Herrmann }
183728b5ba2aSDavid Herrmann
18381da177e4SLinus Torvalds case SO_PEERNAME:
18391da177e4SLinus Torvalds {
18408936bf53SKuniyuki Iwashima struct sockaddr_storage address;
18411da177e4SLinus Torvalds
18421ded5e5aSEric Dumazet lv = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 2);
18439b2c45d4SDenys Vlasenko if (lv < 0)
18441da177e4SLinus Torvalds return -ENOTCONN;
18451da177e4SLinus Torvalds if (lv < len)
18461da177e4SLinus Torvalds return -EINVAL;
18478936bf53SKuniyuki Iwashima if (copy_to_sockptr(optval, &address, len))
18481da177e4SLinus Torvalds return -EFAULT;
18491da177e4SLinus Torvalds goto lenout;
18501da177e4SLinus Torvalds }
18511da177e4SLinus Torvalds
18521da177e4SLinus Torvalds /* Dubious BSD thing... Probably nobody even uses it, but
18531da177e4SLinus Torvalds * the UNIX standard wants it for whatever reason... -DaveM
18541da177e4SLinus Torvalds */
18551da177e4SLinus Torvalds case SO_ACCEPTCONN:
18561da177e4SLinus Torvalds v.val = sk->sk_state == TCP_LISTEN;
18571da177e4SLinus Torvalds break;
18581da177e4SLinus Torvalds
1859877ce7c1SCatherine Zhang case SO_PASSSEC:
186082981930SEric Dumazet v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
1861877ce7c1SCatherine Zhang break;
1862877ce7c1SCatherine Zhang
18631da177e4SLinus Torvalds case SO_PEERSEC:
1864b10b9c34SPaul Moore return security_socket_getpeersec_stream(sock,
1865b10b9c34SPaul Moore optval, optlen, len);
18661da177e4SLinus Torvalds
18674a19ec58SLaszlo Attila Toth case SO_MARK:
18683c5b4d69SEric Dumazet v.val = READ_ONCE(sk->sk_mark);
18694a19ec58SLaszlo Attila Toth break;
18704a19ec58SLaszlo Attila Toth
18716fd1d51cSErin MacNeil case SO_RCVMARK:
18726fd1d51cSErin MacNeil v.val = sock_flag(sk, SOCK_RCVMARK);
18736fd1d51cSErin MacNeil break;
18746fd1d51cSErin MacNeil
18753b885787SNeil Horman case SO_RXQ_OVFL:
18761b23a5dfSEric Dumazet v.val = sock_flag(sk, SOCK_RXQ_OVFL);
18773b885787SNeil Horman break;
18783b885787SNeil Horman
18796e3e939fSJohannes Berg case SO_WIFI_STATUS:
18801b23a5dfSEric Dumazet v.val = sock_flag(sk, SOCK_WIFI_STATUS);
18816e3e939fSJohannes Berg break;
18826e3e939fSJohannes Berg
1883ef64a54fSPavel Emelyanov case SO_PEEK_OFF:
18841ded5e5aSEric Dumazet if (!READ_ONCE(sock->ops)->set_peek_off)
1885ef64a54fSPavel Emelyanov return -EOPNOTSUPP;
1886ef64a54fSPavel Emelyanov
188711695c6eSEric Dumazet v.val = READ_ONCE(sk->sk_peek_off);
1888ef64a54fSPavel Emelyanov break;
1889bc2f7996SDavid S. Miller case SO_NOFCS:
18901b23a5dfSEric Dumazet v.val = sock_flag(sk, SOCK_NOFCS);
1891bc2f7996SDavid S. Miller break;
1892c91f6df2SBrian Haley
1893f7b86bfeSPavel Emelyanov case SO_BINDTODEVICE:
1894c91f6df2SBrian Haley return sock_getbindtodevice(sk, optval, optlen, len);
1895c91f6df2SBrian Haley
1896a8fc9277SPavel Emelyanov case SO_GET_FILTER:
18974ff09db1SMartin KaFai Lau len = sk_get_filter(sk, optval, len);
1898a8fc9277SPavel Emelyanov if (len < 0)
1899a8fc9277SPavel Emelyanov return len;
1900a8fc9277SPavel Emelyanov
1901a8fc9277SPavel Emelyanov goto lenout;
1902c91f6df2SBrian Haley
1903d59577b6SVincent Bernat case SO_LOCK_FILTER:
1904d59577b6SVincent Bernat v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1905d59577b6SVincent Bernat break;
1906d59577b6SVincent Bernat
1907ea02f941SMichal Sekletar case SO_BPF_EXTENSIONS:
1908ea02f941SMichal Sekletar v.val = bpf_tell_extensions();
1909ea02f941SMichal Sekletar break;
1910ea02f941SMichal Sekletar
19117d4c04fcSKeller, Jacob E case SO_SELECT_ERR_QUEUE:
19127d4c04fcSKeller, Jacob E v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
19137d4c04fcSKeller, Jacob E break;
19147d4c04fcSKeller, Jacob E
1915e0d1095aSCong Wang #ifdef CONFIG_NET_RX_BUSY_POLL
191664b0dc51SEliezer Tamir case SO_BUSY_POLL:
1917e5f0d2ddSEric Dumazet v.val = READ_ONCE(sk->sk_ll_usec);
1918dafcc438SEliezer Tamir break;
19197fd3253aSBjörn Töpel case SO_PREFER_BUSY_POLL:
19207fd3253aSBjörn Töpel v.val = READ_ONCE(sk->sk_prefer_busy_poll);
19217fd3253aSBjörn Töpel break;
1922dafcc438SEliezer Tamir #endif
1923dafcc438SEliezer Tamir
192462748f32SEric Dumazet case SO_MAX_PACING_RATE:
1925ea7f45efSEric Dumazet /* The READ_ONCE() pair with the WRITE_ONCE() in sk_setsockopt() */
1926677f136cSEric Dumazet if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
1927677f136cSEric Dumazet lv = sizeof(v.ulval);
1928ea7f45efSEric Dumazet v.ulval = READ_ONCE(sk->sk_max_pacing_rate);
1929677f136cSEric Dumazet } else {
193076a9ebe8SEric Dumazet /* 32bit version */
1931ea7f45efSEric Dumazet v.val = min_t(unsigned long, ~0U,
1932ea7f45efSEric Dumazet READ_ONCE(sk->sk_max_pacing_rate));
1933677f136cSEric Dumazet }
193462748f32SEric Dumazet break;
193562748f32SEric Dumazet
19362c8c56e1SEric Dumazet case SO_INCOMING_CPU:
19377170a977SEric Dumazet v.val = READ_ONCE(sk->sk_incoming_cpu);
19382c8c56e1SEric Dumazet break;
19392c8c56e1SEric Dumazet
1940a2d133b1SJosh Hunt case SO_MEMINFO:
1941a2d133b1SJosh Hunt {
1942a2d133b1SJosh Hunt u32 meminfo[SK_MEMINFO_VARS];
1943a2d133b1SJosh Hunt
1944a2d133b1SJosh Hunt sk_get_meminfo(sk, meminfo);
1945a2d133b1SJosh Hunt
1946a2d133b1SJosh Hunt len = min_t(unsigned int, len, sizeof(meminfo));
19474ff09db1SMartin KaFai Lau if (copy_to_sockptr(optval, &meminfo, len))
1948a2d133b1SJosh Hunt return -EFAULT;
1949a2d133b1SJosh Hunt
1950a2d133b1SJosh Hunt goto lenout;
1951a2d133b1SJosh Hunt }
19526d433902SSridhar Samudrala
19536d433902SSridhar Samudrala #ifdef CONFIG_NET_RX_BUSY_POLL
19546d433902SSridhar Samudrala case SO_INCOMING_NAPI_ID:
19556d433902SSridhar Samudrala v.val = READ_ONCE(sk->sk_napi_id);
19566d433902SSridhar Samudrala
19576d433902SSridhar Samudrala /* aggregate non-NAPI IDs down to 0 */
19586d433902SSridhar Samudrala if (v.val < MIN_NAPI_ID)
19596d433902SSridhar Samudrala v.val = 0;
19606d433902SSridhar Samudrala
19616d433902SSridhar Samudrala break;
19626d433902SSridhar Samudrala #endif
19636d433902SSridhar Samudrala
19645daab9dbSChenbo Feng case SO_COOKIE:
19655daab9dbSChenbo Feng lv = sizeof(u64);
19665daab9dbSChenbo Feng if (len < lv)
19675daab9dbSChenbo Feng return -EINVAL;
19685daab9dbSChenbo Feng v.val64 = sock_gen_cookie(sk);
19695daab9dbSChenbo Feng break;
19705daab9dbSChenbo Feng
197176851d12SWillem de Bruijn case SO_ZEROCOPY:
197276851d12SWillem de Bruijn v.val = sock_flag(sk, SOCK_ZEROCOPY);
197376851d12SWillem de Bruijn break;
197476851d12SWillem de Bruijn
197580b14deeSRichard Cochran case SO_TXTIME:
197680b14deeSRichard Cochran lv = sizeof(v.txtime);
197780b14deeSRichard Cochran v.txtime.clockid = sk->sk_clockid;
197880b14deeSRichard Cochran v.txtime.flags |= sk->sk_txtime_deadline_mode ?
197980b14deeSRichard Cochran SOF_TXTIME_DEADLINE_MODE : 0;
19804b15c707SJesus Sanchez-Palencia v.txtime.flags |= sk->sk_txtime_report_errors ?
19814b15c707SJesus Sanchez-Palencia SOF_TXTIME_REPORT_ERRORS : 0;
198280b14deeSRichard Cochran break;
198380b14deeSRichard Cochran
1984f5dd3d0cSDavid Herrmann case SO_BINDTOIFINDEX:
1985e5fccaa1SEric Dumazet v.val = READ_ONCE(sk->sk_bound_dev_if);
1986f5dd3d0cSDavid Herrmann break;
1987f5dd3d0cSDavid Herrmann
1988e8b9eab9SMartynas Pumputis case SO_NETNS_COOKIE:
1989e8b9eab9SMartynas Pumputis lv = sizeof(u64);
1990e8b9eab9SMartynas Pumputis if (len != lv)
1991e8b9eab9SMartynas Pumputis return -EINVAL;
1992e8b9eab9SMartynas Pumputis v.val64 = sock_net(sk)->net_cookie;
1993e8b9eab9SMartynas Pumputis break;
1994e8b9eab9SMartynas Pumputis
199504190bf8SPavel Tikhomirov case SO_BUF_LOCK:
199604190bf8SPavel Tikhomirov v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK;
199704190bf8SPavel Tikhomirov break;
199804190bf8SPavel Tikhomirov
19992bb2f5fbSWei Wang case SO_RESERVE_MEM:
2000fe11fdcbSEric Dumazet v.val = READ_ONCE(sk->sk_reserved_mem);
20012bb2f5fbSWei Wang break;
20022bb2f5fbSWei Wang
200326859240SAkhmat Karakotov case SO_TXREHASH:
2004c76a0328SEric Dumazet /* Paired with WRITE_ONCE() in sk_setsockopt() */
2005c76a0328SEric Dumazet v.val = READ_ONCE(sk->sk_txrehash);
200626859240SAkhmat Karakotov break;
200726859240SAkhmat Karakotov
20081da177e4SLinus Torvalds default:
2009443b5991SYOSHIFUJI Hideaki/吉藤英明 /* We implement the SO_SNDLOWAT etc to not be settable
2010443b5991SYOSHIFUJI Hideaki/吉藤英明 * (1003.1g 7).
2011443b5991SYOSHIFUJI Hideaki/吉藤英明 */
2012e71a4783SStephen Hemminger return -ENOPROTOOPT;
20131da177e4SLinus Torvalds }
2014e71a4783SStephen Hemminger
20151da177e4SLinus Torvalds if (len > lv)
20161da177e4SLinus Torvalds len = lv;
20174ff09db1SMartin KaFai Lau if (copy_to_sockptr(optval, &v, len))
20181da177e4SLinus Torvalds return -EFAULT;
20191da177e4SLinus Torvalds lenout:
20204ff09db1SMartin KaFai Lau if (copy_to_sockptr(optlen, &len, sizeof(int)))
20211da177e4SLinus Torvalds return -EFAULT;
20221da177e4SLinus Torvalds return 0;
20231da177e4SLinus Torvalds }
20241da177e4SLinus Torvalds
2025a5b5bb9aSIngo Molnar /*
2026a5b5bb9aSIngo Molnar * Initialize an sk_lock.
2027a5b5bb9aSIngo Molnar *
2028a5b5bb9aSIngo Molnar * (We also register the sk_lock with the lock validator.)
2029a5b5bb9aSIngo Molnar */
sock_lock_init(struct sock * sk)2030b6f99a21SDave Jones static inline void sock_lock_init(struct sock *sk)
2031a5b5bb9aSIngo Molnar {
2032cdfbabfbSDavid Howells if (sk->sk_kern_sock)
2033cdfbabfbSDavid Howells sock_lock_init_class_and_name(
2034cdfbabfbSDavid Howells sk,
2035cdfbabfbSDavid Howells af_family_kern_slock_key_strings[sk->sk_family],
2036cdfbabfbSDavid Howells af_family_kern_slock_keys + sk->sk_family,
2037cdfbabfbSDavid Howells af_family_kern_key_strings[sk->sk_family],
2038cdfbabfbSDavid Howells af_family_kern_keys + sk->sk_family);
2039cdfbabfbSDavid Howells else
2040cdfbabfbSDavid Howells sock_lock_init_class_and_name(
2041cdfbabfbSDavid Howells sk,
2042ed07536eSPeter Zijlstra af_family_slock_key_strings[sk->sk_family],
2043a5b5bb9aSIngo Molnar af_family_slock_keys + sk->sk_family,
2044a5b5bb9aSIngo Molnar af_family_key_strings[sk->sk_family],
2045ed07536eSPeter Zijlstra af_family_keys + sk->sk_family);
2046a5b5bb9aSIngo Molnar }
2047a5b5bb9aSIngo Molnar
20484dc6dc71SEric Dumazet /*
20494dc6dc71SEric Dumazet * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
20504dc6dc71SEric Dumazet * even temporarly, because of RCU lookups. sk_node should also be left as is.
205168835abaSEric Dumazet * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
20524dc6dc71SEric Dumazet */
static void sock_copy(struct sock *nsk, const struct sock *osk)
{
	/* READ_ONCE() because sk_prot may be changed concurrently
	 * (e.g. by ULP/sockmap code); obj_size below must come from
	 * a stable snapshot.
	 */
	const struct proto *prot = READ_ONCE(osk->sk_prot);
#ifdef CONFIG_SECURITY_NETWORK
	/* Save the clone's own LSM blob pointer: the memcpy below would
	 * otherwise overwrite it with the parent's pointer.
	 */
	void *sptr = nsk->sk_security;
#endif

	/* If we move sk_tx_queue_mapping out of the private section,
	 * we must check if sk_tx_queue_clear() is called after
	 * sock_copy() in sk_clone_lock().
	 */
	BUILD_BUG_ON(offsetof(struct sock, sk_tx_queue_mapping) <
		     offsetof(struct sock, sk_dontcopy_begin) ||
		     offsetof(struct sock, sk_tx_queue_mapping) >=
		     offsetof(struct sock, sk_dontcopy_end));

	/* Copy everything before the dontcopy window ... */
	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));

	/* ... and everything after it (up to the protocol's full object
	 * size), leaving sk_dontcopy_begin..sk_dontcopy_end (refcount,
	 * hash node, lock) untouched for RCU lookup safety.
	 */
	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
	       prot->obj_size - offsetof(struct sock, sk_dontcopy_end));

#ifdef CONFIG_SECURITY_NETWORK
	/* Restore the clone's blob and let the LSM copy label state. */
	nsk->sk_security = sptr;
	security_sk_clone(osk, nsk);
#endif
}
2079f1a6c4daSPavel Emelyanov
/* Allocate a struct sock for @prot, from its dedicated slab cache when
 * one exists, otherwise from the generic heap.  On success the LSM blob
 * is allocated and a reference on the protocol module is held; on any
 * failure everything is rolled back and NULL is returned.
 */
static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
				  int family)
{
	struct kmem_cache *slab = prot->slab;
	struct sock *sk;

	if (slab) {
		/* Strip __GFP_ZERO: only the RCU-nulls sensitive part is
		 * cleared, and only when init-on-alloc is requested.
		 */
		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
		if (!sk)
			return NULL;
		if (want_init_on_alloc(priority))
			sk_prot_clear_nulls(sk, prot->obj_size);
	} else {
		sk = kmalloc(prot->obj_size, priority);
		if (!sk)
			return NULL;
	}

	if (security_sk_alloc(sk, family, priority))
		goto out_free;
	if (!try_module_get(prot->owner))
		goto out_free_sec;

	return sk;

out_free_sec:
	security_sk_free(sk);
out_free:
	if (slab)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	return NULL;
}
2115c308c1b2SPavel Emelyanov
sk_prot_free(struct proto * prot,struct sock * sk)2116c308c1b2SPavel Emelyanov static void sk_prot_free(struct proto *prot, struct sock *sk)
2117c308c1b2SPavel Emelyanov {
2118c308c1b2SPavel Emelyanov struct kmem_cache *slab;
21192e4afe7bSPavel Emelyanov struct module *owner;
2120c308c1b2SPavel Emelyanov
21212e4afe7bSPavel Emelyanov owner = prot->owner;
2122c308c1b2SPavel Emelyanov slab = prot->slab;
21232e4afe7bSPavel Emelyanov
2124bd1060a1STejun Heo cgroup_sk_free(&sk->sk_cgrp_data);
21252d758073SJohannes Weiner mem_cgroup_sk_free(sk);
21262e4afe7bSPavel Emelyanov security_sk_free(sk);
2127c308c1b2SPavel Emelyanov if (slab != NULL)
2128c308c1b2SPavel Emelyanov kmem_cache_free(slab, sk);
2129c308c1b2SPavel Emelyanov else
2130c308c1b2SPavel Emelyanov kfree(sk);
21312e4afe7bSPavel Emelyanov module_put(owner);
2132c308c1b2SPavel Emelyanov }
2133c308c1b2SPavel Emelyanov
21341da177e4SLinus Torvalds /**
21351da177e4SLinus Torvalds * sk_alloc - All socket objects are allocated here
2136c4ea43c5SRandy Dunlap * @net: the applicable net namespace
21374dc3b16bSPavel Pisa * @family: protocol family
21384dc3b16bSPavel Pisa * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
21394dc3b16bSPavel Pisa * @prot: struct proto associated with this new sock instance
214011aa9c28SEric W. Biederman * @kern: is this to be a kernel socket?
21411da177e4SLinus Torvalds */
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
		      struct proto *prot, int kern)
{
	struct sock *sk;

	/* __GFP_ZERO so every field starts cleared (slab path only zeroes
	 * selectively inside sk_prot_alloc when init-on-alloc is set).
	 */
	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
	if (sk) {
		sk->sk_family = family;
		/*
		 * See comment in struct sock definition to understand
		 * why we need sk_prot_creator -acme
		 */
		sk->sk_prot = sk->sk_prot_creator = prot;
		sk->sk_kern_sock = kern;
		sock_lock_init(sk);
		/* Only user sockets pin the netns; kernel sockets must not
		 * keep a netns alive.
		 */
		sk->sk_net_refcnt = kern ? 0 : 1;
		if (likely(sk->sk_net_refcnt)) {
			get_net_track(net, &sk->ns_tracker, priority);
			sock_inuse_add(net, 1);
		} else {
			/* Track kernel sockets without a refcount so leaks at
			 * netns destroy time are still detectable.
			 */
			__netns_tracker_alloc(net, &sk->ns_tracker,
					      false, priority);
		}

		sock_net_set(sk, net);
		/* One unit held by the socket itself; dropped in sk_free(). */
		refcount_set(&sk->sk_wmem_alloc, 1);

		mem_cgroup_sk_alloc(sk);
		cgroup_sk_alloc(&sk->sk_cgrp_data);
		sock_update_classid(&sk->sk_cgrp_data);
		sock_update_netprioidx(&sk->sk_cgrp_data);
		sk_tx_queue_clear(sk);
	}

	return sk;
}
21782a91525cSEric Dumazet EXPORT_SYMBOL(sk_alloc);
21791da177e4SLinus Torvalds
2180a4298e45SEric Dumazet /* Sockets having SOCK_RCU_FREE will call this function after one RCU
2181a4298e45SEric Dumazet * grace period. This is the case for UDP sockets and TCP listeners.
2182a4298e45SEric Dumazet */
static void __sk_destruct(struct rcu_head *head)
{
	struct sock *sk = container_of(head, struct sock, sk_rcu);
	struct sk_filter *filter;

	/* Protocol-specific destructor first, while state is intact. */
	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	/* sk_wmem_alloc == 0 proves no concurrent users remain, so the
	 * lockdep condition accepts the dereference without rcu_read_lock.
	 */
	filter = rcu_dereference_check(sk->sk_filter,
				       refcount_read(&sk->sk_wmem_alloc) == 0);
	if (filter) {
		sk_filter_uncharge(sk, filter);
		RCU_INIT_POINTER(sk->sk_filter, NULL);
	}

	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);

#ifdef CONFIG_BPF_SYSCALL
	bpf_sk_storage_free(sk);
#endif

	/* Any remaining option memory at this point is a leak: report it. */
	if (atomic_read(&sk->sk_omem_alloc))
		pr_debug("%s: optmem leakage (%d bytes) detected\n",
			 __func__, atomic_read(&sk->sk_omem_alloc));

	/* Release the per-socket page-frag cache, if any. */
	if (sk->sk_frag.page) {
		put_page(sk->sk_frag.page);
		sk->sk_frag.page = NULL;
	}

	/* We do not need to acquire sk->sk_peer_lock, we are the last user. */
	put_cred(sk->sk_peer_cred);
	put_pid(sk->sk_peer_pid);

	if (likely(sk->sk_net_refcnt))
		put_net_track(sock_net(sk), &sk->ns_tracker);
	else
		/* Kernel socket: only the leak tracker was armed. */
		__netns_tracker_free(sock_net(sk), &sk->ns_tracker, false);

	/* Free via the creator proto, which may differ from sk_prot. */
	sk_prot_free(sk->sk_prot_creator, sk);
}
22242b85a34eSEric Dumazet
sk_destruct(struct sock * sk)2225a4298e45SEric Dumazet void sk_destruct(struct sock *sk)
2226a4298e45SEric Dumazet {
22278c7138b3SMartin KaFai Lau bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
22288c7138b3SMartin KaFai Lau
22298c7138b3SMartin KaFai Lau if (rcu_access_pointer(sk->sk_reuseport_cb)) {
22308c7138b3SMartin KaFai Lau reuseport_detach_sock(sk);
22318c7138b3SMartin KaFai Lau use_call_rcu = true;
22328c7138b3SMartin KaFai Lau }
22338c7138b3SMartin KaFai Lau
22348c7138b3SMartin KaFai Lau if (use_call_rcu)
2235a4298e45SEric Dumazet call_rcu(&sk->sk_rcu, __sk_destruct);
2236a4298e45SEric Dumazet else
2237a4298e45SEric Dumazet __sk_destruct(&sk->sk_rcu);
2238a4298e45SEric Dumazet }
2239a4298e45SEric Dumazet
__sk_free(struct sock * sk)2240eb4cb008SCraig Gallek static void __sk_free(struct sock *sk)
2241eb4cb008SCraig Gallek {
2242648845abSTonghao Zhang if (likely(sk->sk_net_refcnt))
2243648845abSTonghao Zhang sock_inuse_add(sock_net(sk), -1);
2244648845abSTonghao Zhang
22459709020cSEric Dumazet if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk)))
2246eb4cb008SCraig Gallek sock_diag_broadcast_destroy(sk);
2247eb4cb008SCraig Gallek else
2248eb4cb008SCraig Gallek sk_destruct(sk);
2249eb4cb008SCraig Gallek }
2250eb4cb008SCraig Gallek
sk_free(struct sock * sk)22512b85a34eSEric Dumazet void sk_free(struct sock *sk)
22522b85a34eSEric Dumazet {
22532b85a34eSEric Dumazet /*
225425985edcSLucas De Marchi * We subtract one from sk_wmem_alloc and can know if
22552b85a34eSEric Dumazet * some packets are still in some tx queue.
22562b85a34eSEric Dumazet * If not null, sock_wfree() will call __sk_free(sk) later
22572b85a34eSEric Dumazet */
225814afee4bSReshetova, Elena if (refcount_dec_and_test(&sk->sk_wmem_alloc))
22592b85a34eSEric Dumazet __sk_free(sk);
22602b85a34eSEric Dumazet }
22612a91525cSEric Dumazet EXPORT_SYMBOL(sk_free);
22621da177e4SLinus Torvalds
/* Initialize the queues and locks shared by every socket, and register
 * per-family lockdep classes for each of them so that cross-family lock
 * nesting is validated correctly.
 */
static void sk_init_common(struct sock *sk)
{
	skb_queue_head_init(&sk->sk_receive_queue);
	skb_queue_head_init(&sk->sk_write_queue);
	skb_queue_head_init(&sk->sk_error_queue);

	rwlock_init(&sk->sk_callback_lock);
	lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
			af_rlock_keys + sk->sk_family,
			af_family_rlock_key_strings[sk->sk_family]);
	lockdep_set_class_and_name(&sk->sk_write_queue.lock,
			af_wlock_keys + sk->sk_family,
			af_family_wlock_key_strings[sk->sk_family]);
	lockdep_set_class_and_name(&sk->sk_error_queue.lock,
			af_elock_keys + sk->sk_family,
			af_family_elock_key_strings[sk->sk_family]);
	lockdep_set_class_and_name(&sk->sk_callback_lock,
			af_callback_keys + sk->sk_family,
			af_family_clock_key_strings[sk->sk_family]);
}
2283581319c5SPaolo Abeni
2284e56c57d0SEric Dumazet /**
2285e56c57d0SEric Dumazet * sk_clone_lock - clone a socket, and lock its clone
2286e56c57d0SEric Dumazet * @sk: the socket to clone
2287e56c57d0SEric Dumazet * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
2288e56c57d0SEric Dumazet *
2289e56c57d0SEric Dumazet * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
2290e56c57d0SEric Dumazet */
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
{
	/* Snapshot sk_prot: it can change concurrently (ULP/sockmap). */
	struct proto *prot = READ_ONCE(sk->sk_prot);
	struct sk_filter *filter;
	bool is_charged = true;
	struct sock *newsk;

	newsk = sk_prot_alloc(prot, priority, sk->sk_family);
	if (!newsk)
		goto out;

	sock_copy(newsk, sk);

	newsk->sk_prot_creator = prot;

	/* SANITY */
	if (likely(newsk->sk_net_refcnt)) {
		get_net_track(sock_net(newsk), &newsk->ns_tracker, priority);
		sock_inuse_add(sock_net(newsk), 1);
	} else {
		/* Kernel sockets are not elevating the struct net refcount.
		 * Instead, use a tracker to more easily detect if a layer
		 * is not properly dismantling its kernel sockets at netns
		 * destroy time.
		 */
		__netns_tracker_alloc(sock_net(newsk), &newsk->ns_tracker,
				      false, priority);
	}
	sk_node_init(&newsk->sk_node);
	sock_lock_init(newsk);
	/* Returned locked, per the function contract (see header comment). */
	bh_lock_sock(newsk);
	newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
	newsk->sk_backlog.len = 0;

	atomic_set(&newsk->sk_rmem_alloc, 0);

	/* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */
	refcount_set(&newsk->sk_wmem_alloc, 1);

	atomic_set(&newsk->sk_omem_alloc, 0);
	sk_init_common(newsk);

	newsk->sk_dst_cache	= NULL;
	newsk->sk_dst_pending_confirm = 0;
	newsk->sk_wmem_queued	= 0;
	newsk->sk_forward_alloc = 0;
	newsk->sk_reserved_mem  = 0;
	atomic_set(&newsk->sk_drops, 0);
	newsk->sk_send_head	= NULL;
	/* The parent's bound-port lock does not carry over to the child. */
	newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
	atomic_set(&newsk->sk_zckey, 0);

	sock_reset_flag(newsk, SOCK_DONE);

	/* sk->sk_memcg will be populated at accept() time */
	newsk->sk_memcg = NULL;

	cgroup_sk_clone(&newsk->sk_cgrp_data);

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter != NULL)
		/* though it's an empty new sock, the charging may fail
		 * if sysctl_optmem_max was changed between creation of
		 * original socket and cloning
		 */
		is_charged = sk_filter_charge(newsk, filter);
	RCU_INIT_POINTER(newsk->sk_filter, filter);
	rcu_read_unlock();

	if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
		/* We need to make sure that we don't uncharge the new
		 * socket if we couldn't charge it in the first place
		 * as otherwise we uncharge the parent's filter.
		 */
		if (!is_charged)
			RCU_INIT_POINTER(newsk->sk_filter, NULL);
		sk_free_unlock_clone(newsk);
		newsk = NULL;
		goto out;
	}
	RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);

	if (bpf_sk_storage_clone(sk, newsk)) {
		sk_free_unlock_clone(newsk);
		newsk = NULL;
		goto out;
	}

	/* Clear sk_user_data if parent had the pointer tagged
	 * as not suitable for copying when cloning.
	 */
	if (sk_user_data_is_nocopy(newsk))
		newsk->sk_user_data = NULL;

	newsk->sk_err	   = 0;
	newsk->sk_err_soft = 0;
	newsk->sk_priority = 0;
	newsk->sk_incoming_cpu = raw_smp_processor_id();

	/* Before updating sk_refcnt, we must commit prior changes to memory
	 * (Documentation/RCU/rculist_nulls.rst for details)
	 */
	smp_wmb();
	/* Two references: one for the hash table, one for the caller. */
	refcount_set(&newsk->sk_refcnt, 2);

	sk_set_socket(newsk, NULL);
	sk_tx_queue_clear(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, NULL);

	if (newsk->sk_prot->sockets_allocated)
		sk_sockets_allocated_inc(newsk);

	if (sock_needs_netstamp(sk) && newsk->sk_flags & SK_FLAGS_TIMESTAMP)
		net_enable_timestamp();
out:
	return newsk;
}
2409e56c57d0SEric Dumazet EXPORT_SYMBOL_GPL(sk_clone_lock);
241087d11cebSArnaldo Carvalho de Melo
void sk_free_unlock_clone(struct sock *sk)
{
	/* The socket is still a raw copy of its parent: invalidate the
	 * inherited destructor so a plain sk_free() cannot run parent
	 * teardown on it, then unlock (it came locked from
	 * sk_clone_lock()) and free.
	 */
	sk->sk_destruct = NULL;
	bh_unlock_sock(sk);
	sk_free(sk);
}
241994352d45SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
242094352d45SArnaldo Carvalho de Melo
sk_dst_gso_max_size(struct sock * sk,struct dst_entry * dst)2421b1a78b9bSXin Long static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst)
24227c4e983cSAlexander Duyck {
2423b1a78b9bSXin Long bool is_ipv6 = false;
2424b1a78b9bSXin Long u32 max_size;
2425b1a78b9bSXin Long
24267c4e983cSAlexander Duyck #if IS_ENABLED(CONFIG_IPV6)
2427b1a78b9bSXin Long is_ipv6 = (sk->sk_family == AF_INET6 &&
2428b1a78b9bSXin Long !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr));
24297c4e983cSAlexander Duyck #endif
2430b1a78b9bSXin Long /* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */
2431b1a78b9bSXin Long max_size = is_ipv6 ? READ_ONCE(dst->dev->gso_max_size) :
2432b1a78b9bSXin Long READ_ONCE(dst->dev->gso_ipv4_max_size);
2433b1a78b9bSXin Long if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk))
2434b1a78b9bSXin Long max_size = GSO_LEGACY_MAX_SIZE;
2435b1a78b9bSXin Long
2436b1a78b9bSXin Long return max_size - (MAX_TCP_HEADER + 1);
24377c4e983cSAlexander Duyck }
24387c4e983cSAlexander Duyck
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
	u32 max_segs = 1;

	/* Start from the device's feature set; TCP always advertises GSO
	 * since it can segment in software.
	 */
	sk->sk_route_caps = dst->dev->features;
	if (sk_is_tcp(sk))
		sk->sk_route_caps |= NETIF_F_GSO;
	if (sk->sk_route_caps & NETIF_F_GSO)
		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
	if (unlikely(sk->sk_gso_disabled))
		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
	if (sk_can_gso(sk)) {
		/* IPsec without offload support cannot carry GSO packets. */
		if (dst->header_len && !xfrm_dst_offload_ok(dst)) {
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
		} else {
			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
			sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst);
			/* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
			max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
		}
	}
	sk->sk_gso_max_segs = max_segs;
	/* Cache the route last, once all caps are consistent. */
	sk_dst_set(sk, dst);
}
24639958089aSAndi Kleen EXPORT_SYMBOL_GPL(sk_setup_caps);
24649958089aSAndi Kleen
24651da177e4SLinus Torvalds /*
24661da177e4SLinus Torvalds * Simple resource managers for sockets.
24671da177e4SLinus Torvalds */
24681da177e4SLinus Torvalds
24691da177e4SLinus Torvalds
24701da177e4SLinus Torvalds /*
24711da177e4SLinus Torvalds * Write buffer destructor automatically called from kfree_skb.
24721da177e4SLinus Torvalds */
/* Write-side skb destructor: give skb->truesize back to sk_wmem_alloc,
 * wake writers, and free the socket if this was the last in-flight
 * reference keeping it alive.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;
	bool free;

	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
		/* Fast path for RCU-freed sockets using the default
		 * write-space callback: drop the whole truesize at once and
		 * do the wakeup inside the RCU read section, since sk may be
		 * freed as soon as the refcount hits zero.
		 */
		if (sock_flag(sk, SOCK_RCU_FREE) &&
		    sk->sk_write_space == sock_def_write_space) {
			rcu_read_lock();
			free = refcount_sub_and_test(len, &sk->sk_wmem_alloc);
			sock_def_write_space_wfree(sk);
			rcu_read_unlock();
			if (unlikely(free))
				__sk_free(sk);
			return;
		}

		/*
		 * Keep a reference on sk_wmem_alloc, this will be released
		 * after sk_write_space() call
		 */
		WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc));
		sk->sk_write_space(sk);
		len = 1;
	}
	/*
	 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
	 * could not do because of in-flight packets
	 */
	if (refcount_sub_and_test(len, &sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sock_wfree);
25071da177e4SLinus Torvalds
25081d2077acSEric Dumazet /* This variant of sock_wfree() is used by TCP,
25091d2077acSEric Dumazet * since it sets SOCK_USE_WRITE_QUEUE.
25101d2077acSEric Dumazet */
__sock_wfree(struct sk_buff * skb)25111d2077acSEric Dumazet void __sock_wfree(struct sk_buff *skb)
25121d2077acSEric Dumazet {
25131d2077acSEric Dumazet struct sock *sk = skb->sk;
25141d2077acSEric Dumazet
251514afee4bSReshetova, Elena if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
25161d2077acSEric Dumazet __sk_free(sk);
25171d2077acSEric Dumazet }
25181d2077acSEric Dumazet
/* Charge @skb to @sk's write allowance (sk_wmem_alloc) and install the
 * matching destructor so the charge is undone when the skb is freed.
 */
void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
	skb_orphan(skb);
	skb->sk = sk;
#ifdef CONFIG_INET
	/* Not a full socket (e.g. request/timewait): pin it with a plain
	 * reference released by sock_edemux() instead of wmem accounting.
	 */
	if (unlikely(!sk_fullsock(sk))) {
		skb->destructor = sock_edemux;
		sock_hold(sk);
		return;
	}
#endif
	skb->destructor = sock_wfree;
	skb_set_hash_from_sk(skb, sk);
	/*
	 * We used to take a refcount on sk, but following operation
	 * is enough to guarantee sk_free() won't free this sock until
	 * all in-flight packets are completed
	 */
	refcount_add(skb->truesize, &sk->sk_wmem_alloc);
}
EXPORT_SYMBOL(skb_set_owner_w);
25409e17f8a4SEric Dumazet
/* Partial orphaning is only safe when we know how to replace the skb's
 * destructor, i.e. it is one of the write-side destructors.
 */
static bool can_skb_orphan_partial(const struct sk_buff *skb)
{
#ifdef CONFIG_TLS_DEVICE
	/* Drivers depend on in-order delivery for crypto offload,
	 * partial orphan breaks out-of-order-OK logic.
	 */
	if (skb->decrypted)
		return false;
#endif
	return (skb->destructor == sock_wfree ||
		(IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
}
255341477662SJakub Kicinski
25541d2077acSEric Dumazet /* This helper is used by netem, as it can hold packets in its
25551d2077acSEric Dumazet * delay queue. We want to allow the owner socket to send more
25561d2077acSEric Dumazet * packets, as if they were already TX completed by a typical driver.
25571d2077acSEric Dumazet * But we also want to keep skb->sk set because some packet schedulers
2558f6ba8d33SEric Dumazet * rely on it (sch_fq for example).
25591d2077acSEric Dumazet */
/* Partially orphan @skb: release most of the send-buffer charge so the
 * owner can transmit more, while keeping skb->sk set for packet
 * schedulers.  Falls back to a full orphan when that is not possible.
 */
void skb_orphan_partial(struct sk_buff *skb)
{
	/* Nothing to release for pure TCP ACKs */
	if (skb_is_tcp_pure_ack(skb))
		return;

	/* Re-own the skb via skb_set_owner_sk_safe(), keeping skb->sk */
	if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk))
		return;

	skb_orphan(skb);
}
EXPORT_SYMBOL(skb_orphan_partial);
2571f2f872f9SEric Dumazet
25721da177e4SLinus Torvalds /*
25731da177e4SLinus Torvalds * Read buffer destructor automatically called from kfree_skb.
25741da177e4SLinus Torvalds */
sock_rfree(struct sk_buff * skb)25751da177e4SLinus Torvalds void sock_rfree(struct sk_buff *skb)
25761da177e4SLinus Torvalds {
25771da177e4SLinus Torvalds struct sock *sk = skb->sk;
2578d361fd59SEric Dumazet unsigned int len = skb->truesize;
25791da177e4SLinus Torvalds
2580d361fd59SEric Dumazet atomic_sub(len, &sk->sk_rmem_alloc);
2581d361fd59SEric Dumazet sk_mem_uncharge(sk, len);
25821da177e4SLinus Torvalds }
25832a91525cSEric Dumazet EXPORT_SYMBOL(sock_rfree);
25841da177e4SLinus Torvalds
25857768eed8SOliver Hartkopp /*
25867768eed8SOliver Hartkopp * Buffer destructor for skbs that are not used directly in read or write
25877768eed8SOliver Hartkopp * path, e.g. for error handler skbs. Automatically called from kfree_skb.
25887768eed8SOliver Hartkopp */
sock_efree(struct sk_buff * skb)258962bccb8cSAlexander Duyck void sock_efree(struct sk_buff *skb)
259062bccb8cSAlexander Duyck {
259162bccb8cSAlexander Duyck sock_put(skb->sk);
259262bccb8cSAlexander Duyck }
259362bccb8cSAlexander Duyck EXPORT_SYMBOL(sock_efree);
259462bccb8cSAlexander Duyck
2595cf7fbe66SJoe Stringer /* Buffer destructor for prefetch/receive path where reference count may
2596cf7fbe66SJoe Stringer * not be held, e.g. for listen sockets.
2597cf7fbe66SJoe Stringer */
#ifdef CONFIG_INET
/* Destructor counterpart of sock_gen_put(): drop the reference only if
 * the socket type is actually refcounted (see sk_is_refcounted()).
 */
void sock_pfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	if (sk_is_refcounted(sk))
		sock_gen_put(sk);
}
EXPORT_SYMBOL(sock_pfree);
#endif /* CONFIG_INET */
2606cf7fbe66SJoe Stringer
sock_i_uid(struct sock * sk)2607976d0201SEric W. Biederman kuid_t sock_i_uid(struct sock *sk)
26081da177e4SLinus Torvalds {
2609976d0201SEric W. Biederman kuid_t uid;
26101da177e4SLinus Torvalds
2611f064af1eSEric Dumazet read_lock_bh(&sk->sk_callback_lock);
2612976d0201SEric W. Biederman uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
2613f064af1eSEric Dumazet read_unlock_bh(&sk->sk_callback_lock);
26141da177e4SLinus Torvalds return uid;
26151da177e4SLinus Torvalds }
26162a91525cSEric Dumazet EXPORT_SYMBOL(sock_i_uid);
26171da177e4SLinus Torvalds
__sock_i_ino(struct sock * sk)261825a9c8a4SKuniyuki Iwashima unsigned long __sock_i_ino(struct sock *sk)
261925a9c8a4SKuniyuki Iwashima {
262025a9c8a4SKuniyuki Iwashima unsigned long ino;
262125a9c8a4SKuniyuki Iwashima
262225a9c8a4SKuniyuki Iwashima read_lock(&sk->sk_callback_lock);
262325a9c8a4SKuniyuki Iwashima ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
262425a9c8a4SKuniyuki Iwashima read_unlock(&sk->sk_callback_lock);
262525a9c8a4SKuniyuki Iwashima return ino;
262625a9c8a4SKuniyuki Iwashima }
262725a9c8a4SKuniyuki Iwashima EXPORT_SYMBOL(__sock_i_ino);
262825a9c8a4SKuniyuki Iwashima
/* BH-safe wrapper around __sock_i_ino(): disable bottom halves while
 * taking sk_callback_lock as a reader.
 */
unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long inum;

	local_bh_disable();
	inum = __sock_i_ino(sk);
	local_bh_enable();
	return inum;
}
EXPORT_SYMBOL(sock_i_ino);
26391da177e4SLinus Torvalds
26401da177e4SLinus Torvalds /*
26411da177e4SLinus Torvalds * Allocate a skb from the socket's send buffer.
26421da177e4SLinus Torvalds */
/* Allocate an skb owned by @sk and charged to its send buffer.  Unless
 * @force is set, fail when sk_wmem_alloc already meets sk_sndbuf.
 * Returns NULL on refusal or allocation failure.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	struct sk_buff *skb;

	if (!force &&
	    refcount_read(&sk->sk_wmem_alloc) >= READ_ONCE(sk->sk_sndbuf))
		return NULL;

	skb = alloc_skb(size, priority);
	if (!skb)
		return NULL;

	skb_set_owner_w(skb, sk);
	return skb;
}
EXPORT_SYMBOL(sock_wmalloc);
26581da177e4SLinus Torvalds
sock_ofree(struct sk_buff * skb)265998ba0bd5SWillem de Bruijn static void sock_ofree(struct sk_buff *skb)
266098ba0bd5SWillem de Bruijn {
266198ba0bd5SWillem de Bruijn struct sock *sk = skb->sk;
266298ba0bd5SWillem de Bruijn
266398ba0bd5SWillem de Bruijn atomic_sub(skb->truesize, &sk->sk_omem_alloc);
266498ba0bd5SWillem de Bruijn }
266598ba0bd5SWillem de Bruijn
/* Allocate an skb charged against the socket's option memory
 * (sk_omem_alloc), bounded by sysctl_optmem_max.  The charge is undone
 * by sock_ofree() when the skb is freed.  Returns NULL over-limit or
 * on allocation failure.
 */
struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
			     gfp_t priority)
{
	struct sk_buff *skb;

	/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
	if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
	    READ_ONCE(sysctl_optmem_max))
		return NULL;

	skb = alloc_skb(size, priority);
	if (!skb)
		return NULL;

	atomic_add(skb->truesize, &sk->sk_omem_alloc);
	skb->sk = sk;
	skb->destructor = sock_ofree;
	return skb;
}
268598ba0bd5SWillem de Bruijn
26861da177e4SLinus Torvalds /*
26871da177e4SLinus Torvalds * Allocate a memory block from the socket's option memory buffer.
26881da177e4SLinus Torvalds */
/* Allocate @size bytes of option memory charged to sk_omem_alloc,
 * bounded by sysctl_optmem_max.  Free with sock_kfree_s() (or
 * sock_kzfree_s() for sensitive data).  Returns NULL when over the
 * limit or when kmalloc() fails.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
	int optmem_max = READ_ONCE(sysctl_optmem_max);
	void *mem;

	if ((unsigned int)size > optmem_max ||
	    atomic_read(&sk->sk_omem_alloc) + size >= optmem_max)
		return NULL;

	/* Charge before allocating, so a sleeping kmalloc() cannot let
	 * concurrent callers race past the limit.
	 */
	atomic_add(size, &sk->sk_omem_alloc);
	mem = kmalloc(size, priority);
	if (mem)
		return mem;
	atomic_sub(size, &sk->sk_omem_alloc);
	return NULL;
}
EXPORT_SYMBOL(sock_kmalloc);
27081da177e4SLinus Torvalds
270979e88659SDaniel Borkmann /* Free an option memory block. Note, we actually want the inline
271079e88659SDaniel Borkmann * here as this allows gcc to detect the nullify and fold away the
271179e88659SDaniel Borkmann * condition entirely.
27121da177e4SLinus Torvalds */
/* Common helper for sock_kfree_s()/sock_kzfree_s(): free option memory
 * and uncharge sk_omem_alloc.  @nullify selects kfree_sensitive(),
 * which zeroes the buffer before freeing.
 */
static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
				  const bool nullify)
{
	if (WARN_ON_ONCE(!mem))
		return;
	if (nullify)
		kfree_sensitive(mem);
	else
		kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}
272479e88659SDaniel Borkmann
/* Free option memory obtained from sock_kmalloc(). */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	__sock_kfree_s(sk, mem, size, false);
}
EXPORT_SYMBOL(sock_kfree_s);
27301da177e4SLinus Torvalds
/* Like sock_kfree_s(), but zeroes the buffer before freeing — use for
 * memory that may hold sensitive data (keys, credentials).
 */
void sock_kzfree_s(struct sock *sk, void *mem, int size)
{
	__sock_kfree_s(sk, mem, size, true);
}
EXPORT_SYMBOL(sock_kzfree_s);
273679e88659SDaniel Borkmann
27371da177e4SLinus Torvalds /* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
27381da177e4SLinus Torvalds I think, these locks should be removed for datagram sockets.
27391da177e4SLinus Torvalds */
/* Sleep until send-buffer space becomes available, the timeout expires,
 * a signal arrives, the send side is shut down, or an error is pending.
 * Returns the remaining timeout.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		/* Flag the socket as out of space before sleeping */
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		/* Re-check conditions after queuing on the wait queue, so a
		 * concurrent wakeup cannot be missed.
		 */
		if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
			break;
		if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
			break;
		if (READ_ONCE(sk->sk_err))
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk_sleep(sk), &wait);
	return timeo;
}
27631da177e4SLinus Torvalds
27641da177e4SLinus Torvalds
27651da177e4SLinus Torvalds /*
27661da177e4SLinus Torvalds * Generic send/receive buffer handlers
27671da177e4SLinus Torvalds */
27681da177e4SLinus Torvalds
/* Allocate an skb with @header_len linear bytes plus @data_len in page
 * fragments (up to order @max_page_order), owned by @sk.  Blocks —
 * unless @noblock — while sk_wmem_alloc is at or above sk_sndbuf.
 * On failure returns NULL and stores a negative errno in *@errcode.
 */
struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
				     unsigned long data_len, int noblock,
				     int *errcode, int max_page_order)
{
	struct sk_buff *skb;
	long timeo;
	int err;

	timeo = sock_sndtimeo(sk, noblock);
	for (;;) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
			goto failure;

		/* Enough send-buffer room: leave the wait loop and allocate */
		if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
			break;

		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}
	/* alloc_skb_with_frags() fills *errcode itself on failure */
	skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
				   errcode, sk->sk_allocation);
	if (skb)
		skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}
EXPORT_SYMBOL(sock_alloc_send_pskb);
28121da177e4SLinus Torvalds
/* Apply one SOL_SOCKET control message to @sockc.
 * Returns 0 on success or a negative errno (-EPERM, -EINVAL).
 */
int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
		     struct sockcm_cookie *sockc)
{
	u32 tsflags;

	switch (cmsg->cmsg_type) {
	case SO_MARK:
		/* Setting the fwmark is a privileged operation */
		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
		    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
			return -EPERM;
		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
			return -EINVAL;
		sockc->mark = *(u32 *)CMSG_DATA(cmsg);
		break;
	case SO_TIMESTAMPING_OLD:
	case SO_TIMESTAMPING_NEW:
		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
			return -EINVAL;

		tsflags = *(u32 *)CMSG_DATA(cmsg);
		/* Only per-send TX record flags may be set via cmsg */
		if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
			return -EINVAL;

		sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
		sockc->tsflags |= tsflags;
		break;
	case SCM_TXTIME:
		if (!sock_flag(sk, SOCK_TXTIME))
			return -EINVAL;
		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64)))
			return -EINVAL;
		/* The u64 payload may be unaligned inside the cmsg */
		sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
		break;
	/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
	case SCM_RIGHTS:
	case SCM_CREDENTIALS:
		break;
	default:
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(__sock_cmsg_send);
285639771b12SWillem de Bruijn
/* Walk all control messages in @msg, applying SOL_SOCKET ones to @sockc
 * via __sock_cmsg_send(); other levels are skipped.  Returns 0 or the
 * first error encountered.
 */
int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
		   struct sockcm_cookie *sockc)
{
	struct cmsghdr *cmsg;
	int ret;

	for_each_cmsghdr(cmsg, msg) {
		if (!CMSG_OK(msg, cmsg))
			return -EINVAL;
		if (cmsg->cmsg_level != SOL_SOCKET)
			continue;
		ret = __sock_cmsg_send(sk, cmsg, sockc);
		if (ret)
			return ret;
	}
	return 0;
}
EXPORT_SYMBOL(sock_cmsg_send);
2875f28ea365SEdward Jee
sk_enter_memory_pressure(struct sock * sk)287606044751SEric Dumazet static void sk_enter_memory_pressure(struct sock *sk)
287706044751SEric Dumazet {
287806044751SEric Dumazet if (!sk->sk_prot->enter_memory_pressure)
287906044751SEric Dumazet return;
288006044751SEric Dumazet
288106044751SEric Dumazet sk->sk_prot->enter_memory_pressure(sk);
288206044751SEric Dumazet }
288306044751SEric Dumazet
/* Clear the protocol's memory-pressure state, either via its hook or by
 * resetting the shared memory_pressure flag directly.
 */
static void sk_leave_memory_pressure(struct sock *sk)
{
	if (sk->sk_prot->leave_memory_pressure) {
		/* Retpoline-friendly indirect call; TCP is the common case */
		INDIRECT_CALL_INET_1(sk->sk_prot->leave_memory_pressure,
				     tcp_leave_memory_pressure, sk);
	} else {
		unsigned long *memory_pressure = sk->sk_prot->memory_pressure;

		if (memory_pressure && READ_ONCE(*memory_pressure))
			WRITE_ONCE(*memory_pressure, 0);
	}
}
289606044751SEric Dumazet
2897ce27ec60SEric Dumazet DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
28985640f768SEric Dumazet
2899400dfd3aSEric Dumazet /**
2900400dfd3aSEric Dumazet * skb_page_frag_refill - check that a page_frag contains enough room
2901400dfd3aSEric Dumazet * @sz: minimum size of the fragment we want to get
2902400dfd3aSEric Dumazet * @pfrag: pointer to page_frag
290382d5e2b8SEric Dumazet * @gfp: priority for memory allocation
2904400dfd3aSEric Dumazet *
2905400dfd3aSEric Dumazet * Note: While this allocator tries to use high order pages, there is
2906400dfd3aSEric Dumazet * no guarantee that allocations succeed. Therefore, @sz MUST be
2907400dfd3aSEric Dumazet * less or equal than PAGE_SIZE.
2908400dfd3aSEric Dumazet */
bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
{
	if (pfrag->page) {
		/* We are the sole user: recycle the page from offset 0 */
		if (page_ref_count(pfrag->page) == 1) {
			pfrag->offset = 0;
			return true;
		}
		/* Enough room left in the current page */
		if (pfrag->offset + sz <= pfrag->size)
			return true;
		put_page(pfrag->page);
	}

	pfrag->offset = 0;
	/* Try a high-order page first, unless disabled via static key */
	if (SKB_FRAG_PAGE_ORDER &&
	    !static_branch_unlikely(&net_high_order_alloc_disable_key)) {
		/* Avoid direct reclaim but allow kswapd to wake */
		pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
					  __GFP_COMP | __GFP_NOWARN |
					  __GFP_NORETRY,
					  SKB_FRAG_PAGE_ORDER);
		if (likely(pfrag->page)) {
			pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
			return true;
		}
	}
	/* Fall back to a single page with the caller's gfp flags */
	pfrag->page = alloc_page(gfp);
	if (likely(pfrag->page)) {
		pfrag->size = PAGE_SIZE;
		return true;
	}
	return false;
}
EXPORT_SYMBOL(skb_page_frag_refill);
2942400dfd3aSEric Dumazet
sk_page_frag_refill(struct sock * sk,struct page_frag * pfrag)2943400dfd3aSEric Dumazet bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
2944400dfd3aSEric Dumazet {
2945400dfd3aSEric Dumazet if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
2946400dfd3aSEric Dumazet return true;
2947400dfd3aSEric Dumazet
29485640f768SEric Dumazet sk_enter_memory_pressure(sk);
29495640f768SEric Dumazet sk_stream_moderate_sndbuf(sk);
29505640f768SEric Dumazet return false;
29515640f768SEric Dumazet }
29525640f768SEric Dumazet EXPORT_SYMBOL(sk_page_frag_refill);
29535640f768SEric Dumazet
/* Slow path of lock_sock(): called with sk_lock.slock held.  Sleep
 * until no user context owns the socket, dropping the spinlock around
 * each schedule() and re-taking it before re-checking ownership.
 */
void __lock_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	DEFINE_WAIT(wait);

	for (;;) {
		/* Exclusive wait: wake one contender at a time */
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					  TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}
29711da177e4SLinus Torvalds
/* Process every skb queued on the socket backlog while the socket was
 * owned by user context.  Called with sk_lock.slock held; the queue is
 * detached first so producers can keep appending while we process with
 * the spinlock dropped.
 */
void __release_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	struct sk_buff *skb, *next;

	while ((skb = sk->sk_backlog.head) != NULL) {
		/* Detach the whole list, then process it unlocked */
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;

		spin_unlock_bh(&sk->sk_lock.slock);

		do {
			next = skb->next;
			prefetch(next);
			DEBUG_NET_WARN_ON_ONCE(skb_dst_is_noref(skb));
			skb_mark_not_on_list(skb);
			sk_backlog_rcv(sk, skb);

			cond_resched();

			skb = next;
		} while (skb != NULL);

		spin_lock_bh(&sk->sk_lock.slock);
	}

	/*
	 * Doing the zeroing here guarantee we can not loop forever
	 * while a wild producer attempts to flood us.
	 */
	sk->sk_backlog.len = 0;
}
30041da177e4SLinus Torvalds
/* Drain the socket backlog, taking sk_lock.slock around
 * __release_sock() as that helper expects.
 */
void __sk_flush_backlog(struct sock *sk)
{
	spin_lock_bh(&sk->sk_lock.slock);
	__release_sock(sk);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL_GPL(__sk_flush_backlog);
3012d41a69f1SEric Dumazet
30131da177e4SLinus Torvalds /**
30141da177e4SLinus Torvalds * sk_wait_data - wait for data to arrive at sk_receive_queue
30154dc3b16bSPavel Pisa * @sk: sock to wait on
30164dc3b16bSPavel Pisa * @timeo: for how long
3017dfbafc99SSabrina Dubroca * @skb: last skb seen on sk_receive_queue
30181da177e4SLinus Torvalds *
30191da177e4SLinus Torvalds * Now socket state including sk->sk_err is changed only under lock,
30201da177e4SLinus Torvalds * hence we may omit checks after joining wait queue.
30211da177e4SLinus Torvalds * We check receive queue before schedule() only as optimization;
30221da177e4SLinus Torvalds * it is very likely that release_sock() added new data.
30231da177e4SLinus Torvalds */
int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	int rc;

	add_wait_queue(sk_sleep(sk), &wait);
	/* Mark that a reader is blocked waiting for data */
	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	/* Woken when the receive queue's tail is no longer @skb */
	rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	remove_wait_queue(sk_sleep(sk), &wait);
	return rc;
}
EXPORT_SYMBOL(sk_wait_data);
30371da177e4SLinus Torvalds
30383ab224beSHideo Aoki /**
/**
 * __sk_mem_raise_allocated - increase memory_allocated
 * @sk: socket
 * @size: memory size to allocate
 * @amt: pages to allocate
 * @kind: allocation type
 *
 * Similar to __sk_mem_schedule(), but does not update sk_forward_alloc.
 *
 * Return: 1 if the charge of @amt pages is kept (possibly overshooting the
 * soft limits), 0 if it was rolled back and the allocation must fail.
 */
int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
	bool memcg_charge = mem_cgroup_sockets_enabled && sk->sk_memcg;
	struct proto *prot = sk->sk_prot;
	bool charged = true;
	long allocated;

	/* Charge optimistically first; every failure path below must undo
	 * this via the rollback at the end of the function.
	 */
	sk_memory_allocated_add(sk, amt);
	allocated = sk_memory_allocated(sk);
	if (memcg_charge &&
	    !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt,
						gfp_memcg_charge())))
		goto suppress_allocation;

	/* Under limit. */
	if (allocated <= sk_prot_mem_limits(sk, 0)) {
		sk_leave_memory_pressure(sk);
		return 1;
	}

	/* Under pressure. */
	if (allocated > sk_prot_mem_limits(sk, 1))
		sk_enter_memory_pressure(sk);

	/* Over hard limit. */
	if (allocated > sk_prot_mem_limits(sk, 2))
		goto suppress_allocation;

	/* guarantee minimum buffer size under pressure */
	if (kind == SK_MEM_RECV) {
		if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot))
			return 1;

	} else { /* SK_MEM_SEND */
		int wmem0 = sk_get_wmem0(sk, prot);

		if (sk->sk_type == SOCK_STREAM) {
			if (sk->sk_wmem_queued < wmem0)
				return 1;
		} else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) {
			return 1;
		}
	}

	if (sk_has_memory_pressure(sk)) {
		u64 alloc;

		if (!sk_under_memory_pressure(sk))
			return 1;
		/* Fairness heuristic: allow the allocation if this socket's
		 * own footprint, multiplied by the number of sockets, still
		 * fits under the hard limit.
		 */
		alloc = sk_sockets_allocated_read_positive(sk);
		if (sk_prot_mem_limits(sk, 2) > alloc *
		    sk_mem_pages(sk->sk_wmem_queued +
				 atomic_read(&sk->sk_rmem_alloc) +
				 sk->sk_forward_alloc))
			return 1;
	}

suppress_allocation:

	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
		sk_stream_moderate_sndbuf(sk);

		/* Fail only if socket is _under_ its sndbuf.
		 * In this case we cannot block, so that we have to fail.
		 */
		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) {
			/* Force charge with __GFP_NOFAIL */
			if (memcg_charge && !charged) {
				mem_cgroup_charge_skmem(sk->sk_memcg, amt,
					gfp_memcg_charge() | __GFP_NOFAIL);
			}
			return 1;
		}
	}

	/* Trace only real limit hits, not memcg refusals on the recv side. */
	if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
		trace_sock_exceed_buf_limit(sk, prot, allocated, kind);

	/* Roll back the optimistic charge taken at function entry. */
	sk_memory_allocated_sub(sk, amt);

	if (memcg_charge && charged)
		mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);

	return 0;
}
3132f8c3bf00SPaolo Abeni
3133f8c3bf00SPaolo Abeni /**
3134f8c3bf00SPaolo Abeni * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
3135f8c3bf00SPaolo Abeni * @sk: socket
3136f8c3bf00SPaolo Abeni * @size: memory size to allocate
3137f8c3bf00SPaolo Abeni * @kind: allocation type
3138f8c3bf00SPaolo Abeni *
3139f8c3bf00SPaolo Abeni * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
3140f8c3bf00SPaolo Abeni * rmem allocation. This function assumes that protocols which have
3141f8c3bf00SPaolo Abeni * memory_pressure use sk_wmem_queued as write buffer accounting.
3142f8c3bf00SPaolo Abeni */
__sk_mem_schedule(struct sock * sk,int size,int kind)3143f8c3bf00SPaolo Abeni int __sk_mem_schedule(struct sock *sk, int size, int kind)
3144f8c3bf00SPaolo Abeni {
3145f8c3bf00SPaolo Abeni int ret, amt = sk_mem_pages(size);
3146f8c3bf00SPaolo Abeni
31475e6300e7SEric Dumazet sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
3148f8c3bf00SPaolo Abeni ret = __sk_mem_raise_allocated(sk, size, amt, kind);
3149f8c3bf00SPaolo Abeni if (!ret)
31505e6300e7SEric Dumazet sk_forward_alloc_add(sk, -(amt << PAGE_SHIFT));
3151f8c3bf00SPaolo Abeni return ret;
3152f8c3bf00SPaolo Abeni }
31533ab224beSHideo Aoki EXPORT_SYMBOL(__sk_mem_schedule);
31543ab224beSHideo Aoki
31553ab224beSHideo Aoki /**
3156f8c3bf00SPaolo Abeni * __sk_mem_reduce_allocated - reclaim memory_allocated
31573ab224beSHideo Aoki * @sk: socket
3158f8c3bf00SPaolo Abeni * @amount: number of quanta
3159f8c3bf00SPaolo Abeni *
3160f8c3bf00SPaolo Abeni * Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc
31613ab224beSHideo Aoki */
__sk_mem_reduce_allocated(struct sock * sk,int amount)3162f8c3bf00SPaolo Abeni void __sk_mem_reduce_allocated(struct sock *sk, int amount)
31633ab224beSHideo Aoki {
31641a24e04eSEric Dumazet sk_memory_allocated_sub(sk, amount);
31653ab224beSHideo Aoki
3166baac50bbSJohannes Weiner if (mem_cgroup_sockets_enabled && sk->sk_memcg)
3167baac50bbSJohannes Weiner mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
3168e805605cSJohannes Weiner
31692d0c88e8SAbel Wu if (sk_under_global_memory_pressure(sk) &&
3170180d8cd9SGlauber Costa (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
3171180d8cd9SGlauber Costa sk_leave_memory_pressure(sk);
31723ab224beSHideo Aoki }
3173f8c3bf00SPaolo Abeni
3174f8c3bf00SPaolo Abeni /**
3175f8c3bf00SPaolo Abeni * __sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
3176f8c3bf00SPaolo Abeni * @sk: socket
3177100fdd1fSEric Dumazet * @amount: number of bytes (rounded down to a PAGE_SIZE multiple)
3178f8c3bf00SPaolo Abeni */
__sk_mem_reclaim(struct sock * sk,int amount)3179f8c3bf00SPaolo Abeni void __sk_mem_reclaim(struct sock *sk, int amount)
3180f8c3bf00SPaolo Abeni {
3181100fdd1fSEric Dumazet amount >>= PAGE_SHIFT;
31825e6300e7SEric Dumazet sk_forward_alloc_add(sk, -(amount << PAGE_SHIFT));
3183f8c3bf00SPaolo Abeni __sk_mem_reduce_allocated(sk, amount);
3184f8c3bf00SPaolo Abeni }
31853ab224beSHideo Aoki EXPORT_SYMBOL(__sk_mem_reclaim);
31863ab224beSHideo Aoki
/* Default SO_PEEK_OFF setter: publish the new peek offset.
 * WRITE_ONCE() pairs with lockless readers of sk_peek_off.
 */
int sk_set_peek_off(struct sock *sk, int val)
{
	WRITE_ONCE(sk->sk_peek_off, val);
	return 0;
}
EXPORT_SYMBOL_GPL(sk_set_peek_off);
31933ab224beSHideo Aoki
31941da177e4SLinus Torvalds /*
31951da177e4SLinus Torvalds * Set of default routines for initialising struct proto_ops when
31961da177e4SLinus Torvalds * the protocol does not support a particular function. In certain
31971da177e4SLinus Torvalds * cases where it makes no sense for a protocol to have a "do nothing"
31981da177e4SLinus Torvalds * function, some default processing is provided.
31991da177e4SLinus Torvalds */
32001da177e4SLinus Torvalds
/* Default stub: protocol does not support bind(). */
int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_bind);
32061da177e4SLinus Torvalds
/* Default stub: protocol does not support connect(). */
int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_connect);
32131da177e4SLinus Torvalds
/* Default stub: protocol does not support socketpair(). */
int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_socketpair);
32191da177e4SLinus Torvalds
/* Default stub: protocol does not support accept(). */
int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
		   bool kern)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_accept);
32261da177e4SLinus Torvalds
/* Default stub: protocol does not support getsockname()/getpeername(). */
int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int peer)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getname);
32331da177e4SLinus Torvalds
/* Default stub: protocol has no protocol-specific ioctls. */
int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_ioctl);
32391da177e4SLinus Torvalds
/* Default stub: protocol does not support listen(). */
int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_listen);
32451da177e4SLinus Torvalds
/* Default stub: protocol does not support shutdown(). */
int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_shutdown);
32511da177e4SLinus Torvalds
/* Default stub: protocol does not support sendmsg(). */
int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_sendmsg);
32571da177e4SLinus Torvalds
/* Default stub: protocol does not support locked sendmsg on a bare sock. */
int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_sendmsg_locked);
3263306b13ebSTom Herbert
/* Default stub: protocol does not support recvmsg(). */
int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
		    int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_recvmsg);
32701da177e4SLinus Torvalds
/* Default stub: protocol does not support mmap() on the socket. */
int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}
EXPORT_SYMBOL(sock_no_mmap);
32771da177e4SLinus Torvalds
3278d9539752SKees Cook /*
3279d9539752SKees Cook * When a file is received (via SCM_RIGHTS, etc), we must bump the
3280d9539752SKees Cook * various sock-based usage counts.
3281d9539752SKees Cook */
__receive_sock(struct file * file)3282d9539752SKees Cook void __receive_sock(struct file *file)
3283d9539752SKees Cook {
3284d9539752SKees Cook struct socket *sock;
3285d9539752SKees Cook
3286dba4a925SFlorent Revest sock = sock_from_file(file);
3287d9539752SKees Cook if (sock) {
3288d9539752SKees Cook sock_update_netprioidx(&sock->sk->sk_cgrp_data);
3289d9539752SKees Cook sock_update_classid(&sock->sk->sk_cgrp_data);
3290d9539752SKees Cook }
3291d9539752SKees Cook }
3292d9539752SKees Cook
32931da177e4SLinus Torvalds /*
32941da177e4SLinus Torvalds * Default Socket Callbacks
32951da177e4SLinus Torvalds */
32961da177e4SLinus Torvalds
/* Default sk->sk_state_change callback: wake everybody sleeping on the
 * socket's wait queue, but only if there actually is a sleeper.
 */
static void sock_def_wakeup(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_all(&wq->wait);
	rcu_read_unlock();
}
33071da177e4SLinus Torvalds
/* Default sk->sk_error_report callback: wake pollers with EPOLLERR and
 * notify async (SIGIO) waiters with POLL_ERR.
 */
static void sock_def_error_report(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_poll(&wq->wait, EPOLLERR);
	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
	rcu_read_unlock();
}
33191da177e4SLinus Torvalds
/* Default sk->sk_data_ready callback: trace the event, wake pollers with
 * the full set of read-side EPOLL bits, and send POLL_IN to async waiters.
 */
void sock_def_readable(struct sock *sk)
{
	struct socket_wq *wq;

	trace_sk_data_ready(sk);

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
						EPOLLRDNORM | EPOLLRDBAND);
	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
	rcu_read_unlock();
}
33341da177e4SLinus Torvalds
/* Default sk->sk_write_space callback: wake write-side waiters, but only
 * when enough space is available (sock_writeable()).
 */
static void sock_def_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();

	/* Do not wake up a writer until he can make "significant"
	 * progress. --DaveM
	 */
	if (sock_writeable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
						EPOLLWRNORM | EPOLLWRBAND);

		/* Should agree with poll, otherwise some programs break */
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}

	rcu_read_unlock();
}
33561da177e4SLinus Torvalds
/* An optimised version of sock_def_write_space(), should only be called
 * for SOCK_RCU_FREE sockets under RCU read section and after putting
 * ->sk_wmem_alloc.
 */
static void sock_def_write_space_wfree(struct sock *sk)
{
	/* Do not wake up a writer until he can make "significant"
	 * progress. --DaveM
	 */
	if (sock_writeable(sk)) {
		/* Caller already holds rcu_read_lock(), so no extra lock
		 * here (unlike sock_def_write_space()).
		 */
		struct socket_wq *wq = rcu_dereference(sk->sk_wq);

		/* rely on refcount_sub from sock_wfree() */
		smp_mb__after_atomic();
		if (wq && waitqueue_active(&wq->wait))
			wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
						EPOLLWRNORM | EPOLLWRBAND);

		/* Should agree with poll, otherwise some programs break */
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
}
33790a8afd9fSPavel Begunkov
/* Default sk->sk_destruct callback: nothing extra to free. */
static void sock_def_destruct(struct sock *sk)
{
}
33831da177e4SLinus Torvalds
sk_send_sigurg(struct sock * sk)33841da177e4SLinus Torvalds void sk_send_sigurg(struct sock *sk)
33851da177e4SLinus Torvalds {
33861da177e4SLinus Torvalds if (sk->sk_socket && sk->sk_socket->file)
33871da177e4SLinus Torvalds if (send_sigurg(&sk->sk_socket->file->f_owner))
33888d8ad9d7SPavel Emelyanov sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
33891da177e4SLinus Torvalds }
33902a91525cSEric Dumazet EXPORT_SYMBOL(sk_send_sigurg);
33911da177e4SLinus Torvalds
/* (Re)arm @timer. mod_timer() returns 0 when the timer was inactive, in
 * which case we take an extra socket reference for the pending timer
 * (dropped via sk_stop_timer() or, presumably, by the handler — confirm
 * per-protocol).
 */
void sk_reset_timer(struct sock *sk, struct timer_list* timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}
EXPORT_SYMBOL(sk_reset_timer);
33991da177e4SLinus Torvalds
/* Cancel @timer; if it was still pending, drop the socket reference taken
 * by sk_reset_timer().
 */
void sk_stop_timer(struct sock *sk, struct timer_list* timer)
{
	if (del_timer(timer))
		__sock_put(sk);
}
EXPORT_SYMBOL(sk_stop_timer);
34061da177e4SLinus Torvalds
/* Like sk_stop_timer(), but del_timer_sync() also waits for a concurrently
 * running timer handler to finish before returning.
 */
void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer)
{
	if (del_timer_sync(timer))
		__sock_put(sk);
}
EXPORT_SYMBOL(sk_stop_timer_sync);
341308b81d87SGeliang Tang
/**
 * sock_init_data_uid - initialise a newly allocated struct sock
 * @sock: owning struct socket, or NULL for sockets without one
 * @sk: the sock to initialise
 * @uid: uid the socket is accounted to
 *
 * Sets default buffer sizes, timeouts, callbacks and lockdep classes.
 */
void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid)
{
	sk_init_common(sk);
	sk->sk_send_head = NULL;

	/* No handler installed here; protocols presumably set their own
	 * sk_timer callback later — confirm per-protocol.
	 */
	timer_setup(&sk->sk_timer, NULL, 0);

	sk->sk_allocation = GFP_KERNEL;
	sk->sk_rcvbuf = READ_ONCE(sysctl_rmem_default);
	sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default);
	sk->sk_state = TCP_CLOSE;
	sk->sk_use_task_frag = true;
	sk_set_socket(sk, sock);

	sock_set_flag(sk, SOCK_ZAPPED);

	/* Link sock and socket both ways when we have an owning socket. */
	if (sock) {
		sk->sk_type = sock->type;
		RCU_INIT_POINTER(sk->sk_wq, &sock->wq);
		sock->sk = sk;
	} else {
		RCU_INIT_POINTER(sk->sk_wq, NULL);
	}
	sk->sk_uid = uid;

	rwlock_init(&sk->sk_callback_lock);
	/* Kernel sockets get a separate lockdep class for the callback
	 * lock so they don't mix with user-socket lock chains.
	 */
	if (sk->sk_kern_sock)
		lockdep_set_class_and_name(
			&sk->sk_callback_lock,
			af_kern_callback_keys + sk->sk_family,
			af_family_kern_clock_key_strings[sk->sk_family]);
	else
		lockdep_set_class_and_name(
			&sk->sk_callback_lock,
			af_callback_keys + sk->sk_family,
			af_family_clock_key_strings[sk->sk_family]);

	/* Install the default callbacks defined earlier in this file. */
	sk->sk_state_change = sock_def_wakeup;
	sk->sk_data_ready = sock_def_readable;
	sk->sk_write_space = sock_def_write_space;
	sk->sk_error_report = sock_def_error_report;
	sk->sk_destruct = sock_def_destruct;

	sk->sk_frag.page = NULL;
	sk->sk_frag.offset = 0;
	sk->sk_peek_off = -1;

	sk->sk_peer_pid = NULL;
	sk->sk_peer_cred = NULL;
	spin_lock_init(&sk->sk_peer_lock);

	sk->sk_write_pending = 0;
	sk->sk_rcvlowat = 1;
	sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;

	sk->sk_stamp = SK_DEFAULT_STAMP;
#if BITS_PER_LONG==32
	seqlock_init(&sk->sk_stamp_seq);
#endif
	atomic_set(&sk->sk_zckey, 0);

#ifdef CONFIG_NET_RX_BUSY_POLL
	sk->sk_napi_id = 0;
	sk->sk_ll_usec = READ_ONCE(sysctl_net_busy_read);
#endif

	sk->sk_max_pacing_rate = ~0UL;
	sk->sk_pacing_rate = ~0UL;
	WRITE_ONCE(sk->sk_pacing_shift, 10);
	sk->sk_incoming_cpu = -1;

	sk_rx_queue_clear(sk);
	/*
	 * Before updating sk_refcnt, we must commit prior changes to memory
	 * (Documentation/RCU/rculist_nulls.rst for details)
	 */
	smp_wmb();
	refcount_set(&sk->sk_refcnt, 1);
	atomic_set(&sk->sk_drops, 0);
}
EXPORT_SYMBOL(sock_init_data_uid);
3496584f3742SPietro Borrello
sock_init_data(struct socket * sock,struct sock * sk)3497584f3742SPietro Borrello void sock_init_data(struct socket *sock, struct sock *sk)
3498584f3742SPietro Borrello {
3499584f3742SPietro Borrello kuid_t uid = sock ?
3500584f3742SPietro Borrello SOCK_INODE(sock)->i_uid :
3501584f3742SPietro Borrello make_kuid(sock_net(sk)->user_ns, 0);
3502584f3742SPietro Borrello
3503584f3742SPietro Borrello sock_init_data_uid(sock, sk, uid);
3504584f3742SPietro Borrello }
35052a91525cSEric Dumazet EXPORT_SYMBOL(sock_init_data);
35061da177e4SLinus Torvalds
/* Acquire the socket "owned" lock from process context; @subclass is the
 * lockdep nesting level.
 */
void lock_sock_nested(struct sock *sk, int subclass)
{
	/* The sk_lock has mutex_lock() semantics here. */
	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);

	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);
	/* Someone else owns the lock: sleep in __lock_sock() until it is
	 * released, then claim ownership under the spinlock.
	 */
	if (sock_owned_by_user_nocheck(sk))
		__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(lock_sock_nested);
35201da177e4SLinus Torvalds
/* Release the socket "owned" lock: flush any backlog queued while we owned
 * it, run the protocol's release callback, then wake lock waiters.
 */
void release_sock(struct sock *sk)
{
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);

	/* Warning : release_cb() might need to release sk ownership,
	 * ie call sock_release_ownership(sk) before us.
	 */
	if (sk->sk_prot->release_cb)
		sk->sk_prot->release_cb(sk);

	sock_release_ownership(sk);
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);
35391da177e4SLinus Torvalds
/* Slow-path body of lock_sock_fast().
 *
 * Return: false on the fast path (caller keeps sk_lock.slock with BHs
 * disabled), true when the full "owned" lock had to be taken — the
 * matching unlock must then behave like release_sock().
 */
bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);

	if (!sock_owned_by_user_nocheck(sk)) {
		/*
		 * Fast path return with bottom halves disabled and
		 * sock::sk_lock.slock held.
		 *
		 * The 'mutex' is not contended and holding
		 * sock::sk_lock.slock prevents all other lockers to
		 * proceed so the corresponding unlock_sock_fast() can
		 * avoid the slow path of release_sock() completely and
		 * just release slock.
		 *
		 * From a semantical POV this is equivalent to 'acquiring'
		 * the 'mutex', hence the corresponding lockdep
		 * mutex_release() has to happen in the fast path of
		 * unlock_sock_fast().
		 */
		return false;
	}

	__lock_sock(sk);
	sk->sk_lock.owned = 1;
	__acquire(&sk->sk_lock.slock);
	spin_unlock_bh(&sk->sk_lock.slock);
	return true;
}
EXPORT_SYMBOL(__lock_sock_fast);
35718a74ad60SEric Dumazet
/* Copy the socket's last-packet timestamp to userspace, in the format
 * selected by @timeval/@time32. Enables SOCK_TIMESTAMP as a side effect.
 */
int sock_gettstamp(struct socket *sock, void __user *userstamp,
		   bool timeval, bool time32)
{
	struct sock *sk = sock->sk;
	struct timespec64 ts;

	sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	ts = ktime_to_timespec64(sock_read_timestamp(sk));
	/* tv_sec == -1 appears to be the "no timestamp recorded" sentinel. */
	if (ts.tv_sec == -1)
		return -ENOENT;
	if (ts.tv_sec == 0) {
		/* Lazily initialise the stored stamp with the current time. */
		ktime_t kt = ktime_get_real();

		sock_write_timestamp(sk, kt);
		ts = ktime_to_timespec64(kt);
	}

	/* For timeval output the nsec field must carry microseconds. */
	if (timeval)
		ts.tv_nsec /= 1000;

#ifdef CONFIG_COMPAT_32BIT_TIME
	if (time32)
		return put_old_timespec32(&ts, userstamp);
#endif
#ifdef CONFIG_SPARC64
	/* beware of padding in sparc64 timeval */
	if (timeval && !in_compat_syscall()) {
		struct __kernel_old_timeval __user tv = {
			.tv_sec = ts.tv_sec,
			.tv_usec = ts.tv_nsec,
		};
		if (copy_to_user(userstamp, &tv, sizeof(tv)))
			return -EFAULT;
		return 0;
	}
#endif
	return put_timespec64(&ts, userstamp);
}
EXPORT_SYMBOL(sock_gettstamp);
3610ae40eb1eSEric Dumazet
/* Set a timestamping flag on @sk and, if this is the first timestamping
 * flag to be enabled, turn on network-wide timestamping.
 */
void sock_enable_timestamp(struct sock *sk, enum sock_flags flag)
{
	if (!sock_flag(sk, flag)) {
		unsigned long previous_flags = sk->sk_flags;

		sock_set_flag(sk, flag);
		/*
		 * we just set one of the two flags which require net
		 * time stamping, but time stamping might have been on
		 * already because of the other one
		 */
		if (sock_needs_netstamp(sk) &&
		    !(previous_flags & SK_FLAGS_TIMESTAMP))
			net_enable_timestamp();
	}
}
/* Dequeue one skb from the socket error queue and deliver it to @msg,
 * including its timestamps and the extended error as a cmsg of
 * @level/@type.
 *
 * Return: number of bytes copied, or a negative errno (-EAGAIN when the
 * error queue is empty).
 */
int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
		       int level, int type)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb;
	int copied, err;

	err = -EAGAIN;
	skb = sock_dequeue_err_skb(sk);
	if (skb == NULL)
		goto out;

	copied = skb->len;
	if (copied > len) {
		/* Payload larger than the caller's buffer: truncate and
		 * tell them via MSG_TRUNC.
		 */
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_msg(skb, 0, msg, copied);
	if (err)
		goto out_free_skb;

	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);
	put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);

	msg->msg_flags |= MSG_ERRQUEUE;
	err = copied;

out_free_skb:
	kfree_skb(skb);
out:
	return err;
}
EXPORT_SYMBOL(sock_recv_errqueue);
3663cb820f8eSRichard Cochran
/*
 * Get a socket option on a socket.
 *
 * FIX: POSIX 1003.1g is very ambiguous here. It states that
 * asynchronous errors should be reported by getsockopt. We assume
 * this means if you specify SO_ERROR (otherwise what's the point of it).
 */
/* Generic getsockopt: forward to the protocol's handler. */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	/* IPV6_ADDRFORM can change sk->sk_prot under us. */
	return READ_ONCE(sk->sk_prot)->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_getsockopt);
36801da177e4SLinus Torvalds
/*
 * Generic recvmsg handler: let the protocol receive the data, then
 * propagate the reported source-address length into the msghdr.
 */
int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
			int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int err;

	err = sk->sk_prot->recvmsg(sk, msg, size, flags, &addr_len);
	if (err < 0)
		return err;

	msg->msg_namelen = addr_len;
	return err;
}
EXPORT_SYMBOL(sock_common_recvmsg);
36941da177e4SLinus Torvalds
36951da177e4SLinus Torvalds /*
36961da177e4SLinus Torvalds * Set socket options on an inet socket.
36971da177e4SLinus Torvalds */
/*
 * Generic setsockopt handler: simply dispatch to the protocol's own
 * ->setsockopt() implementation.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct proto *prot;

	/* IPV6_ADDRFORM can change sk->sk_prot under us. */
	prot = READ_ONCE(sk->sk_prot);
	return prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_setsockopt);
37071da177e4SLinus Torvalds
/*
 * sk_common_release - common last-stage teardown of a protocol socket.
 * @sk: socket being released; the caller's reference is consumed here
 *      by the final sock_put().
 *
 * Runs the protocol's ->destroy hook, unhashes the socket, severs the
 * struct socket back-pointer, orphans the sock and drops a reference.
 * The statement order below is significant - see the inline comments.
 */
void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes have
	 * no access to socket. But net still has.
	 * Step one, detach it from networking:
	 *
	 * A. Remove from hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/* Sever the back-pointer so the struct socket no longer refers
	 * to this sock.
	 */
	if (sk->sk_socket)
		sk->sk_socket->sk = NULL;

	/*
	 * In this point socket cannot receive new packets, but it is possible
	 * that some packets are in flight because some CPU runs receiver and
	 * did hash table lookup before we unhashed socket. They will achieve
	 * receive queue and will be purged by socket destructor.
	 *
	 * Also we still have packets pending on receive queue and probably,
	 * our own packets waiting in device queues. sock_destroy will drain
	 * receive queue, but transmitted packets will delay socket destruction
	 * until the last reference will be released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

	sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);
37451da177e4SLinus Torvalds
/*
 * sk_get_meminfo - fill @mem with a snapshot of @sk's memory accounting.
 * @sk:  socket to sample
 * @mem: array of at least SK_MEMINFO_VARS u32 slots, zeroed first
 *
 * Lockless snapshot used by sock_diag and friends; READ_ONCE() is used
 * for the fields that can be written concurrently, so individual values
 * are coherent but the set as a whole is not atomic.
 */
void sk_get_meminfo(const struct sock *sk, u32 *mem)
{
	memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);

	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
	mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
	mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
	mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
	mem[SK_MEMINFO_FWD_ALLOC] = sk_forward_alloc_get(sk);
	mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
	mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
	mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
	mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
}
3760a2d133b1SJosh Hunt
376113ff3d6fSPavel Emelyanov #ifdef CONFIG_PROC_FS
376213ff3d6fSPavel Emelyanov static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
376370ee1159SPavel Emelyanov
sock_prot_inuse_get(struct net * net,struct proto * prot)376470ee1159SPavel Emelyanov int sock_prot_inuse_get(struct net *net, struct proto *prot)
376570ee1159SPavel Emelyanov {
376670ee1159SPavel Emelyanov int cpu, idx = prot->inuse_idx;
376770ee1159SPavel Emelyanov int res = 0;
376870ee1159SPavel Emelyanov
376970ee1159SPavel Emelyanov for_each_possible_cpu(cpu)
377008fc7f81STonghao Zhang res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx];
377170ee1159SPavel Emelyanov
377270ee1159SPavel Emelyanov return res >= 0 ? res : 0;
377370ee1159SPavel Emelyanov }
377470ee1159SPavel Emelyanov EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
377570ee1159SPavel Emelyanov
sock_inuse_get(struct net * net)3776648845abSTonghao Zhang int sock_inuse_get(struct net *net)
3777648845abSTonghao Zhang {
3778648845abSTonghao Zhang int cpu, res = 0;
3779648845abSTonghao Zhang
3780648845abSTonghao Zhang for_each_possible_cpu(cpu)
37814199bae1SEric Dumazet res += per_cpu_ptr(net->core.prot_inuse, cpu)->all;
3782648845abSTonghao Zhang
3783648845abSTonghao Zhang return res;
3784648845abSTonghao Zhang }
3785648845abSTonghao Zhang
3786648845abSTonghao Zhang EXPORT_SYMBOL_GPL(sock_inuse_get);
3787648845abSTonghao Zhang
sock_inuse_init_net(struct net * net)37882c8c1e72SAlexey Dobriyan static int __net_init sock_inuse_init_net(struct net *net)
378970ee1159SPavel Emelyanov {
379008fc7f81STonghao Zhang net->core.prot_inuse = alloc_percpu(struct prot_inuse);
3791648845abSTonghao Zhang if (net->core.prot_inuse == NULL)
3792648845abSTonghao Zhang return -ENOMEM;
3793648845abSTonghao Zhang return 0;
379470ee1159SPavel Emelyanov }
379570ee1159SPavel Emelyanov
/* Free the per-cpu protocol usage counters when @net is torn down. */
static void __net_exit sock_inuse_exit_net(struct net *net)
{
	free_percpu(net->core.prot_inuse);
}
380070ee1159SPavel Emelyanov
/* Per-netns lifetime hooks for the socket in-use counters. */
static struct pernet_operations net_inuse_ops = {
	.init = sock_inuse_init_net,
	.exit = sock_inuse_exit_net,
};
380570ee1159SPavel Emelyanov
net_inuse_init(void)380670ee1159SPavel Emelyanov static __init int net_inuse_init(void)
380770ee1159SPavel Emelyanov {
380870ee1159SPavel Emelyanov if (register_pernet_subsys(&net_inuse_ops))
380970ee1159SPavel Emelyanov panic("Cannot initialize net inuse counters");
381070ee1159SPavel Emelyanov
381170ee1159SPavel Emelyanov return 0;
381270ee1159SPavel Emelyanov }
381370ee1159SPavel Emelyanov
381470ee1159SPavel Emelyanov core_initcall(net_inuse_init);
381513ff3d6fSPavel Emelyanov
/*
 * Reserve a slot in the global proto_inuse_idx bitmap for @prot.
 *
 * The last slot (PROTO_INUSE_NR - 1) is never handed out: it doubles as
 * the "no index assigned" sentinel that release_proto_idx() tests, which
 * is why the assignment happens before the exhaustion check - on failure
 * prot->inuse_idx is deliberately left at the sentinel value.
 */
static int assign_proto_idx(struct proto *prot)
{
	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);

	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
		pr_err("PROTO_INUSE_NR exhausted\n");
		return -ENOSPC;
	}

	set_bit(prot->inuse_idx, proto_inuse_idx);
	return 0;
}
382813ff3d6fSPavel Emelyanov
/*
 * Return @prot's slot to the bitmap.  PROTO_INUSE_NR - 1 is the
 * "never assigned" sentinel (see assign_proto_idx()), so skip the
 * clear in that case.
 */
static void release_proto_idx(struct proto *prot)
{
	if (prot->inuse_idx != PROTO_INUSE_NR - 1)
		clear_bit(prot->inuse_idx, proto_inuse_idx);
}
383413ff3d6fSPavel Emelyanov #else
/* CONFIG_PROC_FS disabled: no per-proto statistics, nothing to assign. */
static inline int assign_proto_idx(struct proto *prot)
{
	return 0;
}
383913ff3d6fSPavel Emelyanov
/* CONFIG_PROC_FS disabled: nothing to release. */
static inline void release_proto_idx(struct proto *prot)
{
}
3843648845abSTonghao Zhang
384413ff3d6fSPavel Emelyanov #endif
384513ff3d6fSPavel Emelyanov
/*
 * Free a protocol's timewait slab cache and its name, if any.
 * Safe on partially initialized state (e.g. from proto_register()'s
 * error path): kfree() and kmem_cache_destroy() both accept NULL.
 */
static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot)
{
	if (!twsk_prot)
		return;
	kfree(twsk_prot->twsk_slab_name);
	twsk_prot->twsk_slab_name = NULL;
	kmem_cache_destroy(twsk_prot->twsk_slab);
	twsk_prot->twsk_slab = NULL;
}
38550f5907afSMiaohe Lin
/*
 * Create the "tw_sock_<proto>" slab cache for a protocol that supports
 * timewait sockets.  Returns 0 if the protocol has no twsk_prot or on
 * success, -ENOMEM on allocation failure (caller cleans up via
 * tw_prot_cleanup()).
 */
static int tw_prot_init(const struct proto *prot)
{
	struct timewait_sock_ops *twsk_prot = prot->twsk_prot;

	if (!twsk_prot)
		return 0;

	/* Cache name must outlive the cache itself; freed in cleanup. */
	twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s",
					      prot->name);
	if (!twsk_prot->twsk_slab_name)
		return -ENOMEM;

	twsk_prot->twsk_slab =
		kmem_cache_create(twsk_prot->twsk_slab_name,
				  twsk_prot->twsk_obj_size, 0,
				  SLAB_ACCOUNT | prot->slab_flags,
				  NULL);
	if (!twsk_prot->twsk_slab) {
		pr_crit("%s: Can't create timewait sock SLAB cache!\n",
			prot->name);
		return -ENOMEM;
	}

	return 0;
}
3881b80350f3STonghao Zhang
/*
 * Free a protocol's request-sock slab cache and its name, if any.
 * Safe on partially initialized state: kfree() and kmem_cache_destroy()
 * both accept NULL.
 */
static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
{
	if (!rsk_prot)
		return;
	kfree(rsk_prot->slab_name);
	rsk_prot->slab_name = NULL;
	kmem_cache_destroy(rsk_prot->slab);
	rsk_prot->slab = NULL;
}
38910159dfd3SEric Dumazet
/*
 * Create the "request_sock_<proto>" slab cache for a protocol that uses
 * request socks.  Returns 0 if the protocol has no rsk_prot or on
 * success, -ENOMEM on allocation failure (caller cleans up via
 * req_prot_cleanup()).
 */
static int req_prot_init(const struct proto *prot)
{
	struct request_sock_ops *rsk_prot = prot->rsk_prot;

	if (!rsk_prot)
		return 0;

	/* Cache name must outlive the cache itself; freed in cleanup. */
	rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s",
					prot->name);
	if (!rsk_prot->slab_name)
		return -ENOMEM;

	rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
					   rsk_prot->obj_size, 0,
					   SLAB_ACCOUNT | prot->slab_flags,
					   NULL);

	if (!rsk_prot->slab) {
		pr_crit("%s: Can't create request sock SLAB cache!\n",
			prot->name);
		return -ENOMEM;
	}
	return 0;
}
39160159dfd3SEric Dumazet
/*
 * proto_register - register a transport protocol with the socket layer.
 * @prot:       protocol descriptor; must outlive the registration
 * @alloc_slab: non-zero to create slab caches for the protocol's socks
 *              (plus request-sock and timewait caches where applicable)
 *
 * Returns 0 on success, -EINVAL on inconsistent memory-accounting setup,
 * -ENOBUFS/-ENOSPC/-ENOMEM on resource exhaustion.  The error labels
 * form a deliberate fall-through cascade that unwinds in reverse order
 * of construction.
 */
int proto_register(struct proto *prot, int alloc_slab)
{
	int ret = -ENOBUFS;

	/* A protocol doing memory accounting needs both knobs present. */
	if (prot->memory_allocated && !prot->sysctl_mem) {
		pr_err("%s: missing sysctl_mem\n", prot->name);
		return -EINVAL;
	}
	if (prot->memory_allocated && !prot->per_cpu_fw_alloc) {
		pr_err("%s: missing per_cpu_fw_alloc\n", prot->name);
		return -EINVAL;
	}
	if (alloc_slab) {
		prot->slab = kmem_cache_create_usercopy(prot->name,
					prot->obj_size, 0,
					SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT |
					prot->slab_flags,
					prot->useroffset, prot->usersize,
					NULL);

		if (prot->slab == NULL) {
			pr_crit("%s: Can't create sock SLAB cache!\n",
				prot->name);
			goto out;
		}

		if (req_prot_init(prot))
			goto out_free_request_sock_slab;

		if (tw_prot_init(prot))
			goto out_free_timewait_sock_slab;
	}

	mutex_lock(&proto_list_mutex);
	ret = assign_proto_idx(prot);
	if (ret) {
		mutex_unlock(&proto_list_mutex);
		goto out_free_timewait_sock_slab;
	}
	list_add(&prot->node, &proto_list);
	mutex_unlock(&proto_list_mutex);
	return ret;

	/* Error unwind: each label intentionally falls through to the
	 * next so later construction steps are torn down first.
	 */
out_free_timewait_sock_slab:
	if (alloc_slab)
		tw_prot_cleanup(prot->twsk_prot);
out_free_request_sock_slab:
	if (alloc_slab) {
		req_prot_cleanup(prot->rsk_prot);

		kmem_cache_destroy(prot->slab);
		prot->slab = NULL;
	}
out:
	return ret;
}
EXPORT_SYMBOL(proto_register);
39741da177e4SLinus Torvalds
/*
 * proto_unregister - undo a successful proto_register().
 * @prot: previously registered protocol descriptor
 *
 * Drops the protocol from the global list, returns its inuse-bitmap
 * slot, and destroys the slab caches created at registration time.
 */
void proto_unregister(struct proto *prot)
{
	mutex_lock(&proto_list_mutex);
	release_proto_idx(prot);
	list_del(&prot->node);
	mutex_unlock(&proto_list_mutex);

	/* kmem_cache_destroy(NULL) is a no-op, so this is safe even if
	 * registration ran with alloc_slab == 0.
	 */
	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;

	req_prot_cleanup(prot->rsk_prot);
	tw_prot_cleanup(prot->twsk_prot);
}
EXPORT_SYMBOL(proto_unregister);
39891da177e4SLinus Torvalds
/*
 * sock_load_diag_module - request the sock_diag handler module for a
 * family (and, optionally, a specific transport protocol).
 * @family:   address family (AF_*)
 * @protocol: transport protocol, or 0 for the family-level handler
 *
 * Returns -ENOENT when the family/protocol is clearly not available,
 * otherwise the result of request_module() on the matching
 * "net-pf-...-proto-...-type-..." module alias.
 */
int sock_load_diag_module(int family, int protocol)
{
	if (!protocol) {
		if (!sock_is_registered(family))
			return -ENOENT;

		return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
				      NETLINK_SOCK_DIAG, family);
	}

#ifdef CONFIG_INET
	/* For AF_INET, bail out early when the transport protocol is not
	 * registered; IPPROTO_RAW and out-of-range values are exempt from
	 * the inet_protos[] check.
	 */
	if (family == AF_INET &&
	    protocol != IPPROTO_RAW &&
	    protocol < MAX_INET_PROTOS &&
	    !rcu_access_pointer(inet_protos[protocol]))
		return -ENOENT;
#endif

	return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
			      NETLINK_SOCK_DIAG, family, protocol);
}
EXPORT_SYMBOL(sock_load_diag_module);
4012bf2ae2e4SXin Long
40131da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
/* seq_file .start: take the list lock and position at *pos (the list
 * head itself is returned for position 0, standing for the header row).
 */
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(proto_list_mutex)
{
	mutex_lock(&proto_list_mutex);
	return seq_list_start_head(&proto_list, *pos);
}
40201da177e4SLinus Torvalds
/* seq_file .next: advance to the next registered protocol. */
static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	return seq_list_next(v, &proto_list, pos);
}
40251da177e4SLinus Torvalds
/* seq_file .stop: drop the lock taken in proto_seq_start(). */
static void proto_seq_stop(struct seq_file *seq, void *v)
	__releases(proto_list_mutex)
{
	mutex_unlock(&proto_list_mutex);
}
40311da177e4SLinus Torvalds
/* Map an optional method pointer to the 'y'/'n' flag shown in
 * /proc/net/protocols.
 */
static char proto_method_implemented(const void *method)
{
	return method ? 'y' : 'n';
}
sock_prot_memory_allocated(struct proto * proto)4036180d8cd9SGlauber Costa static long sock_prot_memory_allocated(struct proto *proto)
4037180d8cd9SGlauber Costa {
4038180d8cd9SGlauber Costa return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
4039180d8cd9SGlauber Costa }
4040180d8cd9SGlauber Costa
sock_prot_memory_pressure(struct proto * proto)40417a512eb8SAlexey Dobriyan static const char *sock_prot_memory_pressure(struct proto *proto)
4042180d8cd9SGlauber Costa {
4043180d8cd9SGlauber Costa return proto->memory_pressure != NULL ?
4044180d8cd9SGlauber Costa proto_memory_pressure(proto) ? "yes" : "no" : "NI";
4045180d8cd9SGlauber Costa }
40461da177e4SLinus Torvalds
/*
 * Emit one row of /proc/net/protocols for @proto: name, object size,
 * per-netns socket count, memory-accounting state, slab presence, owner
 * module, and a y/n flag per optional struct proto method.  Column
 * order matches the header row printed by proto_seq_show().
 */
static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{

	seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   sock_prot_inuse_get(seq_file_net(seq), proto),
		   sock_prot_memory_allocated(proto),
		   sock_prot_memory_pressure(proto),
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}
40791da177e4SLinus Torvalds
proto_seq_show(struct seq_file * seq,void * v)40801da177e4SLinus Torvalds static int proto_seq_show(struct seq_file *seq, void *v)
40811da177e4SLinus Torvalds {
408260f0438aSPavel Emelianov if (v == &proto_list)
40831da177e4SLinus Torvalds seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
40841da177e4SLinus Torvalds "protocol",
40851da177e4SLinus Torvalds "size",
40861da177e4SLinus Torvalds "sockets",
40871da177e4SLinus Torvalds "memory",
40881da177e4SLinus Torvalds "press",
40891da177e4SLinus Torvalds "maxhdr",
40901da177e4SLinus Torvalds "slab",
40911da177e4SLinus Torvalds "module",
4092dc97391eSDavid Howells "cl co di ac io in de sh ss gs se re bi br ha uh gp em\n");
40931da177e4SLinus Torvalds else
409460f0438aSPavel Emelianov proto_seq_printf(seq, list_entry(v, struct proto, node));
40951da177e4SLinus Torvalds return 0;
40961da177e4SLinus Torvalds }
40971da177e4SLinus Torvalds
/* seq_file operations backing /proc/net/protocols. */
static const struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};
41041da177e4SLinus Torvalds
proto_init_net(struct net * net)410514e943dbSEric Dumazet static __net_init int proto_init_net(struct net *net)
410614e943dbSEric Dumazet {
4107c3506372SChristoph Hellwig if (!proc_create_net("protocols", 0444, net->proc_net, &proto_seq_ops,
4108c3506372SChristoph Hellwig sizeof(struct seq_net_private)))
410914e943dbSEric Dumazet return -ENOMEM;
411014e943dbSEric Dumazet
411114e943dbSEric Dumazet return 0;
411214e943dbSEric Dumazet }
411314e943dbSEric Dumazet
/* Remove /proc/net/protocols when the namespace is torn down. */
static __net_exit void proto_exit_net(struct net *net)
{
	remove_proc_entry("protocols", net->proc_net);
}
411814e943dbSEric Dumazet
411914e943dbSEric Dumazet
/* Per-netns lifetime hooks for the /proc/net/protocols file. */
static __net_initdata struct pernet_operations proto_net_ops = {
	.init = proto_init_net,
	.exit = proto_exit_net,
};
41241da177e4SLinus Torvalds
/* Register the /proc/net/protocols pernet operations at boot. */
static int __init proto_init(void)
{
	return register_pernet_subsys(&proto_net_ops);
}

subsys_initcall(proto_init);
41311da177e4SLinus Torvalds
41321da177e4SLinus Torvalds #endif /* PROC_FS */
41337db6b048SSridhar Samudrala
41347db6b048SSridhar Samudrala #ifdef CONFIG_NET_RX_BUSY_POLL
/*
 * Busy-poll termination callback: return true when polling should stop,
 * i.e. data is available on the socket or the busy-poll time budget
 * (measured from @start_time) has expired.  @p is the struct sock.
 */
bool sk_busy_loop_end(void *p, unsigned long start_time)
{
	struct sock *sk = p;

	/* Stop as soon as the generic receive queue has data. */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		return true;

	/* UDP keeps a second, lockless reader queue; check it too. */
	if (sk_is_udp(sk) &&
	    !skb_queue_empty_lockless(&udp_sk(sk)->reader_queue))
		return true;

	return sk_busy_loop_timeout(sk, start_time);
}
EXPORT_SYMBOL(sk_busy_loop_end);
41497db6b048SSridhar Samudrala #endif /* CONFIG_NET_RX_BUSY_POLL */
4150c0425a42SChristoph Hellwig
sock_bind_add(struct sock * sk,struct sockaddr * addr,int addr_len)4151c0425a42SChristoph Hellwig int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len)
4152c0425a42SChristoph Hellwig {
4153c0425a42SChristoph Hellwig if (!sk->sk_prot->bind_add)
4154c0425a42SChristoph Hellwig return -EOPNOTSUPP;
4155c0425a42SChristoph Hellwig return sk->sk_prot->bind_add(sk, addr, addr_len);
4156c0425a42SChristoph Hellwig }
4157c0425a42SChristoph Hellwig EXPORT_SYMBOL(sock_bind_add);
4158e1d001faSBreno Leitao
4159e1d001faSBreno Leitao /* Copy 'size' bytes from userspace and return `size` back to userspace */
int sock_ioctl_inout(struct sock *sk, unsigned int cmd,
		     void __user *arg, void *karg, size_t size)
{
	int err;

	if (copy_from_user(karg, arg, size))
		return -EFAULT;

	/* sk_prot may change under us (IPV6_ADDRFORM); sample it once. */
	err = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, karg);
	if (err)
		return err;

	/* Copy the (possibly updated) kernel buffer back to the caller. */
	return copy_to_user(arg, karg, size) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_ioctl_inout);
4178e1d001faSBreno Leitao
4179e1d001faSBreno Leitao /* This is the most common ioctl prep function, where the result (4 bytes) is
4180e1d001faSBreno Leitao * copied back to userspace if the ioctl() returns successfully. No input is
4181e1d001faSBreno Leitao * copied from userspace as input argument.
4182e1d001faSBreno Leitao */
/* Common ioctl prep: no input copied in; on success the int result is
 * copied back to userspace.
 */
static int sock_ioctl_out(struct sock *sk, unsigned int cmd, void __user *arg)
{
	int karg = 0;
	int ret;

	ret = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, &karg);
	if (ret)
		return ret;

	return put_user(karg, (int __user *)arg);
}
4193e1d001faSBreno Leitao
4194e1d001faSBreno Leitao /* A wrapper around sock ioctls, which copies the data from userspace
4195e1d001faSBreno Leitao * (depending on the protocol/ioctl), and copies back the result to userspace.
4196e1d001faSBreno Leitao * The main motivation for this function is to pass kernel memory to the
4197e1d001faSBreno Leitao * protocol ioctl callbacks, instead of userspace memory.
4198e1d001faSBreno Leitao */
/* Wrapper around sock ioctls: hands raw-IP multicast-routing and phonet
 * sockets to their dedicated handlers, then falls back to the generic
 * copy-out path.  A handler return > 0 means "not handled here".
 */
int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	int rc;

	if (sk->sk_type == SOCK_RAW && sk->sk_family == AF_INET)
		rc = ipmr_sk_ioctl(sk, cmd, arg);
	else if (sk->sk_type == SOCK_RAW && sk->sk_family == AF_INET6)
		rc = ip6mr_sk_ioctl(sk, cmd, arg);
	else if (sk_is_phonet(sk))
		rc = phonet_sk_ioctl(sk, cmd, arg);
	else
		rc = 1;

	/* If a dedicated handler processed the ioctl, return its value. */
	if (rc <= 0)
		return rc;

	/* Otherwise call the default handler */
	return sock_ioctl_out(sk, cmd, arg);
}
EXPORT_SYMBOL(sk_ioctl);
4218