12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX
41da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket
51da177e4SLinus Torvalds * interface as the means of communication with the user level.
61da177e4SLinus Torvalds *
71da177e4SLinus Torvalds * Generic socket support routines. Memory allocators, socket lock/release
81da177e4SLinus Torvalds * handler for protocols to use and generic option handler.
91da177e4SLinus Torvalds *
1002c30a84SJesper Juhl * Authors: Ross Biro
111da177e4SLinus Torvalds * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
121da177e4SLinus Torvalds * Florian La Roche, <flla@stud.uni-sb.de>
131da177e4SLinus Torvalds * Alan Cox, <A.Cox@swansea.ac.uk>
141da177e4SLinus Torvalds *
151da177e4SLinus Torvalds * Fixes:
161da177e4SLinus Torvalds * Alan Cox : Numerous verify_area() problems
171da177e4SLinus Torvalds * Alan Cox : Connecting on a connecting socket
181da177e4SLinus Torvalds * now returns an error for tcp.
191da177e4SLinus Torvalds * Alan Cox : sock->protocol is set correctly.
201da177e4SLinus Torvalds * and is not sometimes left as 0.
211da177e4SLinus Torvalds * Alan Cox : connect handles icmp errors on a
221da177e4SLinus Torvalds * connect properly. Unfortunately there
231da177e4SLinus Torvalds * is a restart syscall nasty there. I
241da177e4SLinus Torvalds * can't match BSD without hacking the C
251da177e4SLinus Torvalds * library. Ideas urgently sought!
261da177e4SLinus Torvalds * Alan Cox : Disallow bind() to addresses that are
271da177e4SLinus Torvalds * not ours - especially broadcast ones!!
281da177e4SLinus Torvalds * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost)
291da177e4SLinus Torvalds * Alan Cox : sock_wfree/sock_rfree don't destroy sockets,
301da177e4SLinus Torvalds * instead they leave that for the DESTROY timer.
311da177e4SLinus Torvalds * Alan Cox : Clean up error flag in accept
321da177e4SLinus Torvalds * Alan Cox : TCP ack handling is buggy, the DESTROY timer
331da177e4SLinus Torvalds * was buggy. Put a remove_sock() in the handler
341da177e4SLinus Torvalds * for memory when we hit 0. Also altered the timer
351da177e4SLinus Torvalds * code. The ACK stuff can wait and needs major
361da177e4SLinus Torvalds * TCP layer surgery.
371da177e4SLinus Torvalds * Alan Cox : Fixed TCP ack bug, removed remove sock
381da177e4SLinus Torvalds * and fixed timer/inet_bh race.
391da177e4SLinus Torvalds * Alan Cox : Added zapped flag for TCP
401da177e4SLinus Torvalds * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code
411da177e4SLinus Torvalds * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
421da177e4SLinus Torvalds * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources
431da177e4SLinus Torvalds * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing.
441da177e4SLinus Torvalds * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
451da177e4SLinus Torvalds * Rick Sladkey : Relaxed UDP rules for matching packets.
461da177e4SLinus Torvalds * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support
471da177e4SLinus Torvalds * Pauline Middelink : identd support
481da177e4SLinus Torvalds * Alan Cox : Fixed connect() taking signals I think.
491da177e4SLinus Torvalds * Alan Cox : SO_LINGER supported
501da177e4SLinus Torvalds * Alan Cox : Error reporting fixes
511da177e4SLinus Torvalds * Anonymous : inet_create tidied up (sk->reuse setting)
521da177e4SLinus Torvalds * Alan Cox : inet sockets don't set sk->type!
531da177e4SLinus Torvalds * Alan Cox : Split socket option code
541da177e4SLinus Torvalds * Alan Cox : Callbacks
551da177e4SLinus Torvalds * Alan Cox : Nagle flag for Charles & Johannes stuff
561da177e4SLinus Torvalds * Alex : Removed restriction on inet fioctl
571da177e4SLinus Torvalds * Alan Cox : Splitting INET from NET core
581da177e4SLinus Torvalds * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt()
591da177e4SLinus Torvalds * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code
601da177e4SLinus Torvalds * Alan Cox : Split IP from generic code
611da177e4SLinus Torvalds * Alan Cox : New kfree_skbmem()
621da177e4SLinus Torvalds * Alan Cox : Make SO_DEBUG superuser only.
631da177e4SLinus Torvalds * Alan Cox : Allow anyone to clear SO_DEBUG
641da177e4SLinus Torvalds * (compatibility fix)
651da177e4SLinus Torvalds * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput.
661da177e4SLinus Torvalds * Alan Cox : Allocator for a socket is settable.
671da177e4SLinus Torvalds * Alan Cox : SO_ERROR includes soft errors.
681da177e4SLinus Torvalds * Alan Cox : Allow NULL arguments on some SO_ opts
691da177e4SLinus Torvalds * Alan Cox : Generic socket allocation to make hooks
701da177e4SLinus Torvalds * easier (suggested by Craig Metz).
711da177e4SLinus Torvalds * Michael Pall : SO_ERROR returns positive errno again
721da177e4SLinus Torvalds * Steve Whitehouse: Added default destructor to free
731da177e4SLinus Torvalds * protocol private data.
741da177e4SLinus Torvalds * Steve Whitehouse: Added various other default routines
751da177e4SLinus Torvalds * common to several socket families.
761da177e4SLinus Torvalds * Chris Evans : Call suser() check last on F_SETOWN
771da177e4SLinus Torvalds * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
781da177e4SLinus Torvalds * Andi Kleen : Add sock_kmalloc()/sock_kfree_s()
791da177e4SLinus Torvalds * Andi Kleen : Fix write_space callback
801da177e4SLinus Torvalds * Chris Evans : Security fixes - signedness again
811da177e4SLinus Torvalds * Arnaldo C. Melo : cleanups, use skb_queue_purge
821da177e4SLinus Torvalds *
831da177e4SLinus Torvalds * To Fix:
841da177e4SLinus Torvalds */
851da177e4SLinus Torvalds
86e005d193SJoe Perches #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
87e005d193SJoe Perches
8880b14deeSRichard Cochran #include <asm/unaligned.h>
894fc268d2SRandy Dunlap #include <linux/capability.h>
901da177e4SLinus Torvalds #include <linux/errno.h>
91cb820f8eSRichard Cochran #include <linux/errqueue.h>
921da177e4SLinus Torvalds #include <linux/types.h>
931da177e4SLinus Torvalds #include <linux/socket.h>
941da177e4SLinus Torvalds #include <linux/in.h>
951da177e4SLinus Torvalds #include <linux/kernel.h>
961da177e4SLinus Torvalds #include <linux/module.h>
971da177e4SLinus Torvalds #include <linux/proc_fs.h>
981da177e4SLinus Torvalds #include <linux/seq_file.h>
991da177e4SLinus Torvalds #include <linux/sched.h>
100f1083048SVlastimil Babka #include <linux/sched/mm.h>
1011da177e4SLinus Torvalds #include <linux/timer.h>
1021da177e4SLinus Torvalds #include <linux/string.h>
1031da177e4SLinus Torvalds #include <linux/sockios.h>
1041da177e4SLinus Torvalds #include <linux/net.h>
1051da177e4SLinus Torvalds #include <linux/mm.h>
1061da177e4SLinus Torvalds #include <linux/slab.h>
1071da177e4SLinus Torvalds #include <linux/interrupt.h>
1081da177e4SLinus Torvalds #include <linux/poll.h>
1091da177e4SLinus Torvalds #include <linux/tcp.h>
110ef8ad307SEric Dumazet #include <linux/udp.h>
1111da177e4SLinus Torvalds #include <linux/init.h>
112a1f8e7f7SAl Viro #include <linux/highmem.h>
1133f551f94SEric W. Biederman #include <linux/user_namespace.h>
114c5905afbSIngo Molnar #include <linux/static_key.h>
1153969eb38SDavid S. Miller #include <linux/memcontrol.h>
1168c1ae10dSDavid S. Miller #include <linux/prefetch.h>
117a6c0d093SChristoph Hellwig #include <linux/compat.h>
118e1d001faSBreno Leitao #include <linux/mroute.h>
119e1d001faSBreno Leitao #include <linux/mroute6.h>
120e1d001faSBreno Leitao #include <linux/icmpv6.h>
1211da177e4SLinus Torvalds
1227c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
1231da177e4SLinus Torvalds
1241da177e4SLinus Torvalds #include <linux/netdevice.h>
1251da177e4SLinus Torvalds #include <net/protocol.h>
1261da177e4SLinus Torvalds #include <linux/skbuff.h>
127457c4cbcSEric W. Biederman #include <net/net_namespace.h>
1282e6599cbSArnaldo Carvalho de Melo #include <net/request_sock.h>
1291da177e4SLinus Torvalds #include <net/sock.h>
13020d49473SPatrick Ohly #include <linux/net_tstamp.h>
1311da177e4SLinus Torvalds #include <net/xfrm.h>
1321da177e4SLinus Torvalds #include <linux/ipsec.h>
133f8451725SHerbert Xu #include <net/cls_cgroup.h>
1345bc1421eSNeil Horman #include <net/netprio_cgroup.h>
135eb4cb008SCraig Gallek #include <linux/sock_diag.h>
1361da177e4SLinus Torvalds
1371da177e4SLinus Torvalds #include <linux/filter.h>
138538950a1SCraig Gallek #include <net/sock_reuseport.h>
1396ac99e8fSMartin KaFai Lau #include <net/bpf_sk_storage.h>
1401da177e4SLinus Torvalds
1413847ce32SSatoru Moriya #include <trace/events/sock.h>
1423847ce32SSatoru Moriya
1431da177e4SLinus Torvalds #include <net/tcp.h>
144076bb0c8SEliezer Tamir #include <net/busy_poll.h>
145e1d001faSBreno Leitao #include <net/phonet/phonet.h>
14606021292SEliezer Tamir
147d463126eSYangbo Lu #include <linux/ethtool.h>
148d463126eSYangbo Lu
1496264f58cSJakub Kicinski #include "dev.h"
1506264f58cSJakub Kicinski
15136b77a52SGlauber Costa static DEFINE_MUTEX(proto_list_mutex);
152d1a4c0b3SGlauber Costa static LIST_HEAD(proto_list);
153d1a4c0b3SGlauber Costa
1540a8afd9fSPavel Begunkov static void sock_def_write_space_wfree(struct sock *sk);
155052ada09SPavel Begunkov static void sock_def_write_space(struct sock *sk);
156052ada09SPavel Begunkov
157a3b299daSEric W. Biederman /**
158a3b299daSEric W. Biederman * sk_ns_capable - General socket capability test
159a3b299daSEric W. Biederman * @sk: Socket to use a capability on or through
160a3b299daSEric W. Biederman * @user_ns: The user namespace of the capability to use
161a3b299daSEric W. Biederman * @cap: The capability to use
162a3b299daSEric W. Biederman *
163a3b299daSEric W. Biederman * Test to see if the opener of the socket had when the socket was
164a3b299daSEric W. Biederman * created and the current process has the capability @cap in the user
165a3b299daSEric W. Biederman * namespace @user_ns.
166a3b299daSEric W. Biederman */
sk_ns_capable(const struct sock * sk,struct user_namespace * user_ns,int cap)167a3b299daSEric W. Biederman bool sk_ns_capable(const struct sock *sk,
168a3b299daSEric W. Biederman struct user_namespace *user_ns, int cap)
169a3b299daSEric W. Biederman {
170a3b299daSEric W. Biederman return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
171a3b299daSEric W. Biederman ns_capable(user_ns, cap);
172a3b299daSEric W. Biederman }
173a3b299daSEric W. Biederman EXPORT_SYMBOL(sk_ns_capable);
174a3b299daSEric W. Biederman
175a3b299daSEric W. Biederman /**
176a3b299daSEric W. Biederman * sk_capable - Socket global capability test
177a3b299daSEric W. Biederman * @sk: Socket to use a capability on or through
178e793c0f7SMasanari Iida * @cap: The global capability to use
179a3b299daSEric W. Biederman *
180a3b299daSEric W. Biederman * Test to see if the opener of the socket had when the socket was
181a3b299daSEric W. Biederman * created and the current process has the capability @cap in all user
182a3b299daSEric W. Biederman * namespaces.
183a3b299daSEric W. Biederman */
sk_capable(const struct sock * sk,int cap)184a3b299daSEric W. Biederman bool sk_capable(const struct sock *sk, int cap)
185a3b299daSEric W. Biederman {
186a3b299daSEric W. Biederman return sk_ns_capable(sk, &init_user_ns, cap);
187a3b299daSEric W. Biederman }
188a3b299daSEric W. Biederman EXPORT_SYMBOL(sk_capable);
189a3b299daSEric W. Biederman
190a3b299daSEric W. Biederman /**
191a3b299daSEric W. Biederman * sk_net_capable - Network namespace socket capability test
192a3b299daSEric W. Biederman * @sk: Socket to use a capability on or through
193a3b299daSEric W. Biederman * @cap: The capability to use
194a3b299daSEric W. Biederman *
195e793c0f7SMasanari Iida * Test to see if the opener of the socket had when the socket was created
196a3b299daSEric W. Biederman * and the current process has the capability @cap over the network namespace
197a3b299daSEric W. Biederman * the socket is a member of.
198a3b299daSEric W. Biederman */
sk_net_capable(const struct sock * sk,int cap)199a3b299daSEric W. Biederman bool sk_net_capable(const struct sock *sk, int cap)
200a3b299daSEric W. Biederman {
201a3b299daSEric W. Biederman return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
202a3b299daSEric W. Biederman }
203a3b299daSEric W. Biederman EXPORT_SYMBOL(sk_net_capable);
204a3b299daSEric W. Biederman
205da21f24dSIngo Molnar /*
206da21f24dSIngo Molnar * Each address family might have different locking rules, so we have
207cdfbabfbSDavid Howells * one slock key per address family and separate keys for internal and
208cdfbabfbSDavid Howells * userspace sockets.
209da21f24dSIngo Molnar */
210a5b5bb9aSIngo Molnar static struct lock_class_key af_family_keys[AF_MAX];
211cdfbabfbSDavid Howells static struct lock_class_key af_family_kern_keys[AF_MAX];
212a5b5bb9aSIngo Molnar static struct lock_class_key af_family_slock_keys[AF_MAX];
213cdfbabfbSDavid Howells static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
214a5b5bb9aSIngo Molnar
215a5b5bb9aSIngo Molnar /*
216a5b5bb9aSIngo Molnar * Make lock validator output more readable. (we pre-construct these
217a5b5bb9aSIngo Molnar * strings build-time, so that runtime initialization of socket
218a5b5bb9aSIngo Molnar * locks is fast):
219a5b5bb9aSIngo Molnar */
220cdfbabfbSDavid Howells
221cdfbabfbSDavid Howells #define _sock_locks(x) \
222cdfbabfbSDavid Howells x "AF_UNSPEC", x "AF_UNIX" , x "AF_INET" , \
223cdfbabfbSDavid Howells x "AF_AX25" , x "AF_IPX" , x "AF_APPLETALK", \
224cdfbabfbSDavid Howells x "AF_NETROM", x "AF_BRIDGE" , x "AF_ATMPVC" , \
225cdfbabfbSDavid Howells x "AF_X25" , x "AF_INET6" , x "AF_ROSE" , \
226cdfbabfbSDavid Howells x "AF_DECnet", x "AF_NETBEUI" , x "AF_SECURITY" , \
227cdfbabfbSDavid Howells x "AF_KEY" , x "AF_NETLINK" , x "AF_PACKET" , \
228cdfbabfbSDavid Howells x "AF_ASH" , x "AF_ECONET" , x "AF_ATMSVC" , \
229cdfbabfbSDavid Howells x "AF_RDS" , x "AF_SNA" , x "AF_IRDA" , \
230cdfbabfbSDavid Howells x "AF_PPPOX" , x "AF_WANPIPE" , x "AF_LLC" , \
231cdfbabfbSDavid Howells x "27" , x "28" , x "AF_CAN" , \
232cdfbabfbSDavid Howells x "AF_TIPC" , x "AF_BLUETOOTH", x "IUCV" , \
233cdfbabfbSDavid Howells x "AF_RXRPC" , x "AF_ISDN" , x "AF_PHONET" , \
234cdfbabfbSDavid Howells x "AF_IEEE802154", x "AF_CAIF" , x "AF_ALG" , \
235cdfbabfbSDavid Howells x "AF_NFC" , x "AF_VSOCK" , x "AF_KCM" , \
23668e8b849SBjörn Töpel x "AF_QIPCRTR", x "AF_SMC" , x "AF_XDP" , \
237bc49d816SJeremy Kerr x "AF_MCTP" , \
23868e8b849SBjörn Töpel x "AF_MAX"
239cdfbabfbSDavid Howells
24036cbd3dcSJan Engelhardt static const char *const af_family_key_strings[AF_MAX+1] = {
241cdfbabfbSDavid Howells _sock_locks("sk_lock-")
242a5b5bb9aSIngo Molnar };
24336cbd3dcSJan Engelhardt static const char *const af_family_slock_key_strings[AF_MAX+1] = {
244cdfbabfbSDavid Howells _sock_locks("slock-")
245a5b5bb9aSIngo Molnar };
24636cbd3dcSJan Engelhardt static const char *const af_family_clock_key_strings[AF_MAX+1] = {
247cdfbabfbSDavid Howells _sock_locks("clock-")
248cdfbabfbSDavid Howells };
249cdfbabfbSDavid Howells
250cdfbabfbSDavid Howells static const char *const af_family_kern_key_strings[AF_MAX+1] = {
251cdfbabfbSDavid Howells _sock_locks("k-sk_lock-")
252cdfbabfbSDavid Howells };
253cdfbabfbSDavid Howells static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
254cdfbabfbSDavid Howells _sock_locks("k-slock-")
255cdfbabfbSDavid Howells };
256cdfbabfbSDavid Howells static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
257cdfbabfbSDavid Howells _sock_locks("k-clock-")
258443aef0eSPeter Zijlstra };
259581319c5SPaolo Abeni static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
2606b431d50SMatthieu Baerts _sock_locks("rlock-")
261581319c5SPaolo Abeni };
262581319c5SPaolo Abeni static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
2636b431d50SMatthieu Baerts _sock_locks("wlock-")
264581319c5SPaolo Abeni };
265581319c5SPaolo Abeni static const char *const af_family_elock_key_strings[AF_MAX+1] = {
2666b431d50SMatthieu Baerts _sock_locks("elock-")
267581319c5SPaolo Abeni };
268da21f24dSIngo Molnar
269da21f24dSIngo Molnar /*
270581319c5SPaolo Abeni * sk_callback_lock and sk queues locking rules are per-address-family,
271da21f24dSIngo Molnar * so split the lock classes by using a per-AF key:
272da21f24dSIngo Molnar */
273da21f24dSIngo Molnar static struct lock_class_key af_callback_keys[AF_MAX];
274581319c5SPaolo Abeni static struct lock_class_key af_rlock_keys[AF_MAX];
275581319c5SPaolo Abeni static struct lock_class_key af_wlock_keys[AF_MAX];
276581319c5SPaolo Abeni static struct lock_class_key af_elock_keys[AF_MAX];
277cdfbabfbSDavid Howells static struct lock_class_key af_kern_callback_keys[AF_MAX];
278da21f24dSIngo Molnar
2791da177e4SLinus Torvalds /* Run time adjustable parameters. */
280ab32ea5dSBrian Haley __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
2816d8ebc8aSHans Schillstrom EXPORT_SYMBOL(sysctl_wmem_max);
282ab32ea5dSBrian Haley __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
2836d8ebc8aSHans Schillstrom EXPORT_SYMBOL(sysctl_rmem_max);
284ab32ea5dSBrian Haley __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
285ab32ea5dSBrian Haley __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
286fe1e8381SAdam Li int sysctl_mem_pcpu_rsv __read_mostly = SK_MEMORY_PCPU_RESERVE;
2871da177e4SLinus Torvalds
28825985edcSLucas De Marchi /* Maximal space eaten by iovec or ancillary data plus some space */
289ab32ea5dSBrian Haley int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
2902a91525cSEric Dumazet EXPORT_SYMBOL(sysctl_optmem_max);
2911da177e4SLinus Torvalds
292b245be1fSWillem de Bruijn int sysctl_tstamp_allow_data __read_mostly = 1;
293b245be1fSWillem de Bruijn
294a7950ae8SDavidlohr Bueso DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
295a7950ae8SDavidlohr Bueso EXPORT_SYMBOL_GPL(memalloc_socks_key);
296c93bdd0eSMel Gorman
2977cb02404SMel Gorman /**
2987cb02404SMel Gorman * sk_set_memalloc - sets %SOCK_MEMALLOC
2997cb02404SMel Gorman * @sk: socket to set it on
3007cb02404SMel Gorman *
3017cb02404SMel Gorman * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
3027cb02404SMel Gorman * It's the responsibility of the admin to adjust min_free_kbytes
3037cb02404SMel Gorman * to meet the requirements
3047cb02404SMel Gorman */
sk_set_memalloc(struct sock * sk)3057cb02404SMel Gorman void sk_set_memalloc(struct sock *sk)
3067cb02404SMel Gorman {
3077cb02404SMel Gorman sock_set_flag(sk, SOCK_MEMALLOC);
3087cb02404SMel Gorman sk->sk_allocation |= __GFP_MEMALLOC;
309a7950ae8SDavidlohr Bueso static_branch_inc(&memalloc_socks_key);
3107cb02404SMel Gorman }
3117cb02404SMel Gorman EXPORT_SYMBOL_GPL(sk_set_memalloc);
3127cb02404SMel Gorman
sk_clear_memalloc(struct sock * sk)3137cb02404SMel Gorman void sk_clear_memalloc(struct sock *sk)
3147cb02404SMel Gorman {
3157cb02404SMel Gorman sock_reset_flag(sk, SOCK_MEMALLOC);
3167cb02404SMel Gorman sk->sk_allocation &= ~__GFP_MEMALLOC;
317a7950ae8SDavidlohr Bueso static_branch_dec(&memalloc_socks_key);
318c76562b6SMel Gorman
319c76562b6SMel Gorman /*
320c76562b6SMel Gorman * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
3215d753610SMel Gorman * progress of swapping. SOCK_MEMALLOC may be cleared while
3225d753610SMel Gorman * it has rmem allocations due to the last swapfile being deactivated
3235d753610SMel Gorman * but there is a risk that the socket is unusable due to exceeding
3245d753610SMel Gorman * the rmem limits. Reclaim the reserves and obey rmem limits again.
325c76562b6SMel Gorman */
326c76562b6SMel Gorman sk_mem_reclaim(sk);
3277cb02404SMel Gorman }
3287cb02404SMel Gorman EXPORT_SYMBOL_GPL(sk_clear_memalloc);
3297cb02404SMel Gorman
__sk_backlog_rcv(struct sock * sk,struct sk_buff * skb)330b4b9e355SMel Gorman int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
331b4b9e355SMel Gorman {
332b4b9e355SMel Gorman int ret;
333f1083048SVlastimil Babka unsigned int noreclaim_flag;
334b4b9e355SMel Gorman
335b4b9e355SMel Gorman /* these should have been dropped before queueing */
336b4b9e355SMel Gorman BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
337b4b9e355SMel Gorman
338f1083048SVlastimil Babka noreclaim_flag = memalloc_noreclaim_save();
339d2489c7bSEric Dumazet ret = INDIRECT_CALL_INET(sk->sk_backlog_rcv,
340d2489c7bSEric Dumazet tcp_v6_do_rcv,
341d2489c7bSEric Dumazet tcp_v4_do_rcv,
342d2489c7bSEric Dumazet sk, skb);
343f1083048SVlastimil Babka memalloc_noreclaim_restore(noreclaim_flag);
344b4b9e355SMel Gorman
345b4b9e355SMel Gorman return ret;
346b4b9e355SMel Gorman }
347b4b9e355SMel Gorman EXPORT_SYMBOL(__sk_backlog_rcv);
348b4b9e355SMel Gorman
sk_error_report(struct sock * sk)349e3ae2365SAlexander Aring void sk_error_report(struct sock *sk)
350e3ae2365SAlexander Aring {
351e3ae2365SAlexander Aring sk->sk_error_report(sk);
352e6a3e443SAlexander Aring
353e6a3e443SAlexander Aring switch (sk->sk_family) {
354e6a3e443SAlexander Aring case AF_INET:
355e6a3e443SAlexander Aring fallthrough;
356e6a3e443SAlexander Aring case AF_INET6:
357e6a3e443SAlexander Aring trace_inet_sk_error_report(sk);
358e6a3e443SAlexander Aring break;
359e6a3e443SAlexander Aring default:
360e6a3e443SAlexander Aring break;
361e6a3e443SAlexander Aring }
362e3ae2365SAlexander Aring }
363e3ae2365SAlexander Aring EXPORT_SYMBOL(sk_error_report);
364e3ae2365SAlexander Aring
sock_get_timeout(long timeo,void * optval,bool old_timeval)3654c1e34c0SRichard Palethorpe int sock_get_timeout(long timeo, void *optval, bool old_timeval)
366fe0c72f3SArnd Bergmann {
367a9beb86aSDeepa Dinamani struct __kernel_sock_timeval tv;
368fe0c72f3SArnd Bergmann
369fe0c72f3SArnd Bergmann if (timeo == MAX_SCHEDULE_TIMEOUT) {
370fe0c72f3SArnd Bergmann tv.tv_sec = 0;
371fe0c72f3SArnd Bergmann tv.tv_usec = 0;
372fe0c72f3SArnd Bergmann } else {
373fe0c72f3SArnd Bergmann tv.tv_sec = timeo / HZ;
374fe0c72f3SArnd Bergmann tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ;
375fe0c72f3SArnd Bergmann }
376fe0c72f3SArnd Bergmann
377e6986423SArnd Bergmann if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
378fe0c72f3SArnd Bergmann struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec };
379fe0c72f3SArnd Bergmann *(struct old_timeval32 *)optval = tv32;
380fe0c72f3SArnd Bergmann return sizeof(tv32);
381fe0c72f3SArnd Bergmann }
382fe0c72f3SArnd Bergmann
383a9beb86aSDeepa Dinamani if (old_timeval) {
384a9beb86aSDeepa Dinamani struct __kernel_old_timeval old_tv;
385a9beb86aSDeepa Dinamani old_tv.tv_sec = tv.tv_sec;
386a9beb86aSDeepa Dinamani old_tv.tv_usec = tv.tv_usec;
387a9beb86aSDeepa Dinamani *(struct __kernel_old_timeval *)optval = old_tv;
38828e72b26SVito Caputo return sizeof(old_tv);
389fe0c72f3SArnd Bergmann }
390fe0c72f3SArnd Bergmann
39128e72b26SVito Caputo *(struct __kernel_sock_timeval *)optval = tv;
39228e72b26SVito Caputo return sizeof(tv);
393a9beb86aSDeepa Dinamani }
3944c1e34c0SRichard Palethorpe EXPORT_SYMBOL(sock_get_timeout);
395a9beb86aSDeepa Dinamani
sock_copy_user_timeval(struct __kernel_sock_timeval * tv,sockptr_t optval,int optlen,bool old_timeval)3964c1e34c0SRichard Palethorpe int sock_copy_user_timeval(struct __kernel_sock_timeval *tv,
3974c1e34c0SRichard Palethorpe sockptr_t optval, int optlen, bool old_timeval)
3981da177e4SLinus Torvalds {
399e6986423SArnd Bergmann if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
400fe0c72f3SArnd Bergmann struct old_timeval32 tv32;
401fe0c72f3SArnd Bergmann
402fe0c72f3SArnd Bergmann if (optlen < sizeof(tv32))
403fe0c72f3SArnd Bergmann return -EINVAL;
404fe0c72f3SArnd Bergmann
405c34645acSChristoph Hellwig if (copy_from_sockptr(&tv32, optval, sizeof(tv32)))
406fe0c72f3SArnd Bergmann return -EFAULT;
4074c1e34c0SRichard Palethorpe tv->tv_sec = tv32.tv_sec;
4084c1e34c0SRichard Palethorpe tv->tv_usec = tv32.tv_usec;
409a9beb86aSDeepa Dinamani } else if (old_timeval) {
410a9beb86aSDeepa Dinamani struct __kernel_old_timeval old_tv;
411a9beb86aSDeepa Dinamani
412a9beb86aSDeepa Dinamani if (optlen < sizeof(old_tv))
413a9beb86aSDeepa Dinamani return -EINVAL;
414c34645acSChristoph Hellwig if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv)))
415a9beb86aSDeepa Dinamani return -EFAULT;
4164c1e34c0SRichard Palethorpe tv->tv_sec = old_tv.tv_sec;
4174c1e34c0SRichard Palethorpe tv->tv_usec = old_tv.tv_usec;
418fe0c72f3SArnd Bergmann } else {
4194c1e34c0SRichard Palethorpe if (optlen < sizeof(*tv))
4201da177e4SLinus Torvalds return -EINVAL;
4214c1e34c0SRichard Palethorpe if (copy_from_sockptr(tv, optval, sizeof(*tv)))
4221da177e4SLinus Torvalds return -EFAULT;
423fe0c72f3SArnd Bergmann }
4244c1e34c0SRichard Palethorpe
4254c1e34c0SRichard Palethorpe return 0;
4264c1e34c0SRichard Palethorpe }
4274c1e34c0SRichard Palethorpe EXPORT_SYMBOL(sock_copy_user_timeval);
4284c1e34c0SRichard Palethorpe
sock_set_timeout(long * timeo_p,sockptr_t optval,int optlen,bool old_timeval)4294c1e34c0SRichard Palethorpe static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
4304c1e34c0SRichard Palethorpe bool old_timeval)
4314c1e34c0SRichard Palethorpe {
4324c1e34c0SRichard Palethorpe struct __kernel_sock_timeval tv;
4334c1e34c0SRichard Palethorpe int err = sock_copy_user_timeval(&tv, optval, optlen, old_timeval);
434285975ddSEric Dumazet long val;
4354c1e34c0SRichard Palethorpe
4364c1e34c0SRichard Palethorpe if (err)
4374c1e34c0SRichard Palethorpe return err;
4384c1e34c0SRichard Palethorpe
439ba78073eSVasily Averin if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
440ba78073eSVasily Averin return -EDOM;
4411da177e4SLinus Torvalds
442ba78073eSVasily Averin if (tv.tv_sec < 0) {
4436f11df83SAndrew Morton static int warned __read_mostly;
4446f11df83SAndrew Morton
445285975ddSEric Dumazet WRITE_ONCE(*timeo_p, 0);
44650aab54fSIlpo Järvinen if (warned < 10 && net_ratelimit()) {
447ba78073eSVasily Averin warned++;
448e005d193SJoe Perches pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
449e005d193SJoe Perches __func__, current->comm, task_pid_nr(current));
45050aab54fSIlpo Järvinen }
451ba78073eSVasily Averin return 0;
452ba78073eSVasily Averin }
453285975ddSEric Dumazet val = MAX_SCHEDULE_TIMEOUT;
454285975ddSEric Dumazet if ((tv.tv_sec || tv.tv_usec) &&
455285975ddSEric Dumazet (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)))
456285975ddSEric Dumazet val = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec,
457285975ddSEric Dumazet USEC_PER_SEC / HZ);
458285975ddSEric Dumazet WRITE_ONCE(*timeo_p, val);
4591da177e4SLinus Torvalds return 0;
4601da177e4SLinus Torvalds }
4611da177e4SLinus Torvalds
sock_needs_netstamp(const struct sock * sk)462080a270fSHannes Frederic Sowa static bool sock_needs_netstamp(const struct sock *sk)
463080a270fSHannes Frederic Sowa {
464080a270fSHannes Frederic Sowa switch (sk->sk_family) {
465080a270fSHannes Frederic Sowa case AF_UNSPEC:
466080a270fSHannes Frederic Sowa case AF_UNIX:
467080a270fSHannes Frederic Sowa return false;
468080a270fSHannes Frederic Sowa default:
469080a270fSHannes Frederic Sowa return true;
470080a270fSHannes Frederic Sowa }
471080a270fSHannes Frederic Sowa }
472080a270fSHannes Frederic Sowa
sock_disable_timestamp(struct sock * sk,unsigned long flags)47308e29af3SEric Dumazet static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
4741da177e4SLinus Torvalds {
47508e29af3SEric Dumazet if (sk->sk_flags & flags) {
47608e29af3SEric Dumazet sk->sk_flags &= ~flags;
477080a270fSHannes Frederic Sowa if (sock_needs_netstamp(sk) &&
478080a270fSHannes Frederic Sowa !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
4791da177e4SLinus Torvalds net_disable_timestamp();
4801da177e4SLinus Torvalds }
4811da177e4SLinus Torvalds }
4821da177e4SLinus Torvalds
4831da177e4SLinus Torvalds
__sock_queue_rcv_skb(struct sock * sk,struct sk_buff * skb)484e6afc8acSsamanthakumar int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
485f0088a50SDenis Vlasenko {
4863b885787SNeil Horman unsigned long flags;
4873b885787SNeil Horman struct sk_buff_head *list = &sk->sk_receive_queue;
488f0088a50SDenis Vlasenko
489845d1799Slinke li if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) {
490766e9037SEric Dumazet atomic_inc(&sk->sk_drops);
4913847ce32SSatoru Moriya trace_sock_rcvqueue_full(sk, skb);
492766e9037SEric Dumazet return -ENOMEM;
493f0088a50SDenis Vlasenko }
494f0088a50SDenis Vlasenko
495c76562b6SMel Gorman if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
496766e9037SEric Dumazet atomic_inc(&sk->sk_drops);
497766e9037SEric Dumazet return -ENOBUFS;
4983ab224beSHideo Aoki }
4993ab224beSHideo Aoki
500f0088a50SDenis Vlasenko skb->dev = NULL;
501f0088a50SDenis Vlasenko skb_set_owner_r(skb, sk);
50249ad9599SDavid S. Miller
5037fee226aSEric Dumazet /* we escape from rcu protected region, make sure we dont leak
5047fee226aSEric Dumazet * a norefcounted dst
5057fee226aSEric Dumazet */
5067fee226aSEric Dumazet skb_dst_force(skb);
5077fee226aSEric Dumazet
5083b885787SNeil Horman spin_lock_irqsave(&list->lock, flags);
5093bc3b96fSEyal Birger sock_skb_set_dropcount(sk, skb);
5103b885787SNeil Horman __skb_queue_tail(list, skb);
5113b885787SNeil Horman spin_unlock_irqrestore(&list->lock, flags);
512f0088a50SDenis Vlasenko
513f0088a50SDenis Vlasenko if (!sock_flag(sk, SOCK_DEAD))
514676d2369SDavid S. Miller sk->sk_data_ready(sk);
515766e9037SEric Dumazet return 0;
516f0088a50SDenis Vlasenko }
517e6afc8acSsamanthakumar EXPORT_SYMBOL(__sock_queue_rcv_skb);
518e6afc8acSsamanthakumar
sock_queue_rcv_skb_reason(struct sock * sk,struct sk_buff * skb,enum skb_drop_reason * reason)519c1b8a567SMenglong Dong int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb,
520c1b8a567SMenglong Dong enum skb_drop_reason *reason)
521e6afc8acSsamanthakumar {
522c1b8a567SMenglong Dong enum skb_drop_reason drop_reason;
523e6afc8acSsamanthakumar int err;
524e6afc8acSsamanthakumar
525e6afc8acSsamanthakumar err = sk_filter(sk, skb);
526c1b8a567SMenglong Dong if (err) {
527c1b8a567SMenglong Dong drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
528c1b8a567SMenglong Dong goto out;
529e6afc8acSsamanthakumar }
530c1b8a567SMenglong Dong err = __sock_queue_rcv_skb(sk, skb);
531c1b8a567SMenglong Dong switch (err) {
532c1b8a567SMenglong Dong case -ENOMEM:
533c1b8a567SMenglong Dong drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
534c1b8a567SMenglong Dong break;
535c1b8a567SMenglong Dong case -ENOBUFS:
536c1b8a567SMenglong Dong drop_reason = SKB_DROP_REASON_PROTO_MEM;
537c1b8a567SMenglong Dong break;
538c1b8a567SMenglong Dong default:
539c1b8a567SMenglong Dong drop_reason = SKB_NOT_DROPPED_YET;
540c1b8a567SMenglong Dong break;
541c1b8a567SMenglong Dong }
542c1b8a567SMenglong Dong out:
543c1b8a567SMenglong Dong if (reason)
544c1b8a567SMenglong Dong *reason = drop_reason;
545c1b8a567SMenglong Dong return err;
546c1b8a567SMenglong Dong }
547c1b8a567SMenglong Dong EXPORT_SYMBOL(sock_queue_rcv_skb_reason);
548f0088a50SDenis Vlasenko
__sk_receive_skb(struct sock * sk,struct sk_buff * skb,const int nested,unsigned int trim_cap,bool refcounted)5494f0c40d9SWillem de Bruijn int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
550c3f24cfbSEric Dumazet const int nested, unsigned int trim_cap, bool refcounted)
551f0088a50SDenis Vlasenko {
552f0088a50SDenis Vlasenko int rc = NET_RX_SUCCESS;
553f0088a50SDenis Vlasenko
5544f0c40d9SWillem de Bruijn if (sk_filter_trim_cap(sk, skb, trim_cap))
555f0088a50SDenis Vlasenko goto discard_and_relse;
556f0088a50SDenis Vlasenko
557f0088a50SDenis Vlasenko skb->dev = NULL;
558f0088a50SDenis Vlasenko
559845d1799Slinke li if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) {
560c377411fSEric Dumazet atomic_inc(&sk->sk_drops);
561c377411fSEric Dumazet goto discard_and_relse;
562c377411fSEric Dumazet }
56358a5a7b9SArnaldo Carvalho de Melo if (nested)
56458a5a7b9SArnaldo Carvalho de Melo bh_lock_sock_nested(sk);
56558a5a7b9SArnaldo Carvalho de Melo else
566f0088a50SDenis Vlasenko bh_lock_sock(sk);
567a5b5bb9aSIngo Molnar if (!sock_owned_by_user(sk)) {
568a5b5bb9aSIngo Molnar /*
569a5b5bb9aSIngo Molnar * trylock + unlock semantics:
570a5b5bb9aSIngo Molnar */
571a5b5bb9aSIngo Molnar mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
572a5b5bb9aSIngo Molnar
573c57943a1SPeter Zijlstra rc = sk_backlog_rcv(sk, skb);
574a5b5bb9aSIngo Molnar
5755facae4fSQian Cai mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
5768265792bSEric Dumazet } else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
5778eae939fSZhu Yi bh_unlock_sock(sk);
5788eae939fSZhu Yi atomic_inc(&sk->sk_drops);
5798eae939fSZhu Yi goto discard_and_relse;
5808eae939fSZhu Yi }
5818eae939fSZhu Yi
582f0088a50SDenis Vlasenko bh_unlock_sock(sk);
583f0088a50SDenis Vlasenko out:
584c3f24cfbSEric Dumazet if (refcounted)
585f0088a50SDenis Vlasenko sock_put(sk);
586f0088a50SDenis Vlasenko return rc;
587f0088a50SDenis Vlasenko discard_and_relse:
588f0088a50SDenis Vlasenko kfree_skb(skb);
589f0088a50SDenis Vlasenko goto out;
590f0088a50SDenis Vlasenko }
5914f0c40d9SWillem de Bruijn EXPORT_SYMBOL(__sk_receive_skb);
592f0088a50SDenis Vlasenko
593bbd807dfSBrian Vazquez INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *,
594bbd807dfSBrian Vazquez u32));
595bbd807dfSBrian Vazquez INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
596bbd807dfSBrian Vazquez u32));
/* __sk_dst_check - validate the cached route of @sk against @cookie
 *
 * If the cached dst is marked obsolete and its ->check() op (dispatched
 * via INDIRECT_CALL_INET to the IPv4/IPv6 implementation) rejects it,
 * tear down the cache: clear the tx queue mapping and pending confirm,
 * NULL sk_dst_cache, release the dst and return NULL. Otherwise return
 * the (possibly NULL) cached dst without taking a new reference.
 * NOTE(review): uses __sk_dst_get(), so the caller is expected to
 * provide the usual synchronization — confirm at call sites.
 */
struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = __sk_dst_get(sk);

	if (dst && dst->obsolete &&
	    INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
			       dst, cookie) == NULL) {
		sk_tx_queue_clear(sk);
		WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
		RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(__sk_dst_check);
614f0088a50SDenis Vlasenko
/* sk_dst_check - refcounted variant of __sk_dst_check()
 *
 * Obtains its own reference via sk_dst_get(). An obsolete dst whose
 * ->check() returns NULL is reset (sk_dst_reset()) and released;
 * the function then returns NULL. Otherwise the referenced dst is
 * returned and the caller is responsible for releasing it.
 */
struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = sk_dst_get(sk);

	if (dst && dst->obsolete &&
	    INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
			       dst, cookie) == NULL) {
		sk_dst_reset(sk);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(sk_dst_check);
630f0088a50SDenis Vlasenko
/* Bind @sk to interface index @ifindex (0 unbinds). Changing an existing
 * binding requires CAP_NET_RAW in the socket's user namespace. On success
 * the new index is published with WRITE_ONCE(), the protocol is rehashed
 * and the cached route is dropped.
 * NOTE(review): callers appear to hold the socket lock (see
 * sock_bindtoindex()/sock_setbindtodevice()) — confirm for new callers.
 * Returns 0, -EPERM, -EINVAL, or -ENOPROTOOPT without CONFIG_NETDEVICES.
 */
static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);

	/* Sorry... */
	ret = -EPERM;
	if (sk->sk_bound_dev_if && !ns_capable(net->user_ns, CAP_NET_RAW))
		goto out;

	ret = -EINVAL;
	if (ifindex < 0)
		goto out;

	/* Paired with all READ_ONCE() done locklessly. */
	WRITE_ONCE(sk->sk_bound_dev_if, ifindex);

	if (sk->sk_prot->rehash)
		sk->sk_prot->rehash(sk);
	sk_dst_reset(sk);

	ret = 0;

out:
#endif

	return ret;
}
660f5dd3d0cSDavid Herrmann
/* sock_bindtoindex - bind the socket to an interface index
 * @sk:      socket
 * @ifindex: interface index (0 unbinds)
 * @lock_sk: take and release the socket lock here; pass false when the
 *           caller already holds it
 *
 * Returns the result of sock_bindtoindex_locked().
 */
int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk)
{
	int err;

	if (!lock_sk)
		return sock_bindtoindex_locked(sk, ifindex);

	lock_sock(sk);
	err = sock_bindtoindex_locked(sk, ifindex);
	release_sock(sk);

	return err;
}
EXPORT_SYMBOL(sock_bindtoindex);
6747594888cSChristoph Hellwig
/* Handle the SO_BINDTODEVICE setsockopt: resolve the interface name in
 * @optval to an ifindex and bind the socket to it under the socket lock.
 * An empty name (or zero @optlen) unbinds the socket.
 * Returns 0 or a negative errno (-ENOPROTOOPT without CONFIG_NETDEVICES).
 */
static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];
	int index;

	ret = -EINVAL;
	if (optlen < 0)
		goto out;

	/* Bind this socket to a particular device like "eth0",
	 * as specified in the passed interface name. If the
	 * name is "" or the option length is zero the socket
	 * is not bound.
	 */
	if (optlen > IFNAMSIZ - 1)
		optlen = IFNAMSIZ - 1;
	memset(devname, 0, sizeof(devname));

	ret = -EFAULT;
	if (copy_from_sockptr(devname, optval, optlen))
		goto out;

	index = 0;
	if (devname[0] != '\0') {
		struct net_device *dev;

		/* RCU lookup suffices: only the ifindex is extracted,
		 * no device reference is taken.
		 */
		rcu_read_lock();
		dev = dev_get_by_name_rcu(net, devname);
		if (dev)
			index = dev->ifindex;
		rcu_read_unlock();
		ret = -ENODEV;
		if (!dev)
			goto out;
	}

	sockopt_lock_sock(sk);
	ret = sock_bindtoindex_locked(sk, index);
	sockopt_release_sock(sk);
out:
#endif

	return ret;
}
7224878809fSDavid S. Miller
/* Handle the SO_BINDTODEVICE getsockopt: copy the name of the device the
 * socket is bound to into @optval and the resulting length into @optlen.
 * An unbound socket reports a zero length and no name.
 * Returns 0 or a negative errno (-ENOPROTOOPT without CONFIG_NETDEVICES).
 */
static int sock_getbindtodevice(struct sock *sk, sockptr_t optval,
				sockptr_t optlen, int len)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	/* Lockless read, paired with WRITE_ONCE() in sock_bindtoindex_locked() */
	int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];

	if (bound_dev_if == 0) {
		len = 0;
		goto zero;
	}

	ret = -EINVAL;
	if (len < IFNAMSIZ)
		goto out;

	ret = netdev_get_name(net, devname, bound_dev_if);
	if (ret)
		goto out;

	/* Include the trailing NUL in what is copied out. */
	len = strlen(devname) + 1;

	ret = -EFAULT;
	if (copy_to_sockptr(optval, devname, len))
		goto out;

zero:
	ret = -EFAULT;
	if (copy_to_sockptr(optlen, &len, sizeof(int)))
		goto out;

	ret = 0;

out:
#endif

	return ret;
}
763c91f6df2SBrian Haley
/* sk_mc_loop - should a multicast send on @sk be looped back?
 *
 * Returns false while dev_recursion_level() is non-zero (NOTE(review):
 * presumably to bound loopback recursion — confirm), true for a NULL
 * socket, and otherwise the per-family multicast-loop setting.
 */
bool sk_mc_loop(struct sock *sk)
{
	if (dev_recursion_level())
		return false;
	if (!sk)
		return true;
	/* IPV6_ADDRFORM can change sk->sk_family under us. */
	switch (READ_ONCE(sk->sk_family)) {
	case AF_INET:
		return inet_test_bit(MC_LOOP, sk);
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		return inet6_sk(sk)->mc_loop;
#endif
	}
	/* Unknown family: warn once, default to looping back. */
	WARN_ON_ONCE(1);
	return true;
}
EXPORT_SYMBOL(sk_mc_loop);
783f60e5990Shannes@stressinduktion.org
/* Enable address reuse (SO_REUSEADDR equivalent) for in-kernel users. */
void sock_set_reuseaddr(struct sock *sk)
{
	lock_sock(sk);
	sk->sk_reuse = SK_CAN_REUSE;
	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_reuseaddr);
791b58f0e8fSChristoph Hellwig
/* Enable port reuse (SO_REUSEPORT equivalent) for in-kernel users. */
void sock_set_reuseport(struct sock *sk)
{
	lock_sock(sk);
	sk->sk_reuseport = true;
	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_reuseport);
799fe31a326SChristoph Hellwig
/* Turn on SOCK_LINGER with a zero linger time for in-kernel users. */
void sock_no_linger(struct sock *sk)
{
	lock_sock(sk);
	WRITE_ONCE(sk->sk_lingertime, 0);
	sock_set_flag(sk, SOCK_LINGER);
	release_sock(sk);
}
EXPORT_SYMBOL(sock_no_linger);
808c433594cSChristoph Hellwig
/* Set sk->sk_priority for in-kernel users. Unlike the setsockopt path,
 * no capability or range check is applied here.
 */
void sock_set_priority(struct sock *sk, u32 priority)
{
	lock_sock(sk);
	WRITE_ONCE(sk->sk_priority, priority);
	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_priority);
8166e434967SChristoph Hellwig
/* Set the send timeout to @secs seconds for in-kernel users. A zero or
 * out-of-range value selects MAX_SCHEDULE_TIMEOUT (i.e. no timeout).
 */
void sock_set_sndtimeo(struct sock *sk, s64 secs)
{
	long timeout = MAX_SCHEDULE_TIMEOUT;

	if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
		timeout = secs * HZ;

	lock_sock(sk);
	WRITE_ONCE(sk->sk_sndtimeo, timeout);
	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_sndtimeo);
82776ee0785SChristoph Hellwig
/* Common helper for the SO_TIMESTAMP* options: @val enables or disables
 * receive timestamps; @new sets SOCK_TSTAMP_NEW and @ns sets
 * SOCK_RCVTSTAMPNS accordingly when enabling.
 */
static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
{
	if (!val) {
		sock_reset_flag(sk, SOCK_RCVTSTAMP);
		sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
		return;
	}

	sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new);
	sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns);
	sock_set_flag(sk, SOCK_RCVTSTAMP);
	sock_enable_timestamp(sk, SOCK_TIMESTAMP);
}
840783da70eSChristoph Hellwig
/* Enable receive timestamps in timespec form (SOCK_RCVTSTAMPNS, with
 * SOCK_TSTAMP_NEW left clear) for in-kernel users.
 */
void sock_enable_timestamps(struct sock *sk)
{
	lock_sock(sk);
	__sock_set_timestamps(sk, true, false, true);
	release_sock(sk);
}
EXPORT_SYMBOL(sock_enable_timestamps);
848783da70eSChristoph Hellwig
/* Map the four SO_TIMESTAMP{,NS}_{OLD,NEW} options onto the (new, ns)
 * flag pair and apply them via __sock_set_timestamps(). Any other
 * @optname is silently ignored, exactly as before.
 */
void sock_set_timestamp(struct sock *sk, int optname, bool valbool)
{
	bool want_new, want_ns;

	switch (optname) {
	case SO_TIMESTAMP_OLD:
		want_new = false;
		want_ns = false;
		break;
	case SO_TIMESTAMP_NEW:
		want_new = true;
		want_ns = false;
		break;
	case SO_TIMESTAMPNS_OLD:
		want_new = false;
		want_ns = true;
		break;
	case SO_TIMESTAMPNS_NEW:
		want_new = true;
		want_ns = true;
		break;
	default:
		return;
	}

	__sock_set_timestamps(sk, valbool, want_new, want_ns);
}
866371087aaSFlorian Westphal
sock_timestamping_bind_phc(struct sock * sk,int phc_index)867d463126eSYangbo Lu static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
868ced122d9SFlorian Westphal {
869d463126eSYangbo Lu struct net *net = sock_net(sk);
870d463126eSYangbo Lu struct net_device *dev = NULL;
871d463126eSYangbo Lu bool match = false;
872d463126eSYangbo Lu int *vclock_index;
873d463126eSYangbo Lu int i, num;
874d463126eSYangbo Lu
875d463126eSYangbo Lu if (sk->sk_bound_dev_if)
876d463126eSYangbo Lu dev = dev_get_by_index(net, sk->sk_bound_dev_if);
877d463126eSYangbo Lu
878d463126eSYangbo Lu if (!dev) {
879d463126eSYangbo Lu pr_err("%s: sock not bind to device\n", __func__);
880d463126eSYangbo Lu return -EOPNOTSUPP;
881d463126eSYangbo Lu }
882d463126eSYangbo Lu
883d463126eSYangbo Lu num = ethtool_get_phc_vclocks(dev, &vclock_index);
8842a4d75bfSMiroslav Lichvar dev_put(dev);
8852a4d75bfSMiroslav Lichvar
886d463126eSYangbo Lu for (i = 0; i < num; i++) {
887d463126eSYangbo Lu if (*(vclock_index + i) == phc_index) {
888d463126eSYangbo Lu match = true;
889d463126eSYangbo Lu break;
890d463126eSYangbo Lu }
891d463126eSYangbo Lu }
892d463126eSYangbo Lu
893d463126eSYangbo Lu if (num > 0)
894d463126eSYangbo Lu kfree(vclock_index);
895d463126eSYangbo Lu
896d463126eSYangbo Lu if (!match)
897d463126eSYangbo Lu return -EINVAL;
898d463126eSYangbo Lu
899251cd405SEric Dumazet WRITE_ONCE(sk->sk_bind_phc, phc_index);
900d463126eSYangbo Lu
901d463126eSYangbo Lu return 0;
902d463126eSYangbo Lu }
903d463126eSYangbo Lu
/* sock_set_timestamping - apply SO_TIMESTAMPING_OLD/NEW configuration
 * @sk:           socket (NOTE(review): callers appear to hold the socket
 *                lock — confirm at call sites)
 * @optname:      SO_TIMESTAMPING_OLD or SO_TIMESTAMPING_NEW
 * @timestamping: flags, plus a PHC index used with
 *                SOF_TIMESTAMPING_BIND_PHC
 *
 * Validates the flag combination, seeds sk_tskey when OPT_ID is newly
 * enabled (from write_seq/snd_una for TCP, 0 otherwise), optionally
 * binds to a PTP virtual clock, then publishes sk_tsflags and toggles
 * RX software timestamp generation. Returns 0 or a negative errno.
 */
int sock_set_timestamping(struct sock *sk, int optname,
			  struct so_timestamping timestamping)
{
	int val = timestamping.flags;
	int ret;

	if (val & ~SOF_TIMESTAMPING_MASK)
		return -EINVAL;

	/* OPT_ID_TCP refines OPT_ID; it cannot be used on its own. */
	if (val & SOF_TIMESTAMPING_OPT_ID_TCP &&
	    !(val & SOF_TIMESTAMPING_OPT_ID))
		return -EINVAL;

	if (val & SOF_TIMESTAMPING_OPT_ID &&
	    !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
		if (sk_is_tcp(sk)) {
			/* No meaningful sequence to key on in these states. */
			if ((1 << sk->sk_state) &
			    (TCPF_CLOSE | TCPF_LISTEN))
				return -EINVAL;
			if (val & SOF_TIMESTAMPING_OPT_ID_TCP)
				atomic_set(&sk->sk_tskey, tcp_sk(sk)->write_seq);
			else
				atomic_set(&sk->sk_tskey, tcp_sk(sk)->snd_una);
		} else {
			atomic_set(&sk->sk_tskey, 0);
		}
	}

	if (val & SOF_TIMESTAMPING_OPT_STATS &&
	    !(val & SOF_TIMESTAMPING_OPT_TSONLY))
		return -EINVAL;

	if (val & SOF_TIMESTAMPING_BIND_PHC) {
		ret = sock_timestamping_bind_phc(sk, timestamping.bind_phc);
		if (ret)
			return ret;
	}

	WRITE_ONCE(sk->sk_tsflags, val);
	sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);

	if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
		sock_enable_timestamp(sk,
				      SOCK_TIMESTAMPING_RX_SOFTWARE);
	else
		sock_disable_timestamp(sk,
				       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
	return 0;
}
953ced122d9SFlorian Westphal
/* Enable SO_KEEPALIVE for in-kernel users, invoking the protocol's
 * keepalive hook when one is provided.
 */
void sock_set_keepalive(struct sock *sk)
{
	lock_sock(sk);
	if (sk->sk_prot->keepalive)
		sk->sk_prot->keepalive(sk, true);
	sock_valbool_flag(sk, SOCK_KEEPOPEN, true);
	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_keepalive);
963ce3d9544SChristoph Hellwig
/* Set sk->sk_rcvbuf from a user-supplied value and lock it against
 * auto-tuning. Callers serialize via the socket lock (see
 * sock_set_rcvbuf()).
 */
static void __sock_set_rcvbuf(struct sock *sk, int val)
{
	/* Ensure val * 2 fits into an int, to prevent max_t() from treating it
	 * as a negative value.
	 */
	val = min_t(int, val, INT_MAX / 2);
	sk->sk_userlocks |= SOCK_RCVBUF_LOCK;

	/* We double it on the way in to account for "struct sk_buff" etc.
	 * overhead. Applications assume that the SO_RCVBUF setting they make
	 * will allow that much actual data to be received on that socket.
	 *
	 * Applications are unaware that "struct sk_buff" and other overheads
	 * allocate from the receive buffer during socket buffer allocation.
	 *
	 * And after considering the possible alternatives, returning the value
	 * we actually used in getsockopt is the most desirable behavior.
	 */
	WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF));
}
98426cfabf9SChristoph Hellwig
/* Locked wrapper around __sock_set_rcvbuf() for in-kernel users. */
void sock_set_rcvbuf(struct sock *sk, int val)
{
	lock_sock(sk);
	__sock_set_rcvbuf(sk, val);
	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_rcvbuf);
99226cfabf9SChristoph Hellwig
/* Set sk->sk_mark and drop the cached route when the mark changes,
 * since the mark can influence routing decisions.
 */
static void __sock_set_mark(struct sock *sk, u32 val)
{
	if (val != sk->sk_mark) {
		WRITE_ONCE(sk->sk_mark, val);
		sk_dst_reset(sk);
	}
}
1000dd9082f4SAlexander Aring
/* Locked wrapper around __sock_set_mark() for in-kernel users. */
void sock_set_mark(struct sock *sk, u32 val)
{
	lock_sock(sk);
	__sock_set_mark(sk, val);
	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_mark);
100884d1c617SAlexander Aring
/* Return @bytes (rounded down to whole pages) of previously reserved
 * memory from sk->sk_reserved_mem and reclaim the surplus.
 */
static void sock_release_reserved_memory(struct sock *sk, int bytes)
{
	/* Round down bytes to multiple of pages */
	bytes = round_down(bytes, PAGE_SIZE);

	WARN_ON(bytes > sk->sk_reserved_mem);
	WRITE_ONCE(sk->sk_reserved_mem, sk->sk_reserved_mem - bytes);
	sk_mem_reclaim(sk);
}
10182bb2f5fbSWei Wang
/* Pre-charge @bytes (converted to pages by sk_mem_pages()) to the
 * socket's memcg and protocol memory accounting, crediting the result
 * to sk_forward_alloc and recording it in sk->sk_reserved_mem.
 * Only supported for memcg-accounted sockets with memory accounting.
 * Returns 0 on success, -EOPNOTSUPP or -ENOMEM on failure; on failure
 * all charges are rolled back.
 */
static int sock_reserve_memory(struct sock *sk, int bytes)
{
	long allocated;
	bool charged;
	int pages;

	if (!mem_cgroup_sockets_enabled || !sk->sk_memcg || !sk_has_account(sk))
		return -EOPNOTSUPP;

	if (!bytes)
		return 0;

	pages = sk_mem_pages(bytes);

	/* pre-charge to memcg */
	charged = mem_cgroup_charge_skmem(sk->sk_memcg, pages,
					  GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!charged)
		return -ENOMEM;

	/* pre-charge to forward_alloc */
	sk_memory_allocated_add(sk, pages);
	allocated = sk_memory_allocated(sk);
	/* If the system goes into memory pressure with this
	 * precharge, give up and return error.
	 */
	if (allocated > sk_prot_mem_limits(sk, 1)) {
		sk_memory_allocated_sub(sk, pages);
		mem_cgroup_uncharge_skmem(sk->sk_memcg, pages);
		return -ENOMEM;
	}
	sk_forward_alloc_add(sk, pages << PAGE_SHIFT);

	WRITE_ONCE(sk->sk_reserved_mem,
		   sk->sk_reserved_mem + (pages << PAGE_SHIFT));

	return 0;
}
10572bb2f5fbSWei Wang
/* Take the socket lock for a [gs]etsockopt operation, unless the caller
 * is a BPF program, which already holds it.
 */
void sockopt_lock_sock(struct sock *sk)
{
	/* When current->bpf_ctx is set, the setsockopt is called from
	 * a bpf prog. bpf has ensured the sk lock has been
	 * acquired before calling setsockopt().
	 */
	if (has_current_bpf_ctx())
		return;

	lock_sock(sk);
}
EXPORT_SYMBOL(sockopt_lock_sock);
107024426654SMartin KaFai Lau
/* Counterpart of sockopt_lock_sock(): drop the socket lock unless
 * running from a BPF program, which manages the lock itself.
 */
void sockopt_release_sock(struct sock *sk)
{
	if (has_current_bpf_ctx())
		return;

	release_sock(sk);
}
EXPORT_SYMBOL(sockopt_release_sock);
107924426654SMartin KaFai Lau
sockopt_ns_capable(struct user_namespace * ns,int cap)1080e42c7beeSMartin KaFai Lau bool sockopt_ns_capable(struct user_namespace *ns, int cap)
1081e42c7beeSMartin KaFai Lau {
1082e42c7beeSMartin KaFai Lau return has_current_bpf_ctx() || ns_capable(ns, cap);
1083e42c7beeSMartin KaFai Lau }
1084e42c7beeSMartin KaFai Lau EXPORT_SYMBOL(sockopt_ns_capable);
1085e42c7beeSMartin KaFai Lau
sockopt_capable(int cap)1086e42c7beeSMartin KaFai Lau bool sockopt_capable(int cap)
1087e42c7beeSMartin KaFai Lau {
1088e42c7beeSMartin KaFai Lau return has_current_bpf_ctx() || capable(cap);
1089e42c7beeSMartin KaFai Lau }
1090e42c7beeSMartin KaFai Lau EXPORT_SYMBOL(sockopt_capable);
1091e42c7beeSMartin KaFai Lau
10921da177e4SLinus Torvalds /*
10931da177e4SLinus Torvalds * This is meant for all protocols to use and covers goings on
10941da177e4SLinus Torvalds * at the socket level. Everything here is generic.
10951da177e4SLinus Torvalds */
10961da177e4SLinus Torvalds
sk_setsockopt(struct sock * sk,int level,int optname,sockptr_t optval,unsigned int optlen)109729003875SMartin KaFai Lau int sk_setsockopt(struct sock *sk, int level, int optname,
1098c8c1bbb6SChristoph Hellwig sockptr_t optval, unsigned int optlen)
10991da177e4SLinus Torvalds {
1100d463126eSYangbo Lu struct so_timestamping timestamping;
11014d748f99SMartin KaFai Lau struct socket *sock = sk->sk_socket;
110280b14deeSRichard Cochran struct sock_txtime sk_txtime;
11031da177e4SLinus Torvalds int val;
11041da177e4SLinus Torvalds int valbool;
11051da177e4SLinus Torvalds struct linger ling;
11061da177e4SLinus Torvalds int ret = 0;
11071da177e4SLinus Torvalds
11081da177e4SLinus Torvalds /*
11091da177e4SLinus Torvalds * Options without arguments
11101da177e4SLinus Torvalds */
11111da177e4SLinus Torvalds
11124878809fSDavid S. Miller if (optname == SO_BINDTODEVICE)
1113c91f6df2SBrian Haley return sock_setbindtodevice(sk, optval, optlen);
11144878809fSDavid S. Miller
11151da177e4SLinus Torvalds if (optlen < sizeof(int))
1116e71a4783SStephen Hemminger return -EINVAL;
11171da177e4SLinus Torvalds
1118c8c1bbb6SChristoph Hellwig if (copy_from_sockptr(&val, optval, sizeof(val)))
11191da177e4SLinus Torvalds return -EFAULT;
11201da177e4SLinus Torvalds
11211da177e4SLinus Torvalds valbool = val ? 1 : 0;
11221da177e4SLinus Torvalds
112324426654SMartin KaFai Lau sockopt_lock_sock(sk);
11241da177e4SLinus Torvalds
1125e71a4783SStephen Hemminger switch (optname) {
11261da177e4SLinus Torvalds case SO_DEBUG:
1127e42c7beeSMartin KaFai Lau if (val && !sockopt_capable(CAP_NET_ADMIN))
11281da177e4SLinus Torvalds ret = -EACCES;
11292a91525cSEric Dumazet else
1130c0ef877bSPavel Emelyanov sock_valbool_flag(sk, SOCK_DBG, valbool);
11311da177e4SLinus Torvalds break;
11321da177e4SLinus Torvalds case SO_REUSEADDR:
1133cdb8744dSBart Van Assche sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
11341da177e4SLinus Torvalds break;
1135055dc21aSTom Herbert case SO_REUSEPORT:
1136055dc21aSTom Herbert sk->sk_reuseport = valbool;
1137055dc21aSTom Herbert break;
11381da177e4SLinus Torvalds case SO_TYPE:
113949c794e9SJan Engelhardt case SO_PROTOCOL:
11400d6038eeSJan Engelhardt case SO_DOMAIN:
11411da177e4SLinus Torvalds case SO_ERROR:
11421da177e4SLinus Torvalds ret = -ENOPROTOOPT;
11431da177e4SLinus Torvalds break;
11441da177e4SLinus Torvalds case SO_DONTROUTE:
1145c0ef877bSPavel Emelyanov sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
11460fbe82e6Syupeng sk_dst_reset(sk);
11471da177e4SLinus Torvalds break;
11481da177e4SLinus Torvalds case SO_BROADCAST:
11491da177e4SLinus Torvalds sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
11501da177e4SLinus Torvalds break;
11511da177e4SLinus Torvalds case SO_SNDBUF:
11521da177e4SLinus Torvalds /* Don't error on this BSD doesn't and if you think
115382981930SEric Dumazet * about it this is right. Otherwise apps have to
115482981930SEric Dumazet * play 'guess the biggest size' games. RCVBUF/SNDBUF
115582981930SEric Dumazet * are treated in BSD as hints
115682981930SEric Dumazet */
11571227c177SKuniyuki Iwashima val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
1158b0573deaSPatrick McHardy set_sndbuf:
11594057765fSGuillaume Nault /* Ensure val * 2 fits into an int, to prevent max_t()
11604057765fSGuillaume Nault * from treating it as a negative value.
11614057765fSGuillaume Nault */
11624057765fSGuillaume Nault val = min_t(int, val, INT_MAX / 2);
11631da177e4SLinus Torvalds sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
1164e292f05eSEric Dumazet WRITE_ONCE(sk->sk_sndbuf,
1165e292f05eSEric Dumazet max_t(int, val * 2, SOCK_MIN_SNDBUF));
116682981930SEric Dumazet /* Wake up sending tasks if we upped the value. */
11671da177e4SLinus Torvalds sk->sk_write_space(sk);
11681da177e4SLinus Torvalds break;
11691da177e4SLinus Torvalds
1170b0573deaSPatrick McHardy case SO_SNDBUFFORCE:
1171e42c7beeSMartin KaFai Lau if (!sockopt_capable(CAP_NET_ADMIN)) {
1172b0573deaSPatrick McHardy ret = -EPERM;
1173b0573deaSPatrick McHardy break;
1174b0573deaSPatrick McHardy }
11754057765fSGuillaume Nault
11764057765fSGuillaume Nault /* No negative values (to prevent underflow, as val will be
11774057765fSGuillaume Nault * multiplied by 2).
11784057765fSGuillaume Nault */
11794057765fSGuillaume Nault if (val < 0)
11804057765fSGuillaume Nault val = 0;
1181b0573deaSPatrick McHardy goto set_sndbuf;
1182b0573deaSPatrick McHardy
11831da177e4SLinus Torvalds case SO_RCVBUF:
11841da177e4SLinus Torvalds /* Don't error on this BSD doesn't and if you think
118582981930SEric Dumazet * about it this is right. Otherwise apps have to
118682981930SEric Dumazet * play 'guess the biggest size' games. RCVBUF/SNDBUF
118782981930SEric Dumazet * are treated in BSD as hints
118882981930SEric Dumazet */
11891227c177SKuniyuki Iwashima __sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max)));
11901da177e4SLinus Torvalds break;
11911da177e4SLinus Torvalds
1192b0573deaSPatrick McHardy case SO_RCVBUFFORCE:
1193e42c7beeSMartin KaFai Lau if (!sockopt_capable(CAP_NET_ADMIN)) {
1194b0573deaSPatrick McHardy ret = -EPERM;
1195b0573deaSPatrick McHardy break;
1196b0573deaSPatrick McHardy }
11974057765fSGuillaume Nault
11984057765fSGuillaume Nault /* No negative values (to prevent underflow, as val will be
11994057765fSGuillaume Nault * multiplied by 2).
12004057765fSGuillaume Nault */
120126cfabf9SChristoph Hellwig __sock_set_rcvbuf(sk, max(val, 0));
120226cfabf9SChristoph Hellwig break;
1203b0573deaSPatrick McHardy
12041da177e4SLinus Torvalds case SO_KEEPALIVE:
12054b9d07a4SUrsula Braun if (sk->sk_prot->keepalive)
12064b9d07a4SUrsula Braun sk->sk_prot->keepalive(sk, valbool);
12071da177e4SLinus Torvalds sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
12081da177e4SLinus Torvalds break;
12091da177e4SLinus Torvalds
12101da177e4SLinus Torvalds case SO_OOBINLINE:
12111da177e4SLinus Torvalds sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
12121da177e4SLinus Torvalds break;
12131da177e4SLinus Torvalds
12141da177e4SLinus Torvalds case SO_NO_CHECK:
121528448b80STom Herbert sk->sk_no_check_tx = valbool;
12161da177e4SLinus Torvalds break;
12171da177e4SLinus Torvalds
12181da177e4SLinus Torvalds case SO_PRIORITY:
12195e1fccc0SEric W. Biederman if ((val >= 0 && val <= 6) ||
1220e42c7beeSMartin KaFai Lau sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
1221e42c7beeSMartin KaFai Lau sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
12228bf43be7SEric Dumazet WRITE_ONCE(sk->sk_priority, val);
12231da177e4SLinus Torvalds else
12241da177e4SLinus Torvalds ret = -EPERM;
12251da177e4SLinus Torvalds break;
12261da177e4SLinus Torvalds
12271da177e4SLinus Torvalds case SO_LINGER:
12281da177e4SLinus Torvalds if (optlen < sizeof(ling)) {
12291da177e4SLinus Torvalds ret = -EINVAL; /* 1003.1g */
12301da177e4SLinus Torvalds break;
12311da177e4SLinus Torvalds }
1232c8c1bbb6SChristoph Hellwig if (copy_from_sockptr(&ling, optval, sizeof(ling))) {
12331da177e4SLinus Torvalds ret = -EFAULT;
12341da177e4SLinus Torvalds break;
12351da177e4SLinus Torvalds }
1236bc1fb82aSEric Dumazet if (!ling.l_onoff) {
12371da177e4SLinus Torvalds sock_reset_flag(sk, SOCK_LINGER);
1238bc1fb82aSEric Dumazet } else {
1239bc1fb82aSEric Dumazet unsigned long t_sec = ling.l_linger;
1240bc1fb82aSEric Dumazet
1241bc1fb82aSEric Dumazet if (t_sec >= MAX_SCHEDULE_TIMEOUT / HZ)
1242bc1fb82aSEric Dumazet WRITE_ONCE(sk->sk_lingertime, MAX_SCHEDULE_TIMEOUT);
12431da177e4SLinus Torvalds else
1244bc1fb82aSEric Dumazet WRITE_ONCE(sk->sk_lingertime, t_sec * HZ);
12451da177e4SLinus Torvalds sock_set_flag(sk, SOCK_LINGER);
12461da177e4SLinus Torvalds }
12471da177e4SLinus Torvalds break;
12481da177e4SLinus Torvalds
12491da177e4SLinus Torvalds case SO_BSDCOMPAT:
12501da177e4SLinus Torvalds break;
12511da177e4SLinus Torvalds
12521da177e4SLinus Torvalds case SO_PASSCRED:
1253274c4a6dSAndy Shevchenko assign_bit(SOCK_PASSCRED, &sock->flags, valbool);
12541da177e4SLinus Torvalds break;
12551da177e4SLinus Torvalds
12565e2ff670SAlexander Mikhalitsyn case SO_PASSPIDFD:
1257274c4a6dSAndy Shevchenko assign_bit(SOCK_PASSPIDFD, &sock->flags, valbool);
12585e2ff670SAlexander Mikhalitsyn break;
12595e2ff670SAlexander Mikhalitsyn
12607f1bc6e9SDeepa Dinamani case SO_TIMESTAMP_OLD:
1261783da70eSChristoph Hellwig case SO_TIMESTAMP_NEW:
1262783da70eSChristoph Hellwig case SO_TIMESTAMPNS_OLD:
1263783da70eSChristoph Hellwig case SO_TIMESTAMPNS_NEW:
126481b4a0ccSEric Dumazet sock_set_timestamp(sk, optname, valbool);
1265783da70eSChristoph Hellwig break;
1266ced122d9SFlorian Westphal
12679718475eSDeepa Dinamani case SO_TIMESTAMPING_NEW:
12687f1bc6e9SDeepa Dinamani case SO_TIMESTAMPING_OLD:
1269d463126eSYangbo Lu if (optlen == sizeof(timestamping)) {
1270d463126eSYangbo Lu if (copy_from_sockptr(×tamping, optval,
1271271dbc31SDan Carpenter sizeof(timestamping))) {
1272271dbc31SDan Carpenter ret = -EFAULT;
1273271dbc31SDan Carpenter break;
1274271dbc31SDan Carpenter }
1275d463126eSYangbo Lu } else {
1276d463126eSYangbo Lu memset(×tamping, 0, sizeof(timestamping));
1277d463126eSYangbo Lu timestamping.flags = val;
1278d463126eSYangbo Lu }
1279d463126eSYangbo Lu ret = sock_set_timestamping(sk, optname, timestamping);
128020d49473SPatrick Ohly break;
128120d49473SPatrick Ohly
12821da177e4SLinus Torvalds case SO_RCVLOWAT:
12831ded5e5aSEric Dumazet {
12841ded5e5aSEric Dumazet int (*set_rcvlowat)(struct sock *sk, int val) = NULL;
12851ded5e5aSEric Dumazet
12861da177e4SLinus Torvalds if (val < 0)
12871da177e4SLinus Torvalds val = INT_MAX;
12881ded5e5aSEric Dumazet if (sock)
12891ded5e5aSEric Dumazet set_rcvlowat = READ_ONCE(sock->ops)->set_rcvlowat;
12901ded5e5aSEric Dumazet if (set_rcvlowat)
12911ded5e5aSEric Dumazet ret = set_rcvlowat(sk, val);
1292d1361840SEric Dumazet else
1293eac66402SEric Dumazet WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
12941da177e4SLinus Torvalds break;
12951ded5e5aSEric Dumazet }
129645bdc661SDeepa Dinamani case SO_RCVTIMEO_OLD:
1297a9beb86aSDeepa Dinamani case SO_RCVTIMEO_NEW:
1298c8c1bbb6SChristoph Hellwig ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
1299c34645acSChristoph Hellwig optlen, optname == SO_RCVTIMEO_OLD);
13001da177e4SLinus Torvalds break;
13011da177e4SLinus Torvalds
130245bdc661SDeepa Dinamani case SO_SNDTIMEO_OLD:
1303a9beb86aSDeepa Dinamani case SO_SNDTIMEO_NEW:
1304c8c1bbb6SChristoph Hellwig ret = sock_set_timeout(&sk->sk_sndtimeo, optval,
1305c34645acSChristoph Hellwig optlen, optname == SO_SNDTIMEO_OLD);
13061da177e4SLinus Torvalds break;
13071da177e4SLinus Torvalds
13084d295e54SChristoph Hellwig case SO_ATTACH_FILTER: {
13091da177e4SLinus Torvalds struct sock_fprog fprog;
13101da177e4SLinus Torvalds
1311c8c1bbb6SChristoph Hellwig ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
13124d295e54SChristoph Hellwig if (!ret)
13131da177e4SLinus Torvalds ret = sk_attach_filter(&fprog, sk);
13141da177e4SLinus Torvalds break;
13154d295e54SChristoph Hellwig }
131689aa0758SAlexei Starovoitov case SO_ATTACH_BPF:
131789aa0758SAlexei Starovoitov ret = -EINVAL;
131889aa0758SAlexei Starovoitov if (optlen == sizeof(u32)) {
131989aa0758SAlexei Starovoitov u32 ufd;
132089aa0758SAlexei Starovoitov
132189aa0758SAlexei Starovoitov ret = -EFAULT;
1322c8c1bbb6SChristoph Hellwig if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
132389aa0758SAlexei Starovoitov break;
132489aa0758SAlexei Starovoitov
132589aa0758SAlexei Starovoitov ret = sk_attach_bpf(ufd, sk);
132689aa0758SAlexei Starovoitov }
132789aa0758SAlexei Starovoitov break;
132889aa0758SAlexei Starovoitov
13294d295e54SChristoph Hellwig case SO_ATTACH_REUSEPORT_CBPF: {
1330538950a1SCraig Gallek struct sock_fprog fprog;
1331538950a1SCraig Gallek
1332c8c1bbb6SChristoph Hellwig ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
13334d295e54SChristoph Hellwig if (!ret)
1334538950a1SCraig Gallek ret = sk_reuseport_attach_filter(&fprog, sk);
1335538950a1SCraig Gallek break;
13364d295e54SChristoph Hellwig }
1337538950a1SCraig Gallek case SO_ATTACH_REUSEPORT_EBPF:
1338538950a1SCraig Gallek ret = -EINVAL;
1339538950a1SCraig Gallek if (optlen == sizeof(u32)) {
1340538950a1SCraig Gallek u32 ufd;
1341538950a1SCraig Gallek
1342538950a1SCraig Gallek ret = -EFAULT;
1343c8c1bbb6SChristoph Hellwig if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
1344538950a1SCraig Gallek break;
1345538950a1SCraig Gallek
1346538950a1SCraig Gallek ret = sk_reuseport_attach_bpf(ufd, sk);
1347538950a1SCraig Gallek }
1348538950a1SCraig Gallek break;
1349538950a1SCraig Gallek
135099f3a064SMartin KaFai Lau case SO_DETACH_REUSEPORT_BPF:
135199f3a064SMartin KaFai Lau ret = reuseport_detach_prog(sk);
135299f3a064SMartin KaFai Lau break;
135399f3a064SMartin KaFai Lau
13541da177e4SLinus Torvalds case SO_DETACH_FILTER:
135555b33325SPavel Emelyanov ret = sk_detach_filter(sk);
13561da177e4SLinus Torvalds break;
13571da177e4SLinus Torvalds
1358d59577b6SVincent Bernat case SO_LOCK_FILTER:
1359d59577b6SVincent Bernat if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
1360d59577b6SVincent Bernat ret = -EPERM;
1361d59577b6SVincent Bernat else
1362d59577b6SVincent Bernat sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
1363d59577b6SVincent Bernat break;
1364d59577b6SVincent Bernat
1365877ce7c1SCatherine Zhang case SO_PASSSEC:
1366274c4a6dSAndy Shevchenko assign_bit(SOCK_PASSSEC, &sock->flags, valbool);
1367877ce7c1SCatherine Zhang break;
13684a19ec58SLaszlo Attila Toth case SO_MARK:
1369e42c7beeSMartin KaFai Lau if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
1370e42c7beeSMartin KaFai Lau !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
13714a19ec58SLaszlo Attila Toth ret = -EPERM;
1372dd9082f4SAlexander Aring break;
137350254256SDavid Barmann }
1374dd9082f4SAlexander Aring
1375dd9082f4SAlexander Aring __sock_set_mark(sk, val);
13764a19ec58SLaszlo Attila Toth break;
13776fd1d51cSErin MacNeil case SO_RCVMARK:
13786fd1d51cSErin MacNeil sock_valbool_flag(sk, SOCK_RCVMARK, valbool);
13796fd1d51cSErin MacNeil break;
1380877ce7c1SCatherine Zhang
13813b885787SNeil Horman case SO_RXQ_OVFL:
13828083f0fcSJohannes Berg sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
13833b885787SNeil Horman break;
13846e3e939fSJohannes Berg
13856e3e939fSJohannes Berg case SO_WIFI_STATUS:
13866e3e939fSJohannes Berg sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
13876e3e939fSJohannes Berg break;
13886e3e939fSJohannes Berg
1389ef64a54fSPavel Emelyanov case SO_PEEK_OFF:
13901ded5e5aSEric Dumazet {
13911ded5e5aSEric Dumazet int (*set_peek_off)(struct sock *sk, int val);
13921ded5e5aSEric Dumazet
13931ded5e5aSEric Dumazet set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
13941ded5e5aSEric Dumazet if (set_peek_off)
13951ded5e5aSEric Dumazet ret = set_peek_off(sk, val);
1396ef64a54fSPavel Emelyanov else
1397ef64a54fSPavel Emelyanov ret = -EOPNOTSUPP;
1398ef64a54fSPavel Emelyanov break;
13991ded5e5aSEric Dumazet }
14003bdc0ebaSBen Greear
14013bdc0ebaSBen Greear case SO_NOFCS:
14023bdc0ebaSBen Greear sock_valbool_flag(sk, SOCK_NOFCS, valbool);
14033bdc0ebaSBen Greear break;
14043bdc0ebaSBen Greear
14057d4c04fcSKeller, Jacob E case SO_SELECT_ERR_QUEUE:
14067d4c04fcSKeller, Jacob E sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
14077d4c04fcSKeller, Jacob E break;
14087d4c04fcSKeller, Jacob E
1409e0d1095aSCong Wang #ifdef CONFIG_NET_RX_BUSY_POLL
141064b0dc51SEliezer Tamir case SO_BUSY_POLL:
1411dafcc438SEliezer Tamir if (val < 0)
1412dafcc438SEliezer Tamir ret = -EINVAL;
1413dafcc438SEliezer Tamir else
14140dbffbb5SEric Dumazet WRITE_ONCE(sk->sk_ll_usec, val);
1415dafcc438SEliezer Tamir break;
14167fd3253aSBjörn Töpel case SO_PREFER_BUSY_POLL:
1417e42c7beeSMartin KaFai Lau if (valbool && !sockopt_capable(CAP_NET_ADMIN))
14187fd3253aSBjörn Töpel ret = -EPERM;
14197fd3253aSBjörn Töpel else
14207fd3253aSBjörn Töpel WRITE_ONCE(sk->sk_prefer_busy_poll, valbool);
14217fd3253aSBjörn Töpel break;
14227c951cafSBjörn Töpel case SO_BUSY_POLL_BUDGET:
1423e42c7beeSMartin KaFai Lau if (val > READ_ONCE(sk->sk_busy_poll_budget) && !sockopt_capable(CAP_NET_ADMIN)) {
14247c951cafSBjörn Töpel ret = -EPERM;
14257c951cafSBjörn Töpel } else {
14267c951cafSBjörn Töpel if (val < 0 || val > U16_MAX)
14277c951cafSBjörn Töpel ret = -EINVAL;
14287c951cafSBjörn Töpel else
14297c951cafSBjörn Töpel WRITE_ONCE(sk->sk_busy_poll_budget, val);
14307c951cafSBjörn Töpel }
14317c951cafSBjörn Töpel break;
1432dafcc438SEliezer Tamir #endif
143362748f32SEric Dumazet
143462748f32SEric Dumazet case SO_MAX_PACING_RATE:
14356bdef102SEric Dumazet {
1436700465fdSKe Li unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val;
14376bdef102SEric Dumazet
14386bdef102SEric Dumazet if (sizeof(ulval) != sizeof(val) &&
14396bdef102SEric Dumazet optlen >= sizeof(ulval) &&
1440c8c1bbb6SChristoph Hellwig copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
14416bdef102SEric Dumazet ret = -EFAULT;
14426bdef102SEric Dumazet break;
14436bdef102SEric Dumazet }
14446bdef102SEric Dumazet if (ulval != ~0UL)
1445218af599SEric Dumazet cmpxchg(&sk->sk_pacing_status,
1446218af599SEric Dumazet SK_PACING_NONE,
1447218af599SEric Dumazet SK_PACING_NEEDED);
1448ea7f45efSEric Dumazet /* Pairs with READ_ONCE() from sk_getsockopt() */
1449ea7f45efSEric Dumazet WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
14506bdef102SEric Dumazet sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
145162748f32SEric Dumazet break;
14526bdef102SEric Dumazet }
145370da268bSEric Dumazet case SO_INCOMING_CPU:
1454b261eda8SKuniyuki Iwashima reuseport_update_incoming_cpu(sk, val);
145570da268bSEric Dumazet break;
145670da268bSEric Dumazet
1457a87cb3e4STom Herbert case SO_CNX_ADVICE:
1458a87cb3e4STom Herbert if (val == 1)
1459a87cb3e4STom Herbert dst_negative_advice(sk);
1460a87cb3e4STom Herbert break;
146176851d12SWillem de Bruijn
146276851d12SWillem de Bruijn case SO_ZEROCOPY:
146328190752SSowmini Varadhan if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
146442f67eeaSEric Dumazet if (!(sk_is_tcp(sk) ||
1465b5947e5dSWillem de Bruijn (sk->sk_type == SOCK_DGRAM &&
1466b5947e5dSWillem de Bruijn sk->sk_protocol == IPPROTO_UDP)))
1467869420a8SSamuel Thibault ret = -EOPNOTSUPP;
146828190752SSowmini Varadhan } else if (sk->sk_family != PF_RDS) {
1469869420a8SSamuel Thibault ret = -EOPNOTSUPP;
147028190752SSowmini Varadhan }
147128190752SSowmini Varadhan if (!ret) {
147228190752SSowmini Varadhan if (val < 0 || val > 1)
147376851d12SWillem de Bruijn ret = -EINVAL;
147476851d12SWillem de Bruijn else
147576851d12SWillem de Bruijn sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
147628190752SSowmini Varadhan }
1477334e6413SJesus Sanchez-Palencia break;
1478334e6413SJesus Sanchez-Palencia
147980b14deeSRichard Cochran case SO_TXTIME:
1480790709f2SEric Dumazet if (optlen != sizeof(struct sock_txtime)) {
148180b14deeSRichard Cochran ret = -EINVAL;
1482790709f2SEric Dumazet break;
1483c8c1bbb6SChristoph Hellwig } else if (copy_from_sockptr(&sk_txtime, optval,
148480b14deeSRichard Cochran sizeof(struct sock_txtime))) {
148580b14deeSRichard Cochran ret = -EFAULT;
1486790709f2SEric Dumazet break;
148780b14deeSRichard Cochran } else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
148880b14deeSRichard Cochran ret = -EINVAL;
1489790709f2SEric Dumazet break;
1490790709f2SEric Dumazet }
1491790709f2SEric Dumazet /* CLOCK_MONOTONIC is only used by sch_fq, and this packet
1492790709f2SEric Dumazet * scheduler has enough safe guards.
1493790709f2SEric Dumazet */
1494790709f2SEric Dumazet if (sk_txtime.clockid != CLOCK_MONOTONIC &&
1495e42c7beeSMartin KaFai Lau !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1496790709f2SEric Dumazet ret = -EPERM;
1497790709f2SEric Dumazet break;
1498790709f2SEric Dumazet }
149980b14deeSRichard Cochran sock_valbool_flag(sk, SOCK_TXTIME, true);
150080b14deeSRichard Cochran sk->sk_clockid = sk_txtime.clockid;
150180b14deeSRichard Cochran sk->sk_txtime_deadline_mode =
150280b14deeSRichard Cochran !!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
15034b15c707SJesus Sanchez-Palencia sk->sk_txtime_report_errors =
15044b15c707SJesus Sanchez-Palencia !!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
150580b14deeSRichard Cochran break;
150680b14deeSRichard Cochran
1507f5dd3d0cSDavid Herrmann case SO_BINDTOIFINDEX:
15087594888cSChristoph Hellwig ret = sock_bindtoindex_locked(sk, val);
1509f5dd3d0cSDavid Herrmann break;
1510f5dd3d0cSDavid Herrmann
151104190bf8SPavel Tikhomirov case SO_BUF_LOCK:
151204190bf8SPavel Tikhomirov if (val & ~SOCK_BUF_LOCK_MASK) {
151304190bf8SPavel Tikhomirov ret = -EINVAL;
151404190bf8SPavel Tikhomirov break;
151504190bf8SPavel Tikhomirov }
151604190bf8SPavel Tikhomirov sk->sk_userlocks = val | (sk->sk_userlocks &
151704190bf8SPavel Tikhomirov ~SOCK_BUF_LOCK_MASK);
151804190bf8SPavel Tikhomirov break;
151904190bf8SPavel Tikhomirov
15202bb2f5fbSWei Wang case SO_RESERVE_MEM:
15212bb2f5fbSWei Wang {
15222bb2f5fbSWei Wang int delta;
15232bb2f5fbSWei Wang
15242bb2f5fbSWei Wang if (val < 0) {
15252bb2f5fbSWei Wang ret = -EINVAL;
15262bb2f5fbSWei Wang break;
15272bb2f5fbSWei Wang }
15282bb2f5fbSWei Wang
15292bb2f5fbSWei Wang delta = val - sk->sk_reserved_mem;
15302bb2f5fbSWei Wang if (delta < 0)
15312bb2f5fbSWei Wang sock_release_reserved_memory(sk, -delta);
15322bb2f5fbSWei Wang else
15332bb2f5fbSWei Wang ret = sock_reserve_memory(sk, delta);
15342bb2f5fbSWei Wang break;
15352bb2f5fbSWei Wang }
15362bb2f5fbSWei Wang
153726859240SAkhmat Karakotov case SO_TXREHASH:
153826859240SAkhmat Karakotov if (val < -1 || val > 1) {
153926859240SAkhmat Karakotov ret = -EINVAL;
154026859240SAkhmat Karakotov break;
154126859240SAkhmat Karakotov }
1542c11204c7SKevin Yang if ((u8)val == SOCK_TXREHASH_DEFAULT)
1543c11204c7SKevin Yang val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
1544c76a0328SEric Dumazet /* Paired with READ_ONCE() in tcp_rtx_synack()
1545c76a0328SEric Dumazet * and sk_getsockopt().
1546c76a0328SEric Dumazet */
1547cb6cd2ceSAkhmat Karakotov WRITE_ONCE(sk->sk_txrehash, (u8)val);
154826859240SAkhmat Karakotov break;
154926859240SAkhmat Karakotov
15501da177e4SLinus Torvalds default:
15511da177e4SLinus Torvalds ret = -ENOPROTOOPT;
15521da177e4SLinus Torvalds break;
15531da177e4SLinus Torvalds }
155424426654SMartin KaFai Lau sockopt_release_sock(sk);
15551da177e4SLinus Torvalds return ret;
15561da177e4SLinus Torvalds }
15574d748f99SMartin KaFai Lau
/* Set a socket option on the socket owned by @sock.
 *
 * Thin public wrapper: all real work happens in sk_setsockopt() on the
 * struct sock embedded in @sock.  Returns 0 on success or a negative
 * errno on failure.
 */
int sock_setsockopt(struct socket *sock, int level, int optname,
		    sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;

	return sk_setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_setsockopt);
15651da177e4SLinus Torvalds
sk_get_peer_cred(struct sock * sk)156635306eb2SEric Dumazet static const struct cred *sk_get_peer_cred(struct sock *sk)
156735306eb2SEric Dumazet {
156835306eb2SEric Dumazet const struct cred *cred;
156935306eb2SEric Dumazet
157035306eb2SEric Dumazet spin_lock(&sk->sk_peer_lock);
157135306eb2SEric Dumazet cred = get_cred(sk->sk_peer_cred);
157235306eb2SEric Dumazet spin_unlock(&sk->sk_peer_lock);
157335306eb2SEric Dumazet
157435306eb2SEric Dumazet return cred;
157535306eb2SEric Dumazet }
15761da177e4SLinus Torvalds
cred_to_ucred(struct pid * pid,const struct cred * cred,struct ucred * ucred)15778f09898bSstephen hemminger static void cred_to_ucred(struct pid *pid, const struct cred *cred,
15783f551f94SEric W. Biederman struct ucred *ucred)
15793f551f94SEric W. Biederman {
15803f551f94SEric W. Biederman ucred->pid = pid_vnr(pid);
15813f551f94SEric W. Biederman ucred->uid = ucred->gid = -1;
15823f551f94SEric W. Biederman if (cred) {
15833f551f94SEric W. Biederman struct user_namespace *current_ns = current_user_ns();
15843f551f94SEric W. Biederman
1585b2e4f544SEric W. Biederman ucred->uid = from_kuid_munged(current_ns, cred->euid);
1586b2e4f544SEric W. Biederman ucred->gid = from_kgid_munged(current_ns, cred->egid);
15873f551f94SEric W. Biederman }
15883f551f94SEric W. Biederman }
15893f551f94SEric W. Biederman
/* Copy the supplementary group list @src out to the user buffer @dst
 * as a flat array of gid_t, munging each kgid into the current user
 * namespace.  Returns 0 on success or -EFAULT if any copy fails.
 */
static int groups_to_user(sockptr_t dst, const struct group_info *src)
{
	struct user_namespace *user_ns = current_user_ns();
	int i;

	for (i = 0; i < src->ngroups; i++) {
		gid_t gid = from_kgid_munged(user_ns, src->gid[i]);
		size_t offset = i * sizeof(gid);

		if (copy_to_sockptr_offset(dst, offset, &gid, sizeof(gid)))
			return -EFAULT;
	}

	return 0;
}
160428b5ba2aSDavid Herrmann
sk_getsockopt(struct sock * sk,int level,int optname,sockptr_t optval,sockptr_t optlen)160565ddc82dSMartin KaFai Lau int sk_getsockopt(struct sock *sk, int level, int optname,
16064ff09db1SMartin KaFai Lau sockptr_t optval, sockptr_t optlen)
16071da177e4SLinus Torvalds {
1608ba74a760SMartin KaFai Lau struct socket *sock = sk->sk_socket;
16091da177e4SLinus Torvalds
1610e71a4783SStephen Hemminger union {
16111da177e4SLinus Torvalds int val;
16125daab9dbSChenbo Feng u64 val64;
1613677f136cSEric Dumazet unsigned long ulval;
16141da177e4SLinus Torvalds struct linger ling;
1615fe0c72f3SArnd Bergmann struct old_timeval32 tm32;
1616fe0c72f3SArnd Bergmann struct __kernel_old_timeval tm;
1617a9beb86aSDeepa Dinamani struct __kernel_sock_timeval stm;
161880b14deeSRichard Cochran struct sock_txtime txtime;
1619d463126eSYangbo Lu struct so_timestamping timestamping;
16201da177e4SLinus Torvalds } v;
16211da177e4SLinus Torvalds
16224d0392beSH Hartley Sweeten int lv = sizeof(int);
16231da177e4SLinus Torvalds int len;
16241da177e4SLinus Torvalds
16254ff09db1SMartin KaFai Lau if (copy_from_sockptr(&len, optlen, sizeof(int)))
16261da177e4SLinus Torvalds return -EFAULT;
16271da177e4SLinus Torvalds if (len < 0)
16281da177e4SLinus Torvalds return -EINVAL;
16291da177e4SLinus Torvalds
163050fee1deSEugene Teo memset(&v, 0, sizeof(v));
1631df0bca04SClément Lecigne
1632e71a4783SStephen Hemminger switch (optname) {
16331da177e4SLinus Torvalds case SO_DEBUG:
16341da177e4SLinus Torvalds v.val = sock_flag(sk, SOCK_DBG);
16351da177e4SLinus Torvalds break;
16361da177e4SLinus Torvalds
16371da177e4SLinus Torvalds case SO_DONTROUTE:
16381da177e4SLinus Torvalds v.val = sock_flag(sk, SOCK_LOCALROUTE);
16391da177e4SLinus Torvalds break;
16401da177e4SLinus Torvalds
16411da177e4SLinus Torvalds case SO_BROADCAST:
16421b23a5dfSEric Dumazet v.val = sock_flag(sk, SOCK_BROADCAST);
16431da177e4SLinus Torvalds break;
16441da177e4SLinus Torvalds
16451da177e4SLinus Torvalds case SO_SNDBUF:
164674bc0843SEric Dumazet v.val = READ_ONCE(sk->sk_sndbuf);
16471da177e4SLinus Torvalds break;
16481da177e4SLinus Torvalds
16491da177e4SLinus Torvalds case SO_RCVBUF:
1650b4b55325SEric Dumazet v.val = READ_ONCE(sk->sk_rcvbuf);
16511da177e4SLinus Torvalds break;
16521da177e4SLinus Torvalds
16531da177e4SLinus Torvalds case SO_REUSEADDR:
16541da177e4SLinus Torvalds v.val = sk->sk_reuse;
16551da177e4SLinus Torvalds break;
16561da177e4SLinus Torvalds
1657055dc21aSTom Herbert case SO_REUSEPORT:
1658055dc21aSTom Herbert v.val = sk->sk_reuseport;
1659055dc21aSTom Herbert break;
1660055dc21aSTom Herbert
16611da177e4SLinus Torvalds case SO_KEEPALIVE:
16621b23a5dfSEric Dumazet v.val = sock_flag(sk, SOCK_KEEPOPEN);
16631da177e4SLinus Torvalds break;
16641da177e4SLinus Torvalds
16651da177e4SLinus Torvalds case SO_TYPE:
16661da177e4SLinus Torvalds v.val = sk->sk_type;
16671da177e4SLinus Torvalds break;
16681da177e4SLinus Torvalds
166949c794e9SJan Engelhardt case SO_PROTOCOL:
167049c794e9SJan Engelhardt v.val = sk->sk_protocol;
167149c794e9SJan Engelhardt break;
167249c794e9SJan Engelhardt
16730d6038eeSJan Engelhardt case SO_DOMAIN:
16740d6038eeSJan Engelhardt v.val = sk->sk_family;
16750d6038eeSJan Engelhardt break;
16760d6038eeSJan Engelhardt
16771da177e4SLinus Torvalds case SO_ERROR:
16781da177e4SLinus Torvalds v.val = -sock_error(sk);
16791da177e4SLinus Torvalds if (v.val == 0)
16801da177e4SLinus Torvalds v.val = xchg(&sk->sk_err_soft, 0);
16811da177e4SLinus Torvalds break;
16821da177e4SLinus Torvalds
16831da177e4SLinus Torvalds case SO_OOBINLINE:
16841b23a5dfSEric Dumazet v.val = sock_flag(sk, SOCK_URGINLINE);
16851da177e4SLinus Torvalds break;
16861da177e4SLinus Torvalds
16871da177e4SLinus Torvalds case SO_NO_CHECK:
168828448b80STom Herbert v.val = sk->sk_no_check_tx;
16891da177e4SLinus Torvalds break;
16901da177e4SLinus Torvalds
16911da177e4SLinus Torvalds case SO_PRIORITY:
16928bf43be7SEric Dumazet v.val = READ_ONCE(sk->sk_priority);
16931da177e4SLinus Torvalds break;
16941da177e4SLinus Torvalds
16951da177e4SLinus Torvalds case SO_LINGER:
16961da177e4SLinus Torvalds lv = sizeof(v.ling);
16971b23a5dfSEric Dumazet v.ling.l_onoff = sock_flag(sk, SOCK_LINGER);
1698bc1fb82aSEric Dumazet v.ling.l_linger = READ_ONCE(sk->sk_lingertime) / HZ;
16991da177e4SLinus Torvalds break;
17001da177e4SLinus Torvalds
17011da177e4SLinus Torvalds case SO_BSDCOMPAT:
17021da177e4SLinus Torvalds break;
17031da177e4SLinus Torvalds
17047f1bc6e9SDeepa Dinamani case SO_TIMESTAMP_OLD:
170592f37fd2SEric Dumazet v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1706887feae3SDeepa Dinamani !sock_flag(sk, SOCK_TSTAMP_NEW) &&
170792f37fd2SEric Dumazet !sock_flag(sk, SOCK_RCVTSTAMPNS);
170892f37fd2SEric Dumazet break;
170992f37fd2SEric Dumazet
17107f1bc6e9SDeepa Dinamani case SO_TIMESTAMPNS_OLD:
1711887feae3SDeepa Dinamani v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW);
1712887feae3SDeepa Dinamani break;
1713887feae3SDeepa Dinamani
1714887feae3SDeepa Dinamani case SO_TIMESTAMP_NEW:
1715887feae3SDeepa Dinamani v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW);
1716887feae3SDeepa Dinamani break;
1717887feae3SDeepa Dinamani
1718887feae3SDeepa Dinamani case SO_TIMESTAMPNS_NEW:
1719887feae3SDeepa Dinamani v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW);
17201da177e4SLinus Torvalds break;
17211da177e4SLinus Torvalds
17227f1bc6e9SDeepa Dinamani case SO_TIMESTAMPING_OLD:
1723742e4af3SJörn-Thorben Hinz case SO_TIMESTAMPING_NEW:
1724d463126eSYangbo Lu lv = sizeof(v.timestamping);
1725742e4af3SJörn-Thorben Hinz /* For the later-added case SO_TIMESTAMPING_NEW: Be strict about only
1726742e4af3SJörn-Thorben Hinz * returning the flags when they were set through the same option.
1727742e4af3SJörn-Thorben Hinz * Don't change the beviour for the old case SO_TIMESTAMPING_OLD.
1728742e4af3SJörn-Thorben Hinz */
1729742e4af3SJörn-Thorben Hinz if (optname == SO_TIMESTAMPING_OLD || sock_flag(sk, SOCK_TSTAMP_NEW)) {
1730e3390b30SEric Dumazet v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
1731251cd405SEric Dumazet v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
1732742e4af3SJörn-Thorben Hinz }
173320d49473SPatrick Ohly break;
173420d49473SPatrick Ohly
1735a9beb86aSDeepa Dinamani case SO_RCVTIMEO_OLD:
1736a9beb86aSDeepa Dinamani case SO_RCVTIMEO_NEW:
1737285975ddSEric Dumazet lv = sock_get_timeout(READ_ONCE(sk->sk_rcvtimeo), &v,
1738285975ddSEric Dumazet SO_RCVTIMEO_OLD == optname);
17391da177e4SLinus Torvalds break;
17401da177e4SLinus Torvalds
1741a9beb86aSDeepa Dinamani case SO_SNDTIMEO_OLD:
1742a9beb86aSDeepa Dinamani case SO_SNDTIMEO_NEW:
1743285975ddSEric Dumazet lv = sock_get_timeout(READ_ONCE(sk->sk_sndtimeo), &v,
1744285975ddSEric Dumazet SO_SNDTIMEO_OLD == optname);
17451da177e4SLinus Torvalds break;
17461da177e4SLinus Torvalds
17471da177e4SLinus Torvalds case SO_RCVLOWAT:
1748e6d12bdbSEric Dumazet v.val = READ_ONCE(sk->sk_rcvlowat);
17491da177e4SLinus Torvalds break;
17501da177e4SLinus Torvalds
17511da177e4SLinus Torvalds case SO_SNDLOWAT:
17521da177e4SLinus Torvalds v.val = 1;
17531da177e4SLinus Torvalds break;
17541da177e4SLinus Torvalds
17551da177e4SLinus Torvalds case SO_PASSCRED:
175682981930SEric Dumazet v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
17571da177e4SLinus Torvalds break;
17581da177e4SLinus Torvalds
17595e2ff670SAlexander Mikhalitsyn case SO_PASSPIDFD:
17605e2ff670SAlexander Mikhalitsyn v.val = !!test_bit(SOCK_PASSPIDFD, &sock->flags);
17615e2ff670SAlexander Mikhalitsyn break;
17625e2ff670SAlexander Mikhalitsyn
17631da177e4SLinus Torvalds case SO_PEERCRED:
1764109f6e39SEric W. Biederman {
1765109f6e39SEric W. Biederman struct ucred peercred;
1766109f6e39SEric W. Biederman if (len > sizeof(peercred))
1767109f6e39SEric W. Biederman len = sizeof(peercred);
176835306eb2SEric Dumazet
176935306eb2SEric Dumazet spin_lock(&sk->sk_peer_lock);
1770109f6e39SEric W. Biederman cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
177135306eb2SEric Dumazet spin_unlock(&sk->sk_peer_lock);
177235306eb2SEric Dumazet
17734ff09db1SMartin KaFai Lau if (copy_to_sockptr(optval, &peercred, len))
17741da177e4SLinus Torvalds return -EFAULT;
17751da177e4SLinus Torvalds goto lenout;
1776109f6e39SEric W. Biederman }
17771da177e4SLinus Torvalds
17787b26952aSAlexander Mikhalitsyn case SO_PEERPIDFD:
17797b26952aSAlexander Mikhalitsyn {
17807b26952aSAlexander Mikhalitsyn struct pid *peer_pid;
17817b26952aSAlexander Mikhalitsyn struct file *pidfd_file = NULL;
17827b26952aSAlexander Mikhalitsyn int pidfd;
17837b26952aSAlexander Mikhalitsyn
17847b26952aSAlexander Mikhalitsyn if (len > sizeof(pidfd))
17857b26952aSAlexander Mikhalitsyn len = sizeof(pidfd);
17867b26952aSAlexander Mikhalitsyn
17877b26952aSAlexander Mikhalitsyn spin_lock(&sk->sk_peer_lock);
17887b26952aSAlexander Mikhalitsyn peer_pid = get_pid(sk->sk_peer_pid);
17897b26952aSAlexander Mikhalitsyn spin_unlock(&sk->sk_peer_lock);
17907b26952aSAlexander Mikhalitsyn
17917b26952aSAlexander Mikhalitsyn if (!peer_pid)
1792b6f79e82SDavid Rheinsberg return -ENODATA;
17937b26952aSAlexander Mikhalitsyn
17947b26952aSAlexander Mikhalitsyn pidfd = pidfd_prepare(peer_pid, 0, &pidfd_file);
17957b26952aSAlexander Mikhalitsyn put_pid(peer_pid);
17967b26952aSAlexander Mikhalitsyn if (pidfd < 0)
17977b26952aSAlexander Mikhalitsyn return pidfd;
17987b26952aSAlexander Mikhalitsyn
17997b26952aSAlexander Mikhalitsyn if (copy_to_sockptr(optval, &pidfd, len) ||
18007b26952aSAlexander Mikhalitsyn copy_to_sockptr(optlen, &len, sizeof(int))) {
18017b26952aSAlexander Mikhalitsyn put_unused_fd(pidfd);
18027b26952aSAlexander Mikhalitsyn fput(pidfd_file);
18037b26952aSAlexander Mikhalitsyn
18047b26952aSAlexander Mikhalitsyn return -EFAULT;
18057b26952aSAlexander Mikhalitsyn }
18067b26952aSAlexander Mikhalitsyn
18077b26952aSAlexander Mikhalitsyn fd_install(pidfd, pidfd_file);
18087b26952aSAlexander Mikhalitsyn return 0;
18097b26952aSAlexander Mikhalitsyn }
18107b26952aSAlexander Mikhalitsyn
181128b5ba2aSDavid Herrmann case SO_PEERGROUPS:
181228b5ba2aSDavid Herrmann {
181335306eb2SEric Dumazet const struct cred *cred;
181428b5ba2aSDavid Herrmann int ret, n;
181528b5ba2aSDavid Herrmann
181635306eb2SEric Dumazet cred = sk_get_peer_cred(sk);
181735306eb2SEric Dumazet if (!cred)
181828b5ba2aSDavid Herrmann return -ENODATA;
181928b5ba2aSDavid Herrmann
182035306eb2SEric Dumazet n = cred->group_info->ngroups;
182128b5ba2aSDavid Herrmann if (len < n * sizeof(gid_t)) {
182228b5ba2aSDavid Herrmann len = n * sizeof(gid_t);
182335306eb2SEric Dumazet put_cred(cred);
18244ff09db1SMartin KaFai Lau return copy_to_sockptr(optlen, &len, sizeof(int)) ? -EFAULT : -ERANGE;
182528b5ba2aSDavid Herrmann }
182628b5ba2aSDavid Herrmann len = n * sizeof(gid_t);
182728b5ba2aSDavid Herrmann
18284ff09db1SMartin KaFai Lau ret = groups_to_user(optval, cred->group_info);
182935306eb2SEric Dumazet put_cred(cred);
183028b5ba2aSDavid Herrmann if (ret)
183128b5ba2aSDavid Herrmann return ret;
183228b5ba2aSDavid Herrmann goto lenout;
183328b5ba2aSDavid Herrmann }
183428b5ba2aSDavid Herrmann
18351da177e4SLinus Torvalds case SO_PEERNAME:
18361da177e4SLinus Torvalds {
18378936bf53SKuniyuki Iwashima struct sockaddr_storage address;
18381da177e4SLinus Torvalds
18391ded5e5aSEric Dumazet lv = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 2);
18409b2c45d4SDenys Vlasenko if (lv < 0)
18411da177e4SLinus Torvalds return -ENOTCONN;
18421da177e4SLinus Torvalds if (lv < len)
18431da177e4SLinus Torvalds return -EINVAL;
18448936bf53SKuniyuki Iwashima if (copy_to_sockptr(optval, &address, len))
18451da177e4SLinus Torvalds return -EFAULT;
18461da177e4SLinus Torvalds goto lenout;
18471da177e4SLinus Torvalds }
18481da177e4SLinus Torvalds
18491da177e4SLinus Torvalds /* Dubious BSD thing... Probably nobody even uses it, but
18501da177e4SLinus Torvalds * the UNIX standard wants it for whatever reason... -DaveM
18511da177e4SLinus Torvalds */
18521da177e4SLinus Torvalds case SO_ACCEPTCONN:
18531da177e4SLinus Torvalds v.val = sk->sk_state == TCP_LISTEN;
18541da177e4SLinus Torvalds break;
18551da177e4SLinus Torvalds
1856877ce7c1SCatherine Zhang case SO_PASSSEC:
185782981930SEric Dumazet v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
1858877ce7c1SCatherine Zhang break;
1859877ce7c1SCatherine Zhang
18601da177e4SLinus Torvalds case SO_PEERSEC:
1861b10b9c34SPaul Moore return security_socket_getpeersec_stream(sock,
1862b10b9c34SPaul Moore optval, optlen, len);
18631da177e4SLinus Torvalds
18644a19ec58SLaszlo Attila Toth case SO_MARK:
18653c5b4d69SEric Dumazet v.val = READ_ONCE(sk->sk_mark);
18664a19ec58SLaszlo Attila Toth break;
18674a19ec58SLaszlo Attila Toth
18686fd1d51cSErin MacNeil case SO_RCVMARK:
18696fd1d51cSErin MacNeil v.val = sock_flag(sk, SOCK_RCVMARK);
18706fd1d51cSErin MacNeil break;
18716fd1d51cSErin MacNeil
18723b885787SNeil Horman case SO_RXQ_OVFL:
18731b23a5dfSEric Dumazet v.val = sock_flag(sk, SOCK_RXQ_OVFL);
18743b885787SNeil Horman break;
18753b885787SNeil Horman
18766e3e939fSJohannes Berg case SO_WIFI_STATUS:
18771b23a5dfSEric Dumazet v.val = sock_flag(sk, SOCK_WIFI_STATUS);
18786e3e939fSJohannes Berg break;
18796e3e939fSJohannes Berg
1880ef64a54fSPavel Emelyanov case SO_PEEK_OFF:
18811ded5e5aSEric Dumazet if (!READ_ONCE(sock->ops)->set_peek_off)
1882ef64a54fSPavel Emelyanov return -EOPNOTSUPP;
1883ef64a54fSPavel Emelyanov
188411695c6eSEric Dumazet v.val = READ_ONCE(sk->sk_peek_off);
1885ef64a54fSPavel Emelyanov break;
1886bc2f7996SDavid S. Miller case SO_NOFCS:
18871b23a5dfSEric Dumazet v.val = sock_flag(sk, SOCK_NOFCS);
1888bc2f7996SDavid S. Miller break;
1889c91f6df2SBrian Haley
1890f7b86bfeSPavel Emelyanov case SO_BINDTODEVICE:
1891c91f6df2SBrian Haley return sock_getbindtodevice(sk, optval, optlen, len);
1892c91f6df2SBrian Haley
1893a8fc9277SPavel Emelyanov case SO_GET_FILTER:
18944ff09db1SMartin KaFai Lau len = sk_get_filter(sk, optval, len);
1895a8fc9277SPavel Emelyanov if (len < 0)
1896a8fc9277SPavel Emelyanov return len;
1897a8fc9277SPavel Emelyanov
1898a8fc9277SPavel Emelyanov goto lenout;
1899c91f6df2SBrian Haley
1900d59577b6SVincent Bernat case SO_LOCK_FILTER:
1901d59577b6SVincent Bernat v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1902d59577b6SVincent Bernat break;
1903d59577b6SVincent Bernat
1904ea02f941SMichal Sekletar case SO_BPF_EXTENSIONS:
1905ea02f941SMichal Sekletar v.val = bpf_tell_extensions();
1906ea02f941SMichal Sekletar break;
1907ea02f941SMichal Sekletar
19087d4c04fcSKeller, Jacob E case SO_SELECT_ERR_QUEUE:
19097d4c04fcSKeller, Jacob E v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
19107d4c04fcSKeller, Jacob E break;
19117d4c04fcSKeller, Jacob E
1912e0d1095aSCong Wang #ifdef CONFIG_NET_RX_BUSY_POLL
191364b0dc51SEliezer Tamir case SO_BUSY_POLL:
1914e5f0d2ddSEric Dumazet v.val = READ_ONCE(sk->sk_ll_usec);
1915dafcc438SEliezer Tamir break;
19167fd3253aSBjörn Töpel case SO_PREFER_BUSY_POLL:
19177fd3253aSBjörn Töpel v.val = READ_ONCE(sk->sk_prefer_busy_poll);
19187fd3253aSBjörn Töpel break;
1919dafcc438SEliezer Tamir #endif
1920dafcc438SEliezer Tamir
192162748f32SEric Dumazet case SO_MAX_PACING_RATE:
1922ea7f45efSEric Dumazet /* The READ_ONCE() pair with the WRITE_ONCE() in sk_setsockopt() */
1923677f136cSEric Dumazet if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
1924677f136cSEric Dumazet lv = sizeof(v.ulval);
1925ea7f45efSEric Dumazet v.ulval = READ_ONCE(sk->sk_max_pacing_rate);
1926677f136cSEric Dumazet } else {
192776a9ebe8SEric Dumazet /* 32bit version */
1928ea7f45efSEric Dumazet v.val = min_t(unsigned long, ~0U,
1929ea7f45efSEric Dumazet READ_ONCE(sk->sk_max_pacing_rate));
1930677f136cSEric Dumazet }
193162748f32SEric Dumazet break;
193262748f32SEric Dumazet
19332c8c56e1SEric Dumazet case SO_INCOMING_CPU:
19347170a977SEric Dumazet v.val = READ_ONCE(sk->sk_incoming_cpu);
19352c8c56e1SEric Dumazet break;
19362c8c56e1SEric Dumazet
1937a2d133b1SJosh Hunt case SO_MEMINFO:
1938a2d133b1SJosh Hunt {
1939a2d133b1SJosh Hunt u32 meminfo[SK_MEMINFO_VARS];
1940a2d133b1SJosh Hunt
1941a2d133b1SJosh Hunt sk_get_meminfo(sk, meminfo);
1942a2d133b1SJosh Hunt
1943a2d133b1SJosh Hunt len = min_t(unsigned int, len, sizeof(meminfo));
19444ff09db1SMartin KaFai Lau if (copy_to_sockptr(optval, &meminfo, len))
1945a2d133b1SJosh Hunt return -EFAULT;
1946a2d133b1SJosh Hunt
1947a2d133b1SJosh Hunt goto lenout;
1948a2d133b1SJosh Hunt }
19496d433902SSridhar Samudrala
19506d433902SSridhar Samudrala #ifdef CONFIG_NET_RX_BUSY_POLL
19516d433902SSridhar Samudrala case SO_INCOMING_NAPI_ID:
19526d433902SSridhar Samudrala v.val = READ_ONCE(sk->sk_napi_id);
19536d433902SSridhar Samudrala
19546d433902SSridhar Samudrala /* aggregate non-NAPI IDs down to 0 */
19556d433902SSridhar Samudrala if (v.val < MIN_NAPI_ID)
19566d433902SSridhar Samudrala v.val = 0;
19576d433902SSridhar Samudrala
19586d433902SSridhar Samudrala break;
19596d433902SSridhar Samudrala #endif
19606d433902SSridhar Samudrala
19615daab9dbSChenbo Feng case SO_COOKIE:
19625daab9dbSChenbo Feng lv = sizeof(u64);
19635daab9dbSChenbo Feng if (len < lv)
19645daab9dbSChenbo Feng return -EINVAL;
19655daab9dbSChenbo Feng v.val64 = sock_gen_cookie(sk);
19665daab9dbSChenbo Feng break;
19675daab9dbSChenbo Feng
196876851d12SWillem de Bruijn case SO_ZEROCOPY:
196976851d12SWillem de Bruijn v.val = sock_flag(sk, SOCK_ZEROCOPY);
197076851d12SWillem de Bruijn break;
197176851d12SWillem de Bruijn
197280b14deeSRichard Cochran case SO_TXTIME:
197380b14deeSRichard Cochran lv = sizeof(v.txtime);
197480b14deeSRichard Cochran v.txtime.clockid = sk->sk_clockid;
197580b14deeSRichard Cochran v.txtime.flags |= sk->sk_txtime_deadline_mode ?
197680b14deeSRichard Cochran SOF_TXTIME_DEADLINE_MODE : 0;
19774b15c707SJesus Sanchez-Palencia v.txtime.flags |= sk->sk_txtime_report_errors ?
19784b15c707SJesus Sanchez-Palencia SOF_TXTIME_REPORT_ERRORS : 0;
197980b14deeSRichard Cochran break;
198080b14deeSRichard Cochran
1981f5dd3d0cSDavid Herrmann case SO_BINDTOIFINDEX:
1982e5fccaa1SEric Dumazet v.val = READ_ONCE(sk->sk_bound_dev_if);
1983f5dd3d0cSDavid Herrmann break;
1984f5dd3d0cSDavid Herrmann
1985e8b9eab9SMartynas Pumputis case SO_NETNS_COOKIE:
1986e8b9eab9SMartynas Pumputis lv = sizeof(u64);
1987e8b9eab9SMartynas Pumputis if (len != lv)
1988e8b9eab9SMartynas Pumputis return -EINVAL;
1989e8b9eab9SMartynas Pumputis v.val64 = sock_net(sk)->net_cookie;
1990e8b9eab9SMartynas Pumputis break;
1991e8b9eab9SMartynas Pumputis
199204190bf8SPavel Tikhomirov case SO_BUF_LOCK:
199304190bf8SPavel Tikhomirov v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK;
199404190bf8SPavel Tikhomirov break;
199504190bf8SPavel Tikhomirov
19962bb2f5fbSWei Wang case SO_RESERVE_MEM:
1997fe11fdcbSEric Dumazet v.val = READ_ONCE(sk->sk_reserved_mem);
19982bb2f5fbSWei Wang break;
19992bb2f5fbSWei Wang
200026859240SAkhmat Karakotov case SO_TXREHASH:
2001c76a0328SEric Dumazet /* Paired with WRITE_ONCE() in sk_setsockopt() */
2002c76a0328SEric Dumazet v.val = READ_ONCE(sk->sk_txrehash);
200326859240SAkhmat Karakotov break;
200426859240SAkhmat Karakotov
20051da177e4SLinus Torvalds default:
2006443b5991SYOSHIFUJI Hideaki/吉藤英明 /* We implement the SO_SNDLOWAT etc to not be settable
2007443b5991SYOSHIFUJI Hideaki/吉藤英明 * (1003.1g 7).
2008443b5991SYOSHIFUJI Hideaki/吉藤英明 */
2009e71a4783SStephen Hemminger return -ENOPROTOOPT;
20101da177e4SLinus Torvalds }
2011e71a4783SStephen Hemminger
20121da177e4SLinus Torvalds if (len > lv)
20131da177e4SLinus Torvalds len = lv;
20144ff09db1SMartin KaFai Lau if (copy_to_sockptr(optval, &v, len))
20151da177e4SLinus Torvalds return -EFAULT;
20161da177e4SLinus Torvalds lenout:
20174ff09db1SMartin KaFai Lau if (copy_to_sockptr(optlen, &len, sizeof(int)))
20181da177e4SLinus Torvalds return -EFAULT;
20191da177e4SLinus Torvalds return 0;
20201da177e4SLinus Torvalds }
20211da177e4SLinus Torvalds
/* Userspace getsockopt() entry point: wrap the __user pointers in
 * sockptr containers and defer all the work to the common sk_getsockopt().
 */
int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	return sk_getsockopt(sk, level, optname, USER_SOCKPTR(optval),
			     USER_SOCKPTR(optlen));
}
2029ba74a760SMartin KaFai Lau
2030a5b5bb9aSIngo Molnar /*
2031a5b5bb9aSIngo Molnar * Initialize an sk_lock.
2032a5b5bb9aSIngo Molnar *
2033a5b5bb9aSIngo Molnar * (We also register the sk_lock with the lock validator.)
2034a5b5bb9aSIngo Molnar */
sock_lock_init(struct sock * sk)2035b6f99a21SDave Jones static inline void sock_lock_init(struct sock *sk)
2036a5b5bb9aSIngo Molnar {
2037cdfbabfbSDavid Howells if (sk->sk_kern_sock)
2038cdfbabfbSDavid Howells sock_lock_init_class_and_name(
2039cdfbabfbSDavid Howells sk,
2040cdfbabfbSDavid Howells af_family_kern_slock_key_strings[sk->sk_family],
2041cdfbabfbSDavid Howells af_family_kern_slock_keys + sk->sk_family,
2042cdfbabfbSDavid Howells af_family_kern_key_strings[sk->sk_family],
2043cdfbabfbSDavid Howells af_family_kern_keys + sk->sk_family);
2044cdfbabfbSDavid Howells else
2045cdfbabfbSDavid Howells sock_lock_init_class_and_name(
2046cdfbabfbSDavid Howells sk,
2047ed07536eSPeter Zijlstra af_family_slock_key_strings[sk->sk_family],
2048a5b5bb9aSIngo Molnar af_family_slock_keys + sk->sk_family,
2049a5b5bb9aSIngo Molnar af_family_key_strings[sk->sk_family],
2050ed07536eSPeter Zijlstra af_family_keys + sk->sk_family);
2051a5b5bb9aSIngo Molnar }
2052a5b5bb9aSIngo Molnar
/*
 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
 * even temporarily, because of RCU lookups. sk_node should also be left as is.
 * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end.
 */
static void sock_copy(struct sock *nsk, const struct sock *osk)
{
	const struct proto *prot = READ_ONCE(osk->sk_prot);
#ifdef CONFIG_SECURITY_NETWORK
	/* The first memcpy() below would overwrite nsk's LSM blob pointer,
	 * so save it here and restore it once the raw copy is done.
	 */
	void *sptr = nsk->sk_security;
#endif

	/* If we move sk_tx_queue_mapping out of the private section,
	 * we must check if sk_tx_queue_clear() is called after
	 * sock_copy() in sk_clone_lock().
	 */
	BUILD_BUG_ON(offsetof(struct sock, sk_tx_queue_mapping) <
		     offsetof(struct sock, sk_dontcopy_begin) ||
		     offsetof(struct sock, sk_tx_queue_mapping) >=
		     offsetof(struct sock, sk_dontcopy_end));

	/* Copy everything before the do-not-copy window ... */
	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));

	/* ... and everything after it, up to the full protocol object size.
	 * The [sk_dontcopy_begin, sk_dontcopy_end) window is deliberately
	 * left untouched (refcount, hash node, etc. -- see comment above).
	 */
	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
	       prot->obj_size - offsetof(struct sock, sk_dontcopy_end));

#ifdef CONFIG_SECURITY_NETWORK
	nsk->sk_security = sptr;
	security_sk_clone(osk, nsk);
#endif
}
2084f1a6c4daSPavel Emelyanov
/* Allocate the raw struct sock object for @prot, either from the
 * protocol's dedicated slab cache or, if there is none, via kmalloc().
 * On success the LSM state is allocated and a reference on the owning
 * module is held; returns NULL on any failure (everything acquired so
 * far is rolled back via the goto chain below).
 */
static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
				  int family)
{
	struct sock *sk;
	struct kmem_cache *slab;

	slab = prot->slab;
	if (slab != NULL) {
		/* Strip __GFP_ZERO for slab objects: clearing is done
		 * selectively by sk_prot_clear_nulls() instead.
		 * NOTE(review): presumably because fully zeroing would be
		 * unsafe for RCU-typesafe caches -- confirm against
		 * sk_prot_clear_nulls().
		 */
		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
		if (!sk)
			return sk;
		if (want_init_on_alloc(priority))
			sk_prot_clear_nulls(sk, prot->obj_size);
	} else
		sk = kmalloc(prot->obj_size, priority);

	if (sk != NULL) {
		if (security_sk_alloc(sk, family, priority))
			goto out_free;

		if (!try_module_get(prot->owner))
			goto out_free_sec;
	}

	return sk;

out_free_sec:
	/* Unwind in reverse order of acquisition. */
	security_sk_free(sk);
out_free:
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	return NULL;
}
2120c308c1b2SPavel Emelyanov
/* Release a struct sock previously obtained from sk_prot_alloc():
 * free the cgroup/memcg/LSM state, return the object to its slab (or
 * kfree() it), and finally drop the module reference taken at
 * allocation time.
 */
static void sk_prot_free(struct proto *prot, struct sock *sk)
{
	struct kmem_cache *slab;
	struct module *owner;

	/* Cache owner and slab before the object is freed below. */
	owner = prot->owner;
	slab = prot->slab;

	cgroup_sk_free(&sk->sk_cgrp_data);
	mem_cgroup_sk_free(sk);
	security_sk_free(sk);
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	/* Drop the module reference last, after we are done touching
	 * anything the owning module provides.
	 */
	module_put(owner);
}
2138c308c1b2SPavel Emelyanov
/**
 * sk_alloc - All socket objects are allocated here
 * @net: the applicable net namespace
 * @family: protocol family
 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 * @prot: struct proto associated with this new sock instance
 * @kern: is this to be a kernel socket?
 *
 * Return: the new, zero-initialized socket with one sk_wmem_alloc
 * reference held, or %NULL on allocation failure.
 */
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
		      struct proto *prot, int kern)
{
	struct sock *sk;

	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
	if (sk) {
		sk->sk_family = family;
		/*
		 * See comment in struct sock definition to understand
		 * why we need sk_prot_creator -acme
		 */
		sk->sk_prot = sk->sk_prot_creator = prot;
		sk->sk_kern_sock = kern;
		sock_lock_init(sk);
		/* Only user sockets pin the netns with a real refcount. */
		sk->sk_net_refcnt = kern ? 0 : 1;
		if (likely(sk->sk_net_refcnt)) {
			get_net_track(net, &sk->ns_tracker, priority);
			sock_inuse_add(net, 1);
		} else {
			/* Kernel sockets only register a tracker, so leaked
			 * kernel sockets can be detected at netns destroy
			 * time without elevating the struct net refcount.
			 */
			__netns_tracker_alloc(net, &sk->ns_tracker,
					      false, priority);
		}

		sock_net_set(sk, net);
		/* One bias reference; see sk_free() and sock_wfree(). */
		refcount_set(&sk->sk_wmem_alloc, 1);

		mem_cgroup_sk_alloc(sk);
		cgroup_sk_alloc(&sk->sk_cgrp_data);
		sock_update_classid(&sk->sk_cgrp_data);
		sock_update_netprioidx(&sk->sk_cgrp_data);
		sk_tx_queue_clear(sk);
	}

	return sk;
}
EXPORT_SYMBOL(sk_alloc);
21841da177e4SLinus Torvalds
2185a4298e45SEric Dumazet /* Sockets having SOCK_RCU_FREE will call this function after one RCU
2186a4298e45SEric Dumazet * grace period. This is the case for UDP sockets and TCP listeners.
2187a4298e45SEric Dumazet */
static void __sk_destruct(struct rcu_head *head)
{
	struct sock *sk = container_of(head, struct sock, sk_rcu);
	struct sk_filter *filter;

	/* Protocol-specific destructor runs first, while the rest of the
	 * socket state is still intact.
	 */
	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	/* No new references can appear: sk_wmem_alloc already hit zero. */
	filter = rcu_dereference_check(sk->sk_filter,
				       refcount_read(&sk->sk_wmem_alloc) == 0);
	if (filter) {
		sk_filter_uncharge(sk, filter);
		RCU_INIT_POINTER(sk->sk_filter, NULL);
	}

	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);

#ifdef CONFIG_BPF_SYSCALL
	bpf_sk_storage_free(sk);
#endif

	/* Any remaining option memory at this point is a leak. */
	if (atomic_read(&sk->sk_omem_alloc))
		pr_debug("%s: optmem leakage (%d bytes) detected\n",
			 __func__, atomic_read(&sk->sk_omem_alloc));

	if (sk->sk_frag.page) {
		put_page(sk->sk_frag.page);
		sk->sk_frag.page = NULL;
	}

	/* We do not need to acquire sk->sk_peer_lock, we are the last user. */
	put_cred(sk->sk_peer_cred);
	put_pid(sk->sk_peer_pid);

	if (likely(sk->sk_net_refcnt))
		put_net_track(sock_net(sk), &sk->ns_tracker);
	else
		__netns_tracker_free(sock_net(sk), &sk->ns_tracker, false);

	/* Finally give the object back to its slab / the page allocator. */
	sk_prot_free(sk->sk_prot_creator, sk);
}
22292b85a34eSEric Dumazet
sk_destruct(struct sock * sk)2230a4298e45SEric Dumazet void sk_destruct(struct sock *sk)
2231a4298e45SEric Dumazet {
22328c7138b3SMartin KaFai Lau bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
22338c7138b3SMartin KaFai Lau
22348c7138b3SMartin KaFai Lau if (rcu_access_pointer(sk->sk_reuseport_cb)) {
22358c7138b3SMartin KaFai Lau reuseport_detach_sock(sk);
22368c7138b3SMartin KaFai Lau use_call_rcu = true;
22378c7138b3SMartin KaFai Lau }
22388c7138b3SMartin KaFai Lau
22398c7138b3SMartin KaFai Lau if (use_call_rcu)
2240a4298e45SEric Dumazet call_rcu(&sk->sk_rcu, __sk_destruct);
2241a4298e45SEric Dumazet else
2242a4298e45SEric Dumazet __sk_destruct(&sk->sk_rcu);
2243a4298e45SEric Dumazet }
2244a4298e45SEric Dumazet
__sk_free(struct sock * sk)2245eb4cb008SCraig Gallek static void __sk_free(struct sock *sk)
2246eb4cb008SCraig Gallek {
2247648845abSTonghao Zhang if (likely(sk->sk_net_refcnt))
2248648845abSTonghao Zhang sock_inuse_add(sock_net(sk), -1);
2249648845abSTonghao Zhang
22509709020cSEric Dumazet if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk)))
2251eb4cb008SCraig Gallek sock_diag_broadcast_destroy(sk);
2252eb4cb008SCraig Gallek else
2253eb4cb008SCraig Gallek sk_destruct(sk);
2254eb4cb008SCraig Gallek }
2255eb4cb008SCraig Gallek
sk_free(struct sock * sk)22562b85a34eSEric Dumazet void sk_free(struct sock *sk)
22572b85a34eSEric Dumazet {
22582b85a34eSEric Dumazet /*
225925985edcSLucas De Marchi * We subtract one from sk_wmem_alloc and can know if
22602b85a34eSEric Dumazet * some packets are still in some tx queue.
22612b85a34eSEric Dumazet * If not null, sock_wfree() will call __sk_free(sk) later
22622b85a34eSEric Dumazet */
226314afee4bSReshetova, Elena if (refcount_dec_and_test(&sk->sk_wmem_alloc))
22642b85a34eSEric Dumazet __sk_free(sk);
22652b85a34eSEric Dumazet }
22662a91525cSEric Dumazet EXPORT_SYMBOL(sk_free);
22671da177e4SLinus Torvalds
/* Initialize the state common to freshly allocated and cloned sockets:
 * the three skb queues, the callback lock, and per-family lockdep
 * classes for each of those locks (so lockdep can tell the families'
 * locks apart).
 */
static void sk_init_common(struct sock *sk)
{
	skb_queue_head_init(&sk->sk_receive_queue);
	skb_queue_head_init(&sk->sk_write_queue);
	skb_queue_head_init(&sk->sk_error_queue);

	rwlock_init(&sk->sk_callback_lock);
	lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
			af_rlock_keys + sk->sk_family,
			af_family_rlock_key_strings[sk->sk_family]);
	lockdep_set_class_and_name(&sk->sk_write_queue.lock,
			af_wlock_keys + sk->sk_family,
			af_family_wlock_key_strings[sk->sk_family]);
	lockdep_set_class_and_name(&sk->sk_error_queue.lock,
			af_elock_keys + sk->sk_family,
			af_family_elock_key_strings[sk->sk_family]);
	lockdep_set_class_and_name(&sk->sk_callback_lock,
			af_callback_keys + sk->sk_family,
			af_family_clock_key_strings[sk->sk_family]);
}
2288581319c5SPaolo Abeni
2289e56c57d0SEric Dumazet /**
2290e56c57d0SEric Dumazet * sk_clone_lock - clone a socket, and lock its clone
2291e56c57d0SEric Dumazet * @sk: the socket to clone
2292e56c57d0SEric Dumazet * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
2293e56c57d0SEric Dumazet *
2294e56c57d0SEric Dumazet * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
2295e56c57d0SEric Dumazet */
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
{
	struct proto *prot = READ_ONCE(sk->sk_prot);
	struct sk_filter *filter;
	bool is_charged = true;
	struct sock *newsk;

	newsk = sk_prot_alloc(prot, priority, sk->sk_family);
	if (!newsk)
		goto out;

	/* newsk is now a raw byte copy of the parent, except for the
	 * sk_dontcopy window -- see sock_copy().
	 */
	sock_copy(newsk, sk);

	newsk->sk_prot_creator = prot;

	/* SANITY */
	if (likely(newsk->sk_net_refcnt)) {
		get_net_track(sock_net(newsk), &newsk->ns_tracker, priority);
		sock_inuse_add(sock_net(newsk), 1);
	} else {
		/* Kernel sockets are not elevating the struct net refcount.
		 * Instead, use a tracker to more easily detect if a layer
		 * is not properly dismantling its kernel sockets at netns
		 * destroy time.
		 */
		__netns_tracker_alloc(sock_net(newsk), &newsk->ns_tracker,
				      false, priority);
	}
	sk_node_init(&newsk->sk_node);
	sock_lock_init(newsk);
	/* Return the clone locked, as documented in the kernel-doc above;
	 * the caller must bh_unlock_sock() it even on its own error paths.
	 */
	bh_lock_sock(newsk);
	newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
	newsk->sk_backlog.len = 0;

	atomic_set(&newsk->sk_rmem_alloc, 0);

	/* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */
	refcount_set(&newsk->sk_wmem_alloc, 1);

	atomic_set(&newsk->sk_omem_alloc, 0);
	sk_init_common(newsk);

	newsk->sk_dst_cache	= NULL;
	newsk->sk_dst_pending_confirm = 0;
	newsk->sk_wmem_queued	= 0;
	newsk->sk_forward_alloc = 0;
	newsk->sk_reserved_mem  = 0;
	atomic_set(&newsk->sk_drops, 0);
	newsk->sk_send_head	= NULL;
	newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
	atomic_set(&newsk->sk_zckey, 0);

	sock_reset_flag(newsk, SOCK_DONE);

	/* sk->sk_memcg will be populated at accept() time */
	newsk->sk_memcg = NULL;

	cgroup_sk_clone(&newsk->sk_cgrp_data);

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter != NULL)
		/* though it's an empty new sock, the charging may fail
		 * if sysctl_optmem_max was changed between creation of
		 * original socket and cloning
		 */
		is_charged = sk_filter_charge(newsk, filter);
	RCU_INIT_POINTER(newsk->sk_filter, filter);
	rcu_read_unlock();

	if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
		/* We need to make sure that we don't uncharge the new
		 * socket if we couldn't charge it in the first place
		 * as otherwise we uncharge the parent's filter.
		 */
		if (!is_charged)
			RCU_INIT_POINTER(newsk->sk_filter, NULL);
		sk_free_unlock_clone(newsk);
		newsk = NULL;
		goto out;
	}
	RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);

	if (bpf_sk_storage_clone(sk, newsk)) {
		sk_free_unlock_clone(newsk);
		newsk = NULL;
		goto out;
	}

	/* Clear sk_user_data if parent had the pointer tagged
	 * as not suitable for copying when cloning.
	 */
	if (sk_user_data_is_nocopy(newsk))
		newsk->sk_user_data = NULL;

	newsk->sk_err	   = 0;
	newsk->sk_err_soft = 0;
	newsk->sk_priority = 0;
	newsk->sk_incoming_cpu = raw_smp_processor_id();

	/* Before updating sk_refcnt, we must commit prior changes to memory
	 * (Documentation/RCU/rculist_nulls.rst for details)
	 */
	smp_wmb();
	/* NOTE(review): refcount starts at 2 -- presumably one reference
	 * for the caller and one for the protocol's hash insertion;
	 * confirm against callers.
	 */
	refcount_set(&newsk->sk_refcnt, 2);

	sk_set_socket(newsk, NULL);
	sk_tx_queue_clear(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, NULL);

	if (newsk->sk_prot->sockets_allocated)
		sk_sockets_allocated_inc(newsk);

	if (sock_needs_netstamp(sk) && newsk->sk_flags & SK_FLAGS_TIMESTAMP)
		net_enable_timestamp();
out:
	return newsk;
}
EXPORT_SYMBOL_GPL(sk_clone_lock);
241587d11cebSArnaldo Carvalho de Melo
/* Error-path helper for sk_clone_lock(): dispose of a half-initialized
 * clone that is still locked.
 */
void sk_free_unlock_clone(struct sock *sk)
{
	/* It is still a raw copy of the parent, so invalidate the
	 * destructor (which belongs to the parent's state) and do a
	 * plain sk_free().
	 */
	sk->sk_destruct = NULL;
	bh_unlock_sock(sk);
	sk_free(sk);
}
EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
242594352d45SArnaldo Carvalho de Melo
sk_dst_gso_max_size(struct sock * sk,struct dst_entry * dst)2426b1a78b9bSXin Long static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst)
24277c4e983cSAlexander Duyck {
2428b1a78b9bSXin Long bool is_ipv6 = false;
2429b1a78b9bSXin Long u32 max_size;
2430b1a78b9bSXin Long
24317c4e983cSAlexander Duyck #if IS_ENABLED(CONFIG_IPV6)
2432b1a78b9bSXin Long is_ipv6 = (sk->sk_family == AF_INET6 &&
2433b1a78b9bSXin Long !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr));
24347c4e983cSAlexander Duyck #endif
2435b1a78b9bSXin Long /* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */
2436b1a78b9bSXin Long max_size = is_ipv6 ? READ_ONCE(dst->dev->gso_max_size) :
2437b1a78b9bSXin Long READ_ONCE(dst->dev->gso_ipv4_max_size);
2438b1a78b9bSXin Long if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk))
2439b1a78b9bSXin Long max_size = GSO_LEGACY_MAX_SIZE;
2440b1a78b9bSXin Long
2441b1a78b9bSXin Long return max_size - (MAX_TCP_HEADER + 1);
24427c4e983cSAlexander Duyck }
24437c4e983cSAlexander Duyck
/* Derive the socket's route capabilities (sk_route_caps, GSO limits)
 * from @dst's device features and install @dst as the socket's cached
 * route. Must run the feature computation before sk_dst_set() publishes
 * the dst.
 */
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
	u32 max_segs = 1;

	sk->sk_route_caps = dst->dev->features;
	if (sk_is_tcp(sk))
		sk->sk_route_caps |= NETIF_F_GSO;
	if (sk->sk_route_caps & NETIF_F_GSO)
		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
	/* A disabled-GSO socket overrides whatever the device offers. */
	if (unlikely(sk->sk_gso_disabled))
		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
	if (sk_can_gso(sk)) {
		/* An xfrm state that cannot offload forbids GSO here. */
		if (dst->header_len && !xfrm_dst_offload_ok(dst)) {
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
		} else {
			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
			sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst);
			/* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
			max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
		}
	}
	sk->sk_gso_max_segs = max_segs;
	sk_dst_set(sk, dst);
}
EXPORT_SYMBOL_GPL(sk_setup_caps);
24699958089aSAndi Kleen
24701da177e4SLinus Torvalds /*
24711da177e4SLinus Torvalds * Simple resource managers for sockets.
24721da177e4SLinus Torvalds */
24731da177e4SLinus Torvalds
24741da177e4SLinus Torvalds
24751da177e4SLinus Torvalds /*
24761da177e4SLinus Torvalds * Write buffer destructor automatically called from kfree_skb.
24771da177e4SLinus Torvalds */
sock_wfree(struct sk_buff * skb)24781da177e4SLinus Torvalds void sock_wfree(struct sk_buff *skb)
24791da177e4SLinus Torvalds {
24801da177e4SLinus Torvalds struct sock *sk = skb->sk;
2481d99927f4SEric Dumazet unsigned int len = skb->truesize;
2482052ada09SPavel Begunkov bool free;
24831da177e4SLinus Torvalds
2484d99927f4SEric Dumazet if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
2485052ada09SPavel Begunkov if (sock_flag(sk, SOCK_RCU_FREE) &&
2486052ada09SPavel Begunkov sk->sk_write_space == sock_def_write_space) {
2487052ada09SPavel Begunkov rcu_read_lock();
2488052ada09SPavel Begunkov free = refcount_sub_and_test(len, &sk->sk_wmem_alloc);
24890a8afd9fSPavel Begunkov sock_def_write_space_wfree(sk);
2490052ada09SPavel Begunkov rcu_read_unlock();
2491052ada09SPavel Begunkov if (unlikely(free))
2492052ada09SPavel Begunkov __sk_free(sk);
2493052ada09SPavel Begunkov return;
2494052ada09SPavel Begunkov }
2495052ada09SPavel Begunkov
24962b85a34eSEric Dumazet /*
2497d99927f4SEric Dumazet * Keep a reference on sk_wmem_alloc, this will be released
2498d99927f4SEric Dumazet * after sk_write_space() call
24992b85a34eSEric Dumazet */
250014afee4bSReshetova, Elena WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc));
2501d99927f4SEric Dumazet sk->sk_write_space(sk);
2502d99927f4SEric Dumazet len = 1;
2503d99927f4SEric Dumazet }
2504d99927f4SEric Dumazet /*
2505d99927f4SEric Dumazet * if sk_wmem_alloc reaches 0, we must finish what sk_free()
2506d99927f4SEric Dumazet * could not do because of in-flight packets
2507d99927f4SEric Dumazet */
250814afee4bSReshetova, Elena if (refcount_sub_and_test(len, &sk->sk_wmem_alloc))
25092b85a34eSEric Dumazet __sk_free(sk);
25101da177e4SLinus Torvalds }
25112a91525cSEric Dumazet EXPORT_SYMBOL(sock_wfree);
25121da177e4SLinus Torvalds
25131d2077acSEric Dumazet /* This variant of sock_wfree() is used by TCP,
25141d2077acSEric Dumazet * since it sets SOCK_USE_WRITE_QUEUE.
25151d2077acSEric Dumazet */
__sock_wfree(struct sk_buff * skb)25161d2077acSEric Dumazet void __sock_wfree(struct sk_buff *skb)
25171d2077acSEric Dumazet {
25181d2077acSEric Dumazet struct sock *sk = skb->sk;
25191d2077acSEric Dumazet
252014afee4bSReshetova, Elena if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
25211d2077acSEric Dumazet __sk_free(sk);
25221d2077acSEric Dumazet }
25231d2077acSEric Dumazet
skb_set_owner_w(struct sk_buff * skb,struct sock * sk)25249e17f8a4SEric Dumazet void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
25259e17f8a4SEric Dumazet {
25269e17f8a4SEric Dumazet skb_orphan(skb);
25279e17f8a4SEric Dumazet skb->sk = sk;
25289e17f8a4SEric Dumazet #ifdef CONFIG_INET
25299e17f8a4SEric Dumazet if (unlikely(!sk_fullsock(sk))) {
25309e17f8a4SEric Dumazet skb->destructor = sock_edemux;
25319e17f8a4SEric Dumazet sock_hold(sk);
25329e17f8a4SEric Dumazet return;
25339e17f8a4SEric Dumazet }
25349e17f8a4SEric Dumazet #endif
25359e17f8a4SEric Dumazet skb->destructor = sock_wfree;
25369e17f8a4SEric Dumazet skb_set_hash_from_sk(skb, sk);
25379e17f8a4SEric Dumazet /*
25389e17f8a4SEric Dumazet * We used to take a refcount on sk, but following operation
25399e17f8a4SEric Dumazet * is enough to guarantee sk_free() wont free this sock until
25409e17f8a4SEric Dumazet * all in-flight packets are completed
25419e17f8a4SEric Dumazet */
254214afee4bSReshetova, Elena refcount_add(skb->truesize, &sk->sk_wmem_alloc);
25439e17f8a4SEric Dumazet }
25449e17f8a4SEric Dumazet EXPORT_SYMBOL(skb_set_owner_w);
25459e17f8a4SEric Dumazet
can_skb_orphan_partial(const struct sk_buff * skb)254641477662SJakub Kicinski static bool can_skb_orphan_partial(const struct sk_buff *skb)
254741477662SJakub Kicinski {
254841477662SJakub Kicinski #ifdef CONFIG_TLS_DEVICE
254941477662SJakub Kicinski /* Drivers depend on in-order delivery for crypto offload,
255041477662SJakub Kicinski * partial orphan breaks out-of-order-OK logic.
255141477662SJakub Kicinski */
255241477662SJakub Kicinski if (skb->decrypted)
255341477662SJakub Kicinski return false;
255441477662SJakub Kicinski #endif
255541477662SJakub Kicinski return (skb->destructor == sock_wfree ||
255641477662SJakub Kicinski (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
255741477662SJakub Kicinski }
255841477662SJakub Kicinski
25591d2077acSEric Dumazet /* This helper is used by netem, as it can hold packets in its
25601d2077acSEric Dumazet * delay queue. We want to allow the owner socket to send more
25611d2077acSEric Dumazet * packets, as if they were already TX completed by a typical driver.
25621d2077acSEric Dumazet * But we also want to keep skb->sk set because some packet schedulers
2563f6ba8d33SEric Dumazet * rely on it (sch_fq for example).
25641d2077acSEric Dumazet */
skb_orphan_partial(struct sk_buff * skb)2565f2f872f9SEric Dumazet void skb_orphan_partial(struct sk_buff *skb)
2566f2f872f9SEric Dumazet {
2567f6ba8d33SEric Dumazet if (skb_is_tcp_pure_ack(skb))
25681d2077acSEric Dumazet return;
25691d2077acSEric Dumazet
2570098116e7SPaolo Abeni if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk))
2571098116e7SPaolo Abeni return;
2572098116e7SPaolo Abeni
2573f2f872f9SEric Dumazet skb_orphan(skb);
2574f2f872f9SEric Dumazet }
2575f2f872f9SEric Dumazet EXPORT_SYMBOL(skb_orphan_partial);
2576f2f872f9SEric Dumazet
25771da177e4SLinus Torvalds /*
25781da177e4SLinus Torvalds * Read buffer destructor automatically called from kfree_skb.
25791da177e4SLinus Torvalds */
sock_rfree(struct sk_buff * skb)25801da177e4SLinus Torvalds void sock_rfree(struct sk_buff *skb)
25811da177e4SLinus Torvalds {
25821da177e4SLinus Torvalds struct sock *sk = skb->sk;
2583d361fd59SEric Dumazet unsigned int len = skb->truesize;
25841da177e4SLinus Torvalds
2585d361fd59SEric Dumazet atomic_sub(len, &sk->sk_rmem_alloc);
2586d361fd59SEric Dumazet sk_mem_uncharge(sk, len);
25871da177e4SLinus Torvalds }
25882a91525cSEric Dumazet EXPORT_SYMBOL(sock_rfree);
25891da177e4SLinus Torvalds
25907768eed8SOliver Hartkopp /*
25917768eed8SOliver Hartkopp * Buffer destructor for skbs that are not used directly in read or write
25927768eed8SOliver Hartkopp * path, e.g. for error handler skbs. Automatically called from kfree_skb.
25937768eed8SOliver Hartkopp */
sock_efree(struct sk_buff * skb)259462bccb8cSAlexander Duyck void sock_efree(struct sk_buff *skb)
259562bccb8cSAlexander Duyck {
259662bccb8cSAlexander Duyck sock_put(skb->sk);
259762bccb8cSAlexander Duyck }
259862bccb8cSAlexander Duyck EXPORT_SYMBOL(sock_efree);
259962bccb8cSAlexander Duyck
2600cf7fbe66SJoe Stringer /* Buffer destructor for prefetch/receive path where reference count may
2601cf7fbe66SJoe Stringer * not be held, e.g. for listen sockets.
2602cf7fbe66SJoe Stringer */
2603cf7fbe66SJoe Stringer #ifdef CONFIG_INET
sock_pfree(struct sk_buff * skb)2604cf7fbe66SJoe Stringer void sock_pfree(struct sk_buff *skb)
2605cf7fbe66SJoe Stringer {
26067ae215d2SJoe Stringer if (sk_is_refcounted(skb->sk))
2607cf7fbe66SJoe Stringer sock_gen_put(skb->sk);
2608cf7fbe66SJoe Stringer }
2609cf7fbe66SJoe Stringer EXPORT_SYMBOL(sock_pfree);
2610cf7fbe66SJoe Stringer #endif /* CONFIG_INET */
2611cf7fbe66SJoe Stringer
sock_i_uid(struct sock * sk)2612976d0201SEric W. Biederman kuid_t sock_i_uid(struct sock *sk)
26131da177e4SLinus Torvalds {
2614976d0201SEric W. Biederman kuid_t uid;
26151da177e4SLinus Torvalds
2616f064af1eSEric Dumazet read_lock_bh(&sk->sk_callback_lock);
2617976d0201SEric W. Biederman uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
2618f064af1eSEric Dumazet read_unlock_bh(&sk->sk_callback_lock);
26191da177e4SLinus Torvalds return uid;
26201da177e4SLinus Torvalds }
26212a91525cSEric Dumazet EXPORT_SYMBOL(sock_i_uid);
26221da177e4SLinus Torvalds
__sock_i_ino(struct sock * sk)262325a9c8a4SKuniyuki Iwashima unsigned long __sock_i_ino(struct sock *sk)
262425a9c8a4SKuniyuki Iwashima {
262525a9c8a4SKuniyuki Iwashima unsigned long ino;
262625a9c8a4SKuniyuki Iwashima
262725a9c8a4SKuniyuki Iwashima read_lock(&sk->sk_callback_lock);
262825a9c8a4SKuniyuki Iwashima ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
262925a9c8a4SKuniyuki Iwashima read_unlock(&sk->sk_callback_lock);
263025a9c8a4SKuniyuki Iwashima return ino;
263125a9c8a4SKuniyuki Iwashima }
263225a9c8a4SKuniyuki Iwashima EXPORT_SYMBOL(__sock_i_ino);
263325a9c8a4SKuniyuki Iwashima
/* Return the inode number backing @sk, or 0 if already detached.
 * Disables BHs around __sock_i_ino() so sk_callback_lock is safe
 * against softirq readers.
 */
unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	local_bh_disable();
	ino = __sock_i_ino(sk);
	local_bh_enable();
	return ino;
}
EXPORT_SYMBOL(sock_i_ino);
26441da177e4SLinus Torvalds
26451da177e4SLinus Torvalds /*
26461da177e4SLinus Torvalds * Allocate a skb from the socket's send buffer.
26471da177e4SLinus Torvalds */
/* Allocate an skb charged to @sk's send buffer.  Fails with NULL when
 * the send buffer is full (unless @force) or the allocation itself fails.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	if (force ||
	    refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) {
		struct sk_buff *skb = alloc_skb(size, priority);

		if (skb) {
			skb_set_owner_w(skb, sk);
			return skb;
		}
	}
	return NULL;
}
EXPORT_SYMBOL(sock_wmalloc);
26631da177e4SLinus Torvalds
sock_ofree(struct sk_buff * skb)266498ba0bd5SWillem de Bruijn static void sock_ofree(struct sk_buff *skb)
266598ba0bd5SWillem de Bruijn {
266698ba0bd5SWillem de Bruijn struct sock *sk = skb->sk;
266798ba0bd5SWillem de Bruijn
266898ba0bd5SWillem de Bruijn atomic_sub(skb->truesize, &sk->sk_omem_alloc);
266998ba0bd5SWillem de Bruijn }
267098ba0bd5SWillem de Bruijn
/* Allocate an skb charged to @sk's option memory (sk_omem_alloc),
 * bounded by the optmem_max sysctl.  Returns NULL when over limit or
 * out of memory.
 */
struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
			     gfp_t priority)
{
	struct sk_buff *skb;

	/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
	if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
	    READ_ONCE(sysctl_optmem_max))
		return NULL;

	skb = alloc_skb(size, priority);
	if (!skb)
		return NULL;

	atomic_add(skb->truesize, &sk->sk_omem_alloc);
	skb->sk = sk;
	skb->destructor = sock_ofree;
	return skb;
}
269098ba0bd5SWillem de Bruijn
26911da177e4SLinus Torvalds /*
26921da177e4SLinus Torvalds * Allocate a memory block from the socket's option memory buffer.
26931da177e4SLinus Torvalds */
/* Allocate a memory block from the socket's option memory buffer,
 * bounded by the optmem_max sysctl.  Returns NULL when over limit or
 * when kmalloc() fails.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
	int optmem_max = READ_ONCE(sysctl_optmem_max);

	if ((unsigned int)size <= optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
		void *mem;
		/* First do the add, to avoid the race if kmalloc
		 * might sleep.
		 */
		atomic_add(size, &sk->sk_omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		atomic_sub(size, &sk->sk_omem_alloc);
	}
	return NULL;
}
EXPORT_SYMBOL(sock_kmalloc);
27131da177e4SLinus Torvalds
271479e88659SDaniel Borkmann /* Free an option memory block. Note, we actually want the inline
271579e88659SDaniel Borkmann * here as this allows gcc to detect the nullify and fold away the
271679e88659SDaniel Borkmann * condition entirely.
27171da177e4SLinus Torvalds */
__sock_kfree_s(struct sock * sk,void * mem,int size,const bool nullify)271879e88659SDaniel Borkmann static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
271979e88659SDaniel Borkmann const bool nullify)
27201da177e4SLinus Torvalds {
2721e53da5fbSDavid S. Miller if (WARN_ON_ONCE(!mem))
2722e53da5fbSDavid S. Miller return;
272379e88659SDaniel Borkmann if (nullify)
2724453431a5SWaiman Long kfree_sensitive(mem);
272579e88659SDaniel Borkmann else
27261da177e4SLinus Torvalds kfree(mem);
27271da177e4SLinus Torvalds atomic_sub(size, &sk->sk_omem_alloc);
27281da177e4SLinus Torvalds }
272979e88659SDaniel Borkmann
sock_kfree_s(struct sock * sk,void * mem,int size)273079e88659SDaniel Borkmann void sock_kfree_s(struct sock *sk, void *mem, int size)
273179e88659SDaniel Borkmann {
273279e88659SDaniel Borkmann __sock_kfree_s(sk, mem, size, false);
273379e88659SDaniel Borkmann }
27342a91525cSEric Dumazet EXPORT_SYMBOL(sock_kfree_s);
27351da177e4SLinus Torvalds
sock_kzfree_s(struct sock * sk,void * mem,int size)273679e88659SDaniel Borkmann void sock_kzfree_s(struct sock *sk, void *mem, int size)
273779e88659SDaniel Borkmann {
273879e88659SDaniel Borkmann __sock_kfree_s(sk, mem, size, true);
273979e88659SDaniel Borkmann }
274079e88659SDaniel Borkmann EXPORT_SYMBOL(sock_kzfree_s);
274179e88659SDaniel Borkmann
27421da177e4SLinus Torvalds /* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
27431da177e4SLinus Torvalds I think, these locks should be removed for datagram sockets.
27441da177e4SLinus Torvalds */
sock_wait_for_wmem(struct sock * sk,long timeo)27451da177e4SLinus Torvalds static long sock_wait_for_wmem(struct sock *sk, long timeo)
27461da177e4SLinus Torvalds {
27471da177e4SLinus Torvalds DEFINE_WAIT(wait);
27481da177e4SLinus Torvalds
27499cd3e072SEric Dumazet sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
27501da177e4SLinus Torvalds for (;;) {
27511da177e4SLinus Torvalds if (!timeo)
27521da177e4SLinus Torvalds break;
27531da177e4SLinus Torvalds if (signal_pending(current))
27541da177e4SLinus Torvalds break;
27551da177e4SLinus Torvalds set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
2756aa395145SEric Dumazet prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2757e292f05eSEric Dumazet if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
27581da177e4SLinus Torvalds break;
2759afe8764fSKuniyuki Iwashima if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
27601da177e4SLinus Torvalds break;
2761b1928129SKuniyuki Iwashima if (READ_ONCE(sk->sk_err))
27621da177e4SLinus Torvalds break;
27631da177e4SLinus Torvalds timeo = schedule_timeout(timeo);
27641da177e4SLinus Torvalds }
2765aa395145SEric Dumazet finish_wait(sk_sleep(sk), &wait);
27661da177e4SLinus Torvalds return timeo;
27671da177e4SLinus Torvalds }
27681da177e4SLinus Torvalds
27691da177e4SLinus Torvalds
27701da177e4SLinus Torvalds /*
27711da177e4SLinus Torvalds * Generic send/receive buffer handlers
27721da177e4SLinus Torvalds */
27731da177e4SLinus Torvalds
sock_alloc_send_pskb(struct sock * sk,unsigned long header_len,unsigned long data_len,int noblock,int * errcode,int max_page_order)27744cc7f68dSHerbert Xu struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
27754cc7f68dSHerbert Xu unsigned long data_len, int noblock,
277628d64271SEric Dumazet int *errcode, int max_page_order)
27771da177e4SLinus Torvalds {
27782e4e4410SEric Dumazet struct sk_buff *skb;
27791da177e4SLinus Torvalds long timeo;
27801da177e4SLinus Torvalds int err;
27811da177e4SLinus Torvalds
27821da177e4SLinus Torvalds timeo = sock_sndtimeo(sk, noblock);
27832e4e4410SEric Dumazet for (;;) {
27841da177e4SLinus Torvalds err = sock_error(sk);
27851da177e4SLinus Torvalds if (err != 0)
27861da177e4SLinus Torvalds goto failure;
27871da177e4SLinus Torvalds
27881da177e4SLinus Torvalds err = -EPIPE;
2789afe8764fSKuniyuki Iwashima if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
27901da177e4SLinus Torvalds goto failure;
27911da177e4SLinus Torvalds
2792e292f05eSEric Dumazet if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
27932e4e4410SEric Dumazet break;
27942e4e4410SEric Dumazet
27959cd3e072SEric Dumazet sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
27961da177e4SLinus Torvalds set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
27971da177e4SLinus Torvalds err = -EAGAIN;
27981da177e4SLinus Torvalds if (!timeo)
27991da177e4SLinus Torvalds goto failure;
28001da177e4SLinus Torvalds if (signal_pending(current))
28011da177e4SLinus Torvalds goto interrupted;
28021da177e4SLinus Torvalds timeo = sock_wait_for_wmem(sk, timeo);
280328d64271SEric Dumazet }
28042e4e4410SEric Dumazet skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
28052e4e4410SEric Dumazet errcode, sk->sk_allocation);
28062e4e4410SEric Dumazet if (skb)
28071da177e4SLinus Torvalds skb_set_owner_w(skb, sk);
28081da177e4SLinus Torvalds return skb;
28091da177e4SLinus Torvalds
28101da177e4SLinus Torvalds interrupted:
28111da177e4SLinus Torvalds err = sock_intr_errno(timeo);
28121da177e4SLinus Torvalds failure:
28131da177e4SLinus Torvalds *errcode = err;
28141da177e4SLinus Torvalds return NULL;
28151da177e4SLinus Torvalds }
28164cc7f68dSHerbert Xu EXPORT_SYMBOL(sock_alloc_send_pskb);
28171da177e4SLinus Torvalds
__sock_cmsg_send(struct sock * sk,struct cmsghdr * cmsg,struct sockcm_cookie * sockc)2818233baf9aSxu xin int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
2819f28ea365SEdward Jee struct sockcm_cookie *sockc)
2820f28ea365SEdward Jee {
28213dd17e63SSoheil Hassas Yeganeh u32 tsflags;
28223dd17e63SSoheil Hassas Yeganeh
2823f28ea365SEdward Jee switch (cmsg->cmsg_type) {
2824f28ea365SEdward Jee case SO_MARK:
282591f0d8a4SJakub Kicinski if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
282691f0d8a4SJakub Kicinski !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2827f28ea365SEdward Jee return -EPERM;
2828f28ea365SEdward Jee if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
2829f28ea365SEdward Jee return -EINVAL;
2830f28ea365SEdward Jee sockc->mark = *(u32 *)CMSG_DATA(cmsg);
2831f28ea365SEdward Jee break;
28327f1bc6e9SDeepa Dinamani case SO_TIMESTAMPING_OLD:
2833200bc366SThomas Lange case SO_TIMESTAMPING_NEW:
28343dd17e63SSoheil Hassas Yeganeh if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
28353dd17e63SSoheil Hassas Yeganeh return -EINVAL;
28363dd17e63SSoheil Hassas Yeganeh
28373dd17e63SSoheil Hassas Yeganeh tsflags = *(u32 *)CMSG_DATA(cmsg);
28383dd17e63SSoheil Hassas Yeganeh if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
28393dd17e63SSoheil Hassas Yeganeh return -EINVAL;
28403dd17e63SSoheil Hassas Yeganeh
28413dd17e63SSoheil Hassas Yeganeh sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
28423dd17e63SSoheil Hassas Yeganeh sockc->tsflags |= tsflags;
28433dd17e63SSoheil Hassas Yeganeh break;
284480b14deeSRichard Cochran case SCM_TXTIME:
284580b14deeSRichard Cochran if (!sock_flag(sk, SOCK_TXTIME))
284680b14deeSRichard Cochran return -EINVAL;
284780b14deeSRichard Cochran if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64)))
284880b14deeSRichard Cochran return -EINVAL;
284980b14deeSRichard Cochran sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
285080b14deeSRichard Cochran break;
2851779f1edeSSoheil Hassas Yeganeh /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
2852779f1edeSSoheil Hassas Yeganeh case SCM_RIGHTS:
2853779f1edeSSoheil Hassas Yeganeh case SCM_CREDENTIALS:
2854779f1edeSSoheil Hassas Yeganeh break;
2855f28ea365SEdward Jee default:
2856f28ea365SEdward Jee return -EINVAL;
2857f28ea365SEdward Jee }
285839771b12SWillem de Bruijn return 0;
285939771b12SWillem de Bruijn }
286039771b12SWillem de Bruijn EXPORT_SYMBOL(__sock_cmsg_send);
286139771b12SWillem de Bruijn
sock_cmsg_send(struct sock * sk,struct msghdr * msg,struct sockcm_cookie * sockc)286239771b12SWillem de Bruijn int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
286339771b12SWillem de Bruijn struct sockcm_cookie *sockc)
286439771b12SWillem de Bruijn {
286539771b12SWillem de Bruijn struct cmsghdr *cmsg;
286639771b12SWillem de Bruijn int ret;
286739771b12SWillem de Bruijn
286839771b12SWillem de Bruijn for_each_cmsghdr(cmsg, msg) {
286939771b12SWillem de Bruijn if (!CMSG_OK(msg, cmsg))
287039771b12SWillem de Bruijn return -EINVAL;
287139771b12SWillem de Bruijn if (cmsg->cmsg_level != SOL_SOCKET)
287239771b12SWillem de Bruijn continue;
2873233baf9aSxu xin ret = __sock_cmsg_send(sk, cmsg, sockc);
287439771b12SWillem de Bruijn if (ret)
287539771b12SWillem de Bruijn return ret;
2876f28ea365SEdward Jee }
2877f28ea365SEdward Jee return 0;
2878f28ea365SEdward Jee }
2879f28ea365SEdward Jee EXPORT_SYMBOL(sock_cmsg_send);
2880f28ea365SEdward Jee
sk_enter_memory_pressure(struct sock * sk)288106044751SEric Dumazet static void sk_enter_memory_pressure(struct sock *sk)
288206044751SEric Dumazet {
288306044751SEric Dumazet if (!sk->sk_prot->enter_memory_pressure)
288406044751SEric Dumazet return;
288506044751SEric Dumazet
288606044751SEric Dumazet sk->sk_prot->enter_memory_pressure(sk);
288706044751SEric Dumazet }
288806044751SEric Dumazet
sk_leave_memory_pressure(struct sock * sk)288906044751SEric Dumazet static void sk_leave_memory_pressure(struct sock *sk)
289006044751SEric Dumazet {
289106044751SEric Dumazet if (sk->sk_prot->leave_memory_pressure) {
28925c1ebbfaSBrian Vazquez INDIRECT_CALL_INET_1(sk->sk_prot->leave_memory_pressure,
28935c1ebbfaSBrian Vazquez tcp_leave_memory_pressure, sk);
289406044751SEric Dumazet } else {
289506044751SEric Dumazet unsigned long *memory_pressure = sk->sk_prot->memory_pressure;
289606044751SEric Dumazet
2897503978acSEric Dumazet if (memory_pressure && READ_ONCE(*memory_pressure))
2898503978acSEric Dumazet WRITE_ONCE(*memory_pressure, 0);
289906044751SEric Dumazet }
290006044751SEric Dumazet }
290106044751SEric Dumazet
2902ce27ec60SEric Dumazet DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
29035640f768SEric Dumazet
2904400dfd3aSEric Dumazet /**
2905400dfd3aSEric Dumazet * skb_page_frag_refill - check that a page_frag contains enough room
2906400dfd3aSEric Dumazet * @sz: minimum size of the fragment we want to get
2907400dfd3aSEric Dumazet * @pfrag: pointer to page_frag
290882d5e2b8SEric Dumazet * @gfp: priority for memory allocation
2909400dfd3aSEric Dumazet *
2910400dfd3aSEric Dumazet * Note: While this allocator tries to use high order pages, there is
2911400dfd3aSEric Dumazet * no guarantee that allocations succeed. Therefore, @sz MUST be
2912400dfd3aSEric Dumazet * less or equal than PAGE_SIZE.
2913400dfd3aSEric Dumazet */
skb_page_frag_refill(unsigned int sz,struct page_frag * pfrag,gfp_t gfp)2914d9b2938aSEric Dumazet bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
29155640f768SEric Dumazet {
29165640f768SEric Dumazet if (pfrag->page) {
2917fe896d18SJoonsoo Kim if (page_ref_count(pfrag->page) == 1) {
29185640f768SEric Dumazet pfrag->offset = 0;
29195640f768SEric Dumazet return true;
29205640f768SEric Dumazet }
2921400dfd3aSEric Dumazet if (pfrag->offset + sz <= pfrag->size)
29225640f768SEric Dumazet return true;
29235640f768SEric Dumazet put_page(pfrag->page);
29245640f768SEric Dumazet }
29255640f768SEric Dumazet
29265640f768SEric Dumazet pfrag->offset = 0;
2927ce27ec60SEric Dumazet if (SKB_FRAG_PAGE_ORDER &&
2928ce27ec60SEric Dumazet !static_branch_unlikely(&net_high_order_alloc_disable_key)) {
2929d0164adcSMel Gorman /* Avoid direct reclaim but allow kswapd to wake */
2930d0164adcSMel Gorman pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
2931d0164adcSMel Gorman __GFP_COMP | __GFP_NOWARN |
2932d0164adcSMel Gorman __GFP_NORETRY,
2933d9b2938aSEric Dumazet SKB_FRAG_PAGE_ORDER);
2934d9b2938aSEric Dumazet if (likely(pfrag->page)) {
2935d9b2938aSEric Dumazet pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
29365640f768SEric Dumazet return true;
29375640f768SEric Dumazet }
2938d9b2938aSEric Dumazet }
2939d9b2938aSEric Dumazet pfrag->page = alloc_page(gfp);
2940d9b2938aSEric Dumazet if (likely(pfrag->page)) {
2941d9b2938aSEric Dumazet pfrag->size = PAGE_SIZE;
2942d9b2938aSEric Dumazet return true;
2943d9b2938aSEric Dumazet }
2944400dfd3aSEric Dumazet return false;
2945400dfd3aSEric Dumazet }
2946400dfd3aSEric Dumazet EXPORT_SYMBOL(skb_page_frag_refill);
2947400dfd3aSEric Dumazet
sk_page_frag_refill(struct sock * sk,struct page_frag * pfrag)2948400dfd3aSEric Dumazet bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
2949400dfd3aSEric Dumazet {
2950400dfd3aSEric Dumazet if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
2951400dfd3aSEric Dumazet return true;
2952400dfd3aSEric Dumazet
29535640f768SEric Dumazet sk_enter_memory_pressure(sk);
29545640f768SEric Dumazet sk_stream_moderate_sndbuf(sk);
29555640f768SEric Dumazet return false;
29565640f768SEric Dumazet }
29575640f768SEric Dumazet EXPORT_SYMBOL(sk_page_frag_refill);
29585640f768SEric Dumazet
__lock_sock(struct sock * sk)2959ad80b0fcSPaolo Abeni void __lock_sock(struct sock *sk)
2960f39234d6SNamhyung Kim __releases(&sk->sk_lock.slock)
2961f39234d6SNamhyung Kim __acquires(&sk->sk_lock.slock)
29621da177e4SLinus Torvalds {
29631da177e4SLinus Torvalds DEFINE_WAIT(wait);
29641da177e4SLinus Torvalds
29651da177e4SLinus Torvalds for (;;) {
29661da177e4SLinus Torvalds prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
29671da177e4SLinus Torvalds TASK_UNINTERRUPTIBLE);
29681da177e4SLinus Torvalds spin_unlock_bh(&sk->sk_lock.slock);
29691da177e4SLinus Torvalds schedule();
29701da177e4SLinus Torvalds spin_lock_bh(&sk->sk_lock.slock);
29711da177e4SLinus Torvalds if (!sock_owned_by_user(sk))
29721da177e4SLinus Torvalds break;
29731da177e4SLinus Torvalds }
29741da177e4SLinus Torvalds finish_wait(&sk->sk_lock.wq, &wait);
29751da177e4SLinus Torvalds }
29761da177e4SLinus Torvalds
__release_sock(struct sock * sk)29778873c064SEric Dumazet void __release_sock(struct sock *sk)
2978f39234d6SNamhyung Kim __releases(&sk->sk_lock.slock)
2979f39234d6SNamhyung Kim __acquires(&sk->sk_lock.slock)
29801da177e4SLinus Torvalds {
29815413d1baSEric Dumazet struct sk_buff *skb, *next;
29821da177e4SLinus Torvalds
29835413d1baSEric Dumazet while ((skb = sk->sk_backlog.head) != NULL) {
29841da177e4SLinus Torvalds sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
29855413d1baSEric Dumazet
29865413d1baSEric Dumazet spin_unlock_bh(&sk->sk_lock.slock);
29871da177e4SLinus Torvalds
29881da177e4SLinus Torvalds do {
29895413d1baSEric Dumazet next = skb->next;
2990e4cbb02aSEric Dumazet prefetch(next);
299163fbdd3cSEric Dumazet DEBUG_NET_WARN_ON_ONCE(skb_dst_is_noref(skb));
2992a8305bffSDavid S. Miller skb_mark_not_on_list(skb);
2993c57943a1SPeter Zijlstra sk_backlog_rcv(sk, skb);
29941da177e4SLinus Torvalds
29955413d1baSEric Dumazet cond_resched();
29961da177e4SLinus Torvalds
29971da177e4SLinus Torvalds skb = next;
29981da177e4SLinus Torvalds } while (skb != NULL);
29991da177e4SLinus Torvalds
30005413d1baSEric Dumazet spin_lock_bh(&sk->sk_lock.slock);
30015413d1baSEric Dumazet }
30028eae939fSZhu Yi
30038eae939fSZhu Yi /*
30048eae939fSZhu Yi * Doing the zeroing here guarantee we can not loop forever
30058eae939fSZhu Yi * while a wild producer attempts to flood us.
30068eae939fSZhu Yi */
30078eae939fSZhu Yi sk->sk_backlog.len = 0;
30081da177e4SLinus Torvalds }
30091da177e4SLinus Torvalds
__sk_flush_backlog(struct sock * sk)3010d41a69f1SEric Dumazet void __sk_flush_backlog(struct sock *sk)
3011d41a69f1SEric Dumazet {
3012d41a69f1SEric Dumazet spin_lock_bh(&sk->sk_lock.slock);
3013d41a69f1SEric Dumazet __release_sock(sk);
3014d41a69f1SEric Dumazet spin_unlock_bh(&sk->sk_lock.slock);
3015d41a69f1SEric Dumazet }
3016c46b0183SJakub Kicinski EXPORT_SYMBOL_GPL(__sk_flush_backlog);
3017d41a69f1SEric Dumazet
30181da177e4SLinus Torvalds /**
30191da177e4SLinus Torvalds * sk_wait_data - wait for data to arrive at sk_receive_queue
30204dc3b16bSPavel Pisa * @sk: sock to wait on
30214dc3b16bSPavel Pisa * @timeo: for how long
3022dfbafc99SSabrina Dubroca * @skb: last skb seen on sk_receive_queue
30231da177e4SLinus Torvalds *
30241da177e4SLinus Torvalds * Now socket state including sk->sk_err is changed only under lock,
30251da177e4SLinus Torvalds * hence we may omit checks after joining wait queue.
30261da177e4SLinus Torvalds * We check receive queue before schedule() only as optimization;
30271da177e4SLinus Torvalds * it is very likely that release_sock() added new data.
30281da177e4SLinus Torvalds */
sk_wait_data(struct sock * sk,long * timeo,const struct sk_buff * skb)3029dfbafc99SSabrina Dubroca int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
30301da177e4SLinus Torvalds {
3031d9dc8b0fSWANG Cong DEFINE_WAIT_FUNC(wait, woken_wake_function);
30321da177e4SLinus Torvalds int rc;
30331da177e4SLinus Torvalds
3034d9dc8b0fSWANG Cong add_wait_queue(sk_sleep(sk), &wait);
30359cd3e072SEric Dumazet sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
3036d9dc8b0fSWANG Cong rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
30379cd3e072SEric Dumazet sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
3038d9dc8b0fSWANG Cong remove_wait_queue(sk_sleep(sk), &wait);
30391da177e4SLinus Torvalds return rc;
30401da177e4SLinus Torvalds }
30411da177e4SLinus Torvalds EXPORT_SYMBOL(sk_wait_data);
30421da177e4SLinus Torvalds
/**
 *	__sk_mem_raise_allocated - increase memory_allocated
 *	@sk: socket
 *	@size: memory size to allocate
 *	@amt: pages to allocate
 *	@kind: allocation type (SK_MEM_SEND or SK_MEM_RECV)
 *
 *	Similar to __sk_mem_schedule(), but does not update sk_forward_alloc.
 *
 *	Charges @amt pages against the protocol's global memory accounting
 *	(and the socket's memcg when enabled), then decides whether the
 *	allocation may proceed based on the three prot mem limits.
 *
 *	Return: 1 if the allocation is accepted, 0 if it must be suppressed
 *	(in which case the charges taken at entry are rolled back).
 */
int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
	bool memcg_charge = mem_cgroup_sockets_enabled && sk->sk_memcg;
	struct proto *prot = sk->sk_prot;
	bool charged = true;
	long allocated;

	/* Charge optimistically first; rolled back on failure below. */
	sk_memory_allocated_add(sk, amt);
	allocated = sk_memory_allocated(sk);
	if (memcg_charge &&
	    !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt,
						gfp_memcg_charge())))
		goto suppress_allocation;

	/* Under limit. */
	if (allocated <= sk_prot_mem_limits(sk, 0)) {
		sk_leave_memory_pressure(sk);
		return 1;
	}

	/* Under pressure. */
	if (allocated > sk_prot_mem_limits(sk, 1))
		sk_enter_memory_pressure(sk);

	/* Over hard limit. */
	if (allocated > sk_prot_mem_limits(sk, 2))
		goto suppress_allocation;

	/* guarantee minimum buffer size under pressure */
	if (kind == SK_MEM_RECV) {
		if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot))
			return 1;

	} else { /* SK_MEM_SEND */
		int wmem0 = sk_get_wmem0(sk, prot);

		if (sk->sk_type == SOCK_STREAM) {
			if (sk->sk_wmem_queued < wmem0)
				return 1;
		} else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) {
			return 1;
		}
	}

	if (sk_has_memory_pressure(sk)) {
		u64 alloc;

		if (!sk_under_memory_pressure(sk))
			return 1;
		/* Fair-share heuristic: still allow the allocation while
		 * this socket's projected usage times the number of sockets
		 * stays under the hard limit.
		 */
		alloc = sk_sockets_allocated_read_positive(sk);
		if (sk_prot_mem_limits(sk, 2) > alloc *
		    sk_mem_pages(sk->sk_wmem_queued +
				 atomic_read(&sk->sk_rmem_alloc) +
				 sk->sk_forward_alloc))
			return 1;
	}

suppress_allocation:

	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
		sk_stream_moderate_sndbuf(sk);

		/* Fail only if socket is _under_ its sndbuf.
		 * In this case we cannot block, so that we have to fail.
		 */
		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) {
			/* Force charge with __GFP_NOFAIL */
			if (memcg_charge && !charged) {
				mem_cgroup_charge_skmem(sk->sk_memcg, amt,
					gfp_memcg_charge() | __GFP_NOFAIL);
			}
			return 1;
		}
	}

	if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
		trace_sock_exceed_buf_limit(sk, prot, allocated, kind);

	/* Roll back the global charge taken at entry. */
	sk_memory_allocated_sub(sk, amt);

	/* ...and the memcg charge, if it succeeded. */
	if (memcg_charge && charged)
		mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);

	return 0;
}
3137f8c3bf00SPaolo Abeni
/**
 *	__sk_mem_schedule - increase sk_forward_alloc and memory_allocated
 *	@sk: socket
 *	@size: memory size to allocate
 *	@kind: allocation type
 *
 *	If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
 *	rmem allocation. This function assumes that protocols which have
 *	memory_pressure use sk_wmem_queued as write buffer accounting.
 *
 *	Return: 1 on success, 0 if the charge was refused (the
 *	sk_forward_alloc credit taken here is then reverted).
 */
int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
	int ret, amt = sk_mem_pages(size);

	/* Credit forward_alloc before raising; __sk_mem_raise_allocated()
	 * reads sk_forward_alloc, and we undo the credit if it fails.
	 */
	sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
	ret = __sk_mem_raise_allocated(sk, size, amt, kind);
	if (!ret)
		sk_forward_alloc_add(sk, -(amt << PAGE_SHIFT));
	return ret;
}
EXPORT_SYMBOL(__sk_mem_schedule);
31593ab224beSHideo Aoki
/**
 *	__sk_mem_reduce_allocated - reclaim memory_allocated
 *	@sk: socket
 *	@amount: number of quanta (pages)
 *
 *	Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc.
 */
void __sk_mem_reduce_allocated(struct sock *sk, int amount)
{
	sk_memory_allocated_sub(sk, amount);

	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
		mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);

	/* Leave global memory pressure once usage drops back below the
	 * lowest prot mem limit.
	 */
	if (sk_under_global_memory_pressure(sk) &&
	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
		sk_leave_memory_pressure(sk);
}
3178f8c3bf00SPaolo Abeni
/**
 *	__sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
 *	@sk: socket
 *	@amount: number of bytes (rounded down to a PAGE_SIZE multiple)
 */
void __sk_mem_reclaim(struct sock *sk, int amount)
{
	/* Convert bytes to whole pages; any sub-page remainder stays
	 * credited in sk_forward_alloc.
	 */
	amount >>= PAGE_SHIFT;
	sk_forward_alloc_add(sk, -(amount << PAGE_SHIFT));
	__sk_mem_reduce_allocated(sk, amount);
}
EXPORT_SYMBOL(__sk_mem_reclaim);
31913ab224beSHideo Aoki
/* Store a new peek offset for MSG_PEEK handling.
 * WRITE_ONCE() marks the store for lockless readers of sk_peek_off
 * (data-race annotation only; no ordering implied).
 */
int sk_set_peek_off(struct sock *sk, int val)
{
	WRITE_ONCE(sk->sk_peek_off, val);
	return 0;
}
EXPORT_SYMBOL_GPL(sk_set_peek_off);
31983ab224beSHideo Aoki
/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 *
 * Every stub below reports -EOPNOTSUPP, except sock_no_mmap() which
 * mirrors the error code used for a missing ->mmap method.
 */

int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_bind);

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_connect);

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_socketpair);

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
		   bool kern)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_accept);

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int peer)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getname);

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_ioctl);

int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_listen);

int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_shutdown);

int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_sendmsg);

int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_sendmsg_locked);

int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
		    int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_recvmsg);

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}
EXPORT_SYMBOL(sock_no_mmap);
32821da177e4SLinus Torvalds
/*
 * When a file is received (via SCM_RIGHTS, etc), we must bump the
 * various sock-based usage counts.
 */
void __receive_sock(struct file *file)
{
	struct socket *sock;

	/* sock_from_file() presumably yields NULL for non-socket files,
	 * in which case there is nothing to update.
	 */
	sock = sock_from_file(file);
	if (sock) {
		/* Refresh cgroup-derived netprio/classid state for the
		 * receiving task.
		 */
		sock_update_netprioidx(&sock->sk->sk_cgrp_data);
		sock_update_classid(&sock->sk->sk_cgrp_data);
	}
}
3297d9539752SKees Cook
/*
 * Default Socket Callbacks
 *
 * Installed by sock_init_data_uid(); protocols may override them.
 * Each wakeup helper dereferences sk->sk_wq under RCU, since the
 * wait queue is RCU-managed and can be torn down concurrently.
 */

static void sock_def_wakeup(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	/* Only wake when someone is actually sleeping on the queue. */
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_all(&wq->wait);
	rcu_read_unlock();
}

static void sock_def_error_report(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_poll(&wq->wait, EPOLLERR);
	/* Also notify SIGIO/fasync listeners of the error. */
	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
	rcu_read_unlock();
}

void sock_def_readable(struct sock *sk)
{
	struct socket_wq *wq;

	trace_sk_data_ready(sk);

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
						EPOLLRDNORM | EPOLLRDBAND);
	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
	rcu_read_unlock();
}

static void sock_def_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();

	/* Do not wake up a writer until he can make "significant"
	 * progress. --DaveM
	 */
	if (sock_writeable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
						EPOLLWRNORM | EPOLLWRBAND);

		/* Should agree with poll, otherwise some programs break */
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}

	rcu_read_unlock();
}

/* An optimised version of sock_def_write_space(), should only be called
 * for SOCK_RCU_FREE sockets under RCU read section and after putting
 * ->sk_wmem_alloc.
 */
static void sock_def_write_space_wfree(struct sock *sk)
{
	/* Do not wake up a writer until he can make "significant"
	 * progress. --DaveM
	 */
	if (sock_writeable(sk)) {
		struct socket_wq *wq = rcu_dereference(sk->sk_wq);

		/* rely on refcount_sub from sock_wfree() */
		smp_mb__after_atomic();
		if (wq && waitqueue_active(&wq->wait))
			wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
						EPOLLWRNORM | EPOLLWRBAND);

		/* Should agree with poll, otherwise some programs break */
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
}

/* Default ->sk_destruct: nothing extra to tear down. */
static void sock_def_destruct(struct sock *sk)
{
}
33881da177e4SLinus Torvalds
/* Deliver SIGURG (urgent data notification) to the owner of the
 * socket's file, then wake SOCK_WAKE_URG async waiters on success.
 */
void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
}
EXPORT_SYMBOL(sk_send_sigurg);
33961da177e4SLinus Torvalds
/* Timer helpers that pair a sock reference with timer liveness:
 * sk_reset_timer() takes a reference only when the timer was not
 * already pending (mod_timer() returned 0); the sk_stop_timer*()
 * variants drop it only when they actually deactivated a pending
 * timer. The timer callback is expected to drop the reference.
 */
void sk_reset_timer(struct sock *sk, struct timer_list* timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}
EXPORT_SYMBOL(sk_reset_timer);

void sk_stop_timer(struct sock *sk, struct timer_list* timer)
{
	if (del_timer(timer))
		__sock_put(sk);
}
EXPORT_SYMBOL(sk_stop_timer);

/* Like sk_stop_timer(), but also waits for a concurrently executing
 * timer callback to finish.
 */
void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer)
{
	if (del_timer_sync(timer))
		__sock_put(sk);
}
EXPORT_SYMBOL(sk_stop_timer_sync);
341808b81d87SGeliang Tang
/**
 *	sock_init_data_uid - initialize a freshly allocated struct sock
 *	@sock: owning struct socket, or NULL for sockets without one
 *	@sk: the sock to initialize
 *	@uid: value stored in sk->sk_uid
 *
 *	Installs default callbacks, buffer sizes and timeouts. sk_refcnt
 *	is published last, after an smp_wmb(), so lockless readers see a
 *	fully initialized socket (see Documentation/RCU/rculist_nulls.rst).
 */
void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid)
{
	sk_init_common(sk);
	sk->sk_send_head = NULL;

	/* No callback yet; the protocol sets one before arming the timer. */
	timer_setup(&sk->sk_timer, NULL, 0);

	sk->sk_allocation = GFP_KERNEL;
	sk->sk_rcvbuf = READ_ONCE(sysctl_rmem_default);
	sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default);
	sk->sk_state = TCP_CLOSE;
	sk->sk_use_task_frag = true;
	sk_set_socket(sk, sock);

	sock_set_flag(sk, SOCK_ZAPPED);

	if (sock) {
		sk->sk_type = sock->type;
		RCU_INIT_POINTER(sk->sk_wq, &sock->wq);
		sock->sk = sk;
	} else {
		RCU_INIT_POINTER(sk->sk_wq, NULL);
	}
	sk->sk_uid = uid;

	rwlock_init(&sk->sk_callback_lock);
	/* Kernel sockets get a separate lockdep class so their locking
	 * is not conflated with user sockets of the same family.
	 */
	if (sk->sk_kern_sock)
		lockdep_set_class_and_name(
			&sk->sk_callback_lock,
			af_kern_callback_keys + sk->sk_family,
			af_family_kern_clock_key_strings[sk->sk_family]);
	else
		lockdep_set_class_and_name(
			&sk->sk_callback_lock,
			af_callback_keys + sk->sk_family,
			af_family_clock_key_strings[sk->sk_family]);

	/* Default callbacks; protocols may override. */
	sk->sk_state_change = sock_def_wakeup;
	sk->sk_data_ready = sock_def_readable;
	sk->sk_write_space = sock_def_write_space;
	sk->sk_error_report = sock_def_error_report;
	sk->sk_destruct = sock_def_destruct;

	sk->sk_frag.page = NULL;
	sk->sk_frag.offset = 0;
	sk->sk_peek_off = -1;

	sk->sk_peer_pid = NULL;
	sk->sk_peer_cred = NULL;
	spin_lock_init(&sk->sk_peer_lock);

	sk->sk_write_pending = 0;
	sk->sk_rcvlowat = 1;
	sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;

	sk->sk_stamp = SK_DEFAULT_STAMP;
#if BITS_PER_LONG==32
	seqlock_init(&sk->sk_stamp_seq);
#endif
	atomic_set(&sk->sk_zckey, 0);

#ifdef CONFIG_NET_RX_BUSY_POLL
	sk->sk_napi_id = 0;
	sk->sk_ll_usec = READ_ONCE(sysctl_net_busy_read);
#endif

	/* "unlimited" pacing by default */
	sk->sk_max_pacing_rate = ~0UL;
	sk->sk_pacing_rate = ~0UL;
	WRITE_ONCE(sk->sk_pacing_shift, 10);
	sk->sk_incoming_cpu = -1;

	sk_rx_queue_clear(sk);
	/*
	 * Before updating sk_refcnt, we must commit prior changes to memory
	 * (Documentation/RCU/rculist_nulls.rst for details)
	 */
	smp_wmb();
	refcount_set(&sk->sk_refcnt, 1);
	atomic_set(&sk->sk_drops, 0);
}
EXPORT_SYMBOL(sock_init_data_uid);
3501584f3742SPietro Borrello
sock_init_data(struct socket * sock,struct sock * sk)3502584f3742SPietro Borrello void sock_init_data(struct socket *sock, struct sock *sk)
3503584f3742SPietro Borrello {
3504584f3742SPietro Borrello kuid_t uid = sock ?
3505584f3742SPietro Borrello SOCK_INODE(sock)->i_uid :
3506584f3742SPietro Borrello make_kuid(sock_net(sk)->user_ns, 0);
3507584f3742SPietro Borrello
3508584f3742SPietro Borrello sock_init_data_uid(sock, sk, uid);
3509584f3742SPietro Borrello }
35102a91525cSEric Dumazet EXPORT_SYMBOL(sock_init_data);
35111da177e4SLinus Torvalds
/* Acquire the socket "mutex" (sk_lock.owned), with a lockdep subclass
 * so that holding two socket locks at once can be annotated.
 */
void lock_sock_nested(struct sock *sk, int subclass)
{
	/* The sk_lock has mutex_lock() semantics here. */
	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);

	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);
	/* Another context owns the socket: sleep until it is released. */
	if (sock_owned_by_user_nocheck(sk))
		__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(lock_sock_nested);
35251da177e4SLinus Torvalds
/* Release the socket "mutex": first drain any packets queued to the
 * backlog while we owned the socket, run the protocol's release
 * callback, then drop ownership and wake the next lock waiter.
 */
void release_sock(struct sock *sk)
{
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);

	/* Warning : release_cb() might need to release sk ownership,
	 * ie call sock_release_ownership(sk) before us.
	 */
	if (sk->sk_prot->release_cb)
		sk->sk_prot->release_cb(sk);

	sock_release_ownership(sk);
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);
35441da177e4SLinus Torvalds
/* Fast-path socket lock. Returns false when the lock was uncontended
 * (caller keeps running with BHs disabled and slock held); returns
 * true when the slow path had to take full "mutex" ownership. The
 * return value must be passed to unlock_sock_fast().
 */
bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);

	if (!sock_owned_by_user_nocheck(sk)) {
		/*
		 * Fast path return with bottom halves disabled and
		 * sock::sk_lock.slock held.
		 *
		 * The 'mutex' is not contended and holding
		 * sock::sk_lock.slock prevents all other lockers to
		 * proceed so the corresponding unlock_sock_fast() can
		 * avoid the slow path of release_sock() completely and
		 * just release slock.
		 *
		 * From a semantical POV this is equivalent to 'acquiring'
		 * the 'mutex', hence the corresponding lockdep
		 * mutex_release() has to happen in the fast path of
		 * unlock_sock_fast().
		 */
		return false;
	}

	__lock_sock(sk);
	sk->sk_lock.owned = 1;
	/* Sparse annotation: ownership stands in for holding slock. */
	__acquire(&sk->sk_lock.slock);
	spin_unlock_bh(&sk->sk_lock.slock);
	return true;
}
EXPORT_SYMBOL(__lock_sock_fast);
35768a74ad60SEric Dumazet
/* Copy the socket's last-packet timestamp to userspace in the
 * requested format (@timeval vs timespec, @time32 vs native layout).
 * Presumably backs the SIOCGSTAMP family of ioctls — callers are not
 * visible here.
 */
int sock_gettstamp(struct socket *sock, void __user *userstamp,
		   bool timeval, bool time32)
{
	struct sock *sk = sock->sk;
	struct timespec64 ts;

	/* Make sure packets from now on get timestamped. */
	sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	ts = ktime_to_timespec64(sock_read_timestamp(sk));
	if (ts.tv_sec == -1)
		return -ENOENT;
	if (ts.tv_sec == 0) {
		/* No timestamp recorded yet: synthesize one from the
		 * current wall clock and store it back.
		 */
		ktime_t kt = ktime_get_real();
		sock_write_timestamp(sk, kt);
		ts = ktime_to_timespec64(kt);
	}

	if (timeval)
		ts.tv_nsec /= 1000;

#ifdef CONFIG_COMPAT_32BIT_TIME
	if (time32)
		return put_old_timespec32(&ts, userstamp);
#endif
#ifdef CONFIG_SPARC64
	/* beware of padding in sparc64 timeval */
	if (timeval && !in_compat_syscall()) {
		struct __kernel_old_timeval __user tv = {
			.tv_sec = ts.tv_sec,
			.tv_usec = ts.tv_nsec,
		};
		if (copy_to_user(userstamp, &tv, sizeof(tv)))
			return -EFAULT;
		return 0;
	}
#endif
	return put_timespec64(&ts, userstamp);
}
EXPORT_SYMBOL(sock_gettstamp);
3615ae40eb1eSEric Dumazet
/* Set a timestamping sock flag; the shared net timestamping machinery
 * is only enabled when no SK_FLAGS_TIMESTAMP flag was set before, so
 * net_enable_timestamp() is called at most once per socket.
 */
void sock_enable_timestamp(struct sock *sk, enum sock_flags flag)
{
	if (!sock_flag(sk, flag)) {
		unsigned long previous_flags = sk->sk_flags;

		sock_set_flag(sk, flag);
		/*
		 * we just set one of the two flags which require net
		 * time stamping, but time stamping might have been on
		 * already because of the other one
		 */
		if (sock_needs_netstamp(sk) &&
		    !(previous_flags & SK_FLAGS_TIMESTAMP))
			net_enable_timestamp();
	}
}
36321da177e4SLinus Torvalds
/* Dequeue one skb from the socket error queue and deliver it to @msg,
 * together with its receive timestamp and the extended-error cmsg at
 * (@level, @type). Returns the number of bytes copied (MSG_TRUNC set
 * in msg_flags if truncated to @len), -EAGAIN when the queue is
 * empty, or a copy error. The skb is consumed in all cases after
 * dequeue.
 */
int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
		       int level, int type)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb;
	int copied, err;

	err = -EAGAIN;
	skb = sock_dequeue_err_skb(sk);
	if (skb == NULL)
		goto out;

	copied = skb->len;
	if (copied > len) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_msg(skb, 0, msg, copied);
	if (err)
		goto out_free_skb;

	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);
	put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);

	msg->msg_flags |= MSG_ERRQUEUE;
	err = copied;

out_free_skb:
	kfree_skb(skb);
out:
	return err;
}
EXPORT_SYMBOL(sock_recv_errqueue);
3668cb820f8eSRichard Cochran
/*
 * Get a socket option on an socket.
 *
 * FIX: POSIX 1003.1g is very ambiguous here. It states that
 * asynchronous errors should be reported by getsockopt. We assume
 * this means if you specify SO_ERROR (otherwise whats the point of it).
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	/* IPV6_ADDRFORM can change sk->sk_prot under us. */
	return READ_ONCE(sk->sk_prot)->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_getsockopt);
36851da177e4SLinus Torvalds
/* Common ->recvmsg() glue: call the protocol receive routine and publish
 * the source address length it reported on success.
 */
int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
			int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int ret;

	ret = sk->sk_prot->recvmsg(sk, msg, size, flags, &addr_len);
	if (ret < 0)
		return ret;

	msg->msg_namelen = addr_len;
	return ret;
}
EXPORT_SYMBOL(sock_common_recvmsg);
36991da177e4SLinus Torvalds
37001da177e4SLinus Torvalds /*
37011da177e4SLinus Torvalds * Set socket options on an inet socket.
37021da177e4SLinus Torvalds */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	const struct proto *prot;

	/* IPV6_ADDRFORM can change sk->sk_prot under us: sample it once. */
	prot = READ_ONCE(sk->sk_prot);
	return prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_setsockopt);
37121da177e4SLinus Torvalds
/* Shared final-release path for protocols without a specialised close
 * sequence: run the protocol destructor, unhash, orphan the socket and
 * drop the last reference.  The ordering of the steps below matters.
 */
void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes have
	 * no access to socket. But net still has.
	 * Step one, detach it from networking:
	 *
	 * A. Remove from hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * In this point socket cannot receive new packets, but it is possible
	 * that some packets are in flight because some CPU runs receiver and
	 * did hash table lookup before we unhashed socket. They will achieve
	 * receive queue and will be purged by socket destructor.
	 *
	 * Also we still have packets pending on receive queue and probably,
	 * our own packets waiting in device queues. sock_destroy will drain
	 * receive queue, but transmitted packets will delay socket destruction
	 * until the last reference will be released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

	sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);
37471da177e4SLinus Torvalds
/* Fill @mem (array of SK_MEMINFO_VARS u32 slots) with a snapshot of the
 * socket's memory accounting, e.g. for sock_diag reporting.  The reads
 * are lockless; the READ_ONCE() annotations pair with lockless writers,
 * so keep them exactly as-is.
 */
void sk_get_meminfo(const struct sock *sk, u32 *mem)
{
	memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);

	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
	mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
	mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
	mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
	mem[SK_MEMINFO_FWD_ALLOC] = sk_forward_alloc_get(sk);
	mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
	mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
	mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
	mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
}
3762a2d133b1SJosh Hunt
376313ff3d6fSPavel Emelyanov #ifdef CONFIG_PROC_FS
376413ff3d6fSPavel Emelyanov static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
376570ee1159SPavel Emelyanov
sock_prot_inuse_get(struct net * net,struct proto * prot)376670ee1159SPavel Emelyanov int sock_prot_inuse_get(struct net *net, struct proto *prot)
376770ee1159SPavel Emelyanov {
376870ee1159SPavel Emelyanov int cpu, idx = prot->inuse_idx;
376970ee1159SPavel Emelyanov int res = 0;
377070ee1159SPavel Emelyanov
377170ee1159SPavel Emelyanov for_each_possible_cpu(cpu)
377208fc7f81STonghao Zhang res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx];
377370ee1159SPavel Emelyanov
377470ee1159SPavel Emelyanov return res >= 0 ? res : 0;
377570ee1159SPavel Emelyanov }
377670ee1159SPavel Emelyanov EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
377770ee1159SPavel Emelyanov
sock_inuse_get(struct net * net)3778648845abSTonghao Zhang int sock_inuse_get(struct net *net)
3779648845abSTonghao Zhang {
3780648845abSTonghao Zhang int cpu, res = 0;
3781648845abSTonghao Zhang
3782648845abSTonghao Zhang for_each_possible_cpu(cpu)
37834199bae1SEric Dumazet res += per_cpu_ptr(net->core.prot_inuse, cpu)->all;
3784648845abSTonghao Zhang
3785648845abSTonghao Zhang return res;
3786648845abSTonghao Zhang }
3787648845abSTonghao Zhang
3788648845abSTonghao Zhang EXPORT_SYMBOL_GPL(sock_inuse_get);
3789648845abSTonghao Zhang
sock_inuse_init_net(struct net * net)37902c8c1e72SAlexey Dobriyan static int __net_init sock_inuse_init_net(struct net *net)
379170ee1159SPavel Emelyanov {
379208fc7f81STonghao Zhang net->core.prot_inuse = alloc_percpu(struct prot_inuse);
3793648845abSTonghao Zhang if (net->core.prot_inuse == NULL)
3794648845abSTonghao Zhang return -ENOMEM;
3795648845abSTonghao Zhang return 0;
379670ee1159SPavel Emelyanov }
379770ee1159SPavel Emelyanov
/* Free the per-cpu inuse counters when the netns goes away. */
static void __net_exit sock_inuse_exit_net(struct net *net)
{
	free_percpu(net->core.prot_inuse);
}
380270ee1159SPavel Emelyanov
/* Per-netns setup/teardown of the protocol inuse counters. */
static struct pernet_operations net_inuse_ops = {
	.init = sock_inuse_init_net,
	.exit = sock_inuse_exit_net,
};
380770ee1159SPavel Emelyanov
net_inuse_init(void)380870ee1159SPavel Emelyanov static __init int net_inuse_init(void)
380970ee1159SPavel Emelyanov {
381070ee1159SPavel Emelyanov if (register_pernet_subsys(&net_inuse_ops))
381170ee1159SPavel Emelyanov panic("Cannot initialize net inuse counters");
381270ee1159SPavel Emelyanov
381370ee1159SPavel Emelyanov return 0;
381470ee1159SPavel Emelyanov }
381570ee1159SPavel Emelyanov
381670ee1159SPavel Emelyanov core_initcall(net_inuse_init);
381713ff3d6fSPavel Emelyanov
/* Reserve a free slot in the global inuse bitmap for /proc accounting.
 * The last bit (PROTO_INUSE_NR - 1) is never handed out: it doubles as
 * the "no slot allocated" sentinel checked by release_proto_idx(), so
 * find_first_zero_bit() returning it means the table is exhausted.
 * Note inuse_idx is assigned even on the failure path (intentionally,
 * so release_proto_idx() sees the sentinel).
 */
static int assign_proto_idx(struct proto *prot)
{
	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);

	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
		pr_err("PROTO_INUSE_NR exhausted\n");
		return -ENOSPC;
	}

	set_bit(prot->inuse_idx, proto_inuse_idx);
	return 0;
}
383013ff3d6fSPavel Emelyanov
/* Return a slot reserved by assign_proto_idx(); the sentinel value
 * (PROTO_INUSE_NR - 1) means none was ever allocated, so skip it.
 */
static void release_proto_idx(struct proto *prot)
{
	if (prot->inuse_idx != PROTO_INUSE_NR - 1)
		clear_bit(prot->inuse_idx, proto_inuse_idx);
}
383613ff3d6fSPavel Emelyanov #else
/* No-op stub when CONFIG_PROC_FS is disabled: no inuse accounting. */
static inline int assign_proto_idx(struct proto *prot)
{
	return 0;
}
384113ff3d6fSPavel Emelyanov
/* No-op stub when CONFIG_PROC_FS is disabled. */
static inline void release_proto_idx(struct proto *prot)
{
}
3845648845abSTonghao Zhang
384613ff3d6fSPavel Emelyanov #endif
384713ff3d6fSPavel Emelyanov
tw_prot_cleanup(struct timewait_sock_ops * twsk_prot)38480f5907afSMiaohe Lin static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot)
38490f5907afSMiaohe Lin {
38500f5907afSMiaohe Lin if (!twsk_prot)
38510f5907afSMiaohe Lin return;
38520f5907afSMiaohe Lin kfree(twsk_prot->twsk_slab_name);
38530f5907afSMiaohe Lin twsk_prot->twsk_slab_name = NULL;
38540f5907afSMiaohe Lin kmem_cache_destroy(twsk_prot->twsk_slab);
38550f5907afSMiaohe Lin twsk_prot->twsk_slab = NULL;
38560f5907afSMiaohe Lin }
38570f5907afSMiaohe Lin
tw_prot_init(const struct proto * prot)3858b80350f3STonghao Zhang static int tw_prot_init(const struct proto *prot)
3859b80350f3STonghao Zhang {
3860b80350f3STonghao Zhang struct timewait_sock_ops *twsk_prot = prot->twsk_prot;
3861b80350f3STonghao Zhang
3862b80350f3STonghao Zhang if (!twsk_prot)
3863b80350f3STonghao Zhang return 0;
3864b80350f3STonghao Zhang
3865b80350f3STonghao Zhang twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s",
3866b80350f3STonghao Zhang prot->name);
3867b80350f3STonghao Zhang if (!twsk_prot->twsk_slab_name)
3868b80350f3STonghao Zhang return -ENOMEM;
3869b80350f3STonghao Zhang
3870b80350f3STonghao Zhang twsk_prot->twsk_slab =
3871b80350f3STonghao Zhang kmem_cache_create(twsk_prot->twsk_slab_name,
3872b80350f3STonghao Zhang twsk_prot->twsk_obj_size, 0,
3873b80350f3STonghao Zhang SLAB_ACCOUNT | prot->slab_flags,
3874b80350f3STonghao Zhang NULL);
3875b80350f3STonghao Zhang if (!twsk_prot->twsk_slab) {
3876b80350f3STonghao Zhang pr_crit("%s: Can't create timewait sock SLAB cache!\n",
3877b80350f3STonghao Zhang prot->name);
3878b80350f3STonghao Zhang return -ENOMEM;
3879b80350f3STonghao Zhang }
3880b80350f3STonghao Zhang
3881b80350f3STonghao Zhang return 0;
3882b80350f3STonghao Zhang }
3883b80350f3STonghao Zhang
req_prot_cleanup(struct request_sock_ops * rsk_prot)38840159dfd3SEric Dumazet static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
38850159dfd3SEric Dumazet {
38860159dfd3SEric Dumazet if (!rsk_prot)
38870159dfd3SEric Dumazet return;
38880159dfd3SEric Dumazet kfree(rsk_prot->slab_name);
38890159dfd3SEric Dumazet rsk_prot->slab_name = NULL;
38900159dfd3SEric Dumazet kmem_cache_destroy(rsk_prot->slab);
38910159dfd3SEric Dumazet rsk_prot->slab = NULL;
38920159dfd3SEric Dumazet }
38930159dfd3SEric Dumazet
req_prot_init(const struct proto * prot)38940159dfd3SEric Dumazet static int req_prot_init(const struct proto *prot)
38950159dfd3SEric Dumazet {
38960159dfd3SEric Dumazet struct request_sock_ops *rsk_prot = prot->rsk_prot;
38970159dfd3SEric Dumazet
38980159dfd3SEric Dumazet if (!rsk_prot)
38990159dfd3SEric Dumazet return 0;
39000159dfd3SEric Dumazet
39010159dfd3SEric Dumazet rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s",
39020159dfd3SEric Dumazet prot->name);
39030159dfd3SEric Dumazet if (!rsk_prot->slab_name)
39040159dfd3SEric Dumazet return -ENOMEM;
39050159dfd3SEric Dumazet
39060159dfd3SEric Dumazet rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
39070159dfd3SEric Dumazet rsk_prot->obj_size, 0,
3908e699e2c6SShakeel Butt SLAB_ACCOUNT | prot->slab_flags,
3909e699e2c6SShakeel Butt NULL);
39100159dfd3SEric Dumazet
39110159dfd3SEric Dumazet if (!rsk_prot->slab) {
39120159dfd3SEric Dumazet pr_crit("%s: Can't create request sock SLAB cache!\n",
39130159dfd3SEric Dumazet prot->name);
39140159dfd3SEric Dumazet return -ENOMEM;
39150159dfd3SEric Dumazet }
39160159dfd3SEric Dumazet return 0;
39170159dfd3SEric Dumazet }
39180159dfd3SEric Dumazet
/**
 * proto_register - register a transport protocol with the core
 * @prot: protocol descriptor
 * @alloc_slab: if non-zero, create the sock/request/timewait slab caches
 *
 * Validates the memory-accounting fields, optionally creates the slab
 * caches, reserves a /proc inuse index and links @prot onto the global
 * proto_list.  Returns 0 on success or a negative errno; on failure,
 * everything created here is torn down again.
 */
int proto_register(struct proto *prot, int alloc_slab)
{
	int ret = -ENOBUFS;

	/* Protocols doing memory accounting must supply both knobs. */
	if (prot->memory_allocated && !prot->sysctl_mem) {
		pr_err("%s: missing sysctl_mem\n", prot->name);
		return -EINVAL;
	}
	if (prot->memory_allocated && !prot->per_cpu_fw_alloc) {
		pr_err("%s: missing per_cpu_fw_alloc\n", prot->name);
		return -EINVAL;
	}
	if (alloc_slab) {
		/* usercopy whitelist: [useroffset, useroffset + usersize) */
		prot->slab = kmem_cache_create_usercopy(prot->name,
					prot->obj_size, 0,
					SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT |
					prot->slab_flags,
					prot->useroffset, prot->usersize,
					NULL);

		if (prot->slab == NULL) {
			pr_crit("%s: Can't create sock SLAB cache!\n",
				prot->name);
			goto out;
		}

		if (req_prot_init(prot))
			goto out_free_request_sock_slab;

		if (tw_prot_init(prot))
			goto out_free_timewait_sock_slab;
	}

	mutex_lock(&proto_list_mutex);
	ret = assign_proto_idx(prot);
	if (ret) {
		mutex_unlock(&proto_list_mutex);
		goto out_free_timewait_sock_slab;
	}
	list_add(&prot->node, &proto_list);
	mutex_unlock(&proto_list_mutex);
	return ret;

	/* Reached both from tw_prot_init() failure and from the
	 * assign_proto_idx() failure path, hence the alloc_slab checks.
	 */
out_free_timewait_sock_slab:
	if (alloc_slab)
		tw_prot_cleanup(prot->twsk_prot);
out_free_request_sock_slab:
	if (alloc_slab) {
		req_prot_cleanup(prot->rsk_prot);

		kmem_cache_destroy(prot->slab);
		prot->slab = NULL;
	}
out:
	return ret;
}
EXPORT_SYMBOL(proto_register);
39761da177e4SLinus Torvalds
/* Undo proto_register(): drop @prot from the global list, free its
 * /proc inuse slot, then destroy the slab caches.  The cleanup helpers
 * guard against absent rsk_prot/twsk_prot themselves, so this is safe
 * for protocols registered with alloc_slab == 0.
 */
void proto_unregister(struct proto *prot)
{
	mutex_lock(&proto_list_mutex);
	release_proto_idx(prot);
	list_del(&prot->node);
	mutex_unlock(&proto_list_mutex);

	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;

	req_prot_cleanup(prot->rsk_prot);
	tw_prot_cleanup(prot->twsk_prot);
}
EXPORT_SYMBOL(proto_unregister);
39911da177e4SLinus Torvalds
/* Request the sock_diag module for @family (and optionally @protocol).
 * A protocol of 0 asks for the family-level diag module, and is only
 * attempted when the address family itself is registered.
 */
int sock_load_diag_module(int family, int protocol)
{
	if (!protocol) {
		if (!sock_is_registered(family))
			return -ENOENT;

		return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
				      NETLINK_SOCK_DIAG, family);
	}

#ifdef CONFIG_INET
	/* For AF_INET, skip the module request when no handler is
	 * registered for an in-range protocol (IPPROTO_RAW and
	 * out-of-range values are always attempted).
	 */
	if (family == AF_INET &&
	    protocol != IPPROTO_RAW &&
	    protocol < MAX_INET_PROTOS &&
	    !rcu_access_pointer(inet_protos[protocol]))
		return -ENOENT;
#endif

	return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
			      NETLINK_SOCK_DIAG, family, protocol);
}
EXPORT_SYMBOL(sock_load_diag_module);
4014bf2ae2e4SXin Long
40151da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
/* seq_file iterator over proto_list; proto_list_mutex is held for the
 * whole traversal and released in proto_seq_stop().
 */
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(proto_list_mutex)
{
	mutex_lock(&proto_list_mutex);
	/* *_start_head() lets ->show() emit the header row first */
	return seq_list_start_head(&proto_list, *pos);
}
40221da177e4SLinus Torvalds
/* Advance to the next registered protocol on proto_list. */
static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	return seq_list_next(v, &proto_list, pos);
}
40271da177e4SLinus Torvalds
/* Release the mutex taken in proto_seq_start(). */
static void proto_seq_stop(struct seq_file *seq, void *v)
	__releases(proto_list_mutex)
{
	mutex_unlock(&proto_list_mutex);
}
40331da177e4SLinus Torvalds
/* 'y' if the protocol provides the callback, 'n' otherwise; used for
 * the per-method columns of /proc/net/protocols.
 */
static char proto_method_implemented(const void *method)
{
	return method ? 'y' : 'n';
}
sock_prot_memory_allocated(struct proto * proto)4038180d8cd9SGlauber Costa static long sock_prot_memory_allocated(struct proto *proto)
4039180d8cd9SGlauber Costa {
4040180d8cd9SGlauber Costa return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
4041180d8cd9SGlauber Costa }
4042180d8cd9SGlauber Costa
sock_prot_memory_pressure(struct proto * proto)40437a512eb8SAlexey Dobriyan static const char *sock_prot_memory_pressure(struct proto *proto)
4044180d8cd9SGlauber Costa {
4045180d8cd9SGlauber Costa return proto->memory_pressure != NULL ?
4046180d8cd9SGlauber Costa proto_memory_pressure(proto) ? "yes" : "no" : "NI";
4047180d8cd9SGlauber Costa }
40481da177e4SLinus Torvalds
/* Emit one /proc/net/protocols row for @proto.  The format string and
 * argument order must match the header printed by proto_seq_show().
 */
static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{

	seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   sock_prot_inuse_get(seq_file_net(seq), proto),
		   sock_prot_memory_allocated(proto),
		   sock_prot_memory_pressure(proto),
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}
40811da177e4SLinus Torvalds
/* ->show() for /proc/net/protocols: the list head sentinel produces the
 * column header; every real entry produces one protocol row.
 */
static int proto_seq_show(struct seq_file *seq, void *v)
{
	if (v == &proto_list)
		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
			   "protocol",
			   "size",
			   "sockets",
			   "memory",
			   "press",
			   "maxhdr",
			   "slab",
			   "module",
			   "cl co di ac io in de sh ss gs se re bi br ha uh gp em\n");
	else
		proto_seq_printf(seq, list_entry(v, struct proto, node));
	return 0;
}
40991da177e4SLinus Torvalds
/* seq_file operations backing /proc/net/protocols. */
static const struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};
41061da177e4SLinus Torvalds
proto_init_net(struct net * net)410714e943dbSEric Dumazet static __net_init int proto_init_net(struct net *net)
410814e943dbSEric Dumazet {
4109c3506372SChristoph Hellwig if (!proc_create_net("protocols", 0444, net->proc_net, &proto_seq_ops,
4110c3506372SChristoph Hellwig sizeof(struct seq_net_private)))
411114e943dbSEric Dumazet return -ENOMEM;
411214e943dbSEric Dumazet
411314e943dbSEric Dumazet return 0;
411414e943dbSEric Dumazet }
411514e943dbSEric Dumazet
/* Remove /proc/net/protocols when the namespace is torn down. */
static __net_exit void proto_exit_net(struct net *net)
{
	remove_proc_entry("protocols", net->proc_net);
}
412014e943dbSEric Dumazet
412114e943dbSEric Dumazet
/* Per-netns lifecycle of the /proc/net/protocols entry. */
static __net_initdata struct pernet_operations proto_net_ops = {
	.init = proto_init_net,
	.exit = proto_exit_net,
};
41261da177e4SLinus Torvalds
/* Hook the /proc/net/protocols pernet ops at boot. */
static int __init proto_init(void)
{
	return register_pernet_subsys(&proto_net_ops);
}

subsys_initcall(proto_init);
41331da177e4SLinus Torvalds
41341da177e4SLinus Torvalds #endif /* PROC_FS */
41357db6b048SSridhar Samudrala
41367db6b048SSridhar Samudrala #ifdef CONFIG_NET_RX_BUSY_POLL
sk_busy_loop_end(void * p,unsigned long start_time)41377db6b048SSridhar Samudrala bool sk_busy_loop_end(void *p, unsigned long start_time)
41387db6b048SSridhar Samudrala {
41397db6b048SSridhar Samudrala struct sock *sk = p;
41407db6b048SSridhar Samudrala
4141ef8ad307SEric Dumazet if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
4142ef8ad307SEric Dumazet return true;
4143ef8ad307SEric Dumazet
4144ef8ad307SEric Dumazet if (sk_is_udp(sk) &&
4145ef8ad307SEric Dumazet !skb_queue_empty_lockless(&udp_sk(sk)->reader_queue))
4146ef8ad307SEric Dumazet return true;
4147ef8ad307SEric Dumazet
4148ef8ad307SEric Dumazet return sk_busy_loop_timeout(sk, start_time);
41497db6b048SSridhar Samudrala }
41507db6b048SSridhar Samudrala EXPORT_SYMBOL(sk_busy_loop_end);
41517db6b048SSridhar Samudrala #endif /* CONFIG_NET_RX_BUSY_POLL */
4152c0425a42SChristoph Hellwig
sock_bind_add(struct sock * sk,struct sockaddr * addr,int addr_len)4153c0425a42SChristoph Hellwig int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len)
4154c0425a42SChristoph Hellwig {
4155c0425a42SChristoph Hellwig if (!sk->sk_prot->bind_add)
4156c0425a42SChristoph Hellwig return -EOPNOTSUPP;
4157c0425a42SChristoph Hellwig return sk->sk_prot->bind_add(sk, addr, addr_len);
4158c0425a42SChristoph Hellwig }
4159c0425a42SChristoph Hellwig EXPORT_SYMBOL(sock_bind_add);
4160e1d001faSBreno Leitao
/* Copy 'size' bytes in from userspace, run the protocol ioctl on the
 * kernel copy, then copy 'size' bytes back out on success.
 */
int sock_ioctl_inout(struct sock *sk, unsigned int cmd,
		     void __user *arg, void *karg, size_t size)
{
	int err;

	if (copy_from_user(karg, arg, size))
		return -EFAULT;

	/* IPV6_ADDRFORM can change sk->sk_prot under us, hence READ_ONCE() */
	err = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, karg);

	/* Only propagate the result buffer back on success. */
	if (!err && copy_to_user(arg, karg, size))
		err = -EFAULT;

	return err;
}
EXPORT_SYMBOL(sock_ioctl_inout);
4180e1d001faSBreno Leitao
4181e1d001faSBreno Leitao /* This is the most common ioctl prep function, where the result (4 bytes) is
4182e1d001faSBreno Leitao * copied back to userspace if the ioctl() returns successfully. No input is
4183e1d001faSBreno Leitao * copied from userspace as input argument.
4184e1d001faSBreno Leitao */
/* This is the most common ioctl prep function, where the result (4 bytes) is
 * copied back to userspace if the ioctl() returns successfully. No input is
 * copied from userspace as input argument.
 */
static int sock_ioctl_out(struct sock *sk, unsigned int cmd, void __user *arg)
{
	int karg = 0;
	int ret = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, &karg);

	return ret ? ret : put_user(karg, (int __user *)arg);
}
4195e1d001faSBreno Leitao
4196e1d001faSBreno Leitao /* A wrapper around sock ioctls, which copies the data from userspace
4197e1d001faSBreno Leitao * (depending on the protocol/ioctl), and copies back the result to userspace.
4198e1d001faSBreno Leitao * The main motivation for this function is to pass kernel memory to the
4199e1d001faSBreno Leitao * protocol ioctl callbacks, instead of userspace memory.
4200e1d001faSBreno Leitao */
int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	/* rc > 0 is the "not handled by a special case" sentinel; the
	 * helpers below return <= 0 when they consumed the ioctl.
	 */
	int rc = 1;

	if (sk->sk_type == SOCK_RAW && sk->sk_family == AF_INET)
		rc = ipmr_sk_ioctl(sk, cmd, arg);
	else if (sk->sk_type == SOCK_RAW && sk->sk_family == AF_INET6)
		rc = ip6mr_sk_ioctl(sk, cmd, arg);
	else if (sk_is_phonet(sk))
		rc = phonet_sk_ioctl(sk, cmd, arg);

	/* If ioctl was processed, returns its value */
	if (rc <= 0)
		return rc;

	/* Otherwise call the default handler */
	return sock_ioctl_out(sk, cmd, arg);
}
EXPORT_SYMBOL(sk_ioctl);
4220