xref: /openbmc/linux/net/core/sock.c (revision 9144f784f852f9a125cabe9927b986d909bfa439)
12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds  * INET		An implementation of the TCP/IP protocol suite for the LINUX
41da177e4SLinus Torvalds  *		operating system.  INET is implemented using the  BSD Socket
51da177e4SLinus Torvalds  *		interface as the means of communication with the user level.
61da177e4SLinus Torvalds  *
71da177e4SLinus Torvalds  *		Generic socket support routines. Memory allocators, socket lock/release
81da177e4SLinus Torvalds  *		handler for protocols to use and generic option handler.
91da177e4SLinus Torvalds  *
1002c30a84SJesper Juhl  * Authors:	Ross Biro
111da177e4SLinus Torvalds  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
121da177e4SLinus Torvalds  *		Florian La Roche, <flla@stud.uni-sb.de>
131da177e4SLinus Torvalds  *		Alan Cox, <A.Cox@swansea.ac.uk>
141da177e4SLinus Torvalds  *
151da177e4SLinus Torvalds  * Fixes:
161da177e4SLinus Torvalds  *		Alan Cox	: 	Numerous verify_area() problems
171da177e4SLinus Torvalds  *		Alan Cox	:	Connecting on a connecting socket
181da177e4SLinus Torvalds  *					now returns an error for tcp.
191da177e4SLinus Torvalds  *		Alan Cox	:	sock->protocol is set correctly.
201da177e4SLinus Torvalds  *					and is not sometimes left as 0.
211da177e4SLinus Torvalds  *		Alan Cox	:	connect handles icmp errors on a
221da177e4SLinus Torvalds  *					connect properly. Unfortunately there
231da177e4SLinus Torvalds  *					is a restart syscall nasty there. I
241da177e4SLinus Torvalds  *					can't match BSD without hacking the C
251da177e4SLinus Torvalds  *					library. Ideas urgently sought!
261da177e4SLinus Torvalds  *		Alan Cox	:	Disallow bind() to addresses that are
271da177e4SLinus Torvalds  *					not ours - especially broadcast ones!!
281da177e4SLinus Torvalds  *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
291da177e4SLinus Torvalds  *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
301da177e4SLinus Torvalds  *					instead they leave that for the DESTROY timer.
311da177e4SLinus Torvalds  *		Alan Cox	:	Clean up error flag in accept
321da177e4SLinus Torvalds  *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
331da177e4SLinus Torvalds  *					was buggy. Put a remove_sock() in the handler
341da177e4SLinus Torvalds  *					for memory when we hit 0. Also altered the timer
351da177e4SLinus Torvalds  *					code. The ACK stuff can wait and needs major
361da177e4SLinus Torvalds  *					TCP layer surgery.
371da177e4SLinus Torvalds  *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
381da177e4SLinus Torvalds  *					and fixed timer/inet_bh race.
391da177e4SLinus Torvalds  *		Alan Cox	:	Added zapped flag for TCP
401da177e4SLinus Torvalds  *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
411da177e4SLinus Torvalds  *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
421da177e4SLinus Torvalds  *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
431da177e4SLinus Torvalds  *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
441da177e4SLinus Torvalds  *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
451da177e4SLinus Torvalds  *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
461da177e4SLinus Torvalds  *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
471da177e4SLinus Torvalds  *	Pauline Middelink	:	identd support
481da177e4SLinus Torvalds  *		Alan Cox	:	Fixed connect() taking signals I think.
491da177e4SLinus Torvalds  *		Alan Cox	:	SO_LINGER supported
501da177e4SLinus Torvalds  *		Alan Cox	:	Error reporting fixes
511da177e4SLinus Torvalds  *		Anonymous	:	inet_create tidied up (sk->reuse setting)
521da177e4SLinus Torvalds  *		Alan Cox	:	inet sockets don't set sk->type!
531da177e4SLinus Torvalds  *		Alan Cox	:	Split socket option code
541da177e4SLinus Torvalds  *		Alan Cox	:	Callbacks
551da177e4SLinus Torvalds  *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
561da177e4SLinus Torvalds  *		Alex		:	Removed restriction on inet fioctl
571da177e4SLinus Torvalds  *		Alan Cox	:	Splitting INET from NET core
581da177e4SLinus Torvalds  *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
591da177e4SLinus Torvalds  *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
601da177e4SLinus Torvalds  *		Alan Cox	:	Split IP from generic code
611da177e4SLinus Torvalds  *		Alan Cox	:	New kfree_skbmem()
621da177e4SLinus Torvalds  *		Alan Cox	:	Make SO_DEBUG superuser only.
631da177e4SLinus Torvalds  *		Alan Cox	:	Allow anyone to clear SO_DEBUG
641da177e4SLinus Torvalds  *					(compatibility fix)
651da177e4SLinus Torvalds  *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
661da177e4SLinus Torvalds  *		Alan Cox	:	Allocator for a socket is settable.
671da177e4SLinus Torvalds  *		Alan Cox	:	SO_ERROR includes soft errors.
681da177e4SLinus Torvalds  *		Alan Cox	:	Allow NULL arguments on some SO_ opts
691da177e4SLinus Torvalds  *		Alan Cox	: 	Generic socket allocation to make hooks
701da177e4SLinus Torvalds  *					easier (suggested by Craig Metz).
711da177e4SLinus Torvalds  *		Michael Pall	:	SO_ERROR returns positive errno again
721da177e4SLinus Torvalds  *              Steve Whitehouse:       Added default destructor to free
731da177e4SLinus Torvalds  *                                      protocol private data.
741da177e4SLinus Torvalds  *              Steve Whitehouse:       Added various other default routines
751da177e4SLinus Torvalds  *                                      common to several socket families.
761da177e4SLinus Torvalds  *              Chris Evans     :       Call suser() check last on F_SETOWN
771da177e4SLinus Torvalds  *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
781da177e4SLinus Torvalds  *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
791da177e4SLinus Torvalds  *		Andi Kleen	:	Fix write_space callback
801da177e4SLinus Torvalds  *		Chris Evans	:	Security fixes - signedness again
811da177e4SLinus Torvalds  *		Arnaldo C. Melo :       cleanups, use skb_queue_purge
821da177e4SLinus Torvalds  *
831da177e4SLinus Torvalds  * To Fix:
841da177e4SLinus Torvalds  */
851da177e4SLinus Torvalds 
86e005d193SJoe Perches #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
87e005d193SJoe Perches 
8880b14deeSRichard Cochran #include <asm/unaligned.h>
894fc268d2SRandy Dunlap #include <linux/capability.h>
901da177e4SLinus Torvalds #include <linux/errno.h>
91cb820f8eSRichard Cochran #include <linux/errqueue.h>
921da177e4SLinus Torvalds #include <linux/types.h>
931da177e4SLinus Torvalds #include <linux/socket.h>
941da177e4SLinus Torvalds #include <linux/in.h>
951da177e4SLinus Torvalds #include <linux/kernel.h>
961da177e4SLinus Torvalds #include <linux/module.h>
971da177e4SLinus Torvalds #include <linux/proc_fs.h>
981da177e4SLinus Torvalds #include <linux/seq_file.h>
991da177e4SLinus Torvalds #include <linux/sched.h>
100f1083048SVlastimil Babka #include <linux/sched/mm.h>
1011da177e4SLinus Torvalds #include <linux/timer.h>
1021da177e4SLinus Torvalds #include <linux/string.h>
1031da177e4SLinus Torvalds #include <linux/sockios.h>
1041da177e4SLinus Torvalds #include <linux/net.h>
1051da177e4SLinus Torvalds #include <linux/mm.h>
1061da177e4SLinus Torvalds #include <linux/slab.h>
1071da177e4SLinus Torvalds #include <linux/interrupt.h>
1081da177e4SLinus Torvalds #include <linux/poll.h>
1091da177e4SLinus Torvalds #include <linux/tcp.h>
110ef8ad307SEric Dumazet #include <linux/udp.h>
1111da177e4SLinus Torvalds #include <linux/init.h>
112a1f8e7f7SAl Viro #include <linux/highmem.h>
1133f551f94SEric W. Biederman #include <linux/user_namespace.h>
114c5905afbSIngo Molnar #include <linux/static_key.h>
1153969eb38SDavid S. Miller #include <linux/memcontrol.h>
1168c1ae10dSDavid S. Miller #include <linux/prefetch.h>
117a6c0d093SChristoph Hellwig #include <linux/compat.h>
118e1d001faSBreno Leitao #include <linux/mroute.h>
119e1d001faSBreno Leitao #include <linux/mroute6.h>
120e1d001faSBreno Leitao #include <linux/icmpv6.h>
1211da177e4SLinus Torvalds 
1227c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
1231da177e4SLinus Torvalds 
1241da177e4SLinus Torvalds #include <linux/netdevice.h>
1251da177e4SLinus Torvalds #include <net/protocol.h>
1261da177e4SLinus Torvalds #include <linux/skbuff.h>
127457c4cbcSEric W. Biederman #include <net/net_namespace.h>
1282e6599cbSArnaldo Carvalho de Melo #include <net/request_sock.h>
1291da177e4SLinus Torvalds #include <net/sock.h>
13020d49473SPatrick Ohly #include <linux/net_tstamp.h>
1311da177e4SLinus Torvalds #include <net/xfrm.h>
1321da177e4SLinus Torvalds #include <linux/ipsec.h>
133f8451725SHerbert Xu #include <net/cls_cgroup.h>
1345bc1421eSNeil Horman #include <net/netprio_cgroup.h>
135eb4cb008SCraig Gallek #include <linux/sock_diag.h>
1361da177e4SLinus Torvalds 
1371da177e4SLinus Torvalds #include <linux/filter.h>
138538950a1SCraig Gallek #include <net/sock_reuseport.h>
1396ac99e8fSMartin KaFai Lau #include <net/bpf_sk_storage.h>
1401da177e4SLinus Torvalds 
1413847ce32SSatoru Moriya #include <trace/events/sock.h>
1423847ce32SSatoru Moriya 
1431da177e4SLinus Torvalds #include <net/tcp.h>
144076bb0c8SEliezer Tamir #include <net/busy_poll.h>
145e1d001faSBreno Leitao #include <net/phonet/phonet.h>
14606021292SEliezer Tamir 
147d463126eSYangbo Lu #include <linux/ethtool.h>
148d463126eSYangbo Lu 
1496264f58cSJakub Kicinski #include "dev.h"
1506264f58cSJakub Kicinski 
/* Registry of registered protocols; entries are presumably linked in by
 * proto_register() elsewhere in this file. Guarded by proto_list_mutex. */
static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);

/* Forward declarations for the default write-space callbacks defined
 * later in this file. */
static void sock_def_write_space_wfree(struct sock *sk);
static void sock_def_write_space(struct sock *sk);
157a3b299daSEric W. Biederman /**
158a3b299daSEric W. Biederman  * sk_ns_capable - General socket capability test
159a3b299daSEric W. Biederman  * @sk: Socket to use a capability on or through
160a3b299daSEric W. Biederman  * @user_ns: The user namespace of the capability to use
161a3b299daSEric W. Biederman  * @cap: The capability to use
162a3b299daSEric W. Biederman  *
163a3b299daSEric W. Biederman  * Test to see if the opener of the socket had when the socket was
164a3b299daSEric W. Biederman  * created and the current process has the capability @cap in the user
165a3b299daSEric W. Biederman  * namespace @user_ns.
166a3b299daSEric W. Biederman  */
sk_ns_capable(const struct sock * sk,struct user_namespace * user_ns,int cap)167a3b299daSEric W. Biederman bool sk_ns_capable(const struct sock *sk,
168a3b299daSEric W. Biederman 		   struct user_namespace *user_ns, int cap)
169a3b299daSEric W. Biederman {
170a3b299daSEric W. Biederman 	return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
171a3b299daSEric W. Biederman 		ns_capable(user_ns, cap);
172a3b299daSEric W. Biederman }
173a3b299daSEric W. Biederman EXPORT_SYMBOL(sk_ns_capable);
174a3b299daSEric W. Biederman 
175a3b299daSEric W. Biederman /**
176a3b299daSEric W. Biederman  * sk_capable - Socket global capability test
177a3b299daSEric W. Biederman  * @sk: Socket to use a capability on or through
178e793c0f7SMasanari Iida  * @cap: The global capability to use
179a3b299daSEric W. Biederman  *
180a3b299daSEric W. Biederman  * Test to see if the opener of the socket had when the socket was
181a3b299daSEric W. Biederman  * created and the current process has the capability @cap in all user
182a3b299daSEric W. Biederman  * namespaces.
183a3b299daSEric W. Biederman  */
sk_capable(const struct sock * sk,int cap)184a3b299daSEric W. Biederman bool sk_capable(const struct sock *sk, int cap)
185a3b299daSEric W. Biederman {
186a3b299daSEric W. Biederman 	return sk_ns_capable(sk, &init_user_ns, cap);
187a3b299daSEric W. Biederman }
188a3b299daSEric W. Biederman EXPORT_SYMBOL(sk_capable);
189a3b299daSEric W. Biederman 
190a3b299daSEric W. Biederman /**
191a3b299daSEric W. Biederman  * sk_net_capable - Network namespace socket capability test
192a3b299daSEric W. Biederman  * @sk: Socket to use a capability on or through
193a3b299daSEric W. Biederman  * @cap: The capability to use
194a3b299daSEric W. Biederman  *
195e793c0f7SMasanari Iida  * Test to see if the opener of the socket had when the socket was created
196a3b299daSEric W. Biederman  * and the current process has the capability @cap over the network namespace
197a3b299daSEric W. Biederman  * the socket is a member of.
198a3b299daSEric W. Biederman  */
sk_net_capable(const struct sock * sk,int cap)199a3b299daSEric W. Biederman bool sk_net_capable(const struct sock *sk, int cap)
200a3b299daSEric W. Biederman {
201a3b299daSEric W. Biederman 	return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
202a3b299daSEric W. Biederman }
203a3b299daSEric W. Biederman EXPORT_SYMBOL(sk_net_capable);
204a3b299daSEric W. Biederman 
/*
 * Each address family might have different locking rules, so we have
 * one slock key per address family and separate keys for internal and
 * userspace sockets.
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_kern_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];
static struct lock_class_key af_family_kern_slock_keys[AF_MAX];

/*
 * Make lock validator output more readable. (we pre-construct these
 * strings build-time, so that runtime initialization of socket
 * locks is fast):
 */

/* Expands to one string literal per address family, each prefixed with
 * the literal @x; used to build the lockdep class-name tables below.
 * Entry order must track the AF_* numbering ("27"/"28" are the holes
 * left by removed families). */
#define _sock_locks(x)						  \
  x "AF_UNSPEC",	x "AF_UNIX"     ,	x "AF_INET"     , \
  x "AF_AX25"  ,	x "AF_IPX"      ,	x "AF_APPLETALK", \
  x "AF_NETROM",	x "AF_BRIDGE"   ,	x "AF_ATMPVC"   , \
  x "AF_X25"   ,	x "AF_INET6"    ,	x "AF_ROSE"     , \
  x "AF_DECnet",	x "AF_NETBEUI"  ,	x "AF_SECURITY" , \
  x "AF_KEY"   ,	x "AF_NETLINK"  ,	x "AF_PACKET"   , \
  x "AF_ASH"   ,	x "AF_ECONET"   ,	x "AF_ATMSVC"   , \
  x "AF_RDS"   ,	x "AF_SNA"      ,	x "AF_IRDA"     , \
  x "AF_PPPOX" ,	x "AF_WANPIPE"  ,	x "AF_LLC"      , \
  x "27"       ,	x "28"          ,	x "AF_CAN"      , \
  x "AF_TIPC"  ,	x "AF_BLUETOOTH",	x "IUCV"        , \
  x "AF_RXRPC" ,	x "AF_ISDN"     ,	x "AF_PHONET"   , \
  x "AF_IEEE802154",	x "AF_CAIF"	,	x "AF_ALG"      , \
  x "AF_NFC"   ,	x "AF_VSOCK"    ,	x "AF_KCM"      , \
  x "AF_QIPCRTR",	x "AF_SMC"	,	x "AF_XDP"	, \
  x "AF_MCTP"  , \
  x "AF_MAX"

/* Lockdep class names for userspace sockets. */
static const char *const af_family_key_strings[AF_MAX+1] = {
	_sock_locks("sk_lock-")
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
	_sock_locks("slock-")
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
	_sock_locks("clock-")
};

/* Lockdep class names for kernel-internal sockets ("k-" prefix). */
static const char *const af_family_kern_key_strings[AF_MAX+1] = {
	_sock_locks("k-sk_lock-")
};
static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
	_sock_locks("k-slock-")
};
static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
	_sock_locks("k-clock-")
};
/* Lockdep class names for the receive/write/error queue spinlocks. */
static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
	_sock_locks("rlock-")
};
static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
	_sock_locks("wlock-")
};
static const char *const af_family_elock_key_strings[AF_MAX+1] = {
	_sock_locks("elock-")
};

/*
 * sk_callback_lock and sk queues locking rules are per-address-family,
 * so split the lock classes by using a per-AF key:
 */
static struct lock_class_key af_callback_keys[AF_MAX];
static struct lock_class_key af_rlock_keys[AF_MAX];
static struct lock_class_key af_wlock_keys[AF_MAX];
static struct lock_class_key af_elock_keys[AF_MAX];
static struct lock_class_key af_kern_callback_keys[AF_MAX];
/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
EXPORT_SYMBOL(sysctl_wmem_max);
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
EXPORT_SYMBOL(sysctl_rmem_max);
/* Defaults for new sockets' send/receive buffers. */
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
/* Per-CPU reserve for socket memory accounting — see usage sites. */
int sysctl_mem_pcpu_rsv __read_mostly = SK_MEMORY_PCPU_RESERVE;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);

/* NOTE(review): presumably gates attaching payload data to tx timestamp
 * error-queue skbs — confirm at the usage sites. */
int sysctl_tstamp_allow_data __read_mostly = 1;

/* Counted static key tracking sockets with SOCK_MEMALLOC set; bumped by
 * sk_set_memalloc() and dropped by sk_clear_memalloc() below. */
DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
EXPORT_SYMBOL_GPL(memalloc_socks_key);
/**
 * sk_set_memalloc - sets %SOCK_MEMALLOC
 * @sk: socket to set it on
 *
 * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
 * It's the responsibility of the admin to adjust min_free_kbytes
 * to meet the requirements
 */
void sk_set_memalloc(struct sock *sk)
{
	sock_set_flag(sk, SOCK_MEMALLOC);
	/* Let this socket's allocations dip into memory reserves. */
	sk->sk_allocation |= __GFP_MEMALLOC;
	/* Counted key; paired with static_branch_dec() in
	 * sk_clear_memalloc(). */
	static_branch_inc(&memalloc_socks_key);
}
EXPORT_SYMBOL_GPL(sk_set_memalloc);
3127cb02404SMel Gorman 
/**
 * sk_clear_memalloc - clears %SOCK_MEMALLOC
 * @sk: socket to clear it on
 *
 * Undoes sk_set_memalloc(): clears the flag, removes __GFP_MEMALLOC
 * from sk_allocation, and drops the static key reference.
 */
void sk_clear_memalloc(struct sock *sk)
{
	sock_reset_flag(sk, SOCK_MEMALLOC);
	sk->sk_allocation &= ~__GFP_MEMALLOC;
	static_branch_dec(&memalloc_socks_key);

	/*
	 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
	 * progress of swapping. SOCK_MEMALLOC may be cleared while
	 * it has rmem allocations due to the last swapfile being deactivated
	 * but there is a risk that the socket is unusable due to exceeding
	 * the rmem limits. Reclaim the reserves and obey rmem limits again.
	 */
	sk_mem_reclaim(sk);
}
EXPORT_SYMBOL_GPL(sk_clear_memalloc);
3297cb02404SMel Gorman 
/* Run the protocol backlog-receive handler for a SOCK_MEMALLOC socket.
 *
 * The handler is invoked with direct reclaim disabled so that processing
 * packets for a memory-reserve socket cannot recurse into reclaim.
 * Returns the handler's verdict.
 */
int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	int ret;
	unsigned int noreclaim_flag;

	/* these should have been dropped before queueing */
	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));

	noreclaim_flag = memalloc_noreclaim_save();
	/* INDIRECT_CALL_INET may call tcp_v6_do_rcv()/tcp_v4_do_rcv()
	 * directly when sk_backlog_rcv points at one of them, avoiding an
	 * indirect call in the common TCP case. */
	ret = INDIRECT_CALL_INET(sk->sk_backlog_rcv,
				 tcp_v6_do_rcv,
				 tcp_v4_do_rcv,
				 sk, skb);
	memalloc_noreclaim_restore(noreclaim_flag);

	return ret;
}
EXPORT_SYMBOL(__sk_backlog_rcv);
348b4b9e355SMel Gorman 
sk_error_report(struct sock * sk)349e3ae2365SAlexander Aring void sk_error_report(struct sock *sk)
350e3ae2365SAlexander Aring {
351e3ae2365SAlexander Aring 	sk->sk_error_report(sk);
352e6a3e443SAlexander Aring 
353e6a3e443SAlexander Aring 	switch (sk->sk_family) {
354e6a3e443SAlexander Aring 	case AF_INET:
355e6a3e443SAlexander Aring 		fallthrough;
356e6a3e443SAlexander Aring 	case AF_INET6:
357e6a3e443SAlexander Aring 		trace_inet_sk_error_report(sk);
358e6a3e443SAlexander Aring 		break;
359e6a3e443SAlexander Aring 	default:
360e6a3e443SAlexander Aring 		break;
361e6a3e443SAlexander Aring 	}
362e3ae2365SAlexander Aring }
363e3ae2365SAlexander Aring EXPORT_SYMBOL(sk_error_report);
364e3ae2365SAlexander Aring 
/* Convert a jiffies timeout into the timeval ABI the caller expects and
 * store it at @optval.
 *
 * @timeo:       timeout in jiffies; MAX_SCHEDULE_TIMEOUT reports as {0,0}
 * @optval:      destination buffer (kernel pointer)
 * @old_timeval: true when the legacy timeval layouts are requested
 *
 * Returns the number of bytes written, which depends on the ABI chosen:
 * old_timeval32 for 32-bit compat callers, __kernel_old_timeval for
 * native legacy callers, __kernel_sock_timeval otherwise.
 */
int sock_get_timeout(long timeo, void *optval, bool old_timeval)
{
	struct __kernel_sock_timeval tv = { .tv_sec = 0, .tv_usec = 0 };

	/* "Wait forever" is reported to userspace as a zero timeout. */
	if (timeo != MAX_SCHEDULE_TIMEOUT) {
		tv.tv_sec = timeo / HZ;
		tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ;
	}

	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
		struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec };

		*(struct old_timeval32 *)optval = tv32;
		return sizeof(tv32);
	}

	if (old_timeval) {
		struct __kernel_old_timeval old_tv = {
			.tv_sec  = tv.tv_sec,
			.tv_usec = tv.tv_usec,
		};

		*(struct __kernel_old_timeval *)optval = old_tv;
		return sizeof(old_tv);
	}

	*(struct __kernel_sock_timeval *)optval = tv;
	return sizeof(tv);
}
EXPORT_SYMBOL(sock_get_timeout);
395a9beb86aSDeepa Dinamani 
/* Copy a user-supplied timeout into a __kernel_sock_timeval, accepting
 * whichever of the three timeval ABIs applies:
 *   - old_timeval32          (32-bit compat syscall, legacy layout)
 *   - __kernel_old_timeval   (native legacy layout)
 *   - __kernel_sock_timeval  (y2038-safe layout)
 *
 * Returns 0 on success, -EINVAL when @optlen is shorter than the
 * expected layout, or -EFAULT when the copy from @optval fails.
 */
int sock_copy_user_timeval(struct __kernel_sock_timeval *tv,
			   sockptr_t optval, int optlen, bool old_timeval)
{
	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
		struct old_timeval32 tv32;

		if (optlen < sizeof(tv32))
			return -EINVAL;

		if (copy_from_sockptr(&tv32, optval, sizeof(tv32)))
			return -EFAULT;
		tv->tv_sec = tv32.tv_sec;
		tv->tv_usec = tv32.tv_usec;
	} else if (old_timeval) {
		struct __kernel_old_timeval old_tv;

		if (optlen < sizeof(old_tv))
			return -EINVAL;
		if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv)))
			return -EFAULT;
		tv->tv_sec = old_tv.tv_sec;
		tv->tv_usec = old_tv.tv_usec;
	} else {
		/* New interface: copy the 64-bit layout straight through. */
		if (optlen < sizeof(*tv))
			return -EINVAL;
		if (copy_from_sockptr(tv, optval, sizeof(*tv)))
			return -EFAULT;
	}

	return 0;
}
EXPORT_SYMBOL(sock_copy_user_timeval);
4284c1e34c0SRichard Palethorpe 
/* Parse a user-supplied socket timeout and store it, converted to
 * jiffies, in *timeo_p.
 *
 * @timeo_p:     destination, written with WRITE_ONCE() (readers may
 *               access it locklessly)
 * @optval:      user/kernel pointer to the timeval payload
 * @optlen:      payload length
 * @old_timeval: true when a legacy timeval ABI is in use
 *
 * Returns 0 on success, -EDOM for an out-of-range tv_usec, or a
 * negative errno from sock_copy_user_timeval().
 */
static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
			    bool old_timeval)
{
	struct __kernel_sock_timeval tv;
	int err = sock_copy_user_timeval(&tv, optval, optlen, old_timeval);
	long val;

	if (err)
		return err;

	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
		return -EDOM;

	if (tv.tv_sec < 0) {
		/* Negative seconds are treated as "no timeout"; warn the
		 * offending task at most 10 times, rate-limited. */
		static int warned __read_mostly;

		WRITE_ONCE(*timeo_p, 0);
		if (warned < 10 && net_ratelimit()) {
			warned++;
			pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
				__func__, current->comm, task_pid_nr(current));
		}
		return 0;
	}
	/* {0,0} and values too large to represent in jiffies both map to
	 * MAX_SCHEDULE_TIMEOUT ("wait forever"); sub-tick usec rounds up. */
	val = MAX_SCHEDULE_TIMEOUT;
	if ((tv.tv_sec || tv.tv_usec) &&
	    (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)))
		val = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec,
						    USEC_PER_SEC / HZ);
	WRITE_ONCE(*timeo_p, val);
	return 0;
}
4611da177e4SLinus Torvalds 
sock_needs_netstamp(const struct sock * sk)462080a270fSHannes Frederic Sowa static bool sock_needs_netstamp(const struct sock *sk)
463080a270fSHannes Frederic Sowa {
464080a270fSHannes Frederic Sowa 	switch (sk->sk_family) {
465080a270fSHannes Frederic Sowa 	case AF_UNSPEC:
466080a270fSHannes Frederic Sowa 	case AF_UNIX:
467080a270fSHannes Frederic Sowa 		return false;
468080a270fSHannes Frederic Sowa 	default:
469080a270fSHannes Frederic Sowa 		return true;
470080a270fSHannes Frederic Sowa 	}
471080a270fSHannes Frederic Sowa }
472080a270fSHannes Frederic Sowa 
sock_disable_timestamp(struct sock * sk,unsigned long flags)47308e29af3SEric Dumazet static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
4741da177e4SLinus Torvalds {
47508e29af3SEric Dumazet 	if (sk->sk_flags & flags) {
47608e29af3SEric Dumazet 		sk->sk_flags &= ~flags;
477080a270fSHannes Frederic Sowa 		if (sock_needs_netstamp(sk) &&
478080a270fSHannes Frederic Sowa 		    !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
4791da177e4SLinus Torvalds 			net_disable_timestamp();
4801da177e4SLinus Torvalds 	}
4811da177e4SLinus Torvalds }
4821da177e4SLinus Torvalds 
4831da177e4SLinus Torvalds 
/* Charge @skb to @sk and append it to the receive queue, then notify
 * the socket via sk_data_ready().
 *
 * Returns 0 on success; -ENOMEM when sk_rcvbuf is already full or
 * -ENOBUFS when protocol memory accounting refuses the charge. On
 * failure sk_drops is bumped and the caller keeps ownership of @skb.
 */
int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	unsigned long flags;
	struct sk_buff_head *list = &sk->sk_receive_queue;

	/* Lockless check; sk_rcvbuf can change concurrently, hence
	 * READ_ONCE(). */
	if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) {
		atomic_inc(&sk->sk_drops);
		trace_sock_rcvqueue_full(sk, skb);
		return -ENOMEM;
	}

	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
		atomic_inc(&sk->sk_drops);
		return -ENOBUFS;
	}

	skb->dev = NULL;
	skb_set_owner_r(skb, sk);

	/* we escape from rcu protected region, make sure we dont leak
	 * a norefcounted dst
	 */
	skb_dst_force(skb);

	/* Receive queue is appended under its irqsave spinlock; the
	 * dropcount is recorded while the lock is held. */
	spin_lock_irqsave(&list->lock, flags);
	sock_skb_set_dropcount(sk, skb);
	__skb_queue_tail(list, skb);
	spin_unlock_irqrestore(&list->lock, flags);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk);
	return 0;
}
EXPORT_SYMBOL(__sock_queue_rcv_skb);
518e6afc8acSsamanthakumar 
sock_queue_rcv_skb_reason(struct sock * sk,struct sk_buff * skb,enum skb_drop_reason * reason)519c1b8a567SMenglong Dong int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb,
520c1b8a567SMenglong Dong 			      enum skb_drop_reason *reason)
521e6afc8acSsamanthakumar {
522c1b8a567SMenglong Dong 	enum skb_drop_reason drop_reason;
523e6afc8acSsamanthakumar 	int err;
524e6afc8acSsamanthakumar 
525e6afc8acSsamanthakumar 	err = sk_filter(sk, skb);
526c1b8a567SMenglong Dong 	if (err) {
527c1b8a567SMenglong Dong 		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
528c1b8a567SMenglong Dong 		goto out;
529e6afc8acSsamanthakumar 	}
530c1b8a567SMenglong Dong 	err = __sock_queue_rcv_skb(sk, skb);
531c1b8a567SMenglong Dong 	switch (err) {
532c1b8a567SMenglong Dong 	case -ENOMEM:
533c1b8a567SMenglong Dong 		drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
534c1b8a567SMenglong Dong 		break;
535c1b8a567SMenglong Dong 	case -ENOBUFS:
536c1b8a567SMenglong Dong 		drop_reason = SKB_DROP_REASON_PROTO_MEM;
537c1b8a567SMenglong Dong 		break;
538c1b8a567SMenglong Dong 	default:
539c1b8a567SMenglong Dong 		drop_reason = SKB_NOT_DROPPED_YET;
540c1b8a567SMenglong Dong 		break;
541c1b8a567SMenglong Dong 	}
542c1b8a567SMenglong Dong out:
543c1b8a567SMenglong Dong 	if (reason)
544c1b8a567SMenglong Dong 		*reason = drop_reason;
545c1b8a567SMenglong Dong 	return err;
546c1b8a567SMenglong Dong }
547c1b8a567SMenglong Dong EXPORT_SYMBOL(sock_queue_rcv_skb_reason);
548f0088a50SDenis Vlasenko 
/* Filter, lock and deliver @skb to @sk from bottom-half context.
 *
 * @nested:     take the socket spinlock with a nested lockdep annotation
 * @trim_cap:   passed through to sk_filter_trim_cap()
 * @refcounted: when true, one reference on @sk is consumed (sock_put())
 *              before returning
 *
 * If the socket is not owned by a user context the protocol backlog
 * handler runs immediately; otherwise the skb is queued on the socket
 * backlog (and dropped if the backlog is full). Returns NET_RX_SUCCESS
 * or the handler's verdict; @skb is freed on every drop path.
 */
int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
		     const int nested, unsigned int trim_cap, bool refcounted)
{
	int rc = NET_RX_SUCCESS;

	if (sk_filter_trim_cap(sk, skb, trim_cap))
		goto discard_and_relse;

	skb->dev = NULL;

	/* Lockless pre-check before taking the socket spinlock. */
	if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) {
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}
	if (nested)
		bh_lock_sock_nested(sk);
	else
		bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		/*
		 * trylock + unlock semantics:
		 */
		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

		rc = sk_backlog_rcv(sk, skb);

		mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
	} else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
		/* Owner busy and backlog full: drop. */
		bh_unlock_sock(sk);
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}

	bh_unlock_sock(sk);
out:
	if (refcounted)
		sock_put(sk);
	return rc;
discard_and_relse:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(__sk_receive_skb);
592f0088a50SDenis Vlasenko 
593bbd807dfSBrian Vazquez INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *,
594bbd807dfSBrian Vazquez 							  u32));
595bbd807dfSBrian Vazquez INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
596bbd807dfSBrian Vazquez 							   u32));
/*
 * Return the socket's cached dst if it is still valid for @cookie,
 * otherwise tear the cache down and return NULL.
 *
 * Validity is decided by dst->ops->check() (ipv4_dst_check/ip6_dst_check
 * via INDIRECT_CALL_INET) when dst->obsolete is set.  On invalidation the
 * cached tx queue mapping and pending dst confirmation are cleared, the
 * cache pointer is reset, and the dst reference is dropped.
 *
 * Uses __sk_dst_get(), i.e. no reference is taken on the returned dst;
 * presumably the caller must hold the socket lock or be in an RCU read
 * section — NOTE(review): confirm against __sk_dst_get()'s contract.
 */
__sk_dst_check(struct sock * sk,u32 cookie)597f0088a50SDenis Vlasenko struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
598f0088a50SDenis Vlasenko {
599b6c6712aSEric Dumazet 	struct dst_entry *dst = __sk_dst_get(sk);
600f0088a50SDenis Vlasenko 
601bbd807dfSBrian Vazquez 	if (dst && dst->obsolete &&
602bbd807dfSBrian Vazquez 	    INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
603bbd807dfSBrian Vazquez 			       dst, cookie) == NULL) {
604e022f0b4SKrishna Kumar 		sk_tx_queue_clear(sk);
60587324a50SEric Dumazet 		WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
606a9b3cd7fSStephen Hemminger 		RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
607f0088a50SDenis Vlasenko 		dst_release(dst);
608f0088a50SDenis Vlasenko 		return NULL;
609f0088a50SDenis Vlasenko 	}
610f0088a50SDenis Vlasenko 
611f0088a50SDenis Vlasenko 	return dst;
612f0088a50SDenis Vlasenko }
613f0088a50SDenis Vlasenko EXPORT_SYMBOL(__sk_dst_check);
614f0088a50SDenis Vlasenko 
/*
 * Reference-taking counterpart of __sk_dst_check(): fetch the cached dst
 * via sk_dst_get() and validate it against @cookie.  If the dst is
 * obsolete and dst->ops->check() fails, reset the socket's dst cache,
 * drop the obtained reference and return NULL.  On success the caller
 * receives the dst with the reference taken by sk_dst_get() — the caller
 * is responsible for releasing it.
 */
sk_dst_check(struct sock * sk,u32 cookie)615f0088a50SDenis Vlasenko struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
616f0088a50SDenis Vlasenko {
617f0088a50SDenis Vlasenko 	struct dst_entry *dst = sk_dst_get(sk);
618f0088a50SDenis Vlasenko 
619bbd807dfSBrian Vazquez 	if (dst && dst->obsolete &&
620bbd807dfSBrian Vazquez 	    INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
621bbd807dfSBrian Vazquez 			       dst, cookie) == NULL) {
622f0088a50SDenis Vlasenko 		sk_dst_reset(sk);
623f0088a50SDenis Vlasenko 		dst_release(dst);
624f0088a50SDenis Vlasenko 		return NULL;
625f0088a50SDenis Vlasenko 	}
626f0088a50SDenis Vlasenko 
627f0088a50SDenis Vlasenko 	return dst;
628f0088a50SDenis Vlasenko }
629f0088a50SDenis Vlasenko EXPORT_SYMBOL(sk_dst_check);
630f0088a50SDenis Vlasenko 
/*
 * Bind @sk to network interface @ifindex; caller must hold the socket
 * lock (hence the _locked suffix).
 *
 * Re-binding an already-bound socket requires CAP_NET_RAW in the
 * socket's network namespace.  @ifindex == 0 unbinds.  After updating
 * sk_bound_dev_if the protocol is rehashed (if it supports it) and the
 * cached route is reset.
 *
 * Returns 0 on success, -EPERM/-EINVAL on failure, or -ENOPROTOOPT when
 * the kernel is built without CONFIG_NETDEVICES.
 */
sock_bindtoindex_locked(struct sock * sk,int ifindex)6317594888cSChristoph Hellwig static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
632f5dd3d0cSDavid Herrmann {
633f5dd3d0cSDavid Herrmann 	int ret = -ENOPROTOOPT;
634f5dd3d0cSDavid Herrmann #ifdef CONFIG_NETDEVICES
635f5dd3d0cSDavid Herrmann 	struct net *net = sock_net(sk);
636f5dd3d0cSDavid Herrmann 
637f5dd3d0cSDavid Herrmann 	/* Sorry... */
638f5dd3d0cSDavid Herrmann 	ret = -EPERM;
639c427bfecSVincent Bernat 	if (sk->sk_bound_dev_if && !ns_capable(net->user_ns, CAP_NET_RAW))
640f5dd3d0cSDavid Herrmann 		goto out;
641f5dd3d0cSDavid Herrmann 
642f5dd3d0cSDavid Herrmann 	ret = -EINVAL;
643f5dd3d0cSDavid Herrmann 	if (ifindex < 0)
644f5dd3d0cSDavid Herrmann 		goto out;
645f5dd3d0cSDavid Herrmann 
646e5fccaa1SEric Dumazet 	/* Paired with all READ_ONCE() done locklessly. */
647e5fccaa1SEric Dumazet 	WRITE_ONCE(sk->sk_bound_dev_if, ifindex);
648e5fccaa1SEric Dumazet 
649f5dd3d0cSDavid Herrmann 	if (sk->sk_prot->rehash)
650f5dd3d0cSDavid Herrmann 		sk->sk_prot->rehash(sk);
651f5dd3d0cSDavid Herrmann 	sk_dst_reset(sk);
652f5dd3d0cSDavid Herrmann 
653f5dd3d0cSDavid Herrmann 	ret = 0;
654f5dd3d0cSDavid Herrmann 
655f5dd3d0cSDavid Herrmann out:
656f5dd3d0cSDavid Herrmann #endif
657f5dd3d0cSDavid Herrmann 
658f5dd3d0cSDavid Herrmann 	return ret;
659f5dd3d0cSDavid Herrmann }
660f5dd3d0cSDavid Herrmann 
/*
 * Public wrapper around sock_bindtoindex_locked().
 *
 * @lock_sk: when true, take and release the socket lock around the bind;
 * callers that already hold the lock pass false.  Returns the result of
 * sock_bindtoindex_locked().
 */
sock_bindtoindex(struct sock * sk,int ifindex,bool lock_sk)6618ea204c2SFerenc Fejes int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk)
6627594888cSChristoph Hellwig {
6637594888cSChristoph Hellwig 	int ret;
6647594888cSChristoph Hellwig 
6658ea204c2SFerenc Fejes 	if (lock_sk)
6667594888cSChristoph Hellwig 		lock_sock(sk);
6677594888cSChristoph Hellwig 	ret = sock_bindtoindex_locked(sk, ifindex);
6688ea204c2SFerenc Fejes 	if (lock_sk)
6697594888cSChristoph Hellwig 		release_sock(sk);
6707594888cSChristoph Hellwig 
6717594888cSChristoph Hellwig 	return ret;
6727594888cSChristoph Hellwig }
6737594888cSChristoph Hellwig EXPORT_SYMBOL(sock_bindtoindex);
6747594888cSChristoph Hellwig 
/*
 * SO_BINDTODEVICE setsockopt handler: translate the user-supplied
 * interface name in @optval into an ifindex and bind the socket to it.
 *
 * An empty name ("" or zero @optlen) unbinds (index 0).  Names longer
 * than IFNAMSIZ-1 are silently truncated.  The device lookup is done
 * under rcu_read_lock(); only the ifindex is kept, so no device
 * reference is held afterwards.
 *
 * Returns 0 on success; -EINVAL/-EFAULT/-ENODEV on error, plus whatever
 * sock_bindtoindex_locked() returns; -ENOPROTOOPT without
 * CONFIG_NETDEVICES.
 */
sock_setbindtodevice(struct sock * sk,sockptr_t optval,int optlen)6755790642bSChristoph Hellwig static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
6764878809fSDavid S. Miller {
6774878809fSDavid S. Miller 	int ret = -ENOPROTOOPT;
6784878809fSDavid S. Miller #ifdef CONFIG_NETDEVICES
6793b1e0a65SYOSHIFUJI Hideaki 	struct net *net = sock_net(sk);
6804878809fSDavid S. Miller 	char devname[IFNAMSIZ];
6814878809fSDavid S. Miller 	int index;
6824878809fSDavid S. Miller 
6834878809fSDavid S. Miller 	ret = -EINVAL;
6844878809fSDavid S. Miller 	if (optlen < 0)
6854878809fSDavid S. Miller 		goto out;
6864878809fSDavid S. Miller 
6874878809fSDavid S. Miller 	/* Bind this socket to a particular device like "eth0",
6884878809fSDavid S. Miller 	 * as specified in the passed interface name. If the
6894878809fSDavid S. Miller 	 * name is "" or the option length is zero the socket
6904878809fSDavid S. Miller 	 * is not bound.
6914878809fSDavid S. Miller 	 */
6924878809fSDavid S. Miller 	if (optlen > IFNAMSIZ - 1)
6934878809fSDavid S. Miller 		optlen = IFNAMSIZ - 1;
6944878809fSDavid S. Miller 	memset(devname, 0, sizeof(devname));
6954878809fSDavid S. Miller 
6964878809fSDavid S. Miller 	ret = -EFAULT;
6975790642bSChristoph Hellwig 	if (copy_from_sockptr(devname, optval, optlen))
6984878809fSDavid S. Miller 		goto out;
6994878809fSDavid S. Miller 
7004878809fSDavid S. Miller 	index = 0;
701000ba2e4SDavid S. Miller 	if (devname[0] != '\0') {
702bf8e56bfSEric Dumazet 		struct net_device *dev;
7034878809fSDavid S. Miller 
704bf8e56bfSEric Dumazet 		rcu_read_lock();
705bf8e56bfSEric Dumazet 		dev = dev_get_by_name_rcu(net, devname);
706bf8e56bfSEric Dumazet 		if (dev)
707bf8e56bfSEric Dumazet 			index = dev->ifindex;
708bf8e56bfSEric Dumazet 		rcu_read_unlock();
7094878809fSDavid S. Miller 		ret = -ENODEV;
7104878809fSDavid S. Miller 		if (!dev)
7114878809fSDavid S. Miller 			goto out;
7124878809fSDavid S. Miller 	}
7134878809fSDavid S. Miller 
	/* sockopt_lock_sock() is a no-op when called from a BPF prog,
	 * which already holds the socket lock.
	 */
71424426654SMartin KaFai Lau 	sockopt_lock_sock(sk);
71524426654SMartin KaFai Lau 	ret = sock_bindtoindex_locked(sk, index);
71624426654SMartin KaFai Lau 	sockopt_release_sock(sk);
7174878809fSDavid S. Miller out:
7184878809fSDavid S. Miller #endif
7194878809fSDavid S. Miller 
7204878809fSDavid S. Miller 	return ret;
7214878809fSDavid S. Miller }
/*
 * SO_BINDTODEVICE getsockopt handler: copy the name of the interface the
 * socket is bound to into @optval and the resulting length (including
 * the NUL terminator) into @optlen.
 *
 * An unbound socket (sk_bound_dev_if == 0) reports length 0 and no name.
 * @len must be at least IFNAMSIZ, else -EINVAL.  Returns 0 on success,
 * -EFAULT on copy failures, netdev_get_name() errors, or -ENOPROTOOPT
 * without CONFIG_NETDEVICES.
 */
sock_getbindtodevice(struct sock * sk,sockptr_t optval,sockptr_t optlen,int len)7234ff09db1SMartin KaFai Lau static int sock_getbindtodevice(struct sock *sk, sockptr_t optval,
7244ff09db1SMartin KaFai Lau 				sockptr_t optlen, int len)
725c91f6df2SBrian Haley {
726c91f6df2SBrian Haley 	int ret = -ENOPROTOOPT;
727c91f6df2SBrian Haley #ifdef CONFIG_NETDEVICES
	/* Lockless read; paired with WRITE_ONCE() in sock_bindtoindex_locked(). */
728e5fccaa1SEric Dumazet 	int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
729c91f6df2SBrian Haley 	struct net *net = sock_net(sk);
730c91f6df2SBrian Haley 	char devname[IFNAMSIZ];
731c91f6df2SBrian Haley 
732e5fccaa1SEric Dumazet 	if (bound_dev_if == 0) {
733c91f6df2SBrian Haley 		len = 0;
734c91f6df2SBrian Haley 		goto zero;
735c91f6df2SBrian Haley 	}
736c91f6df2SBrian Haley 
737c91f6df2SBrian Haley 	ret = -EINVAL;
738c91f6df2SBrian Haley 	if (len < IFNAMSIZ)
739c91f6df2SBrian Haley 		goto out;
740c91f6df2SBrian Haley 
741e5fccaa1SEric Dumazet 	ret = netdev_get_name(net, devname, bound_dev_if);
7425dbe7c17SNicolas Schichan 	if (ret)
743c91f6df2SBrian Haley 		goto out;
744c91f6df2SBrian Haley 
745c91f6df2SBrian Haley 	len = strlen(devname) + 1;
746c91f6df2SBrian Haley 
747c91f6df2SBrian Haley 	ret = -EFAULT;
7484ff09db1SMartin KaFai Lau 	if (copy_to_sockptr(optval, devname, len))
749c91f6df2SBrian Haley 		goto out;
750c91f6df2SBrian Haley 
751c91f6df2SBrian Haley zero:
752c91f6df2SBrian Haley 	ret = -EFAULT;
7534ff09db1SMartin KaFai Lau 	if (copy_to_sockptr(optlen, &len, sizeof(int)))
754c91f6df2SBrian Haley 		goto out;
755c91f6df2SBrian Haley 
756c91f6df2SBrian Haley 	ret = 0;
757c91f6df2SBrian Haley 
758c91f6df2SBrian Haley out:
759c91f6df2SBrian Haley #endif
760c91f6df2SBrian Haley 
761c91f6df2SBrian Haley 	return ret;
762c91f6df2SBrian Haley }
763c91f6df2SBrian Haley 
sk_mc_loop(struct sock * sk)764f60e5990Shannes@stressinduktion.org bool sk_mc_loop(struct sock *sk)
765f60e5990Shannes@stressinduktion.org {
766f60e5990Shannes@stressinduktion.org 	if (dev_recursion_level())
767f60e5990Shannes@stressinduktion.org 		return false;
768f60e5990Shannes@stressinduktion.org 	if (!sk)
769f60e5990Shannes@stressinduktion.org 		return true;
770a3e0fdf7SEric Dumazet 	/* IPV6_ADDRFORM can change sk->sk_family under us. */
771a3e0fdf7SEric Dumazet 	switch (READ_ONCE(sk->sk_family)) {
772f60e5990Shannes@stressinduktion.org 	case AF_INET:
773b09bde5cSEric Dumazet 		return inet_test_bit(MC_LOOP, sk);
774f60e5990Shannes@stressinduktion.org #if IS_ENABLED(CONFIG_IPV6)
775f60e5990Shannes@stressinduktion.org 	case AF_INET6:
776f60e5990Shannes@stressinduktion.org 		return inet6_sk(sk)->mc_loop;
777f60e5990Shannes@stressinduktion.org #endif
778f60e5990Shannes@stressinduktion.org 	}
7790ad6f6e7SEric Dumazet 	WARN_ON_ONCE(1);
780f60e5990Shannes@stressinduktion.org 	return true;
781f60e5990Shannes@stressinduktion.org }
782f60e5990Shannes@stressinduktion.org EXPORT_SYMBOL(sk_mc_loop);
783f60e5990Shannes@stressinduktion.org 
sock_set_reuseaddr(struct sock * sk)784b58f0e8fSChristoph Hellwig void sock_set_reuseaddr(struct sock *sk)
785b58f0e8fSChristoph Hellwig {
786b58f0e8fSChristoph Hellwig 	lock_sock(sk);
787b58f0e8fSChristoph Hellwig 	sk->sk_reuse = SK_CAN_REUSE;
788b58f0e8fSChristoph Hellwig 	release_sock(sk);
789b58f0e8fSChristoph Hellwig }
790b58f0e8fSChristoph Hellwig EXPORT_SYMBOL(sock_set_reuseaddr);
791b58f0e8fSChristoph Hellwig 
sock_set_reuseport(struct sock * sk)792fe31a326SChristoph Hellwig void sock_set_reuseport(struct sock *sk)
793fe31a326SChristoph Hellwig {
794fe31a326SChristoph Hellwig 	lock_sock(sk);
795fe31a326SChristoph Hellwig 	sk->sk_reuseport = true;
796fe31a326SChristoph Hellwig 	release_sock(sk);
797fe31a326SChristoph Hellwig }
798fe31a326SChristoph Hellwig EXPORT_SYMBOL(sock_set_reuseport);
799fe31a326SChristoph Hellwig 
sock_no_linger(struct sock * sk)800c433594cSChristoph Hellwig void sock_no_linger(struct sock *sk)
801c433594cSChristoph Hellwig {
802c433594cSChristoph Hellwig 	lock_sock(sk);
803bc1fb82aSEric Dumazet 	WRITE_ONCE(sk->sk_lingertime, 0);
804c433594cSChristoph Hellwig 	sock_set_flag(sk, SOCK_LINGER);
805c433594cSChristoph Hellwig 	release_sock(sk);
806c433594cSChristoph Hellwig }
807c433594cSChristoph Hellwig EXPORT_SYMBOL(sock_no_linger);
808c433594cSChristoph Hellwig 
sock_set_priority(struct sock * sk,u32 priority)8096e434967SChristoph Hellwig void sock_set_priority(struct sock *sk, u32 priority)
8106e434967SChristoph Hellwig {
8116e434967SChristoph Hellwig 	lock_sock(sk);
8128bf43be7SEric Dumazet 	WRITE_ONCE(sk->sk_priority, priority);
8136e434967SChristoph Hellwig 	release_sock(sk);
8146e434967SChristoph Hellwig }
8156e434967SChristoph Hellwig EXPORT_SYMBOL(sock_set_priority);
8166e434967SChristoph Hellwig 
sock_set_sndtimeo(struct sock * sk,s64 secs)81776ee0785SChristoph Hellwig void sock_set_sndtimeo(struct sock *sk, s64 secs)
81876ee0785SChristoph Hellwig {
81976ee0785SChristoph Hellwig 	lock_sock(sk);
82076ee0785SChristoph Hellwig 	if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
821285975ddSEric Dumazet 		WRITE_ONCE(sk->sk_sndtimeo, secs * HZ);
82276ee0785SChristoph Hellwig 	else
823285975ddSEric Dumazet 		WRITE_ONCE(sk->sk_sndtimeo, MAX_SCHEDULE_TIMEOUT);
82476ee0785SChristoph Hellwig 	release_sock(sk);
82576ee0785SChristoph Hellwig }
82676ee0785SChristoph Hellwig EXPORT_SYMBOL(sock_set_sndtimeo);
82776ee0785SChristoph Hellwig 
__sock_set_timestamps(struct sock * sk,bool val,bool new,bool ns)828783da70eSChristoph Hellwig static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
829783da70eSChristoph Hellwig {
830783da70eSChristoph Hellwig 	if (val)  {
831783da70eSChristoph Hellwig 		sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new);
832783da70eSChristoph Hellwig 		sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns);
833783da70eSChristoph Hellwig 		sock_set_flag(sk, SOCK_RCVTSTAMP);
834783da70eSChristoph Hellwig 		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
835783da70eSChristoph Hellwig 	} else {
836783da70eSChristoph Hellwig 		sock_reset_flag(sk, SOCK_RCVTSTAMP);
837783da70eSChristoph Hellwig 		sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
838783da70eSChristoph Hellwig 	}
839783da70eSChristoph Hellwig }
840783da70eSChristoph Hellwig 
sock_enable_timestamps(struct sock * sk)841783da70eSChristoph Hellwig void sock_enable_timestamps(struct sock *sk)
842783da70eSChristoph Hellwig {
843783da70eSChristoph Hellwig 	lock_sock(sk);
844783da70eSChristoph Hellwig 	__sock_set_timestamps(sk, true, false, true);
845783da70eSChristoph Hellwig 	release_sock(sk);
846783da70eSChristoph Hellwig }
847783da70eSChristoph Hellwig EXPORT_SYMBOL(sock_enable_timestamps);
848783da70eSChristoph Hellwig 
sock_set_timestamp(struct sock * sk,int optname,bool valbool)849371087aaSFlorian Westphal void sock_set_timestamp(struct sock *sk, int optname, bool valbool)
850371087aaSFlorian Westphal {
851371087aaSFlorian Westphal 	switch (optname) {
852371087aaSFlorian Westphal 	case SO_TIMESTAMP_OLD:
853371087aaSFlorian Westphal 		__sock_set_timestamps(sk, valbool, false, false);
854371087aaSFlorian Westphal 		break;
855371087aaSFlorian Westphal 	case SO_TIMESTAMP_NEW:
856371087aaSFlorian Westphal 		__sock_set_timestamps(sk, valbool, true, false);
857371087aaSFlorian Westphal 		break;
858371087aaSFlorian Westphal 	case SO_TIMESTAMPNS_OLD:
859371087aaSFlorian Westphal 		__sock_set_timestamps(sk, valbool, false, true);
860371087aaSFlorian Westphal 		break;
861371087aaSFlorian Westphal 	case SO_TIMESTAMPNS_NEW:
862371087aaSFlorian Westphal 		__sock_set_timestamps(sk, valbool, true, true);
863371087aaSFlorian Westphal 		break;
864371087aaSFlorian Westphal 	}
865371087aaSFlorian Westphal }
866371087aaSFlorian Westphal 
sock_timestamping_bind_phc(struct sock * sk,int phc_index)867d463126eSYangbo Lu static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
868ced122d9SFlorian Westphal {
869d463126eSYangbo Lu 	struct net *net = sock_net(sk);
870d463126eSYangbo Lu 	struct net_device *dev = NULL;
871d463126eSYangbo Lu 	bool match = false;
872d463126eSYangbo Lu 	int *vclock_index;
873d463126eSYangbo Lu 	int i, num;
874d463126eSYangbo Lu 
875d463126eSYangbo Lu 	if (sk->sk_bound_dev_if)
876d463126eSYangbo Lu 		dev = dev_get_by_index(net, sk->sk_bound_dev_if);
877d463126eSYangbo Lu 
878d463126eSYangbo Lu 	if (!dev) {
879d463126eSYangbo Lu 		pr_err("%s: sock not bind to device\n", __func__);
880d463126eSYangbo Lu 		return -EOPNOTSUPP;
881d463126eSYangbo Lu 	}
882d463126eSYangbo Lu 
883d463126eSYangbo Lu 	num = ethtool_get_phc_vclocks(dev, &vclock_index);
8842a4d75bfSMiroslav Lichvar 	dev_put(dev);
8852a4d75bfSMiroslav Lichvar 
886d463126eSYangbo Lu 	for (i = 0; i < num; i++) {
887d463126eSYangbo Lu 		if (*(vclock_index + i) == phc_index) {
888d463126eSYangbo Lu 			match = true;
889d463126eSYangbo Lu 			break;
890d463126eSYangbo Lu 		}
891d463126eSYangbo Lu 	}
892d463126eSYangbo Lu 
893d463126eSYangbo Lu 	if (num > 0)
894d463126eSYangbo Lu 		kfree(vclock_index);
895d463126eSYangbo Lu 
896d463126eSYangbo Lu 	if (!match)
897d463126eSYangbo Lu 		return -EINVAL;
898d463126eSYangbo Lu 
899251cd405SEric Dumazet 	WRITE_ONCE(sk->sk_bind_phc, phc_index);
900d463126eSYangbo Lu 
901d463126eSYangbo Lu 	return 0;
902d463126eSYangbo Lu }
903d463126eSYangbo Lu 
sock_set_timestamping(struct sock * sk,int optname,struct so_timestamping timestamping)904d463126eSYangbo Lu int sock_set_timestamping(struct sock *sk, int optname,
905d463126eSYangbo Lu 			  struct so_timestamping timestamping)
906d463126eSYangbo Lu {
907d463126eSYangbo Lu 	int val = timestamping.flags;
908d463126eSYangbo Lu 	int ret;
909d463126eSYangbo Lu 
910ced122d9SFlorian Westphal 	if (val & ~SOF_TIMESTAMPING_MASK)
911ced122d9SFlorian Westphal 		return -EINVAL;
912ced122d9SFlorian Westphal 
913b534dc46SWillem de Bruijn 	if (val & SOF_TIMESTAMPING_OPT_ID_TCP &&
914b534dc46SWillem de Bruijn 	    !(val & SOF_TIMESTAMPING_OPT_ID))
915b534dc46SWillem de Bruijn 		return -EINVAL;
916b534dc46SWillem de Bruijn 
917ced122d9SFlorian Westphal 	if (val & SOF_TIMESTAMPING_OPT_ID &&
918ced122d9SFlorian Westphal 	    !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
91942f67eeaSEric Dumazet 		if (sk_is_tcp(sk)) {
920ced122d9SFlorian Westphal 			if ((1 << sk->sk_state) &
921ced122d9SFlorian Westphal 			    (TCPF_CLOSE | TCPF_LISTEN))
922ced122d9SFlorian Westphal 				return -EINVAL;
923b534dc46SWillem de Bruijn 			if (val & SOF_TIMESTAMPING_OPT_ID_TCP)
924b534dc46SWillem de Bruijn 				atomic_set(&sk->sk_tskey, tcp_sk(sk)->write_seq);
925b534dc46SWillem de Bruijn 			else
926a1cdec57SEric Dumazet 				atomic_set(&sk->sk_tskey, tcp_sk(sk)->snd_una);
927ced122d9SFlorian Westphal 		} else {
928a1cdec57SEric Dumazet 			atomic_set(&sk->sk_tskey, 0);
929ced122d9SFlorian Westphal 		}
930ced122d9SFlorian Westphal 	}
931ced122d9SFlorian Westphal 
932ced122d9SFlorian Westphal 	if (val & SOF_TIMESTAMPING_OPT_STATS &&
933ced122d9SFlorian Westphal 	    !(val & SOF_TIMESTAMPING_OPT_TSONLY))
934ced122d9SFlorian Westphal 		return -EINVAL;
935ced122d9SFlorian Westphal 
936d463126eSYangbo Lu 	if (val & SOF_TIMESTAMPING_BIND_PHC) {
937d463126eSYangbo Lu 		ret = sock_timestamping_bind_phc(sk, timestamping.bind_phc);
938d463126eSYangbo Lu 		if (ret)
939d463126eSYangbo Lu 			return ret;
940d463126eSYangbo Lu 	}
941d463126eSYangbo Lu 
942e3390b30SEric Dumazet 	WRITE_ONCE(sk->sk_tsflags, val);
943ced122d9SFlorian Westphal 	sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
944ced122d9SFlorian Westphal 
945ced122d9SFlorian Westphal 	if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
946ced122d9SFlorian Westphal 		sock_enable_timestamp(sk,
947ced122d9SFlorian Westphal 				      SOCK_TIMESTAMPING_RX_SOFTWARE);
948ced122d9SFlorian Westphal 	else
949ced122d9SFlorian Westphal 		sock_disable_timestamp(sk,
950ced122d9SFlorian Westphal 				       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
951ced122d9SFlorian Westphal 	return 0;
952ced122d9SFlorian Westphal }
953ced122d9SFlorian Westphal 
sock_set_keepalive(struct sock * sk)954ce3d9544SChristoph Hellwig void sock_set_keepalive(struct sock *sk)
955ce3d9544SChristoph Hellwig {
956ce3d9544SChristoph Hellwig 	lock_sock(sk);
957ce3d9544SChristoph Hellwig 	if (sk->sk_prot->keepalive)
958ce3d9544SChristoph Hellwig 		sk->sk_prot->keepalive(sk, true);
959ce3d9544SChristoph Hellwig 	sock_valbool_flag(sk, SOCK_KEEPOPEN, true);
960ce3d9544SChristoph Hellwig 	release_sock(sk);
961ce3d9544SChristoph Hellwig }
962ce3d9544SChristoph Hellwig EXPORT_SYMBOL(sock_set_keepalive);
963ce3d9544SChristoph Hellwig 
__sock_set_rcvbuf(struct sock * sk,int val)96426cfabf9SChristoph Hellwig static void __sock_set_rcvbuf(struct sock *sk, int val)
96526cfabf9SChristoph Hellwig {
96626cfabf9SChristoph Hellwig 	/* Ensure val * 2 fits into an int, to prevent max_t() from treating it
96726cfabf9SChristoph Hellwig 	 * as a negative value.
96826cfabf9SChristoph Hellwig 	 */
96926cfabf9SChristoph Hellwig 	val = min_t(int, val, INT_MAX / 2);
97026cfabf9SChristoph Hellwig 	sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
97126cfabf9SChristoph Hellwig 
97226cfabf9SChristoph Hellwig 	/* We double it on the way in to account for "struct sk_buff" etc.
97326cfabf9SChristoph Hellwig 	 * overhead.   Applications assume that the SO_RCVBUF setting they make
97426cfabf9SChristoph Hellwig 	 * will allow that much actual data to be received on that socket.
97526cfabf9SChristoph Hellwig 	 *
97626cfabf9SChristoph Hellwig 	 * Applications are unaware that "struct sk_buff" and other overheads
97726cfabf9SChristoph Hellwig 	 * allocate from the receive buffer during socket buffer allocation.
97826cfabf9SChristoph Hellwig 	 *
97926cfabf9SChristoph Hellwig 	 * And after considering the possible alternatives, returning the value
98026cfabf9SChristoph Hellwig 	 * we actually used in getsockopt is the most desirable behavior.
98126cfabf9SChristoph Hellwig 	 */
98226cfabf9SChristoph Hellwig 	WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF));
98326cfabf9SChristoph Hellwig }
98426cfabf9SChristoph Hellwig 
sock_set_rcvbuf(struct sock * sk,int val)98526cfabf9SChristoph Hellwig void sock_set_rcvbuf(struct sock *sk, int val)
98626cfabf9SChristoph Hellwig {
98726cfabf9SChristoph Hellwig 	lock_sock(sk);
98826cfabf9SChristoph Hellwig 	__sock_set_rcvbuf(sk, val);
98926cfabf9SChristoph Hellwig 	release_sock(sk);
99026cfabf9SChristoph Hellwig }
99126cfabf9SChristoph Hellwig EXPORT_SYMBOL(sock_set_rcvbuf);
99226cfabf9SChristoph Hellwig 
__sock_set_mark(struct sock * sk,u32 val)993dd9082f4SAlexander Aring static void __sock_set_mark(struct sock *sk, u32 val)
994dd9082f4SAlexander Aring {
995dd9082f4SAlexander Aring 	if (val != sk->sk_mark) {
9963c5b4d69SEric Dumazet 		WRITE_ONCE(sk->sk_mark, val);
997dd9082f4SAlexander Aring 		sk_dst_reset(sk);
998dd9082f4SAlexander Aring 	}
999dd9082f4SAlexander Aring }
1000dd9082f4SAlexander Aring 
sock_set_mark(struct sock * sk,u32 val)100184d1c617SAlexander Aring void sock_set_mark(struct sock *sk, u32 val)
100284d1c617SAlexander Aring {
100384d1c617SAlexander Aring 	lock_sock(sk);
1004dd9082f4SAlexander Aring 	__sock_set_mark(sk, val);
100584d1c617SAlexander Aring 	release_sock(sk);
100684d1c617SAlexander Aring }
100784d1c617SAlexander Aring EXPORT_SYMBOL(sock_set_mark);
100884d1c617SAlexander Aring 
sock_release_reserved_memory(struct sock * sk,int bytes)10092bb2f5fbSWei Wang static void sock_release_reserved_memory(struct sock *sk, int bytes)
10102bb2f5fbSWei Wang {
10112bb2f5fbSWei Wang 	/* Round down bytes to multiple of pages */
1012100fdd1fSEric Dumazet 	bytes = round_down(bytes, PAGE_SIZE);
10132bb2f5fbSWei Wang 
10142bb2f5fbSWei Wang 	WARN_ON(bytes > sk->sk_reserved_mem);
1015fe11fdcbSEric Dumazet 	WRITE_ONCE(sk->sk_reserved_mem, sk->sk_reserved_mem - bytes);
10162bb2f5fbSWei Wang 	sk_mem_reclaim(sk);
10172bb2f5fbSWei Wang }
10182bb2f5fbSWei Wang 
sock_reserve_memory(struct sock * sk,int bytes)10192bb2f5fbSWei Wang static int sock_reserve_memory(struct sock *sk, int bytes)
10202bb2f5fbSWei Wang {
10212bb2f5fbSWei Wang 	long allocated;
10222bb2f5fbSWei Wang 	bool charged;
10232bb2f5fbSWei Wang 	int pages;
10242bb2f5fbSWei Wang 
1025d00c8ee3SEric Dumazet 	if (!mem_cgroup_sockets_enabled || !sk->sk_memcg || !sk_has_account(sk))
10262bb2f5fbSWei Wang 		return -EOPNOTSUPP;
10272bb2f5fbSWei Wang 
10282bb2f5fbSWei Wang 	if (!bytes)
10292bb2f5fbSWei Wang 		return 0;
10302bb2f5fbSWei Wang 
10312bb2f5fbSWei Wang 	pages = sk_mem_pages(bytes);
10322bb2f5fbSWei Wang 
10332bb2f5fbSWei Wang 	/* pre-charge to memcg */
10342bb2f5fbSWei Wang 	charged = mem_cgroup_charge_skmem(sk->sk_memcg, pages,
10352bb2f5fbSWei Wang 					  GFP_KERNEL | __GFP_RETRY_MAYFAIL);
10362bb2f5fbSWei Wang 	if (!charged)
10372bb2f5fbSWei Wang 		return -ENOMEM;
10382bb2f5fbSWei Wang 
10392bb2f5fbSWei Wang 	/* pre-charge to forward_alloc */
1040219160beSEric Dumazet 	sk_memory_allocated_add(sk, pages);
1041219160beSEric Dumazet 	allocated = sk_memory_allocated(sk);
10422bb2f5fbSWei Wang 	/* If the system goes into memory pressure with this
10432bb2f5fbSWei Wang 	 * precharge, give up and return error.
10442bb2f5fbSWei Wang 	 */
10452bb2f5fbSWei Wang 	if (allocated > sk_prot_mem_limits(sk, 1)) {
10462bb2f5fbSWei Wang 		sk_memory_allocated_sub(sk, pages);
10472bb2f5fbSWei Wang 		mem_cgroup_uncharge_skmem(sk->sk_memcg, pages);
10482bb2f5fbSWei Wang 		return -ENOMEM;
10492bb2f5fbSWei Wang 	}
10505e6300e7SEric Dumazet 	sk_forward_alloc_add(sk, pages << PAGE_SHIFT);
10512bb2f5fbSWei Wang 
1052fe11fdcbSEric Dumazet 	WRITE_ONCE(sk->sk_reserved_mem,
1053fe11fdcbSEric Dumazet 		   sk->sk_reserved_mem + (pages << PAGE_SHIFT));
10542bb2f5fbSWei Wang 
10552bb2f5fbSWei Wang 	return 0;
10562bb2f5fbSWei Wang }
10572bb2f5fbSWei Wang 
sockopt_lock_sock(struct sock * sk)105824426654SMartin KaFai Lau void sockopt_lock_sock(struct sock *sk)
105924426654SMartin KaFai Lau {
106024426654SMartin KaFai Lau 	/* When current->bpf_ctx is set, the setsockopt is called from
106124426654SMartin KaFai Lau 	 * a bpf prog.  bpf has ensured the sk lock has been
106224426654SMartin KaFai Lau 	 * acquired before calling setsockopt().
106324426654SMartin KaFai Lau 	 */
106424426654SMartin KaFai Lau 	if (has_current_bpf_ctx())
106524426654SMartin KaFai Lau 		return;
106624426654SMartin KaFai Lau 
106724426654SMartin KaFai Lau 	lock_sock(sk);
106824426654SMartin KaFai Lau }
106924426654SMartin KaFai Lau EXPORT_SYMBOL(sockopt_lock_sock);
107024426654SMartin KaFai Lau 
sockopt_release_sock(struct sock * sk)107124426654SMartin KaFai Lau void sockopt_release_sock(struct sock *sk)
107224426654SMartin KaFai Lau {
107324426654SMartin KaFai Lau 	if (has_current_bpf_ctx())
107424426654SMartin KaFai Lau 		return;
107524426654SMartin KaFai Lau 
107624426654SMartin KaFai Lau 	release_sock(sk);
107724426654SMartin KaFai Lau }
107824426654SMartin KaFai Lau EXPORT_SYMBOL(sockopt_release_sock);
107924426654SMartin KaFai Lau 
sockopt_ns_capable(struct user_namespace * ns,int cap)1080e42c7beeSMartin KaFai Lau bool sockopt_ns_capable(struct user_namespace *ns, int cap)
1081e42c7beeSMartin KaFai Lau {
1082e42c7beeSMartin KaFai Lau 	return has_current_bpf_ctx() || ns_capable(ns, cap);
1083e42c7beeSMartin KaFai Lau }
1084e42c7beeSMartin KaFai Lau EXPORT_SYMBOL(sockopt_ns_capable);
1085e42c7beeSMartin KaFai Lau 
sockopt_capable(int cap)1086e42c7beeSMartin KaFai Lau bool sockopt_capable(int cap)
1087e42c7beeSMartin KaFai Lau {
1088e42c7beeSMartin KaFai Lau 	return has_current_bpf_ctx() || capable(cap);
1089e42c7beeSMartin KaFai Lau }
1090e42c7beeSMartin KaFai Lau EXPORT_SYMBOL(sockopt_capable);
1091e42c7beeSMartin KaFai Lau 
10921da177e4SLinus Torvalds /*
10931da177e4SLinus Torvalds  *	This is meant for all protocols to use and covers goings on
10941da177e4SLinus Torvalds  *	at the socket level. Everything here is generic.
10951da177e4SLinus Torvalds  */
10961da177e4SLinus Torvalds 
sk_setsockopt(struct sock * sk,int level,int optname,sockptr_t optval,unsigned int optlen)109729003875SMartin KaFai Lau int sk_setsockopt(struct sock *sk, int level, int optname,
1098c8c1bbb6SChristoph Hellwig 		  sockptr_t optval, unsigned int optlen)
10991da177e4SLinus Torvalds {
1100d463126eSYangbo Lu 	struct so_timestamping timestamping;
11014d748f99SMartin KaFai Lau 	struct socket *sock = sk->sk_socket;
110280b14deeSRichard Cochran 	struct sock_txtime sk_txtime;
11031da177e4SLinus Torvalds 	int val;
11041da177e4SLinus Torvalds 	int valbool;
11051da177e4SLinus Torvalds 	struct linger ling;
11061da177e4SLinus Torvalds 	int ret = 0;
11071da177e4SLinus Torvalds 
11081da177e4SLinus Torvalds 	/*
11091da177e4SLinus Torvalds 	 *	Options without arguments
11101da177e4SLinus Torvalds 	 */
11111da177e4SLinus Torvalds 
11124878809fSDavid S. Miller 	if (optname == SO_BINDTODEVICE)
1113c91f6df2SBrian Haley 		return sock_setbindtodevice(sk, optval, optlen);
11144878809fSDavid S. Miller 
11151da177e4SLinus Torvalds 	if (optlen < sizeof(int))
1116e71a4783SStephen Hemminger 		return -EINVAL;
11171da177e4SLinus Torvalds 
1118c8c1bbb6SChristoph Hellwig 	if (copy_from_sockptr(&val, optval, sizeof(val)))
11191da177e4SLinus Torvalds 		return -EFAULT;
11201da177e4SLinus Torvalds 
11211da177e4SLinus Torvalds 	valbool = val ? 1 : 0;
11221da177e4SLinus Torvalds 
112324426654SMartin KaFai Lau 	sockopt_lock_sock(sk);
11241da177e4SLinus Torvalds 
1125e71a4783SStephen Hemminger 	switch (optname) {
11261da177e4SLinus Torvalds 	case SO_DEBUG:
1127e42c7beeSMartin KaFai Lau 		if (val && !sockopt_capable(CAP_NET_ADMIN))
11281da177e4SLinus Torvalds 			ret = -EACCES;
11292a91525cSEric Dumazet 		else
1130c0ef877bSPavel Emelyanov 			sock_valbool_flag(sk, SOCK_DBG, valbool);
11311da177e4SLinus Torvalds 		break;
11321da177e4SLinus Torvalds 	case SO_REUSEADDR:
1133cdb8744dSBart Van Assche 		sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
11341da177e4SLinus Torvalds 		break;
1135055dc21aSTom Herbert 	case SO_REUSEPORT:
1136*ad91a2daSEric Dumazet 		if (valbool && !sk_is_inet(sk))
1137*ad91a2daSEric Dumazet 			ret = -EOPNOTSUPP;
1138*ad91a2daSEric Dumazet 		else
1139055dc21aSTom Herbert 			sk->sk_reuseport = valbool;
1140055dc21aSTom Herbert 		break;
11411da177e4SLinus Torvalds 	case SO_TYPE:
114249c794e9SJan Engelhardt 	case SO_PROTOCOL:
11430d6038eeSJan Engelhardt 	case SO_DOMAIN:
11441da177e4SLinus Torvalds 	case SO_ERROR:
11451da177e4SLinus Torvalds 		ret = -ENOPROTOOPT;
11461da177e4SLinus Torvalds 		break;
11471da177e4SLinus Torvalds 	case SO_DONTROUTE:
1148c0ef877bSPavel Emelyanov 		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
11490fbe82e6Syupeng 		sk_dst_reset(sk);
11501da177e4SLinus Torvalds 		break;
11511da177e4SLinus Torvalds 	case SO_BROADCAST:
11521da177e4SLinus Torvalds 		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
11531da177e4SLinus Torvalds 		break;
11541da177e4SLinus Torvalds 	case SO_SNDBUF:
11551da177e4SLinus Torvalds 		/* Don't error on this BSD doesn't and if you think
115682981930SEric Dumazet 		 * about it this is right. Otherwise apps have to
115782981930SEric Dumazet 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
115882981930SEric Dumazet 		 * are treated in BSD as hints
115982981930SEric Dumazet 		 */
11601227c177SKuniyuki Iwashima 		val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
1161b0573deaSPatrick McHardy set_sndbuf:
11624057765fSGuillaume Nault 		/* Ensure val * 2 fits into an int, to prevent max_t()
11634057765fSGuillaume Nault 		 * from treating it as a negative value.
11644057765fSGuillaume Nault 		 */
11654057765fSGuillaume Nault 		val = min_t(int, val, INT_MAX / 2);
11661da177e4SLinus Torvalds 		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
1167e292f05eSEric Dumazet 		WRITE_ONCE(sk->sk_sndbuf,
1168e292f05eSEric Dumazet 			   max_t(int, val * 2, SOCK_MIN_SNDBUF));
116982981930SEric Dumazet 		/* Wake up sending tasks if we upped the value. */
11701da177e4SLinus Torvalds 		sk->sk_write_space(sk);
11711da177e4SLinus Torvalds 		break;
11721da177e4SLinus Torvalds 
1173b0573deaSPatrick McHardy 	case SO_SNDBUFFORCE:
1174e42c7beeSMartin KaFai Lau 		if (!sockopt_capable(CAP_NET_ADMIN)) {
1175b0573deaSPatrick McHardy 			ret = -EPERM;
1176b0573deaSPatrick McHardy 			break;
1177b0573deaSPatrick McHardy 		}
11784057765fSGuillaume Nault 
11794057765fSGuillaume Nault 		/* No negative values (to prevent underflow, as val will be
11804057765fSGuillaume Nault 		 * multiplied by 2).
11814057765fSGuillaume Nault 		 */
11824057765fSGuillaume Nault 		if (val < 0)
11834057765fSGuillaume Nault 			val = 0;
1184b0573deaSPatrick McHardy 		goto set_sndbuf;
1185b0573deaSPatrick McHardy 
11861da177e4SLinus Torvalds 	case SO_RCVBUF:
11871da177e4SLinus Torvalds 		/* Don't error on this BSD doesn't and if you think
118882981930SEric Dumazet 		 * about it this is right. Otherwise apps have to
118982981930SEric Dumazet 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
119082981930SEric Dumazet 		 * are treated in BSD as hints
119182981930SEric Dumazet 		 */
11921227c177SKuniyuki Iwashima 		__sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max)));
11931da177e4SLinus Torvalds 		break;
11941da177e4SLinus Torvalds 
1195b0573deaSPatrick McHardy 	case SO_RCVBUFFORCE:
1196e42c7beeSMartin KaFai Lau 		if (!sockopt_capable(CAP_NET_ADMIN)) {
1197b0573deaSPatrick McHardy 			ret = -EPERM;
1198b0573deaSPatrick McHardy 			break;
1199b0573deaSPatrick McHardy 		}
12004057765fSGuillaume Nault 
12014057765fSGuillaume Nault 		/* No negative values (to prevent underflow, as val will be
12024057765fSGuillaume Nault 		 * multiplied by 2).
12034057765fSGuillaume Nault 		 */
120426cfabf9SChristoph Hellwig 		__sock_set_rcvbuf(sk, max(val, 0));
120526cfabf9SChristoph Hellwig 		break;
1206b0573deaSPatrick McHardy 
12071da177e4SLinus Torvalds 	case SO_KEEPALIVE:
12084b9d07a4SUrsula Braun 		if (sk->sk_prot->keepalive)
12094b9d07a4SUrsula Braun 			sk->sk_prot->keepalive(sk, valbool);
12101da177e4SLinus Torvalds 		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
12111da177e4SLinus Torvalds 		break;
12121da177e4SLinus Torvalds 
12131da177e4SLinus Torvalds 	case SO_OOBINLINE:
12141da177e4SLinus Torvalds 		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
12151da177e4SLinus Torvalds 		break;
12161da177e4SLinus Torvalds 
12171da177e4SLinus Torvalds 	case SO_NO_CHECK:
121828448b80STom Herbert 		sk->sk_no_check_tx = valbool;
12191da177e4SLinus Torvalds 		break;
12201da177e4SLinus Torvalds 
12211da177e4SLinus Torvalds 	case SO_PRIORITY:
12225e1fccc0SEric W. Biederman 		if ((val >= 0 && val <= 6) ||
1223e42c7beeSMartin KaFai Lau 		    sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
1224e42c7beeSMartin KaFai Lau 		    sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
12258bf43be7SEric Dumazet 			WRITE_ONCE(sk->sk_priority, val);
12261da177e4SLinus Torvalds 		else
12271da177e4SLinus Torvalds 			ret = -EPERM;
12281da177e4SLinus Torvalds 		break;
12291da177e4SLinus Torvalds 
12301da177e4SLinus Torvalds 	case SO_LINGER:
12311da177e4SLinus Torvalds 		if (optlen < sizeof(ling)) {
12321da177e4SLinus Torvalds 			ret = -EINVAL;	/* 1003.1g */
12331da177e4SLinus Torvalds 			break;
12341da177e4SLinus Torvalds 		}
1235c8c1bbb6SChristoph Hellwig 		if (copy_from_sockptr(&ling, optval, sizeof(ling))) {
12361da177e4SLinus Torvalds 			ret = -EFAULT;
12371da177e4SLinus Torvalds 			break;
12381da177e4SLinus Torvalds 		}
1239bc1fb82aSEric Dumazet 		if (!ling.l_onoff) {
12401da177e4SLinus Torvalds 			sock_reset_flag(sk, SOCK_LINGER);
1241bc1fb82aSEric Dumazet 		} else {
1242bc1fb82aSEric Dumazet 			unsigned long t_sec = ling.l_linger;
1243bc1fb82aSEric Dumazet 
1244bc1fb82aSEric Dumazet 			if (t_sec >= MAX_SCHEDULE_TIMEOUT / HZ)
1245bc1fb82aSEric Dumazet 				WRITE_ONCE(sk->sk_lingertime, MAX_SCHEDULE_TIMEOUT);
12461da177e4SLinus Torvalds 			else
1247bc1fb82aSEric Dumazet 				WRITE_ONCE(sk->sk_lingertime, t_sec * HZ);
12481da177e4SLinus Torvalds 			sock_set_flag(sk, SOCK_LINGER);
12491da177e4SLinus Torvalds 		}
12501da177e4SLinus Torvalds 		break;
12511da177e4SLinus Torvalds 
12521da177e4SLinus Torvalds 	case SO_BSDCOMPAT:
12531da177e4SLinus Torvalds 		break;
12541da177e4SLinus Torvalds 
12551da177e4SLinus Torvalds 	case SO_PASSCRED:
1256274c4a6dSAndy Shevchenko 		assign_bit(SOCK_PASSCRED, &sock->flags, valbool);
12571da177e4SLinus Torvalds 		break;
12581da177e4SLinus Torvalds 
12595e2ff670SAlexander Mikhalitsyn 	case SO_PASSPIDFD:
1260274c4a6dSAndy Shevchenko 		assign_bit(SOCK_PASSPIDFD, &sock->flags, valbool);
12615e2ff670SAlexander Mikhalitsyn 		break;
12625e2ff670SAlexander Mikhalitsyn 
12637f1bc6e9SDeepa Dinamani 	case SO_TIMESTAMP_OLD:
1264783da70eSChristoph Hellwig 	case SO_TIMESTAMP_NEW:
1265783da70eSChristoph Hellwig 	case SO_TIMESTAMPNS_OLD:
1266783da70eSChristoph Hellwig 	case SO_TIMESTAMPNS_NEW:
126781b4a0ccSEric Dumazet 		sock_set_timestamp(sk, optname, valbool);
1268783da70eSChristoph Hellwig 		break;
1269ced122d9SFlorian Westphal 
12709718475eSDeepa Dinamani 	case SO_TIMESTAMPING_NEW:
12717f1bc6e9SDeepa Dinamani 	case SO_TIMESTAMPING_OLD:
1272d463126eSYangbo Lu 		if (optlen == sizeof(timestamping)) {
1273d463126eSYangbo Lu 			if (copy_from_sockptr(&timestamping, optval,
1274271dbc31SDan Carpenter 					      sizeof(timestamping))) {
1275271dbc31SDan Carpenter 				ret = -EFAULT;
1276271dbc31SDan Carpenter 				break;
1277271dbc31SDan Carpenter 			}
1278d463126eSYangbo Lu 		} else {
1279d463126eSYangbo Lu 			memset(&timestamping, 0, sizeof(timestamping));
1280d463126eSYangbo Lu 			timestamping.flags = val;
1281d463126eSYangbo Lu 		}
1282d463126eSYangbo Lu 		ret = sock_set_timestamping(sk, optname, timestamping);
128320d49473SPatrick Ohly 		break;
128420d49473SPatrick Ohly 
12851da177e4SLinus Torvalds 	case SO_RCVLOWAT:
12861ded5e5aSEric Dumazet 		{
12871ded5e5aSEric Dumazet 		int (*set_rcvlowat)(struct sock *sk, int val) = NULL;
12881ded5e5aSEric Dumazet 
12891da177e4SLinus Torvalds 		if (val < 0)
12901da177e4SLinus Torvalds 			val = INT_MAX;
12911ded5e5aSEric Dumazet 		if (sock)
12921ded5e5aSEric Dumazet 			set_rcvlowat = READ_ONCE(sock->ops)->set_rcvlowat;
12931ded5e5aSEric Dumazet 		if (set_rcvlowat)
12941ded5e5aSEric Dumazet 			ret = set_rcvlowat(sk, val);
1295d1361840SEric Dumazet 		else
1296eac66402SEric Dumazet 			WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
12971da177e4SLinus Torvalds 		break;
12981ded5e5aSEric Dumazet 		}
129945bdc661SDeepa Dinamani 	case SO_RCVTIMEO_OLD:
1300a9beb86aSDeepa Dinamani 	case SO_RCVTIMEO_NEW:
1301c8c1bbb6SChristoph Hellwig 		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
1302c34645acSChristoph Hellwig 				       optlen, optname == SO_RCVTIMEO_OLD);
13031da177e4SLinus Torvalds 		break;
13041da177e4SLinus Torvalds 
130545bdc661SDeepa Dinamani 	case SO_SNDTIMEO_OLD:
1306a9beb86aSDeepa Dinamani 	case SO_SNDTIMEO_NEW:
1307c8c1bbb6SChristoph Hellwig 		ret = sock_set_timeout(&sk->sk_sndtimeo, optval,
1308c34645acSChristoph Hellwig 				       optlen, optname == SO_SNDTIMEO_OLD);
13091da177e4SLinus Torvalds 		break;
13101da177e4SLinus Torvalds 
13114d295e54SChristoph Hellwig 	case SO_ATTACH_FILTER: {
13121da177e4SLinus Torvalds 		struct sock_fprog fprog;
13131da177e4SLinus Torvalds 
1314c8c1bbb6SChristoph Hellwig 		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
13154d295e54SChristoph Hellwig 		if (!ret)
13161da177e4SLinus Torvalds 			ret = sk_attach_filter(&fprog, sk);
13171da177e4SLinus Torvalds 		break;
13184d295e54SChristoph Hellwig 	}
131989aa0758SAlexei Starovoitov 	case SO_ATTACH_BPF:
132089aa0758SAlexei Starovoitov 		ret = -EINVAL;
132189aa0758SAlexei Starovoitov 		if (optlen == sizeof(u32)) {
132289aa0758SAlexei Starovoitov 			u32 ufd;
132389aa0758SAlexei Starovoitov 
132489aa0758SAlexei Starovoitov 			ret = -EFAULT;
1325c8c1bbb6SChristoph Hellwig 			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
132689aa0758SAlexei Starovoitov 				break;
132789aa0758SAlexei Starovoitov 
132889aa0758SAlexei Starovoitov 			ret = sk_attach_bpf(ufd, sk);
132989aa0758SAlexei Starovoitov 		}
133089aa0758SAlexei Starovoitov 		break;
133189aa0758SAlexei Starovoitov 
13324d295e54SChristoph Hellwig 	case SO_ATTACH_REUSEPORT_CBPF: {
1333538950a1SCraig Gallek 		struct sock_fprog fprog;
1334538950a1SCraig Gallek 
1335c8c1bbb6SChristoph Hellwig 		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
13364d295e54SChristoph Hellwig 		if (!ret)
1337538950a1SCraig Gallek 			ret = sk_reuseport_attach_filter(&fprog, sk);
1338538950a1SCraig Gallek 		break;
13394d295e54SChristoph Hellwig 	}
1340538950a1SCraig Gallek 	case SO_ATTACH_REUSEPORT_EBPF:
1341538950a1SCraig Gallek 		ret = -EINVAL;
1342538950a1SCraig Gallek 		if (optlen == sizeof(u32)) {
1343538950a1SCraig Gallek 			u32 ufd;
1344538950a1SCraig Gallek 
1345538950a1SCraig Gallek 			ret = -EFAULT;
1346c8c1bbb6SChristoph Hellwig 			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
1347538950a1SCraig Gallek 				break;
1348538950a1SCraig Gallek 
1349538950a1SCraig Gallek 			ret = sk_reuseport_attach_bpf(ufd, sk);
1350538950a1SCraig Gallek 		}
1351538950a1SCraig Gallek 		break;
1352538950a1SCraig Gallek 
135399f3a064SMartin KaFai Lau 	case SO_DETACH_REUSEPORT_BPF:
135499f3a064SMartin KaFai Lau 		ret = reuseport_detach_prog(sk);
135599f3a064SMartin KaFai Lau 		break;
135699f3a064SMartin KaFai Lau 
13571da177e4SLinus Torvalds 	case SO_DETACH_FILTER:
135855b33325SPavel Emelyanov 		ret = sk_detach_filter(sk);
13591da177e4SLinus Torvalds 		break;
13601da177e4SLinus Torvalds 
1361d59577b6SVincent Bernat 	case SO_LOCK_FILTER:
1362d59577b6SVincent Bernat 		if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
1363d59577b6SVincent Bernat 			ret = -EPERM;
1364d59577b6SVincent Bernat 		else
1365d59577b6SVincent Bernat 			sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
1366d59577b6SVincent Bernat 		break;
1367d59577b6SVincent Bernat 
1368877ce7c1SCatherine Zhang 	case SO_PASSSEC:
1369274c4a6dSAndy Shevchenko 		assign_bit(SOCK_PASSSEC, &sock->flags, valbool);
1370877ce7c1SCatherine Zhang 		break;
13714a19ec58SLaszlo Attila Toth 	case SO_MARK:
1372e42c7beeSMartin KaFai Lau 		if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
1373e42c7beeSMartin KaFai Lau 		    !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
13744a19ec58SLaszlo Attila Toth 			ret = -EPERM;
1375dd9082f4SAlexander Aring 			break;
137650254256SDavid Barmann 		}
1377dd9082f4SAlexander Aring 
1378dd9082f4SAlexander Aring 		__sock_set_mark(sk, val);
13794a19ec58SLaszlo Attila Toth 		break;
13806fd1d51cSErin MacNeil 	case SO_RCVMARK:
13816fd1d51cSErin MacNeil 		sock_valbool_flag(sk, SOCK_RCVMARK, valbool);
13826fd1d51cSErin MacNeil 		break;
1383877ce7c1SCatherine Zhang 
13843b885787SNeil Horman 	case SO_RXQ_OVFL:
13858083f0fcSJohannes Berg 		sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
13863b885787SNeil Horman 		break;
13876e3e939fSJohannes Berg 
13886e3e939fSJohannes Berg 	case SO_WIFI_STATUS:
13896e3e939fSJohannes Berg 		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
13906e3e939fSJohannes Berg 		break;
13916e3e939fSJohannes Berg 
1392ef64a54fSPavel Emelyanov 	case SO_PEEK_OFF:
13931ded5e5aSEric Dumazet 		{
13941ded5e5aSEric Dumazet 		int (*set_peek_off)(struct sock *sk, int val);
13951ded5e5aSEric Dumazet 
13961ded5e5aSEric Dumazet 		set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
13971ded5e5aSEric Dumazet 		if (set_peek_off)
13981ded5e5aSEric Dumazet 			ret = set_peek_off(sk, val);
1399ef64a54fSPavel Emelyanov 		else
1400ef64a54fSPavel Emelyanov 			ret = -EOPNOTSUPP;
1401ef64a54fSPavel Emelyanov 		break;
14021ded5e5aSEric Dumazet 		}
14033bdc0ebaSBen Greear 
14043bdc0ebaSBen Greear 	case SO_NOFCS:
14053bdc0ebaSBen Greear 		sock_valbool_flag(sk, SOCK_NOFCS, valbool);
14063bdc0ebaSBen Greear 		break;
14073bdc0ebaSBen Greear 
14087d4c04fcSKeller, Jacob E 	case SO_SELECT_ERR_QUEUE:
14097d4c04fcSKeller, Jacob E 		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
14107d4c04fcSKeller, Jacob E 		break;
14117d4c04fcSKeller, Jacob E 
1412e0d1095aSCong Wang #ifdef CONFIG_NET_RX_BUSY_POLL
141364b0dc51SEliezer Tamir 	case SO_BUSY_POLL:
1414dafcc438SEliezer Tamir 		if (val < 0)
1415dafcc438SEliezer Tamir 			ret = -EINVAL;
1416dafcc438SEliezer Tamir 		else
14170dbffbb5SEric Dumazet 			WRITE_ONCE(sk->sk_ll_usec, val);
1418dafcc438SEliezer Tamir 		break;
14197fd3253aSBjörn Töpel 	case SO_PREFER_BUSY_POLL:
1420e42c7beeSMartin KaFai Lau 		if (valbool && !sockopt_capable(CAP_NET_ADMIN))
14217fd3253aSBjörn Töpel 			ret = -EPERM;
14227fd3253aSBjörn Töpel 		else
14237fd3253aSBjörn Töpel 			WRITE_ONCE(sk->sk_prefer_busy_poll, valbool);
14247fd3253aSBjörn Töpel 		break;
14257c951cafSBjörn Töpel 	case SO_BUSY_POLL_BUDGET:
1426e42c7beeSMartin KaFai Lau 		if (val > READ_ONCE(sk->sk_busy_poll_budget) && !sockopt_capable(CAP_NET_ADMIN)) {
14277c951cafSBjörn Töpel 			ret = -EPERM;
14287c951cafSBjörn Töpel 		} else {
14297c951cafSBjörn Töpel 			if (val < 0 || val > U16_MAX)
14307c951cafSBjörn Töpel 				ret = -EINVAL;
14317c951cafSBjörn Töpel 			else
14327c951cafSBjörn Töpel 				WRITE_ONCE(sk->sk_busy_poll_budget, val);
14337c951cafSBjörn Töpel 		}
14347c951cafSBjörn Töpel 		break;
1435dafcc438SEliezer Tamir #endif
143662748f32SEric Dumazet 
143762748f32SEric Dumazet 	case SO_MAX_PACING_RATE:
14386bdef102SEric Dumazet 		{
1439700465fdSKe Li 		unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val;
14406bdef102SEric Dumazet 
14416bdef102SEric Dumazet 		if (sizeof(ulval) != sizeof(val) &&
14426bdef102SEric Dumazet 		    optlen >= sizeof(ulval) &&
1443c8c1bbb6SChristoph Hellwig 		    copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
14446bdef102SEric Dumazet 			ret = -EFAULT;
14456bdef102SEric Dumazet 			break;
14466bdef102SEric Dumazet 		}
14476bdef102SEric Dumazet 		if (ulval != ~0UL)
1448218af599SEric Dumazet 			cmpxchg(&sk->sk_pacing_status,
1449218af599SEric Dumazet 				SK_PACING_NONE,
1450218af599SEric Dumazet 				SK_PACING_NEEDED);
1451ea7f45efSEric Dumazet 		/* Pairs with READ_ONCE() from sk_getsockopt() */
1452ea7f45efSEric Dumazet 		WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
14536bdef102SEric Dumazet 		sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
145462748f32SEric Dumazet 		break;
14556bdef102SEric Dumazet 		}
145670da268bSEric Dumazet 	case SO_INCOMING_CPU:
1457b261eda8SKuniyuki Iwashima 		reuseport_update_incoming_cpu(sk, val);
145870da268bSEric Dumazet 		break;
145970da268bSEric Dumazet 
1460a87cb3e4STom Herbert 	case SO_CNX_ADVICE:
1461a87cb3e4STom Herbert 		if (val == 1)
1462a87cb3e4STom Herbert 			dst_negative_advice(sk);
1463a87cb3e4STom Herbert 		break;
146476851d12SWillem de Bruijn 
146576851d12SWillem de Bruijn 	case SO_ZEROCOPY:
146628190752SSowmini Varadhan 		if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
146742f67eeaSEric Dumazet 			if (!(sk_is_tcp(sk) ||
1468b5947e5dSWillem de Bruijn 			      (sk->sk_type == SOCK_DGRAM &&
1469b5947e5dSWillem de Bruijn 			       sk->sk_protocol == IPPROTO_UDP)))
1470869420a8SSamuel Thibault 				ret = -EOPNOTSUPP;
147128190752SSowmini Varadhan 		} else if (sk->sk_family != PF_RDS) {
1472869420a8SSamuel Thibault 			ret = -EOPNOTSUPP;
147328190752SSowmini Varadhan 		}
147428190752SSowmini Varadhan 		if (!ret) {
147528190752SSowmini Varadhan 			if (val < 0 || val > 1)
147676851d12SWillem de Bruijn 				ret = -EINVAL;
147776851d12SWillem de Bruijn 			else
147876851d12SWillem de Bruijn 				sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
147928190752SSowmini Varadhan 		}
1480334e6413SJesus Sanchez-Palencia 		break;
1481334e6413SJesus Sanchez-Palencia 
148280b14deeSRichard Cochran 	case SO_TXTIME:
1483790709f2SEric Dumazet 		if (optlen != sizeof(struct sock_txtime)) {
148480b14deeSRichard Cochran 			ret = -EINVAL;
1485790709f2SEric Dumazet 			break;
1486c8c1bbb6SChristoph Hellwig 		} else if (copy_from_sockptr(&sk_txtime, optval,
148780b14deeSRichard Cochran 			   sizeof(struct sock_txtime))) {
148880b14deeSRichard Cochran 			ret = -EFAULT;
1489790709f2SEric Dumazet 			break;
149080b14deeSRichard Cochran 		} else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
149180b14deeSRichard Cochran 			ret = -EINVAL;
1492790709f2SEric Dumazet 			break;
1493790709f2SEric Dumazet 		}
1494790709f2SEric Dumazet 		/* CLOCK_MONOTONIC is only used by sch_fq, and this packet
1495790709f2SEric Dumazet 		 * scheduler has enough safe guards.
1496790709f2SEric Dumazet 		 */
1497790709f2SEric Dumazet 		if (sk_txtime.clockid != CLOCK_MONOTONIC &&
1498e42c7beeSMartin KaFai Lau 		    !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1499790709f2SEric Dumazet 			ret = -EPERM;
1500790709f2SEric Dumazet 			break;
1501790709f2SEric Dumazet 		}
150280b14deeSRichard Cochran 		sock_valbool_flag(sk, SOCK_TXTIME, true);
150380b14deeSRichard Cochran 		sk->sk_clockid = sk_txtime.clockid;
150480b14deeSRichard Cochran 		sk->sk_txtime_deadline_mode =
150580b14deeSRichard Cochran 			!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
15064b15c707SJesus Sanchez-Palencia 		sk->sk_txtime_report_errors =
15074b15c707SJesus Sanchez-Palencia 			!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
150880b14deeSRichard Cochran 		break;
150980b14deeSRichard Cochran 
1510f5dd3d0cSDavid Herrmann 	case SO_BINDTOIFINDEX:
15117594888cSChristoph Hellwig 		ret = sock_bindtoindex_locked(sk, val);
1512f5dd3d0cSDavid Herrmann 		break;
1513f5dd3d0cSDavid Herrmann 
151404190bf8SPavel Tikhomirov 	case SO_BUF_LOCK:
151504190bf8SPavel Tikhomirov 		if (val & ~SOCK_BUF_LOCK_MASK) {
151604190bf8SPavel Tikhomirov 			ret = -EINVAL;
151704190bf8SPavel Tikhomirov 			break;
151804190bf8SPavel Tikhomirov 		}
151904190bf8SPavel Tikhomirov 		sk->sk_userlocks = val | (sk->sk_userlocks &
152004190bf8SPavel Tikhomirov 					  ~SOCK_BUF_LOCK_MASK);
152104190bf8SPavel Tikhomirov 		break;
152204190bf8SPavel Tikhomirov 
15232bb2f5fbSWei Wang 	case SO_RESERVE_MEM:
15242bb2f5fbSWei Wang 	{
15252bb2f5fbSWei Wang 		int delta;
15262bb2f5fbSWei Wang 
15272bb2f5fbSWei Wang 		if (val < 0) {
15282bb2f5fbSWei Wang 			ret = -EINVAL;
15292bb2f5fbSWei Wang 			break;
15302bb2f5fbSWei Wang 		}
15312bb2f5fbSWei Wang 
15322bb2f5fbSWei Wang 		delta = val - sk->sk_reserved_mem;
15332bb2f5fbSWei Wang 		if (delta < 0)
15342bb2f5fbSWei Wang 			sock_release_reserved_memory(sk, -delta);
15352bb2f5fbSWei Wang 		else
15362bb2f5fbSWei Wang 			ret = sock_reserve_memory(sk, delta);
15372bb2f5fbSWei Wang 		break;
15382bb2f5fbSWei Wang 	}
15392bb2f5fbSWei Wang 
154026859240SAkhmat Karakotov 	case SO_TXREHASH:
154126859240SAkhmat Karakotov 		if (val < -1 || val > 1) {
154226859240SAkhmat Karakotov 			ret = -EINVAL;
154326859240SAkhmat Karakotov 			break;
154426859240SAkhmat Karakotov 		}
1545c11204c7SKevin Yang 		if ((u8)val == SOCK_TXREHASH_DEFAULT)
1546c11204c7SKevin Yang 			val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
1547c76a0328SEric Dumazet 		/* Paired with READ_ONCE() in tcp_rtx_synack()
1548c76a0328SEric Dumazet 		 * and sk_getsockopt().
1549c76a0328SEric Dumazet 		 */
1550cb6cd2ceSAkhmat Karakotov 		WRITE_ONCE(sk->sk_txrehash, (u8)val);
155126859240SAkhmat Karakotov 		break;
155226859240SAkhmat Karakotov 
15531da177e4SLinus Torvalds 	default:
15541da177e4SLinus Torvalds 		ret = -ENOPROTOOPT;
15551da177e4SLinus Torvalds 		break;
15561da177e4SLinus Torvalds 	}
155724426654SMartin KaFai Lau 	sockopt_release_sock(sk);
15581da177e4SLinus Torvalds 	return ret;
15591da177e4SLinus Torvalds }
15604d748f99SMartin KaFai Lau 
/* sock_setsockopt - set an option at the SOL_SOCKET level on @sock.
 *
 * Thin convenience wrapper: all of the real work (option parsing,
 * capability checks, locking) happens in sk_setsockopt() on the
 * underlying struct sock.  Returns 0 or a negative errno.
 */
int sock_setsockopt(struct socket *sock, int level, int optname,
		    sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;

	return sk_setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_setsockopt);
15681da177e4SLinus Torvalds 
sk_get_peer_cred(struct sock * sk)156935306eb2SEric Dumazet static const struct cred *sk_get_peer_cred(struct sock *sk)
157035306eb2SEric Dumazet {
157135306eb2SEric Dumazet 	const struct cred *cred;
157235306eb2SEric Dumazet 
157335306eb2SEric Dumazet 	spin_lock(&sk->sk_peer_lock);
157435306eb2SEric Dumazet 	cred = get_cred(sk->sk_peer_cred);
157535306eb2SEric Dumazet 	spin_unlock(&sk->sk_peer_lock);
157635306eb2SEric Dumazet 
157735306eb2SEric Dumazet 	return cred;
157835306eb2SEric Dumazet }
15791da177e4SLinus Torvalds 
cred_to_ucred(struct pid * pid,const struct cred * cred,struct ucred * ucred)15808f09898bSstephen hemminger static void cred_to_ucred(struct pid *pid, const struct cred *cred,
15813f551f94SEric W. Biederman 			  struct ucred *ucred)
15823f551f94SEric W. Biederman {
15833f551f94SEric W. Biederman 	ucred->pid = pid_vnr(pid);
15843f551f94SEric W. Biederman 	ucred->uid = ucred->gid = -1;
15853f551f94SEric W. Biederman 	if (cred) {
15863f551f94SEric W. Biederman 		struct user_namespace *current_ns = current_user_ns();
15873f551f94SEric W. Biederman 
1588b2e4f544SEric W. Biederman 		ucred->uid = from_kuid_munged(current_ns, cred->euid);
1589b2e4f544SEric W. Biederman 		ucred->gid = from_kgid_munged(current_ns, cred->egid);
15903f551f94SEric W. Biederman 	}
15913f551f94SEric W. Biederman }
15923f551f94SEric W. Biederman 
/* groups_to_user - copy supplementary group IDs to a user buffer.
 *
 * Each kernel gid in @src is mapped into the current user namespace
 * before being written to @dst as a gid_t array (used by the
 * SO_PEERGROUPS getsockopt).  The caller is responsible for ensuring
 * @dst is large enough for src->ngroups entries.
 *
 * Returns 0 on success or -EFAULT if the copy to userspace fails.
 */
static int groups_to_user(sockptr_t dst, const struct group_info *src)
{
	struct user_namespace *user_ns = current_user_ns();
	const size_t gid_size = sizeof(gid_t);
	int idx;

	for (idx = 0; idx < src->ngroups; idx++) {
		gid_t gid = from_kgid_munged(user_ns, src->gid[idx]);
		size_t offset = idx * gid_size;

		if (copy_to_sockptr_offset(dst, offset, &gid, gid_size))
			return -EFAULT;
	}

	return 0;
}
160728b5ba2aSDavid Herrmann 
sk_getsockopt(struct sock * sk,int level,int optname,sockptr_t optval,sockptr_t optlen)160865ddc82dSMartin KaFai Lau int sk_getsockopt(struct sock *sk, int level, int optname,
16094ff09db1SMartin KaFai Lau 		  sockptr_t optval, sockptr_t optlen)
16101da177e4SLinus Torvalds {
1611ba74a760SMartin KaFai Lau 	struct socket *sock = sk->sk_socket;
16121da177e4SLinus Torvalds 
1613e71a4783SStephen Hemminger 	union {
16141da177e4SLinus Torvalds 		int val;
16155daab9dbSChenbo Feng 		u64 val64;
1616677f136cSEric Dumazet 		unsigned long ulval;
16171da177e4SLinus Torvalds 		struct linger ling;
1618fe0c72f3SArnd Bergmann 		struct old_timeval32 tm32;
1619fe0c72f3SArnd Bergmann 		struct __kernel_old_timeval tm;
1620a9beb86aSDeepa Dinamani 		struct  __kernel_sock_timeval stm;
162180b14deeSRichard Cochran 		struct sock_txtime txtime;
1622d463126eSYangbo Lu 		struct so_timestamping timestamping;
16231da177e4SLinus Torvalds 	} v;
16241da177e4SLinus Torvalds 
16254d0392beSH Hartley Sweeten 	int lv = sizeof(int);
16261da177e4SLinus Torvalds 	int len;
16271da177e4SLinus Torvalds 
16284ff09db1SMartin KaFai Lau 	if (copy_from_sockptr(&len, optlen, sizeof(int)))
16291da177e4SLinus Torvalds 		return -EFAULT;
16301da177e4SLinus Torvalds 	if (len < 0)
16311da177e4SLinus Torvalds 		return -EINVAL;
16321da177e4SLinus Torvalds 
163350fee1deSEugene Teo 	memset(&v, 0, sizeof(v));
1634df0bca04SClément Lecigne 
1635e71a4783SStephen Hemminger 	switch (optname) {
16361da177e4SLinus Torvalds 	case SO_DEBUG:
16371da177e4SLinus Torvalds 		v.val = sock_flag(sk, SOCK_DBG);
16381da177e4SLinus Torvalds 		break;
16391da177e4SLinus Torvalds 
16401da177e4SLinus Torvalds 	case SO_DONTROUTE:
16411da177e4SLinus Torvalds 		v.val = sock_flag(sk, SOCK_LOCALROUTE);
16421da177e4SLinus Torvalds 		break;
16431da177e4SLinus Torvalds 
16441da177e4SLinus Torvalds 	case SO_BROADCAST:
16451b23a5dfSEric Dumazet 		v.val = sock_flag(sk, SOCK_BROADCAST);
16461da177e4SLinus Torvalds 		break;
16471da177e4SLinus Torvalds 
16481da177e4SLinus Torvalds 	case SO_SNDBUF:
164974bc0843SEric Dumazet 		v.val = READ_ONCE(sk->sk_sndbuf);
16501da177e4SLinus Torvalds 		break;
16511da177e4SLinus Torvalds 
16521da177e4SLinus Torvalds 	case SO_RCVBUF:
1653b4b55325SEric Dumazet 		v.val = READ_ONCE(sk->sk_rcvbuf);
16541da177e4SLinus Torvalds 		break;
16551da177e4SLinus Torvalds 
16561da177e4SLinus Torvalds 	case SO_REUSEADDR:
16571da177e4SLinus Torvalds 		v.val = sk->sk_reuse;
16581da177e4SLinus Torvalds 		break;
16591da177e4SLinus Torvalds 
1660055dc21aSTom Herbert 	case SO_REUSEPORT:
1661055dc21aSTom Herbert 		v.val = sk->sk_reuseport;
1662055dc21aSTom Herbert 		break;
1663055dc21aSTom Herbert 
16641da177e4SLinus Torvalds 	case SO_KEEPALIVE:
16651b23a5dfSEric Dumazet 		v.val = sock_flag(sk, SOCK_KEEPOPEN);
16661da177e4SLinus Torvalds 		break;
16671da177e4SLinus Torvalds 
16681da177e4SLinus Torvalds 	case SO_TYPE:
16691da177e4SLinus Torvalds 		v.val = sk->sk_type;
16701da177e4SLinus Torvalds 		break;
16711da177e4SLinus Torvalds 
167249c794e9SJan Engelhardt 	case SO_PROTOCOL:
167349c794e9SJan Engelhardt 		v.val = sk->sk_protocol;
167449c794e9SJan Engelhardt 		break;
167549c794e9SJan Engelhardt 
16760d6038eeSJan Engelhardt 	case SO_DOMAIN:
16770d6038eeSJan Engelhardt 		v.val = sk->sk_family;
16780d6038eeSJan Engelhardt 		break;
16790d6038eeSJan Engelhardt 
16801da177e4SLinus Torvalds 	case SO_ERROR:
16811da177e4SLinus Torvalds 		v.val = -sock_error(sk);
16821da177e4SLinus Torvalds 		if (v.val == 0)
16831da177e4SLinus Torvalds 			v.val = xchg(&sk->sk_err_soft, 0);
16841da177e4SLinus Torvalds 		break;
16851da177e4SLinus Torvalds 
16861da177e4SLinus Torvalds 	case SO_OOBINLINE:
16871b23a5dfSEric Dumazet 		v.val = sock_flag(sk, SOCK_URGINLINE);
16881da177e4SLinus Torvalds 		break;
16891da177e4SLinus Torvalds 
16901da177e4SLinus Torvalds 	case SO_NO_CHECK:
169128448b80STom Herbert 		v.val = sk->sk_no_check_tx;
16921da177e4SLinus Torvalds 		break;
16931da177e4SLinus Torvalds 
16941da177e4SLinus Torvalds 	case SO_PRIORITY:
16958bf43be7SEric Dumazet 		v.val = READ_ONCE(sk->sk_priority);
16961da177e4SLinus Torvalds 		break;
16971da177e4SLinus Torvalds 
16981da177e4SLinus Torvalds 	case SO_LINGER:
16991da177e4SLinus Torvalds 		lv		= sizeof(v.ling);
17001b23a5dfSEric Dumazet 		v.ling.l_onoff	= sock_flag(sk, SOCK_LINGER);
1701bc1fb82aSEric Dumazet 		v.ling.l_linger	= READ_ONCE(sk->sk_lingertime) / HZ;
17021da177e4SLinus Torvalds 		break;
17031da177e4SLinus Torvalds 
17041da177e4SLinus Torvalds 	case SO_BSDCOMPAT:
17051da177e4SLinus Torvalds 		break;
17061da177e4SLinus Torvalds 
17077f1bc6e9SDeepa Dinamani 	case SO_TIMESTAMP_OLD:
170892f37fd2SEric Dumazet 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1709887feae3SDeepa Dinamani 				!sock_flag(sk, SOCK_TSTAMP_NEW) &&
171092f37fd2SEric Dumazet 				!sock_flag(sk, SOCK_RCVTSTAMPNS);
171192f37fd2SEric Dumazet 		break;
171292f37fd2SEric Dumazet 
17137f1bc6e9SDeepa Dinamani 	case SO_TIMESTAMPNS_OLD:
1714887feae3SDeepa Dinamani 		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW);
1715887feae3SDeepa Dinamani 		break;
1716887feae3SDeepa Dinamani 
1717887feae3SDeepa Dinamani 	case SO_TIMESTAMP_NEW:
1718887feae3SDeepa Dinamani 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW);
1719887feae3SDeepa Dinamani 		break;
1720887feae3SDeepa Dinamani 
1721887feae3SDeepa Dinamani 	case SO_TIMESTAMPNS_NEW:
1722887feae3SDeepa Dinamani 		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW);
17231da177e4SLinus Torvalds 		break;
17241da177e4SLinus Torvalds 
17257f1bc6e9SDeepa Dinamani 	case SO_TIMESTAMPING_OLD:
1726742e4af3SJörn-Thorben Hinz 	case SO_TIMESTAMPING_NEW:
1727d463126eSYangbo Lu 		lv = sizeof(v.timestamping);
1728742e4af3SJörn-Thorben Hinz 		/* For the later-added case SO_TIMESTAMPING_NEW: Be strict about only
1729742e4af3SJörn-Thorben Hinz 		 * returning the flags when they were set through the same option.
1730742e4af3SJörn-Thorben Hinz 		 * Don't change the beviour for the old case SO_TIMESTAMPING_OLD.
1731742e4af3SJörn-Thorben Hinz 		 */
1732742e4af3SJörn-Thorben Hinz 		if (optname == SO_TIMESTAMPING_OLD || sock_flag(sk, SOCK_TSTAMP_NEW)) {
1733e3390b30SEric Dumazet 			v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
1734251cd405SEric Dumazet 			v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
1735742e4af3SJörn-Thorben Hinz 		}
173620d49473SPatrick Ohly 		break;
173720d49473SPatrick Ohly 
1738a9beb86aSDeepa Dinamani 	case SO_RCVTIMEO_OLD:
1739a9beb86aSDeepa Dinamani 	case SO_RCVTIMEO_NEW:
1740285975ddSEric Dumazet 		lv = sock_get_timeout(READ_ONCE(sk->sk_rcvtimeo), &v,
1741285975ddSEric Dumazet 				      SO_RCVTIMEO_OLD == optname);
17421da177e4SLinus Torvalds 		break;
17431da177e4SLinus Torvalds 
1744a9beb86aSDeepa Dinamani 	case SO_SNDTIMEO_OLD:
1745a9beb86aSDeepa Dinamani 	case SO_SNDTIMEO_NEW:
1746285975ddSEric Dumazet 		lv = sock_get_timeout(READ_ONCE(sk->sk_sndtimeo), &v,
1747285975ddSEric Dumazet 				      SO_SNDTIMEO_OLD == optname);
17481da177e4SLinus Torvalds 		break;
17491da177e4SLinus Torvalds 
17501da177e4SLinus Torvalds 	case SO_RCVLOWAT:
1751e6d12bdbSEric Dumazet 		v.val = READ_ONCE(sk->sk_rcvlowat);
17521da177e4SLinus Torvalds 		break;
17531da177e4SLinus Torvalds 
17541da177e4SLinus Torvalds 	case SO_SNDLOWAT:
17551da177e4SLinus Torvalds 		v.val = 1;
17561da177e4SLinus Torvalds 		break;
17571da177e4SLinus Torvalds 
17581da177e4SLinus Torvalds 	case SO_PASSCRED:
175982981930SEric Dumazet 		v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
17601da177e4SLinus Torvalds 		break;
17611da177e4SLinus Torvalds 
17625e2ff670SAlexander Mikhalitsyn 	case SO_PASSPIDFD:
17635e2ff670SAlexander Mikhalitsyn 		v.val = !!test_bit(SOCK_PASSPIDFD, &sock->flags);
17645e2ff670SAlexander Mikhalitsyn 		break;
17655e2ff670SAlexander Mikhalitsyn 
17661da177e4SLinus Torvalds 	case SO_PEERCRED:
1767109f6e39SEric W. Biederman 	{
1768109f6e39SEric W. Biederman 		struct ucred peercred;
1769109f6e39SEric W. Biederman 		if (len > sizeof(peercred))
1770109f6e39SEric W. Biederman 			len = sizeof(peercred);
177135306eb2SEric Dumazet 
177235306eb2SEric Dumazet 		spin_lock(&sk->sk_peer_lock);
1773109f6e39SEric W. Biederman 		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
177435306eb2SEric Dumazet 		spin_unlock(&sk->sk_peer_lock);
177535306eb2SEric Dumazet 
17764ff09db1SMartin KaFai Lau 		if (copy_to_sockptr(optval, &peercred, len))
17771da177e4SLinus Torvalds 			return -EFAULT;
17781da177e4SLinus Torvalds 		goto lenout;
1779109f6e39SEric W. Biederman 	}
17801da177e4SLinus Torvalds 
17817b26952aSAlexander Mikhalitsyn 	case SO_PEERPIDFD:
17827b26952aSAlexander Mikhalitsyn 	{
17837b26952aSAlexander Mikhalitsyn 		struct pid *peer_pid;
17847b26952aSAlexander Mikhalitsyn 		struct file *pidfd_file = NULL;
17857b26952aSAlexander Mikhalitsyn 		int pidfd;
17867b26952aSAlexander Mikhalitsyn 
17877b26952aSAlexander Mikhalitsyn 		if (len > sizeof(pidfd))
17887b26952aSAlexander Mikhalitsyn 			len = sizeof(pidfd);
17897b26952aSAlexander Mikhalitsyn 
17907b26952aSAlexander Mikhalitsyn 		spin_lock(&sk->sk_peer_lock);
17917b26952aSAlexander Mikhalitsyn 		peer_pid = get_pid(sk->sk_peer_pid);
17927b26952aSAlexander Mikhalitsyn 		spin_unlock(&sk->sk_peer_lock);
17937b26952aSAlexander Mikhalitsyn 
17947b26952aSAlexander Mikhalitsyn 		if (!peer_pid)
1795b6f79e82SDavid Rheinsberg 			return -ENODATA;
17967b26952aSAlexander Mikhalitsyn 
17977b26952aSAlexander Mikhalitsyn 		pidfd = pidfd_prepare(peer_pid, 0, &pidfd_file);
17987b26952aSAlexander Mikhalitsyn 		put_pid(peer_pid);
17997b26952aSAlexander Mikhalitsyn 		if (pidfd < 0)
18007b26952aSAlexander Mikhalitsyn 			return pidfd;
18017b26952aSAlexander Mikhalitsyn 
18027b26952aSAlexander Mikhalitsyn 		if (copy_to_sockptr(optval, &pidfd, len) ||
18037b26952aSAlexander Mikhalitsyn 		    copy_to_sockptr(optlen, &len, sizeof(int))) {
18047b26952aSAlexander Mikhalitsyn 			put_unused_fd(pidfd);
18057b26952aSAlexander Mikhalitsyn 			fput(pidfd_file);
18067b26952aSAlexander Mikhalitsyn 
18077b26952aSAlexander Mikhalitsyn 			return -EFAULT;
18087b26952aSAlexander Mikhalitsyn 		}
18097b26952aSAlexander Mikhalitsyn 
18107b26952aSAlexander Mikhalitsyn 		fd_install(pidfd, pidfd_file);
18117b26952aSAlexander Mikhalitsyn 		return 0;
18127b26952aSAlexander Mikhalitsyn 	}
18137b26952aSAlexander Mikhalitsyn 
181428b5ba2aSDavid Herrmann 	case SO_PEERGROUPS:
181528b5ba2aSDavid Herrmann 	{
181635306eb2SEric Dumazet 		const struct cred *cred;
181728b5ba2aSDavid Herrmann 		int ret, n;
181828b5ba2aSDavid Herrmann 
181935306eb2SEric Dumazet 		cred = sk_get_peer_cred(sk);
182035306eb2SEric Dumazet 		if (!cred)
182128b5ba2aSDavid Herrmann 			return -ENODATA;
182228b5ba2aSDavid Herrmann 
182335306eb2SEric Dumazet 		n = cred->group_info->ngroups;
182428b5ba2aSDavid Herrmann 		if (len < n * sizeof(gid_t)) {
182528b5ba2aSDavid Herrmann 			len = n * sizeof(gid_t);
182635306eb2SEric Dumazet 			put_cred(cred);
18274ff09db1SMartin KaFai Lau 			return copy_to_sockptr(optlen, &len, sizeof(int)) ? -EFAULT : -ERANGE;
182828b5ba2aSDavid Herrmann 		}
182928b5ba2aSDavid Herrmann 		len = n * sizeof(gid_t);
183028b5ba2aSDavid Herrmann 
18314ff09db1SMartin KaFai Lau 		ret = groups_to_user(optval, cred->group_info);
183235306eb2SEric Dumazet 		put_cred(cred);
183328b5ba2aSDavid Herrmann 		if (ret)
183428b5ba2aSDavid Herrmann 			return ret;
183528b5ba2aSDavid Herrmann 		goto lenout;
183628b5ba2aSDavid Herrmann 	}
183728b5ba2aSDavid Herrmann 
18381da177e4SLinus Torvalds 	case SO_PEERNAME:
18391da177e4SLinus Torvalds 	{
18408936bf53SKuniyuki Iwashima 		struct sockaddr_storage address;
18411da177e4SLinus Torvalds 
18421ded5e5aSEric Dumazet 		lv = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 2);
18439b2c45d4SDenys Vlasenko 		if (lv < 0)
18441da177e4SLinus Torvalds 			return -ENOTCONN;
18451da177e4SLinus Torvalds 		if (lv < len)
18461da177e4SLinus Torvalds 			return -EINVAL;
18478936bf53SKuniyuki Iwashima 		if (copy_to_sockptr(optval, &address, len))
18481da177e4SLinus Torvalds 			return -EFAULT;
18491da177e4SLinus Torvalds 		goto lenout;
18501da177e4SLinus Torvalds 	}
18511da177e4SLinus Torvalds 
18521da177e4SLinus Torvalds 	/* Dubious BSD thing... Probably nobody even uses it, but
18531da177e4SLinus Torvalds 	 * the UNIX standard wants it for whatever reason... -DaveM
18541da177e4SLinus Torvalds 	 */
18551da177e4SLinus Torvalds 	case SO_ACCEPTCONN:
18561da177e4SLinus Torvalds 		v.val = sk->sk_state == TCP_LISTEN;
18571da177e4SLinus Torvalds 		break;
18581da177e4SLinus Torvalds 
1859877ce7c1SCatherine Zhang 	case SO_PASSSEC:
186082981930SEric Dumazet 		v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
1861877ce7c1SCatherine Zhang 		break;
1862877ce7c1SCatherine Zhang 
18631da177e4SLinus Torvalds 	case SO_PEERSEC:
1864b10b9c34SPaul Moore 		return security_socket_getpeersec_stream(sock,
1865b10b9c34SPaul Moore 							 optval, optlen, len);
18661da177e4SLinus Torvalds 
18674a19ec58SLaszlo Attila Toth 	case SO_MARK:
18683c5b4d69SEric Dumazet 		v.val = READ_ONCE(sk->sk_mark);
18694a19ec58SLaszlo Attila Toth 		break;
18704a19ec58SLaszlo Attila Toth 
18716fd1d51cSErin MacNeil 	case SO_RCVMARK:
18726fd1d51cSErin MacNeil 		v.val = sock_flag(sk, SOCK_RCVMARK);
18736fd1d51cSErin MacNeil 		break;
18746fd1d51cSErin MacNeil 
18753b885787SNeil Horman 	case SO_RXQ_OVFL:
18761b23a5dfSEric Dumazet 		v.val = sock_flag(sk, SOCK_RXQ_OVFL);
18773b885787SNeil Horman 		break;
18783b885787SNeil Horman 
18796e3e939fSJohannes Berg 	case SO_WIFI_STATUS:
18801b23a5dfSEric Dumazet 		v.val = sock_flag(sk, SOCK_WIFI_STATUS);
18816e3e939fSJohannes Berg 		break;
18826e3e939fSJohannes Berg 
1883ef64a54fSPavel Emelyanov 	case SO_PEEK_OFF:
18841ded5e5aSEric Dumazet 		if (!READ_ONCE(sock->ops)->set_peek_off)
1885ef64a54fSPavel Emelyanov 			return -EOPNOTSUPP;
1886ef64a54fSPavel Emelyanov 
188711695c6eSEric Dumazet 		v.val = READ_ONCE(sk->sk_peek_off);
1888ef64a54fSPavel Emelyanov 		break;
1889bc2f7996SDavid S. Miller 	case SO_NOFCS:
18901b23a5dfSEric Dumazet 		v.val = sock_flag(sk, SOCK_NOFCS);
1891bc2f7996SDavid S. Miller 		break;
1892c91f6df2SBrian Haley 
1893f7b86bfeSPavel Emelyanov 	case SO_BINDTODEVICE:
1894c91f6df2SBrian Haley 		return sock_getbindtodevice(sk, optval, optlen, len);
1895c91f6df2SBrian Haley 
1896a8fc9277SPavel Emelyanov 	case SO_GET_FILTER:
18974ff09db1SMartin KaFai Lau 		len = sk_get_filter(sk, optval, len);
1898a8fc9277SPavel Emelyanov 		if (len < 0)
1899a8fc9277SPavel Emelyanov 			return len;
1900a8fc9277SPavel Emelyanov 
1901a8fc9277SPavel Emelyanov 		goto lenout;
1902c91f6df2SBrian Haley 
1903d59577b6SVincent Bernat 	case SO_LOCK_FILTER:
1904d59577b6SVincent Bernat 		v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1905d59577b6SVincent Bernat 		break;
1906d59577b6SVincent Bernat 
1907ea02f941SMichal Sekletar 	case SO_BPF_EXTENSIONS:
1908ea02f941SMichal Sekletar 		v.val = bpf_tell_extensions();
1909ea02f941SMichal Sekletar 		break;
1910ea02f941SMichal Sekletar 
19117d4c04fcSKeller, Jacob E 	case SO_SELECT_ERR_QUEUE:
19127d4c04fcSKeller, Jacob E 		v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
19137d4c04fcSKeller, Jacob E 		break;
19147d4c04fcSKeller, Jacob E 
1915e0d1095aSCong Wang #ifdef CONFIG_NET_RX_BUSY_POLL
191664b0dc51SEliezer Tamir 	case SO_BUSY_POLL:
1917e5f0d2ddSEric Dumazet 		v.val = READ_ONCE(sk->sk_ll_usec);
1918dafcc438SEliezer Tamir 		break;
19197fd3253aSBjörn Töpel 	case SO_PREFER_BUSY_POLL:
19207fd3253aSBjörn Töpel 		v.val = READ_ONCE(sk->sk_prefer_busy_poll);
19217fd3253aSBjörn Töpel 		break;
1922dafcc438SEliezer Tamir #endif
1923dafcc438SEliezer Tamir 
192462748f32SEric Dumazet 	case SO_MAX_PACING_RATE:
1925ea7f45efSEric Dumazet 		/* The READ_ONCE() pair with the WRITE_ONCE() in sk_setsockopt() */
1926677f136cSEric Dumazet 		if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
1927677f136cSEric Dumazet 			lv = sizeof(v.ulval);
1928ea7f45efSEric Dumazet 			v.ulval = READ_ONCE(sk->sk_max_pacing_rate);
1929677f136cSEric Dumazet 		} else {
193076a9ebe8SEric Dumazet 			/* 32bit version */
1931ea7f45efSEric Dumazet 			v.val = min_t(unsigned long, ~0U,
1932ea7f45efSEric Dumazet 				      READ_ONCE(sk->sk_max_pacing_rate));
1933677f136cSEric Dumazet 		}
193462748f32SEric Dumazet 		break;
193562748f32SEric Dumazet 
19362c8c56e1SEric Dumazet 	case SO_INCOMING_CPU:
19377170a977SEric Dumazet 		v.val = READ_ONCE(sk->sk_incoming_cpu);
19382c8c56e1SEric Dumazet 		break;
19392c8c56e1SEric Dumazet 
1940a2d133b1SJosh Hunt 	case SO_MEMINFO:
1941a2d133b1SJosh Hunt 	{
1942a2d133b1SJosh Hunt 		u32 meminfo[SK_MEMINFO_VARS];
1943a2d133b1SJosh Hunt 
1944a2d133b1SJosh Hunt 		sk_get_meminfo(sk, meminfo);
1945a2d133b1SJosh Hunt 
1946a2d133b1SJosh Hunt 		len = min_t(unsigned int, len, sizeof(meminfo));
19474ff09db1SMartin KaFai Lau 		if (copy_to_sockptr(optval, &meminfo, len))
1948a2d133b1SJosh Hunt 			return -EFAULT;
1949a2d133b1SJosh Hunt 
1950a2d133b1SJosh Hunt 		goto lenout;
1951a2d133b1SJosh Hunt 	}
19526d433902SSridhar Samudrala 
19536d433902SSridhar Samudrala #ifdef CONFIG_NET_RX_BUSY_POLL
19546d433902SSridhar Samudrala 	case SO_INCOMING_NAPI_ID:
19556d433902SSridhar Samudrala 		v.val = READ_ONCE(sk->sk_napi_id);
19566d433902SSridhar Samudrala 
19576d433902SSridhar Samudrala 		/* aggregate non-NAPI IDs down to 0 */
19586d433902SSridhar Samudrala 		if (v.val < MIN_NAPI_ID)
19596d433902SSridhar Samudrala 			v.val = 0;
19606d433902SSridhar Samudrala 
19616d433902SSridhar Samudrala 		break;
19626d433902SSridhar Samudrala #endif
19636d433902SSridhar Samudrala 
19645daab9dbSChenbo Feng 	case SO_COOKIE:
19655daab9dbSChenbo Feng 		lv = sizeof(u64);
19665daab9dbSChenbo Feng 		if (len < lv)
19675daab9dbSChenbo Feng 			return -EINVAL;
19685daab9dbSChenbo Feng 		v.val64 = sock_gen_cookie(sk);
19695daab9dbSChenbo Feng 		break;
19705daab9dbSChenbo Feng 
197176851d12SWillem de Bruijn 	case SO_ZEROCOPY:
197276851d12SWillem de Bruijn 		v.val = sock_flag(sk, SOCK_ZEROCOPY);
197376851d12SWillem de Bruijn 		break;
197476851d12SWillem de Bruijn 
197580b14deeSRichard Cochran 	case SO_TXTIME:
197680b14deeSRichard Cochran 		lv = sizeof(v.txtime);
197780b14deeSRichard Cochran 		v.txtime.clockid = sk->sk_clockid;
197880b14deeSRichard Cochran 		v.txtime.flags |= sk->sk_txtime_deadline_mode ?
197980b14deeSRichard Cochran 				  SOF_TXTIME_DEADLINE_MODE : 0;
19804b15c707SJesus Sanchez-Palencia 		v.txtime.flags |= sk->sk_txtime_report_errors ?
19814b15c707SJesus Sanchez-Palencia 				  SOF_TXTIME_REPORT_ERRORS : 0;
198280b14deeSRichard Cochran 		break;
198380b14deeSRichard Cochran 
1984f5dd3d0cSDavid Herrmann 	case SO_BINDTOIFINDEX:
1985e5fccaa1SEric Dumazet 		v.val = READ_ONCE(sk->sk_bound_dev_if);
1986f5dd3d0cSDavid Herrmann 		break;
1987f5dd3d0cSDavid Herrmann 
1988e8b9eab9SMartynas Pumputis 	case SO_NETNS_COOKIE:
1989e8b9eab9SMartynas Pumputis 		lv = sizeof(u64);
1990e8b9eab9SMartynas Pumputis 		if (len != lv)
1991e8b9eab9SMartynas Pumputis 			return -EINVAL;
1992e8b9eab9SMartynas Pumputis 		v.val64 = sock_net(sk)->net_cookie;
1993e8b9eab9SMartynas Pumputis 		break;
1994e8b9eab9SMartynas Pumputis 
199504190bf8SPavel Tikhomirov 	case SO_BUF_LOCK:
199604190bf8SPavel Tikhomirov 		v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK;
199704190bf8SPavel Tikhomirov 		break;
199804190bf8SPavel Tikhomirov 
19992bb2f5fbSWei Wang 	case SO_RESERVE_MEM:
2000fe11fdcbSEric Dumazet 		v.val = READ_ONCE(sk->sk_reserved_mem);
20012bb2f5fbSWei Wang 		break;
20022bb2f5fbSWei Wang 
200326859240SAkhmat Karakotov 	case SO_TXREHASH:
2004c76a0328SEric Dumazet 		/* Paired with WRITE_ONCE() in sk_setsockopt() */
2005c76a0328SEric Dumazet 		v.val = READ_ONCE(sk->sk_txrehash);
200626859240SAkhmat Karakotov 		break;
200726859240SAkhmat Karakotov 
20081da177e4SLinus Torvalds 	default:
2009443b5991SYOSHIFUJI Hideaki/吉藤英明 		/* We implement the SO_SNDLOWAT etc to not be settable
2010443b5991SYOSHIFUJI Hideaki/吉藤英明 		 * (1003.1g 7).
2011443b5991SYOSHIFUJI Hideaki/吉藤英明 		 */
2012e71a4783SStephen Hemminger 		return -ENOPROTOOPT;
20131da177e4SLinus Torvalds 	}
2014e71a4783SStephen Hemminger 
20151da177e4SLinus Torvalds 	if (len > lv)
20161da177e4SLinus Torvalds 		len = lv;
20174ff09db1SMartin KaFai Lau 	if (copy_to_sockptr(optval, &v, len))
20181da177e4SLinus Torvalds 		return -EFAULT;
20191da177e4SLinus Torvalds lenout:
20204ff09db1SMartin KaFai Lau 	if (copy_to_sockptr(optlen, &len, sizeof(int)))
20211da177e4SLinus Torvalds 		return -EFAULT;
20221da177e4SLinus Torvalds 	return 0;
20231da177e4SLinus Torvalds }
20241da177e4SLinus Torvalds 
2025a5b5bb9aSIngo Molnar /*
2026a5b5bb9aSIngo Molnar  * Initialize an sk_lock.
2027a5b5bb9aSIngo Molnar  *
2028a5b5bb9aSIngo Molnar  * (We also register the sk_lock with the lock validator.)
2029a5b5bb9aSIngo Molnar  */
sock_lock_init(struct sock * sk)2030b6f99a21SDave Jones static inline void sock_lock_init(struct sock *sk)
2031a5b5bb9aSIngo Molnar {
2032cdfbabfbSDavid Howells 	if (sk->sk_kern_sock)
2033cdfbabfbSDavid Howells 		sock_lock_init_class_and_name(
2034cdfbabfbSDavid Howells 			sk,
2035cdfbabfbSDavid Howells 			af_family_kern_slock_key_strings[sk->sk_family],
2036cdfbabfbSDavid Howells 			af_family_kern_slock_keys + sk->sk_family,
2037cdfbabfbSDavid Howells 			af_family_kern_key_strings[sk->sk_family],
2038cdfbabfbSDavid Howells 			af_family_kern_keys + sk->sk_family);
2039cdfbabfbSDavid Howells 	else
2040cdfbabfbSDavid Howells 		sock_lock_init_class_and_name(
2041cdfbabfbSDavid Howells 			sk,
2042ed07536eSPeter Zijlstra 			af_family_slock_key_strings[sk->sk_family],
2043a5b5bb9aSIngo Molnar 			af_family_slock_keys + sk->sk_family,
2044a5b5bb9aSIngo Molnar 			af_family_key_strings[sk->sk_family],
2045ed07536eSPeter Zijlstra 			af_family_keys + sk->sk_family);
2046a5b5bb9aSIngo Molnar }
2047a5b5bb9aSIngo Molnar 
/*
 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
 * even temporarly, because of RCU lookups. sk_node should also be left as is.
 * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
 */
static void sock_copy(struct sock *nsk, const struct sock *osk)
{
	const struct proto *prot = READ_ONCE(osk->sk_prot);
#ifdef CONFIG_SECURITY_NETWORK
	/* nsk owns its own security blob; save it across the memcpy()
	 * below so it is not clobbered by osk's pointer.
	 */
	void *sptr = nsk->sk_security;
#endif

	/* If we move sk_tx_queue_mapping out of the private section,
	 * we must check if sk_tx_queue_clear() is called after
	 * sock_copy() in sk_clone_lock().
	 */
	BUILD_BUG_ON(offsetof(struct sock, sk_tx_queue_mapping) <
		     offsetof(struct sock, sk_dontcopy_begin) ||
		     offsetof(struct sock, sk_tx_queue_mapping) >=
		     offsetof(struct sock, sk_dontcopy_end));

	/* Copy in two chunks, skipping the [sk_dontcopy_begin,
	 * sk_dontcopy_end) window (refcnt, node, ...) per the rule above.
	 */
	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));

	/* The tail copy runs to prot->obj_size, covering the
	 * protocol-private area that follows struct sock.
	 */
	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
	       prot->obj_size - offsetof(struct sock, sk_dontcopy_end));

#ifdef CONFIG_SECURITY_NETWORK
	/* Restore nsk's own blob, then let the LSM clone osk's state. */
	nsk->sk_security = sptr;
	security_sk_clone(osk, nsk);
#endif
}
2079f1a6c4daSPavel Emelyanov 
/*
 * Allocate a bare struct sock for @prot, either from the protocol's
 * dedicated slab cache or, when none exists, straight from kmalloc().
 * On success the LSM blob is allocated and a reference on the owning
 * module is taken; on any failure everything is unwound and NULL is
 * returned.
 */
static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
		int family)
{
	struct kmem_cache *slab = prot->slab;
	struct sock *sk;

	if (!slab) {
		sk = kmalloc(prot->obj_size, priority);
	} else {
		/* Strip __GFP_ZERO: a typesafe-by-RCU slab object must be
		 * re-initialized selectively, not blindly zeroed.
		 */
		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
		if (!sk)
			return NULL;
		if (want_init_on_alloc(priority))
			sk_prot_clear_nulls(sk, prot->obj_size);
	}

	if (!sk)
		return NULL;

	if (security_sk_alloc(sk, family, priority))
		goto out_free;

	/* Pin the protocol module for the lifetime of this sock. */
	if (!try_module_get(prot->owner))
		goto out_free_sec;

	return sk;

out_free_sec:
	security_sk_free(sk);
out_free:
	if (slab)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	return NULL;
}
2115c308c1b2SPavel Emelyanov 
sk_prot_free(struct proto * prot,struct sock * sk)2116c308c1b2SPavel Emelyanov static void sk_prot_free(struct proto *prot, struct sock *sk)
2117c308c1b2SPavel Emelyanov {
2118c308c1b2SPavel Emelyanov 	struct kmem_cache *slab;
21192e4afe7bSPavel Emelyanov 	struct module *owner;
2120c308c1b2SPavel Emelyanov 
21212e4afe7bSPavel Emelyanov 	owner = prot->owner;
2122c308c1b2SPavel Emelyanov 	slab = prot->slab;
21232e4afe7bSPavel Emelyanov 
2124bd1060a1STejun Heo 	cgroup_sk_free(&sk->sk_cgrp_data);
21252d758073SJohannes Weiner 	mem_cgroup_sk_free(sk);
21262e4afe7bSPavel Emelyanov 	security_sk_free(sk);
2127c308c1b2SPavel Emelyanov 	if (slab != NULL)
2128c308c1b2SPavel Emelyanov 		kmem_cache_free(slab, sk);
2129c308c1b2SPavel Emelyanov 	else
2130c308c1b2SPavel Emelyanov 		kfree(sk);
21312e4afe7bSPavel Emelyanov 	module_put(owner);
2132c308c1b2SPavel Emelyanov }
2133c308c1b2SPavel Emelyanov 
/**
 *	sk_alloc - All socket objects are allocated here
 *	@net: the applicable net namespace
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 *	@kern: is this to be a kernel socket?
 *
 *	Return: the new zero-initialized socket with one sk_wmem_alloc
 *	reference held, or %NULL on allocation failure.
 */
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
		      struct proto *prot, int kern)
{
	struct sock *sk;

	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
	if (sk) {
		sk->sk_family = family;
		/*
		 * See comment in struct sock definition to understand
		 * why we need sk_prot_creator -acme
		 */
		sk->sk_prot = sk->sk_prot_creator = prot;
		sk->sk_kern_sock = kern;
		sock_lock_init(sk);
		/* Only user sockets hold a reference on the netns;
		 * kernel sockets must not keep a netns alive.
		 */
		sk->sk_net_refcnt = kern ? 0 : 1;
		if (likely(sk->sk_net_refcnt)) {
			get_net_track(net, &sk->ns_tracker, priority);
			sock_inuse_add(net, 1);
		} else {
			/* Refcount-less tracking, to detect kernel sockets
			 * leaked past netns destruction.
			 */
			__netns_tracker_alloc(net, &sk->ns_tracker,
					      false, priority);
		}

		sock_net_set(sk, net);
		/* One base reference; sk_free() drops it (see sock_wfree()) */
		refcount_set(&sk->sk_wmem_alloc, 1);

		mem_cgroup_sk_alloc(sk);
		cgroup_sk_alloc(&sk->sk_cgrp_data);
		sock_update_classid(&sk->sk_cgrp_data);
		sock_update_netprioidx(&sk->sk_cgrp_data);
		sk_tx_queue_clear(sk);
	}

	return sk;
}
EXPORT_SYMBOL(sk_alloc);
21791da177e4SLinus Torvalds 
/* Sockets having SOCK_RCU_FREE will call this function after one RCU
 * grace period. This is the case for UDP sockets and TCP listeners.
 */
static void __sk_destruct(struct rcu_head *head)
{
	struct sock *sk = container_of(head, struct sock, sk_rcu);
	struct sk_filter *filter;

	/* Run the protocol/owner destructor first, while the sock is
	 * still fully populated.
	 */
	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	/* sk_wmem_alloc == 0 proves we are the last user, so plain
	 * dereference of sk_filter is safe here (no RCU read lock).
	 */
	filter = rcu_dereference_check(sk->sk_filter,
				       refcount_read(&sk->sk_wmem_alloc) == 0);
	if (filter) {
		sk_filter_uncharge(sk, filter);
		RCU_INIT_POINTER(sk->sk_filter, NULL);
	}

	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);

#ifdef CONFIG_BPF_SYSCALL
	bpf_sk_storage_free(sk);
#endif

	/* Any remaining option memory at this point is a leak. */
	if (atomic_read(&sk->sk_omem_alloc))
		pr_debug("%s: optmem leakage (%d bytes) detected\n",
			 __func__, atomic_read(&sk->sk_omem_alloc));

	if (sk->sk_frag.page) {
		put_page(sk->sk_frag.page);
		sk->sk_frag.page = NULL;
	}

	/* We do not need to acquire sk->sk_peer_lock, we are the last user. */
	put_cred(sk->sk_peer_cred);
	put_pid(sk->sk_peer_pid);

	/* Drop the netns reference (or its tracker-only counterpart for
	 * kernel sockets), matching what sk_alloc()/sk_clone_lock() took.
	 */
	if (likely(sk->sk_net_refcnt))
		put_net_track(sock_net(sk), &sk->ns_tracker);
	else
		__netns_tracker_free(sock_net(sk), &sk->ns_tracker, false);

	/* Free via sk_prot_creator, which may differ from sk_prot. */
	sk_prot_free(sk->sk_prot_creator, sk);
}
22242b85a34eSEric Dumazet 
sk_destruct(struct sock * sk)2225a4298e45SEric Dumazet void sk_destruct(struct sock *sk)
2226a4298e45SEric Dumazet {
22278c7138b3SMartin KaFai Lau 	bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
22288c7138b3SMartin KaFai Lau 
22298c7138b3SMartin KaFai Lau 	if (rcu_access_pointer(sk->sk_reuseport_cb)) {
22308c7138b3SMartin KaFai Lau 		reuseport_detach_sock(sk);
22318c7138b3SMartin KaFai Lau 		use_call_rcu = true;
22328c7138b3SMartin KaFai Lau 	}
22338c7138b3SMartin KaFai Lau 
22348c7138b3SMartin KaFai Lau 	if (use_call_rcu)
2235a4298e45SEric Dumazet 		call_rcu(&sk->sk_rcu, __sk_destruct);
2236a4298e45SEric Dumazet 	else
2237a4298e45SEric Dumazet 		__sk_destruct(&sk->sk_rcu);
2238a4298e45SEric Dumazet }
2239a4298e45SEric Dumazet 
__sk_free(struct sock * sk)2240eb4cb008SCraig Gallek static void __sk_free(struct sock *sk)
2241eb4cb008SCraig Gallek {
2242648845abSTonghao Zhang 	if (likely(sk->sk_net_refcnt))
2243648845abSTonghao Zhang 		sock_inuse_add(sock_net(sk), -1);
2244648845abSTonghao Zhang 
22459709020cSEric Dumazet 	if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk)))
2246eb4cb008SCraig Gallek 		sock_diag_broadcast_destroy(sk);
2247eb4cb008SCraig Gallek 	else
2248eb4cb008SCraig Gallek 		sk_destruct(sk);
2249eb4cb008SCraig Gallek }
2250eb4cb008SCraig Gallek 
sk_free(struct sock * sk)22512b85a34eSEric Dumazet void sk_free(struct sock *sk)
22522b85a34eSEric Dumazet {
22532b85a34eSEric Dumazet 	/*
225425985edcSLucas De Marchi 	 * We subtract one from sk_wmem_alloc and can know if
22552b85a34eSEric Dumazet 	 * some packets are still in some tx queue.
22562b85a34eSEric Dumazet 	 * If not null, sock_wfree() will call __sk_free(sk) later
22572b85a34eSEric Dumazet 	 */
225814afee4bSReshetova, Elena 	if (refcount_dec_and_test(&sk->sk_wmem_alloc))
22592b85a34eSEric Dumazet 		__sk_free(sk);
22602b85a34eSEric Dumazet }
22612a91525cSEric Dumazet EXPORT_SYMBOL(sk_free);
22621da177e4SLinus Torvalds 
sk_init_common(struct sock * sk)2263581319c5SPaolo Abeni static void sk_init_common(struct sock *sk)
2264581319c5SPaolo Abeni {
2265581319c5SPaolo Abeni 	skb_queue_head_init(&sk->sk_receive_queue);
2266581319c5SPaolo Abeni 	skb_queue_head_init(&sk->sk_write_queue);
2267581319c5SPaolo Abeni 	skb_queue_head_init(&sk->sk_error_queue);
2268581319c5SPaolo Abeni 
2269581319c5SPaolo Abeni 	rwlock_init(&sk->sk_callback_lock);
2270581319c5SPaolo Abeni 	lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
2271581319c5SPaolo Abeni 			af_rlock_keys + sk->sk_family,
2272581319c5SPaolo Abeni 			af_family_rlock_key_strings[sk->sk_family]);
2273581319c5SPaolo Abeni 	lockdep_set_class_and_name(&sk->sk_write_queue.lock,
2274581319c5SPaolo Abeni 			af_wlock_keys + sk->sk_family,
2275581319c5SPaolo Abeni 			af_family_wlock_key_strings[sk->sk_family]);
2276581319c5SPaolo Abeni 	lockdep_set_class_and_name(&sk->sk_error_queue.lock,
2277581319c5SPaolo Abeni 			af_elock_keys + sk->sk_family,
2278581319c5SPaolo Abeni 			af_family_elock_key_strings[sk->sk_family]);
2279581319c5SPaolo Abeni 	lockdep_set_class_and_name(&sk->sk_callback_lock,
2280581319c5SPaolo Abeni 			af_callback_keys + sk->sk_family,
2281581319c5SPaolo Abeni 			af_family_clock_key_strings[sk->sk_family]);
2282581319c5SPaolo Abeni }
2283581319c5SPaolo Abeni 
/**
 *	sk_clone_lock - clone a socket, and lock its clone
 *	@sk: the socket to clone
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *
 *	Caller must unlock socket even in error path (bh_unlock_sock(newsk))
 *
 *	Return: the locked clone with sk_refcnt == 2, or %NULL on failure.
 */
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
{
	struct proto *prot = READ_ONCE(sk->sk_prot);
	struct sk_filter *filter;
	bool is_charged = true;
	struct sock *newsk;

	newsk = sk_prot_alloc(prot, priority, sk->sk_family);
	if (!newsk)
		goto out;

	/* Bulk-copy the parent, minus the dontcopy window (refcnt, node). */
	sock_copy(newsk, sk);

	newsk->sk_prot_creator = prot;

	/* SANITY */
	if (likely(newsk->sk_net_refcnt)) {
		get_net_track(sock_net(newsk), &newsk->ns_tracker, priority);
		sock_inuse_add(sock_net(newsk), 1);
	} else {
		/* Kernel sockets are not elevating the struct net refcount.
		 * Instead, use a tracker to more easily detect if a layer
		 * is not properly dismantling its kernel sockets at netns
		 * destroy time.
		 */
		__netns_tracker_alloc(sock_net(newsk), &newsk->ns_tracker,
				      false, priority);
	}
	sk_node_init(&newsk->sk_node);
	sock_lock_init(newsk);
	/* Returned locked; the caller unlocks, even on the error path. */
	bh_lock_sock(newsk);
	newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
	newsk->sk_backlog.len = 0;

	atomic_set(&newsk->sk_rmem_alloc, 0);

	/* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */
	refcount_set(&newsk->sk_wmem_alloc, 1);

	atomic_set(&newsk->sk_omem_alloc, 0);
	sk_init_common(newsk);

	/* Reset per-socket state the clone must not inherit. */
	newsk->sk_dst_cache	= NULL;
	newsk->sk_dst_pending_confirm = 0;
	newsk->sk_wmem_queued	= 0;
	newsk->sk_forward_alloc = 0;
	newsk->sk_reserved_mem  = 0;
	atomic_set(&newsk->sk_drops, 0);
	newsk->sk_send_head	= NULL;
	newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
	atomic_set(&newsk->sk_zckey, 0);

	sock_reset_flag(newsk, SOCK_DONE);

	/* sk->sk_memcg will be populated at accept() time */
	newsk->sk_memcg = NULL;

	cgroup_sk_clone(&newsk->sk_cgrp_data);

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter != NULL)
		/* though it's an empty new sock, the charging may fail
		 * if sysctl_optmem_max was changed between creation of
		 * original socket and cloning
		 */
		is_charged = sk_filter_charge(newsk, filter);
	RCU_INIT_POINTER(newsk->sk_filter, filter);
	rcu_read_unlock();

	if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
		/* We need to make sure that we don't uncharge the new
		 * socket if we couldn't charge it in the first place
		 * as otherwise we uncharge the parent's filter.
		 */
		if (!is_charged)
			RCU_INIT_POINTER(newsk->sk_filter, NULL);
		sk_free_unlock_clone(newsk);
		newsk = NULL;
		goto out;
	}
	RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);

	if (bpf_sk_storage_clone(sk, newsk)) {
		sk_free_unlock_clone(newsk);
		newsk = NULL;
		goto out;
	}

	/* Clear sk_user_data if parent had the pointer tagged
	 * as not suitable for copying when cloning.
	 */
	if (sk_user_data_is_nocopy(newsk))
		newsk->sk_user_data = NULL;

	newsk->sk_err	   = 0;
	newsk->sk_err_soft = 0;
	newsk->sk_priority = 0;
	newsk->sk_incoming_cpu = raw_smp_processor_id();

	/* Before updating sk_refcnt, we must commit prior changes to memory
	 * (Documentation/RCU/rculist_nulls.rst for details)
	 */
	smp_wmb();
	/* Two references: one returned to the caller, one for the hash
	 * table / protocol — NOTE(review): presumed split, confirm against
	 * callers such as inet_csk_clone_lock().
	 */
	refcount_set(&newsk->sk_refcnt, 2);

	sk_set_socket(newsk, NULL);
	sk_tx_queue_clear(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, NULL);

	if (newsk->sk_prot->sockets_allocated)
		sk_sockets_allocated_inc(newsk);

	if (sock_needs_netstamp(sk) && newsk->sk_flags & SK_FLAGS_TIMESTAMP)
		net_enable_timestamp();
out:
	return newsk;
}
EXPORT_SYMBOL_GPL(sk_clone_lock);
241087d11cebSArnaldo Carvalho de Melo 
/* Abort a clone produced by sk_clone_lock(): the socket is still a raw
 * copy of its parent, so the inherited destructor must not run against
 * parent-owned state.  Drops the bh lock taken by sk_clone_lock() and
 * frees the half-initialized socket.
 */
void sk_free_unlock_clone(struct sock *sk)
{
	/* It is still raw copy of parent, so invalidate
	 * destructor and make plain sk_free() */
	sk->sk_destruct = NULL;
	bh_unlock_sock(sk);
	sk_free(sk);
}
EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
242094352d45SArnaldo Carvalho de Melo 
sk_dst_gso_max_size(struct sock * sk,struct dst_entry * dst)2421b1a78b9bSXin Long static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst)
24227c4e983cSAlexander Duyck {
2423b1a78b9bSXin Long 	bool is_ipv6 = false;
2424b1a78b9bSXin Long 	u32 max_size;
2425b1a78b9bSXin Long 
24267c4e983cSAlexander Duyck #if IS_ENABLED(CONFIG_IPV6)
2427b1a78b9bSXin Long 	is_ipv6 = (sk->sk_family == AF_INET6 &&
2428b1a78b9bSXin Long 		   !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr));
24297c4e983cSAlexander Duyck #endif
2430b1a78b9bSXin Long 	/* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */
2431b1a78b9bSXin Long 	max_size = is_ipv6 ? READ_ONCE(dst->dev->gso_max_size) :
2432b1a78b9bSXin Long 			READ_ONCE(dst->dev->gso_ipv4_max_size);
2433b1a78b9bSXin Long 	if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk))
2434b1a78b9bSXin Long 		max_size = GSO_LEGACY_MAX_SIZE;
2435b1a78b9bSXin Long 
2436b1a78b9bSXin Long 	return max_size - (MAX_TCP_HEADER + 1);
24377c4e983cSAlexander Duyck }
24387c4e983cSAlexander Duyck 
/* Derive the socket's offload capabilities (sk_route_caps, GSO limits)
 * from the output device of @dst, then install @dst as the cached route.
 * Ordering matters: caps are finalized before sk_dst_set() publishes dst.
 */
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
	u32 max_segs = 1;

	/* Start from the device feature set; TCP always advertises GSO. */
	sk->sk_route_caps = dst->dev->features;
	if (sk_is_tcp(sk))
		sk->sk_route_caps |= NETIF_F_GSO;
	if (sk->sk_route_caps & NETIF_F_GSO)
		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
	/* Per-socket opt-out (sk_gso_disabled) overrides device GSO. */
	if (unlikely(sk->sk_gso_disabled))
		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
	if (sk_can_gso(sk)) {
		/* xfrm without offload support cannot handle GSO frames. */
		if (dst->header_len && !xfrm_dst_offload_ok(dst)) {
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
		} else {
			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
			sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst);
			/* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
			max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
		}
	}
	sk->sk_gso_max_segs = max_segs;
	sk_dst_set(sk, dst);
}
EXPORT_SYMBOL_GPL(sk_setup_caps);
24649958089aSAndi Kleen 
24651da177e4SLinus Torvalds /*
24661da177e4SLinus Torvalds  *	Simple resource managers for sockets.
24671da177e4SLinus Torvalds  */
24681da177e4SLinus Torvalds 
24691da177e4SLinus Torvalds 
24701da177e4SLinus Torvalds /*
24711da177e4SLinus Torvalds  * Write buffer destructor automatically called from kfree_skb.
24721da177e4SLinus Torvalds  */
/* Release @skb's charge against sk_wmem_alloc and wake writers.
 * sk_wmem_alloc also acts as a reference that keeps @sk alive while
 * packets are in flight; when it drops to zero we must complete the
 * free that sk_free() deferred.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;
	bool free;

	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
		/* Fast path for RCU-freed sockets that still use the
		 * default write-space callback: drop the whole truesize in
		 * one step and do the wakeup under rcu_read_lock(), which
		 * keeps the wait queue valid even if the last reference
		 * goes away here.
		 */
		if (sock_flag(sk, SOCK_RCU_FREE) &&
		    sk->sk_write_space == sock_def_write_space) {
			rcu_read_lock();
			free = refcount_sub_and_test(len, &sk->sk_wmem_alloc);
			sock_def_write_space_wfree(sk);
			rcu_read_unlock();
			if (unlikely(free))
				__sk_free(sk);
			return;
		}

		/*
		 * Keep a reference on sk_wmem_alloc, this will be released
		 * after sk_write_space() call
		 */
		WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc));
		sk->sk_write_space(sk);
		len = 1;
	}
	/*
	 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
	 * could not do because of in-flight packets
	 */
	if (refcount_sub_and_test(len, &sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sock_wfree);
25071da177e4SLinus Torvalds 
25081d2077acSEric Dumazet /* This variant of sock_wfree() is used by TCP,
25091d2077acSEric Dumazet  * since it sets SOCK_USE_WRITE_QUEUE.
25101d2077acSEric Dumazet  */
void __sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	/* No write_space wakeup here (TCP handles that itself); just
	 * uncharge and finish the deferred free if this was the last
	 * in-flight byte.
	 */
	if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
		__sk_free(sk);
}
25181d2077acSEric Dumazet 
/* Attach @skb to @sk for the transmit path: charge skb->truesize to
 * sk_wmem_alloc and install the matching destructor.  Non-full sockets
 * (request/timewait) cannot account write memory, so they take a plain
 * reference and use sock_edemux instead.
 */
void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
	skb_orphan(skb);
	skb->sk = sk;
#ifdef CONFIG_INET
	if (unlikely(!sk_fullsock(sk))) {
		skb->destructor = sock_edemux;
		sock_hold(sk);
		return;
	}
#endif
	skb->destructor = sock_wfree;
	skb_set_hash_from_sk(skb, sk);
	/*
	 * We used to take a refcount on sk, but following operation
	 * is enough to guarantee sk_free() wont free this sock until
	 * all in-flight packets are completed
	 */
	refcount_add(skb->truesize, &sk->sk_wmem_alloc);
}
EXPORT_SYMBOL(skb_set_owner_w);
25409e17f8a4SEric Dumazet 
can_skb_orphan_partial(const struct sk_buff * skb)254141477662SJakub Kicinski static bool can_skb_orphan_partial(const struct sk_buff *skb)
254241477662SJakub Kicinski {
254341477662SJakub Kicinski #ifdef CONFIG_TLS_DEVICE
254441477662SJakub Kicinski 	/* Drivers depend on in-order delivery for crypto offload,
254541477662SJakub Kicinski 	 * partial orphan breaks out-of-order-OK logic.
254641477662SJakub Kicinski 	 */
254741477662SJakub Kicinski 	if (skb->decrypted)
254841477662SJakub Kicinski 		return false;
254941477662SJakub Kicinski #endif
255041477662SJakub Kicinski 	return (skb->destructor == sock_wfree ||
255141477662SJakub Kicinski 		(IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
255241477662SJakub Kicinski }
255341477662SJakub Kicinski 
25541d2077acSEric Dumazet /* This helper is used by netem, as it can hold packets in its
25551d2077acSEric Dumazet  * delay queue. We want to allow the owner socket to send more
25561d2077acSEric Dumazet  * packets, as if they were already TX completed by a typical driver.
25571d2077acSEric Dumazet  * But we also want to keep skb->sk set because some packet schedulers
2558f6ba8d33SEric Dumazet  * rely on it (sch_fq for example).
25591d2077acSEric Dumazet  */
void skb_orphan_partial(struct sk_buff *skb)
{
	/* Pure TCP ACKs are cheap; leave them fully owned. */
	if (skb_is_tcp_pure_ack(skb))
		return;

	/* Keep skb->sk (packet schedulers such as sch_fq rely on it)
	 * while releasing the wmem charge, when that is safe to do.
	 */
	if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk))
		return;

	/* Fallback: fully detach the skb from its socket. */
	skb_orphan(skb);
}
EXPORT_SYMBOL(skb_orphan_partial);
2571f2f872f9SEric Dumazet 
25721da177e4SLinus Torvalds /*
25731da177e4SLinus Torvalds  * Read buffer destructor automatically called from kfree_skb.
25741da177e4SLinus Torvalds  */
void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;

	/* Return the receive-buffer charge and the forward-alloc quota
	 * taken when the skb was queued.
	 */
	atomic_sub(len, &sk->sk_rmem_alloc);
	sk_mem_uncharge(sk, len);
}
EXPORT_SYMBOL(sock_rfree);
25841da177e4SLinus Torvalds 
25857768eed8SOliver Hartkopp /*
25867768eed8SOliver Hartkopp  * Buffer destructor for skbs that are not used directly in read or write
25877768eed8SOliver Hartkopp  * path, e.g. for error handler skbs. Automatically called from kfree_skb.
25887768eed8SOliver Hartkopp  */
void sock_efree(struct sk_buff *skb)
{
	/* Drop the plain socket reference taken by the skb's owner. */
	sock_put(skb->sk);
}
EXPORT_SYMBOL(sock_efree);
259462bccb8cSAlexander Duyck 
2595cf7fbe66SJoe Stringer /* Buffer destructor for prefetch/receive path where reference count may
2596cf7fbe66SJoe Stringer  * not be held, e.g. for listen sockets.
2597cf7fbe66SJoe Stringer  */
2598cf7fbe66SJoe Stringer #ifdef CONFIG_INET
void sock_pfree(struct sk_buff *skb)
{
	/* Only refcounted sockets hold a reference here; request/timewait
	 * style sockets are released via sock_gen_put().
	 */
	if (sk_is_refcounted(skb->sk))
		sock_gen_put(skb->sk);
}
EXPORT_SYMBOL(sock_pfree);
2605cf7fbe66SJoe Stringer #endif /* CONFIG_INET */
2606cf7fbe66SJoe Stringer 
sock_i_uid(struct sock * sk)2607976d0201SEric W. Biederman kuid_t sock_i_uid(struct sock *sk)
26081da177e4SLinus Torvalds {
2609976d0201SEric W. Biederman 	kuid_t uid;
26101da177e4SLinus Torvalds 
2611f064af1eSEric Dumazet 	read_lock_bh(&sk->sk_callback_lock);
2612976d0201SEric W. Biederman 	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
2613f064af1eSEric Dumazet 	read_unlock_bh(&sk->sk_callback_lock);
26141da177e4SLinus Torvalds 	return uid;
26151da177e4SLinus Torvalds }
26162a91525cSEric Dumazet EXPORT_SYMBOL(sock_i_uid);
26171da177e4SLinus Torvalds 
/* Like sock_i_ino() but without disabling BHs; callers must already be
 * in a context where taking sk_callback_lock without _bh is safe.
 * Returns 0 when no struct socket is attached.
 */
unsigned long __sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	read_lock(&sk->sk_callback_lock);
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
	read_unlock(&sk->sk_callback_lock);
	return ino;
}
EXPORT_SYMBOL(__sock_i_ino);
262825a9c8a4SKuniyuki Iwashima 
sock_i_ino(struct sock * sk)26291da177e4SLinus Torvalds unsigned long sock_i_ino(struct sock *sk)
26301da177e4SLinus Torvalds {
26311da177e4SLinus Torvalds 	unsigned long ino;
26321da177e4SLinus Torvalds 
263325a9c8a4SKuniyuki Iwashima 	local_bh_disable();
263425a9c8a4SKuniyuki Iwashima 	ino = __sock_i_ino(sk);
263525a9c8a4SKuniyuki Iwashima 	local_bh_enable();
26361da177e4SLinus Torvalds 	return ino;
26371da177e4SLinus Torvalds }
26382a91525cSEric Dumazet EXPORT_SYMBOL(sock_i_ino);
26391da177e4SLinus Torvalds 
26401da177e4SLinus Torvalds /*
26411da177e4SLinus Torvalds  * Allocate a skb from the socket's send buffer.
26421da177e4SLinus Torvalds  */
/* Allocate an skb charged to @sk's send buffer.  Unless @force is set,
 * fail when sk_wmem_alloc has already reached sk_sndbuf.  Returns NULL
 * on limit or allocation failure.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	struct sk_buff *skb;

	if (!force &&
	    refcount_read(&sk->sk_wmem_alloc) >= READ_ONCE(sk->sk_sndbuf))
		return NULL;

	skb = alloc_skb(size, priority);
	if (!skb)
		return NULL;

	skb_set_owner_w(skb, sk);
	return skb;
}
EXPORT_SYMBOL(sock_wmalloc);
26581da177e4SLinus Torvalds 
/* Destructor for skbs allocated via sock_omalloc(): return the charge
 * taken against the socket's option memory (sk_omem_alloc).
 */
static void sock_ofree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	atomic_sub(skb->truesize, &sk->sk_omem_alloc);
}
266598ba0bd5SWillem de Bruijn 
/* Allocate an skb charged against @sk's option memory, bounded by
 * sysctl_optmem_max.  Returns NULL when over the limit or on allocation
 * failure; the charge is released by sock_ofree().
 */
struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
			     gfp_t priority)
{
	struct sk_buff *skb;

	/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
	if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
	    READ_ONCE(sysctl_optmem_max))
		return NULL;

	skb = alloc_skb(size, priority);
	if (!skb)
		return NULL;

	/* Charge the real truesize and route the uncharge via sock_ofree. */
	atomic_add(skb->truesize, &sk->sk_omem_alloc);
	skb->sk = sk;
	skb->destructor = sock_ofree;
	return skb;
}
268598ba0bd5SWillem de Bruijn 
26861da177e4SLinus Torvalds /*
26871da177e4SLinus Torvalds  * Allocate a memory block from the socket's option memory buffer.
26881da177e4SLinus Torvalds  */
sock_kmalloc(struct sock * sk,int size,gfp_t priority)2689dd0fc66fSAl Viro void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
26901da177e4SLinus Torvalds {
26917de6d09fSKuniyuki Iwashima 	int optmem_max = READ_ONCE(sysctl_optmem_max);
26927de6d09fSKuniyuki Iwashima 
26937de6d09fSKuniyuki Iwashima 	if ((unsigned int)size <= optmem_max &&
26947de6d09fSKuniyuki Iwashima 	    atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
26951da177e4SLinus Torvalds 		void *mem;
26961da177e4SLinus Torvalds 		/* First do the add, to avoid the race if kmalloc
26971da177e4SLinus Torvalds 		 * might sleep.
26981da177e4SLinus Torvalds 		 */
26991da177e4SLinus Torvalds 		atomic_add(size, &sk->sk_omem_alloc);
27001da177e4SLinus Torvalds 		mem = kmalloc(size, priority);
27011da177e4SLinus Torvalds 		if (mem)
27021da177e4SLinus Torvalds 			return mem;
27031da177e4SLinus Torvalds 		atomic_sub(size, &sk->sk_omem_alloc);
27041da177e4SLinus Torvalds 	}
27051da177e4SLinus Torvalds 	return NULL;
27061da177e4SLinus Torvalds }
27072a91525cSEric Dumazet EXPORT_SYMBOL(sock_kmalloc);
27081da177e4SLinus Torvalds 
270979e88659SDaniel Borkmann /* Free an option memory block. Note, we actually want the inline
271079e88659SDaniel Borkmann  * here as this allows gcc to detect the nullify and fold away the
271179e88659SDaniel Borkmann  * condition entirely.
27121da177e4SLinus Torvalds  */
/* Common helper for sock_kfree_s()/sock_kzfree_s(): free @mem (zeroing
 * it first when @nullify is set, for key material) and return the
 * charge to sk_omem_alloc.  @nullify is const so gcc can fold the
 * branch away in each inlined caller.
 */
static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
				  const bool nullify)
{
	if (WARN_ON_ONCE(!mem))
		return;
	if (nullify)
		kfree_sensitive(mem);
	else
		kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}
272479e88659SDaniel Borkmann 
/* Free option memory allocated with sock_kmalloc(). */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	__sock_kfree_s(sk, mem, size, false);
}
EXPORT_SYMBOL(sock_kfree_s);
27301da177e4SLinus Torvalds 
/* Like sock_kfree_s() but zeroes the memory first (for sensitive data). */
void sock_kzfree_s(struct sock *sk, void *mem, int size)
{
	__sock_kfree_s(sk, mem, size, true);
}
EXPORT_SYMBOL(sock_kzfree_s);
273679e88659SDaniel Borkmann 
27371da177e4SLinus Torvalds /* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
27381da177e4SLinus Torvalds    I think, these locks should be removed for datagram sockets.
27391da177e4SLinus Torvalds  */
sock_wait_for_wmem(struct sock * sk,long timeo)27401da177e4SLinus Torvalds static long sock_wait_for_wmem(struct sock *sk, long timeo)
27411da177e4SLinus Torvalds {
27421da177e4SLinus Torvalds 	DEFINE_WAIT(wait);
27431da177e4SLinus Torvalds 
27449cd3e072SEric Dumazet 	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
27451da177e4SLinus Torvalds 	for (;;) {
27461da177e4SLinus Torvalds 		if (!timeo)
27471da177e4SLinus Torvalds 			break;
27481da177e4SLinus Torvalds 		if (signal_pending(current))
27491da177e4SLinus Torvalds 			break;
27501da177e4SLinus Torvalds 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
2751aa395145SEric Dumazet 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2752e292f05eSEric Dumazet 		if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
27531da177e4SLinus Torvalds 			break;
2754afe8764fSKuniyuki Iwashima 		if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
27551da177e4SLinus Torvalds 			break;
2756b1928129SKuniyuki Iwashima 		if (READ_ONCE(sk->sk_err))
27571da177e4SLinus Torvalds 			break;
27581da177e4SLinus Torvalds 		timeo = schedule_timeout(timeo);
27591da177e4SLinus Torvalds 	}
2760aa395145SEric Dumazet 	finish_wait(sk_sleep(sk), &wait);
27611da177e4SLinus Torvalds 	return timeo;
27621da177e4SLinus Torvalds }
27631da177e4SLinus Torvalds 
27641da177e4SLinus Torvalds 
27651da177e4SLinus Torvalds /*
27661da177e4SLinus Torvalds  *	Generic send/receive buffer handlers
27671da177e4SLinus Torvalds  */
27681da177e4SLinus Torvalds 
/* Allocate a (possibly paged) skb charged to @sk's send buffer, waiting
 * for space when @noblock is false.  On failure returns NULL and stores
 * the error in *errcode (-EPIPE on shutdown, -EAGAIN when non-blocking
 * and full, or the pending socket error).
 */
struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
				     unsigned long data_len, int noblock,
				     int *errcode, int max_page_order)
{
	struct sk_buff *skb;
	long timeo;
	int err;

	timeo = sock_sndtimeo(sk, noblock);
	for (;;) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
			goto failure;

		/* Space available: leave the wait loop and allocate. */
		if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
			break;

		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}
	/* alloc_skb_with_frags() fills *errcode itself on failure. */
	skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
				   errcode, sk->sk_allocation);
	if (skb)
		skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}
EXPORT_SYMBOL(sock_alloc_send_pskb);
28121da177e4SLinus Torvalds 
__sock_cmsg_send(struct sock * sk,struct cmsghdr * cmsg,struct sockcm_cookie * sockc)2813233baf9aSxu xin int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
2814f28ea365SEdward Jee 		     struct sockcm_cookie *sockc)
2815f28ea365SEdward Jee {
28163dd17e63SSoheil Hassas Yeganeh 	u32 tsflags;
28173dd17e63SSoheil Hassas Yeganeh 
2818f28ea365SEdward Jee 	switch (cmsg->cmsg_type) {
2819f28ea365SEdward Jee 	case SO_MARK:
282091f0d8a4SJakub Kicinski 		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
282191f0d8a4SJakub Kicinski 		    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2822f28ea365SEdward Jee 			return -EPERM;
2823f28ea365SEdward Jee 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
2824f28ea365SEdward Jee 			return -EINVAL;
2825f28ea365SEdward Jee 		sockc->mark = *(u32 *)CMSG_DATA(cmsg);
2826f28ea365SEdward Jee 		break;
28277f1bc6e9SDeepa Dinamani 	case SO_TIMESTAMPING_OLD:
2828200bc366SThomas Lange 	case SO_TIMESTAMPING_NEW:
28293dd17e63SSoheil Hassas Yeganeh 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
28303dd17e63SSoheil Hassas Yeganeh 			return -EINVAL;
28313dd17e63SSoheil Hassas Yeganeh 
28323dd17e63SSoheil Hassas Yeganeh 		tsflags = *(u32 *)CMSG_DATA(cmsg);
28333dd17e63SSoheil Hassas Yeganeh 		if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
28343dd17e63SSoheil Hassas Yeganeh 			return -EINVAL;
28353dd17e63SSoheil Hassas Yeganeh 
28363dd17e63SSoheil Hassas Yeganeh 		sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
28373dd17e63SSoheil Hassas Yeganeh 		sockc->tsflags |= tsflags;
28383dd17e63SSoheil Hassas Yeganeh 		break;
283980b14deeSRichard Cochran 	case SCM_TXTIME:
284080b14deeSRichard Cochran 		if (!sock_flag(sk, SOCK_TXTIME))
284180b14deeSRichard Cochran 			return -EINVAL;
284280b14deeSRichard Cochran 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64)))
284380b14deeSRichard Cochran 			return -EINVAL;
284480b14deeSRichard Cochran 		sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
284580b14deeSRichard Cochran 		break;
2846779f1edeSSoheil Hassas Yeganeh 	/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
2847779f1edeSSoheil Hassas Yeganeh 	case SCM_RIGHTS:
2848779f1edeSSoheil Hassas Yeganeh 	case SCM_CREDENTIALS:
2849779f1edeSSoheil Hassas Yeganeh 		break;
2850f28ea365SEdward Jee 	default:
2851f28ea365SEdward Jee 		return -EINVAL;
2852f28ea365SEdward Jee 	}
285339771b12SWillem de Bruijn 	return 0;
285439771b12SWillem de Bruijn }
285539771b12SWillem de Bruijn EXPORT_SYMBOL(__sock_cmsg_send);
285639771b12SWillem de Bruijn 
sock_cmsg_send(struct sock * sk,struct msghdr * msg,struct sockcm_cookie * sockc)285739771b12SWillem de Bruijn int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
285839771b12SWillem de Bruijn 		   struct sockcm_cookie *sockc)
285939771b12SWillem de Bruijn {
286039771b12SWillem de Bruijn 	struct cmsghdr *cmsg;
286139771b12SWillem de Bruijn 	int ret;
286239771b12SWillem de Bruijn 
286339771b12SWillem de Bruijn 	for_each_cmsghdr(cmsg, msg) {
286439771b12SWillem de Bruijn 		if (!CMSG_OK(msg, cmsg))
286539771b12SWillem de Bruijn 			return -EINVAL;
286639771b12SWillem de Bruijn 		if (cmsg->cmsg_level != SOL_SOCKET)
286739771b12SWillem de Bruijn 			continue;
2868233baf9aSxu xin 		ret = __sock_cmsg_send(sk, cmsg, sockc);
286939771b12SWillem de Bruijn 		if (ret)
287039771b12SWillem de Bruijn 			return ret;
2871f28ea365SEdward Jee 	}
2872f28ea365SEdward Jee 	return 0;
2873f28ea365SEdward Jee }
2874f28ea365SEdward Jee EXPORT_SYMBOL(sock_cmsg_send);
2875f28ea365SEdward Jee 
sk_enter_memory_pressure(struct sock * sk)287606044751SEric Dumazet static void sk_enter_memory_pressure(struct sock *sk)
287706044751SEric Dumazet {
287806044751SEric Dumazet 	if (!sk->sk_prot->enter_memory_pressure)
287906044751SEric Dumazet 		return;
288006044751SEric Dumazet 
288106044751SEric Dumazet 	sk->sk_prot->enter_memory_pressure(sk);
288206044751SEric Dumazet }
288306044751SEric Dumazet 
sk_leave_memory_pressure(struct sock * sk)288406044751SEric Dumazet static void sk_leave_memory_pressure(struct sock *sk)
288506044751SEric Dumazet {
288606044751SEric Dumazet 	if (sk->sk_prot->leave_memory_pressure) {
28875c1ebbfaSBrian Vazquez 		INDIRECT_CALL_INET_1(sk->sk_prot->leave_memory_pressure,
28885c1ebbfaSBrian Vazquez 				     tcp_leave_memory_pressure, sk);
288906044751SEric Dumazet 	} else {
289006044751SEric Dumazet 		unsigned long *memory_pressure = sk->sk_prot->memory_pressure;
289106044751SEric Dumazet 
2892503978acSEric Dumazet 		if (memory_pressure && READ_ONCE(*memory_pressure))
2893503978acSEric Dumazet 			WRITE_ONCE(*memory_pressure, 0);
289406044751SEric Dumazet 	}
289506044751SEric Dumazet }
289606044751SEric Dumazet 
2897ce27ec60SEric Dumazet DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
28985640f768SEric Dumazet 
2899400dfd3aSEric Dumazet /**
2900400dfd3aSEric Dumazet  * skb_page_frag_refill - check that a page_frag contains enough room
2901400dfd3aSEric Dumazet  * @sz: minimum size of the fragment we want to get
2902400dfd3aSEric Dumazet  * @pfrag: pointer to page_frag
290382d5e2b8SEric Dumazet  * @gfp: priority for memory allocation
2904400dfd3aSEric Dumazet  *
2905400dfd3aSEric Dumazet  * Note: While this allocator tries to use high order pages, there is
2906400dfd3aSEric Dumazet  * no guarantee that allocations succeed. Therefore, @sz MUST be
2907400dfd3aSEric Dumazet  * less or equal than PAGE_SIZE.
2908400dfd3aSEric Dumazet  */
bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
{
	if (pfrag->page) {
		/* Sole owner: the whole page can be reused from offset 0. */
		if (page_ref_count(pfrag->page) == 1) {
			pfrag->offset = 0;
			return true;
		}
		/* Enough room left in the current page. */
		if (pfrag->offset + sz <= pfrag->size)
			return true;
		put_page(pfrag->page);
	}

	pfrag->offset = 0;
	if (SKB_FRAG_PAGE_ORDER &&
	    !static_branch_unlikely(&net_high_order_alloc_disable_key)) {
		/* Avoid direct reclaim but allow kswapd to wake */
		pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
					  __GFP_COMP | __GFP_NOWARN |
					  __GFP_NORETRY,
					  SKB_FRAG_PAGE_ORDER);
		if (likely(pfrag->page)) {
			pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
			return true;
		}
	}
	/* Fall back to a single page with the caller's full gfp mask. */
	pfrag->page = alloc_page(gfp);
	if (likely(pfrag->page)) {
		pfrag->size = PAGE_SIZE;
		return true;
	}
	return false;
}
EXPORT_SYMBOL(skb_page_frag_refill);
2942400dfd3aSEric Dumazet 
sk_page_frag_refill(struct sock * sk,struct page_frag * pfrag)2943400dfd3aSEric Dumazet bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
2944400dfd3aSEric Dumazet {
2945400dfd3aSEric Dumazet 	if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
2946400dfd3aSEric Dumazet 		return true;
2947400dfd3aSEric Dumazet 
29485640f768SEric Dumazet 	sk_enter_memory_pressure(sk);
29495640f768SEric Dumazet 	sk_stream_moderate_sndbuf(sk);
29505640f768SEric Dumazet 	return false;
29515640f768SEric Dumazet }
29525640f768SEric Dumazet EXPORT_SYMBOL(sk_page_frag_refill);
29535640f768SEric Dumazet 
__lock_sock(struct sock * sk)2954ad80b0fcSPaolo Abeni void __lock_sock(struct sock *sk)
2955f39234d6SNamhyung Kim 	__releases(&sk->sk_lock.slock)
2956f39234d6SNamhyung Kim 	__acquires(&sk->sk_lock.slock)
29571da177e4SLinus Torvalds {
29581da177e4SLinus Torvalds 	DEFINE_WAIT(wait);
29591da177e4SLinus Torvalds 
29601da177e4SLinus Torvalds 	for (;;) {
29611da177e4SLinus Torvalds 		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
29621da177e4SLinus Torvalds 					TASK_UNINTERRUPTIBLE);
29631da177e4SLinus Torvalds 		spin_unlock_bh(&sk->sk_lock.slock);
29641da177e4SLinus Torvalds 		schedule();
29651da177e4SLinus Torvalds 		spin_lock_bh(&sk->sk_lock.slock);
29661da177e4SLinus Torvalds 		if (!sock_owned_by_user(sk))
29671da177e4SLinus Torvalds 			break;
29681da177e4SLinus Torvalds 	}
29691da177e4SLinus Torvalds 	finish_wait(&sk->sk_lock.wq, &wait);
29701da177e4SLinus Torvalds }
29711da177e4SLinus Torvalds 
__release_sock(struct sock * sk)29728873c064SEric Dumazet void __release_sock(struct sock *sk)
2973f39234d6SNamhyung Kim 	__releases(&sk->sk_lock.slock)
2974f39234d6SNamhyung Kim 	__acquires(&sk->sk_lock.slock)
29751da177e4SLinus Torvalds {
29765413d1baSEric Dumazet 	struct sk_buff *skb, *next;
29771da177e4SLinus Torvalds 
29785413d1baSEric Dumazet 	while ((skb = sk->sk_backlog.head) != NULL) {
29791da177e4SLinus Torvalds 		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
29805413d1baSEric Dumazet 
29815413d1baSEric Dumazet 		spin_unlock_bh(&sk->sk_lock.slock);
29821da177e4SLinus Torvalds 
29831da177e4SLinus Torvalds 		do {
29845413d1baSEric Dumazet 			next = skb->next;
2985e4cbb02aSEric Dumazet 			prefetch(next);
298663fbdd3cSEric Dumazet 			DEBUG_NET_WARN_ON_ONCE(skb_dst_is_noref(skb));
2987a8305bffSDavid S. Miller 			skb_mark_not_on_list(skb);
2988c57943a1SPeter Zijlstra 			sk_backlog_rcv(sk, skb);
29891da177e4SLinus Torvalds 
29905413d1baSEric Dumazet 			cond_resched();
29911da177e4SLinus Torvalds 
29921da177e4SLinus Torvalds 			skb = next;
29931da177e4SLinus Torvalds 		} while (skb != NULL);
29941da177e4SLinus Torvalds 
29955413d1baSEric Dumazet 		spin_lock_bh(&sk->sk_lock.slock);
29965413d1baSEric Dumazet 	}
29978eae939fSZhu Yi 
29988eae939fSZhu Yi 	/*
29998eae939fSZhu Yi 	 * Doing the zeroing here guarantee we can not loop forever
30008eae939fSZhu Yi 	 * while a wild producer attempts to flood us.
30018eae939fSZhu Yi 	 */
30028eae939fSZhu Yi 	sk->sk_backlog.len = 0;
30031da177e4SLinus Torvalds }
30041da177e4SLinus Torvalds 
/* Drain the socket backlog immediately: take sk_lock.slock, run
 * __release_sock() to process all queued skbs, and drop the lock.
 */
void __sk_flush_backlog(struct sock *sk)
{
	spin_lock_bh(&sk->sk_lock.slock);
	__release_sock(sk);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL_GPL(__sk_flush_backlog);
3012d41a69f1SEric Dumazet 
/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 * @skb:   last skb seen on sk_receive_queue
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 *
 * Return: result of sk_wait_event() (nonzero when the wake condition
 * became true before the timeout ran out).
 */
int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	int rc;

	add_wait_queue(sk_sleep(sk), &wait);
	/* Advertise that somebody is waiting for data (async/SIGIO users). */
	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	/* Wake once the queue tail is no longer the skb we last saw. */
	rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	remove_wait_queue(sk_sleep(sk), &wait);
	return rc;
}
EXPORT_SYMBOL(sk_wait_data);
30371da177e4SLinus Torvalds 
/**
 *	__sk_mem_raise_allocated - increase memory_allocated
 *	@sk: socket
 *	@size: memory size to allocate
 *	@amt: pages to allocate
 *	@kind: allocation type
 *
 *	Similar to __sk_mem_schedule(), but does not update sk_forward_alloc.
 *
 *	Return: 1 when the allocation is admitted, 0 when it is suppressed
 *	(in which case the tentative accounting done on entry is undone).
 */
int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
	bool memcg_charge = mem_cgroup_sockets_enabled && sk->sk_memcg;
	struct proto *prot = sk->sk_prot;
	bool charged = true;
	long allocated;

	/* Optimistically account the pages first; the checks below decide
	 * whether the new total is acceptable.
	 */
	sk_memory_allocated_add(sk, amt);
	allocated = sk_memory_allocated(sk);
	if (memcg_charge &&
	    !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt,
						gfp_memcg_charge())))
		goto suppress_allocation;

	/* Under limit. */
	if (allocated <= sk_prot_mem_limits(sk, 0)) {
		sk_leave_memory_pressure(sk);
		return 1;
	}

	/* Under pressure. */
	if (allocated > sk_prot_mem_limits(sk, 1))
		sk_enter_memory_pressure(sk);

	/* Over hard limit. */
	if (allocated > sk_prot_mem_limits(sk, 2))
		goto suppress_allocation;

	/* guarantee minimum buffer size under pressure */
	if (kind == SK_MEM_RECV) {
		if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot))
			return 1;

	} else { /* SK_MEM_SEND */
		int wmem0 = sk_get_wmem0(sk, prot);

		if (sk->sk_type == SOCK_STREAM) {
			if (sk->sk_wmem_queued < wmem0)
				return 1;
		} else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) {
				return 1;
		}
	}

	if (sk_has_memory_pressure(sk)) {
		u64 alloc;

		/* Pressure may have been left again by a concurrent path. */
		if (!sk_under_memory_pressure(sk))
			return 1;
		/* Admit the allocation if, were every allocated socket using
		 * as much as this one, the total would still fit under the
		 * hard limit.
		 */
		alloc = sk_sockets_allocated_read_positive(sk);
		if (sk_prot_mem_limits(sk, 2) > alloc *
		    sk_mem_pages(sk->sk_wmem_queued +
				 atomic_read(&sk->sk_rmem_alloc) +
				 sk->sk_forward_alloc))
			return 1;
	}

suppress_allocation:

	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
		sk_stream_moderate_sndbuf(sk);

		/* Fail only if socket is _under_ its sndbuf.
		 * In this case we cannot block, so that we have to fail.
		 */
		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) {
			/* Force charge with __GFP_NOFAIL */
			if (memcg_charge && !charged) {
				mem_cgroup_charge_skmem(sk->sk_memcg, amt,
					gfp_memcg_charge() | __GFP_NOFAIL);
			}
			return 1;
		}
	}

	if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
		trace_sock_exceed_buf_limit(sk, prot, allocated, kind);

	/* Undo the optimistic accounting done at function entry. */
	sk_memory_allocated_sub(sk, amt);

	if (memcg_charge && charged)
		mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);

	return 0;
}
3132f8c3bf00SPaolo Abeni 
3133f8c3bf00SPaolo Abeni /**
3134f8c3bf00SPaolo Abeni  *	__sk_mem_schedule - increase sk_forward_alloc and memory_allocated
3135f8c3bf00SPaolo Abeni  *	@sk: socket
3136f8c3bf00SPaolo Abeni  *	@size: memory size to allocate
3137f8c3bf00SPaolo Abeni  *	@kind: allocation type
3138f8c3bf00SPaolo Abeni  *
3139f8c3bf00SPaolo Abeni  *	If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
3140f8c3bf00SPaolo Abeni  *	rmem allocation. This function assumes that protocols which have
3141f8c3bf00SPaolo Abeni  *	memory_pressure use sk_wmem_queued as write buffer accounting.
3142f8c3bf00SPaolo Abeni  */
__sk_mem_schedule(struct sock * sk,int size,int kind)3143f8c3bf00SPaolo Abeni int __sk_mem_schedule(struct sock *sk, int size, int kind)
3144f8c3bf00SPaolo Abeni {
3145f8c3bf00SPaolo Abeni 	int ret, amt = sk_mem_pages(size);
3146f8c3bf00SPaolo Abeni 
31475e6300e7SEric Dumazet 	sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
3148f8c3bf00SPaolo Abeni 	ret = __sk_mem_raise_allocated(sk, size, amt, kind);
3149f8c3bf00SPaolo Abeni 	if (!ret)
31505e6300e7SEric Dumazet 		sk_forward_alloc_add(sk, -(amt << PAGE_SHIFT));
3151f8c3bf00SPaolo Abeni 	return ret;
3152f8c3bf00SPaolo Abeni }
31533ab224beSHideo Aoki EXPORT_SYMBOL(__sk_mem_schedule);
31543ab224beSHideo Aoki 
/**
 *	__sk_mem_reduce_allocated - reclaim memory_allocated
 *	@sk: socket
 *	@amount: number of quanta
 *
 *	Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc
 */
void __sk_mem_reduce_allocated(struct sock *sk, int amount)
{
	sk_memory_allocated_sub(sk, amount);

	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
		mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);

	/* Leave memory pressure once usage drops below the low limit. */
	if (sk_under_global_memory_pressure(sk) &&
	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
		sk_leave_memory_pressure(sk);
}
3173f8c3bf00SPaolo Abeni 
3174f8c3bf00SPaolo Abeni /**
3175f8c3bf00SPaolo Abeni  *	__sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
3176f8c3bf00SPaolo Abeni  *	@sk: socket
3177100fdd1fSEric Dumazet  *	@amount: number of bytes (rounded down to a PAGE_SIZE multiple)
3178f8c3bf00SPaolo Abeni  */
__sk_mem_reclaim(struct sock * sk,int amount)3179f8c3bf00SPaolo Abeni void __sk_mem_reclaim(struct sock *sk, int amount)
3180f8c3bf00SPaolo Abeni {
3181100fdd1fSEric Dumazet 	amount >>= PAGE_SHIFT;
31825e6300e7SEric Dumazet 	sk_forward_alloc_add(sk, -(amount << PAGE_SHIFT));
3183f8c3bf00SPaolo Abeni 	__sk_mem_reduce_allocated(sk, amount);
3184f8c3bf00SPaolo Abeni }
31853ab224beSHideo Aoki EXPORT_SYMBOL(__sk_mem_reclaim);
31863ab224beSHideo Aoki 
/* Default handler for setting the peek offset (SO_PEEK_OFF).
 * WRITE_ONCE() pairs with lockless readers of sk_peek_off.
 */
int sk_set_peek_off(struct sock *sk, int val)
{
	WRITE_ONCE(sk->sk_peek_off, val);
	return 0;
}
EXPORT_SYMBOL_GPL(sk_set_peek_off);
31933ab224beSHideo Aoki 
31941da177e4SLinus Torvalds /*
31951da177e4SLinus Torvalds  * Set of default routines for initialising struct proto_ops when
31961da177e4SLinus Torvalds  * the protocol does not support a particular function. In certain
31971da177e4SLinus Torvalds  * cases where it makes no sense for a protocol to have a "do nothing"
31981da177e4SLinus Torvalds  * function, some default processing is provided.
31991da177e4SLinus Torvalds  */
32001da177e4SLinus Torvalds 
/* Default ->bind() for protocols that do not support bind(). */
int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_bind);
32061da177e4SLinus Torvalds 
/* Default ->connect() for protocols that do not support connect(). */
int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_connect);
32131da177e4SLinus Torvalds 
/* Default ->socketpair() for protocols that do not support socketpair(). */
int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_socketpair);
32191da177e4SLinus Torvalds 
/* Default ->accept() for protocols that do not support accept(). */
int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
		   bool kern)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_accept);
32261da177e4SLinus Torvalds 
/* Default ->getname() for protocols that do not support getname(). */
int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int peer)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getname);
32331da177e4SLinus Torvalds 
/* Default ->ioctl() for protocols that do not support ioctl(). */
int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_ioctl);
32391da177e4SLinus Torvalds 
/* Default ->listen() for protocols that do not support listen(). */
int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_listen);
32451da177e4SLinus Torvalds 
/* Default ->shutdown() for protocols that do not support shutdown(). */
int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_shutdown);
32511da177e4SLinus Torvalds 
/* Default ->sendmsg() for protocols that do not support sendmsg(). */
int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_sendmsg);
32571da177e4SLinus Torvalds 
/* Default locked sendmsg for protocols that do not support it. */
int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_sendmsg_locked);
3263306b13ebSTom Herbert 
/* Default ->recvmsg() for protocols that do not support recvmsg(). */
int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
		    int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_recvmsg);
32701da177e4SLinus Torvalds 
/* Default ->mmap() for protocols that do not support mmap(). */
int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}
EXPORT_SYMBOL(sock_no_mmap);
32771da177e4SLinus Torvalds 
/*
 * When a file is received (via SCM_RIGHTS, etc), we must bump the
 * various sock-based usage counts.
 */
void __receive_sock(struct file *file)
{
	struct socket *sock;

	/* sock_from_file() yields NULL for non-socket files; skip those. */
	sock = sock_from_file(file);
	if (sock) {
		sock_update_netprioidx(&sock->sk->sk_cgrp_data);
		sock_update_classid(&sock->sk->sk_cgrp_data);
	}
}
3292d9539752SKees Cook 
32931da177e4SLinus Torvalds /*
32941da177e4SLinus Torvalds  *	Default Socket Callbacks
32951da177e4SLinus Torvalds  */
32961da177e4SLinus Torvalds 
/* Default ->sk_state_change() callback: wake up everything sleeping on
 * the socket's wait queue.  sk_wq is accessed under RCU protection.
 */
static void sock_def_wakeup(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_all(&wq->wait);
	rcu_read_unlock();
}
33071da177e4SLinus Torvalds 
/* Default ->sk_error_report() callback: notify pollers with EPOLLERR
 * and raise POLL_ERR for async (SIGIO) subscribers.
 */
static void sock_def_error_report(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_poll(&wq->wait, EPOLLERR);
	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
	rcu_read_unlock();
}
33191da177e4SLinus Torvalds 
/* Default ->sk_data_ready() callback: wake readers/pollers with the
 * input events and raise POLL_IN for async (SIGIO) subscribers.
 */
void sock_def_readable(struct sock *sk)
{
	struct socket_wq *wq;

	trace_sk_data_ready(sk);

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
						EPOLLRDNORM | EPOLLRDBAND);
	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
	rcu_read_unlock();
}
33341da177e4SLinus Torvalds 
/* Default ->sk_write_space() callback: wake writers and pollers once
 * the socket is writeable again.
 */
static void sock_def_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if (sock_writeable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
						EPOLLWRNORM | EPOLLWRBAND);

		/* Should agree with poll, otherwise some programs break */
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}

	rcu_read_unlock();
}
33561da177e4SLinus Torvalds 
/* An optimised version of sock_def_write_space(), should only be called
 * for SOCK_RCU_FREE sockets under RCU read section and after putting
 * ->sk_wmem_alloc.
 */
static void sock_def_write_space_wfree(struct sock *sk)
{
	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if (sock_writeable(sk)) {
		/* Caller already holds the RCU read lock (see comment
		 * above), so no rcu_read_lock() here.
		 */
		struct socket_wq *wq = rcu_dereference(sk->sk_wq);

		/* rely on refcount_sub from sock_wfree() */
		smp_mb__after_atomic();
		if (wq && waitqueue_active(&wq->wait))
			wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
						EPOLLWRNORM | EPOLLWRBAND);

		/* Should agree with poll, otherwise some programs break */
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
}
33790a8afd9fSPavel Begunkov 
/* Default ->sk_destruct() callback: nothing extra to release. */
static void sock_def_destruct(struct sock *sk)
{
}
33831da177e4SLinus Torvalds 
/* Deliver SIGURG to the socket file's owner (if one is set) and raise
 * POLL_PRI for async subscribers on successful delivery.
 */
void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
}
EXPORT_SYMBOL(sk_send_sigurg);
33911da177e4SLinus Torvalds 
/* (Re)arm a socket timer.  mod_timer() returns 0 when the timer was not
 * already pending; in that case take a socket reference so the sock
 * stays alive while the timer is outstanding.
 */
void sk_reset_timer(struct sock *sk, struct timer_list* timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}
EXPORT_SYMBOL(sk_reset_timer);
33991da177e4SLinus Torvalds 
/* Deactivate a socket timer; drop the reference taken by sk_reset_timer()
 * if the timer was still pending (del_timer() returned nonzero).
 */
void sk_stop_timer(struct sock *sk, struct timer_list* timer)
{
	if (del_timer(timer))
		__sock_put(sk);
}
EXPORT_SYMBOL(sk_stop_timer);
34061da177e4SLinus Torvalds 
/* Like sk_stop_timer(), but del_timer_sync() also waits for a running
 * timer handler to finish before returning.
 */
void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer)
{
	if (del_timer_sync(timer))
		__sock_put(sk);
}
EXPORT_SYMBOL(sk_stop_timer_sync);
341308b81d87SGeliang Tang 
/**
 * sock_init_data_uid - initialize the generic fields of a new sock
 * @sock: owning socket, or NULL when there is no struct socket
 * @sk:   sock being initialized
 * @uid:  owner uid recorded in sk->sk_uid
 *
 * Fills in defaults for buffers, timeouts, locks and the sock_def_*
 * callbacks.  Ends by publishing the sock (sk_refcnt set to 1) after an
 * smp_wmb(), so all prior stores must stay before that point.
 */
void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid)
{
	sk_init_common(sk);
	sk->sk_send_head	=	NULL;

	timer_setup(&sk->sk_timer, NULL, 0);

	sk->sk_allocation	=	GFP_KERNEL;
	sk->sk_rcvbuf		=	READ_ONCE(sysctl_rmem_default);
	sk->sk_sndbuf		=	READ_ONCE(sysctl_wmem_default);
	sk->sk_state		=	TCP_CLOSE;
	sk->sk_use_task_frag	=	true;
	sk_set_socket(sk, sock);

	sock_set_flag(sk, SOCK_ZAPPED);

	/* Without a struct socket there is no wait queue to point at. */
	if (sock) {
		sk->sk_type	=	sock->type;
		RCU_INIT_POINTER(sk->sk_wq, &sock->wq);
		sock->sk	=	sk;
	} else {
		RCU_INIT_POINTER(sk->sk_wq, NULL);
	}
	sk->sk_uid	=	uid;

	rwlock_init(&sk->sk_callback_lock);
	/* Separate lockdep classes for kernel vs user sockets. */
	if (sk->sk_kern_sock)
		lockdep_set_class_and_name(
			&sk->sk_callback_lock,
			af_kern_callback_keys + sk->sk_family,
			af_family_kern_clock_key_strings[sk->sk_family]);
	else
		lockdep_set_class_and_name(
			&sk->sk_callback_lock,
			af_callback_keys + sk->sk_family,
			af_family_clock_key_strings[sk->sk_family]);

	/* Install the default callbacks (sock_def_*). */
	sk->sk_state_change	=	sock_def_wakeup;
	sk->sk_data_ready	=	sock_def_readable;
	sk->sk_write_space	=	sock_def_write_space;
	sk->sk_error_report	=	sock_def_error_report;
	sk->sk_destruct		=	sock_def_destruct;

	sk->sk_frag.page	=	NULL;
	sk->sk_frag.offset	=	0;
	sk->sk_peek_off		=	-1;

	sk->sk_peer_pid 	=	NULL;
	sk->sk_peer_cred	=	NULL;
	spin_lock_init(&sk->sk_peer_lock);

	sk->sk_write_pending	=	0;
	sk->sk_rcvlowat		=	1;
	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;

	sk->sk_stamp = SK_DEFAULT_STAMP;
#if BITS_PER_LONG==32
	seqlock_init(&sk->sk_stamp_seq);
#endif
	atomic_set(&sk->sk_zckey, 0);

#ifdef CONFIG_NET_RX_BUSY_POLL
	sk->sk_napi_id		=	0;
	sk->sk_ll_usec		=	READ_ONCE(sysctl_net_busy_read);
#endif

	sk->sk_max_pacing_rate = ~0UL;
	sk->sk_pacing_rate = ~0UL;
	WRITE_ONCE(sk->sk_pacing_shift, 10);
	sk->sk_incoming_cpu = -1;

	sk_rx_queue_clear(sk);
	/*
	 * Before updating sk_refcnt, we must commit prior changes to memory
	 * (Documentation/RCU/rculist_nulls.rst for details)
	 */
	smp_wmb();
	refcount_set(&sk->sk_refcnt, 1);
	atomic_set(&sk->sk_drops, 0);
}
EXPORT_SYMBOL(sock_init_data_uid);
3496584f3742SPietro Borrello 
sock_init_data(struct socket * sock,struct sock * sk)3497584f3742SPietro Borrello void sock_init_data(struct socket *sock, struct sock *sk)
3498584f3742SPietro Borrello {
3499584f3742SPietro Borrello 	kuid_t uid = sock ?
3500584f3742SPietro Borrello 		SOCK_INODE(sock)->i_uid :
3501584f3742SPietro Borrello 		make_kuid(sock_net(sk)->user_ns, 0);
3502584f3742SPietro Borrello 
3503584f3742SPietro Borrello 	sock_init_data_uid(sock, sk, uid);
3504584f3742SPietro Borrello }
35052a91525cSEric Dumazet EXPORT_SYMBOL(sock_init_data);
35061da177e4SLinus Torvalds 
/* Acquire the socket "mutex" (sk_lock) with a lockdep subclass for
 * nested locking.  Sleeps in __lock_sock() if another context currently
 * owns the socket.
 */
void lock_sock_nested(struct sock *sk, int subclass)
{
	/* The sk_lock has mutex_lock() semantics here. */
	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);

	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);
	if (sock_owned_by_user_nocheck(sk))
		__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(lock_sock_nested);
35201da177e4SLinus Torvalds 
/* Release the socket "mutex": drain any backlog queued while we owned
 * the socket, let the protocol run deferred work via ->release_cb(),
 * then hand back ownership and wake any waiters in __lock_sock().
 */
void release_sock(struct sock *sk)
{
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);

	/* Warning : release_cb() might need to release sk ownership,
	 * ie call sock_release_ownership(sk) before us.
	 */
	if (sk->sk_prot->release_cb)
		sk->sk_prot->release_cb(sk);

	sock_release_ownership(sk);
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);
35391da177e4SLinus Torvalds 
/* Fast-path socket lock for short critical sections.
 *
 * Returns false when the lock was uncontended: we return with bottom
 * halves disabled and only sk_lock.slock held (pair with
 * unlock_sock_fast()).  Returns true when full ownership had to be
 * taken the slow way, equivalent to lock_sock().
 */
bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);

	if (!sock_owned_by_user_nocheck(sk)) {
		/*
		 * Fast path return with bottom halves disabled and
		 * sock::sk_lock.slock held.
		 *
		 * The 'mutex' is not contended and holding
		 * sock::sk_lock.slock prevents all other lockers to
		 * proceed so the corresponding unlock_sock_fast() can
		 * avoid the slow path of release_sock() completely and
		 * just release slock.
		 *
		 * From a semantical POV this is equivalent to 'acquiring'
		 * the 'mutex', hence the corresponding lockdep
		 * mutex_release() has to happen in the fast path of
		 * unlock_sock_fast().
		 */
		return false;
	}

	__lock_sock(sk);
	sk->sk_lock.owned = 1;
	__acquire(&sk->sk_lock.slock);
	spin_unlock_bh(&sk->sk_lock.slock);
	return true;
}
EXPORT_SYMBOL(__lock_sock_fast);
35718a74ad60SEric Dumazet 
/* Copy the socket's recorded timestamp to userspace, in the layout
 * selected by @timeval (timeval vs timespec) and @time32 (legacy 32-bit
 * time).  Enables SOCK_TIMESTAMP on first use; when no timestamp has
 * been recorded yet (tv_sec == 0), the current real time is stored on
 * the socket and reported instead.
 */
int sock_gettstamp(struct socket *sock, void __user *userstamp,
		   bool timeval, bool time32)
{
	struct sock *sk = sock->sk;
	struct timespec64 ts;

	sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	ts = ktime_to_timespec64(sock_read_timestamp(sk));
	/* tv_sec == -1 serves as the "no timestamp available" marker. */
	if (ts.tv_sec == -1)
		return -ENOENT;
	if (ts.tv_sec == 0) {
		ktime_t kt = ktime_get_real();
		sock_write_timestamp(sk, kt);
		ts = ktime_to_timespec64(kt);
	}

	/* timeval carries microseconds, not nanoseconds. */
	if (timeval)
		ts.tv_nsec /= 1000;

#ifdef CONFIG_COMPAT_32BIT_TIME
	if (time32)
		return put_old_timespec32(&ts, userstamp);
#endif
#ifdef CONFIG_SPARC64
	/* beware of padding in sparc64 timeval */
	if (timeval && !in_compat_syscall()) {
		struct __kernel_old_timeval __user tv = {
			.tv_sec = ts.tv_sec,
			.tv_usec = ts.tv_nsec,
		};
		if (copy_to_user(userstamp, &tv, sizeof(tv)))
			return -EFAULT;
		return 0;
	}
#endif
	return put_timespec64(&ts, userstamp);
}
EXPORT_SYMBOL(sock_gettstamp);
3610ae40eb1eSEric Dumazet 
/* Set a timestamping-related socket flag and, if this is the first such
 * flag on the socket, turn on network-wide timestamping.
 */
void sock_enable_timestamp(struct sock *sk, enum sock_flags flag)
{
	if (!sock_flag(sk, flag)) {
		unsigned long previous_flags = sk->sk_flags;

		sock_set_flag(sk, flag);
		/*
		 * we just set one of the two flags which require net
		 * time stamping, but time stamping might have been on
		 * already because of the other one
		 */
		if (sock_needs_netstamp(sk) &&
		    !(previous_flags & SK_FLAGS_TIMESTAMP))
			net_enable_timestamp();
	}
}
36271da177e4SLinus Torvalds 
/* Dequeue one skb from the socket error queue and deliver it to a
 * recvmsg() caller: copy (possibly truncated) payload, attach receive
 * timestamps and the extended-error cmsg, and set MSG_ERRQUEUE.
 * Returns the number of bytes copied, or a negative errno
 * (-EAGAIN when the error queue is empty).
 */
int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
		       int level, int type)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb;
	int copied, err;

	err = -EAGAIN;
	skb = sock_dequeue_err_skb(sk);
	if (skb == NULL)
		goto out;

	copied = skb->len;
	if (copied > len) {
		/* Caller's buffer is too small: truncate and tell them. */
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_msg(skb, 0, msg, copied);
	if (err)
		goto out_free_skb;

	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);
	put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);

	msg->msg_flags |= MSG_ERRQUEUE;
	err = copied;

out_free_skb:
	kfree_skb(skb);
out:
	return err;
}
EXPORT_SYMBOL(sock_recv_errqueue);
3663cb820f8eSRichard Cochran 
36641da177e4SLinus Torvalds /*
36651da177e4SLinus Torvalds  *	Get a socket option on an socket.
36661da177e4SLinus Torvalds  *
36671da177e4SLinus Torvalds  *	FIX: POSIX 1003.1g is very ambiguous here. It states that
36681da177e4SLinus Torvalds  *	asynchronous errors should be reported by getsockopt. We assume
36691da177e4SLinus Torvalds  *	this means if you specify SO_ERROR (otherwise whats the point of it).
36701da177e4SLinus Torvalds  */
/* Generic getsockopt(): forward to the protocol's handler. */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	/* IPV6_ADDRFORM can change sk->sk_prot under us. */
	return READ_ONCE(sk->sk_prot)->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_getsockopt);
36801da177e4SLinus Torvalds 
/* Generic recvmsg(): forward to the protocol handler and, on success,
 * report back the length of the source address it filled in.
 */
int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
			int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int ret;

	ret = sk->sk_prot->recvmsg(sk, msg, size, flags, &addr_len);
	if (ret < 0)
		return ret;

	msg->msg_namelen = addr_len;
	return ret;
}
EXPORT_SYMBOL(sock_common_recvmsg);
36941da177e4SLinus Torvalds 
36951da177e4SLinus Torvalds /*
36961da177e4SLinus Torvalds  *	Set socket options on an inet socket.
36971da177e4SLinus Torvalds  */
/* Generic setsockopt(): forward to the protocol's handler. */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;

	/* IPV6_ADDRFORM can change sk->sk_prot under us. */
	return READ_ONCE(sk->sk_prot)->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_setsockopt);
37071da177e4SLinus Torvalds 
/* Common socket teardown: run the protocol destroy hook, unhash, detach
 * from the struct socket, orphan, free xfrm policy and drop the final
 * reference.  The ordering below is deliberate; do not reorder.
 */
void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes have
	 * no access to socket. But net still has.
	 * Step one, detach it from networking:
	 *
	 * A. Remove from hash tables.
	 */

	sk->sk_prot->unhash(sk);

	if (sk->sk_socket)
		sk->sk_socket->sk = NULL;

	/*
	 * In this point socket cannot receive new packets, but it is possible
	 * that some packets are in flight because some CPU runs receiver and
	 * did hash table lookup before we unhashed socket. They will achieve
	 * receive queue and will be purged by socket destructor.
	 *
	 * Also we still have packets pending on receive queue and probably,
	 * our own packets waiting in device queues. sock_destroy will drain
	 * receive queue, but transmitted packets will delay socket destruction
	 * until the last reference will be released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

	sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);
37451da177e4SLinus Torvalds 
/* Fill @mem (an array of SK_MEMINFO_VARS u32 slots) with a snapshot of
 * the socket's memory accounting counters, for sock_diag reporting.
 * Lockless: individual fields are read with READ_ONCE/atomic_read, so the
 * snapshot is not guaranteed to be mutually consistent.
 */
void sk_get_meminfo(const struct sock *sk, u32 *mem)
{
	memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);

	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
	mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
	mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
	mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
	mem[SK_MEMINFO_FWD_ALLOC] = sk_forward_alloc_get(sk);
	mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
	mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
	mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
	mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
}
3760a2d133b1SJosh Hunt 
376113ff3d6fSPavel Emelyanov #ifdef CONFIG_PROC_FS
376213ff3d6fSPavel Emelyanov static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
376370ee1159SPavel Emelyanov 
/* Sum the per-CPU in-use socket counters of @prot in namespace @net.
 * The sum can transiently be negative (inc/dec race across CPUs), so
 * clamp the result to zero.
 */
int sock_prot_inuse_get(struct net *net, struct proto *prot)
{
	int cpu, idx = prot->inuse_idx;
	int res = 0;

	for_each_possible_cpu(cpu)
		res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx];

	return res >= 0 ? res : 0;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
377570ee1159SPavel Emelyanov 
/* Total number of sockets in use in namespace @net, summed over all
 * possible CPUs (the 'all' counter, covering every protocol).
 */
int sock_inuse_get(struct net *net)
{
	int cpu, res = 0;

	for_each_possible_cpu(cpu)
		res += per_cpu_ptr(net->core.prot_inuse, cpu)->all;

	return res;
}

EXPORT_SYMBOL_GPL(sock_inuse_get);
3787648845abSTonghao Zhang 
/* Per-netns init: allocate the per-CPU protocol in-use counters. */
static int __net_init sock_inuse_init_net(struct net *net)
{
	net->core.prot_inuse = alloc_percpu(struct prot_inuse);
	if (net->core.prot_inuse == NULL)
		return -ENOMEM;
	return 0;
}
379570ee1159SPavel Emelyanov 
/* Per-netns exit: release the per-CPU protocol in-use counters. */
static void __net_exit sock_inuse_exit_net(struct net *net)
{
	free_percpu(net->core.prot_inuse);
}
380070ee1159SPavel Emelyanov 
/* pernet hooks wiring the in-use counters to netns lifetime. */
static struct pernet_operations net_inuse_ops = {
	.init = sock_inuse_init_net,
	.exit = sock_inuse_exit_net,
};
380570ee1159SPavel Emelyanov 
/* Register the in-use counter pernet ops; panic on failure since socket
 * accounting is required this early in boot.
 */
static __init int net_inuse_init(void)
{
	if (register_pernet_subsys(&net_inuse_ops))
		panic("Cannot initialize net inuse counters");

	return 0;
}

core_initcall(net_inuse_init);
381513ff3d6fSPavel Emelyanov 
/* Reserve a slot in the global in-use index bitmap for @prot.
 * Returns 0 on success, -ENOSPC when all PROTO_INUSE_NR slots are taken.
 * Caller holds proto_list_mutex (serializes bitmap find+set).
 */
static int assign_proto_idx(struct proto *prot)
{
	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);

	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
		pr_err("PROTO_INUSE_NR exhausted\n");
		return -ENOSPC;
	}

	set_bit(prot->inuse_idx, proto_inuse_idx);
	return 0;
}
382813ff3d6fSPavel Emelyanov 
/* Release @prot's in-use index slot.  The last slot is the "exhausted"
 * sentinel and is never actually assigned, hence never cleared.
 */
static void release_proto_idx(struct proto *prot)
{
	if (prot->inuse_idx != PROTO_INUSE_NR - 1)
		clear_bit(prot->inuse_idx, proto_inuse_idx);
}
383413ff3d6fSPavel Emelyanov #else
/* !CONFIG_PROC_FS: protocol in-use accounting compiled out; no-op stubs. */
static inline int assign_proto_idx(struct proto *prot)
{
	return 0;
}

static inline void release_proto_idx(struct proto *prot)
{
}
3843648845abSTonghao Zhang 
384413ff3d6fSPavel Emelyanov #endif
384513ff3d6fSPavel Emelyanov 
/* Free the timewait slab cache and its name; safe to call with a NULL
 * @twsk_prot or after a partial tw_prot_init() failure (fields are
 * NULLed so a second call is harmless).
 */
static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot)
{
	if (!twsk_prot)
		return;
	kfree(twsk_prot->twsk_slab_name);
	twsk_prot->twsk_slab_name = NULL;
	kmem_cache_destroy(twsk_prot->twsk_slab);
	twsk_prot->twsk_slab = NULL;
}
38550f5907afSMiaohe Lin 
/* Create the "tw_sock_<name>" slab cache for @prot's timewait sockets.
 * Returns 0 if the protocol has no timewait ops or on success, -ENOMEM
 * otherwise; on failure the caller cleans up via tw_prot_cleanup().
 */
static int tw_prot_init(const struct proto *prot)
{
	struct timewait_sock_ops *twsk_prot = prot->twsk_prot;

	if (!twsk_prot)
		return 0;

	twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s",
					      prot->name);
	if (!twsk_prot->twsk_slab_name)
		return -ENOMEM;

	twsk_prot->twsk_slab =
		kmem_cache_create(twsk_prot->twsk_slab_name,
				  twsk_prot->twsk_obj_size, 0,
				  SLAB_ACCOUNT | prot->slab_flags,
				  NULL);
	if (!twsk_prot->twsk_slab) {
		pr_crit("%s: Can't create timewait sock SLAB cache!\n",
			prot->name);
		return -ENOMEM;
	}

	return 0;
}
3881b80350f3STonghao Zhang 
/* Free the request-sock slab cache and its name; NULL-safe and
 * idempotent, mirroring tw_prot_cleanup().
 */
static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
{
	if (!rsk_prot)
		return;
	kfree(rsk_prot->slab_name);
	rsk_prot->slab_name = NULL;
	kmem_cache_destroy(rsk_prot->slab);
	rsk_prot->slab = NULL;
}
38910159dfd3SEric Dumazet 
/* Create the "request_sock_<name>" slab cache for @prot's connection
 * request objects.  Returns 0 if the protocol has no rsk ops or on
 * success, -ENOMEM otherwise; caller cleans up via req_prot_cleanup().
 */
static int req_prot_init(const struct proto *prot)
{
	struct request_sock_ops *rsk_prot = prot->rsk_prot;

	if (!rsk_prot)
		return 0;

	rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s",
					prot->name);
	if (!rsk_prot->slab_name)
		return -ENOMEM;

	rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
					   rsk_prot->obj_size, 0,
					   SLAB_ACCOUNT | prot->slab_flags,
					   NULL);

	if (!rsk_prot->slab) {
		pr_crit("%s: Can't create request sock SLAB cache!\n",
			prot->name);
		return -ENOMEM;
	}
	return 0;
}
39160159dfd3SEric Dumazet 
/* Register a transport protocol: validate memory-accounting fields,
 * optionally create its sock/request/timewait slab caches, assign an
 * in-use index and add it to the global proto_list.
 * Returns 0 on success or a negative errno; on failure everything
 * allocated here is torn down via the fall-through goto chain below.
 */
int proto_register(struct proto *prot, int alloc_slab)
{
	int ret = -ENOBUFS;

	if (prot->memory_allocated && !prot->sysctl_mem) {
		pr_err("%s: missing sysctl_mem\n", prot->name);
		return -EINVAL;
	}
	if (prot->memory_allocated && !prot->per_cpu_fw_alloc) {
		pr_err("%s: missing per_cpu_fw_alloc\n", prot->name);
		return -EINVAL;
	}
	if (alloc_slab) {
		prot->slab = kmem_cache_create_usercopy(prot->name,
					prot->obj_size, 0,
					SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT |
					prot->slab_flags,
					prot->useroffset, prot->usersize,
					NULL);

		if (prot->slab == NULL) {
			pr_crit("%s: Can't create sock SLAB cache!\n",
				prot->name);
			goto out;
		}

		if (req_prot_init(prot))
			goto out_free_request_sock_slab;

		if (tw_prot_init(prot))
			goto out_free_timewait_sock_slab;
	}

	mutex_lock(&proto_list_mutex);
	ret = assign_proto_idx(prot);
	if (ret) {
		mutex_unlock(&proto_list_mutex);
		goto out_free_timewait_sock_slab;
	}
	list_add(&prot->node, &proto_list);
	mutex_unlock(&proto_list_mutex);
	return ret;

	/* Error unwind: labels fall through so each entry point also runs
	 * the cleanups below it.  The cleanup helpers are NULL-safe, so a
	 * half-initialized state is fine.
	 */
out_free_timewait_sock_slab:
	if (alloc_slab)
		tw_prot_cleanup(prot->twsk_prot);
out_free_request_sock_slab:
	if (alloc_slab) {
		req_prot_cleanup(prot->rsk_prot);

		kmem_cache_destroy(prot->slab);
		prot->slab = NULL;
	}
out:
	return ret;
}
EXPORT_SYMBOL(proto_register);
39741da177e4SLinus Torvalds 
/* Undo proto_register(): drop the proto from the global list and index,
 * then destroy its sock, request-sock and timewait slab caches.
 */
void proto_unregister(struct proto *prot)
{
	mutex_lock(&proto_list_mutex);
	release_proto_idx(prot);
	list_del(&prot->node);
	mutex_unlock(&proto_list_mutex);

	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;

	req_prot_cleanup(prot->rsk_prot);
	tw_prot_cleanup(prot->twsk_prot);
}
EXPORT_SYMBOL(proto_unregister);
39891da177e4SLinus Torvalds 
/* Request the sock_diag module for @family (and optionally @protocol).
 * Bails out with -ENOENT when the family/protocol is clearly not
 * registered, to avoid pointless modprobe attempts.
 */
int sock_load_diag_module(int family, int protocol)
{
	if (!protocol) {
		if (!sock_is_registered(family))
			return -ENOENT;

		return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
				      NETLINK_SOCK_DIAG, family);
	}

#ifdef CONFIG_INET
	/* IPPROTO_RAW is special-cased: it has no inet_protos[] entry but
	 * raw diag support may still exist.
	 */
	if (family == AF_INET &&
	    protocol != IPPROTO_RAW &&
	    protocol < MAX_INET_PROTOS &&
	    !rcu_access_pointer(inet_protos[protocol]))
		return -ENOENT;
#endif

	return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
			      NETLINK_SOCK_DIAG, family, protocol);
}
EXPORT_SYMBOL(sock_load_diag_module);
4012bf2ae2e4SXin Long 
40131da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
/* seq_file start: lock the protocol list for the whole iteration
 * (released in proto_seq_stop).
 */
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(proto_list_mutex)
{
	mutex_lock(&proto_list_mutex);
	return seq_list_start_head(&proto_list, *pos);
}
40201da177e4SLinus Torvalds 
/* seq_file next: advance to the next registered protocol. */
static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	return seq_list_next(v, &proto_list, pos);
}
40251da177e4SLinus Torvalds 
/* seq_file stop: drop the lock taken in proto_seq_start. */
static void proto_seq_stop(struct seq_file *seq, void *v)
	__releases(proto_list_mutex)
{
	mutex_unlock(&proto_list_mutex);
}
40311da177e4SLinus Torvalds 
/* One-character flag for /proc/net/protocols: 'y' when the protocol
 * provides @method, 'n' for a NULL slot.
 */
static char proto_method_implemented(const void *method)
{
	return method ? 'y' : 'n';
}
/* Pages allocated by @proto, or -1 when it does no memory accounting. */
static long sock_prot_memory_allocated(struct proto *proto)
{
	return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
}
4040180d8cd9SGlauber Costa 
/* Memory-pressure state string for /proc/net/protocols:
 * "yes"/"no" when the protocol tracks pressure, "NI" (not implemented)
 * otherwise.
 */
static const char *sock_prot_memory_pressure(struct proto *proto)
{
	return proto->memory_pressure != NULL ?
	proto_memory_pressure(proto) ? "yes" : "no" : "NI";
}
40461da177e4SLinus Torvalds 
/* Emit one /proc/net/protocols row for @proto: name, object size,
 * per-netns socket count, memory stats, and a y/n flag per proto op
 * (column order must match the header printed in proto_seq_show()).
 */
static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{

	seq_printf(seq, "%-9s %4u %6d  %6ld   %-3s %6u   %-3s  %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   sock_prot_inuse_get(seq_file_net(seq), proto),
		   sock_prot_memory_allocated(proto),
		   sock_prot_memory_pressure(proto),
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}
40791da177e4SLinus Torvalds 
/* seq_file show: print the column header for the list head sentinel,
 * otherwise one protocol row.
 */
static int proto_seq_show(struct seq_file *seq, void *v)
{
	if (v == &proto_list)
		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
			   "protocol",
			   "size",
			   "sockets",
			   "memory",
			   "press",
			   "maxhdr",
			   "slab",
			   "module",
			   "cl co di ac io in de sh ss gs se re bi br ha uh gp em\n");
	else
		proto_seq_printf(seq, list_entry(v, struct proto, node));
	return 0;
}
40971da177e4SLinus Torvalds 
/* seq_file iterator for /proc/net/protocols. */
static const struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};
41041da177e4SLinus Torvalds 
/* Per-netns init: create /proc/net/protocols. */
static __net_init int proto_init_net(struct net *net)
{
	if (!proc_create_net("protocols", 0444, net->proc_net, &proto_seq_ops,
			sizeof(struct seq_net_private)))
		return -ENOMEM;

	return 0;
}
411314e943dbSEric Dumazet 
/* Per-netns exit: remove /proc/net/protocols. */
static __net_exit void proto_exit_net(struct net *net)
{
	remove_proc_entry("protocols", net->proc_net);
}
411814e943dbSEric Dumazet 
411914e943dbSEric Dumazet 
/* pernet hooks tying /proc/net/protocols to netns lifetime. */
static __net_initdata struct pernet_operations proto_net_ops = {
	.init = proto_init_net,
	.exit = proto_exit_net,
};
41241da177e4SLinus Torvalds 
/* Register the /proc/net/protocols pernet ops at subsys init time. */
static int __init proto_init(void)
{
	return register_pernet_subsys(&proto_net_ops);
}

subsys_initcall(proto_init);
41311da177e4SLinus Torvalds 
41321da177e4SLinus Torvalds #endif /* PROC_FS */
41337db6b048SSridhar Samudrala 
41347db6b048SSridhar Samudrala #ifdef CONFIG_NET_RX_BUSY_POLL
/* Busy-poll loop termination predicate for a socket (@p is the sock).
 * Stop polling as soon as data is available (receive queue, or the UDP
 * reader_queue, which udp_recvmsg() drains separately) or when the
 * socket's busy-poll timeout since @start_time has elapsed.
 */
bool sk_busy_loop_end(void *p, unsigned long start_time)
{
	struct sock *sk = p;

	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		return true;

	if (sk_is_udp(sk) &&
	    !skb_queue_empty_lockless(&udp_sk(sk)->reader_queue))
		return true;

	return sk_busy_loop_timeout(sk, start_time);
}
EXPORT_SYMBOL(sk_busy_loop_end);
4150c0425a42SChristoph Hellwig 
sock_bind_add(struct sock * sk,struct sockaddr * addr,int addr_len)4151c0425a42SChristoph Hellwig int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len)
4152c0425a42SChristoph Hellwig {
4153c0425a42SChristoph Hellwig 	if (!sk->sk_prot->bind_add)
4154c0425a42SChristoph Hellwig 		return -EOPNOTSUPP;
4155c0425a42SChristoph Hellwig 	return sk->sk_prot->bind_add(sk, addr, addr_len);
4156c0425a42SChristoph Hellwig }
4157c0425a42SChristoph Hellwig EXPORT_SYMBOL(sock_bind_add);
4158e1d001faSBreno Leitao 
4159e1d001faSBreno Leitao /* Copy 'size' bytes from userspace and return `size` back to userspace */
/* In/out ioctl helper: copy @size bytes of the argument from userspace
 * into the kernel buffer @karg, invoke the protocol's ->ioctl() on that
 * kernel copy, and — only on success — copy the (possibly updated)
 * @size bytes back out to @arg.
 *
 * Returns -EFAULT if either user copy fails, otherwise the handler's
 * return value (0 on success).
 */
int sock_ioctl_inout(struct sock *sk, unsigned int cmd,
		     void __user *arg, void *karg, size_t size)
{
	int err;

	if (copy_from_user(karg, arg, size) != 0)
		return -EFAULT;

	/* Single READ_ONCE snapshot: sk_prot may be updated concurrently. */
	err = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, karg);
	if (err != 0)
		return err;

	return copy_to_user(arg, karg, size) != 0 ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_ioctl_inout);
4178e1d001faSBreno Leitao 
4179e1d001faSBreno Leitao /* This is the most common ioctl prep function, where the result (4 bytes) is
4180e1d001faSBreno Leitao  * copied back to userspace if the ioctl() returns successfully. No input is
4181e1d001faSBreno Leitao  * copied from userspace as input argument.
4182e1d001faSBreno Leitao  */
/* Out-only ioctl prep path: nothing is read from userspace; the
 * protocol's ->ioctl() fills in a single int result which is copied
 * back to @arg on success.
 */
static int sock_ioctl_out(struct sock *sk, unsigned int cmd, void __user *arg)
{
	int karg = 0;
	int err;

	/* Single READ_ONCE snapshot: sk_prot may be updated concurrently. */
	err = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, &karg);
	if (err)
		return err;

	return put_user(karg, (int __user *)arg);
}
4193e1d001faSBreno Leitao 
4194e1d001faSBreno Leitao /* A wrapper around sock ioctls, which copies the data from userspace
4195e1d001faSBreno Leitao  * (depending on the protocol/ioctl), and copies back the result to userspace.
4196e1d001faSBreno Leitao  * The main motivation for this function is to pass kernel memory to the
4197e1d001faSBreno Leitao  * protocol ioctl callbacks, instead of userspace memory.
4198e1d001faSBreno Leitao  */
/* Wrapper around per-protocol socket ioctls.
 *
 * Multicast-routing (raw IPv4/IPv6) and phonet sockets get first shot
 * at the command via their own handlers, which manage userspace copies
 * themselves.  A handler return value <= 0 means the ioctl was consumed
 * and is passed straight back; a positive value means "not mine", and
 * the command falls through to sock_ioctl_out(), which hands kernel
 * memory to the protocol's ->ioctl() callback.
 */
int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	bool is_raw = sk->sk_type == SOCK_RAW;
	int ret = 1;	/* >0: not consumed by a special handler */

	if (is_raw && sk->sk_family == AF_INET)
		ret = ipmr_sk_ioctl(sk, cmd, arg);
	else if (is_raw && sk->sk_family == AF_INET6)
		ret = ip6mr_sk_ioctl(sk, cmd, arg);
	else if (sk_is_phonet(sk))
		ret = phonet_sk_ioctl(sk, cmd, arg);

	/* Zero or negative: the special handler processed the ioctl. */
	if (ret <= 0)
		return ret;

	/* Otherwise fall back to the generic out-only path. */
	return sock_ioctl_out(sk, cmd, arg);
}
EXPORT_SYMBOL(sk_ioctl);
4218