xref: /openbmc/linux/net/core/sock.c (revision 845d1799)
12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds  * INET		An implementation of the TCP/IP protocol suite for the LINUX
41da177e4SLinus Torvalds  *		operating system.  INET is implemented using the  BSD Socket
51da177e4SLinus Torvalds  *		interface as the means of communication with the user level.
61da177e4SLinus Torvalds  *
71da177e4SLinus Torvalds  *		Generic socket support routines. Memory allocators, socket lock/release
81da177e4SLinus Torvalds  *		handler for protocols to use and generic option handler.
91da177e4SLinus Torvalds  *
1002c30a84SJesper Juhl  * Authors:	Ross Biro
111da177e4SLinus Torvalds  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
121da177e4SLinus Torvalds  *		Florian La Roche, <flla@stud.uni-sb.de>
131da177e4SLinus Torvalds  *		Alan Cox, <A.Cox@swansea.ac.uk>
141da177e4SLinus Torvalds  *
151da177e4SLinus Torvalds  * Fixes:
161da177e4SLinus Torvalds  *		Alan Cox	: 	Numerous verify_area() problems
171da177e4SLinus Torvalds  *		Alan Cox	:	Connecting on a connecting socket
181da177e4SLinus Torvalds  *					now returns an error for tcp.
191da177e4SLinus Torvalds  *		Alan Cox	:	sock->protocol is set correctly.
201da177e4SLinus Torvalds  *					and is not sometimes left as 0.
211da177e4SLinus Torvalds  *		Alan Cox	:	connect handles icmp errors on a
221da177e4SLinus Torvalds  *					connect properly. Unfortunately there
231da177e4SLinus Torvalds  *					is a restart syscall nasty there. I
241da177e4SLinus Torvalds  *					can't match BSD without hacking the C
251da177e4SLinus Torvalds  *					library. Ideas urgently sought!
261da177e4SLinus Torvalds  *		Alan Cox	:	Disallow bind() to addresses that are
271da177e4SLinus Torvalds  *					not ours - especially broadcast ones!!
281da177e4SLinus Torvalds  *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
291da177e4SLinus Torvalds  *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
301da177e4SLinus Torvalds  *					instead they leave that for the DESTROY timer.
311da177e4SLinus Torvalds  *		Alan Cox	:	Clean up error flag in accept
321da177e4SLinus Torvalds  *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
331da177e4SLinus Torvalds  *					was buggy. Put a remove_sock() in the handler
341da177e4SLinus Torvalds  *					for memory when we hit 0. Also altered the timer
351da177e4SLinus Torvalds  *					code. The ACK stuff can wait and needs major
361da177e4SLinus Torvalds  *					TCP layer surgery.
371da177e4SLinus Torvalds  *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
381da177e4SLinus Torvalds  *					and fixed timer/inet_bh race.
391da177e4SLinus Torvalds  *		Alan Cox	:	Added zapped flag for TCP
401da177e4SLinus Torvalds  *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
411da177e4SLinus Torvalds  *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
421da177e4SLinus Torvalds  *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
431da177e4SLinus Torvalds  *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
441da177e4SLinus Torvalds  *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
451da177e4SLinus Torvalds  *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
461da177e4SLinus Torvalds  *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
471da177e4SLinus Torvalds  *	Pauline Middelink	:	identd support
481da177e4SLinus Torvalds  *		Alan Cox	:	Fixed connect() taking signals I think.
491da177e4SLinus Torvalds  *		Alan Cox	:	SO_LINGER supported
501da177e4SLinus Torvalds  *		Alan Cox	:	Error reporting fixes
511da177e4SLinus Torvalds  *		Anonymous	:	inet_create tidied up (sk->reuse setting)
521da177e4SLinus Torvalds  *		Alan Cox	:	inet sockets don't set sk->type!
531da177e4SLinus Torvalds  *		Alan Cox	:	Split socket option code
541da177e4SLinus Torvalds  *		Alan Cox	:	Callbacks
551da177e4SLinus Torvalds  *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
561da177e4SLinus Torvalds  *		Alex		:	Removed restriction on inet fioctl
571da177e4SLinus Torvalds  *		Alan Cox	:	Splitting INET from NET core
581da177e4SLinus Torvalds  *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
591da177e4SLinus Torvalds  *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
601da177e4SLinus Torvalds  *		Alan Cox	:	Split IP from generic code
611da177e4SLinus Torvalds  *		Alan Cox	:	New kfree_skbmem()
621da177e4SLinus Torvalds  *		Alan Cox	:	Make SO_DEBUG superuser only.
631da177e4SLinus Torvalds  *		Alan Cox	:	Allow anyone to clear SO_DEBUG
641da177e4SLinus Torvalds  *					(compatibility fix)
651da177e4SLinus Torvalds  *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
661da177e4SLinus Torvalds  *		Alan Cox	:	Allocator for a socket is settable.
671da177e4SLinus Torvalds  *		Alan Cox	:	SO_ERROR includes soft errors.
681da177e4SLinus Torvalds  *		Alan Cox	:	Allow NULL arguments on some SO_ opts
691da177e4SLinus Torvalds  *		Alan Cox	: 	Generic socket allocation to make hooks
701da177e4SLinus Torvalds  *					easier (suggested by Craig Metz).
711da177e4SLinus Torvalds  *		Michael Pall	:	SO_ERROR returns positive errno again
721da177e4SLinus Torvalds  *              Steve Whitehouse:       Added default destructor to free
731da177e4SLinus Torvalds  *                                      protocol private data.
741da177e4SLinus Torvalds  *              Steve Whitehouse:       Added various other default routines
751da177e4SLinus Torvalds  *                                      common to several socket families.
761da177e4SLinus Torvalds  *              Chris Evans     :       Call suser() check last on F_SETOWN
771da177e4SLinus Torvalds  *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
781da177e4SLinus Torvalds  *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
791da177e4SLinus Torvalds  *		Andi Kleen	:	Fix write_space callback
801da177e4SLinus Torvalds  *		Chris Evans	:	Security fixes - signedness again
811da177e4SLinus Torvalds  *		Arnaldo C. Melo :       cleanups, use skb_queue_purge
821da177e4SLinus Torvalds  *
831da177e4SLinus Torvalds  * To Fix:
841da177e4SLinus Torvalds  */
851da177e4SLinus Torvalds 
86e005d193SJoe Perches #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
87e005d193SJoe Perches 
8880b14deeSRichard Cochran #include <asm/unaligned.h>
894fc268d2SRandy Dunlap #include <linux/capability.h>
901da177e4SLinus Torvalds #include <linux/errno.h>
91cb820f8eSRichard Cochran #include <linux/errqueue.h>
921da177e4SLinus Torvalds #include <linux/types.h>
931da177e4SLinus Torvalds #include <linux/socket.h>
941da177e4SLinus Torvalds #include <linux/in.h>
951da177e4SLinus Torvalds #include <linux/kernel.h>
961da177e4SLinus Torvalds #include <linux/module.h>
971da177e4SLinus Torvalds #include <linux/proc_fs.h>
981da177e4SLinus Torvalds #include <linux/seq_file.h>
991da177e4SLinus Torvalds #include <linux/sched.h>
100f1083048SVlastimil Babka #include <linux/sched/mm.h>
1011da177e4SLinus Torvalds #include <linux/timer.h>
1021da177e4SLinus Torvalds #include <linux/string.h>
1031da177e4SLinus Torvalds #include <linux/sockios.h>
1041da177e4SLinus Torvalds #include <linux/net.h>
1051da177e4SLinus Torvalds #include <linux/mm.h>
1061da177e4SLinus Torvalds #include <linux/slab.h>
1071da177e4SLinus Torvalds #include <linux/interrupt.h>
1081da177e4SLinus Torvalds #include <linux/poll.h>
1091da177e4SLinus Torvalds #include <linux/tcp.h>
110ef8ad307SEric Dumazet #include <linux/udp.h>
1111da177e4SLinus Torvalds #include <linux/init.h>
112a1f8e7f7SAl Viro #include <linux/highmem.h>
1133f551f94SEric W. Biederman #include <linux/user_namespace.h>
114c5905afbSIngo Molnar #include <linux/static_key.h>
1153969eb38SDavid S. Miller #include <linux/memcontrol.h>
1168c1ae10dSDavid S. Miller #include <linux/prefetch.h>
117a6c0d093SChristoph Hellwig #include <linux/compat.h>
118e1d001faSBreno Leitao #include <linux/mroute.h>
119e1d001faSBreno Leitao #include <linux/mroute6.h>
120e1d001faSBreno Leitao #include <linux/icmpv6.h>
1211da177e4SLinus Torvalds 
1227c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
1231da177e4SLinus Torvalds 
1241da177e4SLinus Torvalds #include <linux/netdevice.h>
1251da177e4SLinus Torvalds #include <net/protocol.h>
1261da177e4SLinus Torvalds #include <linux/skbuff.h>
127457c4cbcSEric W. Biederman #include <net/net_namespace.h>
1282e6599cbSArnaldo Carvalho de Melo #include <net/request_sock.h>
1291da177e4SLinus Torvalds #include <net/sock.h>
13020d49473SPatrick Ohly #include <linux/net_tstamp.h>
1311da177e4SLinus Torvalds #include <net/xfrm.h>
1321da177e4SLinus Torvalds #include <linux/ipsec.h>
133f8451725SHerbert Xu #include <net/cls_cgroup.h>
1345bc1421eSNeil Horman #include <net/netprio_cgroup.h>
135eb4cb008SCraig Gallek #include <linux/sock_diag.h>
1361da177e4SLinus Torvalds 
1371da177e4SLinus Torvalds #include <linux/filter.h>
138538950a1SCraig Gallek #include <net/sock_reuseport.h>
1396ac99e8fSMartin KaFai Lau #include <net/bpf_sk_storage.h>
1401da177e4SLinus Torvalds 
1413847ce32SSatoru Moriya #include <trace/events/sock.h>
1423847ce32SSatoru Moriya 
1431da177e4SLinus Torvalds #include <net/tcp.h>
144076bb0c8SEliezer Tamir #include <net/busy_poll.h>
145e1d001faSBreno Leitao #include <net/phonet/phonet.h>
14606021292SEliezer Tamir 
147d463126eSYangbo Lu #include <linux/ethtool.h>
148d463126eSYangbo Lu 
1496264f58cSJakub Kicinski #include "dev.h"
1506264f58cSJakub Kicinski 
15136b77a52SGlauber Costa static DEFINE_MUTEX(proto_list_mutex);
152d1a4c0b3SGlauber Costa static LIST_HEAD(proto_list);
153d1a4c0b3SGlauber Costa 
1540a8afd9fSPavel Begunkov static void sock_def_write_space_wfree(struct sock *sk);
155052ada09SPavel Begunkov static void sock_def_write_space(struct sock *sk);
156052ada09SPavel Begunkov 
/**
 * sk_ns_capable - General socket capability test
 * @sk: Socket to use a capability on or through
 * @user_ns: The user namespace of the capability to use
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket had the capability @cap when
 * the socket was created and the current process has the capability
 * @cap in the user namespace @user_ns.
 *
 * Return: true only if both the file that opened the socket and the
 * current task carry @cap in @user_ns.
 */
bool sk_ns_capable(const struct sock *sk,
		   struct user_namespace *user_ns, int cap)
{
	return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
		ns_capable(user_ns, cap);
}
EXPORT_SYMBOL(sk_ns_capable);
174a3b299daSEric W. Biederman 
/**
 * sk_capable - Socket global capability test
 * @sk: Socket to use a capability on or through
 * @cap: The global capability to use
 *
 * Test to see if the opener of the socket had the capability @cap when
 * the socket was created and the current process has the capability
 * @cap in all user namespaces (i.e. against &init_user_ns).
 */
bool sk_capable(const struct sock *sk, int cap)
{
	return sk_ns_capable(sk, &init_user_ns, cap);
}
EXPORT_SYMBOL(sk_capable);
189a3b299daSEric W. Biederman 
/**
 * sk_net_capable - Network namespace socket capability test
 * @sk: Socket to use a capability on or through
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket had the capability @cap when
 * the socket was created and the current process has the capability
 * @cap over the network namespace the socket is a member of.
 */
bool sk_net_capable(const struct sock *sk, int cap)
{
	return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
}
EXPORT_SYMBOL(sk_net_capable);
204a3b299daSEric W. Biederman 
205da21f24dSIngo Molnar /*
206da21f24dSIngo Molnar  * Each address family might have different locking rules, so we have
207cdfbabfbSDavid Howells  * one slock key per address family and separate keys for internal and
208cdfbabfbSDavid Howells  * userspace sockets.
209da21f24dSIngo Molnar  */
210a5b5bb9aSIngo Molnar static struct lock_class_key af_family_keys[AF_MAX];
211cdfbabfbSDavid Howells static struct lock_class_key af_family_kern_keys[AF_MAX];
212a5b5bb9aSIngo Molnar static struct lock_class_key af_family_slock_keys[AF_MAX];
213cdfbabfbSDavid Howells static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
214a5b5bb9aSIngo Molnar 
215a5b5bb9aSIngo Molnar /*
216a5b5bb9aSIngo Molnar  * Make lock validator output more readable. (we pre-construct these
217a5b5bb9aSIngo Molnar  * strings build-time, so that runtime initialization of socket
218a5b5bb9aSIngo Molnar  * locks is fast):
219a5b5bb9aSIngo Molnar  */
220cdfbabfbSDavid Howells 
221cdfbabfbSDavid Howells #define _sock_locks(x)						  \
222cdfbabfbSDavid Howells   x "AF_UNSPEC",	x "AF_UNIX"     ,	x "AF_INET"     , \
223cdfbabfbSDavid Howells   x "AF_AX25"  ,	x "AF_IPX"      ,	x "AF_APPLETALK", \
224cdfbabfbSDavid Howells   x "AF_NETROM",	x "AF_BRIDGE"   ,	x "AF_ATMPVC"   , \
225cdfbabfbSDavid Howells   x "AF_X25"   ,	x "AF_INET6"    ,	x "AF_ROSE"     , \
226cdfbabfbSDavid Howells   x "AF_DECnet",	x "AF_NETBEUI"  ,	x "AF_SECURITY" , \
227cdfbabfbSDavid Howells   x "AF_KEY"   ,	x "AF_NETLINK"  ,	x "AF_PACKET"   , \
228cdfbabfbSDavid Howells   x "AF_ASH"   ,	x "AF_ECONET"   ,	x "AF_ATMSVC"   , \
229cdfbabfbSDavid Howells   x "AF_RDS"   ,	x "AF_SNA"      ,	x "AF_IRDA"     , \
230cdfbabfbSDavid Howells   x "AF_PPPOX" ,	x "AF_WANPIPE"  ,	x "AF_LLC"      , \
231cdfbabfbSDavid Howells   x "27"       ,	x "28"          ,	x "AF_CAN"      , \
232cdfbabfbSDavid Howells   x "AF_TIPC"  ,	x "AF_BLUETOOTH",	x "IUCV"        , \
233cdfbabfbSDavid Howells   x "AF_RXRPC" ,	x "AF_ISDN"     ,	x "AF_PHONET"   , \
234cdfbabfbSDavid Howells   x "AF_IEEE802154",	x "AF_CAIF"	,	x "AF_ALG"      , \
235cdfbabfbSDavid Howells   x "AF_NFC"   ,	x "AF_VSOCK"    ,	x "AF_KCM"      , \
23668e8b849SBjörn Töpel   x "AF_QIPCRTR",	x "AF_SMC"	,	x "AF_XDP"	, \
237bc49d816SJeremy Kerr   x "AF_MCTP"  , \
23868e8b849SBjörn Töpel   x "AF_MAX"
239cdfbabfbSDavid Howells 
24036cbd3dcSJan Engelhardt static const char *const af_family_key_strings[AF_MAX+1] = {
241cdfbabfbSDavid Howells 	_sock_locks("sk_lock-")
242a5b5bb9aSIngo Molnar };
24336cbd3dcSJan Engelhardt static const char *const af_family_slock_key_strings[AF_MAX+1] = {
244cdfbabfbSDavid Howells 	_sock_locks("slock-")
245a5b5bb9aSIngo Molnar };
24636cbd3dcSJan Engelhardt static const char *const af_family_clock_key_strings[AF_MAX+1] = {
247cdfbabfbSDavid Howells 	_sock_locks("clock-")
248cdfbabfbSDavid Howells };
249cdfbabfbSDavid Howells 
250cdfbabfbSDavid Howells static const char *const af_family_kern_key_strings[AF_MAX+1] = {
251cdfbabfbSDavid Howells 	_sock_locks("k-sk_lock-")
252cdfbabfbSDavid Howells };
253cdfbabfbSDavid Howells static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
254cdfbabfbSDavid Howells 	_sock_locks("k-slock-")
255cdfbabfbSDavid Howells };
256cdfbabfbSDavid Howells static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
257cdfbabfbSDavid Howells 	_sock_locks("k-clock-")
258443aef0eSPeter Zijlstra };
259581319c5SPaolo Abeni static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
2606b431d50SMatthieu Baerts 	_sock_locks("rlock-")
261581319c5SPaolo Abeni };
262581319c5SPaolo Abeni static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
2636b431d50SMatthieu Baerts 	_sock_locks("wlock-")
264581319c5SPaolo Abeni };
265581319c5SPaolo Abeni static const char *const af_family_elock_key_strings[AF_MAX+1] = {
2666b431d50SMatthieu Baerts 	_sock_locks("elock-")
267581319c5SPaolo Abeni };
268da21f24dSIngo Molnar 
269da21f24dSIngo Molnar /*
270581319c5SPaolo Abeni  * sk_callback_lock and sk queues locking rules are per-address-family,
271da21f24dSIngo Molnar  * so split the lock classes by using a per-AF key:
272da21f24dSIngo Molnar  */
273da21f24dSIngo Molnar static struct lock_class_key af_callback_keys[AF_MAX];
274581319c5SPaolo Abeni static struct lock_class_key af_rlock_keys[AF_MAX];
275581319c5SPaolo Abeni static struct lock_class_key af_wlock_keys[AF_MAX];
276581319c5SPaolo Abeni static struct lock_class_key af_elock_keys[AF_MAX];
277cdfbabfbSDavid Howells static struct lock_class_key af_kern_callback_keys[AF_MAX];
278da21f24dSIngo Molnar 
2791da177e4SLinus Torvalds /* Run time adjustable parameters. */
280ab32ea5dSBrian Haley __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
2816d8ebc8aSHans Schillstrom EXPORT_SYMBOL(sysctl_wmem_max);
282ab32ea5dSBrian Haley __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
2836d8ebc8aSHans Schillstrom EXPORT_SYMBOL(sysctl_rmem_max);
284ab32ea5dSBrian Haley __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
285ab32ea5dSBrian Haley __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
286fe1e8381SAdam Li int sysctl_mem_pcpu_rsv __read_mostly = SK_MEMORY_PCPU_RESERVE;
2871da177e4SLinus Torvalds 
28825985edcSLucas De Marchi /* Maximal space eaten by iovec or ancillary data plus some space */
289ab32ea5dSBrian Haley int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
2902a91525cSEric Dumazet EXPORT_SYMBOL(sysctl_optmem_max);
2911da177e4SLinus Torvalds 
292b245be1fSWillem de Bruijn int sysctl_tstamp_allow_data __read_mostly = 1;
293b245be1fSWillem de Bruijn 
294a7950ae8SDavidlohr Bueso DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
295a7950ae8SDavidlohr Bueso EXPORT_SYMBOL_GPL(memalloc_socks_key);
296c93bdd0eSMel Gorman 
/**
 * sk_set_memalloc - sets %SOCK_MEMALLOC
 * @sk: socket to set it on
 *
 * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
 * It's the responsibility of the admin to adjust min_free_kbytes
 * to meet the requirements
 */
void sk_set_memalloc(struct sock *sk)
{
	sock_set_flag(sk, SOCK_MEMALLOC);
	/* Allocations on behalf of this socket may dip into reserves */
	sk->sk_allocation |= __GFP_MEMALLOC;
	/* Keep the global fast-path static key in sync with the flag */
	static_branch_inc(&memalloc_socks_key);
}
EXPORT_SYMBOL_GPL(sk_set_memalloc);
3127cb02404SMel Gorman 
/* Undo sk_set_memalloc(): revoke this socket's access to emergency
 * memory reserves and drop the matching static-key reference.
 */
void sk_clear_memalloc(struct sock *sk)
{
	sock_reset_flag(sk, SOCK_MEMALLOC);
	sk->sk_allocation &= ~__GFP_MEMALLOC;
	static_branch_dec(&memalloc_socks_key);

	/*
	 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
	 * progress of swapping. SOCK_MEMALLOC may be cleared while
	 * it has rmem allocations due to the last swapfile being deactivated
	 * but there is a risk that the socket is unusable due to exceeding
	 * the rmem limits. Reclaim the reserves and obey rmem limits again.
	 */
	sk_mem_reclaim(sk);
}
EXPORT_SYMBOL_GPL(sk_clear_memalloc);
3297cb02404SMel Gorman 
/* Process a backlogged skb for a %SOCK_MEMALLOC socket.  The protocol
 * receive handler is run with direct memory reclaim disabled, since
 * this path may itself be servicing reclaim (e.g. swap over network)
 * and must not recurse into it.
 */
int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	int ret;
	unsigned int noreclaim_flag;

	/* these should have been dropped before queueing */
	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));

	noreclaim_flag = memalloc_noreclaim_save();
	/* INDIRECT_CALL_INET: devirtualized fast paths for TCP v6/v4 */
	ret = INDIRECT_CALL_INET(sk->sk_backlog_rcv,
				 tcp_v6_do_rcv,
				 tcp_v4_do_rcv,
				 sk, skb);
	memalloc_noreclaim_restore(noreclaim_flag);

	return ret;
}
EXPORT_SYMBOL(__sk_backlog_rcv);
348b4b9e355SMel Gorman 
sk_error_report(struct sock * sk)349e3ae2365SAlexander Aring void sk_error_report(struct sock *sk)
350e3ae2365SAlexander Aring {
351e3ae2365SAlexander Aring 	sk->sk_error_report(sk);
352e6a3e443SAlexander Aring 
353e6a3e443SAlexander Aring 	switch (sk->sk_family) {
354e6a3e443SAlexander Aring 	case AF_INET:
355e6a3e443SAlexander Aring 		fallthrough;
356e6a3e443SAlexander Aring 	case AF_INET6:
357e6a3e443SAlexander Aring 		trace_inet_sk_error_report(sk);
358e6a3e443SAlexander Aring 		break;
359e6a3e443SAlexander Aring 	default:
360e6a3e443SAlexander Aring 		break;
361e6a3e443SAlexander Aring 	}
362e3ae2365SAlexander Aring }
363e3ae2365SAlexander Aring EXPORT_SYMBOL(sk_error_report);
364e3ae2365SAlexander Aring 
/* Convert a jiffies timeout @timeo into the userspace timeval layout
 * selected by @old_timeval (with a 32-bit variant for compat tasks),
 * store it at @optval, and return the number of bytes written.
 * MAX_SCHEDULE_TIMEOUT is reported as an all-zero timeval.
 */
int sock_get_timeout(long timeo, void *optval, bool old_timeval)
{
	struct __kernel_sock_timeval tv = {
		.tv_sec = 0,
		.tv_usec = 0,
	};

	if (timeo != MAX_SCHEDULE_TIMEOUT) {
		tv.tv_sec = timeo / HZ;
		tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ;
	}

	/* 32-bit compat callers expect the old 32-bit timeval layout */
	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
		struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec };

		*(struct old_timeval32 *)optval = tv32;
		return sizeof(tv32);
	}

	if (old_timeval) {
		struct __kernel_old_timeval old_tv = {
			.tv_sec = tv.tv_sec,
			.tv_usec = tv.tv_usec,
		};

		*(struct __kernel_old_timeval *)optval = old_tv;
		return sizeof(old_tv);
	}

	*(struct __kernel_sock_timeval *)optval = tv;
	return sizeof(tv);
}
EXPORT_SYMBOL(sock_get_timeout);
395a9beb86aSDeepa Dinamani 
/* Copy a user-supplied timeout, in whichever timeval layout applies,
 * into @tv.
 *
 * Return: 0 on success, -EINVAL if @optlen is smaller than the
 * expected layout, -EFAULT if the copy from userspace fails.
 */
int sock_copy_user_timeval(struct __kernel_sock_timeval *tv,
			   sockptr_t optval, int optlen, bool old_timeval)
{
	struct __kernel_old_timeval old_tv;
	struct old_timeval32 tv32;

	/* Native 64-bit layout: copy straight into the caller's buffer */
	if (!old_timeval) {
		if (optlen < sizeof(*tv))
			return -EINVAL;
		if (copy_from_sockptr(tv, optval, sizeof(*tv)))
			return -EFAULT;
		return 0;
	}

	/* 32-bit compat tasks pass the old 32-bit timeval layout */
	if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
		if (optlen < sizeof(tv32))
			return -EINVAL;
		if (copy_from_sockptr(&tv32, optval, sizeof(tv32)))
			return -EFAULT;
		tv->tv_sec = tv32.tv_sec;
		tv->tv_usec = tv32.tv_usec;
		return 0;
	}

	/* Legacy native 'struct timeval' layout */
	if (optlen < sizeof(old_tv))
		return -EINVAL;
	if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv)))
		return -EFAULT;
	tv->tv_sec = old_tv.tv_sec;
	tv->tv_usec = old_tv.tv_usec;
	return 0;
}
EXPORT_SYMBOL(sock_copy_user_timeval);
4284c1e34c0SRichard Palethorpe 
/* Parse a user-supplied SO_RCVTIMEO/SO_SNDTIMEO value and store the
 * resulting timeout, in jiffies, into *timeo_p.
 *
 * Returns 0 on success, -EDOM for an out-of-range microsecond field,
 * or a negative errno propagated from the userspace copy.
 */
static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
			    bool old_timeval)
{
	struct __kernel_sock_timeval tv;
	int err = sock_copy_user_timeval(&tv, optval, optlen, old_timeval);
	long val;

	if (err)
		return err;

	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
		return -EDOM;

	if (tv.tv_sec < 0) {
		/* Negative timeouts are treated as "no timeout" (0), and
		 * logged a few times (rate-limited) as an application bug.
		 */
		static int warned __read_mostly;

		/* WRITE_ONCE() pairs with lockless readers of the timeout */
		WRITE_ONCE(*timeo_p, 0);
		if (warned < 10 && net_ratelimit()) {
			warned++;
			pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
				__func__, current->comm, task_pid_nr(current));
		}
		return 0;
	}
	/* A zero timeval, or one too large to represent in jiffies,
	 * means "wait forever".
	 */
	val = MAX_SCHEDULE_TIMEOUT;
	if ((tv.tv_sec || tv.tv_usec) &&
	    (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)))
		val = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec,
						    USEC_PER_SEC / HZ);
	WRITE_ONCE(*timeo_p, val);
	return 0;
}
4611da177e4SLinus Torvalds 
sock_needs_netstamp(const struct sock * sk)462080a270fSHannes Frederic Sowa static bool sock_needs_netstamp(const struct sock *sk)
463080a270fSHannes Frederic Sowa {
464080a270fSHannes Frederic Sowa 	switch (sk->sk_family) {
465080a270fSHannes Frederic Sowa 	case AF_UNSPEC:
466080a270fSHannes Frederic Sowa 	case AF_UNIX:
467080a270fSHannes Frederic Sowa 		return false;
468080a270fSHannes Frederic Sowa 	default:
469080a270fSHannes Frederic Sowa 		return true;
470080a270fSHannes Frederic Sowa 	}
471080a270fSHannes Frederic Sowa }
472080a270fSHannes Frederic Sowa 
sock_disable_timestamp(struct sock * sk,unsigned long flags)47308e29af3SEric Dumazet static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
4741da177e4SLinus Torvalds {
47508e29af3SEric Dumazet 	if (sk->sk_flags & flags) {
47608e29af3SEric Dumazet 		sk->sk_flags &= ~flags;
477080a270fSHannes Frederic Sowa 		if (sock_needs_netstamp(sk) &&
478080a270fSHannes Frederic Sowa 		    !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
4791da177e4SLinus Torvalds 			net_disable_timestamp();
4801da177e4SLinus Torvalds 	}
4811da177e4SLinus Torvalds }
4821da177e4SLinus Torvalds 
4831da177e4SLinus Torvalds 
/* Charge @skb to @sk and append it to the socket receive queue, waking
 * any reader unless the socket is already dead.
 *
 * Returns 0 on success, -ENOMEM when the receive buffer is full, or
 * -ENOBUFS when protocol memory accounting refuses the charge.  On
 * failure the caller keeps ownership of @skb.
 */
int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	unsigned long flags;
	struct sk_buff_head *list = &sk->sk_receive_queue;

	/* Receive buffer full: count the drop and fire the tracepoint */
	if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) {
		atomic_inc(&sk->sk_drops);
		trace_sock_rcvqueue_full(sk, skb);
		return -ENOMEM;
	}

	/* Protocol-level memory accounting may still veto the charge */
	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
		atomic_inc(&sk->sk_drops);
		return -ENOBUFS;
	}

	skb->dev = NULL;
	skb_set_owner_r(skb, sk);

	/* we escape from rcu protected region, make sure we dont leak
	 * a norefcounted dst
	 */
	skb_dst_force(skb);

	/* irqsave: the receive queue may also be touched from hard IRQ */
	spin_lock_irqsave(&list->lock, flags);
	/* Record the drop count under the queue lock so readers see a
	 * value consistent with this skb's position in the queue.
	 */
	sock_skb_set_dropcount(sk, skb);
	__skb_queue_tail(list, skb);
	spin_unlock_irqrestore(&list->lock, flags);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk);
	return 0;
}
EXPORT_SYMBOL(__sock_queue_rcv_skb);
518e6afc8acSsamanthakumar 
sock_queue_rcv_skb_reason(struct sock * sk,struct sk_buff * skb,enum skb_drop_reason * reason)519c1b8a567SMenglong Dong int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb,
520c1b8a567SMenglong Dong 			      enum skb_drop_reason *reason)
521e6afc8acSsamanthakumar {
522c1b8a567SMenglong Dong 	enum skb_drop_reason drop_reason;
523e6afc8acSsamanthakumar 	int err;
524e6afc8acSsamanthakumar 
525e6afc8acSsamanthakumar 	err = sk_filter(sk, skb);
526c1b8a567SMenglong Dong 	if (err) {
527c1b8a567SMenglong Dong 		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
528c1b8a567SMenglong Dong 		goto out;
529e6afc8acSsamanthakumar 	}
530c1b8a567SMenglong Dong 	err = __sock_queue_rcv_skb(sk, skb);
531c1b8a567SMenglong Dong 	switch (err) {
532c1b8a567SMenglong Dong 	case -ENOMEM:
533c1b8a567SMenglong Dong 		drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
534c1b8a567SMenglong Dong 		break;
535c1b8a567SMenglong Dong 	case -ENOBUFS:
536c1b8a567SMenglong Dong 		drop_reason = SKB_DROP_REASON_PROTO_MEM;
537c1b8a567SMenglong Dong 		break;
538c1b8a567SMenglong Dong 	default:
539c1b8a567SMenglong Dong 		drop_reason = SKB_NOT_DROPPED_YET;
540c1b8a567SMenglong Dong 		break;
541c1b8a567SMenglong Dong 	}
542c1b8a567SMenglong Dong out:
543c1b8a567SMenglong Dong 	if (reason)
544c1b8a567SMenglong Dong 		*reason = drop_reason;
545c1b8a567SMenglong Dong 	return err;
546c1b8a567SMenglong Dong }
547c1b8a567SMenglong Dong EXPORT_SYMBOL(sock_queue_rcv_skb_reason);
548f0088a50SDenis Vlasenko 
/* Deliver @skb to @sk from softirq context: run the socket filter,
 * then either invoke the protocol receive handler directly (when no
 * process context owns the socket) or queue the skb on the backlog.
 *
 * @nested:     take the bh lock with the nested lockdep subclass
 * @trim_cap:   capability letting the filter trim instead of drop
 * @refcounted: drop a reference on @sk before returning
 */
int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
		     const int nested, unsigned int trim_cap, bool refcounted)
{
	int rc = NET_RX_SUCCESS;

	if (sk_filter_trim_cap(sk, skb, trim_cap))
		goto discard_and_relse;

	skb->dev = NULL;

	/* Cheap pre-lock check: drop early if receive queues are full */
	if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) {
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}
	if (nested)
		bh_lock_sock_nested(sk);
	else
		bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		/*
		 * trylock + unlock semantics:
		 */
		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

		rc = sk_backlog_rcv(sk, skb);

		mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
	} else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
		/* Backlog limit exceeded: count the drop */
		bh_unlock_sock(sk);
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}

	bh_unlock_sock(sk);
out:
	if (refcounted)
		sock_put(sk);
	return rc;
discard_and_relse:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(__sk_receive_skb);
592f0088a50SDenis Vlasenko 
593bbd807dfSBrian Vazquez INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *,
594bbd807dfSBrian Vazquez 							  u32));
595bbd807dfSBrian Vazquez INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
596bbd807dfSBrian Vazquez 							   u32));
/* Return @sk's cached dst entry if still usable, otherwise clear the
 * cache, release the entry and return NULL.
 *
 * An obsolete entry is revalidated through its ->check() callback
 * (dispatched via INDIRECT_CALL_INET for the IPv4/IPv6 fast paths)
 * against @cookie.
 */
struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = __sk_dst_get(sk);

	if (!dst || !dst->obsolete)
		return dst;

	if (INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
			       dst, cookie))
		return dst;

	sk_tx_queue_clear(sk);
	WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
	RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
	dst_release(dst);
	return NULL;
}
EXPORT_SYMBOL(__sk_dst_check);
614f0088a50SDenis Vlasenko 
/* Refcounted variant of __sk_dst_check(): takes its own reference via
 * sk_dst_get() and uses sk_dst_reset() to drop an entry whose
 * ->check() callback rejects @cookie.
 */
struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = sk_dst_get(sk);

	if (!dst || !dst->obsolete)
		return dst;

	if (INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
			       dst, cookie))
		return dst;

	sk_dst_reset(sk);
	dst_release(dst);
	return NULL;
}
EXPORT_SYMBOL(sk_dst_check);
630f0088a50SDenis Vlasenko 
sock_bindtoindex_locked(struct sock * sk,int ifindex)6317594888cSChristoph Hellwig static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
632f5dd3d0cSDavid Herrmann {
633f5dd3d0cSDavid Herrmann 	int ret = -ENOPROTOOPT;
634f5dd3d0cSDavid Herrmann #ifdef CONFIG_NETDEVICES
635f5dd3d0cSDavid Herrmann 	struct net *net = sock_net(sk);
636f5dd3d0cSDavid Herrmann 
637f5dd3d0cSDavid Herrmann 	/* Sorry... */
638f5dd3d0cSDavid Herrmann 	ret = -EPERM;
639c427bfecSVincent Bernat 	if (sk->sk_bound_dev_if && !ns_capable(net->user_ns, CAP_NET_RAW))
640f5dd3d0cSDavid Herrmann 		goto out;
641f5dd3d0cSDavid Herrmann 
642f5dd3d0cSDavid Herrmann 	ret = -EINVAL;
643f5dd3d0cSDavid Herrmann 	if (ifindex < 0)
644f5dd3d0cSDavid Herrmann 		goto out;
645f5dd3d0cSDavid Herrmann 
646e5fccaa1SEric Dumazet 	/* Paired with all READ_ONCE() done locklessly. */
647e5fccaa1SEric Dumazet 	WRITE_ONCE(sk->sk_bound_dev_if, ifindex);
648e5fccaa1SEric Dumazet 
649f5dd3d0cSDavid Herrmann 	if (sk->sk_prot->rehash)
650f5dd3d0cSDavid Herrmann 		sk->sk_prot->rehash(sk);
651f5dd3d0cSDavid Herrmann 	sk_dst_reset(sk);
652f5dd3d0cSDavid Herrmann 
653f5dd3d0cSDavid Herrmann 	ret = 0;
654f5dd3d0cSDavid Herrmann 
655f5dd3d0cSDavid Herrmann out:
656f5dd3d0cSDavid Herrmann #endif
657f5dd3d0cSDavid Herrmann 
658f5dd3d0cSDavid Herrmann 	return ret;
659f5dd3d0cSDavid Herrmann }
660f5dd3d0cSDavid Herrmann 
/* Bind @sk to the device with index @ifindex.
 *
 * @lock_sk: take the socket lock around the update; pass false when
 *           the caller already holds it.
 */
int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk)
{
	int ret;

	if (lock_sk)
		lock_sock(sk);
	ret = sock_bindtoindex_locked(sk, ifindex);
	if (lock_sk)
		release_sock(sk);

	return ret;
}
EXPORT_SYMBOL(sock_bindtoindex);
6747594888cSChristoph Hellwig 
/* SO_BINDTODEVICE: bind this socket to a particular device like
 * "eth0", as specified in the passed interface name.  If the name is
 * "" or the option length is zero the socket is not bound.
 */
static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
{
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];
	int index = 0;
	int ret;

	if (optlen < 0)
		return -EINVAL;

	/* Truncate over-long names; devname stays NUL-terminated. */
	if (optlen > IFNAMSIZ - 1)
		optlen = IFNAMSIZ - 1;
	memset(devname, 0, sizeof(devname));

	if (copy_from_sockptr(devname, optval, optlen))
		return -EFAULT;

	if (devname[0] != '\0') {
		struct net_device *dev;

		/* Resolve the name to an ifindex under RCU; only the
		 * index is used afterwards, no device reference kept.
		 */
		rcu_read_lock();
		dev = dev_get_by_name_rcu(net, devname);
		if (dev)
			index = dev->ifindex;
		rcu_read_unlock();
		if (!dev)
			return -ENODEV;
	}

	sockopt_lock_sock(sk);
	ret = sock_bindtoindex_locked(sk, index);
	sockopt_release_sock(sk);

	return ret;
#else
	return -ENOPROTOOPT;
#endif
}
7224878809fSDavid S. Miller 
/* SO_BINDTODEVICE getsockopt: copy the bound device name (or a
 * zero-length result when unbound) to user space.
 */
static int sock_getbindtodevice(struct sock *sk, sockptr_t optval,
				sockptr_t optlen, int len)
{
#ifdef CONFIG_NETDEVICES
	int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];
	int ret;

	if (bound_dev_if == 0) {
		/* Not bound: report length 0, copy no name. */
		len = 0;
		goto zero;
	}

	if (len < IFNAMSIZ)
		return -EINVAL;

	ret = netdev_get_name(net, devname, bound_dev_if);
	if (ret)
		return ret;

	len = strlen(devname) + 1;

	if (copy_to_sockptr(optval, devname, len))
		return -EFAULT;

zero:
	if (copy_to_sockptr(optlen, &len, sizeof(int)))
		return -EFAULT;

	return 0;
#else
	return -ENOPROTOOPT;
#endif
}
763c91f6df2SBrian Haley 
sk_mc_loop(struct sock * sk)764f60e5990Shannes@stressinduktion.org bool sk_mc_loop(struct sock *sk)
765f60e5990Shannes@stressinduktion.org {
766f60e5990Shannes@stressinduktion.org 	if (dev_recursion_level())
767f60e5990Shannes@stressinduktion.org 		return false;
768f60e5990Shannes@stressinduktion.org 	if (!sk)
769f60e5990Shannes@stressinduktion.org 		return true;
770a3e0fdf7SEric Dumazet 	/* IPV6_ADDRFORM can change sk->sk_family under us. */
771a3e0fdf7SEric Dumazet 	switch (READ_ONCE(sk->sk_family)) {
772f60e5990Shannes@stressinduktion.org 	case AF_INET:
773b09bde5cSEric Dumazet 		return inet_test_bit(MC_LOOP, sk);
774f60e5990Shannes@stressinduktion.org #if IS_ENABLED(CONFIG_IPV6)
775f60e5990Shannes@stressinduktion.org 	case AF_INET6:
776f60e5990Shannes@stressinduktion.org 		return inet6_sk(sk)->mc_loop;
777f60e5990Shannes@stressinduktion.org #endif
778f60e5990Shannes@stressinduktion.org 	}
7790ad6f6e7SEric Dumazet 	WARN_ON_ONCE(1);
780f60e5990Shannes@stressinduktion.org 	return true;
781f60e5990Shannes@stressinduktion.org }
782f60e5990Shannes@stressinduktion.org EXPORT_SYMBOL(sk_mc_loop);
783f60e5990Shannes@stressinduktion.org 
sock_set_reuseaddr(struct sock * sk)784b58f0e8fSChristoph Hellwig void sock_set_reuseaddr(struct sock *sk)
785b58f0e8fSChristoph Hellwig {
786b58f0e8fSChristoph Hellwig 	lock_sock(sk);
787b58f0e8fSChristoph Hellwig 	sk->sk_reuse = SK_CAN_REUSE;
788b58f0e8fSChristoph Hellwig 	release_sock(sk);
789b58f0e8fSChristoph Hellwig }
790b58f0e8fSChristoph Hellwig EXPORT_SYMBOL(sock_set_reuseaddr);
791b58f0e8fSChristoph Hellwig 
sock_set_reuseport(struct sock * sk)792fe31a326SChristoph Hellwig void sock_set_reuseport(struct sock *sk)
793fe31a326SChristoph Hellwig {
794fe31a326SChristoph Hellwig 	lock_sock(sk);
795fe31a326SChristoph Hellwig 	sk->sk_reuseport = true;
796fe31a326SChristoph Hellwig 	release_sock(sk);
797fe31a326SChristoph Hellwig }
798fe31a326SChristoph Hellwig EXPORT_SYMBOL(sock_set_reuseport);
799fe31a326SChristoph Hellwig 
sock_no_linger(struct sock * sk)800c433594cSChristoph Hellwig void sock_no_linger(struct sock *sk)
801c433594cSChristoph Hellwig {
802c433594cSChristoph Hellwig 	lock_sock(sk);
803bc1fb82aSEric Dumazet 	WRITE_ONCE(sk->sk_lingertime, 0);
804c433594cSChristoph Hellwig 	sock_set_flag(sk, SOCK_LINGER);
805c433594cSChristoph Hellwig 	release_sock(sk);
806c433594cSChristoph Hellwig }
807c433594cSChristoph Hellwig EXPORT_SYMBOL(sock_no_linger);
808c433594cSChristoph Hellwig 
/* Set sk->sk_priority (SO_PRIORITY) under the socket lock;
 * WRITE_ONCE() pairs with lockless readers.
 */
void sock_set_priority(struct sock *sk, u32 priority)
{
	lock_sock(sk);
	WRITE_ONCE(sk->sk_priority, priority);
	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_priority);
8166e434967SChristoph Hellwig 
/* Set the send timeout from @secs seconds; zero or an out-of-range
 * value selects MAX_SCHEDULE_TIMEOUT (i.e. no timeout).
 */
void sock_set_sndtimeo(struct sock *sk, s64 secs)
{
	long timeo = MAX_SCHEDULE_TIMEOUT;

	if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
		timeo = secs * HZ;

	lock_sock(sk);
	WRITE_ONCE(sk->sk_sndtimeo, timeo);
	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_sndtimeo);
82776ee0785SChristoph Hellwig 
/* Common helper behind the SO_TIMESTAMP{,NS}_{OLD,NEW} options.
 *
 * @val: enable (true) or disable (false) receive timestamping.
 * @new: select the SOCK_TSTAMP_NEW variant of the option.
 * @ns:  report timestamps in nanoseconds (SOCK_RCVTSTAMPNS).
 */
static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
{
	if (!val) {
		sock_reset_flag(sk, SOCK_RCVTSTAMP);
		sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
		return;
	}

	sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new);
	sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns);
	sock_set_flag(sk, SOCK_RCVTSTAMP);
	sock_enable_timestamp(sk, SOCK_TIMESTAMP);
}
840783da70eSChristoph Hellwig 
sock_enable_timestamps(struct sock * sk)841783da70eSChristoph Hellwig void sock_enable_timestamps(struct sock *sk)
842783da70eSChristoph Hellwig {
843783da70eSChristoph Hellwig 	lock_sock(sk);
844783da70eSChristoph Hellwig 	__sock_set_timestamps(sk, true, false, true);
845783da70eSChristoph Hellwig 	release_sock(sk);
846783da70eSChristoph Hellwig }
847783da70eSChristoph Hellwig EXPORT_SYMBOL(sock_enable_timestamps);
848783da70eSChristoph Hellwig 
/* Dispatch the four SO_TIMESTAMP{,NS}_{OLD,NEW} options to
 * __sock_set_timestamps(); unknown optnames are ignored.
 */
void sock_set_timestamp(struct sock *sk, int optname, bool valbool)
{
	bool new = (optname == SO_TIMESTAMP_NEW ||
		    optname == SO_TIMESTAMPNS_NEW);
	bool ns = (optname == SO_TIMESTAMPNS_OLD ||
		   optname == SO_TIMESTAMPNS_NEW);

	switch (optname) {
	case SO_TIMESTAMP_OLD:
	case SO_TIMESTAMP_NEW:
	case SO_TIMESTAMPNS_OLD:
	case SO_TIMESTAMPNS_NEW:
		__sock_set_timestamps(sk, valbool, new, ns);
		break;
	}
}
866371087aaSFlorian Westphal 
sock_timestamping_bind_phc(struct sock * sk,int phc_index)867d463126eSYangbo Lu static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
868ced122d9SFlorian Westphal {
869d463126eSYangbo Lu 	struct net *net = sock_net(sk);
870d463126eSYangbo Lu 	struct net_device *dev = NULL;
871d463126eSYangbo Lu 	bool match = false;
872d463126eSYangbo Lu 	int *vclock_index;
873d463126eSYangbo Lu 	int i, num;
874d463126eSYangbo Lu 
875d463126eSYangbo Lu 	if (sk->sk_bound_dev_if)
876d463126eSYangbo Lu 		dev = dev_get_by_index(net, sk->sk_bound_dev_if);
877d463126eSYangbo Lu 
878d463126eSYangbo Lu 	if (!dev) {
879d463126eSYangbo Lu 		pr_err("%s: sock not bind to device\n", __func__);
880d463126eSYangbo Lu 		return -EOPNOTSUPP;
881d463126eSYangbo Lu 	}
882d463126eSYangbo Lu 
883d463126eSYangbo Lu 	num = ethtool_get_phc_vclocks(dev, &vclock_index);
8842a4d75bfSMiroslav Lichvar 	dev_put(dev);
8852a4d75bfSMiroslav Lichvar 
886d463126eSYangbo Lu 	for (i = 0; i < num; i++) {
887d463126eSYangbo Lu 		if (*(vclock_index + i) == phc_index) {
888d463126eSYangbo Lu 			match = true;
889d463126eSYangbo Lu 			break;
890d463126eSYangbo Lu 		}
891d463126eSYangbo Lu 	}
892d463126eSYangbo Lu 
893d463126eSYangbo Lu 	if (num > 0)
894d463126eSYangbo Lu 		kfree(vclock_index);
895d463126eSYangbo Lu 
896d463126eSYangbo Lu 	if (!match)
897d463126eSYangbo Lu 		return -EINVAL;
898d463126eSYangbo Lu 
899251cd405SEric Dumazet 	WRITE_ONCE(sk->sk_bind_phc, phc_index);
900d463126eSYangbo Lu 
901d463126eSYangbo Lu 	return 0;
902d463126eSYangbo Lu }
903d463126eSYangbo Lu 
/* Core of the SO_TIMESTAMPING_{OLD,NEW} setsockopt.
 *
 * Validates the SOF_TIMESTAMPING_* flag combination, initializes the
 * packet-ID counter when OPT_ID is being newly enabled, optionally
 * binds the socket to a PTP virtual clock (BIND_PHC), then publishes
 * the new flags and toggles software RX timestamp generation.
 *
 * Caller serializes writers; WRITE_ONCE() on sk_tsflags pairs with
 * lockless readers elsewhere.
 */
int sock_set_timestamping(struct sock *sk, int optname,
			  struct so_timestamping timestamping)
{
	int val = timestamping.flags;
	int ret;

	if (val & ~SOF_TIMESTAMPING_MASK)
		return -EINVAL;

	/* OPT_ID_TCP only modifies OPT_ID; it is invalid on its own. */
	if (val & SOF_TIMESTAMPING_OPT_ID_TCP &&
	    !(val & SOF_TIMESTAMPING_OPT_ID))
		return -EINVAL;

	/* Seed sk_tskey only on the OFF -> ON transition of OPT_ID. */
	if (val & SOF_TIMESTAMPING_OPT_ID &&
	    !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
		if (sk_is_tcp(sk)) {
			/* Key is derived from TCP sequence numbers, which
			 * are meaningless on CLOSE/LISTEN sockets.
			 */
			if ((1 << sk->sk_state) &
			    (TCPF_CLOSE | TCPF_LISTEN))
				return -EINVAL;
			if (val & SOF_TIMESTAMPING_OPT_ID_TCP)
				atomic_set(&sk->sk_tskey, tcp_sk(sk)->write_seq);
			else
				atomic_set(&sk->sk_tskey, tcp_sk(sk)->snd_una);
		} else {
			atomic_set(&sk->sk_tskey, 0);
		}
	}

	if (val & SOF_TIMESTAMPING_OPT_STATS &&
	    !(val & SOF_TIMESTAMPING_OPT_TSONLY))
		return -EINVAL;

	if (val & SOF_TIMESTAMPING_BIND_PHC) {
		ret = sock_timestamping_bind_phc(sk, timestamping.bind_phc);
		if (ret)
			return ret;
	}

	WRITE_ONCE(sk->sk_tsflags, val);
	sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);

	if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
		sock_enable_timestamp(sk,
				      SOCK_TIMESTAMPING_RX_SOFTWARE);
	else
		sock_disable_timestamp(sk,
				       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
	return 0;
}
953ced122d9SFlorian Westphal 
sock_set_keepalive(struct sock * sk)954ce3d9544SChristoph Hellwig void sock_set_keepalive(struct sock *sk)
955ce3d9544SChristoph Hellwig {
956ce3d9544SChristoph Hellwig 	lock_sock(sk);
957ce3d9544SChristoph Hellwig 	if (sk->sk_prot->keepalive)
958ce3d9544SChristoph Hellwig 		sk->sk_prot->keepalive(sk, true);
959ce3d9544SChristoph Hellwig 	sock_valbool_flag(sk, SOCK_KEEPOPEN, true);
960ce3d9544SChristoph Hellwig 	release_sock(sk);
961ce3d9544SChristoph Hellwig }
962ce3d9544SChristoph Hellwig EXPORT_SYMBOL(sock_set_keepalive);
963ce3d9544SChristoph Hellwig 
__sock_set_rcvbuf(struct sock * sk,int val)96426cfabf9SChristoph Hellwig static void __sock_set_rcvbuf(struct sock *sk, int val)
96526cfabf9SChristoph Hellwig {
96626cfabf9SChristoph Hellwig 	/* Ensure val * 2 fits into an int, to prevent max_t() from treating it
96726cfabf9SChristoph Hellwig 	 * as a negative value.
96826cfabf9SChristoph Hellwig 	 */
96926cfabf9SChristoph Hellwig 	val = min_t(int, val, INT_MAX / 2);
97026cfabf9SChristoph Hellwig 	sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
97126cfabf9SChristoph Hellwig 
97226cfabf9SChristoph Hellwig 	/* We double it on the way in to account for "struct sk_buff" etc.
97326cfabf9SChristoph Hellwig 	 * overhead.   Applications assume that the SO_RCVBUF setting they make
97426cfabf9SChristoph Hellwig 	 * will allow that much actual data to be received on that socket.
97526cfabf9SChristoph Hellwig 	 *
97626cfabf9SChristoph Hellwig 	 * Applications are unaware that "struct sk_buff" and other overheads
97726cfabf9SChristoph Hellwig 	 * allocate from the receive buffer during socket buffer allocation.
97826cfabf9SChristoph Hellwig 	 *
97926cfabf9SChristoph Hellwig 	 * And after considering the possible alternatives, returning the value
98026cfabf9SChristoph Hellwig 	 * we actually used in getsockopt is the most desirable behavior.
98126cfabf9SChristoph Hellwig 	 */
98226cfabf9SChristoph Hellwig 	WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF));
98326cfabf9SChristoph Hellwig }
98426cfabf9SChristoph Hellwig 
/* Locked wrapper around __sock_set_rcvbuf() for kernel callers. */
void sock_set_rcvbuf(struct sock *sk, int val)
{
	lock_sock(sk);
	__sock_set_rcvbuf(sk, val);
	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_rcvbuf);
99226cfabf9SChristoph Hellwig 
/* Update sk->sk_mark, invalidating the cached route only when the
 * mark actually changes.
 */
static void __sock_set_mark(struct sock *sk, u32 val)
{
	if (val == sk->sk_mark)
		return;

	WRITE_ONCE(sk->sk_mark, val);
	sk_dst_reset(sk);
}
1000dd9082f4SAlexander Aring 
/* Locked wrapper around __sock_set_mark() for kernel callers. */
void sock_set_mark(struct sock *sk, u32 val)
{
	lock_sock(sk);
	__sock_set_mark(sk, val);
	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_mark);
100884d1c617SAlexander Aring 
sock_release_reserved_memory(struct sock * sk,int bytes)10092bb2f5fbSWei Wang static void sock_release_reserved_memory(struct sock *sk, int bytes)
10102bb2f5fbSWei Wang {
10112bb2f5fbSWei Wang 	/* Round down bytes to multiple of pages */
1012100fdd1fSEric Dumazet 	bytes = round_down(bytes, PAGE_SIZE);
10132bb2f5fbSWei Wang 
10142bb2f5fbSWei Wang 	WARN_ON(bytes > sk->sk_reserved_mem);
1015fe11fdcbSEric Dumazet 	WRITE_ONCE(sk->sk_reserved_mem, sk->sk_reserved_mem - bytes);
10162bb2f5fbSWei Wang 	sk_mem_reclaim(sk);
10172bb2f5fbSWei Wang }
10182bb2f5fbSWei Wang 
/* SO_RESERVE_MEM: pre-charge @bytes (rounded up to accounting pages)
 * to the socket's memory cgroup and protocol memory accounting, and
 * record the reservation in sk->sk_reserved_mem.
 *
 * Returns 0 on success, -EOPNOTSUPP when memcg socket accounting does
 * not apply to @sk, -ENOMEM when the memcg charge fails or the charge
 * would push the protocol over its pressure limit.
 */
static int sock_reserve_memory(struct sock *sk, int bytes)
{
	long allocated;
	bool charged;
	int pages;

	if (!mem_cgroup_sockets_enabled || !sk->sk_memcg || !sk_has_account(sk))
		return -EOPNOTSUPP;

	if (!bytes)
		return 0;

	pages = sk_mem_pages(bytes);

	/* pre-charge to memcg */
	charged = mem_cgroup_charge_skmem(sk->sk_memcg, pages,
					  GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!charged)
		return -ENOMEM;

	/* pre-charge to forward_alloc */
	sk_memory_allocated_add(sk, pages);
	allocated = sk_memory_allocated(sk);
	/* If the system goes into memory pressure with this
	 * precharge, give up and return error.
	 */
	if (allocated > sk_prot_mem_limits(sk, 1)) {
		/* Roll back both charges made above. */
		sk_memory_allocated_sub(sk, pages);
		mem_cgroup_uncharge_skmem(sk->sk_memcg, pages);
		return -ENOMEM;
	}
	sk_forward_alloc_add(sk, pages << PAGE_SHIFT);

	/* WRITE_ONCE() pairs with lockless readers of sk_reserved_mem. */
	WRITE_ONCE(sk->sk_reserved_mem,
		   sk->sk_reserved_mem + (pages << PAGE_SHIFT));

	return 0;
}
10572bb2f5fbSWei Wang 
/* Take the socket lock for a [gs]etsockopt operation.
 *
 * When current->bpf_ctx is set, the setsockopt is called from a bpf
 * prog and bpf has already ensured the sk lock is held before calling
 * setsockopt(), so this is a no-op.
 */
void sockopt_lock_sock(struct sock *sk)
{
	if (has_current_bpf_ctx())
		return;

	lock_sock(sk);
}
EXPORT_SYMBOL(sockopt_lock_sock);
107024426654SMartin KaFai Lau 
/* Counterpart of sockopt_lock_sock(): release the socket lock unless
 * running from a bpf prog (which owns the lock itself).
 */
void sockopt_release_sock(struct sock *sk)
{
	if (has_current_bpf_ctx())
		return;

	release_sock(sk);
}
EXPORT_SYMBOL(sockopt_release_sock);
107924426654SMartin KaFai Lau 
sockopt_ns_capable(struct user_namespace * ns,int cap)1080e42c7beeSMartin KaFai Lau bool sockopt_ns_capable(struct user_namespace *ns, int cap)
1081e42c7beeSMartin KaFai Lau {
1082e42c7beeSMartin KaFai Lau 	return has_current_bpf_ctx() || ns_capable(ns, cap);
1083e42c7beeSMartin KaFai Lau }
1084e42c7beeSMartin KaFai Lau EXPORT_SYMBOL(sockopt_ns_capable);
1085e42c7beeSMartin KaFai Lau 
sockopt_capable(int cap)1086e42c7beeSMartin KaFai Lau bool sockopt_capable(int cap)
1087e42c7beeSMartin KaFai Lau {
1088e42c7beeSMartin KaFai Lau 	return has_current_bpf_ctx() || capable(cap);
1089e42c7beeSMartin KaFai Lau }
1090e42c7beeSMartin KaFai Lau EXPORT_SYMBOL(sockopt_capable);
1091e42c7beeSMartin KaFai Lau 
10921da177e4SLinus Torvalds /*
10931da177e4SLinus Torvalds  *	This is meant for all protocols to use and covers goings on
10941da177e4SLinus Torvalds  *	at the socket level. Everything here is generic.
10951da177e4SLinus Torvalds  */
10961da177e4SLinus Torvalds 
sk_setsockopt(struct sock * sk,int level,int optname,sockptr_t optval,unsigned int optlen)109729003875SMartin KaFai Lau int sk_setsockopt(struct sock *sk, int level, int optname,
1098c8c1bbb6SChristoph Hellwig 		  sockptr_t optval, unsigned int optlen)
10991da177e4SLinus Torvalds {
1100d463126eSYangbo Lu 	struct so_timestamping timestamping;
11014d748f99SMartin KaFai Lau 	struct socket *sock = sk->sk_socket;
110280b14deeSRichard Cochran 	struct sock_txtime sk_txtime;
11031da177e4SLinus Torvalds 	int val;
11041da177e4SLinus Torvalds 	int valbool;
11051da177e4SLinus Torvalds 	struct linger ling;
11061da177e4SLinus Torvalds 	int ret = 0;
11071da177e4SLinus Torvalds 
11081da177e4SLinus Torvalds 	/*
11091da177e4SLinus Torvalds 	 *	Options without arguments
11101da177e4SLinus Torvalds 	 */
11111da177e4SLinus Torvalds 
11124878809fSDavid S. Miller 	if (optname == SO_BINDTODEVICE)
1113c91f6df2SBrian Haley 		return sock_setbindtodevice(sk, optval, optlen);
11144878809fSDavid S. Miller 
11151da177e4SLinus Torvalds 	if (optlen < sizeof(int))
1116e71a4783SStephen Hemminger 		return -EINVAL;
11171da177e4SLinus Torvalds 
1118c8c1bbb6SChristoph Hellwig 	if (copy_from_sockptr(&val, optval, sizeof(val)))
11191da177e4SLinus Torvalds 		return -EFAULT;
11201da177e4SLinus Torvalds 
11211da177e4SLinus Torvalds 	valbool = val ? 1 : 0;
11221da177e4SLinus Torvalds 
112324426654SMartin KaFai Lau 	sockopt_lock_sock(sk);
11241da177e4SLinus Torvalds 
1125e71a4783SStephen Hemminger 	switch (optname) {
11261da177e4SLinus Torvalds 	case SO_DEBUG:
1127e42c7beeSMartin KaFai Lau 		if (val && !sockopt_capable(CAP_NET_ADMIN))
11281da177e4SLinus Torvalds 			ret = -EACCES;
11292a91525cSEric Dumazet 		else
1130c0ef877bSPavel Emelyanov 			sock_valbool_flag(sk, SOCK_DBG, valbool);
11311da177e4SLinus Torvalds 		break;
11321da177e4SLinus Torvalds 	case SO_REUSEADDR:
1133cdb8744dSBart Van Assche 		sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
11341da177e4SLinus Torvalds 		break;
1135055dc21aSTom Herbert 	case SO_REUSEPORT:
1136055dc21aSTom Herbert 		sk->sk_reuseport = valbool;
1137055dc21aSTom Herbert 		break;
11381da177e4SLinus Torvalds 	case SO_TYPE:
113949c794e9SJan Engelhardt 	case SO_PROTOCOL:
11400d6038eeSJan Engelhardt 	case SO_DOMAIN:
11411da177e4SLinus Torvalds 	case SO_ERROR:
11421da177e4SLinus Torvalds 		ret = -ENOPROTOOPT;
11431da177e4SLinus Torvalds 		break;
11441da177e4SLinus Torvalds 	case SO_DONTROUTE:
1145c0ef877bSPavel Emelyanov 		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
11460fbe82e6Syupeng 		sk_dst_reset(sk);
11471da177e4SLinus Torvalds 		break;
11481da177e4SLinus Torvalds 	case SO_BROADCAST:
11491da177e4SLinus Torvalds 		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
11501da177e4SLinus Torvalds 		break;
11511da177e4SLinus Torvalds 	case SO_SNDBUF:
11521da177e4SLinus Torvalds 		/* Don't error on this BSD doesn't and if you think
115382981930SEric Dumazet 		 * about it this is right. Otherwise apps have to
115482981930SEric Dumazet 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
115582981930SEric Dumazet 		 * are treated in BSD as hints
115682981930SEric Dumazet 		 */
11571227c177SKuniyuki Iwashima 		val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
1158b0573deaSPatrick McHardy set_sndbuf:
11594057765fSGuillaume Nault 		/* Ensure val * 2 fits into an int, to prevent max_t()
11604057765fSGuillaume Nault 		 * from treating it as a negative value.
11614057765fSGuillaume Nault 		 */
11624057765fSGuillaume Nault 		val = min_t(int, val, INT_MAX / 2);
11631da177e4SLinus Torvalds 		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
1164e292f05eSEric Dumazet 		WRITE_ONCE(sk->sk_sndbuf,
1165e292f05eSEric Dumazet 			   max_t(int, val * 2, SOCK_MIN_SNDBUF));
116682981930SEric Dumazet 		/* Wake up sending tasks if we upped the value. */
11671da177e4SLinus Torvalds 		sk->sk_write_space(sk);
11681da177e4SLinus Torvalds 		break;
11691da177e4SLinus Torvalds 
1170b0573deaSPatrick McHardy 	case SO_SNDBUFFORCE:
1171e42c7beeSMartin KaFai Lau 		if (!sockopt_capable(CAP_NET_ADMIN)) {
1172b0573deaSPatrick McHardy 			ret = -EPERM;
1173b0573deaSPatrick McHardy 			break;
1174b0573deaSPatrick McHardy 		}
11754057765fSGuillaume Nault 
11764057765fSGuillaume Nault 		/* No negative values (to prevent underflow, as val will be
11774057765fSGuillaume Nault 		 * multiplied by 2).
11784057765fSGuillaume Nault 		 */
11794057765fSGuillaume Nault 		if (val < 0)
11804057765fSGuillaume Nault 			val = 0;
1181b0573deaSPatrick McHardy 		goto set_sndbuf;
1182b0573deaSPatrick McHardy 
11831da177e4SLinus Torvalds 	case SO_RCVBUF:
11841da177e4SLinus Torvalds 		/* Don't error on this BSD doesn't and if you think
118582981930SEric Dumazet 		 * about it this is right. Otherwise apps have to
118682981930SEric Dumazet 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
118782981930SEric Dumazet 		 * are treated in BSD as hints
118882981930SEric Dumazet 		 */
11891227c177SKuniyuki Iwashima 		__sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max)));
11901da177e4SLinus Torvalds 		break;
11911da177e4SLinus Torvalds 
1192b0573deaSPatrick McHardy 	case SO_RCVBUFFORCE:
1193e42c7beeSMartin KaFai Lau 		if (!sockopt_capable(CAP_NET_ADMIN)) {
1194b0573deaSPatrick McHardy 			ret = -EPERM;
1195b0573deaSPatrick McHardy 			break;
1196b0573deaSPatrick McHardy 		}
11974057765fSGuillaume Nault 
11984057765fSGuillaume Nault 		/* No negative values (to prevent underflow, as val will be
11994057765fSGuillaume Nault 		 * multiplied by 2).
12004057765fSGuillaume Nault 		 */
120126cfabf9SChristoph Hellwig 		__sock_set_rcvbuf(sk, max(val, 0));
120226cfabf9SChristoph Hellwig 		break;
1203b0573deaSPatrick McHardy 
12041da177e4SLinus Torvalds 	case SO_KEEPALIVE:
12054b9d07a4SUrsula Braun 		if (sk->sk_prot->keepalive)
12064b9d07a4SUrsula Braun 			sk->sk_prot->keepalive(sk, valbool);
12071da177e4SLinus Torvalds 		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
12081da177e4SLinus Torvalds 		break;
12091da177e4SLinus Torvalds 
12101da177e4SLinus Torvalds 	case SO_OOBINLINE:
12111da177e4SLinus Torvalds 		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
12121da177e4SLinus Torvalds 		break;
12131da177e4SLinus Torvalds 
12141da177e4SLinus Torvalds 	case SO_NO_CHECK:
121528448b80STom Herbert 		sk->sk_no_check_tx = valbool;
12161da177e4SLinus Torvalds 		break;
12171da177e4SLinus Torvalds 
12181da177e4SLinus Torvalds 	case SO_PRIORITY:
12195e1fccc0SEric W. Biederman 		if ((val >= 0 && val <= 6) ||
1220e42c7beeSMartin KaFai Lau 		    sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
1221e42c7beeSMartin KaFai Lau 		    sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
12228bf43be7SEric Dumazet 			WRITE_ONCE(sk->sk_priority, val);
12231da177e4SLinus Torvalds 		else
12241da177e4SLinus Torvalds 			ret = -EPERM;
12251da177e4SLinus Torvalds 		break;
12261da177e4SLinus Torvalds 
12271da177e4SLinus Torvalds 	case SO_LINGER:
12281da177e4SLinus Torvalds 		if (optlen < sizeof(ling)) {
12291da177e4SLinus Torvalds 			ret = -EINVAL;	/* 1003.1g */
12301da177e4SLinus Torvalds 			break;
12311da177e4SLinus Torvalds 		}
1232c8c1bbb6SChristoph Hellwig 		if (copy_from_sockptr(&ling, optval, sizeof(ling))) {
12331da177e4SLinus Torvalds 			ret = -EFAULT;
12341da177e4SLinus Torvalds 			break;
12351da177e4SLinus Torvalds 		}
1236bc1fb82aSEric Dumazet 		if (!ling.l_onoff) {
12371da177e4SLinus Torvalds 			sock_reset_flag(sk, SOCK_LINGER);
1238bc1fb82aSEric Dumazet 		} else {
1239bc1fb82aSEric Dumazet 			unsigned long t_sec = ling.l_linger;
1240bc1fb82aSEric Dumazet 
1241bc1fb82aSEric Dumazet 			if (t_sec >= MAX_SCHEDULE_TIMEOUT / HZ)
1242bc1fb82aSEric Dumazet 				WRITE_ONCE(sk->sk_lingertime, MAX_SCHEDULE_TIMEOUT);
12431da177e4SLinus Torvalds 			else
1244bc1fb82aSEric Dumazet 				WRITE_ONCE(sk->sk_lingertime, t_sec * HZ);
12451da177e4SLinus Torvalds 			sock_set_flag(sk, SOCK_LINGER);
12461da177e4SLinus Torvalds 		}
12471da177e4SLinus Torvalds 		break;
12481da177e4SLinus Torvalds 
12491da177e4SLinus Torvalds 	case SO_BSDCOMPAT:
12501da177e4SLinus Torvalds 		break;
12511da177e4SLinus Torvalds 
12521da177e4SLinus Torvalds 	case SO_PASSCRED:
1253274c4a6dSAndy Shevchenko 		assign_bit(SOCK_PASSCRED, &sock->flags, valbool);
12541da177e4SLinus Torvalds 		break;
12551da177e4SLinus Torvalds 
12565e2ff670SAlexander Mikhalitsyn 	case SO_PASSPIDFD:
1257274c4a6dSAndy Shevchenko 		assign_bit(SOCK_PASSPIDFD, &sock->flags, valbool);
12585e2ff670SAlexander Mikhalitsyn 		break;
12595e2ff670SAlexander Mikhalitsyn 
12607f1bc6e9SDeepa Dinamani 	case SO_TIMESTAMP_OLD:
1261783da70eSChristoph Hellwig 	case SO_TIMESTAMP_NEW:
1262783da70eSChristoph Hellwig 	case SO_TIMESTAMPNS_OLD:
1263783da70eSChristoph Hellwig 	case SO_TIMESTAMPNS_NEW:
126481b4a0ccSEric Dumazet 		sock_set_timestamp(sk, optname, valbool);
1265783da70eSChristoph Hellwig 		break;
1266ced122d9SFlorian Westphal 
12679718475eSDeepa Dinamani 	case SO_TIMESTAMPING_NEW:
12687f1bc6e9SDeepa Dinamani 	case SO_TIMESTAMPING_OLD:
1269d463126eSYangbo Lu 		if (optlen == sizeof(timestamping)) {
1270d463126eSYangbo Lu 			if (copy_from_sockptr(&timestamping, optval,
1271271dbc31SDan Carpenter 					      sizeof(timestamping))) {
1272271dbc31SDan Carpenter 				ret = -EFAULT;
1273271dbc31SDan Carpenter 				break;
1274271dbc31SDan Carpenter 			}
1275d463126eSYangbo Lu 		} else {
1276d463126eSYangbo Lu 			memset(&timestamping, 0, sizeof(timestamping));
1277d463126eSYangbo Lu 			timestamping.flags = val;
1278d463126eSYangbo Lu 		}
1279d463126eSYangbo Lu 		ret = sock_set_timestamping(sk, optname, timestamping);
128020d49473SPatrick Ohly 		break;
128120d49473SPatrick Ohly 
12821da177e4SLinus Torvalds 	case SO_RCVLOWAT:
12831ded5e5aSEric Dumazet 		{
12841ded5e5aSEric Dumazet 		int (*set_rcvlowat)(struct sock *sk, int val) = NULL;
12851ded5e5aSEric Dumazet 
12861da177e4SLinus Torvalds 		if (val < 0)
12871da177e4SLinus Torvalds 			val = INT_MAX;
12881ded5e5aSEric Dumazet 		if (sock)
12891ded5e5aSEric Dumazet 			set_rcvlowat = READ_ONCE(sock->ops)->set_rcvlowat;
12901ded5e5aSEric Dumazet 		if (set_rcvlowat)
12911ded5e5aSEric Dumazet 			ret = set_rcvlowat(sk, val);
1292d1361840SEric Dumazet 		else
1293eac66402SEric Dumazet 			WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
12941da177e4SLinus Torvalds 		break;
12951ded5e5aSEric Dumazet 		}
129645bdc661SDeepa Dinamani 	case SO_RCVTIMEO_OLD:
1297a9beb86aSDeepa Dinamani 	case SO_RCVTIMEO_NEW:
1298c8c1bbb6SChristoph Hellwig 		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
1299c34645acSChristoph Hellwig 				       optlen, optname == SO_RCVTIMEO_OLD);
13001da177e4SLinus Torvalds 		break;
13011da177e4SLinus Torvalds 
130245bdc661SDeepa Dinamani 	case SO_SNDTIMEO_OLD:
1303a9beb86aSDeepa Dinamani 	case SO_SNDTIMEO_NEW:
1304c8c1bbb6SChristoph Hellwig 		ret = sock_set_timeout(&sk->sk_sndtimeo, optval,
1305c34645acSChristoph Hellwig 				       optlen, optname == SO_SNDTIMEO_OLD);
13061da177e4SLinus Torvalds 		break;
13071da177e4SLinus Torvalds 
13084d295e54SChristoph Hellwig 	case SO_ATTACH_FILTER: {
13091da177e4SLinus Torvalds 		struct sock_fprog fprog;
13101da177e4SLinus Torvalds 
1311c8c1bbb6SChristoph Hellwig 		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
13124d295e54SChristoph Hellwig 		if (!ret)
13131da177e4SLinus Torvalds 			ret = sk_attach_filter(&fprog, sk);
13141da177e4SLinus Torvalds 		break;
13154d295e54SChristoph Hellwig 	}
131689aa0758SAlexei Starovoitov 	case SO_ATTACH_BPF:
131789aa0758SAlexei Starovoitov 		ret = -EINVAL;
131889aa0758SAlexei Starovoitov 		if (optlen == sizeof(u32)) {
131989aa0758SAlexei Starovoitov 			u32 ufd;
132089aa0758SAlexei Starovoitov 
132189aa0758SAlexei Starovoitov 			ret = -EFAULT;
1322c8c1bbb6SChristoph Hellwig 			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
132389aa0758SAlexei Starovoitov 				break;
132489aa0758SAlexei Starovoitov 
132589aa0758SAlexei Starovoitov 			ret = sk_attach_bpf(ufd, sk);
132689aa0758SAlexei Starovoitov 		}
132789aa0758SAlexei Starovoitov 		break;
132889aa0758SAlexei Starovoitov 
13294d295e54SChristoph Hellwig 	case SO_ATTACH_REUSEPORT_CBPF: {
1330538950a1SCraig Gallek 		struct sock_fprog fprog;
1331538950a1SCraig Gallek 
1332c8c1bbb6SChristoph Hellwig 		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
13334d295e54SChristoph Hellwig 		if (!ret)
1334538950a1SCraig Gallek 			ret = sk_reuseport_attach_filter(&fprog, sk);
1335538950a1SCraig Gallek 		break;
13364d295e54SChristoph Hellwig 	}
1337538950a1SCraig Gallek 	case SO_ATTACH_REUSEPORT_EBPF:
1338538950a1SCraig Gallek 		ret = -EINVAL;
1339538950a1SCraig Gallek 		if (optlen == sizeof(u32)) {
1340538950a1SCraig Gallek 			u32 ufd;
1341538950a1SCraig Gallek 
1342538950a1SCraig Gallek 			ret = -EFAULT;
1343c8c1bbb6SChristoph Hellwig 			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
1344538950a1SCraig Gallek 				break;
1345538950a1SCraig Gallek 
1346538950a1SCraig Gallek 			ret = sk_reuseport_attach_bpf(ufd, sk);
1347538950a1SCraig Gallek 		}
1348538950a1SCraig Gallek 		break;
1349538950a1SCraig Gallek 
135099f3a064SMartin KaFai Lau 	case SO_DETACH_REUSEPORT_BPF:
135199f3a064SMartin KaFai Lau 		ret = reuseport_detach_prog(sk);
135299f3a064SMartin KaFai Lau 		break;
135399f3a064SMartin KaFai Lau 
13541da177e4SLinus Torvalds 	case SO_DETACH_FILTER:
135555b33325SPavel Emelyanov 		ret = sk_detach_filter(sk);
13561da177e4SLinus Torvalds 		break;
13571da177e4SLinus Torvalds 
1358d59577b6SVincent Bernat 	case SO_LOCK_FILTER:
1359d59577b6SVincent Bernat 		if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
1360d59577b6SVincent Bernat 			ret = -EPERM;
1361d59577b6SVincent Bernat 		else
1362d59577b6SVincent Bernat 			sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
1363d59577b6SVincent Bernat 		break;
1364d59577b6SVincent Bernat 
1365877ce7c1SCatherine Zhang 	case SO_PASSSEC:
1366274c4a6dSAndy Shevchenko 		assign_bit(SOCK_PASSSEC, &sock->flags, valbool);
1367877ce7c1SCatherine Zhang 		break;
13684a19ec58SLaszlo Attila Toth 	case SO_MARK:
1369e42c7beeSMartin KaFai Lau 		if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
1370e42c7beeSMartin KaFai Lau 		    !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
13714a19ec58SLaszlo Attila Toth 			ret = -EPERM;
1372dd9082f4SAlexander Aring 			break;
137350254256SDavid Barmann 		}
1374dd9082f4SAlexander Aring 
1375dd9082f4SAlexander Aring 		__sock_set_mark(sk, val);
13764a19ec58SLaszlo Attila Toth 		break;
13776fd1d51cSErin MacNeil 	case SO_RCVMARK:
13786fd1d51cSErin MacNeil 		sock_valbool_flag(sk, SOCK_RCVMARK, valbool);
13796fd1d51cSErin MacNeil 		break;
1380877ce7c1SCatherine Zhang 
13813b885787SNeil Horman 	case SO_RXQ_OVFL:
13828083f0fcSJohannes Berg 		sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
13833b885787SNeil Horman 		break;
13846e3e939fSJohannes Berg 
13856e3e939fSJohannes Berg 	case SO_WIFI_STATUS:
13866e3e939fSJohannes Berg 		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
13876e3e939fSJohannes Berg 		break;
13886e3e939fSJohannes Berg 
1389ef64a54fSPavel Emelyanov 	case SO_PEEK_OFF:
13901ded5e5aSEric Dumazet 		{
13911ded5e5aSEric Dumazet 		int (*set_peek_off)(struct sock *sk, int val);
13921ded5e5aSEric Dumazet 
13931ded5e5aSEric Dumazet 		set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
13941ded5e5aSEric Dumazet 		if (set_peek_off)
13951ded5e5aSEric Dumazet 			ret = set_peek_off(sk, val);
1396ef64a54fSPavel Emelyanov 		else
1397ef64a54fSPavel Emelyanov 			ret = -EOPNOTSUPP;
1398ef64a54fSPavel Emelyanov 		break;
13991ded5e5aSEric Dumazet 		}
14003bdc0ebaSBen Greear 
14013bdc0ebaSBen Greear 	case SO_NOFCS:
14023bdc0ebaSBen Greear 		sock_valbool_flag(sk, SOCK_NOFCS, valbool);
14033bdc0ebaSBen Greear 		break;
14043bdc0ebaSBen Greear 
14057d4c04fcSKeller, Jacob E 	case SO_SELECT_ERR_QUEUE:
14067d4c04fcSKeller, Jacob E 		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
14077d4c04fcSKeller, Jacob E 		break;
14087d4c04fcSKeller, Jacob E 
1409e0d1095aSCong Wang #ifdef CONFIG_NET_RX_BUSY_POLL
141064b0dc51SEliezer Tamir 	case SO_BUSY_POLL:
1411dafcc438SEliezer Tamir 		if (val < 0)
1412dafcc438SEliezer Tamir 			ret = -EINVAL;
1413dafcc438SEliezer Tamir 		else
14140dbffbb5SEric Dumazet 			WRITE_ONCE(sk->sk_ll_usec, val);
1415dafcc438SEliezer Tamir 		break;
14167fd3253aSBjörn Töpel 	case SO_PREFER_BUSY_POLL:
1417e42c7beeSMartin KaFai Lau 		if (valbool && !sockopt_capable(CAP_NET_ADMIN))
14187fd3253aSBjörn Töpel 			ret = -EPERM;
14197fd3253aSBjörn Töpel 		else
14207fd3253aSBjörn Töpel 			WRITE_ONCE(sk->sk_prefer_busy_poll, valbool);
14217fd3253aSBjörn Töpel 		break;
14227c951cafSBjörn Töpel 	case SO_BUSY_POLL_BUDGET:
1423e42c7beeSMartin KaFai Lau 		if (val > READ_ONCE(sk->sk_busy_poll_budget) && !sockopt_capable(CAP_NET_ADMIN)) {
14247c951cafSBjörn Töpel 			ret = -EPERM;
14257c951cafSBjörn Töpel 		} else {
14267c951cafSBjörn Töpel 			if (val < 0 || val > U16_MAX)
14277c951cafSBjörn Töpel 				ret = -EINVAL;
14287c951cafSBjörn Töpel 			else
14297c951cafSBjörn Töpel 				WRITE_ONCE(sk->sk_busy_poll_budget, val);
14307c951cafSBjörn Töpel 		}
14317c951cafSBjörn Töpel 		break;
1432dafcc438SEliezer Tamir #endif
143362748f32SEric Dumazet 
143462748f32SEric Dumazet 	case SO_MAX_PACING_RATE:
14356bdef102SEric Dumazet 		{
1436700465fdSKe Li 		unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val;
14376bdef102SEric Dumazet 
14386bdef102SEric Dumazet 		if (sizeof(ulval) != sizeof(val) &&
14396bdef102SEric Dumazet 		    optlen >= sizeof(ulval) &&
1440c8c1bbb6SChristoph Hellwig 		    copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
14416bdef102SEric Dumazet 			ret = -EFAULT;
14426bdef102SEric Dumazet 			break;
14436bdef102SEric Dumazet 		}
14446bdef102SEric Dumazet 		if (ulval != ~0UL)
1445218af599SEric Dumazet 			cmpxchg(&sk->sk_pacing_status,
1446218af599SEric Dumazet 				SK_PACING_NONE,
1447218af599SEric Dumazet 				SK_PACING_NEEDED);
1448ea7f45efSEric Dumazet 		/* Pairs with READ_ONCE() from sk_getsockopt() */
1449ea7f45efSEric Dumazet 		WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
14506bdef102SEric Dumazet 		sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
145162748f32SEric Dumazet 		break;
14526bdef102SEric Dumazet 		}
145370da268bSEric Dumazet 	case SO_INCOMING_CPU:
1454b261eda8SKuniyuki Iwashima 		reuseport_update_incoming_cpu(sk, val);
145570da268bSEric Dumazet 		break;
145670da268bSEric Dumazet 
1457a87cb3e4STom Herbert 	case SO_CNX_ADVICE:
1458a87cb3e4STom Herbert 		if (val == 1)
1459a87cb3e4STom Herbert 			dst_negative_advice(sk);
1460a87cb3e4STom Herbert 		break;
146176851d12SWillem de Bruijn 
146276851d12SWillem de Bruijn 	case SO_ZEROCOPY:
146328190752SSowmini Varadhan 		if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
146442f67eeaSEric Dumazet 			if (!(sk_is_tcp(sk) ||
1465b5947e5dSWillem de Bruijn 			      (sk->sk_type == SOCK_DGRAM &&
1466b5947e5dSWillem de Bruijn 			       sk->sk_protocol == IPPROTO_UDP)))
1467869420a8SSamuel Thibault 				ret = -EOPNOTSUPP;
146828190752SSowmini Varadhan 		} else if (sk->sk_family != PF_RDS) {
1469869420a8SSamuel Thibault 			ret = -EOPNOTSUPP;
147028190752SSowmini Varadhan 		}
147128190752SSowmini Varadhan 		if (!ret) {
147228190752SSowmini Varadhan 			if (val < 0 || val > 1)
147376851d12SWillem de Bruijn 				ret = -EINVAL;
147476851d12SWillem de Bruijn 			else
147576851d12SWillem de Bruijn 				sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
147628190752SSowmini Varadhan 		}
1477334e6413SJesus Sanchez-Palencia 		break;
1478334e6413SJesus Sanchez-Palencia 
147980b14deeSRichard Cochran 	case SO_TXTIME:
1480790709f2SEric Dumazet 		if (optlen != sizeof(struct sock_txtime)) {
148180b14deeSRichard Cochran 			ret = -EINVAL;
1482790709f2SEric Dumazet 			break;
1483c8c1bbb6SChristoph Hellwig 		} else if (copy_from_sockptr(&sk_txtime, optval,
148480b14deeSRichard Cochran 			   sizeof(struct sock_txtime))) {
148580b14deeSRichard Cochran 			ret = -EFAULT;
1486790709f2SEric Dumazet 			break;
148780b14deeSRichard Cochran 		} else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
148880b14deeSRichard Cochran 			ret = -EINVAL;
1489790709f2SEric Dumazet 			break;
1490790709f2SEric Dumazet 		}
1491790709f2SEric Dumazet 		/* CLOCK_MONOTONIC is only used by sch_fq, and this packet
1492790709f2SEric Dumazet 		 * scheduler has enough safe guards.
1493790709f2SEric Dumazet 		 */
1494790709f2SEric Dumazet 		if (sk_txtime.clockid != CLOCK_MONOTONIC &&
1495e42c7beeSMartin KaFai Lau 		    !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1496790709f2SEric Dumazet 			ret = -EPERM;
1497790709f2SEric Dumazet 			break;
1498790709f2SEric Dumazet 		}
149980b14deeSRichard Cochran 		sock_valbool_flag(sk, SOCK_TXTIME, true);
150080b14deeSRichard Cochran 		sk->sk_clockid = sk_txtime.clockid;
150180b14deeSRichard Cochran 		sk->sk_txtime_deadline_mode =
150280b14deeSRichard Cochran 			!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
15034b15c707SJesus Sanchez-Palencia 		sk->sk_txtime_report_errors =
15044b15c707SJesus Sanchez-Palencia 			!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
150580b14deeSRichard Cochran 		break;
150680b14deeSRichard Cochran 
1507f5dd3d0cSDavid Herrmann 	case SO_BINDTOIFINDEX:
15087594888cSChristoph Hellwig 		ret = sock_bindtoindex_locked(sk, val);
1509f5dd3d0cSDavid Herrmann 		break;
1510f5dd3d0cSDavid Herrmann 
151104190bf8SPavel Tikhomirov 	case SO_BUF_LOCK:
151204190bf8SPavel Tikhomirov 		if (val & ~SOCK_BUF_LOCK_MASK) {
151304190bf8SPavel Tikhomirov 			ret = -EINVAL;
151404190bf8SPavel Tikhomirov 			break;
151504190bf8SPavel Tikhomirov 		}
151604190bf8SPavel Tikhomirov 		sk->sk_userlocks = val | (sk->sk_userlocks &
151704190bf8SPavel Tikhomirov 					  ~SOCK_BUF_LOCK_MASK);
151804190bf8SPavel Tikhomirov 		break;
151904190bf8SPavel Tikhomirov 
15202bb2f5fbSWei Wang 	case SO_RESERVE_MEM:
15212bb2f5fbSWei Wang 	{
15222bb2f5fbSWei Wang 		int delta;
15232bb2f5fbSWei Wang 
15242bb2f5fbSWei Wang 		if (val < 0) {
15252bb2f5fbSWei Wang 			ret = -EINVAL;
15262bb2f5fbSWei Wang 			break;
15272bb2f5fbSWei Wang 		}
15282bb2f5fbSWei Wang 
15292bb2f5fbSWei Wang 		delta = val - sk->sk_reserved_mem;
15302bb2f5fbSWei Wang 		if (delta < 0)
15312bb2f5fbSWei Wang 			sock_release_reserved_memory(sk, -delta);
15322bb2f5fbSWei Wang 		else
15332bb2f5fbSWei Wang 			ret = sock_reserve_memory(sk, delta);
15342bb2f5fbSWei Wang 		break;
15352bb2f5fbSWei Wang 	}
15362bb2f5fbSWei Wang 
153726859240SAkhmat Karakotov 	case SO_TXREHASH:
153826859240SAkhmat Karakotov 		if (val < -1 || val > 1) {
153926859240SAkhmat Karakotov 			ret = -EINVAL;
154026859240SAkhmat Karakotov 			break;
154126859240SAkhmat Karakotov 		}
1542c11204c7SKevin Yang 		if ((u8)val == SOCK_TXREHASH_DEFAULT)
1543c11204c7SKevin Yang 			val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
1544c76a0328SEric Dumazet 		/* Paired with READ_ONCE() in tcp_rtx_synack()
1545c76a0328SEric Dumazet 		 * and sk_getsockopt().
1546c76a0328SEric Dumazet 		 */
1547cb6cd2ceSAkhmat Karakotov 		WRITE_ONCE(sk->sk_txrehash, (u8)val);
154826859240SAkhmat Karakotov 		break;
154926859240SAkhmat Karakotov 
15501da177e4SLinus Torvalds 	default:
15511da177e4SLinus Torvalds 		ret = -ENOPROTOOPT;
15521da177e4SLinus Torvalds 		break;
15531da177e4SLinus Torvalds 	}
155424426654SMartin KaFai Lau 	sockopt_release_sock(sk);
15551da177e4SLinus Torvalds 	return ret;
15561da177e4SLinus Torvalds }
15574d748f99SMartin KaFai Lau 
/* Public setsockopt entry point for code that holds a struct socket.
 * All real work (option parsing, locking, per-option handling) lives in
 * sk_setsockopt(); this wrapper merely hops from the socket to its sock.
 */
int sock_setsockopt(struct socket *sock, int level, int optname,
		    sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;

	return sk_setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_setsockopt);
15651da177e4SLinus Torvalds 
sk_get_peer_cred(struct sock * sk)156635306eb2SEric Dumazet static const struct cred *sk_get_peer_cred(struct sock *sk)
156735306eb2SEric Dumazet {
156835306eb2SEric Dumazet 	const struct cred *cred;
156935306eb2SEric Dumazet 
157035306eb2SEric Dumazet 	spin_lock(&sk->sk_peer_lock);
157135306eb2SEric Dumazet 	cred = get_cred(sk->sk_peer_cred);
157235306eb2SEric Dumazet 	spin_unlock(&sk->sk_peer_lock);
157335306eb2SEric Dumazet 
157435306eb2SEric Dumazet 	return cred;
157535306eb2SEric Dumazet }
15761da177e4SLinus Torvalds 
cred_to_ucred(struct pid * pid,const struct cred * cred,struct ucred * ucred)15778f09898bSstephen hemminger static void cred_to_ucred(struct pid *pid, const struct cred *cred,
15783f551f94SEric W. Biederman 			  struct ucred *ucred)
15793f551f94SEric W. Biederman {
15803f551f94SEric W. Biederman 	ucred->pid = pid_vnr(pid);
15813f551f94SEric W. Biederman 	ucred->uid = ucred->gid = -1;
15823f551f94SEric W. Biederman 	if (cred) {
15833f551f94SEric W. Biederman 		struct user_namespace *current_ns = current_user_ns();
15843f551f94SEric W. Biederman 
1585b2e4f544SEric W. Biederman 		ucred->uid = from_kuid_munged(current_ns, cred->euid);
1586b2e4f544SEric W. Biederman 		ucred->gid = from_kgid_munged(current_ns, cred->egid);
15873f551f94SEric W. Biederman 	}
15883f551f94SEric W. Biederman }
15893f551f94SEric W. Biederman 
/* Copy the supplementary group list in @src out to the user-supplied
 * buffer @dst, one gid_t per group, translated into the caller's user
 * namespace.  The caller has already verified the buffer is large enough
 * for src->ngroups entries.
 *
 * Returns 0 on success or -EFAULT if any copy to userspace fails.
 */
static int groups_to_user(sockptr_t dst, const struct group_info *src)
{
	struct user_namespace *ns = current_user_ns();
	int idx = 0;

	while (idx < src->ngroups) {
		gid_t out_gid = from_kgid_munged(ns, src->gid[idx]);

		if (copy_to_sockptr_offset(dst, idx * sizeof(out_gid),
					   &out_gid, sizeof(out_gid)))
			return -EFAULT;
		idx++;
	}

	return 0;
}
160428b5ba2aSDavid Herrmann 
sk_getsockopt(struct sock * sk,int level,int optname,sockptr_t optval,sockptr_t optlen)160565ddc82dSMartin KaFai Lau int sk_getsockopt(struct sock *sk, int level, int optname,
16064ff09db1SMartin KaFai Lau 		  sockptr_t optval, sockptr_t optlen)
16071da177e4SLinus Torvalds {
1608ba74a760SMartin KaFai Lau 	struct socket *sock = sk->sk_socket;
16091da177e4SLinus Torvalds 
1610e71a4783SStephen Hemminger 	union {
16111da177e4SLinus Torvalds 		int val;
16125daab9dbSChenbo Feng 		u64 val64;
1613677f136cSEric Dumazet 		unsigned long ulval;
16141da177e4SLinus Torvalds 		struct linger ling;
1615fe0c72f3SArnd Bergmann 		struct old_timeval32 tm32;
1616fe0c72f3SArnd Bergmann 		struct __kernel_old_timeval tm;
1617a9beb86aSDeepa Dinamani 		struct  __kernel_sock_timeval stm;
161880b14deeSRichard Cochran 		struct sock_txtime txtime;
1619d463126eSYangbo Lu 		struct so_timestamping timestamping;
16201da177e4SLinus Torvalds 	} v;
16211da177e4SLinus Torvalds 
16224d0392beSH Hartley Sweeten 	int lv = sizeof(int);
16231da177e4SLinus Torvalds 	int len;
16241da177e4SLinus Torvalds 
16254ff09db1SMartin KaFai Lau 	if (copy_from_sockptr(&len, optlen, sizeof(int)))
16261da177e4SLinus Torvalds 		return -EFAULT;
16271da177e4SLinus Torvalds 	if (len < 0)
16281da177e4SLinus Torvalds 		return -EINVAL;
16291da177e4SLinus Torvalds 
163050fee1deSEugene Teo 	memset(&v, 0, sizeof(v));
1631df0bca04SClément Lecigne 
1632e71a4783SStephen Hemminger 	switch (optname) {
16331da177e4SLinus Torvalds 	case SO_DEBUG:
16341da177e4SLinus Torvalds 		v.val = sock_flag(sk, SOCK_DBG);
16351da177e4SLinus Torvalds 		break;
16361da177e4SLinus Torvalds 
16371da177e4SLinus Torvalds 	case SO_DONTROUTE:
16381da177e4SLinus Torvalds 		v.val = sock_flag(sk, SOCK_LOCALROUTE);
16391da177e4SLinus Torvalds 		break;
16401da177e4SLinus Torvalds 
16411da177e4SLinus Torvalds 	case SO_BROADCAST:
16421b23a5dfSEric Dumazet 		v.val = sock_flag(sk, SOCK_BROADCAST);
16431da177e4SLinus Torvalds 		break;
16441da177e4SLinus Torvalds 
16451da177e4SLinus Torvalds 	case SO_SNDBUF:
164674bc0843SEric Dumazet 		v.val = READ_ONCE(sk->sk_sndbuf);
16471da177e4SLinus Torvalds 		break;
16481da177e4SLinus Torvalds 
16491da177e4SLinus Torvalds 	case SO_RCVBUF:
1650b4b55325SEric Dumazet 		v.val = READ_ONCE(sk->sk_rcvbuf);
16511da177e4SLinus Torvalds 		break;
16521da177e4SLinus Torvalds 
16531da177e4SLinus Torvalds 	case SO_REUSEADDR:
16541da177e4SLinus Torvalds 		v.val = sk->sk_reuse;
16551da177e4SLinus Torvalds 		break;
16561da177e4SLinus Torvalds 
1657055dc21aSTom Herbert 	case SO_REUSEPORT:
1658055dc21aSTom Herbert 		v.val = sk->sk_reuseport;
1659055dc21aSTom Herbert 		break;
1660055dc21aSTom Herbert 
16611da177e4SLinus Torvalds 	case SO_KEEPALIVE:
16621b23a5dfSEric Dumazet 		v.val = sock_flag(sk, SOCK_KEEPOPEN);
16631da177e4SLinus Torvalds 		break;
16641da177e4SLinus Torvalds 
16651da177e4SLinus Torvalds 	case SO_TYPE:
16661da177e4SLinus Torvalds 		v.val = sk->sk_type;
16671da177e4SLinus Torvalds 		break;
16681da177e4SLinus Torvalds 
166949c794e9SJan Engelhardt 	case SO_PROTOCOL:
167049c794e9SJan Engelhardt 		v.val = sk->sk_protocol;
167149c794e9SJan Engelhardt 		break;
167249c794e9SJan Engelhardt 
16730d6038eeSJan Engelhardt 	case SO_DOMAIN:
16740d6038eeSJan Engelhardt 		v.val = sk->sk_family;
16750d6038eeSJan Engelhardt 		break;
16760d6038eeSJan Engelhardt 
16771da177e4SLinus Torvalds 	case SO_ERROR:
16781da177e4SLinus Torvalds 		v.val = -sock_error(sk);
16791da177e4SLinus Torvalds 		if (v.val == 0)
16801da177e4SLinus Torvalds 			v.val = xchg(&sk->sk_err_soft, 0);
16811da177e4SLinus Torvalds 		break;
16821da177e4SLinus Torvalds 
16831da177e4SLinus Torvalds 	case SO_OOBINLINE:
16841b23a5dfSEric Dumazet 		v.val = sock_flag(sk, SOCK_URGINLINE);
16851da177e4SLinus Torvalds 		break;
16861da177e4SLinus Torvalds 
16871da177e4SLinus Torvalds 	case SO_NO_CHECK:
168828448b80STom Herbert 		v.val = sk->sk_no_check_tx;
16891da177e4SLinus Torvalds 		break;
16901da177e4SLinus Torvalds 
16911da177e4SLinus Torvalds 	case SO_PRIORITY:
16928bf43be7SEric Dumazet 		v.val = READ_ONCE(sk->sk_priority);
16931da177e4SLinus Torvalds 		break;
16941da177e4SLinus Torvalds 
16951da177e4SLinus Torvalds 	case SO_LINGER:
16961da177e4SLinus Torvalds 		lv		= sizeof(v.ling);
16971b23a5dfSEric Dumazet 		v.ling.l_onoff	= sock_flag(sk, SOCK_LINGER);
1698bc1fb82aSEric Dumazet 		v.ling.l_linger	= READ_ONCE(sk->sk_lingertime) / HZ;
16991da177e4SLinus Torvalds 		break;
17001da177e4SLinus Torvalds 
17011da177e4SLinus Torvalds 	case SO_BSDCOMPAT:
17021da177e4SLinus Torvalds 		break;
17031da177e4SLinus Torvalds 
17047f1bc6e9SDeepa Dinamani 	case SO_TIMESTAMP_OLD:
170592f37fd2SEric Dumazet 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1706887feae3SDeepa Dinamani 				!sock_flag(sk, SOCK_TSTAMP_NEW) &&
170792f37fd2SEric Dumazet 				!sock_flag(sk, SOCK_RCVTSTAMPNS);
170892f37fd2SEric Dumazet 		break;
170992f37fd2SEric Dumazet 
17107f1bc6e9SDeepa Dinamani 	case SO_TIMESTAMPNS_OLD:
1711887feae3SDeepa Dinamani 		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW);
1712887feae3SDeepa Dinamani 		break;
1713887feae3SDeepa Dinamani 
1714887feae3SDeepa Dinamani 	case SO_TIMESTAMP_NEW:
1715887feae3SDeepa Dinamani 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW);
1716887feae3SDeepa Dinamani 		break;
1717887feae3SDeepa Dinamani 
1718887feae3SDeepa Dinamani 	case SO_TIMESTAMPNS_NEW:
1719887feae3SDeepa Dinamani 		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW);
17201da177e4SLinus Torvalds 		break;
17211da177e4SLinus Torvalds 
17227f1bc6e9SDeepa Dinamani 	case SO_TIMESTAMPING_OLD:
1723742e4af3SJörn-Thorben Hinz 	case SO_TIMESTAMPING_NEW:
1724d463126eSYangbo Lu 		lv = sizeof(v.timestamping);
1725742e4af3SJörn-Thorben Hinz 		/* For the later-added case SO_TIMESTAMPING_NEW: Be strict about only
1726742e4af3SJörn-Thorben Hinz 		 * returning the flags when they were set through the same option.
1727742e4af3SJörn-Thorben Hinz 		 * Don't change the beviour for the old case SO_TIMESTAMPING_OLD.
1728742e4af3SJörn-Thorben Hinz 		 */
1729742e4af3SJörn-Thorben Hinz 		if (optname == SO_TIMESTAMPING_OLD || sock_flag(sk, SOCK_TSTAMP_NEW)) {
1730e3390b30SEric Dumazet 			v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
1731251cd405SEric Dumazet 			v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
1732742e4af3SJörn-Thorben Hinz 		}
173320d49473SPatrick Ohly 		break;
173420d49473SPatrick Ohly 
1735a9beb86aSDeepa Dinamani 	case SO_RCVTIMEO_OLD:
1736a9beb86aSDeepa Dinamani 	case SO_RCVTIMEO_NEW:
1737285975ddSEric Dumazet 		lv = sock_get_timeout(READ_ONCE(sk->sk_rcvtimeo), &v,
1738285975ddSEric Dumazet 				      SO_RCVTIMEO_OLD == optname);
17391da177e4SLinus Torvalds 		break;
17401da177e4SLinus Torvalds 
1741a9beb86aSDeepa Dinamani 	case SO_SNDTIMEO_OLD:
1742a9beb86aSDeepa Dinamani 	case SO_SNDTIMEO_NEW:
1743285975ddSEric Dumazet 		lv = sock_get_timeout(READ_ONCE(sk->sk_sndtimeo), &v,
1744285975ddSEric Dumazet 				      SO_SNDTIMEO_OLD == optname);
17451da177e4SLinus Torvalds 		break;
17461da177e4SLinus Torvalds 
17471da177e4SLinus Torvalds 	case SO_RCVLOWAT:
1748e6d12bdbSEric Dumazet 		v.val = READ_ONCE(sk->sk_rcvlowat);
17491da177e4SLinus Torvalds 		break;
17501da177e4SLinus Torvalds 
17511da177e4SLinus Torvalds 	case SO_SNDLOWAT:
17521da177e4SLinus Torvalds 		v.val = 1;
17531da177e4SLinus Torvalds 		break;
17541da177e4SLinus Torvalds 
17551da177e4SLinus Torvalds 	case SO_PASSCRED:
175682981930SEric Dumazet 		v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
17571da177e4SLinus Torvalds 		break;
17581da177e4SLinus Torvalds 
17595e2ff670SAlexander Mikhalitsyn 	case SO_PASSPIDFD:
17605e2ff670SAlexander Mikhalitsyn 		v.val = !!test_bit(SOCK_PASSPIDFD, &sock->flags);
17615e2ff670SAlexander Mikhalitsyn 		break;
17625e2ff670SAlexander Mikhalitsyn 
17631da177e4SLinus Torvalds 	case SO_PEERCRED:
1764109f6e39SEric W. Biederman 	{
1765109f6e39SEric W. Biederman 		struct ucred peercred;
1766109f6e39SEric W. Biederman 		if (len > sizeof(peercred))
1767109f6e39SEric W. Biederman 			len = sizeof(peercred);
176835306eb2SEric Dumazet 
176935306eb2SEric Dumazet 		spin_lock(&sk->sk_peer_lock);
1770109f6e39SEric W. Biederman 		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
177135306eb2SEric Dumazet 		spin_unlock(&sk->sk_peer_lock);
177235306eb2SEric Dumazet 
17734ff09db1SMartin KaFai Lau 		if (copy_to_sockptr(optval, &peercred, len))
17741da177e4SLinus Torvalds 			return -EFAULT;
17751da177e4SLinus Torvalds 		goto lenout;
1776109f6e39SEric W. Biederman 	}
17771da177e4SLinus Torvalds 
17787b26952aSAlexander Mikhalitsyn 	case SO_PEERPIDFD:
17797b26952aSAlexander Mikhalitsyn 	{
17807b26952aSAlexander Mikhalitsyn 		struct pid *peer_pid;
17817b26952aSAlexander Mikhalitsyn 		struct file *pidfd_file = NULL;
17827b26952aSAlexander Mikhalitsyn 		int pidfd;
17837b26952aSAlexander Mikhalitsyn 
17847b26952aSAlexander Mikhalitsyn 		if (len > sizeof(pidfd))
17857b26952aSAlexander Mikhalitsyn 			len = sizeof(pidfd);
17867b26952aSAlexander Mikhalitsyn 
17877b26952aSAlexander Mikhalitsyn 		spin_lock(&sk->sk_peer_lock);
17887b26952aSAlexander Mikhalitsyn 		peer_pid = get_pid(sk->sk_peer_pid);
17897b26952aSAlexander Mikhalitsyn 		spin_unlock(&sk->sk_peer_lock);
17907b26952aSAlexander Mikhalitsyn 
17917b26952aSAlexander Mikhalitsyn 		if (!peer_pid)
1792b6f79e82SDavid Rheinsberg 			return -ENODATA;
17937b26952aSAlexander Mikhalitsyn 
17947b26952aSAlexander Mikhalitsyn 		pidfd = pidfd_prepare(peer_pid, 0, &pidfd_file);
17957b26952aSAlexander Mikhalitsyn 		put_pid(peer_pid);
17967b26952aSAlexander Mikhalitsyn 		if (pidfd < 0)
17977b26952aSAlexander Mikhalitsyn 			return pidfd;
17987b26952aSAlexander Mikhalitsyn 
17997b26952aSAlexander Mikhalitsyn 		if (copy_to_sockptr(optval, &pidfd, len) ||
18007b26952aSAlexander Mikhalitsyn 		    copy_to_sockptr(optlen, &len, sizeof(int))) {
18017b26952aSAlexander Mikhalitsyn 			put_unused_fd(pidfd);
18027b26952aSAlexander Mikhalitsyn 			fput(pidfd_file);
18037b26952aSAlexander Mikhalitsyn 
18047b26952aSAlexander Mikhalitsyn 			return -EFAULT;
18057b26952aSAlexander Mikhalitsyn 		}
18067b26952aSAlexander Mikhalitsyn 
18077b26952aSAlexander Mikhalitsyn 		fd_install(pidfd, pidfd_file);
18087b26952aSAlexander Mikhalitsyn 		return 0;
18097b26952aSAlexander Mikhalitsyn 	}
18107b26952aSAlexander Mikhalitsyn 
181128b5ba2aSDavid Herrmann 	case SO_PEERGROUPS:
181228b5ba2aSDavid Herrmann 	{
181335306eb2SEric Dumazet 		const struct cred *cred;
181428b5ba2aSDavid Herrmann 		int ret, n;
181528b5ba2aSDavid Herrmann 
181635306eb2SEric Dumazet 		cred = sk_get_peer_cred(sk);
181735306eb2SEric Dumazet 		if (!cred)
181828b5ba2aSDavid Herrmann 			return -ENODATA;
181928b5ba2aSDavid Herrmann 
182035306eb2SEric Dumazet 		n = cred->group_info->ngroups;
182128b5ba2aSDavid Herrmann 		if (len < n * sizeof(gid_t)) {
182228b5ba2aSDavid Herrmann 			len = n * sizeof(gid_t);
182335306eb2SEric Dumazet 			put_cred(cred);
18244ff09db1SMartin KaFai Lau 			return copy_to_sockptr(optlen, &len, sizeof(int)) ? -EFAULT : -ERANGE;
182528b5ba2aSDavid Herrmann 		}
182628b5ba2aSDavid Herrmann 		len = n * sizeof(gid_t);
182728b5ba2aSDavid Herrmann 
18284ff09db1SMartin KaFai Lau 		ret = groups_to_user(optval, cred->group_info);
182935306eb2SEric Dumazet 		put_cred(cred);
183028b5ba2aSDavid Herrmann 		if (ret)
183128b5ba2aSDavid Herrmann 			return ret;
183228b5ba2aSDavid Herrmann 		goto lenout;
183328b5ba2aSDavid Herrmann 	}
183428b5ba2aSDavid Herrmann 
18351da177e4SLinus Torvalds 	case SO_PEERNAME:
18361da177e4SLinus Torvalds 	{
18378936bf53SKuniyuki Iwashima 		struct sockaddr_storage address;
18381da177e4SLinus Torvalds 
18391ded5e5aSEric Dumazet 		lv = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 2);
18409b2c45d4SDenys Vlasenko 		if (lv < 0)
18411da177e4SLinus Torvalds 			return -ENOTCONN;
18421da177e4SLinus Torvalds 		if (lv < len)
18431da177e4SLinus Torvalds 			return -EINVAL;
18448936bf53SKuniyuki Iwashima 		if (copy_to_sockptr(optval, &address, len))
18451da177e4SLinus Torvalds 			return -EFAULT;
18461da177e4SLinus Torvalds 		goto lenout;
18471da177e4SLinus Torvalds 	}
18481da177e4SLinus Torvalds 
18491da177e4SLinus Torvalds 	/* Dubious BSD thing... Probably nobody even uses it, but
18501da177e4SLinus Torvalds 	 * the UNIX standard wants it for whatever reason... -DaveM
18511da177e4SLinus Torvalds 	 */
18521da177e4SLinus Torvalds 	case SO_ACCEPTCONN:
18531da177e4SLinus Torvalds 		v.val = sk->sk_state == TCP_LISTEN;
18541da177e4SLinus Torvalds 		break;
18551da177e4SLinus Torvalds 
1856877ce7c1SCatherine Zhang 	case SO_PASSSEC:
185782981930SEric Dumazet 		v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
1858877ce7c1SCatherine Zhang 		break;
1859877ce7c1SCatherine Zhang 
18601da177e4SLinus Torvalds 	case SO_PEERSEC:
1861b10b9c34SPaul Moore 		return security_socket_getpeersec_stream(sock,
1862b10b9c34SPaul Moore 							 optval, optlen, len);
18631da177e4SLinus Torvalds 
18644a19ec58SLaszlo Attila Toth 	case SO_MARK:
18653c5b4d69SEric Dumazet 		v.val = READ_ONCE(sk->sk_mark);
18664a19ec58SLaszlo Attila Toth 		break;
18674a19ec58SLaszlo Attila Toth 
18686fd1d51cSErin MacNeil 	case SO_RCVMARK:
18696fd1d51cSErin MacNeil 		v.val = sock_flag(sk, SOCK_RCVMARK);
18706fd1d51cSErin MacNeil 		break;
18716fd1d51cSErin MacNeil 
18723b885787SNeil Horman 	case SO_RXQ_OVFL:
18731b23a5dfSEric Dumazet 		v.val = sock_flag(sk, SOCK_RXQ_OVFL);
18743b885787SNeil Horman 		break;
18753b885787SNeil Horman 
18766e3e939fSJohannes Berg 	case SO_WIFI_STATUS:
18771b23a5dfSEric Dumazet 		v.val = sock_flag(sk, SOCK_WIFI_STATUS);
18786e3e939fSJohannes Berg 		break;
18796e3e939fSJohannes Berg 
1880ef64a54fSPavel Emelyanov 	case SO_PEEK_OFF:
18811ded5e5aSEric Dumazet 		if (!READ_ONCE(sock->ops)->set_peek_off)
1882ef64a54fSPavel Emelyanov 			return -EOPNOTSUPP;
1883ef64a54fSPavel Emelyanov 
188411695c6eSEric Dumazet 		v.val = READ_ONCE(sk->sk_peek_off);
1885ef64a54fSPavel Emelyanov 		break;
1886bc2f7996SDavid S. Miller 	case SO_NOFCS:
18871b23a5dfSEric Dumazet 		v.val = sock_flag(sk, SOCK_NOFCS);
1888bc2f7996SDavid S. Miller 		break;
1889c91f6df2SBrian Haley 
1890f7b86bfeSPavel Emelyanov 	case SO_BINDTODEVICE:
1891c91f6df2SBrian Haley 		return sock_getbindtodevice(sk, optval, optlen, len);
1892c91f6df2SBrian Haley 
1893a8fc9277SPavel Emelyanov 	case SO_GET_FILTER:
18944ff09db1SMartin KaFai Lau 		len = sk_get_filter(sk, optval, len);
1895a8fc9277SPavel Emelyanov 		if (len < 0)
1896a8fc9277SPavel Emelyanov 			return len;
1897a8fc9277SPavel Emelyanov 
1898a8fc9277SPavel Emelyanov 		goto lenout;
1899c91f6df2SBrian Haley 
1900d59577b6SVincent Bernat 	case SO_LOCK_FILTER:
1901d59577b6SVincent Bernat 		v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1902d59577b6SVincent Bernat 		break;
1903d59577b6SVincent Bernat 
1904ea02f941SMichal Sekletar 	case SO_BPF_EXTENSIONS:
1905ea02f941SMichal Sekletar 		v.val = bpf_tell_extensions();
1906ea02f941SMichal Sekletar 		break;
1907ea02f941SMichal Sekletar 
19087d4c04fcSKeller, Jacob E 	case SO_SELECT_ERR_QUEUE:
19097d4c04fcSKeller, Jacob E 		v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
19107d4c04fcSKeller, Jacob E 		break;
19117d4c04fcSKeller, Jacob E 
1912e0d1095aSCong Wang #ifdef CONFIG_NET_RX_BUSY_POLL
191364b0dc51SEliezer Tamir 	case SO_BUSY_POLL:
1914e5f0d2ddSEric Dumazet 		v.val = READ_ONCE(sk->sk_ll_usec);
1915dafcc438SEliezer Tamir 		break;
19167fd3253aSBjörn Töpel 	case SO_PREFER_BUSY_POLL:
19177fd3253aSBjörn Töpel 		v.val = READ_ONCE(sk->sk_prefer_busy_poll);
19187fd3253aSBjörn Töpel 		break;
1919dafcc438SEliezer Tamir #endif
1920dafcc438SEliezer Tamir 
192162748f32SEric Dumazet 	case SO_MAX_PACING_RATE:
1922ea7f45efSEric Dumazet 		/* The READ_ONCE() pair with the WRITE_ONCE() in sk_setsockopt() */
1923677f136cSEric Dumazet 		if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
1924677f136cSEric Dumazet 			lv = sizeof(v.ulval);
1925ea7f45efSEric Dumazet 			v.ulval = READ_ONCE(sk->sk_max_pacing_rate);
1926677f136cSEric Dumazet 		} else {
192776a9ebe8SEric Dumazet 			/* 32bit version */
1928ea7f45efSEric Dumazet 			v.val = min_t(unsigned long, ~0U,
1929ea7f45efSEric Dumazet 				      READ_ONCE(sk->sk_max_pacing_rate));
1930677f136cSEric Dumazet 		}
193162748f32SEric Dumazet 		break;
193262748f32SEric Dumazet 
19332c8c56e1SEric Dumazet 	case SO_INCOMING_CPU:
19347170a977SEric Dumazet 		v.val = READ_ONCE(sk->sk_incoming_cpu);
19352c8c56e1SEric Dumazet 		break;
19362c8c56e1SEric Dumazet 
1937a2d133b1SJosh Hunt 	case SO_MEMINFO:
1938a2d133b1SJosh Hunt 	{
1939a2d133b1SJosh Hunt 		u32 meminfo[SK_MEMINFO_VARS];
1940a2d133b1SJosh Hunt 
1941a2d133b1SJosh Hunt 		sk_get_meminfo(sk, meminfo);
1942a2d133b1SJosh Hunt 
1943a2d133b1SJosh Hunt 		len = min_t(unsigned int, len, sizeof(meminfo));
19444ff09db1SMartin KaFai Lau 		if (copy_to_sockptr(optval, &meminfo, len))
1945a2d133b1SJosh Hunt 			return -EFAULT;
1946a2d133b1SJosh Hunt 
1947a2d133b1SJosh Hunt 		goto lenout;
1948a2d133b1SJosh Hunt 	}
19496d433902SSridhar Samudrala 
19506d433902SSridhar Samudrala #ifdef CONFIG_NET_RX_BUSY_POLL
19516d433902SSridhar Samudrala 	case SO_INCOMING_NAPI_ID:
19526d433902SSridhar Samudrala 		v.val = READ_ONCE(sk->sk_napi_id);
19536d433902SSridhar Samudrala 
19546d433902SSridhar Samudrala 		/* aggregate non-NAPI IDs down to 0 */
19556d433902SSridhar Samudrala 		if (v.val < MIN_NAPI_ID)
19566d433902SSridhar Samudrala 			v.val = 0;
19576d433902SSridhar Samudrala 
19586d433902SSridhar Samudrala 		break;
19596d433902SSridhar Samudrala #endif
19606d433902SSridhar Samudrala 
19615daab9dbSChenbo Feng 	case SO_COOKIE:
19625daab9dbSChenbo Feng 		lv = sizeof(u64);
19635daab9dbSChenbo Feng 		if (len < lv)
19645daab9dbSChenbo Feng 			return -EINVAL;
19655daab9dbSChenbo Feng 		v.val64 = sock_gen_cookie(sk);
19665daab9dbSChenbo Feng 		break;
19675daab9dbSChenbo Feng 
196876851d12SWillem de Bruijn 	case SO_ZEROCOPY:
196976851d12SWillem de Bruijn 		v.val = sock_flag(sk, SOCK_ZEROCOPY);
197076851d12SWillem de Bruijn 		break;
197176851d12SWillem de Bruijn 
197280b14deeSRichard Cochran 	case SO_TXTIME:
197380b14deeSRichard Cochran 		lv = sizeof(v.txtime);
197480b14deeSRichard Cochran 		v.txtime.clockid = sk->sk_clockid;
197580b14deeSRichard Cochran 		v.txtime.flags |= sk->sk_txtime_deadline_mode ?
197680b14deeSRichard Cochran 				  SOF_TXTIME_DEADLINE_MODE : 0;
19774b15c707SJesus Sanchez-Palencia 		v.txtime.flags |= sk->sk_txtime_report_errors ?
19784b15c707SJesus Sanchez-Palencia 				  SOF_TXTIME_REPORT_ERRORS : 0;
197980b14deeSRichard Cochran 		break;
198080b14deeSRichard Cochran 
1981f5dd3d0cSDavid Herrmann 	case SO_BINDTOIFINDEX:
1982e5fccaa1SEric Dumazet 		v.val = READ_ONCE(sk->sk_bound_dev_if);
1983f5dd3d0cSDavid Herrmann 		break;
1984f5dd3d0cSDavid Herrmann 
1985e8b9eab9SMartynas Pumputis 	case SO_NETNS_COOKIE:
1986e8b9eab9SMartynas Pumputis 		lv = sizeof(u64);
1987e8b9eab9SMartynas Pumputis 		if (len != lv)
1988e8b9eab9SMartynas Pumputis 			return -EINVAL;
1989e8b9eab9SMartynas Pumputis 		v.val64 = sock_net(sk)->net_cookie;
1990e8b9eab9SMartynas Pumputis 		break;
1991e8b9eab9SMartynas Pumputis 
199204190bf8SPavel Tikhomirov 	case SO_BUF_LOCK:
199304190bf8SPavel Tikhomirov 		v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK;
199404190bf8SPavel Tikhomirov 		break;
199504190bf8SPavel Tikhomirov 
19962bb2f5fbSWei Wang 	case SO_RESERVE_MEM:
1997fe11fdcbSEric Dumazet 		v.val = READ_ONCE(sk->sk_reserved_mem);
19982bb2f5fbSWei Wang 		break;
19992bb2f5fbSWei Wang 
200026859240SAkhmat Karakotov 	case SO_TXREHASH:
2001c76a0328SEric Dumazet 		/* Paired with WRITE_ONCE() in sk_setsockopt() */
2002c76a0328SEric Dumazet 		v.val = READ_ONCE(sk->sk_txrehash);
200326859240SAkhmat Karakotov 		break;
200426859240SAkhmat Karakotov 
20051da177e4SLinus Torvalds 	default:
2006443b5991SYOSHIFUJI Hideaki/吉藤英明 		/* We implement the SO_SNDLOWAT etc to not be settable
2007443b5991SYOSHIFUJI Hideaki/吉藤英明 		 * (1003.1g 7).
2008443b5991SYOSHIFUJI Hideaki/吉藤英明 		 */
2009e71a4783SStephen Hemminger 		return -ENOPROTOOPT;
20101da177e4SLinus Torvalds 	}
2011e71a4783SStephen Hemminger 
20121da177e4SLinus Torvalds 	if (len > lv)
20131da177e4SLinus Torvalds 		len = lv;
20144ff09db1SMartin KaFai Lau 	if (copy_to_sockptr(optval, &v, len))
20151da177e4SLinus Torvalds 		return -EFAULT;
20161da177e4SLinus Torvalds lenout:
20174ff09db1SMartin KaFai Lau 	if (copy_to_sockptr(optlen, &len, sizeof(int)))
20181da177e4SLinus Torvalds 		return -EFAULT;
20191da177e4SLinus Torvalds 	return 0;
20201da177e4SLinus Torvalds }
20211da177e4SLinus Torvalds 
/* Userspace entry point for getsockopt(): wrap the raw user pointers in
 * sockptr_t cookies and defer to the sockptr-aware worker sk_getsockopt().
 */
int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	sockptr_t val = USER_SOCKPTR(optval);
	sockptr_t len = USER_SOCKPTR(optlen);

	return sk_getsockopt(sock->sk, level, optname, val, len);
}
2029ba74a760SMartin KaFai Lau 
2030a5b5bb9aSIngo Molnar /*
2031a5b5bb9aSIngo Molnar  * Initialize an sk_lock.
2032a5b5bb9aSIngo Molnar  *
2033a5b5bb9aSIngo Molnar  * (We also register the sk_lock with the lock validator.)
2034a5b5bb9aSIngo Molnar  */
sock_lock_init(struct sock * sk)2035b6f99a21SDave Jones static inline void sock_lock_init(struct sock *sk)
2036a5b5bb9aSIngo Molnar {
2037cdfbabfbSDavid Howells 	if (sk->sk_kern_sock)
2038cdfbabfbSDavid Howells 		sock_lock_init_class_and_name(
2039cdfbabfbSDavid Howells 			sk,
2040cdfbabfbSDavid Howells 			af_family_kern_slock_key_strings[sk->sk_family],
2041cdfbabfbSDavid Howells 			af_family_kern_slock_keys + sk->sk_family,
2042cdfbabfbSDavid Howells 			af_family_kern_key_strings[sk->sk_family],
2043cdfbabfbSDavid Howells 			af_family_kern_keys + sk->sk_family);
2044cdfbabfbSDavid Howells 	else
2045cdfbabfbSDavid Howells 		sock_lock_init_class_and_name(
2046cdfbabfbSDavid Howells 			sk,
2047ed07536eSPeter Zijlstra 			af_family_slock_key_strings[sk->sk_family],
2048a5b5bb9aSIngo Molnar 			af_family_slock_keys + sk->sk_family,
2049a5b5bb9aSIngo Molnar 			af_family_key_strings[sk->sk_family],
2050ed07536eSPeter Zijlstra 			af_family_keys + sk->sk_family);
2051a5b5bb9aSIngo Molnar }
2052a5b5bb9aSIngo Molnar 
/*
 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
 * even temporarily, because of RCU lookups. sk_node should also be left as is.
 * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
 */
/* Bulk-copy the parent socket's fields into a freshly allocated clone,
 * skipping the sk_dontcopy_begin..sk_dontcopy_end window (refcount,
 * hash-list linkage) which must keep the clone's own values.
 */
static void sock_copy(struct sock *nsk, const struct sock *osk)
{
	const struct proto *prot = READ_ONCE(osk->sk_prot);
#ifdef CONFIG_SECURITY_NETWORK
	/* Save the clone's own LSM blob: the memcpy below would otherwise
	 * overwrite it with the parent's pointer.
	 */
	void *sptr = nsk->sk_security;
#endif

	/* If we move sk_tx_queue_mapping out of the private section,
	 * we must check if sk_tx_queue_clear() is called after
	 * sock_copy() in sk_clone_lock().
	 */
	BUILD_BUG_ON(offsetof(struct sock, sk_tx_queue_mapping) <
		     offsetof(struct sock, sk_dontcopy_begin) ||
		     offsetof(struct sock, sk_tx_queue_mapping) >=
		     offsetof(struct sock, sk_dontcopy_end));

	/* Copy everything before the dontcopy window ... */
	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));

	/* ... and everything after it, up to the protocol's full object
	 * size (protocol-private state lives past struct sock).
	 */
	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
	       prot->obj_size - offsetof(struct sock, sk_dontcopy_end));

#ifdef CONFIG_SECURITY_NETWORK
	/* Restore the clone's blob, then let the LSM copy security state. */
	nsk->sk_security = sptr;
	security_sk_clone(osk, nsk);
#endif
}
2084f1a6c4daSPavel Emelyanov 
/* Allocate the raw struct sock for @prot, either from the protocol's
 * dedicated slab cache or, failing that, plain kmalloc().  On success the
 * LSM blob is allocated and a reference on the protocol module is held;
 * both are released again on any failure path.
 */
static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
		int family)
{
	struct kmem_cache *slab = prot->slab;
	struct sock *sk;

	if (slab) {
		/* The slab constructor pre-initializes the nulls markers,
		 * so strip __GFP_ZERO and clear only what is needed.
		 */
		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
		if (!sk)
			return NULL;
		if (want_init_on_alloc(priority))
			sk_prot_clear_nulls(sk, prot->obj_size);
	} else {
		sk = kmalloc(prot->obj_size, priority);
		if (!sk)
			return NULL;
	}

	if (security_sk_alloc(sk, family, priority))
		goto out_free;

	if (!try_module_get(prot->owner))
		goto out_free_sec;

	return sk;

out_free_sec:
	security_sk_free(sk);
out_free:
	if (slab)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	return NULL;
}
2120c308c1b2SPavel Emelyanov 
sk_prot_free(struct proto * prot,struct sock * sk)2121c308c1b2SPavel Emelyanov static void sk_prot_free(struct proto *prot, struct sock *sk)
2122c308c1b2SPavel Emelyanov {
2123c308c1b2SPavel Emelyanov 	struct kmem_cache *slab;
21242e4afe7bSPavel Emelyanov 	struct module *owner;
2125c308c1b2SPavel Emelyanov 
21262e4afe7bSPavel Emelyanov 	owner = prot->owner;
2127c308c1b2SPavel Emelyanov 	slab = prot->slab;
21282e4afe7bSPavel Emelyanov 
2129bd1060a1STejun Heo 	cgroup_sk_free(&sk->sk_cgrp_data);
21302d758073SJohannes Weiner 	mem_cgroup_sk_free(sk);
21312e4afe7bSPavel Emelyanov 	security_sk_free(sk);
2132c308c1b2SPavel Emelyanov 	if (slab != NULL)
2133c308c1b2SPavel Emelyanov 		kmem_cache_free(slab, sk);
2134c308c1b2SPavel Emelyanov 	else
2135c308c1b2SPavel Emelyanov 		kfree(sk);
21362e4afe7bSPavel Emelyanov 	module_put(owner);
2137c308c1b2SPavel Emelyanov }
2138c308c1b2SPavel Emelyanov 
21391da177e4SLinus Torvalds /**
21401da177e4SLinus Torvalds  *	sk_alloc - All socket objects are allocated here
2141c4ea43c5SRandy Dunlap  *	@net: the applicable net namespace
21424dc3b16bSPavel Pisa  *	@family: protocol family
21434dc3b16bSPavel Pisa  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
21444dc3b16bSPavel Pisa  *	@prot: struct proto associated with this new sock instance
214511aa9c28SEric W. Biederman  *	@kern: is this to be a kernel socket?
21461da177e4SLinus Torvalds  */
sk_alloc(struct net * net,int family,gfp_t priority,struct proto * prot,int kern)21471b8d7ae4SEric W. Biederman struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
214811aa9c28SEric W. Biederman 		      struct proto *prot, int kern)
21491da177e4SLinus Torvalds {
2150c308c1b2SPavel Emelyanov 	struct sock *sk;
21511da177e4SLinus Torvalds 
2152154adbc8SPavel Emelyanov 	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
21531da177e4SLinus Torvalds 	if (sk) {
21541da177e4SLinus Torvalds 		sk->sk_family = family;
2155476e19cfSArnaldo Carvalho de Melo 		/*
2156476e19cfSArnaldo Carvalho de Melo 		 * See comment in struct sock definition to understand
2157476e19cfSArnaldo Carvalho de Melo 		 * why we need sk_prot_creator -acme
2158476e19cfSArnaldo Carvalho de Melo 		 */
2159476e19cfSArnaldo Carvalho de Melo 		sk->sk_prot = sk->sk_prot_creator = prot;
2160cdfbabfbSDavid Howells 		sk->sk_kern_sock = kern;
21611da177e4SLinus Torvalds 		sock_lock_init(sk);
216226abe143SEric W. Biederman 		sk->sk_net_refcnt = kern ? 0 : 1;
2163648845abSTonghao Zhang 		if (likely(sk->sk_net_refcnt)) {
2164ffa84b5fSEric Dumazet 			get_net_track(net, &sk->ns_tracker, priority);
2165648845abSTonghao Zhang 			sock_inuse_add(net, 1);
21660cafd77dSEric Dumazet 		} else {
21670cafd77dSEric Dumazet 			__netns_tracker_alloc(net, &sk->ns_tracker,
21680cafd77dSEric Dumazet 					      false, priority);
2169648845abSTonghao Zhang 		}
2170648845abSTonghao Zhang 
217126abe143SEric W. Biederman 		sock_net_set(sk, net);
217214afee4bSReshetova, Elena 		refcount_set(&sk->sk_wmem_alloc, 1);
2173f8451725SHerbert Xu 
21742d758073SJohannes Weiner 		mem_cgroup_sk_alloc(sk);
2175d979a39dSJohannes Weiner 		cgroup_sk_alloc(&sk->sk_cgrp_data);
21762a56a1feSTejun Heo 		sock_update_classid(&sk->sk_cgrp_data);
21772a56a1feSTejun Heo 		sock_update_netprioidx(&sk->sk_cgrp_data);
217841b14fb8STariq Toukan 		sk_tx_queue_clear(sk);
21791da177e4SLinus Torvalds 	}
2180a79af59eSFrank Filz 
21812e4afe7bSPavel Emelyanov 	return sk;
21821da177e4SLinus Torvalds }
21832a91525cSEric Dumazet EXPORT_SYMBOL(sk_alloc);
21841da177e4SLinus Torvalds 
/* Sockets having SOCK_RCU_FREE will call this function after one RCU
 * grace period. This is the case for UDP sockets and TCP listeners.
 */
static void __sk_destruct(struct rcu_head *head)
{
	struct sock *sk = container_of(head, struct sock, sk_rcu);
	struct sk_filter *filter;

	/* Protocol-specific destructor runs first, while sk state is intact. */
	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	/* Safe without rcu_read_lock(): sk_wmem_alloc reaching zero proves
	 * we are the last user of this socket.
	 */
	filter = rcu_dereference_check(sk->sk_filter,
				       refcount_read(&sk->sk_wmem_alloc) == 0);
	if (filter) {
		sk_filter_uncharge(sk, filter);
		RCU_INIT_POINTER(sk->sk_filter, NULL);
	}

	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);

#ifdef CONFIG_BPF_SYSCALL
	bpf_sk_storage_free(sk);
#endif

	/* Option memory should have been fully returned by now; a nonzero
	 * count indicates a leak somewhere in the protocol code.
	 */
	if (atomic_read(&sk->sk_omem_alloc))
		pr_debug("%s: optmem leakage (%d bytes) detected\n",
			 __func__, atomic_read(&sk->sk_omem_alloc));

	/* Release the cached page-frag allocator page, if any. */
	if (sk->sk_frag.page) {
		put_page(sk->sk_frag.page);
		sk->sk_frag.page = NULL;
	}

	/* We do not need to acquire sk->sk_peer_lock, we are the last user. */
	put_cred(sk->sk_peer_cred);
	put_pid(sk->sk_peer_pid);

	/* Mirror of the refcount/tracker split done in sk_alloc(). */
	if (likely(sk->sk_net_refcnt))
		put_net_track(sock_net(sk), &sk->ns_tracker);
	else
		__netns_tracker_free(sock_net(sk), &sk->ns_tracker, false);

	/* sk_prot_creator, not sk_prot: the object must be returned to the
	 * cache it was carved from (see comment in struct sock definition).
	 */
	sk_prot_free(sk->sk_prot_creator, sk);
}
22292b85a34eSEric Dumazet 
sk_destruct(struct sock * sk)2230a4298e45SEric Dumazet void sk_destruct(struct sock *sk)
2231a4298e45SEric Dumazet {
22328c7138b3SMartin KaFai Lau 	bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
22338c7138b3SMartin KaFai Lau 
22348c7138b3SMartin KaFai Lau 	if (rcu_access_pointer(sk->sk_reuseport_cb)) {
22358c7138b3SMartin KaFai Lau 		reuseport_detach_sock(sk);
22368c7138b3SMartin KaFai Lau 		use_call_rcu = true;
22378c7138b3SMartin KaFai Lau 	}
22388c7138b3SMartin KaFai Lau 
22398c7138b3SMartin KaFai Lau 	if (use_call_rcu)
2240a4298e45SEric Dumazet 		call_rcu(&sk->sk_rcu, __sk_destruct);
2241a4298e45SEric Dumazet 	else
2242a4298e45SEric Dumazet 		__sk_destruct(&sk->sk_rcu);
2243a4298e45SEric Dumazet }
2244a4298e45SEric Dumazet 
__sk_free(struct sock * sk)2245eb4cb008SCraig Gallek static void __sk_free(struct sock *sk)
2246eb4cb008SCraig Gallek {
2247648845abSTonghao Zhang 	if (likely(sk->sk_net_refcnt))
2248648845abSTonghao Zhang 		sock_inuse_add(sock_net(sk), -1);
2249648845abSTonghao Zhang 
22509709020cSEric Dumazet 	if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk)))
2251eb4cb008SCraig Gallek 		sock_diag_broadcast_destroy(sk);
2252eb4cb008SCraig Gallek 	else
2253eb4cb008SCraig Gallek 		sk_destruct(sk);
2254eb4cb008SCraig Gallek }
2255eb4cb008SCraig Gallek 
sk_free(struct sock * sk)22562b85a34eSEric Dumazet void sk_free(struct sock *sk)
22572b85a34eSEric Dumazet {
22582b85a34eSEric Dumazet 	/*
225925985edcSLucas De Marchi 	 * We subtract one from sk_wmem_alloc and can know if
22602b85a34eSEric Dumazet 	 * some packets are still in some tx queue.
22612b85a34eSEric Dumazet 	 * If not null, sock_wfree() will call __sk_free(sk) later
22622b85a34eSEric Dumazet 	 */
226314afee4bSReshetova, Elena 	if (refcount_dec_and_test(&sk->sk_wmem_alloc))
22642b85a34eSEric Dumazet 		__sk_free(sk);
22652b85a34eSEric Dumazet }
22662a91525cSEric Dumazet EXPORT_SYMBOL(sk_free);
22671da177e4SLinus Torvalds 
sk_init_common(struct sock * sk)2268581319c5SPaolo Abeni static void sk_init_common(struct sock *sk)
2269581319c5SPaolo Abeni {
2270581319c5SPaolo Abeni 	skb_queue_head_init(&sk->sk_receive_queue);
2271581319c5SPaolo Abeni 	skb_queue_head_init(&sk->sk_write_queue);
2272581319c5SPaolo Abeni 	skb_queue_head_init(&sk->sk_error_queue);
2273581319c5SPaolo Abeni 
2274581319c5SPaolo Abeni 	rwlock_init(&sk->sk_callback_lock);
2275581319c5SPaolo Abeni 	lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
2276581319c5SPaolo Abeni 			af_rlock_keys + sk->sk_family,
2277581319c5SPaolo Abeni 			af_family_rlock_key_strings[sk->sk_family]);
2278581319c5SPaolo Abeni 	lockdep_set_class_and_name(&sk->sk_write_queue.lock,
2279581319c5SPaolo Abeni 			af_wlock_keys + sk->sk_family,
2280581319c5SPaolo Abeni 			af_family_wlock_key_strings[sk->sk_family]);
2281581319c5SPaolo Abeni 	lockdep_set_class_and_name(&sk->sk_error_queue.lock,
2282581319c5SPaolo Abeni 			af_elock_keys + sk->sk_family,
2283581319c5SPaolo Abeni 			af_family_elock_key_strings[sk->sk_family]);
2284581319c5SPaolo Abeni 	lockdep_set_class_and_name(&sk->sk_callback_lock,
2285581319c5SPaolo Abeni 			af_callback_keys + sk->sk_family,
2286581319c5SPaolo Abeni 			af_family_clock_key_strings[sk->sk_family]);
2287581319c5SPaolo Abeni }
2288581319c5SPaolo Abeni 
/**
 *	sk_clone_lock - clone a socket, and lock its clone
 *	@sk: the socket to clone
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *
 *	Caller must unlock socket even in error path (bh_unlock_sock(newsk))
 *
 *	Returns the locked clone, or %NULL on allocation/charging failure.
 */
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
{
	struct proto *prot = READ_ONCE(sk->sk_prot);
	struct sk_filter *filter;
	bool is_charged = true;
	struct sock *newsk;

	newsk = sk_prot_alloc(prot, priority, sk->sk_family);
	if (!newsk)
		goto out;

	/* Bulk-copy the parent, skipping the dontcopy window. */
	sock_copy(newsk, sk);

	newsk->sk_prot_creator = prot;

	/* SANITY */
	if (likely(newsk->sk_net_refcnt)) {
		get_net_track(sock_net(newsk), &newsk->ns_tracker, priority);
		sock_inuse_add(sock_net(newsk), 1);
	} else {
		/* Kernel sockets are not elevating the struct net refcount.
		 * Instead, use a tracker to more easily detect if a layer
		 * is not properly dismantling its kernel sockets at netns
		 * destroy time.
		 */
		__netns_tracker_alloc(sock_net(newsk), &newsk->ns_tracker,
				      false, priority);
	}
	sk_node_init(&newsk->sk_node);
	sock_lock_init(newsk);
	/* Return the clone locked, as documented above. */
	bh_lock_sock(newsk);
	newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
	newsk->sk_backlog.len = 0;

	atomic_set(&newsk->sk_rmem_alloc, 0);

	/* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */
	refcount_set(&newsk->sk_wmem_alloc, 1);

	atomic_set(&newsk->sk_omem_alloc, 0);
	sk_init_common(newsk);

	/* Per-clone state copied from the parent must be reset here. */
	newsk->sk_dst_cache	= NULL;
	newsk->sk_dst_pending_confirm = 0;
	newsk->sk_wmem_queued	= 0;
	newsk->sk_forward_alloc = 0;
	newsk->sk_reserved_mem  = 0;
	atomic_set(&newsk->sk_drops, 0);
	newsk->sk_send_head	= NULL;
	newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
	atomic_set(&newsk->sk_zckey, 0);

	sock_reset_flag(newsk, SOCK_DONE);

	/* sk->sk_memcg will be populated at accept() time */
	newsk->sk_memcg = NULL;

	cgroup_sk_clone(&newsk->sk_cgrp_data);

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter != NULL)
		/* though it's an empty new sock, the charging may fail
		 * if sysctl_optmem_max was changed between creation of
		 * original socket and cloning
		 */
		is_charged = sk_filter_charge(newsk, filter);
	RCU_INIT_POINTER(newsk->sk_filter, filter);
	rcu_read_unlock();

	if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
		/* We need to make sure that we don't uncharge the new
		 * socket if we couldn't charge it in the first place
		 * as otherwise we uncharge the parent's filter.
		 */
		if (!is_charged)
			RCU_INIT_POINTER(newsk->sk_filter, NULL);
		sk_free_unlock_clone(newsk);
		newsk = NULL;
		goto out;
	}
	RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);

	if (bpf_sk_storage_clone(sk, newsk)) {
		sk_free_unlock_clone(newsk);
		newsk = NULL;
		goto out;
	}

	/* Clear sk_user_data if parent had the pointer tagged
	 * as not suitable for copying when cloning.
	 */
	if (sk_user_data_is_nocopy(newsk))
		newsk->sk_user_data = NULL;

	newsk->sk_err	   = 0;
	newsk->sk_err_soft = 0;
	newsk->sk_priority = 0;
	newsk->sk_incoming_cpu = raw_smp_processor_id();

	/* Before updating sk_refcnt, we must commit prior changes to memory
	 * (Documentation/RCU/rculist_nulls.rst for details)
	 */
	smp_wmb();
	/* NOTE(review): refcount starts at 2 — presumably one reference for
	 * the caller and one for the hash-table insertion the caller will
	 * perform; confirm against callers.
	 */
	refcount_set(&newsk->sk_refcnt, 2);

	sk_set_socket(newsk, NULL);
	sk_tx_queue_clear(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, NULL);

	if (newsk->sk_prot->sockets_allocated)
		sk_sockets_allocated_inc(newsk);

	if (sock_needs_netstamp(sk) && newsk->sk_flags & SK_FLAGS_TIMESTAMP)
		net_enable_timestamp();
out:
	return newsk;
}
EXPORT_SYMBOL_GPL(sk_clone_lock);
241587d11cebSArnaldo Carvalho de Melo 
/* Undo a failed sk_clone_lock(): @sk is still a raw byte copy of its
 * parent, so running its destructor would release resources the parent
 * still owns.  Clear the destructor, drop the bh lock taken by
 * sk_clone_lock(), then free the copy with a plain sk_free().
 */
void sk_free_unlock_clone(struct sock *sk)
{
	/* It is still raw copy of parent, so invalidate
	 * destructor and make plain sk_free() */
	sk->sk_destruct = NULL;
	bh_unlock_sock(sk);
	sk_free(sk);
}
EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
242594352d45SArnaldo Carvalho de Melo 
sk_dst_gso_max_size(struct sock * sk,struct dst_entry * dst)2426b1a78b9bSXin Long static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst)
24277c4e983cSAlexander Duyck {
2428b1a78b9bSXin Long 	bool is_ipv6 = false;
2429b1a78b9bSXin Long 	u32 max_size;
2430b1a78b9bSXin Long 
24317c4e983cSAlexander Duyck #if IS_ENABLED(CONFIG_IPV6)
2432b1a78b9bSXin Long 	is_ipv6 = (sk->sk_family == AF_INET6 &&
2433b1a78b9bSXin Long 		   !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr));
24347c4e983cSAlexander Duyck #endif
2435b1a78b9bSXin Long 	/* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */
2436b1a78b9bSXin Long 	max_size = is_ipv6 ? READ_ONCE(dst->dev->gso_max_size) :
2437b1a78b9bSXin Long 			READ_ONCE(dst->dev->gso_ipv4_max_size);
2438b1a78b9bSXin Long 	if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk))
2439b1a78b9bSXin Long 		max_size = GSO_LEGACY_MAX_SIZE;
2440b1a78b9bSXin Long 
2441b1a78b9bSXin Long 	return max_size - (MAX_TCP_HEADER + 1);
24427c4e983cSAlexander Duyck }
24437c4e983cSAlexander Duyck 
/* Derive the socket's offload capabilities (sk_route_caps, GSO size and
 * segment limits) from the output device of @dst, then install @dst as
 * the socket's cached route.  The caps must be computed before
 * sk_dst_set() publishes the route.
 */
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
	u32 max_segs = 1;

	sk->sk_route_caps = dst->dev->features;
	if (sk_is_tcp(sk))
		sk->sk_route_caps |= NETIF_F_GSO;
	if (sk->sk_route_caps & NETIF_F_GSO)
		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
	if (unlikely(sk->sk_gso_disabled))
		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
	if (sk_can_gso(sk)) {
		/* When an xfrm transform adds a header that the device
		 * cannot offload, fall back to software segmentation.
		 */
		if (dst->header_len && !xfrm_dst_offload_ok(dst)) {
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
		} else {
			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
			sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst);
			/* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
			max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
		}
	}
	sk->sk_gso_max_segs = max_segs;
	sk_dst_set(sk, dst);
}
EXPORT_SYMBOL_GPL(sk_setup_caps);
24699958089aSAndi Kleen 
24701da177e4SLinus Torvalds /*
24711da177e4SLinus Torvalds  *	Simple resource managers for sockets.
24721da177e4SLinus Torvalds  */
24731da177e4SLinus Torvalds 
24741da177e4SLinus Torvalds 
24751da177e4SLinus Torvalds /*
24761da177e4SLinus Torvalds  * Write buffer destructor automatically called from kfree_skb.
24771da177e4SLinus Torvalds  */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;
	bool free;

	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
		/* Fast path for RCU-freed sockets using the default
		 * write_space callback: release the whole charge at once
		 * and wake writers under rcu_read_lock(), which keeps the
		 * sock valid even once the last wmem reference is gone.
		 */
		if (sock_flag(sk, SOCK_RCU_FREE) &&
		    sk->sk_write_space == sock_def_write_space) {
			rcu_read_lock();
			free = refcount_sub_and_test(len, &sk->sk_wmem_alloc);
			sock_def_write_space_wfree(sk);
			rcu_read_unlock();
			if (unlikely(free))
				__sk_free(sk);
			return;
		}

		/*
		 * Keep a reference on sk_wmem_alloc, this will be released
		 * after sk_write_space() call
		 */
		WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc));
		sk->sk_write_space(sk);
		len = 1;
	}
	/*
	 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
	 * could not do because of in-flight packets
	 */
	if (refcount_sub_and_test(len, &sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sock_wfree);
25121da177e4SLinus Torvalds 
/* This variant of sock_wfree() is used by TCP,
 * since it sets SOCK_USE_WRITE_QUEUE.
 * It skips the sk_write_space() bookkeeping in sock_wfree() and only
 * releases the wmem charge, freeing the sock if it was the last one.
 */
void __sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
		__sk_free(sk);
}
25221d2077acSEric Dumazet }
25231d2077acSEric Dumazet 
/* Attach @skb to @sk for write accounting: charge skb->truesize to
 * sk_wmem_alloc and set sock_wfree() as the destructor to uncharge it.
 * Non-full sockets (request/timewait) instead take a plain reference
 * and use sock_edemux.
 */
void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
	skb_orphan(skb);
	skb->sk = sk;
#ifdef CONFIG_INET
	if (unlikely(!sk_fullsock(sk))) {
		skb->destructor = sock_edemux;
		sock_hold(sk);
		return;
	}
#endif
	skb->destructor = sock_wfree;
	skb_set_hash_from_sk(skb, sk);
	/*
	 * We used to take a refcount on sk, but following operation
	 * is enough to guarantee sk_free() wont free this sock until
	 * all in-flight packets are completed
	 */
	refcount_add(skb->truesize, &sk->sk_wmem_alloc);
}
EXPORT_SYMBOL(skb_set_owner_w);
25459e17f8a4SEric Dumazet 
can_skb_orphan_partial(const struct sk_buff * skb)254641477662SJakub Kicinski static bool can_skb_orphan_partial(const struct sk_buff *skb)
254741477662SJakub Kicinski {
254841477662SJakub Kicinski #ifdef CONFIG_TLS_DEVICE
254941477662SJakub Kicinski 	/* Drivers depend on in-order delivery for crypto offload,
255041477662SJakub Kicinski 	 * partial orphan breaks out-of-order-OK logic.
255141477662SJakub Kicinski 	 */
255241477662SJakub Kicinski 	if (skb->decrypted)
255341477662SJakub Kicinski 		return false;
255441477662SJakub Kicinski #endif
255541477662SJakub Kicinski 	return (skb->destructor == sock_wfree ||
255641477662SJakub Kicinski 		(IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
255741477662SJakub Kicinski }
255841477662SJakub Kicinski 
/* This helper is used by netem, as it can hold packets in its
 * delay queue. We want to allow the owner socket to send more
 * packets, as if they were already TX completed by a typical driver.
 * But we also want to keep skb->sk set because some packet schedulers
 * rely on it (sch_fq for example).
 */
void skb_orphan_partial(struct sk_buff *skb)
{
	/* Pure TCP acks carry no payload charge worth releasing. */
	if (skb_is_tcp_pure_ack(skb))
		return;

	/* Swap the wmem charge for a plain sock reference when safe;
	 * otherwise fully orphan the skb.
	 */
	if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk))
		return;

	skb_orphan(skb);
}
EXPORT_SYMBOL(skb_orphan_partial);
2576f2f872f9SEric Dumazet 
25771da177e4SLinus Torvalds /*
25781da177e4SLinus Torvalds  * Read buffer destructor automatically called from kfree_skb.
25791da177e4SLinus Torvalds  */
sock_rfree(struct sk_buff * skb)25801da177e4SLinus Torvalds void sock_rfree(struct sk_buff *skb)
25811da177e4SLinus Torvalds {
25821da177e4SLinus Torvalds 	struct sock *sk = skb->sk;
2583d361fd59SEric Dumazet 	unsigned int len = skb->truesize;
25841da177e4SLinus Torvalds 
2585d361fd59SEric Dumazet 	atomic_sub(len, &sk->sk_rmem_alloc);
2586d361fd59SEric Dumazet 	sk_mem_uncharge(sk, len);
25871da177e4SLinus Torvalds }
25882a91525cSEric Dumazet EXPORT_SYMBOL(sock_rfree);
25891da177e4SLinus Torvalds 
/*
 * Buffer destructor for skbs that are not used directly in read or write
 * path, e.g. for error handler skbs. Automatically called from kfree_skb.
 */
void sock_efree(struct sk_buff *skb)
{
	/* Drop the socket reference the skb holds (taken by whoever
	 * attached the skb to the sock — not visible in this file).
	 */
	sock_put(skb->sk);
}
EXPORT_SYMBOL(sock_efree);
259962bccb8cSAlexander Duyck 
/* Buffer destructor for prefetch/receive path where reference count may
 * not be held, e.g. for listen sockets.
 */
#ifdef CONFIG_INET
void sock_pfree(struct sk_buff *skb)
{
	/* Only drop a reference when one was actually taken;
	 * sk_is_refcounted() makes that determination.
	 */
	if (sk_is_refcounted(skb->sk))
		sock_gen_put(skb->sk);
}
EXPORT_SYMBOL(sock_pfree);
#endif /* CONFIG_INET */
2611cf7fbe66SJoe Stringer 
sock_i_uid(struct sock * sk)2612976d0201SEric W. Biederman kuid_t sock_i_uid(struct sock *sk)
26131da177e4SLinus Torvalds {
2614976d0201SEric W. Biederman 	kuid_t uid;
26151da177e4SLinus Torvalds 
2616f064af1eSEric Dumazet 	read_lock_bh(&sk->sk_callback_lock);
2617976d0201SEric W. Biederman 	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
2618f064af1eSEric Dumazet 	read_unlock_bh(&sk->sk_callback_lock);
26191da177e4SLinus Torvalds 	return uid;
26201da177e4SLinus Torvalds }
26212a91525cSEric Dumazet EXPORT_SYMBOL(sock_i_uid);
26221da177e4SLinus Torvalds 
/* Return the inode number of @sk's backing socket, or 0 if detached.
 * Takes sk_callback_lock without disabling BHs — callers must ensure
 * the locking context is safe (see sock_i_ino() for the BH-safe wrapper).
 */
unsigned long __sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	read_lock(&sk->sk_callback_lock);
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
	read_unlock(&sk->sk_callback_lock);
	return ino;
}
EXPORT_SYMBOL(__sock_i_ino);
263325a9c8a4SKuniyuki Iwashima 
/* BH-safe wrapper around __sock_i_ino(): disables bottom halves while
 * taking sk_callback_lock, so it may be used from process context even
 * when the lock can also be taken in softirq context.
 */
unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	local_bh_disable();
	ino = __sock_i_ino(sk);
	local_bh_enable();
	return ino;
}
EXPORT_SYMBOL(sock_i_ino);
26441da177e4SLinus Torvalds 
26451da177e4SLinus Torvalds /*
26461da177e4SLinus Torvalds  * Allocate a skb from the socket's send buffer.
26471da177e4SLinus Torvalds  */
/* Allocate an skb charged against @sk's send buffer.  Unless @force is
 * set, fail when sk_wmem_alloc already meets or exceeds sk_sndbuf.
 * Returns NULL on refusal or allocation failure.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	struct sk_buff *skb;

	if (!force &&
	    refcount_read(&sk->sk_wmem_alloc) >= READ_ONCE(sk->sk_sndbuf))
		return NULL;

	skb = alloc_skb(size, priority);
	if (!skb)
		return NULL;

	skb_set_owner_w(skb, sk);
	return skb;
}
EXPORT_SYMBOL(sock_wmalloc);
26631da177e4SLinus Torvalds 
/* Destructor for skbs allocated by sock_omalloc(): uncharge the skb
 * from the socket's option memory accounting.
 */
static void sock_ofree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	atomic_sub(skb->truesize, &sk->sk_omem_alloc);
}
267098ba0bd5SWillem de Bruijn 
/* Allocate an skb charged to @sk's option memory (sk_omem_alloc),
 * bounded by sysctl_optmem_max; sock_ofree() uncharges it on free.
 * No sock reference is taken — presumably callers guarantee @sk
 * outlives the skb (TODO(review): confirm against callers).
 */
struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
			     gfp_t priority)
{
	struct sk_buff *skb;

	/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
	if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
	    READ_ONCE(sysctl_optmem_max))
		return NULL;

	skb = alloc_skb(size, priority);
	if (!skb)
		return NULL;

	atomic_add(skb->truesize, &sk->sk_omem_alloc);
	skb->sk = sk;
	skb->destructor = sock_ofree;
	return skb;
}
269098ba0bd5SWillem de Bruijn 
26911da177e4SLinus Torvalds /*
26921da177e4SLinus Torvalds  * Allocate a memory block from the socket's option memory buffer.
26931da177e4SLinus Torvalds  */
/*
 * Allocate a memory block charged to the socket's option memory buffer,
 * refusing requests that would push sk_omem_alloc past sysctl_optmem_max.
 * Returns NULL on refusal or allocation failure; free with sock_kfree_s().
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
	int optmem_max = READ_ONCE(sysctl_optmem_max);
	void *mem;

	if ((unsigned int)size > optmem_max ||
	    atomic_read(&sk->sk_omem_alloc) + size >= optmem_max)
		return NULL;

	/* First do the add, to avoid the race if kmalloc might sleep. */
	atomic_add(size, &sk->sk_omem_alloc);
	mem = kmalloc(size, priority);
	if (!mem)
		atomic_sub(size, &sk->sk_omem_alloc);
	return mem;
}
EXPORT_SYMBOL(sock_kmalloc);
27131da177e4SLinus Torvalds 
/* Free an option memory block. Note, we actually want the inline
 * here as this allows gcc to detect the nullify and fold away the
 * condition entirely.
 */
static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
				  const bool nullify)
{
	if (WARN_ON_ONCE(!mem))
		return;
	/* kfree_sensitive() zeroes the block before freeing, for data
	 * such as keys that must not linger in memory.
	 */
	if (nullify)
		kfree_sensitive(mem);
	else
		kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}
272979e88659SDaniel Borkmann 
/* Free @mem (of @size bytes) allocated via sock_kmalloc(), uncharging
 * the socket's option memory.  Non-sensitive variant.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	__sock_kfree_s(sk, mem, size, false);
}
EXPORT_SYMBOL(sock_kfree_s);
27351da177e4SLinus Torvalds 
/* Like sock_kfree_s(), but zeroes the block before freeing — use for
 * security-sensitive data (e.g. keys).
 */
void sock_kzfree_s(struct sock *sk, void *mem, int size)
{
	__sock_kfree_s(sk, mem, size, true);
}
EXPORT_SYMBOL(sock_kzfree_s);
274179e88659SDaniel Borkmann 
/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think, these locks should be removed for datagram sockets.
 */
/* Sleep until send-buffer space becomes available, the timeout expires,
 * a signal arrives, the socket is shut down for sending, or an error is
 * pending.  Returns the remaining timeout.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		/* Set SOCK_NOSPACE before re-checking the condition so a
		 * concurrent wakeup is not lost.
		 */
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
			break;
		if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
			break;
		if (READ_ONCE(sk->sk_err))
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk_sleep(sk), &wait);
	return timeo;
}
27681da177e4SLinus Torvalds 
27691da177e4SLinus Torvalds 
27701da177e4SLinus Torvalds /*
27711da177e4SLinus Torvalds  *	Generic send/receive buffer handlers
27721da177e4SLinus Torvalds  */
27731da177e4SLinus Torvalds 
/* Allocate an skb with @header_len linear bytes plus @data_len in page
 * fragments (up to order @max_page_order), charged to @sk's send buffer.
 * Blocks until sk_wmem_alloc drops below sk_sndbuf unless @noblock.
 * On failure returns NULL and stores the error in *@errcode
 * (-EPIPE on shutdown, -EAGAIN on timeout, restart codes on signal).
 */
struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
				     unsigned long data_len, int noblock,
				     int *errcode, int max_page_order)
{
	struct sk_buff *skb;
	long timeo;
	int err;

	timeo = sock_sndtimeo(sk, noblock);
	for (;;) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
			goto failure;

		/* Enough room in the send buffer: go allocate. */
		if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
			break;

		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}
	/* alloc_skb_with_frags() fills *errcode itself on failure. */
	skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
				   errcode, sk->sk_allocation);
	if (skb)
		skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}
EXPORT_SYMBOL(sock_alloc_send_pskb);
28171da177e4SLinus Torvalds 
/* Parse one SOL_SOCKET control message into @sockc.  Returns 0 on
 * success, -EINVAL on malformed/unknown cmsg, -EPERM when the caller
 * lacks the capability required for SO_MARK.
 */
int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
		     struct sockcm_cookie *sockc)
{
	u32 tsflags;

	switch (cmsg->cmsg_type) {
	case SO_MARK:
		/* Setting the mark requires CAP_NET_RAW or CAP_NET_ADMIN
		 * in the socket's network namespace.
		 */
		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
		    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
			return -EPERM;
		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
			return -EINVAL;
		sockc->mark = *(u32 *)CMSG_DATA(cmsg);
		break;
	case SO_TIMESTAMPING_OLD:
	case SO_TIMESTAMPING_NEW:
		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
			return -EINVAL;

		/* Only per-packet TX-record flags may be set this way. */
		tsflags = *(u32 *)CMSG_DATA(cmsg);
		if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
			return -EINVAL;

		sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
		sockc->tsflags |= tsflags;
		break;
	case SCM_TXTIME:
		/* Valid only when SO_TXTIME was enabled on the socket. */
		if (!sock_flag(sk, SOCK_TXTIME))
			return -EINVAL;
		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64)))
			return -EINVAL;
		sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
		break;
	/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
	case SCM_RIGHTS:
	case SCM_CREDENTIALS:
		break;
	default:
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(__sock_cmsg_send);
286139771b12SWillem de Bruijn 
sock_cmsg_send(struct sock * sk,struct msghdr * msg,struct sockcm_cookie * sockc)286239771b12SWillem de Bruijn int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
286339771b12SWillem de Bruijn 		   struct sockcm_cookie *sockc)
286439771b12SWillem de Bruijn {
286539771b12SWillem de Bruijn 	struct cmsghdr *cmsg;
286639771b12SWillem de Bruijn 	int ret;
286739771b12SWillem de Bruijn 
286839771b12SWillem de Bruijn 	for_each_cmsghdr(cmsg, msg) {
286939771b12SWillem de Bruijn 		if (!CMSG_OK(msg, cmsg))
287039771b12SWillem de Bruijn 			return -EINVAL;
287139771b12SWillem de Bruijn 		if (cmsg->cmsg_level != SOL_SOCKET)
287239771b12SWillem de Bruijn 			continue;
2873233baf9aSxu xin 		ret = __sock_cmsg_send(sk, cmsg, sockc);
287439771b12SWillem de Bruijn 		if (ret)
287539771b12SWillem de Bruijn 			return ret;
2876f28ea365SEdward Jee 	}
2877f28ea365SEdward Jee 	return 0;
2878f28ea365SEdward Jee }
2879f28ea365SEdward Jee EXPORT_SYMBOL(sock_cmsg_send);
2880f28ea365SEdward Jee 
sk_enter_memory_pressure(struct sock * sk)288106044751SEric Dumazet static void sk_enter_memory_pressure(struct sock *sk)
288206044751SEric Dumazet {
288306044751SEric Dumazet 	if (!sk->sk_prot->enter_memory_pressure)
288406044751SEric Dumazet 		return;
288506044751SEric Dumazet 
288606044751SEric Dumazet 	sk->sk_prot->enter_memory_pressure(sk);
288706044751SEric Dumazet }
288806044751SEric Dumazet 
/* Clear memory pressure for the protocol: call its hook if present
 * (with a devirtualized fast path for TCP via INDIRECT_CALL_INET_1),
 * otherwise reset the protocol's shared memory_pressure flag directly.
 */
static void sk_leave_memory_pressure(struct sock *sk)
{
	if (sk->sk_prot->leave_memory_pressure) {
		INDIRECT_CALL_INET_1(sk->sk_prot->leave_memory_pressure,
				     tcp_leave_memory_pressure, sk);
	} else {
		unsigned long *memory_pressure = sk->sk_prot->memory_pressure;

		/* READ_ONCE/WRITE_ONCE: the flag is written without the
		 * socket lock from multiple contexts.
		 */
		if (memory_pressure && READ_ONCE(*memory_pressure))
			WRITE_ONCE(*memory_pressure, 0);
	}
}
290106044751SEric Dumazet 
/* When enabled, skb_page_frag_refill() skips high-order page allocation
 * and falls back to single pages.  Presumably toggled via a sysctl —
 * TODO(review): the control point is outside this file view.
 */
DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
29035640f768SEric Dumazet 
/**
 * skb_page_frag_refill - check that a page_frag contains enough room
 * @sz: minimum size of the fragment we want to get
 * @pfrag: pointer to page_frag
 * @gfp: priority for memory allocation
 *
 * Note: While this allocator tries to use high order pages, there is
 * no guarantee that allocations succeed. Therefore, @sz MUST be
 * less or equal than PAGE_SIZE.
 */
bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
{
	if (pfrag->page) {
		/* Sole owner of the page: safe to rewind and reuse it. */
		if (page_ref_count(pfrag->page) == 1) {
			pfrag->offset = 0;
			return true;
		}
		if (pfrag->offset + sz <= pfrag->size)
			return true;
		put_page(pfrag->page);
	}

	pfrag->offset = 0;
	/* Try a high-order page first, unless disabled via static key. */
	if (SKB_FRAG_PAGE_ORDER &&
	    !static_branch_unlikely(&net_high_order_alloc_disable_key)) {
		/* Avoid direct reclaim but allow kswapd to wake */
		pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
					  __GFP_COMP | __GFP_NOWARN |
					  __GFP_NORETRY,
					  SKB_FRAG_PAGE_ORDER);
		if (likely(pfrag->page)) {
			pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
			return true;
		}
	}
	/* Fall back to a single page with the caller's full gfp mask. */
	pfrag->page = alloc_page(gfp);
	if (likely(pfrag->page)) {
		pfrag->size = PAGE_SIZE;
		return true;
	}
	return false;
}
EXPORT_SYMBOL(skb_page_frag_refill);
2947400dfd3aSEric Dumazet 
sk_page_frag_refill(struct sock * sk,struct page_frag * pfrag)2948400dfd3aSEric Dumazet bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
2949400dfd3aSEric Dumazet {
2950400dfd3aSEric Dumazet 	if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
2951400dfd3aSEric Dumazet 		return true;
2952400dfd3aSEric Dumazet 
29535640f768SEric Dumazet 	sk_enter_memory_pressure(sk);
29545640f768SEric Dumazet 	sk_stream_moderate_sndbuf(sk);
29555640f768SEric Dumazet 	return false;
29565640f768SEric Dumazet }
29575640f768SEric Dumazet EXPORT_SYMBOL(sk_page_frag_refill);
29585640f768SEric Dumazet 
/* Slow path of lock_sock(): sleep until the socket is no longer owned
 * by another task.  Called with sk_lock.slock held; drops and retakes
 * it around each schedule(), and returns with it held.
 */
void __lock_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		/* Ownership is re-checked under the spinlock. */
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}
29761da177e4SLinus Torvalds 
/* Process every skb queued on the socket backlog while the socket was
 * owned.  Called with sk_lock.slock held; the spinlock is dropped while
 * skbs are fed to sk_backlog_rcv() and retaken to grab the next batch.
 */
void __release_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	struct sk_buff *skb, *next;

	while ((skb = sk->sk_backlog.head) != NULL) {
		/* Detach the whole batch, then process it lockless. */
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;

		spin_unlock_bh(&sk->sk_lock.slock);

		do {
			next = skb->next;
			prefetch(next);
			DEBUG_NET_WARN_ON_ONCE(skb_dst_is_noref(skb));
			skb_mark_not_on_list(skb);
			sk_backlog_rcv(sk, skb);

			cond_resched();

			skb = next;
		} while (skb != NULL);

		spin_lock_bh(&sk->sk_lock.slock);
	}

	/*
	 * Doing the zeroing here guarantee we can not loop forever
	 * while a wild producer attempts to flood us.
	 */
	sk->sk_backlog.len = 0;
}
30091da177e4SLinus Torvalds 
/* Flush the socket backlog immediately: take sk_lock.slock, drain all
 * backlogged skbs through __release_sock(), and drop the lock again.
 */
void __sk_flush_backlog(struct sock *sk)
{
	spin_lock_bh(&sk->sk_lock.slock);
	__release_sock(sk);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL_GPL(__sk_flush_backlog);
3017d41a69f1SEric Dumazet 
/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 * @skb:   last skb seen on sk_receive_queue
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 *
 * Return: value of sk_wait_event() for the condition "tail of the
 * receive queue differs from @skb".
 */
int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	int rc;

	add_wait_queue(sk_sleep(sk), &wait);
	/* Advertise that somebody is blocked waiting for input. */
	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	remove_wait_queue(sk_sleep(sk), &wait);
	return rc;
}
EXPORT_SYMBOL(sk_wait_data);
30421da177e4SLinus Torvalds 
/**
 *	__sk_mem_raise_allocated - increase memory_allocated
 *	@sk: socket
 *	@size: memory size to allocate
 *	@amt: pages to allocate
 *	@kind: allocation type
 *
 *	Similar to __sk_mem_schedule(), but does not update sk_forward_alloc
 *
 *	Return: 1 if the charge is accepted, 0 if it was refused and fully
 *	rolled back (both the global and memcg counters).
 */
int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
	bool memcg_charge = mem_cgroup_sockets_enabled && sk->sk_memcg;
	struct proto *prot = sk->sk_prot;
	bool charged = true;
	long allocated;

	sk_memory_allocated_add(sk, amt);
	allocated = sk_memory_allocated(sk);
	/* Charge the memcg first; on failure fall through to the
	 * last-ditch stream check below with charged == false.
	 */
	if (memcg_charge &&
	    !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt,
						gfp_memcg_charge())))
		goto suppress_allocation;

	/* Under limit. */
	if (allocated <= sk_prot_mem_limits(sk, 0)) {
		sk_leave_memory_pressure(sk);
		return 1;
	}

	/* Under pressure. */
	if (allocated > sk_prot_mem_limits(sk, 1))
		sk_enter_memory_pressure(sk);

	/* Over hard limit. */
	if (allocated > sk_prot_mem_limits(sk, 2))
		goto suppress_allocation;

	/* guarantee minimum buffer size under pressure */
	if (kind == SK_MEM_RECV) {
		if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot))
			return 1;

	} else { /* SK_MEM_SEND */
		int wmem0 = sk_get_wmem0(sk, prot);

		if (sk->sk_type == SOCK_STREAM) {
			if (sk->sk_wmem_queued < wmem0)
				return 1;
		} else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) {
				return 1;
		}
	}

	if (sk_has_memory_pressure(sk)) {
		u64 alloc;

		if (!sk_under_memory_pressure(sk))
			return 1;
		/* Accept if this socket's usage stays below the per-socket
		 * average implied by the hard limit.
		 */
		alloc = sk_sockets_allocated_read_positive(sk);
		if (sk_prot_mem_limits(sk, 2) > alloc *
		    sk_mem_pages(sk->sk_wmem_queued +
				 atomic_read(&sk->sk_rmem_alloc) +
				 sk->sk_forward_alloc))
			return 1;
	}

suppress_allocation:

	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
		sk_stream_moderate_sndbuf(sk);

		/* Fail only if socket is _under_ its sndbuf.
		 * In this case we cannot block, so that we have to fail.
		 */
		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) {
			/* Force charge with __GFP_NOFAIL */
			if (memcg_charge && !charged) {
				mem_cgroup_charge_skmem(sk->sk_memcg, amt,
					gfp_memcg_charge() | __GFP_NOFAIL);
			}
			return 1;
		}
	}

	if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
		trace_sock_exceed_buf_limit(sk, prot, allocated, kind);

	/* Allocation refused: roll back the charges taken above. */
	sk_memory_allocated_sub(sk, amt);

	if (memcg_charge && charged)
		mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);

	return 0;
}
3137f8c3bf00SPaolo Abeni 
/**
 *	__sk_mem_schedule - increase sk_forward_alloc and memory_allocated
 *	@sk: socket
 *	@size: memory size to allocate
 *	@kind: allocation type
 *
 *	If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
 *	rmem allocation. This function assumes that protocols which have
 *	memory_pressure use sk_wmem_queued as write buffer accounting.
 *
 *	Return: 1 on success, 0 if the charge was refused (sk_forward_alloc
 *	is then left unchanged).
 */
int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
	int ret, amt = sk_mem_pages(size);

	/* Optimistically grow forward_alloc; undone below on failure. */
	sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
	ret = __sk_mem_raise_allocated(sk, size, amt, kind);
	if (!ret)
		sk_forward_alloc_add(sk, -(amt << PAGE_SHIFT));
	return ret;
}
EXPORT_SYMBOL(__sk_mem_schedule);
31593ab224beSHideo Aoki 
/**
 *	__sk_mem_reduce_allocated - reclaim memory_allocated
 *	@sk: socket
 *	@amount: number of quanta (pages)
 *
 *	Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc
 */
void __sk_mem_reduce_allocated(struct sock *sk, int amount)
{
	sk_memory_allocated_sub(sk, amount);

	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
		mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);

	/* Leave pressure state once usage drops back below the low limit. */
	if (sk_under_global_memory_pressure(sk) &&
	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
		sk_leave_memory_pressure(sk);
}
3178f8c3bf00SPaolo Abeni 
/**
 *	__sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
 *	@sk: socket
 *	@amount: number of bytes (rounded down to a PAGE_SIZE multiple)
 */
void __sk_mem_reclaim(struct sock *sk, int amount)
{
	/* Only whole pages are returned; any sub-page remainder stays
	 * accounted in sk_forward_alloc.
	 */
	amount >>= PAGE_SHIFT;
	sk_forward_alloc_add(sk, -(amount << PAGE_SHIFT));
	__sk_mem_reduce_allocated(sk, amount);
}
EXPORT_SYMBOL(__sk_mem_reclaim);
31913ab224beSHideo Aoki 
/* Default SO_PEEK_OFF setter: store @val with WRITE_ONCE so lockless
 * readers of sk_peek_off see a consistent value.  Always succeeds.
 */
int sk_set_peek_off(struct sock *sk, int val)
{
	WRITE_ONCE(sk->sk_peek_off, val);
	return 0;
}
EXPORT_SYMBOL_GPL(sk_set_peek_off);
31983ab224beSHideo Aoki 
31991da177e4SLinus Torvalds /*
32001da177e4SLinus Torvalds  * Set of default routines for initialising struct proto_ops when
32011da177e4SLinus Torvalds  * the protocol does not support a particular function. In certain
32021da177e4SLinus Torvalds  * cases where it makes no sense for a protocol to have a "do nothing"
32031da177e4SLinus Torvalds  * function, some default processing is provided.
32041da177e4SLinus Torvalds  */
32051da177e4SLinus Torvalds 
/* Default .bind: protocol does not support bind(). */
int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_bind);
32111da177e4SLinus Torvalds 
/* Default .connect: protocol does not support connect(). */
int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_connect);
32181da177e4SLinus Torvalds 
/* Default .socketpair: protocol does not support socketpair(). */
int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_socketpair);
32241da177e4SLinus Torvalds 
/* Default .accept: protocol does not support accept(). */
int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
		   bool kern)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_accept);
32311da177e4SLinus Torvalds 
/* Default .getname: protocol does not support getsockname()/getpeername(). */
int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int peer)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getname);
32381da177e4SLinus Torvalds 
/* Default .ioctl: protocol does not support socket ioctls. */
int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_ioctl);
32441da177e4SLinus Torvalds 
/* Default .listen: protocol does not support listen(). */
int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_listen);
32501da177e4SLinus Torvalds 
/* Default .shutdown: protocol does not support shutdown(). */
int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_shutdown);
32561da177e4SLinus Torvalds 
/* Default .sendmsg: protocol does not support sending. */
int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_sendmsg);
32621da177e4SLinus Torvalds 
/* Default locked-sendmsg stub: protocol does not support it. */
int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_sendmsg_locked);
3268306b13ebSTom Herbert 
/* Default .recvmsg: protocol does not support receiving. */
int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
		    int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_recvmsg);
32751da177e4SLinus Torvalds 
/* Default .mmap: protocol does not support mmap on the socket. */
int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}
EXPORT_SYMBOL(sock_no_mmap);
32821da177e4SLinus Torvalds 
/*
 * When a file is received (via SCM_RIGHTS, etc), we must bump the
 * various sock-based usage counts.
 */
void __receive_sock(struct file *file)
{
	struct socket *sock;

	/* sock_from_file() presumably yields NULL for non-socket files,
	 * in which case there is nothing to update.
	 */
	sock = sock_from_file(file);
	if (sock) {
		sock_update_netprioidx(&sock->sk->sk_cgrp_data);
		sock_update_classid(&sock->sk->sk_cgrp_data);
	}
}
3297d9539752SKees Cook 
32981da177e4SLinus Torvalds /*
32991da177e4SLinus Torvalds  *	Default Socket Callbacks
33001da177e4SLinus Torvalds  */
33011da177e4SLinus Torvalds 
/* Default ->sk_state_change callback: wake all sleepers on the socket's
 * wait queue.  sk_wq is RCU-protected, hence the read-side section.
 */
static void sock_def_wakeup(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_all(&wq->wait);
	rcu_read_unlock();
}
33121da177e4SLinus Torvalds 
/* Default ->sk_error_report callback: wake pollers with EPOLLERR and
 * notify async (SIGIO) waiters with POLL_ERR.
 */
static void sock_def_error_report(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_poll(&wq->wait, EPOLLERR);
	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
	rcu_read_unlock();
}
33241da177e4SLinus Torvalds 
/* Default ->sk_data_ready callback: wake pollers with the input-ready
 * EPOLL mask and notify async (SIGIO) waiters with POLL_IN.
 */
void sock_def_readable(struct sock *sk)
{
	struct socket_wq *wq;

	trace_sk_data_ready(sk);

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
						EPOLLRDNORM | EPOLLRDBAND);
	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
	rcu_read_unlock();
}
33391da177e4SLinus Torvalds 
/* Default ->sk_write_space callback: wake writers and notify async
 * (SIGIO) waiters once the socket is writeable again.
 */
static void sock_def_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if (sock_writeable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
						EPOLLWRNORM | EPOLLWRBAND);

		/* Should agree with poll, otherwise some programs break */
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}

	rcu_read_unlock();
}
33611da177e4SLinus Torvalds 
/* An optimised version of sock_def_write_space(), should only be called
 * for SOCK_RCU_FREE sockets under RCU read section and after putting
 * ->sk_wmem_alloc.
 */
static void sock_def_write_space_wfree(struct sock *sk)
{
	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if (sock_writeable(sk)) {
		struct socket_wq *wq = rcu_dereference(sk->sk_wq);

		/* rely on refcount_sub from sock_wfree() */
		smp_mb__after_atomic();
		if (wq && waitqueue_active(&wq->wait))
			wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
						EPOLLWRNORM | EPOLLWRBAND);

		/* Should agree with poll, otherwise some programs break */
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
}
33840a8afd9fSPavel Begunkov 
/* Default ->sk_destruct callback: nothing extra to tear down. */
static void sock_def_destruct(struct sock *sk)
{
}
33881da177e4SLinus Torvalds 
/* Deliver SIGURG to the socket's owner, if one is registered on the
 * backing file, and kick async waiters with POLL_PRI (out-of-band data).
 */
void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
}
EXPORT_SYMBOL(sk_send_sigurg);
33961da177e4SLinus Torvalds 
/* (Re)arm @timer to fire at @expires.  If the timer was not already
 * pending (mod_timer() returned 0), take a reference on @sk so the sock
 * outlives the pending timer; dropped by sk_stop_timer() or, presumably,
 * by the timer handler itself — confirm per protocol.
 */
void sk_reset_timer(struct sock *sk, struct timer_list* timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}
EXPORT_SYMBOL(sk_reset_timer);
34041da177e4SLinus Torvalds 
/* Cancel @timer; if it was still pending, drop the sock reference that
 * sk_reset_timer() took when arming it.
 */
void sk_stop_timer(struct sock *sk, struct timer_list* timer)
{
	if (del_timer(timer))
		__sock_put(sk);
}
EXPORT_SYMBOL(sk_stop_timer);
34111da177e4SLinus Torvalds 
/* Like sk_stop_timer(), but uses del_timer_sync() so a concurrently
 * running timer handler has finished before we return.
 */
void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer)
{
	if (del_timer_sync(timer))
		__sock_put(sk);
}
EXPORT_SYMBOL(sk_stop_timer_sync);
341808b81d87SGeliang Tang 
/* Initialise a freshly allocated struct sock to its generic defaults and
 * attach it to @sock (which may be NULL for kernel-internal socks),
 * recording @uid as the owning user.  Must run before the sock is
 * published anywhere: sk_refcnt is only set at the very end, after an
 * smp_wmb(), so lockless lookups never see a half-initialised sock.
 */
void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid)
{
	sk_init_common(sk);
	sk->sk_send_head	=	NULL;

	/* Timer function is installed later by the protocol. */
	timer_setup(&sk->sk_timer, NULL, 0);

	sk->sk_allocation	=	GFP_KERNEL;
	sk->sk_rcvbuf		=	READ_ONCE(sysctl_rmem_default);
	sk->sk_sndbuf		=	READ_ONCE(sysctl_wmem_default);
	sk->sk_state		=	TCP_CLOSE;
	sk->sk_use_task_frag	=	true;
	sk_set_socket(sk, sock);

	sock_set_flag(sk, SOCK_ZAPPED);

	if (sock) {
		sk->sk_type	=	sock->type;
		RCU_INIT_POINTER(sk->sk_wq, &sock->wq);
		sock->sk	=	sk;
	} else {
		RCU_INIT_POINTER(sk->sk_wq, NULL);
	}
	sk->sk_uid	=	uid;

	rwlock_init(&sk->sk_callback_lock);
	/* Kernel sockets get their own lockdep class so they don't mix
	 * with user-socket lock dependencies.
	 */
	if (sk->sk_kern_sock)
		lockdep_set_class_and_name(
			&sk->sk_callback_lock,
			af_kern_callback_keys + sk->sk_family,
			af_family_kern_clock_key_strings[sk->sk_family]);
	else
		lockdep_set_class_and_name(
			&sk->sk_callback_lock,
			af_callback_keys + sk->sk_family,
			af_family_clock_key_strings[sk->sk_family]);

	/* Default event callbacks; protocols may override them. */
	sk->sk_state_change	=	sock_def_wakeup;
	sk->sk_data_ready	=	sock_def_readable;
	sk->sk_write_space	=	sock_def_write_space;
	sk->sk_error_report	=	sock_def_error_report;
	sk->sk_destruct		=	sock_def_destruct;

	sk->sk_frag.page	=	NULL;
	sk->sk_frag.offset	=	0;
	sk->sk_peek_off		=	-1;

	sk->sk_peer_pid 	=	NULL;
	sk->sk_peer_cred	=	NULL;
	spin_lock_init(&sk->sk_peer_lock);

	sk->sk_write_pending	=	0;
	sk->sk_rcvlowat		=	1;
	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;

	sk->sk_stamp = SK_DEFAULT_STAMP;
#if BITS_PER_LONG==32
	seqlock_init(&sk->sk_stamp_seq);
#endif
	atomic_set(&sk->sk_zckey, 0);

#ifdef CONFIG_NET_RX_BUSY_POLL
	sk->sk_napi_id		=	0;
	sk->sk_ll_usec		=	READ_ONCE(sysctl_net_busy_read);
#endif

	sk->sk_max_pacing_rate = ~0UL;
	sk->sk_pacing_rate = ~0UL;
	WRITE_ONCE(sk->sk_pacing_shift, 10);
	sk->sk_incoming_cpu = -1;

	sk_rx_queue_clear(sk);
	/*
	 * Before updating sk_refcnt, we must commit prior changes to memory
	 * (Documentation/RCU/rculist_nulls.rst for details)
	 */
	smp_wmb();
	refcount_set(&sk->sk_refcnt, 1);
	atomic_set(&sk->sk_drops, 0);
}
EXPORT_SYMBOL(sock_init_data_uid);
3501584f3742SPietro Borrello 
sock_init_data(struct socket * sock,struct sock * sk)3502584f3742SPietro Borrello void sock_init_data(struct socket *sock, struct sock *sk)
3503584f3742SPietro Borrello {
3504584f3742SPietro Borrello 	kuid_t uid = sock ?
3505584f3742SPietro Borrello 		SOCK_INODE(sock)->i_uid :
3506584f3742SPietro Borrello 		make_kuid(sock_net(sk)->user_ns, 0);
3507584f3742SPietro Borrello 
3508584f3742SPietro Borrello 	sock_init_data_uid(sock, sk, uid);
3509584f3742SPietro Borrello }
35102a91525cSEric Dumazet EXPORT_SYMBOL(sock_init_data);
35111da177e4SLinus Torvalds 
/* Acquire the socket "mutex" with lockdep subclass @subclass: mark
 * ownership under sk_lock.slock, sleeping in __lock_sock() if another
 * context already owns it.  On return only the owned flag is held; BHs
 * and the spinlock have been released.
 */
void lock_sock_nested(struct sock *sk, int subclass)
{
	/* The sk_lock has mutex_lock() semantics here. */
	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);

	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);
	if (sock_owned_by_user_nocheck(sk))
		__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(lock_sock_nested);
35251da177e4SLinus Torvalds 
/* Release the socket "mutex": flush any backlog that accumulated while
 * the lock was owned, run the protocol's release_cb() hook, clear
 * ownership and wake the next waiter on sk_lock.wq.
 */
void release_sock(struct sock *sk)
{
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);

	/* Warning : release_cb() might need to release sk ownership,
	 * ie call sock_release_ownership(sk) before us.
	 */
	if (sk->sk_prot->release_cb)
		sk->sk_prot->release_cb(sk);

	sock_release_ownership(sk);
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);
35441da177e4SLinus Torvalds 
/* Slow/fast dual-mode lock used via lock_sock_fast().  Returns false on
 * the uncontended fast path (caller proceeds with sk_lock.slock held and
 * BHs disabled), true when full ownership had to be taken the slow way
 * (slock released, owned flag set — i.e. a real lock_sock()).
 */
bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);

	if (!sock_owned_by_user_nocheck(sk)) {
		/*
		 * Fast path return with bottom halves disabled and
		 * sock::sk_lock.slock held.
		 *
		 * The 'mutex' is not contended and holding
		 * sock::sk_lock.slock prevents all other lockers to
		 * proceed so the corresponding unlock_sock_fast() can
		 * avoid the slow path of release_sock() completely and
		 * just release slock.
		 *
		 * From a semantical POV this is equivalent to 'acquiring'
		 * the 'mutex', hence the corresponding lockdep
		 * mutex_release() has to happen in the fast path of
		 * unlock_sock_fast().
		 */
		return false;
	}

	__lock_sock(sk);
	sk->sk_lock.owned = 1;
	__acquire(&sk->sk_lock.slock);
	spin_unlock_bh(&sk->sk_lock.slock);
	return true;
}
EXPORT_SYMBOL(__lock_sock_fast);
35768a74ad60SEric Dumazet 
/* Copy the socket's last-packet timestamp to userspace (SIOCGSTAMP /
 * SIOCGSTAMPNS backend).
 *
 * @timeval: report as a timeval (tv_usec) rather than a timespec (tv_nsec).
 * @time32:  userspace expects the legacy 32-bit time layout.
 *
 * Returns 0 on success, -ENOENT if no timestamp has ever been taken,
 * or -EFAULT on a failed copy to userspace.
 */
int sock_gettstamp(struct socket *sock, void __user *userstamp,
		   bool timeval, bool time32)
{
	struct sock *sk = sock->sk;
	struct timespec64 ts;

	/* Make sure future packets get stamped, even if this one was not. */
	sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	ts = ktime_to_timespec64(sock_read_timestamp(sk));
	if (ts.tv_sec == -1)
		return -ENOENT;
	if (ts.tv_sec == 0) {
		/* No stamp recorded yet: take one now and remember it. */
		ktime_t kt = ktime_get_real();
		sock_write_timestamp(sk, kt);
		ts = ktime_to_timespec64(kt);
	}

	if (timeval)
		ts.tv_nsec /= 1000;	/* nanoseconds -> microseconds */

#ifdef CONFIG_COMPAT_32BIT_TIME
	if (time32)
		return put_old_timespec32(&ts, userstamp);
#endif
#ifdef CONFIG_SPARC64
	/* beware of padding in sparc64 timeval */
	if (timeval && !in_compat_syscall()) {
		struct __kernel_old_timeval __user tv = {
			.tv_sec = ts.tv_sec,
			.tv_usec = ts.tv_nsec,
		};
		if (copy_to_user(userstamp, &tv, sizeof(tv)))
			return -EFAULT;
		return 0;
	}
#endif
	return put_timespec64(&ts, userstamp);
}
EXPORT_SYMBOL(sock_gettstamp);
3615ae40eb1eSEric Dumazet 
sock_enable_timestamp(struct sock * sk,enum sock_flags flag)3616193d357dSAlexey Dobriyan void sock_enable_timestamp(struct sock *sk, enum sock_flags flag)
36171da177e4SLinus Torvalds {
361820d49473SPatrick Ohly 	if (!sock_flag(sk, flag)) {
361908e29af3SEric Dumazet 		unsigned long previous_flags = sk->sk_flags;
362008e29af3SEric Dumazet 
362120d49473SPatrick Ohly 		sock_set_flag(sk, flag);
362220d49473SPatrick Ohly 		/*
362320d49473SPatrick Ohly 		 * we just set one of the two flags which require net
362420d49473SPatrick Ohly 		 * time stamping, but time stamping might have been on
362520d49473SPatrick Ohly 		 * already because of the other one
362620d49473SPatrick Ohly 		 */
3627080a270fSHannes Frederic Sowa 		if (sock_needs_netstamp(sk) &&
3628080a270fSHannes Frederic Sowa 		    !(previous_flags & SK_FLAGS_TIMESTAMP))
36291da177e4SLinus Torvalds 			net_enable_timestamp();
36301da177e4SLinus Torvalds 	}
36311da177e4SLinus Torvalds }
36321da177e4SLinus Torvalds 
/* Dequeue one skb from the socket's error queue and deliver it to the
 * caller's msghdr (recvmsg(MSG_ERRQUEUE) backend).
 *
 * The extended error info is attached as a control message of the given
 * @level/@type.  Returns the number of data bytes copied, -EAGAIN if the
 * error queue is empty, or a negative error from the datagram copy.
 */
int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
		       int level, int type)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb;
	int copied, err;

	err = -EAGAIN;
	skb = sock_dequeue_err_skb(sk);
	if (skb == NULL)
		goto out;

	copied = skb->len;
	if (copied > len) {
		/* Caller's buffer is too small: truncate and flag it. */
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_msg(skb, 0, msg, copied);
	if (err)
		goto out_free_skb;

	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);
	put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);

	msg->msg_flags |= MSG_ERRQUEUE;
	err = copied;

out_free_skb:
	kfree_skb(skb);
out:
	return err;
}
EXPORT_SYMBOL(sock_recv_errqueue);
3668cb820f8eSRichard Cochran 
36691da177e4SLinus Torvalds /*
36701da177e4SLinus Torvalds  *	Get a socket option on an socket.
36711da177e4SLinus Torvalds  *
36721da177e4SLinus Torvalds  *	FIX: POSIX 1003.1g is very ambiguous here. It states that
36731da177e4SLinus Torvalds  *	asynchronous errors should be reported by getsockopt. We assume
36741da177e4SLinus Torvalds  *	this means if you specify SO_ERROR (otherwise whats the point of it).
36751da177e4SLinus Torvalds  */
/* Generic getsockopt: forward to the protocol's handler.
 * sk_prot is re-read with READ_ONCE() because IPV6_ADDRFORM can switch
 * it concurrently (see comment below).
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	/* IPV6_ADDRFORM can change sk->sk_prot under us. */
	return READ_ONCE(sk->sk_prot)->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_getsockopt);
36851da177e4SLinus Torvalds 
/* Generic recvmsg: forward to the protocol's handler and, on success,
 * publish the source-address length back into the msghdr.
 */
int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
			int flags)
{
	struct sock *sk = sock->sk;
	int addrlen = 0;
	int ret;

	ret = sk->sk_prot->recvmsg(sk, msg, size, flags, &addrlen);
	if (ret >= 0)
		msg->msg_namelen = addrlen;
	return ret;
}
EXPORT_SYMBOL(sock_common_recvmsg);
36991da177e4SLinus Torvalds 
37001da177e4SLinus Torvalds /*
37011da177e4SLinus Torvalds  *	Set socket options on an inet socket.
37021da177e4SLinus Torvalds  */
/* Generic setsockopt: forward to the protocol's handler.
 * sk_prot is re-read with READ_ONCE() because IPV6_ADDRFORM can switch
 * it concurrently (see comment below).
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;

	/* IPV6_ADDRFORM can change sk->sk_prot under us. */
	return READ_ONCE(sk->sk_prot)->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_setsockopt);
37121da177e4SLinus Torvalds 
/* Common socket teardown: run the protocol destructor, unhash, orphan,
 * free the xfrm policy, and drop the final reference.  The ordering of
 * these steps is significant — see the inline comments.
 */
void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes have
	 * no access to socket. But net still has.
	 * Step one, detach it from networking:
	 *
	 * A. Remove from hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * In this point socket cannot receive new packets, but it is possible
	 * that some packets are in flight because some CPU runs receiver and
	 * did hash table lookup before we unhashed socket. They will achieve
	 * receive queue and will be purged by socket destructor.
	 *
	 * Also we still have packets pending on receive queue and probably,
	 * our own packets waiting in device queues. sock_destroy will drain
	 * receive queue, but transmitted packets will delay socket destruction
	 * until the last reference will be released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

	sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);
37471da177e4SLinus Torvalds 
/* Fill @mem (SK_MEMINFO_VARS u32 slots, zeroed first) with a snapshot of
 * the socket's memory-accounting counters.  Lockless: READ_ONCE() is used
 * on fields updated by other contexts, so each value is individually
 * consistent but the set is not mutually atomic.
 */
void sk_get_meminfo(const struct sock *sk, u32 *mem)
{
	memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);

	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
	mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
	mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
	mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
	mem[SK_MEMINFO_FWD_ALLOC] = sk_forward_alloc_get(sk);
	mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
	mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
	mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
	mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
}
3762a2d133b1SJosh Hunt 
376313ff3d6fSPavel Emelyanov #ifdef CONFIG_PROC_FS
376413ff3d6fSPavel Emelyanov static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
376570ee1159SPavel Emelyanov 
sock_prot_inuse_get(struct net * net,struct proto * prot)376670ee1159SPavel Emelyanov int sock_prot_inuse_get(struct net *net, struct proto *prot)
376770ee1159SPavel Emelyanov {
376870ee1159SPavel Emelyanov 	int cpu, idx = prot->inuse_idx;
376970ee1159SPavel Emelyanov 	int res = 0;
377070ee1159SPavel Emelyanov 
377170ee1159SPavel Emelyanov 	for_each_possible_cpu(cpu)
377208fc7f81STonghao Zhang 		res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx];
377370ee1159SPavel Emelyanov 
377470ee1159SPavel Emelyanov 	return res >= 0 ? res : 0;
377570ee1159SPavel Emelyanov }
377670ee1159SPavel Emelyanov EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
377770ee1159SPavel Emelyanov 
sock_inuse_get(struct net * net)3778648845abSTonghao Zhang int sock_inuse_get(struct net *net)
3779648845abSTonghao Zhang {
3780648845abSTonghao Zhang 	int cpu, res = 0;
3781648845abSTonghao Zhang 
3782648845abSTonghao Zhang 	for_each_possible_cpu(cpu)
37834199bae1SEric Dumazet 		res += per_cpu_ptr(net->core.prot_inuse, cpu)->all;
3784648845abSTonghao Zhang 
3785648845abSTonghao Zhang 	return res;
3786648845abSTonghao Zhang }
3787648845abSTonghao Zhang 
3788648845abSTonghao Zhang EXPORT_SYMBOL_GPL(sock_inuse_get);
3789648845abSTonghao Zhang 
sock_inuse_init_net(struct net * net)37902c8c1e72SAlexey Dobriyan static int __net_init sock_inuse_init_net(struct net *net)
379170ee1159SPavel Emelyanov {
379208fc7f81STonghao Zhang 	net->core.prot_inuse = alloc_percpu(struct prot_inuse);
3793648845abSTonghao Zhang 	if (net->core.prot_inuse == NULL)
3794648845abSTonghao Zhang 		return -ENOMEM;
3795648845abSTonghao Zhang 	return 0;
379670ee1159SPavel Emelyanov }
379770ee1159SPavel Emelyanov 
/* Per-netns exit: release the per-CPU protocol in-use counters. */
static void __net_exit sock_inuse_exit_net(struct net *net)
{
	free_percpu(net->core.prot_inuse);
}
380270ee1159SPavel Emelyanov 
/* Pernet hooks tying the in-use counter lifetime to each netns. */
static struct pernet_operations net_inuse_ops = {
	.init = sock_inuse_init_net,
	.exit = sock_inuse_exit_net,
};
380770ee1159SPavel Emelyanov 
/* Register the pernet in-use counter ops at boot; the counters are
 * essential bookkeeping, so failure is fatal.
 */
static __init int net_inuse_init(void)
{
	if (register_pernet_subsys(&net_inuse_ops))
		panic("Cannot initialize net inuse counters");

	return 0;
}

core_initcall(net_inuse_init);
381713ff3d6fSPavel Emelyanov 
assign_proto_idx(struct proto * prot)3818b45ce321Szhanglin static int assign_proto_idx(struct proto *prot)
381913ff3d6fSPavel Emelyanov {
382013ff3d6fSPavel Emelyanov 	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
382113ff3d6fSPavel Emelyanov 
382213ff3d6fSPavel Emelyanov 	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
3823e005d193SJoe Perches 		pr_err("PROTO_INUSE_NR exhausted\n");
3824b45ce321Szhanglin 		return -ENOSPC;
382513ff3d6fSPavel Emelyanov 	}
382613ff3d6fSPavel Emelyanov 
382713ff3d6fSPavel Emelyanov 	set_bit(prot->inuse_idx, proto_inuse_idx);
3828b45ce321Szhanglin 	return 0;
382913ff3d6fSPavel Emelyanov }
383013ff3d6fSPavel Emelyanov 
/* Return @prot's bitmap slot; the sentinel index (set when allocation
 * failed) was never actually reserved, so it is not cleared.
 */
static void release_proto_idx(struct proto *prot)
{
	if (prot->inuse_idx != PROTO_INUSE_NR - 1)
		clear_bit(prot->inuse_idx, proto_inuse_idx);
}
383613ff3d6fSPavel Emelyanov #else
/* Without CONFIG_PROC_FS there is no in-use bookkeeping to maintain. */
static inline int assign_proto_idx(struct proto *prot)
{
	return 0;
}

static inline void release_proto_idx(struct proto *prot)
{
}
3845648845abSTonghao Zhang 
384613ff3d6fSPavel Emelyanov #endif
384713ff3d6fSPavel Emelyanov 
/* Free the timewait slab cache and its name; safe on a partially
 * initialized twsk_prot (both kfree and kmem_cache_destroy accept NULL)
 * and on a NULL twsk_prot.  Pointers are reset so a repeat call is safe.
 */
static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot)
{
	if (!twsk_prot)
		return;
	kfree(twsk_prot->twsk_slab_name);
	twsk_prot->twsk_slab_name = NULL;
	kmem_cache_destroy(twsk_prot->twsk_slab);
	twsk_prot->twsk_slab = NULL;
}
38570f5907afSMiaohe Lin 
/* Create the per-protocol timewait slab cache, if the protocol has
 * timewait support.  Returns 0 on success (or when there is nothing to
 * do) and -ENOMEM on failure; on failure the caller is expected to run
 * tw_prot_cleanup() to release the partially built state.
 */
static int tw_prot_init(const struct proto *prot)
{
	struct timewait_sock_ops *twsk_prot = prot->twsk_prot;

	if (!twsk_prot)
		return 0;

	twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s",
					      prot->name);
	if (!twsk_prot->twsk_slab_name)
		return -ENOMEM;

	twsk_prot->twsk_slab =
		kmem_cache_create(twsk_prot->twsk_slab_name,
				  twsk_prot->twsk_obj_size, 0,
				  SLAB_ACCOUNT | prot->slab_flags,
				  NULL);
	if (!twsk_prot->twsk_slab) {
		pr_crit("%s: Can't create timewait sock SLAB cache!\n",
			prot->name);
		return -ENOMEM;
	}

	return 0;
}
3883b80350f3STonghao Zhang 
/* Free the request-sock slab cache and its name; mirrors
 * tw_prot_cleanup() and is equally safe on NULL / partial state.
 */
static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
{
	if (!rsk_prot)
		return;
	kfree(rsk_prot->slab_name);
	rsk_prot->slab_name = NULL;
	kmem_cache_destroy(rsk_prot->slab);
	rsk_prot->slab = NULL;
}
38930159dfd3SEric Dumazet 
/* Create the per-protocol request-sock slab cache, if the protocol uses
 * request socks.  Returns 0 on success (or nothing to do), -ENOMEM on
 * failure; the caller unwinds via req_prot_cleanup().
 */
static int req_prot_init(const struct proto *prot)
{
	struct request_sock_ops *rsk_prot = prot->rsk_prot;

	if (!rsk_prot)
		return 0;

	rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s",
					prot->name);
	if (!rsk_prot->slab_name)
		return -ENOMEM;

	rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
					   rsk_prot->obj_size, 0,
					   SLAB_ACCOUNT | prot->slab_flags,
					   NULL);

	if (!rsk_prot->slab) {
		pr_crit("%s: Can't create request sock SLAB cache!\n",
			prot->name);
		return -ENOMEM;
	}
	return 0;
}
39180159dfd3SEric Dumazet 
/* Register a transport protocol with the core.
 *
 * Validates the memory-accounting hooks, optionally creates the sock /
 * request-sock / timewait slab caches, assigns a /proc in-use index and
 * links the proto on the global list.  Returns 0 on success or a
 * negative errno; on failure any caches created here are unwound via
 * the labels below (note the out_free_* labels intentionally fall
 * through so later failures release everything built earlier).
 */
int proto_register(struct proto *prot, int alloc_slab)
{
	int ret = -ENOBUFS;

	if (prot->memory_allocated && !prot->sysctl_mem) {
		pr_err("%s: missing sysctl_mem\n", prot->name);
		return -EINVAL;
	}
	if (prot->memory_allocated && !prot->per_cpu_fw_alloc) {
		pr_err("%s: missing per_cpu_fw_alloc\n", prot->name);
		return -EINVAL;
	}
	if (alloc_slab) {
		prot->slab = kmem_cache_create_usercopy(prot->name,
					prot->obj_size, 0,
					SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT |
					prot->slab_flags,
					prot->useroffset, prot->usersize,
					NULL);

		if (prot->slab == NULL) {
			pr_crit("%s: Can't create sock SLAB cache!\n",
				prot->name);
			goto out;
		}

		if (req_prot_init(prot))
			goto out_free_request_sock_slab;

		if (tw_prot_init(prot))
			goto out_free_timewait_sock_slab;
	}

	mutex_lock(&proto_list_mutex);
	ret = assign_proto_idx(prot);
	if (ret) {
		mutex_unlock(&proto_list_mutex);
		goto out_free_timewait_sock_slab;
	}
	list_add(&prot->node, &proto_list);
	mutex_unlock(&proto_list_mutex);
	return ret;

out_free_timewait_sock_slab:
	if (alloc_slab)
		tw_prot_cleanup(prot->twsk_prot);
out_free_request_sock_slab:
	if (alloc_slab) {
		req_prot_cleanup(prot->rsk_prot);

		kmem_cache_destroy(prot->slab);
		prot->slab = NULL;
	}
out:
	return ret;
}
EXPORT_SYMBOL(proto_register);
39761da177e4SLinus Torvalds 
/* Undo proto_register(): drop the in-use index, unlink from the global
 * list, and destroy the sock / request-sock / timewait slab caches.
 */
void proto_unregister(struct proto *prot)
{
	mutex_lock(&proto_list_mutex);
	release_proto_idx(prot);
	list_del(&prot->node);
	mutex_unlock(&proto_list_mutex);

	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;

	req_prot_cleanup(prot->rsk_prot);
	tw_prot_cleanup(prot->twsk_prot);
}
EXPORT_SYMBOL(proto_unregister);
39911da177e4SLinus Torvalds 
/* Request the sock_diag module for @family (and, when non-zero,
 * @protocol).  Returns -ENOENT without loading anything when the
 * family/protocol is not registered, otherwise the request_module()
 * result.
 */
int sock_load_diag_module(int family, int protocol)
{
	if (!protocol) {
		if (!sock_is_registered(family))
			return -ENOENT;

		return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
				      NETLINK_SOCK_DIAG, family);
	}

#ifdef CONFIG_INET
	/* For inet, only ask for the module if the protocol is plausible:
	 * in range and actually registered (raw is always available).
	 */
	if (family == AF_INET &&
	    protocol != IPPROTO_RAW &&
	    protocol < MAX_INET_PROTOS &&
	    !rcu_access_pointer(inet_protos[protocol]))
		return -ENOENT;
#endif

	return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
			      NETLINK_SOCK_DIAG, family, protocol);
}
EXPORT_SYMBOL(sock_load_diag_module);
4014bf2ae2e4SXin Long 
40151da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
/* seq_file start: take proto_list_mutex (held until proto_seq_stop)
 * and position at entry *pos of the protocol list.
 */
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(proto_list_mutex)
{
	mutex_lock(&proto_list_mutex);
	return seq_list_start_head(&proto_list, *pos);
}
40221da177e4SLinus Torvalds 
/* seq_file next: advance to the following protocol list entry. */
static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	return seq_list_next(v, &proto_list, pos);
}
40271da177e4SLinus Torvalds 
/* seq_file stop: release the mutex taken in proto_seq_start(). */
static void proto_seq_stop(struct seq_file *seq, void *v)
	__releases(proto_list_mutex)
{
	mutex_unlock(&proto_list_mutex);
}
40331da177e4SLinus Torvalds 
/* Map a proto callback pointer to the 'y'/'n' marker used in the
 * /proc/net/protocols table.
 */
static char proto_method_implemented(const void *method)
{
	return method ? 'y' : 'n';
}
/* Total pages allocated by the protocol, or -1 when the protocol does
 * not track memory ("NI" column in /proc/net/protocols).
 */
static long sock_prot_memory_allocated(struct proto *proto)
{
	return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
}
4042180d8cd9SGlauber Costa 
sock_prot_memory_pressure(struct proto * proto)40437a512eb8SAlexey Dobriyan static const char *sock_prot_memory_pressure(struct proto *proto)
4044180d8cd9SGlauber Costa {
4045180d8cd9SGlauber Costa 	return proto->memory_pressure != NULL ?
4046180d8cd9SGlauber Costa 	proto_memory_pressure(proto) ? "yes" : "no" : "NI";
4047180d8cd9SGlauber Costa }
40481da177e4SLinus Torvalds 
/* Emit one /proc/net/protocols row for @proto: name, object size,
 * socket/memory statistics, module owner, and a y/n marker per
 * optional callback (column order must match the header printed in
 * proto_seq_show()).
 */
static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{

	seq_printf(seq, "%-9s %4u %6d  %6ld   %-3s %6u   %-3s  %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   sock_prot_inuse_get(seq_file_net(seq), proto),
		   sock_prot_memory_allocated(proto),
		   sock_prot_memory_pressure(proto),
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}
40811da177e4SLinus Torvalds 
/* seq_file show: print the column header when positioned at the list
 * head sentinel, otherwise one protocol row.
 */
static int proto_seq_show(struct seq_file *seq, void *v)
{
	if (v == &proto_list)
		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
			   "protocol",
			   "size",
			   "sockets",
			   "memory",
			   "press",
			   "maxhdr",
			   "slab",
			   "module",
			   "cl co di ac io in de sh ss gs se re bi br ha uh gp em\n");
	else
		proto_seq_printf(seq, list_entry(v, struct proto, node));
	return 0;
}
40991da177e4SLinus Torvalds 
/* Iterator ops backing /proc/net/protocols. */
static const struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};
41061da177e4SLinus Torvalds 
proto_init_net(struct net * net)410714e943dbSEric Dumazet static __net_init int proto_init_net(struct net *net)
410814e943dbSEric Dumazet {
4109c3506372SChristoph Hellwig 	if (!proc_create_net("protocols", 0444, net->proc_net, &proto_seq_ops,
4110c3506372SChristoph Hellwig 			sizeof(struct seq_net_private)))
411114e943dbSEric Dumazet 		return -ENOMEM;
411214e943dbSEric Dumazet 
411314e943dbSEric Dumazet 	return 0;
411414e943dbSEric Dumazet }
411514e943dbSEric Dumazet 
/* Per-netns exit: remove the /proc/net/protocols entry. */
static __net_exit void proto_exit_net(struct net *net)
{
	remove_proc_entry("protocols", net->proc_net);
}
412014e943dbSEric Dumazet 
412114e943dbSEric Dumazet 
/* Pernet hooks tying /proc/net/protocols lifetime to each netns. */
static __net_initdata struct pernet_operations proto_net_ops = {
	.init = proto_init_net,
	.exit = proto_exit_net,
};
41261da177e4SLinus Torvalds 
/* Boot-time registration of the /proc/net/protocols pernet ops. */
static int __init proto_init(void)
{
	return register_pernet_subsys(&proto_net_ops);
}

subsys_initcall(proto_init);
41331da177e4SLinus Torvalds 
41341da177e4SLinus Torvalds #endif /* PROC_FS */
41357db6b048SSridhar Samudrala 
41367db6b048SSridhar Samudrala #ifdef CONFIG_NET_RX_BUSY_POLL
/* Busy-poll termination check for a socket: stop polling as soon as
 * data is queued (including UDP's separate reader_queue) or when the
 * time budget since @start_time is exhausted.  Returns true to end the
 * busy loop.
 */
bool sk_busy_loop_end(void *p, unsigned long start_time)
{
	struct sock *sk = p;

	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		return true;

	if (sk_is_udp(sk) &&
	    !skb_queue_empty_lockless(&udp_sk(sk)->reader_queue))
		return true;

	return sk_busy_loop_timeout(sk, start_time);
}
EXPORT_SYMBOL(sk_busy_loop_end);
41517db6b048SSridhar Samudrala #endif /* CONFIG_NET_RX_BUSY_POLL */
4152c0425a42SChristoph Hellwig 
sock_bind_add(struct sock * sk,struct sockaddr * addr,int addr_len)4153c0425a42SChristoph Hellwig int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len)
4154c0425a42SChristoph Hellwig {
4155c0425a42SChristoph Hellwig 	if (!sk->sk_prot->bind_add)
4156c0425a42SChristoph Hellwig 		return -EOPNOTSUPP;
4157c0425a42SChristoph Hellwig 	return sk->sk_prot->bind_add(sk, addr, addr_len);
4158c0425a42SChristoph Hellwig }
4159c0425a42SChristoph Hellwig EXPORT_SYMBOL(sock_bind_add);
4160e1d001faSBreno Leitao 
4161e1d001faSBreno Leitao /* Copy 'size' bytes from userspace and return `size` back to userspace */
sock_ioctl_inout(struct sock * sk,unsigned int cmd,void __user * arg,void * karg,size_t size)4162e1d001faSBreno Leitao int sock_ioctl_inout(struct sock *sk, unsigned int cmd,
4163e1d001faSBreno Leitao 		     void __user *arg, void *karg, size_t size)
4164e1d001faSBreno Leitao {
4165e1d001faSBreno Leitao 	int ret;
4166e1d001faSBreno Leitao 
4167e1d001faSBreno Leitao 	if (copy_from_user(karg, arg, size))
4168e1d001faSBreno Leitao 		return -EFAULT;
4169e1d001faSBreno Leitao 
4170e1d001faSBreno Leitao 	ret = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, karg);
4171e1d001faSBreno Leitao 	if (ret)
4172e1d001faSBreno Leitao 		return ret;
4173e1d001faSBreno Leitao 
4174e1d001faSBreno Leitao 	if (copy_to_user(arg, karg, size))
4175e1d001faSBreno Leitao 		return -EFAULT;
4176e1d001faSBreno Leitao 
4177e1d001faSBreno Leitao 	return 0;
4178e1d001faSBreno Leitao }
4179e1d001faSBreno Leitao EXPORT_SYMBOL(sock_ioctl_inout);
4180e1d001faSBreno Leitao 
4181e1d001faSBreno Leitao /* This is the most common ioctl prep function, where the result (4 bytes) is
4182e1d001faSBreno Leitao  * copied back to userspace if the ioctl() returns successfully. No input is
4183e1d001faSBreno Leitao  * copied from userspace as input argument.
4184e1d001faSBreno Leitao  */
sock_ioctl_out(struct sock * sk,unsigned int cmd,void __user * arg)4185e1d001faSBreno Leitao static int sock_ioctl_out(struct sock *sk, unsigned int cmd, void __user *arg)
4186e1d001faSBreno Leitao {
4187e1d001faSBreno Leitao 	int ret, karg = 0;
4188e1d001faSBreno Leitao 
4189e1d001faSBreno Leitao 	ret = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, &karg);
4190e1d001faSBreno Leitao 	if (ret)
4191e1d001faSBreno Leitao 		return ret;
4192e1d001faSBreno Leitao 
4193e1d001faSBreno Leitao 	return put_user(karg, (int __user *)arg);
4194e1d001faSBreno Leitao }
4195e1d001faSBreno Leitao 
4196e1d001faSBreno Leitao /* A wrapper around sock ioctls, which copies the data from userspace
4197e1d001faSBreno Leitao  * (depending on the protocol/ioctl), and copies back the result to userspace.
4198e1d001faSBreno Leitao  * The main motivation for this function is to pass kernel memory to the
4199e1d001faSBreno Leitao  * protocol ioctl callbacks, instead of userspace memory.
4200e1d001faSBreno Leitao  */
sk_ioctl(struct sock * sk,unsigned int cmd,void __user * arg)4201e1d001faSBreno Leitao int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
4202e1d001faSBreno Leitao {
4203e1d001faSBreno Leitao 	int rc = 1;
4204e1d001faSBreno Leitao 
4205634236b3SEric Dumazet 	if (sk->sk_type == SOCK_RAW && sk->sk_family == AF_INET)
4206e1d001faSBreno Leitao 		rc = ipmr_sk_ioctl(sk, cmd, arg);
4207634236b3SEric Dumazet 	else if (sk->sk_type == SOCK_RAW && sk->sk_family == AF_INET6)
4208e1d001faSBreno Leitao 		rc = ip6mr_sk_ioctl(sk, cmd, arg);
4209e1d001faSBreno Leitao 	else if (sk_is_phonet(sk))
4210e1d001faSBreno Leitao 		rc = phonet_sk_ioctl(sk, cmd, arg);
4211e1d001faSBreno Leitao 
4212e1d001faSBreno Leitao 	/* If ioctl was processed, returns its value */
4213e1d001faSBreno Leitao 	if (rc <= 0)
4214e1d001faSBreno Leitao 		return rc;
4215e1d001faSBreno Leitao 
4216e1d001faSBreno Leitao 	/* Otherwise call the default handler */
4217e1d001faSBreno Leitao 	return sock_ioctl_out(sk, cmd, arg);
4218e1d001faSBreno Leitao }
4219e1d001faSBreno Leitao EXPORT_SYMBOL(sk_ioctl);
4220