xref: /openbmc/linux/net/core/sock.c (revision 020c5260)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Generic socket support routines. Memory allocators, socket lock/release
7  *		handler for protocols to use and generic option handler.
8  *
9  *
10  * Authors:	Ross Biro
11  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *		Florian La Roche, <flla@stud.uni-sb.de>
13  *		Alan Cox, <A.Cox@swansea.ac.uk>
14  *
15  * Fixes:
16  *		Alan Cox	: 	Numerous verify_area() problems
17  *		Alan Cox	:	Connecting on a connecting socket
18  *					now returns an error for tcp.
19  *		Alan Cox	:	sock->protocol is set correctly.
20  *					and is not sometimes left as 0.
21  *		Alan Cox	:	connect handles icmp errors on a
22  *					connect properly. Unfortunately there
23  *					is a restart syscall nasty there. I
24  *					can't match BSD without hacking the C
25  *					library. Ideas urgently sought!
26  *		Alan Cox	:	Disallow bind() to addresses that are
27  *					not ours - especially broadcast ones!!
28  *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
29  *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
30  *					instead they leave that for the DESTROY timer.
31  *		Alan Cox	:	Clean up error flag in accept
32  *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
33  *					was buggy. Put a remove_sock() in the handler
34  *					for memory when we hit 0. Also altered the timer
35  *					code. The ACK stuff can wait and needs major
36  *					TCP layer surgery.
37  *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
38  *					and fixed timer/inet_bh race.
39  *		Alan Cox	:	Added zapped flag for TCP
40  *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
41  *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
42  *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
43  *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
44  *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
45  *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
46  *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
47  *	Pauline Middelink	:	identd support
48  *		Alan Cox	:	Fixed connect() taking signals I think.
49  *		Alan Cox	:	SO_LINGER supported
50  *		Alan Cox	:	Error reporting fixes
51  *		Anonymous	:	inet_create tidied up (sk->reuse setting)
52  *		Alan Cox	:	inet sockets don't set sk->type!
53  *		Alan Cox	:	Split socket option code
54  *		Alan Cox	:	Callbacks
55  *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
56  *		Alex		:	Removed restriction on inet fioctl
57  *		Alan Cox	:	Splitting INET from NET core
58  *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
59  *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
60  *		Alan Cox	:	Split IP from generic code
61  *		Alan Cox	:	New kfree_skbmem()
62  *		Alan Cox	:	Make SO_DEBUG superuser only.
63  *		Alan Cox	:	Allow anyone to clear SO_DEBUG
64  *					(compatibility fix)
65  *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
66  *		Alan Cox	:	Allocator for a socket is settable.
67  *		Alan Cox	:	SO_ERROR includes soft errors.
68  *		Alan Cox	:	Allow NULL arguments on some SO_ opts
69  *		Alan Cox	: 	Generic socket allocation to make hooks
70  *					easier (suggested by Craig Metz).
71  *		Michael Pall	:	SO_ERROR returns positive errno again
72  *              Steve Whitehouse:       Added default destructor to free
73  *                                      protocol private data.
74  *              Steve Whitehouse:       Added various other default routines
75  *                                      common to several socket families.
76  *              Chris Evans     :       Call suser() check last on F_SETOWN
77  *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
78  *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
79  *		Andi Kleen	:	Fix write_space callback
80  *		Chris Evans	:	Security fixes - signedness again
81  *		Arnaldo C. Melo :       cleanups, use skb_queue_purge
82  *
83  * To Fix:
84  *
85  *
86  *		This program is free software; you can redistribute it and/or
87  *		modify it under the terms of the GNU General Public License
88  *		as published by the Free Software Foundation; either version
89  *		2 of the License, or (at your option) any later version.
90  */
91 
92 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
93 
94 #include <linux/capability.h>
95 #include <linux/errno.h>
96 #include <linux/errqueue.h>
97 #include <linux/types.h>
98 #include <linux/socket.h>
99 #include <linux/in.h>
100 #include <linux/kernel.h>
101 #include <linux/module.h>
102 #include <linux/proc_fs.h>
103 #include <linux/seq_file.h>
104 #include <linux/sched.h>
105 #include <linux/sched/mm.h>
106 #include <linux/timer.h>
107 #include <linux/string.h>
108 #include <linux/sockios.h>
109 #include <linux/net.h>
110 #include <linux/mm.h>
111 #include <linux/slab.h>
112 #include <linux/interrupt.h>
113 #include <linux/poll.h>
114 #include <linux/tcp.h>
115 #include <linux/init.h>
116 #include <linux/highmem.h>
117 #include <linux/user_namespace.h>
118 #include <linux/static_key.h>
119 #include <linux/memcontrol.h>
120 #include <linux/prefetch.h>
121 
122 #include <linux/uaccess.h>
123 
124 #include <linux/netdevice.h>
125 #include <net/protocol.h>
126 #include <linux/skbuff.h>
127 #include <net/net_namespace.h>
128 #include <net/request_sock.h>
129 #include <net/sock.h>
130 #include <linux/net_tstamp.h>
131 #include <net/xfrm.h>
132 #include <linux/ipsec.h>
133 #include <net/cls_cgroup.h>
134 #include <net/netprio_cgroup.h>
135 #include <linux/sock_diag.h>
136 
137 #include <linux/filter.h>
138 #include <net/sock_reuseport.h>
139 
140 #include <trace/events/sock.h>
141 
142 #ifdef CONFIG_INET
143 #include <net/tcp.h>
144 #endif
145 
146 #include <net/busy_poll.h>
147 
148 static DEFINE_MUTEX(proto_list_mutex);
149 static LIST_HEAD(proto_list);
150 
151 /**
152  * sk_ns_capable - General socket capability test
153  * @sk: Socket to use a capability on or through
154  * @user_ns: The user namespace of the capability to use
155  * @cap: The capability to use
156  *
157  * Test to see if the opener of the socket had the capability @cap when
158  * the socket was created and if the current process has the capability
159  * @cap in the user namespace @user_ns.
160  */
161 bool sk_ns_capable(const struct sock *sk,
162 		   struct user_namespace *user_ns, int cap)
163 {
164 	return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
165 		ns_capable(user_ns, cap);
166 }
167 EXPORT_SYMBOL(sk_ns_capable);
168 
169 /**
170  * sk_capable - Socket global capability test
171  * @sk: Socket to use a capability on or through
172  * @cap: The global capability to use
173  *
174  * Test to see if the opener of the socket had the capability @cap when
175  * the socket was created and if the current process has the capability
176  * @cap in all user namespaces.
177  */
178 bool sk_capable(const struct sock *sk, int cap)
179 {
180 	return sk_ns_capable(sk, &init_user_ns, cap);
181 }
182 EXPORT_SYMBOL(sk_capable);
183 
184 /**
185  * sk_net_capable - Network namespace socket capability test
186  * @sk: Socket to use a capability on or through
187  * @cap: The capability to use
188  *
189  * Test to see if the opener of the socket had the capability @cap when the
190  * socket was created and if the current process has the capability @cap
191  * over the network namespace the socket is a member of.
192  */
193 bool sk_net_capable(const struct sock *sk, int cap)
194 {
195 	return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
196 }
197 EXPORT_SYMBOL(sk_net_capable);
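/* Illustrative sketch (editor's example, not from this file): a protocol's
 * setsockopt handler might gate a privileged, namespace-aware option on the
 * helpers above roughly like this; example_set_priv_opt is a hypothetical name.
 *
 *	static int example_set_priv_opt(struct sock *sk, int val)
 *	{
 *		if (!sk_net_capable(sk, CAP_NET_ADMIN))
 *			return -EPERM;	// opener and caller both need the cap
 *		sk->sk_priority = val;
 *		return 0;
 *	}
 */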
198 
199 /*
200  * Each address family might have different locking rules, so we have
201  * one slock key per address family and separate keys for internal and
202  * userspace sockets.
203  */
204 static struct lock_class_key af_family_keys[AF_MAX];
205 static struct lock_class_key af_family_kern_keys[AF_MAX];
206 static struct lock_class_key af_family_slock_keys[AF_MAX];
207 static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
208 
209 /*
210  * Make lock validator output more readable. (We pre-construct these
211  * strings at build time, so that runtime initialization of socket
212  * locks is fast):
213  */
214 
215 #define _sock_locks(x)						  \
216   x "AF_UNSPEC",	x "AF_UNIX"     ,	x "AF_INET"     , \
217   x "AF_AX25"  ,	x "AF_IPX"      ,	x "AF_APPLETALK", \
218   x "AF_NETROM",	x "AF_BRIDGE"   ,	x "AF_ATMPVC"   , \
219   x "AF_X25"   ,	x "AF_INET6"    ,	x "AF_ROSE"     , \
220   x "AF_DECnet",	x "AF_NETBEUI"  ,	x "AF_SECURITY" , \
221   x "AF_KEY"   ,	x "AF_NETLINK"  ,	x "AF_PACKET"   , \
222   x "AF_ASH"   ,	x "AF_ECONET"   ,	x "AF_ATMSVC"   , \
223   x "AF_RDS"   ,	x "AF_SNA"      ,	x "AF_IRDA"     , \
224   x "AF_PPPOX" ,	x "AF_WANPIPE"  ,	x "AF_LLC"      , \
225   x "27"       ,	x "28"          ,	x "AF_CAN"      , \
226   x "AF_TIPC"  ,	x "AF_BLUETOOTH",	x "IUCV"        , \
227   x "AF_RXRPC" ,	x "AF_ISDN"     ,	x "AF_PHONET"   , \
228   x "AF_IEEE802154",	x "AF_CAIF"	,	x "AF_ALG"      , \
229   x "AF_NFC"   ,	x "AF_VSOCK"    ,	x "AF_KCM"      , \
230   x "AF_QIPCRTR",	x "AF_SMC"	,	x "AF_MAX"
231 
232 static const char *const af_family_key_strings[AF_MAX+1] = {
233 	_sock_locks("sk_lock-")
234 };
235 static const char *const af_family_slock_key_strings[AF_MAX+1] = {
236 	_sock_locks("slock-")
237 };
238 static const char *const af_family_clock_key_strings[AF_MAX+1] = {
239 	_sock_locks("clock-")
240 };
241 
242 static const char *const af_family_kern_key_strings[AF_MAX+1] = {
243 	_sock_locks("k-sk_lock-")
244 };
245 static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
246 	_sock_locks("k-slock-")
247 };
248 static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
249 	_sock_locks("k-clock-")
250 };
251 static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
252   "rlock-AF_UNSPEC", "rlock-AF_UNIX"     , "rlock-AF_INET"     ,
253   "rlock-AF_AX25"  , "rlock-AF_IPX"      , "rlock-AF_APPLETALK",
254   "rlock-AF_NETROM", "rlock-AF_BRIDGE"   , "rlock-AF_ATMPVC"   ,
255   "rlock-AF_X25"   , "rlock-AF_INET6"    , "rlock-AF_ROSE"     ,
256   "rlock-AF_DECnet", "rlock-AF_NETBEUI"  , "rlock-AF_SECURITY" ,
257   "rlock-AF_KEY"   , "rlock-AF_NETLINK"  , "rlock-AF_PACKET"   ,
258   "rlock-AF_ASH"   , "rlock-AF_ECONET"   , "rlock-AF_ATMSVC"   ,
259   "rlock-AF_RDS"   , "rlock-AF_SNA"      , "rlock-AF_IRDA"     ,
260   "rlock-AF_PPPOX" , "rlock-AF_WANPIPE"  , "rlock-AF_LLC"      ,
261   "rlock-27"       , "rlock-28"          , "rlock-AF_CAN"      ,
262   "rlock-AF_TIPC"  , "rlock-AF_BLUETOOTH", "rlock-AF_IUCV"     ,
263   "rlock-AF_RXRPC" , "rlock-AF_ISDN"     , "rlock-AF_PHONET"   ,
264   "rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG"      ,
265   "rlock-AF_NFC"   , "rlock-AF_VSOCK"    , "rlock-AF_KCM"      ,
266   "rlock-AF_QIPCRTR", "rlock-AF_SMC"     , "rlock-AF_MAX"
267 };
268 static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
269   "wlock-AF_UNSPEC", "wlock-AF_UNIX"     , "wlock-AF_INET"     ,
270   "wlock-AF_AX25"  , "wlock-AF_IPX"      , "wlock-AF_APPLETALK",
271   "wlock-AF_NETROM", "wlock-AF_BRIDGE"   , "wlock-AF_ATMPVC"   ,
272   "wlock-AF_X25"   , "wlock-AF_INET6"    , "wlock-AF_ROSE"     ,
273   "wlock-AF_DECnet", "wlock-AF_NETBEUI"  , "wlock-AF_SECURITY" ,
274   "wlock-AF_KEY"   , "wlock-AF_NETLINK"  , "wlock-AF_PACKET"   ,
275   "wlock-AF_ASH"   , "wlock-AF_ECONET"   , "wlock-AF_ATMSVC"   ,
276   "wlock-AF_RDS"   , "wlock-AF_SNA"      , "wlock-AF_IRDA"     ,
277   "wlock-AF_PPPOX" , "wlock-AF_WANPIPE"  , "wlock-AF_LLC"      ,
278   "wlock-27"       , "wlock-28"          , "wlock-AF_CAN"      ,
279   "wlock-AF_TIPC"  , "wlock-AF_BLUETOOTH", "wlock-AF_IUCV"     ,
280   "wlock-AF_RXRPC" , "wlock-AF_ISDN"     , "wlock-AF_PHONET"   ,
281   "wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG"      ,
282   "wlock-AF_NFC"   , "wlock-AF_VSOCK"    , "wlock-AF_KCM"      ,
283   "wlock-AF_QIPCRTR", "wlock-AF_SMC"     , "wlock-AF_MAX"
284 };
285 static const char *const af_family_elock_key_strings[AF_MAX+1] = {
286   "elock-AF_UNSPEC", "elock-AF_UNIX"     , "elock-AF_INET"     ,
287   "elock-AF_AX25"  , "elock-AF_IPX"      , "elock-AF_APPLETALK",
288   "elock-AF_NETROM", "elock-AF_BRIDGE"   , "elock-AF_ATMPVC"   ,
289   "elock-AF_X25"   , "elock-AF_INET6"    , "elock-AF_ROSE"     ,
290   "elock-AF_DECnet", "elock-AF_NETBEUI"  , "elock-AF_SECURITY" ,
291   "elock-AF_KEY"   , "elock-AF_NETLINK"  , "elock-AF_PACKET"   ,
292   "elock-AF_ASH"   , "elock-AF_ECONET"   , "elock-AF_ATMSVC"   ,
293   "elock-AF_RDS"   , "elock-AF_SNA"      , "elock-AF_IRDA"     ,
294   "elock-AF_PPPOX" , "elock-AF_WANPIPE"  , "elock-AF_LLC"      ,
295   "elock-27"       , "elock-28"          , "elock-AF_CAN"      ,
296   "elock-AF_TIPC"  , "elock-AF_BLUETOOTH", "elock-AF_IUCV"     ,
297   "elock-AF_RXRPC" , "elock-AF_ISDN"     , "elock-AF_PHONET"   ,
298   "elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG"      ,
299   "elock-AF_NFC"   , "elock-AF_VSOCK"    , "elock-AF_KCM"      ,
300   "elock-AF_QIPCRTR", "elock-AF_SMC"     , "elock-AF_MAX"
301 };
302 
303 /*
304  * sk_callback_lock and sk queues locking rules are per-address-family,
305  * so split the lock classes by using a per-AF key:
306  */
307 static struct lock_class_key af_callback_keys[AF_MAX];
308 static struct lock_class_key af_rlock_keys[AF_MAX];
309 static struct lock_class_key af_wlock_keys[AF_MAX];
310 static struct lock_class_key af_elock_keys[AF_MAX];
311 static struct lock_class_key af_kern_callback_keys[AF_MAX];
312 
313 /* Take the size of the struct sk_buff overhead into consideration when
314  * determining these values, since that overhead is not constant across
315  * platforms.  This keeps socket queueing behavior and performance from
316  * depending on such differences.
317  */
318 #define _SK_MEM_PACKETS		256
319 #define _SK_MEM_OVERHEAD	SKB_TRUESIZE(256)
320 #define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
321 #define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
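/* Illustrative arithmetic (editor's note; exact values are platform dependent,
 * not authoritative): the defaults allow roughly 256 packets of 256 bytes of
 * payload each plus per-skb overhead, i.e.
 * SK_WMEM_MAX = SK_RMEM_MAX = 256 * SKB_TRUESIZE(256), which works out to a
 * couple of hundred KiB per socket on typical builds.
 */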
322 
323 /* Run time adjustable parameters. */
324 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
325 EXPORT_SYMBOL(sysctl_wmem_max);
326 __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
327 EXPORT_SYMBOL(sysctl_rmem_max);
328 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
329 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
330 
331 /* Maximal space eaten by iovec or ancillary data plus some space */
332 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
333 EXPORT_SYMBOL(sysctl_optmem_max);
334 
335 int sysctl_tstamp_allow_data __read_mostly = 1;
336 
337 struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
338 EXPORT_SYMBOL_GPL(memalloc_socks);
339 
340 /**
341  * sk_set_memalloc - sets %SOCK_MEMALLOC
342  * @sk: socket to set it on
343  *
344  * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
345  * It's the responsibility of the admin to adjust min_free_kbytes
346  * to meet the requirements.
347  */
348 void sk_set_memalloc(struct sock *sk)
349 {
350 	sock_set_flag(sk, SOCK_MEMALLOC);
351 	sk->sk_allocation |= __GFP_MEMALLOC;
352 	static_key_slow_inc(&memalloc_socks);
353 }
354 EXPORT_SYMBOL_GPL(sk_set_memalloc);
355 
356 void sk_clear_memalloc(struct sock *sk)
357 {
358 	sock_reset_flag(sk, SOCK_MEMALLOC);
359 	sk->sk_allocation &= ~__GFP_MEMALLOC;
360 	static_key_slow_dec(&memalloc_socks);
361 
362 	/*
363 	 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
364 	 * progress of swapping. SOCK_MEMALLOC may be cleared while
365 	 * it has rmem allocations due to the last swapfile being deactivated
366 	 * but there is a risk that the socket is unusable due to exceeding
367 	 * the rmem limits. Reclaim the reserves and obey rmem limits again.
368 	 */
369 	sk_mem_reclaim(sk);
370 }
371 EXPORT_SYMBOL_GPL(sk_clear_memalloc);
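/* Illustrative pairing (editor's sketch only, not a complete swap-over-net
 * setup): a subsystem that services swap I/O over a socket would typically do
 *
 *	sk_set_memalloc(sk);	// before the socket may be needed for swap
 *	...			// socket may now draw on emergency reserves
 *	sk_clear_memalloc(sk);	// once the last swapfile using it goes away
 *
 * with the admin expected to raise min_free_kbytes accordingly.
 */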
372 
373 int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
374 {
375 	int ret;
376 	unsigned int noreclaim_flag;
377 
378 	/* these should have been dropped before queueing */
379 	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
380 
381 	noreclaim_flag = memalloc_noreclaim_save();
382 	ret = sk->sk_backlog_rcv(sk, skb);
383 	memalloc_noreclaim_restore(noreclaim_flag);
384 
385 	return ret;
386 }
387 EXPORT_SYMBOL(__sk_backlog_rcv);
388 
389 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
390 {
391 	struct timeval tv;
392 
393 	if (optlen < sizeof(tv))
394 		return -EINVAL;
395 	if (copy_from_user(&tv, optval, sizeof(tv)))
396 		return -EFAULT;
397 	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
398 		return -EDOM;
399 
400 	if (tv.tv_sec < 0) {
401 		static int warned __read_mostly;
402 
403 		*timeo_p = 0;
404 		if (warned < 10 && net_ratelimit()) {
405 			warned++;
406 			pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
407 				__func__, current->comm, task_pid_nr(current));
408 		}
409 		return 0;
410 	}
411 	*timeo_p = MAX_SCHEDULE_TIMEOUT;
412 	if (tv.tv_sec == 0 && tv.tv_usec == 0)
413 		return 0;
414 	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
415 		*timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC / HZ);
416 	return 0;
417 }
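/* Illustrative userspace counterpart (editor's sketch, not part of this file):
 * the value parsed above is what arrives from a setsockopt() call such as
 *
 *	struct timeval tv = { .tv_sec = 2, .tv_usec = 500000 };
 *	setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
 *
 * which ends up as roughly 2.5 * HZ jiffies in *timeo_p; a tv_sec/tv_usec of
 * zero means "wait forever" (MAX_SCHEDULE_TIMEOUT).
 */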
418 
419 static void sock_warn_obsolete_bsdism(const char *name)
420 {
421 	static int warned;
422 	static char warncomm[TASK_COMM_LEN];
423 	if (strcmp(warncomm, current->comm) && warned < 5) {
424 		strcpy(warncomm,  current->comm);
425 		pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
426 			warncomm, name);
427 		warned++;
428 	}
429 }
430 
431 static bool sock_needs_netstamp(const struct sock *sk)
432 {
433 	switch (sk->sk_family) {
434 	case AF_UNSPEC:
435 	case AF_UNIX:
436 		return false;
437 	default:
438 		return true;
439 	}
440 }
441 
442 static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
443 {
444 	if (sk->sk_flags & flags) {
445 		sk->sk_flags &= ~flags;
446 		if (sock_needs_netstamp(sk) &&
447 		    !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
448 			net_disable_timestamp();
449 	}
450 }
451 
452 
453 int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
454 {
455 	unsigned long flags;
456 	struct sk_buff_head *list = &sk->sk_receive_queue;
457 
458 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
459 		atomic_inc(&sk->sk_drops);
460 		trace_sock_rcvqueue_full(sk, skb);
461 		return -ENOMEM;
462 	}
463 
464 	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
465 		atomic_inc(&sk->sk_drops);
466 		return -ENOBUFS;
467 	}
468 
469 	skb->dev = NULL;
470 	skb_set_owner_r(skb, sk);
471 
472 	/* We escape from the RCU-protected region, so make sure we don't leak
473 	 * a non-refcounted dst.
474 	 */
475 	skb_dst_force(skb);
476 
477 	spin_lock_irqsave(&list->lock, flags);
478 	sock_skb_set_dropcount(sk, skb);
479 	__skb_queue_tail(list, skb);
480 	spin_unlock_irqrestore(&list->lock, flags);
481 
482 	if (!sock_flag(sk, SOCK_DEAD))
483 		sk->sk_data_ready(sk);
484 	return 0;
485 }
486 EXPORT_SYMBOL(__sock_queue_rcv_skb);
487 
488 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
489 {
490 	int err;
491 
492 	err = sk_filter(sk, skb);
493 	if (err)
494 		return err;
495 
496 	return __sock_queue_rcv_skb(sk, skb);
497 }
498 EXPORT_SYMBOL(sock_queue_rcv_skb);
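/* Illustrative use (editor's sketch, simplified from what datagram protocols
 * do): a protocol's receive handler hands a fully built skb to the socket
 * layer and frees it itself only on failure:
 *
 *	if (sock_queue_rcv_skb(sk, skb) < 0) {
 *		kfree_skb(skb);		// filtered out or receive queue full
 *		return -1;
 *	}
 *	return 0;			// queued and the reader woken up
 */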
499 
500 int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
501 		     const int nested, unsigned int trim_cap, bool refcounted)
502 {
503 	int rc = NET_RX_SUCCESS;
504 
505 	if (sk_filter_trim_cap(sk, skb, trim_cap))
506 		goto discard_and_relse;
507 
508 	skb->dev = NULL;
509 
510 	if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
511 		atomic_inc(&sk->sk_drops);
512 		goto discard_and_relse;
513 	}
514 	if (nested)
515 		bh_lock_sock_nested(sk);
516 	else
517 		bh_lock_sock(sk);
518 	if (!sock_owned_by_user(sk)) {
519 		/*
520 		 * trylock + unlock semantics:
521 		 */
522 		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
523 
524 		rc = sk_backlog_rcv(sk, skb);
525 
526 		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
527 	} else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
528 		bh_unlock_sock(sk);
529 		atomic_inc(&sk->sk_drops);
530 		goto discard_and_relse;
531 	}
532 
533 	bh_unlock_sock(sk);
534 out:
535 	if (refcounted)
536 		sock_put(sk);
537 	return rc;
538 discard_and_relse:
539 	kfree_skb(skb);
540 	goto out;
541 }
542 EXPORT_SYMBOL(__sk_receive_skb);
543 
544 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
545 {
546 	struct dst_entry *dst = __sk_dst_get(sk);
547 
548 	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
549 		sk_tx_queue_clear(sk);
550 		sk->sk_dst_pending_confirm = 0;
551 		RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
552 		dst_release(dst);
553 		return NULL;
554 	}
555 
556 	return dst;
557 }
558 EXPORT_SYMBOL(__sk_dst_check);
559 
560 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
561 {
562 	struct dst_entry *dst = sk_dst_get(sk);
563 
564 	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
565 		sk_dst_reset(sk);
566 		dst_release(dst);
567 		return NULL;
568 	}
569 
570 	return dst;
571 }
572 EXPORT_SYMBOL(sk_dst_check);
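/* Illustrative use (editor's sketch only): output paths revalidate the cached
 * route before transmitting, roughly
 *
 *	struct dst_entry *dst = sk_dst_check(sk, 0);
 *
 *	if (!dst)
 *		dst = ...;	// re-route and sk_dst_set(sk, dst): cache went stale
 *
 * so an obsolete dst is dropped here and rebuilt by the caller.
 */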
573 
574 static int sock_setbindtodevice(struct sock *sk, char __user *optval,
575 				int optlen)
576 {
577 	int ret = -ENOPROTOOPT;
578 #ifdef CONFIG_NETDEVICES
579 	struct net *net = sock_net(sk);
580 	char devname[IFNAMSIZ];
581 	int index;
582 
583 	/* Sorry... */
584 	ret = -EPERM;
585 	if (!ns_capable(net->user_ns, CAP_NET_RAW))
586 		goto out;
587 
588 	ret = -EINVAL;
589 	if (optlen < 0)
590 		goto out;
591 
592 	/* Bind this socket to a particular device like "eth0",
593 	 * as specified in the passed interface name. If the
594 	 * name is "" or the option length is zero, the socket
595 	 * is not bound.
596 	 */
597 	if (optlen > IFNAMSIZ - 1)
598 		optlen = IFNAMSIZ - 1;
599 	memset(devname, 0, sizeof(devname));
600 
601 	ret = -EFAULT;
602 	if (copy_from_user(devname, optval, optlen))
603 		goto out;
604 
605 	index = 0;
606 	if (devname[0] != '\0') {
607 		struct net_device *dev;
608 
609 		rcu_read_lock();
610 		dev = dev_get_by_name_rcu(net, devname);
611 		if (dev)
612 			index = dev->ifindex;
613 		rcu_read_unlock();
614 		ret = -ENODEV;
615 		if (!dev)
616 			goto out;
617 	}
618 
619 	lock_sock(sk);
620 	sk->sk_bound_dev_if = index;
621 	sk_dst_reset(sk);
622 	release_sock(sk);
623 
624 	ret = 0;
625 
626 out:
627 #endif
628 
629 	return ret;
630 }
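/* Illustrative userspace counterpart (editor's sketch, not part of this file):
 * the handler above services a call such as
 *
 *	setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, "eth0", strlen("eth0") + 1);
 *
 * which requires CAP_NET_RAW; passing an empty string or a zero length
 * unbinds the socket from any device.
 */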
631 
632 static int sock_getbindtodevice(struct sock *sk, char __user *optval,
633 				int __user *optlen, int len)
634 {
635 	int ret = -ENOPROTOOPT;
636 #ifdef CONFIG_NETDEVICES
637 	struct net *net = sock_net(sk);
638 	char devname[IFNAMSIZ];
639 
640 	if (sk->sk_bound_dev_if == 0) {
641 		len = 0;
642 		goto zero;
643 	}
644 
645 	ret = -EINVAL;
646 	if (len < IFNAMSIZ)
647 		goto out;
648 
649 	ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
650 	if (ret)
651 		goto out;
652 
653 	len = strlen(devname) + 1;
654 
655 	ret = -EFAULT;
656 	if (copy_to_user(optval, devname, len))
657 		goto out;
658 
659 zero:
660 	ret = -EFAULT;
661 	if (put_user(len, optlen))
662 		goto out;
663 
664 	ret = 0;
665 
666 out:
667 #endif
668 
669 	return ret;
670 }
671 
672 static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
673 {
674 	if (valbool)
675 		sock_set_flag(sk, bit);
676 	else
677 		sock_reset_flag(sk, bit);
678 }
679 
680 bool sk_mc_loop(struct sock *sk)
681 {
682 	if (dev_recursion_level())
683 		return false;
684 	if (!sk)
685 		return true;
686 	switch (sk->sk_family) {
687 	case AF_INET:
688 		return inet_sk(sk)->mc_loop;
689 #if IS_ENABLED(CONFIG_IPV6)
690 	case AF_INET6:
691 		return inet6_sk(sk)->mc_loop;
692 #endif
693 	}
694 	WARN_ON(1);
695 	return true;
696 }
697 EXPORT_SYMBOL(sk_mc_loop);
698 
699 /*
700  *	This is meant for all protocols to use and covers goings on
701  *	at the socket level. Everything here is generic.
702  */
703 
704 int sock_setsockopt(struct socket *sock, int level, int optname,
705 		    char __user *optval, unsigned int optlen)
706 {
707 	struct sock *sk = sock->sk;
708 	int val;
709 	int valbool;
710 	struct linger ling;
711 	int ret = 0;
712 
713 	/*
714 	 *	Options without arguments
715 	 */
716 
717 	if (optname == SO_BINDTODEVICE)
718 		return sock_setbindtodevice(sk, optval, optlen);
719 
720 	if (optlen < sizeof(int))
721 		return -EINVAL;
722 
723 	if (get_user(val, (int __user *)optval))
724 		return -EFAULT;
725 
726 	valbool = val ? 1 : 0;
727 
728 	lock_sock(sk);
729 
730 	switch (optname) {
731 	case SO_DEBUG:
732 		if (val && !capable(CAP_NET_ADMIN))
733 			ret = -EACCES;
734 		else
735 			sock_valbool_flag(sk, SOCK_DBG, valbool);
736 		break;
737 	case SO_REUSEADDR:
738 		sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
739 		break;
740 	case SO_REUSEPORT:
741 		sk->sk_reuseport = valbool;
742 		break;
743 	case SO_TYPE:
744 	case SO_PROTOCOL:
745 	case SO_DOMAIN:
746 	case SO_ERROR:
747 		ret = -ENOPROTOOPT;
748 		break;
749 	case SO_DONTROUTE:
750 		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
751 		break;
752 	case SO_BROADCAST:
753 		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
754 		break;
755 	case SO_SNDBUF:
756 		/* Don't error on this; BSD doesn't, and if you think
757 		 * about it, this is right. Otherwise apps have to
758 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
759 		 * are treated in BSD as hints.
760 		 */
761 		val = min_t(u32, val, sysctl_wmem_max);
762 set_sndbuf:
763 		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
764 		sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
765 		/* Wake up sending tasks if we upped the value. */
766 		sk->sk_write_space(sk);
767 		break;
768 
769 	case SO_SNDBUFFORCE:
770 		if (!capable(CAP_NET_ADMIN)) {
771 			ret = -EPERM;
772 			break;
773 		}
774 		goto set_sndbuf;
775 
776 	case SO_RCVBUF:
777 		/* Don't error on this; BSD doesn't, and if you think
778 		 * about it, this is right. Otherwise apps have to
779 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
780 		 * are treated in BSD as hints.
781 		 */
782 		val = min_t(u32, val, sysctl_rmem_max);
783 set_rcvbuf:
784 		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
785 		/*
786 		 * We double it on the way in to account for
787 		 * "struct sk_buff" etc. overhead.   Applications
788 		 * assume that the SO_RCVBUF setting they make will
789 		 * allow that much actual data to be received on that
790 		 * socket.
791 		 *
792 		 * Applications are unaware that "struct sk_buff" and
793 		 * other overheads allocate from the receive buffer
794 		 * during socket buffer allocation.
795 		 *
796 		 * And after considering the possible alternatives,
797 		 * returning the value we actually used in getsockopt
798 		 * is the most desirable behavior.
799 		 */
800 		sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
801 		break;
802 
803 	case SO_RCVBUFFORCE:
804 		if (!capable(CAP_NET_ADMIN)) {
805 			ret = -EPERM;
806 			break;
807 		}
808 		goto set_rcvbuf;
809 
810 	case SO_KEEPALIVE:
811 		if (sk->sk_prot->keepalive)
812 			sk->sk_prot->keepalive(sk, valbool);
813 		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
814 		break;
815 
816 	case SO_OOBINLINE:
817 		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
818 		break;
819 
820 	case SO_NO_CHECK:
821 		sk->sk_no_check_tx = valbool;
822 		break;
823 
824 	case SO_PRIORITY:
825 		if ((val >= 0 && val <= 6) ||
826 		    ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
827 			sk->sk_priority = val;
828 		else
829 			ret = -EPERM;
830 		break;
831 
832 	case SO_LINGER:
833 		if (optlen < sizeof(ling)) {
834 			ret = -EINVAL;	/* 1003.1g */
835 			break;
836 		}
837 		if (copy_from_user(&ling, optval, sizeof(ling))) {
838 			ret = -EFAULT;
839 			break;
840 		}
841 		if (!ling.l_onoff)
842 			sock_reset_flag(sk, SOCK_LINGER);
843 		else {
844 #if (BITS_PER_LONG == 32)
845 			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
846 				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
847 			else
848 #endif
849 				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
850 			sock_set_flag(sk, SOCK_LINGER);
851 		}
852 		break;
853 
854 	case SO_BSDCOMPAT:
855 		sock_warn_obsolete_bsdism("setsockopt");
856 		break;
857 
858 	case SO_PASSCRED:
859 		if (valbool)
860 			set_bit(SOCK_PASSCRED, &sock->flags);
861 		else
862 			clear_bit(SOCK_PASSCRED, &sock->flags);
863 		break;
864 
865 	case SO_TIMESTAMP:
866 	case SO_TIMESTAMPNS:
867 		if (valbool)  {
868 			if (optname == SO_TIMESTAMP)
869 				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
870 			else
871 				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
872 			sock_set_flag(sk, SOCK_RCVTSTAMP);
873 			sock_enable_timestamp(sk, SOCK_TIMESTAMP);
874 		} else {
875 			sock_reset_flag(sk, SOCK_RCVTSTAMP);
876 			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
877 		}
878 		break;
879 
880 	case SO_TIMESTAMPING:
881 		if (val & ~SOF_TIMESTAMPING_MASK) {
882 			ret = -EINVAL;
883 			break;
884 		}
885 
886 		if (val & SOF_TIMESTAMPING_OPT_ID &&
887 		    !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
888 			if (sk->sk_protocol == IPPROTO_TCP &&
889 			    sk->sk_type == SOCK_STREAM) {
890 				if ((1 << sk->sk_state) &
891 				    (TCPF_CLOSE | TCPF_LISTEN)) {
892 					ret = -EINVAL;
893 					break;
894 				}
895 				sk->sk_tskey = tcp_sk(sk)->snd_una;
896 			} else {
897 				sk->sk_tskey = 0;
898 			}
899 		}
900 
901 		if (val & SOF_TIMESTAMPING_OPT_STATS &&
902 		    !(val & SOF_TIMESTAMPING_OPT_TSONLY)) {
903 			ret = -EINVAL;
904 			break;
905 		}
906 
907 		sk->sk_tsflags = val;
908 		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
909 			sock_enable_timestamp(sk,
910 					      SOCK_TIMESTAMPING_RX_SOFTWARE);
911 		else
912 			sock_disable_timestamp(sk,
913 					       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
914 		break;
915 
916 	case SO_RCVLOWAT:
917 		if (val < 0)
918 			val = INT_MAX;
919 		sk->sk_rcvlowat = val ? : 1;
920 		break;
921 
922 	case SO_RCVTIMEO:
923 		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
924 		break;
925 
926 	case SO_SNDTIMEO:
927 		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
928 		break;
929 
930 	case SO_ATTACH_FILTER:
931 		ret = -EINVAL;
932 		if (optlen == sizeof(struct sock_fprog)) {
933 			struct sock_fprog fprog;
934 
935 			ret = -EFAULT;
936 			if (copy_from_user(&fprog, optval, sizeof(fprog)))
937 				break;
938 
939 			ret = sk_attach_filter(&fprog, sk);
940 		}
941 		break;
942 
943 	case SO_ATTACH_BPF:
944 		ret = -EINVAL;
945 		if (optlen == sizeof(u32)) {
946 			u32 ufd;
947 
948 			ret = -EFAULT;
949 			if (copy_from_user(&ufd, optval, sizeof(ufd)))
950 				break;
951 
952 			ret = sk_attach_bpf(ufd, sk);
953 		}
954 		break;
955 
956 	case SO_ATTACH_REUSEPORT_CBPF:
957 		ret = -EINVAL;
958 		if (optlen == sizeof(struct sock_fprog)) {
959 			struct sock_fprog fprog;
960 
961 			ret = -EFAULT;
962 			if (copy_from_user(&fprog, optval, sizeof(fprog)))
963 				break;
964 
965 			ret = sk_reuseport_attach_filter(&fprog, sk);
966 		}
967 		break;
968 
969 	case SO_ATTACH_REUSEPORT_EBPF:
970 		ret = -EINVAL;
971 		if (optlen == sizeof(u32)) {
972 			u32 ufd;
973 
974 			ret = -EFAULT;
975 			if (copy_from_user(&ufd, optval, sizeof(ufd)))
976 				break;
977 
978 			ret = sk_reuseport_attach_bpf(ufd, sk);
979 		}
980 		break;
981 
982 	case SO_DETACH_FILTER:
983 		ret = sk_detach_filter(sk);
984 		break;
985 
986 	case SO_LOCK_FILTER:
987 		if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
988 			ret = -EPERM;
989 		else
990 			sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
991 		break;
992 
993 	case SO_PASSSEC:
994 		if (valbool)
995 			set_bit(SOCK_PASSSEC, &sock->flags);
996 		else
997 			clear_bit(SOCK_PASSSEC, &sock->flags);
998 		break;
999 	case SO_MARK:
1000 		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1001 			ret = -EPERM;
1002 		else
1003 			sk->sk_mark = val;
1004 		break;
1005 
1006 	case SO_RXQ_OVFL:
1007 		sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
1008 		break;
1009 
1010 	case SO_WIFI_STATUS:
1011 		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
1012 		break;
1013 
1014 	case SO_PEEK_OFF:
1015 		if (sock->ops->set_peek_off)
1016 			ret = sock->ops->set_peek_off(sk, val);
1017 		else
1018 			ret = -EOPNOTSUPP;
1019 		break;
1020 
1021 	case SO_NOFCS:
1022 		sock_valbool_flag(sk, SOCK_NOFCS, valbool);
1023 		break;
1024 
1025 	case SO_SELECT_ERR_QUEUE:
1026 		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
1027 		break;
1028 
1029 #ifdef CONFIG_NET_RX_BUSY_POLL
1030 	case SO_BUSY_POLL:
1031 		/* allow unprivileged users to decrease the value */
1032 		if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
1033 			ret = -EPERM;
1034 		else {
1035 			if (val < 0)
1036 				ret = -EINVAL;
1037 			else
1038 				sk->sk_ll_usec = val;
1039 		}
1040 		break;
1041 #endif
1042 
1043 	case SO_MAX_PACING_RATE:
1044 		sk->sk_max_pacing_rate = val;
1045 		sk->sk_pacing_rate = min(sk->sk_pacing_rate,
1046 					 sk->sk_max_pacing_rate);
1047 		break;
1048 
1049 	case SO_INCOMING_CPU:
1050 		sk->sk_incoming_cpu = val;
1051 		break;
1052 
1053 	case SO_CNX_ADVICE:
1054 		if (val == 1)
1055 			dst_negative_advice(sk);
1056 		break;
1057 	default:
1058 		ret = -ENOPROTOOPT;
1059 		break;
1060 	}
1061 	release_sock(sk);
1062 	return ret;
1063 }
1064 EXPORT_SYMBOL(sock_setsockopt);
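/* Illustrative userspace view of the buffer-size doubling above (editor's
 * sketch; assumes val does not exceed sysctl_rmem_max):
 *
 *	int val = 65536, out = 0;
 *	socklen_t len = sizeof(out);
 *
 *	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
 *	getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &out, &len);
 *	// out is now 131072: the kernel stored val * 2 in sk->sk_rcvbuf to
 *	// account for struct sk_buff overhead, and getsockopt reports that.
 */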
1065 
1066 
1067 static void cred_to_ucred(struct pid *pid, const struct cred *cred,
1068 			  struct ucred *ucred)
1069 {
1070 	ucred->pid = pid_vnr(pid);
1071 	ucred->uid = ucred->gid = -1;
1072 	if (cred) {
1073 		struct user_namespace *current_ns = current_user_ns();
1074 
1075 		ucred->uid = from_kuid_munged(current_ns, cred->euid);
1076 		ucred->gid = from_kgid_munged(current_ns, cred->egid);
1077 	}
1078 }
1079 
1080 int sock_getsockopt(struct socket *sock, int level, int optname,
1081 		    char __user *optval, int __user *optlen)
1082 {
1083 	struct sock *sk = sock->sk;
1084 
1085 	union {
1086 		int val;
1087 		u64 val64;
1088 		struct linger ling;
1089 		struct timeval tm;
1090 	} v;
1091 
1092 	int lv = sizeof(int);
1093 	int len;
1094 
1095 	if (get_user(len, optlen))
1096 		return -EFAULT;
1097 	if (len < 0)
1098 		return -EINVAL;
1099 
1100 	memset(&v, 0, sizeof(v));
1101 
1102 	switch (optname) {
1103 	case SO_DEBUG:
1104 		v.val = sock_flag(sk, SOCK_DBG);
1105 		break;
1106 
1107 	case SO_DONTROUTE:
1108 		v.val = sock_flag(sk, SOCK_LOCALROUTE);
1109 		break;
1110 
1111 	case SO_BROADCAST:
1112 		v.val = sock_flag(sk, SOCK_BROADCAST);
1113 		break;
1114 
1115 	case SO_SNDBUF:
1116 		v.val = sk->sk_sndbuf;
1117 		break;
1118 
1119 	case SO_RCVBUF:
1120 		v.val = sk->sk_rcvbuf;
1121 		break;
1122 
1123 	case SO_REUSEADDR:
1124 		v.val = sk->sk_reuse;
1125 		break;
1126 
1127 	case SO_REUSEPORT:
1128 		v.val = sk->sk_reuseport;
1129 		break;
1130 
1131 	case SO_KEEPALIVE:
1132 		v.val = sock_flag(sk, SOCK_KEEPOPEN);
1133 		break;
1134 
1135 	case SO_TYPE:
1136 		v.val = sk->sk_type;
1137 		break;
1138 
1139 	case SO_PROTOCOL:
1140 		v.val = sk->sk_protocol;
1141 		break;
1142 
1143 	case SO_DOMAIN:
1144 		v.val = sk->sk_family;
1145 		break;
1146 
1147 	case SO_ERROR:
1148 		v.val = -sock_error(sk);
1149 		if (v.val == 0)
1150 			v.val = xchg(&sk->sk_err_soft, 0);
1151 		break;
1152 
1153 	case SO_OOBINLINE:
1154 		v.val = sock_flag(sk, SOCK_URGINLINE);
1155 		break;
1156 
1157 	case SO_NO_CHECK:
1158 		v.val = sk->sk_no_check_tx;
1159 		break;
1160 
1161 	case SO_PRIORITY:
1162 		v.val = sk->sk_priority;
1163 		break;
1164 
1165 	case SO_LINGER:
1166 		lv		= sizeof(v.ling);
1167 		v.ling.l_onoff	= sock_flag(sk, SOCK_LINGER);
1168 		v.ling.l_linger	= sk->sk_lingertime / HZ;
1169 		break;
1170 
1171 	case SO_BSDCOMPAT:
1172 		sock_warn_obsolete_bsdism("getsockopt");
1173 		break;
1174 
1175 	case SO_TIMESTAMP:
1176 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1177 				!sock_flag(sk, SOCK_RCVTSTAMPNS);
1178 		break;
1179 
1180 	case SO_TIMESTAMPNS:
1181 		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
1182 		break;
1183 
1184 	case SO_TIMESTAMPING:
1185 		v.val = sk->sk_tsflags;
1186 		break;
1187 
1188 	case SO_RCVTIMEO:
1189 		lv = sizeof(struct timeval);
1190 		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
1191 			v.tm.tv_sec = 0;
1192 			v.tm.tv_usec = 0;
1193 		} else {
1194 			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
1195 			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * USEC_PER_SEC) / HZ;
1196 		}
1197 		break;
1198 
1199 	case SO_SNDTIMEO:
1200 		lv = sizeof(struct timeval);
1201 		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
1202 			v.tm.tv_sec = 0;
1203 			v.tm.tv_usec = 0;
1204 		} else {
1205 			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
1206 			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * USEC_PER_SEC) / HZ;
1207 		}
1208 		break;
1209 
1210 	case SO_RCVLOWAT:
1211 		v.val = sk->sk_rcvlowat;
1212 		break;
1213 
1214 	case SO_SNDLOWAT:
1215 		v.val = 1;
1216 		break;
1217 
1218 	case SO_PASSCRED:
1219 		v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
1220 		break;
1221 
1222 	case SO_PEERCRED:
1223 	{
1224 		struct ucred peercred;
1225 		if (len > sizeof(peercred))
1226 			len = sizeof(peercred);
1227 		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
1228 		if (copy_to_user(optval, &peercred, len))
1229 			return -EFAULT;
1230 		goto lenout;
1231 	}
1232 
1233 	case SO_PEERNAME:
1234 	{
1235 		char address[128];
1236 
1237 		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
1238 			return -ENOTCONN;
1239 		if (lv < len)
1240 			return -EINVAL;
1241 		if (copy_to_user(optval, address, len))
1242 			return -EFAULT;
1243 		goto lenout;
1244 	}
1245 
1246 	/* Dubious BSD thing... Probably nobody even uses it, but
1247 	 * the UNIX standard wants it for whatever reason... -DaveM
1248 	 */
1249 	case SO_ACCEPTCONN:
1250 		v.val = sk->sk_state == TCP_LISTEN;
1251 		break;
1252 
1253 	case SO_PASSSEC:
1254 		v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
1255 		break;
1256 
1257 	case SO_PEERSEC:
1258 		return security_socket_getpeersec_stream(sock, optval, optlen, len);
1259 
1260 	case SO_MARK:
1261 		v.val = sk->sk_mark;
1262 		break;
1263 
1264 	case SO_RXQ_OVFL:
1265 		v.val = sock_flag(sk, SOCK_RXQ_OVFL);
1266 		break;
1267 
1268 	case SO_WIFI_STATUS:
1269 		v.val = sock_flag(sk, SOCK_WIFI_STATUS);
1270 		break;
1271 
1272 	case SO_PEEK_OFF:
1273 		if (!sock->ops->set_peek_off)
1274 			return -EOPNOTSUPP;
1275 
1276 		v.val = sk->sk_peek_off;
1277 		break;
1278 	case SO_NOFCS:
1279 		v.val = sock_flag(sk, SOCK_NOFCS);
1280 		break;
1281 
1282 	case SO_BINDTODEVICE:
1283 		return sock_getbindtodevice(sk, optval, optlen, len);
1284 
1285 	case SO_GET_FILTER:
1286 		len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
1287 		if (len < 0)
1288 			return len;
1289 
1290 		goto lenout;
1291 
1292 	case SO_LOCK_FILTER:
1293 		v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1294 		break;
1295 
1296 	case SO_BPF_EXTENSIONS:
1297 		v.val = bpf_tell_extensions();
1298 		break;
1299 
1300 	case SO_SELECT_ERR_QUEUE:
1301 		v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1302 		break;
1303 
1304 #ifdef CONFIG_NET_RX_BUSY_POLL
1305 	case SO_BUSY_POLL:
1306 		v.val = sk->sk_ll_usec;
1307 		break;
1308 #endif
1309 
1310 	case SO_MAX_PACING_RATE:
1311 		v.val = sk->sk_max_pacing_rate;
1312 		break;
1313 
1314 	case SO_INCOMING_CPU:
1315 		v.val = sk->sk_incoming_cpu;
1316 		break;
1317 
1318 	case SO_MEMINFO:
1319 	{
1320 		u32 meminfo[SK_MEMINFO_VARS];
1321 
1322 		if (get_user(len, optlen))
1323 			return -EFAULT;
1324 
1325 		sk_get_meminfo(sk, meminfo);
1326 
1327 		len = min_t(unsigned int, len, sizeof(meminfo));
1328 		if (copy_to_user(optval, &meminfo, len))
1329 			return -EFAULT;
1330 
1331 		goto lenout;
1332 	}
1333 
1334 #ifdef CONFIG_NET_RX_BUSY_POLL
1335 	case SO_INCOMING_NAPI_ID:
1336 		v.val = READ_ONCE(sk->sk_napi_id);
1337 
1338 		/* aggregate non-NAPI IDs down to 0 */
1339 		if (v.val < MIN_NAPI_ID)
1340 			v.val = 0;
1341 
1342 		break;
1343 #endif
1344 
1345 	case SO_COOKIE:
1346 		lv = sizeof(u64);
1347 		if (len < lv)
1348 			return -EINVAL;
1349 		v.val64 = sock_gen_cookie(sk);
1350 		break;
1351 
1352 	default:
1353 		/* We implement the SO_SNDLOWAT etc to not be settable
1354 		 * (1003.1g 7).
1355 		 */
1356 		return -ENOPROTOOPT;
1357 	}
1358 
1359 	if (len > lv)
1360 		len = lv;
1361 	if (copy_to_user(optval, &v, len))
1362 		return -EFAULT;
1363 lenout:
1364 	if (put_user(len, optlen))
1365 		return -EFAULT;
1366 	return 0;
1367 }
1368 
1369 /*
1370  * Initialize an sk_lock.
1371  *
1372  * (We also register the sk_lock with the lock validator.)
1373  */
1374 static inline void sock_lock_init(struct sock *sk)
1375 {
1376 	if (sk->sk_kern_sock)
1377 		sock_lock_init_class_and_name(
1378 			sk,
1379 			af_family_kern_slock_key_strings[sk->sk_family],
1380 			af_family_kern_slock_keys + sk->sk_family,
1381 			af_family_kern_key_strings[sk->sk_family],
1382 			af_family_kern_keys + sk->sk_family);
1383 	else
1384 		sock_lock_init_class_and_name(
1385 			sk,
1386 			af_family_slock_key_strings[sk->sk_family],
1387 			af_family_slock_keys + sk->sk_family,
1388 			af_family_key_strings[sk->sk_family],
1389 			af_family_keys + sk->sk_family);
1390 }
1391 
1392 /*
1393  * Copy all fields from osk to nsk, but nsk->sk_refcnt must not change yet,
1394  * even temporarily, because of RCU lookups. sk_node should also be left as is.
1395  * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
1396  */
1397 static void sock_copy(struct sock *nsk, const struct sock *osk)
1398 {
1399 #ifdef CONFIG_SECURITY_NETWORK
1400 	void *sptr = nsk->sk_security;
1401 #endif
1402 	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
1403 
1404 	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
1405 	       osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
1406 
1407 #ifdef CONFIG_SECURITY_NETWORK
1408 	nsk->sk_security = sptr;
1409 	security_sk_clone(osk, nsk);
1410 #endif
1411 }
1412 
1413 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1414 		int family)
1415 {
1416 	struct sock *sk;
1417 	struct kmem_cache *slab;
1418 
1419 	slab = prot->slab;
1420 	if (slab != NULL) {
1421 		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
1422 		if (!sk)
1423 			return sk;
1424 		if (priority & __GFP_ZERO)
1425 			sk_prot_clear_nulls(sk, prot->obj_size);
1426 	} else
1427 		sk = kmalloc(prot->obj_size, priority);
1428 
1429 	if (sk != NULL) {
1430 		kmemcheck_annotate_bitfield(sk, flags);
1431 
1432 		if (security_sk_alloc(sk, family, priority))
1433 			goto out_free;
1434 
1435 		if (!try_module_get(prot->owner))
1436 			goto out_free_sec;
1437 		sk_tx_queue_clear(sk);
1438 	}
1439 
1440 	return sk;
1441 
1442 out_free_sec:
1443 	security_sk_free(sk);
1444 out_free:
1445 	if (slab != NULL)
1446 		kmem_cache_free(slab, sk);
1447 	else
1448 		kfree(sk);
1449 	return NULL;
1450 }
1451 
1452 static void sk_prot_free(struct proto *prot, struct sock *sk)
1453 {
1454 	struct kmem_cache *slab;
1455 	struct module *owner;
1456 
1457 	owner = prot->owner;
1458 	slab = prot->slab;
1459 
1460 	cgroup_sk_free(&sk->sk_cgrp_data);
1461 	mem_cgroup_sk_free(sk);
1462 	security_sk_free(sk);
1463 	if (slab != NULL)
1464 		kmem_cache_free(slab, sk);
1465 	else
1466 		kfree(sk);
1467 	module_put(owner);
1468 }
1469 
1470 /**
1471  *	sk_alloc - All socket objects are allocated here
1472  *	@net: the applicable net namespace
1473  *	@family: protocol family
1474  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1475  *	@prot: struct proto associated with this new sock instance
1476  *	@kern: is this to be a kernel socket?
1477  */
1478 struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1479 		      struct proto *prot, int kern)
1480 {
1481 	struct sock *sk;
1482 
1483 	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
1484 	if (sk) {
1485 		sk->sk_family = family;
1486 		/*
1487 		 * See comment in struct sock definition to understand
1488 		 * why we need sk_prot_creator -acme
1489 		 */
1490 		sk->sk_prot = sk->sk_prot_creator = prot;
1491 		sk->sk_kern_sock = kern;
1492 		sock_lock_init(sk);
1493 		sk->sk_net_refcnt = kern ? 0 : 1;
1494 		if (likely(sk->sk_net_refcnt))
1495 			get_net(net);
1496 		sock_net_set(sk, net);
1497 		atomic_set(&sk->sk_wmem_alloc, 1);
1498 
1499 		mem_cgroup_sk_alloc(sk);
1500 		cgroup_sk_alloc(&sk->sk_cgrp_data);
1501 		sock_update_classid(&sk->sk_cgrp_data);
1502 		sock_update_netprioidx(&sk->sk_cgrp_data);
1503 	}
1504 
1505 	return sk;
1506 }
1507 EXPORT_SYMBOL(sk_alloc);
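/* Illustrative use (editor's sketch, simplified from real ->create() handlers):
 * a protocol family's create routine typically pairs sk_alloc() with
 * sock_init_data(), e.g.
 *
 *	sk = sk_alloc(net, PF_EXAMPLE, GFP_KERNEL, &example_proto, kern);
 *	if (!sk)
 *		return -ENOMEM;
 *	sock_init_data(sock, sk);	// hooks sk up to the struct socket
 *
 * where PF_EXAMPLE and example_proto are placeholder names.
 */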
1508 
1509 /* Sockets having SOCK_RCU_FREE will call this function after one RCU
1510  * grace period. This is the case for UDP sockets and TCP listeners.
1511  */
1512 static void __sk_destruct(struct rcu_head *head)
1513 {
1514 	struct sock *sk = container_of(head, struct sock, sk_rcu);
1515 	struct sk_filter *filter;
1516 
1517 	if (sk->sk_destruct)
1518 		sk->sk_destruct(sk);
1519 
1520 	filter = rcu_dereference_check(sk->sk_filter,
1521 				       atomic_read(&sk->sk_wmem_alloc) == 0);
1522 	if (filter) {
1523 		sk_filter_uncharge(sk, filter);
1524 		RCU_INIT_POINTER(sk->sk_filter, NULL);
1525 	}
1526 	if (rcu_access_pointer(sk->sk_reuseport_cb))
1527 		reuseport_detach_sock(sk);
1528 
1529 	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
1530 
1531 	if (atomic_read(&sk->sk_omem_alloc))
1532 		pr_debug("%s: optmem leakage (%d bytes) detected\n",
1533 			 __func__, atomic_read(&sk->sk_omem_alloc));
1534 
1535 	if (sk->sk_frag.page) {
1536 		put_page(sk->sk_frag.page);
1537 		sk->sk_frag.page = NULL;
1538 	}
1539 
1540 	if (sk->sk_peer_cred)
1541 		put_cred(sk->sk_peer_cred);
1542 	put_pid(sk->sk_peer_pid);
1543 	if (likely(sk->sk_net_refcnt))
1544 		put_net(sock_net(sk));
1545 	sk_prot_free(sk->sk_prot_creator, sk);
1546 }
1547 
1548 void sk_destruct(struct sock *sk)
1549 {
1550 	if (sock_flag(sk, SOCK_RCU_FREE))
1551 		call_rcu(&sk->sk_rcu, __sk_destruct);
1552 	else
1553 		__sk_destruct(&sk->sk_rcu);
1554 }
1555 
1556 static void __sk_free(struct sock *sk)
1557 {
1558 	if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
1559 		sock_diag_broadcast_destroy(sk);
1560 	else
1561 		sk_destruct(sk);
1562 }
1563 
1564 void sk_free(struct sock *sk)
1565 {
1566 	/*
1567 	 * We subtract one from sk_wmem_alloc and can tell whether
1568 	 * some packets are still in some tx queue.
1569 	 * If the result is not zero, sock_wfree() will call __sk_free(sk) later.
1570 	 */
1571 	if (atomic_dec_and_test(&sk->sk_wmem_alloc))
1572 		__sk_free(sk);
1573 }
1574 EXPORT_SYMBOL(sk_free);
1575 
1576 static void sk_init_common(struct sock *sk)
1577 {
1578 	skb_queue_head_init(&sk->sk_receive_queue);
1579 	skb_queue_head_init(&sk->sk_write_queue);
1580 	skb_queue_head_init(&sk->sk_error_queue);
1581 
1582 	rwlock_init(&sk->sk_callback_lock);
1583 	lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
1584 			af_rlock_keys + sk->sk_family,
1585 			af_family_rlock_key_strings[sk->sk_family]);
1586 	lockdep_set_class_and_name(&sk->sk_write_queue.lock,
1587 			af_wlock_keys + sk->sk_family,
1588 			af_family_wlock_key_strings[sk->sk_family]);
1589 	lockdep_set_class_and_name(&sk->sk_error_queue.lock,
1590 			af_elock_keys + sk->sk_family,
1591 			af_family_elock_key_strings[sk->sk_family]);
1592 	lockdep_set_class_and_name(&sk->sk_callback_lock,
1593 			af_callback_keys + sk->sk_family,
1594 			af_family_clock_key_strings[sk->sk_family]);
1595 }
1596 
1597 /**
1598  *	sk_clone_lock - clone a socket, and lock its clone
1599  *	@sk: the socket to clone
1600  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1601  *
1602  *	Caller must unlock socket even in error path (bh_unlock_sock(newsk))
1603  */
1604 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1605 {
1606 	struct sock *newsk;
1607 	bool is_charged = true;
1608 
1609 	newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
1610 	if (newsk != NULL) {
1611 		struct sk_filter *filter;
1612 
1613 		sock_copy(newsk, sk);
1614 
1615 		/* SANITY */
1616 		if (likely(newsk->sk_net_refcnt))
1617 			get_net(sock_net(newsk));
1618 		sk_node_init(&newsk->sk_node);
1619 		sock_lock_init(newsk);
1620 		bh_lock_sock(newsk);
1621 		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
1622 		newsk->sk_backlog.len = 0;
1623 
1624 		atomic_set(&newsk->sk_rmem_alloc, 0);
1625 		/*
1626 		 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
1627 		 */
1628 		atomic_set(&newsk->sk_wmem_alloc, 1);
1629 		atomic_set(&newsk->sk_omem_alloc, 0);
1630 		sk_init_common(newsk);
1631 
1632 		newsk->sk_dst_cache	= NULL;
1633 		newsk->sk_dst_pending_confirm = 0;
1634 		newsk->sk_wmem_queued	= 0;
1635 		newsk->sk_forward_alloc = 0;
1636 		atomic_set(&newsk->sk_drops, 0);
1637 		newsk->sk_send_head	= NULL;
1638 		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
1639 
1640 		sock_reset_flag(newsk, SOCK_DONE);
1641 
1642 		filter = rcu_dereference_protected(newsk->sk_filter, 1);
1643 		if (filter != NULL)
1644 			/* Though it's an empty new sock, the charging may fail
1645 			 * if sysctl_optmem_max was changed between creation of
1646 			 * the original socket and cloning.
1647 			 */
1648 			is_charged = sk_filter_charge(newsk, filter);
1649 
1650 		if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
1651 			/* We need to make sure that we don't uncharge the new
1652 			 * socket if we couldn't charge it in the first place
1653 			 * as otherwise we uncharge the parent's filter.
1654 			 */
1655 			if (!is_charged)
1656 				RCU_INIT_POINTER(newsk->sk_filter, NULL);
1657 			sk_free_unlock_clone(newsk);
1658 			newsk = NULL;
1659 			goto out;
1660 		}
1661 		RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
1662 
1663 		newsk->sk_err	   = 0;
1664 		newsk->sk_err_soft = 0;
1665 		newsk->sk_priority = 0;
1666 		newsk->sk_incoming_cpu = raw_smp_processor_id();
1667 		atomic64_set(&newsk->sk_cookie, 0);
1668 
1669 		mem_cgroup_sk_alloc(newsk);
1670 		cgroup_sk_alloc(&newsk->sk_cgrp_data);
1671 
1672 		/*
1673 		 * Before updating sk_refcnt, we must commit prior changes to memory
1674 		 * (Documentation/RCU/rculist_nulls.txt for details)
1675 		 */
1676 		smp_wmb();
1677 		atomic_set(&newsk->sk_refcnt, 2);
1678 
1679 		/*
1680 		 * Increment the counter in the same struct proto as the master
1681 		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
1682 		 * is the same as sk->sk_prot->socks, as this field was copied
1683 		 * with memcpy).
1684 		 *
1685 		 * This _changes_ the previous behaviour, where
1686 		 * tcp_create_openreq_child was always incrementing the
1687 		 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
1688 		 * to be taken into account in all callers. -acme
1689 		 */
1690 		sk_refcnt_debug_inc(newsk);
1691 		sk_set_socket(newsk, NULL);
1692 		newsk->sk_wq = NULL;
1693 
1694 		if (newsk->sk_prot->sockets_allocated)
1695 			sk_sockets_allocated_inc(newsk);
1696 
1697 		if (sock_needs_netstamp(sk) &&
1698 		    newsk->sk_flags & SK_FLAGS_TIMESTAMP)
1699 			net_enable_timestamp();
1700 	}
1701 out:
1702 	return newsk;
1703 }
1704 EXPORT_SYMBOL_GPL(sk_clone_lock);
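/* Illustrative caller pattern (editor's sketch): connection-oriented protocols
 * clone the listener when a request is accepted, roughly
 *
 *	newsk = sk_clone_lock(listener_sk, GFP_ATOMIC);
 *	if (newsk) {
 *		...			// protocol-specific child setup
 *		bh_unlock_sock(newsk);	// caller must unlock, even on error paths
 *	}
 *
 * (TCP reaches this via inet_csk_clone_lock()).
 */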
1705 
1706 void sk_free_unlock_clone(struct sock *sk)
1707 {
1708 	/* It is still a raw copy of the parent, so invalidate
1709 	 * the destructor and do a plain sk_free(). */
1710 	sk->sk_destruct = NULL;
1711 	bh_unlock_sock(sk);
1712 	sk_free(sk);
1713 }
1714 EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
1715 
1716 void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1717 {
1718 	u32 max_segs = 1;
1719 
1720 	sk_dst_set(sk, dst);
1721 	sk->sk_route_caps = dst->dev->features;
1722 	if (sk->sk_route_caps & NETIF_F_GSO)
1723 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
1724 	sk->sk_route_caps &= ~sk->sk_route_nocaps;
1725 	if (sk_can_gso(sk)) {
1726 		if (dst->header_len) {
1727 			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1728 		} else {
1729 			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
1730 			sk->sk_gso_max_size = dst->dev->gso_max_size;
1731 			max_segs = max_t(u32, dst->dev->gso_max_segs, 1);
1732 		}
1733 	}
1734 	sk->sk_gso_max_segs = max_segs;
1735 }
1736 EXPORT_SYMBOL_GPL(sk_setup_caps);
1737 
1738 /*
1739  *	Simple resource managers for sockets.
1740  */
1741 
1742 
1743 /*
1744  * Write buffer destructor automatically called from kfree_skb.
1745  */
1746 void sock_wfree(struct sk_buff *skb)
1747 {
1748 	struct sock *sk = skb->sk;
1749 	unsigned int len = skb->truesize;
1750 
1751 	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
1752 		/*
1753 		 * Keep a reference on sk_wmem_alloc; it will be released
1754 		 * after the sk_write_space() call.
1755 		 */
1756 		atomic_sub(len - 1, &sk->sk_wmem_alloc);
1757 		sk->sk_write_space(sk);
1758 		len = 1;
1759 	}
1760 	/*
1761 	 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
1762 	 * could not do because of in-flight packets
1763 	 */
1764 	if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
1765 		__sk_free(sk);
1766 }
1767 EXPORT_SYMBOL(sock_wfree);
1768 
1769 /* This variant of sock_wfree() is used by TCP,
1770  * since it sets SOCK_USE_WRITE_QUEUE.
1771  */
1772 void __sock_wfree(struct sk_buff *skb)
1773 {
1774 	struct sock *sk = skb->sk;
1775 
1776 	if (atomic_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
1777 		__sk_free(sk);
1778 }
1779 
1780 void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
1781 {
1782 	skb_orphan(skb);
1783 	skb->sk = sk;
1784 #ifdef CONFIG_INET
1785 	if (unlikely(!sk_fullsock(sk))) {
1786 		skb->destructor = sock_edemux;
1787 		sock_hold(sk);
1788 		return;
1789 	}
1790 #endif
1791 	skb->destructor = sock_wfree;
1792 	skb_set_hash_from_sk(skb, sk);
1793 	/*
1794 	 * We used to take a refcount on sk, but the following operation
1795 	 * is enough to guarantee sk_free() won't free this sock until
1796 	 * all in-flight packets are completed.
1797 	 */
1798 	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
1799 }
1800 EXPORT_SYMBOL(skb_set_owner_w);
1801 
1802 /* This helper is used by netem, as it can hold packets in its
1803  * delay queue. We want to allow the owner socket to send more
1804  * packets, as if they were already TX completed by a typical driver.
1805  * But we also want to keep skb->sk set because some packet schedulers
1806  * rely on it (sch_fq for example). So we set skb->truesize to a small
1807  * amount (1) and decrease sk_wmem_alloc accordingly.
1808  */
1809 void skb_orphan_partial(struct sk_buff *skb)
1810 {
1811 	/* If this skb is a TCP pure ACK or has already been through here,
1812 	 * we have nothing to do. 2 is already a very small truesize.
1813 	 */
1814 	if (skb->truesize <= 2)
1815 		return;
1816 
1817 	/* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
1818 	 * so we do not completely orphan the skb, but transfer all
1819 	 * accounted bytes but one, to avoid unexpected reorders.
1820 	 */
1821 	if (skb->destructor == sock_wfree
1822 #ifdef CONFIG_INET
1823 	    || skb->destructor == tcp_wfree
1824 #endif
1825 		) {
1826 		atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc);
1827 		skb->truesize = 1;
1828 	} else {
1829 		skb_orphan(skb);
1830 	}
1831 }
1832 EXPORT_SYMBOL(skb_orphan_partial);
1833 
1834 /*
1835  * Read buffer destructor automatically called from kfree_skb.
1836  */
1837 void sock_rfree(struct sk_buff *skb)
1838 {
1839 	struct sock *sk = skb->sk;
1840 	unsigned int len = skb->truesize;
1841 
1842 	atomic_sub(len, &sk->sk_rmem_alloc);
1843 	sk_mem_uncharge(sk, len);
1844 }
1845 EXPORT_SYMBOL(sock_rfree);
1846 
1847 /*
1848  * Buffer destructor for skbs that are not used directly in read or write
1849  * path, e.g. for error handler skbs. Automatically called from kfree_skb.
1850  */
1851 void sock_efree(struct sk_buff *skb)
1852 {
1853 	sock_put(skb->sk);
1854 }
1855 EXPORT_SYMBOL(sock_efree);
1856 
1857 kuid_t sock_i_uid(struct sock *sk)
1858 {
1859 	kuid_t uid;
1860 
1861 	read_lock_bh(&sk->sk_callback_lock);
1862 	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
1863 	read_unlock_bh(&sk->sk_callback_lock);
1864 	return uid;
1865 }
1866 EXPORT_SYMBOL(sock_i_uid);
1867 
1868 unsigned long sock_i_ino(struct sock *sk)
1869 {
1870 	unsigned long ino;
1871 
1872 	read_lock_bh(&sk->sk_callback_lock);
1873 	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1874 	read_unlock_bh(&sk->sk_callback_lock);
1875 	return ino;
1876 }
1877 EXPORT_SYMBOL(sock_i_ino);
1878 
1879 /*
1880  * Allocate a skb from the socket's send buffer.
1881  */
1882 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1883 			     gfp_t priority)
1884 {
1885 	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1886 		struct sk_buff *skb = alloc_skb(size, priority);
1887 		if (skb) {
1888 			skb_set_owner_w(skb, sk);
1889 			return skb;
1890 		}
1891 	}
1892 	return NULL;
1893 }
1894 EXPORT_SYMBOL(sock_wmalloc);
1895 
1896 /*
1897  * Allocate a memory block from the socket's option memory buffer.
1898  */
1899 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1900 {
1901 	if ((unsigned int)size <= sysctl_optmem_max &&
1902 	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1903 		void *mem;
1904 		/* First do the add, to avoid the race if kmalloc
1905 		 * might sleep.
1906 		 */
1907 		atomic_add(size, &sk->sk_omem_alloc);
1908 		mem = kmalloc(size, priority);
1909 		if (mem)
1910 			return mem;
1911 		atomic_sub(size, &sk->sk_omem_alloc);
1912 	}
1913 	return NULL;
1914 }
1915 EXPORT_SYMBOL(sock_kmalloc);
1916 
1917 /* Free an option memory block. Note, we actually want the inline
1918  * here as this allows gcc to detect the nullify and fold away the
1919  * condition entirely.
1920  */
1921 static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
1922 				  const bool nullify)
1923 {
1924 	if (WARN_ON_ONCE(!mem))
1925 		return;
1926 	if (nullify)
1927 		kzfree(mem);
1928 	else
1929 		kfree(mem);
1930 	atomic_sub(size, &sk->sk_omem_alloc);
1931 }
1932 
1933 void sock_kfree_s(struct sock *sk, void *mem, int size)
1934 {
1935 	__sock_kfree_s(sk, mem, size, false);
1936 }
1937 EXPORT_SYMBOL(sock_kfree_s);
1938 
1939 void sock_kzfree_s(struct sock *sk, void *mem, int size)
1940 {
1941 	__sock_kfree_s(sk, mem, size, true);
1942 }
1943 EXPORT_SYMBOL(sock_kzfree_s);
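
/* Illustrative sketch (not compiled here): a setsockopt() handler keeping a
 * user-supplied blob in socket option memory, pairing sock_kmalloc() with
 * sock_kfree_s().  example_store_opt() and its arguments are hypothetical.
 */
#if 0
static int example_store_opt(struct sock *sk, char __user *optval, int optlen)
{
	void *buf;

	/* Charges sk_omem_alloc; fails once sysctl_optmem_max is exceeded. */
	buf = sock_kmalloc(sk, optlen, GFP_KERNEL);
	if (!buf)
		return -ENOBUFS;

	if (copy_from_user(buf, optval, optlen)) {
		sock_kfree_s(sk, buf, optlen);	/* uncharge and free */
		return -EFAULT;
	}

	/* ... hand buf over to the protocol; it must eventually be freed
	 * with sock_kfree_s(sk, buf, optlen), or sock_kzfree_s() for keys.
	 */
	return 0;
}
#endif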
1944 
1945 /* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
1946    I think these locks should be removed for datagram sockets.
1947  */
1948 static long sock_wait_for_wmem(struct sock *sk, long timeo)
1949 {
1950 	DEFINE_WAIT(wait);
1951 
1952 	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1953 	for (;;) {
1954 		if (!timeo)
1955 			break;
1956 		if (signal_pending(current))
1957 			break;
1958 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1959 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1960 		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1961 			break;
1962 		if (sk->sk_shutdown & SEND_SHUTDOWN)
1963 			break;
1964 		if (sk->sk_err)
1965 			break;
1966 		timeo = schedule_timeout(timeo);
1967 	}
1968 	finish_wait(sk_sleep(sk), &wait);
1969 	return timeo;
1970 }
1971 
1972 
1973 /*
1974  *	Generic send/receive buffer handlers
1975  */
1976 
1977 struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1978 				     unsigned long data_len, int noblock,
1979 				     int *errcode, int max_page_order)
1980 {
1981 	struct sk_buff *skb;
1982 	long timeo;
1983 	int err;
1984 
1985 	timeo = sock_sndtimeo(sk, noblock);
1986 	for (;;) {
1987 		err = sock_error(sk);
1988 		if (err != 0)
1989 			goto failure;
1990 
1991 		err = -EPIPE;
1992 		if (sk->sk_shutdown & SEND_SHUTDOWN)
1993 			goto failure;
1994 
1995 		if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
1996 			break;
1997 
1998 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1999 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
2000 		err = -EAGAIN;
2001 		if (!timeo)
2002 			goto failure;
2003 		if (signal_pending(current))
2004 			goto interrupted;
2005 		timeo = sock_wait_for_wmem(sk, timeo);
2006 	}
2007 	skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
2008 				   errcode, sk->sk_allocation);
2009 	if (skb)
2010 		skb_set_owner_w(skb, sk);
2011 	return skb;
2012 
2013 interrupted:
2014 	err = sock_intr_errno(timeo);
2015 failure:
2016 	*errcode = err;
2017 	return NULL;
2018 }
2019 EXPORT_SYMBOL(sock_alloc_send_pskb);
2020 
2021 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
2022 				    int noblock, int *errcode)
2023 {
2024 	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
2025 }
2026 EXPORT_SYMBOL(sock_alloc_send_skb);
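
/* Illustrative sketch: the usual shape of a datagram sendmsg() path built on
 * sock_alloc_send_skb().  example_hdr_len and example_xmit() are hypothetical
 * placeholders for protocol specifics.
 */
#if 0
static int example_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	unsigned int example_hdr_len = 16;	/* hypothetical header room */
	struct sk_buff *skb;
	int err;

	skb = sock_alloc_send_skb(sk, example_hdr_len + len,
				  msg->msg_flags & MSG_DONTWAIT, &err);
	if (!skb)
		return err;			/* -EAGAIN, -EPIPE, ... */

	skb_reserve(skb, example_hdr_len);
	err = memcpy_from_msg(skb_put(skb, len), msg, len);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	return example_xmit(skb);		/* hypothetical transmit hook */
}
#endif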
2027 
2028 int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
2029 		     struct sockcm_cookie *sockc)
2030 {
2031 	u32 tsflags;
2032 
2033 	switch (cmsg->cmsg_type) {
2034 	case SO_MARK:
2035 		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2036 			return -EPERM;
2037 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
2038 			return -EINVAL;
2039 		sockc->mark = *(u32 *)CMSG_DATA(cmsg);
2040 		break;
2041 	case SO_TIMESTAMPING:
2042 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
2043 			return -EINVAL;
2044 
2045 		tsflags = *(u32 *)CMSG_DATA(cmsg);
2046 		if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
2047 			return -EINVAL;
2048 
2049 		sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
2050 		sockc->tsflags |= tsflags;
2051 		break;
2052 	/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
2053 	case SCM_RIGHTS:
2054 	case SCM_CREDENTIALS:
2055 		break;
2056 	default:
2057 		return -EINVAL;
2058 	}
2059 	return 0;
2060 }
2061 EXPORT_SYMBOL(__sock_cmsg_send);
2062 
2063 int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
2064 		   struct sockcm_cookie *sockc)
2065 {
2066 	struct cmsghdr *cmsg;
2067 	int ret;
2068 
2069 	for_each_cmsghdr(cmsg, msg) {
2070 		if (!CMSG_OK(msg, cmsg))
2071 			return -EINVAL;
2072 		if (cmsg->cmsg_level != SOL_SOCKET)
2073 			continue;
2074 		ret = __sock_cmsg_send(sk, msg, cmsg, sockc);
2075 		if (ret)
2076 			return ret;
2077 	}
2078 	return 0;
2079 }
2080 EXPORT_SYMBOL(sock_cmsg_send);
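
/* Illustrative sketch: how a sendmsg() implementation typically consumes
 * SOL_SOCKET control messages via sock_cmsg_send() before building packets.
 * example_handle_cmsgs() is a hypothetical helper.
 */
#if 0
static int example_handle_cmsgs(struct sock *sk, struct msghdr *msg)
{
	struct sockcm_cookie sockc = { .tsflags = sk->sk_tsflags };
	int err;

	if (msg->msg_controllen) {
		err = sock_cmsg_send(sk, msg, &sockc);
		if (err)
			return err;
	}

	/* sockc.mark / sockc.tsflags now override the socket defaults
	 * for this single call.
	 */
	return 0;
}
#endif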
2081 
2082 /* On 32bit arches, an skb frag is limited to 2^15 bytes */
2083 #define SKB_FRAG_PAGE_ORDER	get_order(32768)
2084 
2085 /**
2086  * skb_page_frag_refill - check that a page_frag contains enough room
2087  * @sz: minimum size of the fragment we want to get
2088  * @pfrag: pointer to page_frag
2089  * @gfp: priority for memory allocation
2090  *
2091  * Note: While this allocator tries to use high order pages, there is
2092  * no guarantee that allocations succeed. Therefore, @sz MUST be
2093  * less than or equal to PAGE_SIZE.
2094  */
2095 bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
2096 {
2097 	if (pfrag->page) {
2098 		if (page_ref_count(pfrag->page) == 1) {
2099 			pfrag->offset = 0;
2100 			return true;
2101 		}
2102 		if (pfrag->offset + sz <= pfrag->size)
2103 			return true;
2104 		put_page(pfrag->page);
2105 	}
2106 
2107 	pfrag->offset = 0;
2108 	if (SKB_FRAG_PAGE_ORDER) {
2109 		/* Avoid direct reclaim but allow kswapd to wake */
2110 		pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
2111 					  __GFP_COMP | __GFP_NOWARN |
2112 					  __GFP_NORETRY,
2113 					  SKB_FRAG_PAGE_ORDER);
2114 		if (likely(pfrag->page)) {
2115 			pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
2116 			return true;
2117 		}
2118 	}
2119 	pfrag->page = alloc_page(gfp);
2120 	if (likely(pfrag->page)) {
2121 		pfrag->size = PAGE_SIZE;
2122 		return true;
2123 	}
2124 	return false;
2125 }
2126 EXPORT_SYMBOL(skb_page_frag_refill);
2127 
2128 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
2129 {
2130 	if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
2131 		return true;
2132 
2133 	sk_enter_memory_pressure(sk);
2134 	sk_stream_moderate_sndbuf(sk);
2135 	return false;
2136 }
2137 EXPORT_SYMBOL(sk_page_frag_refill);
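
/* Illustrative sketch: copying user data into the per-socket page fragment,
 * roughly the pattern used by stream protocols.  example_append() is
 * hypothetical, and real callers also attach the page to an skb frag.
 */
#if 0
static int example_append(struct sock *sk, struct msghdr *msg, int copy)
{
	struct page_frag *pfrag = sk_page_frag(sk);

	if (!sk_page_frag_refill(sk, pfrag))
		return -ENOMEM;		/* memory pressure was signalled */

	copy = min_t(int, copy, pfrag->size - pfrag->offset);
	if (copy_page_from_iter(pfrag->page, pfrag->offset, copy,
				&msg->msg_iter) != copy)
		return -EFAULT;

	/* A real caller would now reference pfrag->page, attach it at
	 * pfrag->offset to an skb frag, and only then advance the offset.
	 */
	pfrag->offset += copy;
	return copy;
}
#endif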
2138 
2139 static void __lock_sock(struct sock *sk)
2140 	__releases(&sk->sk_lock.slock)
2141 	__acquires(&sk->sk_lock.slock)
2142 {
2143 	DEFINE_WAIT(wait);
2144 
2145 	for (;;) {
2146 		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
2147 					TASK_UNINTERRUPTIBLE);
2148 		spin_unlock_bh(&sk->sk_lock.slock);
2149 		schedule();
2150 		spin_lock_bh(&sk->sk_lock.slock);
2151 		if (!sock_owned_by_user(sk))
2152 			break;
2153 	}
2154 	finish_wait(&sk->sk_lock.wq, &wait);
2155 }
2156 
2157 static void __release_sock(struct sock *sk)
2158 	__releases(&sk->sk_lock.slock)
2159 	__acquires(&sk->sk_lock.slock)
2160 {
2161 	struct sk_buff *skb, *next;
2162 
2163 	while ((skb = sk->sk_backlog.head) != NULL) {
2164 		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
2165 
2166 		spin_unlock_bh(&sk->sk_lock.slock);
2167 
2168 		do {
2169 			next = skb->next;
2170 			prefetch(next);
2171 			WARN_ON_ONCE(skb_dst_is_noref(skb));
2172 			skb->next = NULL;
2173 			sk_backlog_rcv(sk, skb);
2174 
2175 			cond_resched();
2176 
2177 			skb = next;
2178 		} while (skb != NULL);
2179 
2180 		spin_lock_bh(&sk->sk_lock.slock);
2181 	}
2182 
2183 	/*
2184 	 * Doing the zeroing here guarantees we cannot loop forever
2185 	 * while a wild producer attempts to flood us.
2186 	 */
2187 	sk->sk_backlog.len = 0;
2188 }
2189 
2190 void __sk_flush_backlog(struct sock *sk)
2191 {
2192 	spin_lock_bh(&sk->sk_lock.slock);
2193 	__release_sock(sk);
2194 	spin_unlock_bh(&sk->sk_lock.slock);
2195 }
2196 
2197 /**
2198  * sk_wait_data - wait for data to arrive at sk_receive_queue
2199  * @sk:    sock to wait on
2200  * @timeo: for how long
2201  * @skb:   last skb seen on sk_receive_queue
2202  *
2203  * Now the socket state, including sk->sk_err, is changed only under the
2204  * socket lock, hence we may omit checks after joining the wait queue.
2205  * We check the receive queue before schedule() only as an optimization;
2206  * it is very likely that release_sock() added new data.
2207  */
2208 int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
2209 {
2210 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
2211 	int rc;
2212 
2213 	add_wait_queue(sk_sleep(sk), &wait);
2214 	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2215 	rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
2216 	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2217 	remove_wait_queue(sk_sleep(sk), &wait);
2218 	return rc;
2219 }
2220 EXPORT_SYMBOL(sk_wait_data);
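
/* Illustrative sketch: the canonical blocking-receive loop built around
 * sk_wait_data(), called with the socket lock held.  example_wait_for_skb()
 * is hypothetical; real protocols add their own state checks.
 */
#if 0
static struct sk_buff *example_wait_for_skb(struct sock *sk, int noblock,
					    int *err)
{
	long timeo = sock_rcvtimeo(sk, noblock);
	struct sk_buff *skb;

	while (!(skb = skb_peek(&sk->sk_receive_queue))) {
		if (sk->sk_err) {
			*err = sock_error(sk);
			return NULL;
		}
		if (!timeo) {
			*err = -EAGAIN;
			return NULL;
		}
		if (signal_pending(current)) {
			*err = sock_intr_errno(timeo);
			return NULL;
		}
		/* Drops the socket lock while sleeping, reacquires it. */
		sk_wait_data(sk, &timeo, NULL);
	}
	return skb;
}
#endif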
2221 
2222 /**
2223  *	__sk_mem_raise_allocated - increase memory_allocated
2224  *	@sk: socket
2225  *	@size: memory size to allocate
2226  *	@amt: pages to allocate
2227  *	@kind: allocation type
2228  *
2229  *	Similar to __sk_mem_schedule(), but does not update sk_forward_alloc
2230  */
2231 int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
2232 {
2233 	struct proto *prot = sk->sk_prot;
2234 	long allocated = sk_memory_allocated_add(sk, amt);
2235 
2236 	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
2237 	    !mem_cgroup_charge_skmem(sk->sk_memcg, amt))
2238 		goto suppress_allocation;
2239 
2240 	/* Under limit. */
2241 	if (allocated <= sk_prot_mem_limits(sk, 0)) {
2242 		sk_leave_memory_pressure(sk);
2243 		return 1;
2244 	}
2245 
2246 	/* Under pressure. */
2247 	if (allocated > sk_prot_mem_limits(sk, 1))
2248 		sk_enter_memory_pressure(sk);
2249 
2250 	/* Over hard limit. */
2251 	if (allocated > sk_prot_mem_limits(sk, 2))
2252 		goto suppress_allocation;
2253 
2254 	/* guarantee minimum buffer size under pressure */
2255 	if (kind == SK_MEM_RECV) {
2256 		if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
2257 			return 1;
2258 
2259 	} else { /* SK_MEM_SEND */
2260 		if (sk->sk_type == SOCK_STREAM) {
2261 			if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
2262 				return 1;
2263 		} else if (atomic_read(&sk->sk_wmem_alloc) <
2264 			   prot->sysctl_wmem[0])
2265 				return 1;
2266 	}
2267 
2268 	if (sk_has_memory_pressure(sk)) {
2269 		int alloc;
2270 
2271 		if (!sk_under_memory_pressure(sk))
2272 			return 1;
2273 		alloc = sk_sockets_allocated_read_positive(sk);
2274 		if (sk_prot_mem_limits(sk, 2) > alloc *
2275 		    sk_mem_pages(sk->sk_wmem_queued +
2276 				 atomic_read(&sk->sk_rmem_alloc) +
2277 				 sk->sk_forward_alloc))
2278 			return 1;
2279 	}
2280 
2281 suppress_allocation:
2282 
2283 	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
2284 		sk_stream_moderate_sndbuf(sk);
2285 
2286 		/* Fail only if socket is _under_ its sndbuf.
2287 		 * In this case we cannot block, so we have to fail.
2288 		 */
2289 		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
2290 			return 1;
2291 	}
2292 
2293 	trace_sock_exceed_buf_limit(sk, prot, allocated);
2294 
2295 	sk_memory_allocated_sub(sk, amt);
2296 
2297 	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
2298 		mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);
2299 
2300 	return 0;
2301 }
2302 EXPORT_SYMBOL(__sk_mem_raise_allocated);
2303 
2304 /**
2305  *	__sk_mem_schedule - increase sk_forward_alloc and memory_allocated
2306  *	@sk: socket
2307  *	@size: memory size to allocate
2308  *	@kind: allocation type
2309  *
2310  *	If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
2311  *	rmem allocation. This function assumes that protocols which have
2312  *	memory_pressure use sk_wmem_queued as write buffer accounting.
2313  */
2314 int __sk_mem_schedule(struct sock *sk, int size, int kind)
2315 {
2316 	int ret, amt = sk_mem_pages(size);
2317 
2318 	sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT;
2319 	ret = __sk_mem_raise_allocated(sk, size, amt, kind);
2320 	if (!ret)
2321 		sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT;
2322 	return ret;
2323 }
2324 EXPORT_SYMBOL(__sk_mem_schedule);
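
/* Illustrative sketch: roughly how a protocol receive path charges an
 * incoming skb against the protocol memory limits before queueing it.
 * sk_rmem_schedule() ends up in __sk_mem_schedule(..., SK_MEM_RECV) whenever
 * sk_forward_alloc cannot cover skb->truesize.  example_charge_and_queue()
 * is hypothetical and omits protocol-specific checks.
 */
#if 0
static int example_charge_and_queue(struct sock *sk, struct sk_buff *skb)
{
	if (!sk_rmem_schedule(sk, skb, skb->truesize))
		return -ENOBUFS;

	skb_set_owner_r(skb, sk);	/* accounts truesize in sk_rmem_alloc */
	skb_queue_tail(&sk->sk_receive_queue, skb);
	return 0;
}
#endif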
2325 
2326 /**
2327  *	__sk_mem_reduce_allocated - reclaim memory_allocated
2328  *	@sk: socket
2329  *	@amount: number of quanta
2330  *
2331  *	Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc
2332  */
2333 void __sk_mem_reduce_allocated(struct sock *sk, int amount)
2334 {
2335 	sk_memory_allocated_sub(sk, amount);
2336 
2337 	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
2338 		mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
2339 
2340 	if (sk_under_memory_pressure(sk) &&
2341 	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
2342 		sk_leave_memory_pressure(sk);
2343 }
2344 EXPORT_SYMBOL(__sk_mem_reduce_allocated);
2345 
2346 /**
2347  *	__sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
2348  *	@sk: socket
2349  *	@amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
2350  */
2351 void __sk_mem_reclaim(struct sock *sk, int amount)
2352 {
2353 	amount >>= SK_MEM_QUANTUM_SHIFT;
2354 	sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
2355 	__sk_mem_reduce_allocated(sk, amount);
2356 }
2357 EXPORT_SYMBOL(__sk_mem_reclaim);
2358 
2359 int sk_set_peek_off(struct sock *sk, int val)
2360 {
2361 	if (val < 0)
2362 		return -EINVAL;
2363 
2364 	sk->sk_peek_off = val;
2365 	return 0;
2366 }
2367 EXPORT_SYMBOL_GPL(sk_set_peek_off);
2368 
2369 /*
2370  * Set of default routines for initialising struct proto_ops when
2371  * the protocol does not support a particular function. In certain
2372  * cases where it makes no sense for a protocol to have a "do nothing"
2373  * function, some default processing is provided.
2374  */
2375 
2376 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
2377 {
2378 	return -EOPNOTSUPP;
2379 }
2380 EXPORT_SYMBOL(sock_no_bind);
2381 
2382 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
2383 		    int len, int flags)
2384 {
2385 	return -EOPNOTSUPP;
2386 }
2387 EXPORT_SYMBOL(sock_no_connect);
2388 
2389 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
2390 {
2391 	return -EOPNOTSUPP;
2392 }
2393 EXPORT_SYMBOL(sock_no_socketpair);
2394 
2395 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
2396 		   bool kern)
2397 {
2398 	return -EOPNOTSUPP;
2399 }
2400 EXPORT_SYMBOL(sock_no_accept);
2401 
2402 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
2403 		    int *len, int peer)
2404 {
2405 	return -EOPNOTSUPP;
2406 }
2407 EXPORT_SYMBOL(sock_no_getname);
2408 
2409 unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
2410 {
2411 	return 0;
2412 }
2413 EXPORT_SYMBOL(sock_no_poll);
2414 
2415 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2416 {
2417 	return -EOPNOTSUPP;
2418 }
2419 EXPORT_SYMBOL(sock_no_ioctl);
2420 
2421 int sock_no_listen(struct socket *sock, int backlog)
2422 {
2423 	return -EOPNOTSUPP;
2424 }
2425 EXPORT_SYMBOL(sock_no_listen);
2426 
2427 int sock_no_shutdown(struct socket *sock, int how)
2428 {
2429 	return -EOPNOTSUPP;
2430 }
2431 EXPORT_SYMBOL(sock_no_shutdown);
2432 
2433 int sock_no_setsockopt(struct socket *sock, int level, int optname,
2434 		    char __user *optval, unsigned int optlen)
2435 {
2436 	return -EOPNOTSUPP;
2437 }
2438 EXPORT_SYMBOL(sock_no_setsockopt);
2439 
2440 int sock_no_getsockopt(struct socket *sock, int level, int optname,
2441 		    char __user *optval, int __user *optlen)
2442 {
2443 	return -EOPNOTSUPP;
2444 }
2445 EXPORT_SYMBOL(sock_no_getsockopt);
2446 
2447 int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
2448 {
2449 	return -EOPNOTSUPP;
2450 }
2451 EXPORT_SYMBOL(sock_no_sendmsg);
2452 
2453 int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
2454 		    int flags)
2455 {
2456 	return -EOPNOTSUPP;
2457 }
2458 EXPORT_SYMBOL(sock_no_recvmsg);
2459 
2460 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
2461 {
2462 	/* Mirror missing mmap method error code */
2463 	return -ENODEV;
2464 }
2465 EXPORT_SYMBOL(sock_no_mmap);
2466 
2467 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
2468 {
2469 	ssize_t res;
2470 	struct msghdr msg = {.msg_flags = flags};
2471 	struct kvec iov;
2472 	char *kaddr = kmap(page);
2473 	iov.iov_base = kaddr + offset;
2474 	iov.iov_len = size;
2475 	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
2476 	kunmap(page);
2477 	return res;
2478 }
2479 EXPORT_SYMBOL(sock_no_sendpage);
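
/* Illustrative sketch: a minimal proto_ops for a datagram-only family,
 * filling unsupported operations with the sock_no_*() stubs above and the
 * sock_common_*() helpers defined later in this file.  All example_* names
 * are hypothetical.
 */
#if 0
static const struct proto_ops example_dgram_ops = {
	.family		= PF_PACKET,		/* any registered family */
	.owner		= THIS_MODULE,
	.release	= example_release,
	.bind		= example_bind,
	.connect	= sock_no_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= sock_no_accept,
	.getname	= example_getname,
	.poll		= datagram_poll,
	.ioctl		= example_ioctl,
	.listen		= sock_no_listen,
	.shutdown	= sock_no_shutdown,
	.setsockopt	= sock_common_setsockopt,
	.getsockopt	= sock_common_getsockopt,
	.sendmsg	= example_sendmsg,
	.recvmsg	= sock_common_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage,
};
#endif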
2480 
2481 /*
2482  *	Default Socket Callbacks
2483  */
2484 
2485 static void sock_def_wakeup(struct sock *sk)
2486 {
2487 	struct socket_wq *wq;
2488 
2489 	rcu_read_lock();
2490 	wq = rcu_dereference(sk->sk_wq);
2491 	if (skwq_has_sleeper(wq))
2492 		wake_up_interruptible_all(&wq->wait);
2493 	rcu_read_unlock();
2494 }
2495 
2496 static void sock_def_error_report(struct sock *sk)
2497 {
2498 	struct socket_wq *wq;
2499 
2500 	rcu_read_lock();
2501 	wq = rcu_dereference(sk->sk_wq);
2502 	if (skwq_has_sleeper(wq))
2503 		wake_up_interruptible_poll(&wq->wait, POLLERR);
2504 	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
2505 	rcu_read_unlock();
2506 }
2507 
2508 static void sock_def_readable(struct sock *sk)
2509 {
2510 	struct socket_wq *wq;
2511 
2512 	rcu_read_lock();
2513 	wq = rcu_dereference(sk->sk_wq);
2514 	if (skwq_has_sleeper(wq))
2515 		wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
2516 						POLLRDNORM | POLLRDBAND);
2517 	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
2518 	rcu_read_unlock();
2519 }
2520 
2521 static void sock_def_write_space(struct sock *sk)
2522 {
2523 	struct socket_wq *wq;
2524 
2525 	rcu_read_lock();
2526 
2527 	/* Do not wake up a writer until he can make "significant"
2528 	 * progress.  --DaveM
2529 	 */
2530 	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
2531 		wq = rcu_dereference(sk->sk_wq);
2532 		if (skwq_has_sleeper(wq))
2533 			wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
2534 						POLLWRNORM | POLLWRBAND);
2535 
2536 		/* Should agree with poll, otherwise some programs break */
2537 		if (sock_writeable(sk))
2538 			sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
2539 	}
2540 
2541 	rcu_read_unlock();
2542 }
2543 
2544 static void sock_def_destruct(struct sock *sk)
2545 {
2546 }
2547 
2548 void sk_send_sigurg(struct sock *sk)
2549 {
2550 	if (sk->sk_socket && sk->sk_socket->file)
2551 		if (send_sigurg(&sk->sk_socket->file->f_owner))
2552 			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
2553 }
2554 EXPORT_SYMBOL(sk_send_sigurg);
2555 
2556 void sk_reset_timer(struct sock *sk, struct timer_list* timer,
2557 		    unsigned long expires)
2558 {
2559 	if (!mod_timer(timer, expires))
2560 		sock_hold(sk);
2561 }
2562 EXPORT_SYMBOL(sk_reset_timer);
2563 
2564 void sk_stop_timer(struct sock *sk, struct timer_list* timer)
2565 {
2566 	if (del_timer(timer))
2567 		__sock_put(sk);
2568 }
2569 EXPORT_SYMBOL(sk_stop_timer);
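
/* Illustrative sketch: arming a per-socket timer with sk_reset_timer(); the
 * handler drops the reference the arm side took, and sk_stop_timer() drops
 * it when cancelling.  example_* names are hypothetical, and the timer's
 * function/data fields are assumed to have been set at socket init time.
 */
#if 0
static void example_arm_timer(struct sock *sk, unsigned long delay)
{
	sk_reset_timer(sk, &sk->sk_timer, jiffies + delay);
}

static void example_timer_handler(unsigned long data)
{
	struct sock *sk = (struct sock *)data;

	/* ... protocol work ... */
	sock_put(sk);	/* release the reference taken by sk_reset_timer() */
}
#endif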
2570 
2571 void sock_init_data(struct socket *sock, struct sock *sk)
2572 {
2573 	sk_init_common(sk);
2574 	sk->sk_send_head	=	NULL;
2575 
2576 	init_timer(&sk->sk_timer);
2577 
2578 	sk->sk_allocation	=	GFP_KERNEL;
2579 	sk->sk_rcvbuf		=	sysctl_rmem_default;
2580 	sk->sk_sndbuf		=	sysctl_wmem_default;
2581 	sk->sk_state		=	TCP_CLOSE;
2582 	sk_set_socket(sk, sock);
2583 
2584 	sock_set_flag(sk, SOCK_ZAPPED);
2585 
2586 	if (sock) {
2587 		sk->sk_type	=	sock->type;
2588 		sk->sk_wq	=	sock->wq;
2589 		sock->sk	=	sk;
2590 		sk->sk_uid	=	SOCK_INODE(sock)->i_uid;
2591 	} else {
2592 		sk->sk_wq	=	NULL;
2593 		sk->sk_uid	=	make_kuid(sock_net(sk)->user_ns, 0);
2594 	}
2595 
2596 	rwlock_init(&sk->sk_callback_lock);
2597 	if (sk->sk_kern_sock)
2598 		lockdep_set_class_and_name(
2599 			&sk->sk_callback_lock,
2600 			af_kern_callback_keys + sk->sk_family,
2601 			af_family_kern_clock_key_strings[sk->sk_family]);
2602 	else
2603 		lockdep_set_class_and_name(
2604 			&sk->sk_callback_lock,
2605 			af_callback_keys + sk->sk_family,
2606 			af_family_clock_key_strings[sk->sk_family]);
2607 
2608 	sk->sk_state_change	=	sock_def_wakeup;
2609 	sk->sk_data_ready	=	sock_def_readable;
2610 	sk->sk_write_space	=	sock_def_write_space;
2611 	sk->sk_error_report	=	sock_def_error_report;
2612 	sk->sk_destruct		=	sock_def_destruct;
2613 
2614 	sk->sk_frag.page	=	NULL;
2615 	sk->sk_frag.offset	=	0;
2616 	sk->sk_peek_off		=	-1;
2617 
2618 	sk->sk_peer_pid 	=	NULL;
2619 	sk->sk_peer_cred	=	NULL;
2620 	sk->sk_write_pending	=	0;
2621 	sk->sk_rcvlowat		=	1;
2622 	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
2623 	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
2624 
2625 	sk->sk_stamp = SK_DEFAULT_STAMP;
2626 
2627 #ifdef CONFIG_NET_RX_BUSY_POLL
2628 	sk->sk_napi_id		=	0;
2629 	sk->sk_ll_usec		=	sysctl_net_busy_read;
2630 #endif
2631 
2632 	sk->sk_max_pacing_rate = ~0U;
2633 	sk->sk_pacing_rate = ~0U;
2634 	sk->sk_incoming_cpu = -1;
2635 	/*
2636 	 * Before updating sk_refcnt, we must commit prior changes to memory
2637 	 * (Documentation/RCU/rculist_nulls.txt for details)
2638 	 */
2639 	smp_wmb();
2640 	atomic_set(&sk->sk_refcnt, 1);
2641 	atomic_set(&sk->sk_drops, 0);
2642 }
2643 EXPORT_SYMBOL(sock_init_data);
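
/* Illustrative sketch: the usual create() sequence for an address family,
 * allocating the sock and initialising it with sock_init_data().
 * example_create(), example_proto and example_dgram_ops are hypothetical.
 */
#if 0
static int example_create(struct net *net, struct socket *sock, int protocol,
			  int kern)
{
	struct sock *sk;

	sock->state = SS_UNCONNECTED;
	sock->ops = &example_dgram_ops;

	sk = sk_alloc(net, PF_PACKET /* any registered family */, GFP_KERNEL,
		      &example_proto, kern);
	if (!sk)
		return -ENOBUFS;

	sock_init_data(sock, sk);	/* links sk <-> sock, sets defaults */
	sk->sk_protocol = protocol;

	return 0;
}
#endif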
2644 
2645 void lock_sock_nested(struct sock *sk, int subclass)
2646 {
2647 	might_sleep();
2648 	spin_lock_bh(&sk->sk_lock.slock);
2649 	if (sk->sk_lock.owned)
2650 		__lock_sock(sk);
2651 	sk->sk_lock.owned = 1;
2652 	spin_unlock(&sk->sk_lock.slock);
2653 	/*
2654 	 * The sk_lock has mutex_lock() semantics here:
2655 	 */
2656 	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
2657 	local_bh_enable();
2658 }
2659 EXPORT_SYMBOL(lock_sock_nested);
2660 
2661 void release_sock(struct sock *sk)
2662 {
2663 	spin_lock_bh(&sk->sk_lock.slock);
2664 	if (sk->sk_backlog.tail)
2665 		__release_sock(sk);
2666 
2667 	/* Warning : release_cb() might need to release sk ownership,
2668 	 * ie call sock_release_ownership(sk) before us.
2669 	 * i.e. call sock_release_ownership(sk) before us.
2670 	if (sk->sk_prot->release_cb)
2671 		sk->sk_prot->release_cb(sk);
2672 
2673 	sock_release_ownership(sk);
2674 	if (waitqueue_active(&sk->sk_lock.wq))
2675 		wake_up(&sk->sk_lock.wq);
2676 	spin_unlock_bh(&sk->sk_lock.slock);
2677 }
2678 EXPORT_SYMBOL(release_sock);
2679 
2680 /**
2681  * lock_sock_fast - fast version of lock_sock
2682  * @sk: socket
2683  *
2684  * This version should be used for very small sections where the process
2685  * won't block: it returns false if the fast path is taken
2686  *   (sk_lock.slock locked, owned = 0, BH disabled)
2687  * and returns true if the slow path is taken
2688  *   (sk_lock.slock unlocked, owned = 1, BH enabled).
2689  */
2690 bool lock_sock_fast(struct sock *sk)
2691 {
2692 	might_sleep();
2693 	spin_lock_bh(&sk->sk_lock.slock);
2694 
2695 	if (!sk->sk_lock.owned)
2696 		/*
2697 		 * Note: fast path, we return with BH disabled and slock held
2698 		 */
2699 		return false;
2700 
2701 	__lock_sock(sk);
2702 	sk->sk_lock.owned = 1;
2703 	spin_unlock(&sk->sk_lock.slock);
2704 	/*
2705 	 * The sk_lock has mutex_lock() semantics here:
2706 	 */
2707 	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
2708 	local_bh_enable();
2709 	return true;
2710 }
2711 EXPORT_SYMBOL(lock_sock_fast);
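
/* Illustrative sketch: the lock_sock_fast()/unlock_sock_fast() pairing for a
 * short, non-blocking critical section.  example_update() and the field it
 * touches are hypothetical.
 */
#if 0
static void example_update(struct sock *sk, int val)
{
	bool slow = lock_sock_fast(sk);

	/* Keep this section short and non-sleeping: on the fast path we are
	 * still in BH-disabled context with sk_lock.slock held.
	 */
	sk->sk_rcvlowat = val ? val : 1;	/* any small protected update */

	unlock_sock_fast(sk, slow);
}
#endif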
2712 
2713 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
2714 {
2715 	struct timeval tv;
2716 	if (!sock_flag(sk, SOCK_TIMESTAMP))
2717 		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2718 	tv = ktime_to_timeval(sk->sk_stamp);
2719 	if (tv.tv_sec == -1)
2720 		return -ENOENT;
2721 	if (tv.tv_sec == 0) {
2722 		sk->sk_stamp = ktime_get_real();
2723 		tv = ktime_to_timeval(sk->sk_stamp);
2724 	}
2725 	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
2726 }
2727 EXPORT_SYMBOL(sock_get_timestamp);
2728 
2729 int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
2730 {
2731 	struct timespec ts;
2732 	if (!sock_flag(sk, SOCK_TIMESTAMP))
2733 		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2734 	ts = ktime_to_timespec(sk->sk_stamp);
2735 	if (ts.tv_sec == -1)
2736 		return -ENOENT;
2737 	if (ts.tv_sec == 0) {
2738 		sk->sk_stamp = ktime_get_real();
2739 		ts = ktime_to_timespec(sk->sk_stamp);
2740 	}
2741 	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
2742 }
2743 EXPORT_SYMBOL(sock_get_timestampns);
2744 
2745 void sock_enable_timestamp(struct sock *sk, int flag)
2746 {
2747 	if (!sock_flag(sk, flag)) {
2748 		unsigned long previous_flags = sk->sk_flags;
2749 
2750 		sock_set_flag(sk, flag);
2751 		/*
2752 		 * We just set one of the two flags that require net
2753 		 * time stamping, but time stamping might already have
2754 		 * been enabled because of the other one.
2755 		 */
2756 		if (sock_needs_netstamp(sk) &&
2757 		    !(previous_flags & SK_FLAGS_TIMESTAMP))
2758 			net_enable_timestamp();
2759 	}
2760 }
2761 
2762 int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
2763 		       int level, int type)
2764 {
2765 	struct sock_exterr_skb *serr;
2766 	struct sk_buff *skb;
2767 	int copied, err;
2768 
2769 	err = -EAGAIN;
2770 	skb = sock_dequeue_err_skb(sk);
2771 	if (skb == NULL)
2772 		goto out;
2773 
2774 	copied = skb->len;
2775 	if (copied > len) {
2776 		msg->msg_flags |= MSG_TRUNC;
2777 		copied = len;
2778 	}
2779 	err = skb_copy_datagram_msg(skb, 0, msg, copied);
2780 	if (err)
2781 		goto out_free_skb;
2782 
2783 	sock_recv_timestamp(msg, sk, skb);
2784 
2785 	serr = SKB_EXT_ERR(skb);
2786 	put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);
2787 
2788 	msg->msg_flags |= MSG_ERRQUEUE;
2789 	err = copied;
2790 
2791 out_free_skb:
2792 	kfree_skb(skb);
2793 out:
2794 	return err;
2795 }
2796 EXPORT_SYMBOL(sock_recv_errqueue);
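
/* Illustrative sketch: a recvmsg() implementation dispatching MSG_ERRQUEUE to
 * sock_recv_errqueue(); the cmsg level/type values are protocol specific
 * (packet sockets, for instance, use SOL_PACKET/PACKET_TX_TIMESTAMP).
 * example_recvmsg() is hypothetical.
 */
#if 0
static int example_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
			   int noblock, int flags, int *addr_len)
{
	if (flags & MSG_ERRQUEUE)
		return sock_recv_errqueue(sk, msg, len,
					  SOL_PACKET, PACKET_TX_TIMESTAMP);

	/* ... normal receive path ... */
	return -EAGAIN;
}
#endif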
2797 
2798 /*
2799  *	Get a socket option on a socket.
2800  *
2801  *	FIX: POSIX 1003.1g is very ambiguous here. It states that
2802  *	asynchronous errors should be reported by getsockopt. We assume
2803  *	this means if you specify SO_ERROR (otherwise what's the point of it).
2804  */
2805 int sock_common_getsockopt(struct socket *sock, int level, int optname,
2806 			   char __user *optval, int __user *optlen)
2807 {
2808 	struct sock *sk = sock->sk;
2809 
2810 	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2811 }
2812 EXPORT_SYMBOL(sock_common_getsockopt);
2813 
2814 #ifdef CONFIG_COMPAT
2815 int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
2816 				  char __user *optval, int __user *optlen)
2817 {
2818 	struct sock *sk = sock->sk;
2819 
2820 	if (sk->sk_prot->compat_getsockopt != NULL)
2821 		return sk->sk_prot->compat_getsockopt(sk, level, optname,
2822 						      optval, optlen);
2823 	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2824 }
2825 EXPORT_SYMBOL(compat_sock_common_getsockopt);
2826 #endif
2827 
2828 int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2829 			int flags)
2830 {
2831 	struct sock *sk = sock->sk;
2832 	int addr_len = 0;
2833 	int err;
2834 
2835 	err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
2836 				   flags & ~MSG_DONTWAIT, &addr_len);
2837 	if (err >= 0)
2838 		msg->msg_namelen = addr_len;
2839 	return err;
2840 }
2841 EXPORT_SYMBOL(sock_common_recvmsg);
2842 
2843 /*
2844  *	Set socket options on an inet socket.
2845  */
2846 int sock_common_setsockopt(struct socket *sock, int level, int optname,
2847 			   char __user *optval, unsigned int optlen)
2848 {
2849 	struct sock *sk = sock->sk;
2850 
2851 	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2852 }
2853 EXPORT_SYMBOL(sock_common_setsockopt);
2854 
2855 #ifdef CONFIG_COMPAT
2856 int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
2857 				  char __user *optval, unsigned int optlen)
2858 {
2859 	struct sock *sk = sock->sk;
2860 
2861 	if (sk->sk_prot->compat_setsockopt != NULL)
2862 		return sk->sk_prot->compat_setsockopt(sk, level, optname,
2863 						      optval, optlen);
2864 	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2865 }
2866 EXPORT_SYMBOL(compat_sock_common_setsockopt);
2867 #endif
2868 
2869 void sk_common_release(struct sock *sk)
2870 {
2871 	if (sk->sk_prot->destroy)
2872 		sk->sk_prot->destroy(sk);
2873 
2874 	/*
2875 	 * Observation: when sock_common_release is called, processes have
2876 	 * no access to the socket, but the network stack still does.
2877 	 * Step one, detach it from networking:
2878 	 *
2879 	 * A. Remove from hash tables.
2880 	 */
2881 
2882 	sk->sk_prot->unhash(sk);
2883 
2884 	/*
2885 	 * At this point the socket cannot receive new packets, but some may
2886 	 * be in flight because a CPU running the receiver did its hash table
2887 	 * lookup before we unhashed the socket. They will reach the receive
2888 	 * queue and be purged by the socket destructor.
2889 	 *
2890 	 * Also, we still have packets pending on the receive queue and,
2891 	 * probably, our own packets waiting in device queues. sock_destroy
2892 	 * will drain the receive queue, but transmitted packets delay socket
2893 	 * destruction until the last reference is released.
2894 	 */
2895 
2896 	sock_orphan(sk);
2897 
2898 	xfrm_sk_free_policy(sk);
2899 
2900 	sk_refcnt_debug_release(sk);
2901 
2902 	sock_put(sk);
2903 }
2904 EXPORT_SYMBOL(sk_common_release);
2905 
2906 void sk_get_meminfo(const struct sock *sk, u32 *mem)
2907 {
2908 	memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
2909 
2910 	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
2911 	mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
2912 	mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
2913 	mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
2914 	mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
2915 	mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
2916 	mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
2917 	mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
2918 	mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
2919 }
2920 
2921 #ifdef CONFIG_PROC_FS
2922 #define PROTO_INUSE_NR	64	/* should be enough for the first time */
2923 struct prot_inuse {
2924 	int val[PROTO_INUSE_NR];
2925 };
2926 
2927 static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
2928 
2929 #ifdef CONFIG_NET_NS
2930 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2931 {
2932 	__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
2933 }
2934 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2935 
2936 int sock_prot_inuse_get(struct net *net, struct proto *prot)
2937 {
2938 	int cpu, idx = prot->inuse_idx;
2939 	int res = 0;
2940 
2941 	for_each_possible_cpu(cpu)
2942 		res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
2943 
2944 	return res >= 0 ? res : 0;
2945 }
2946 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2947 
2948 static int __net_init sock_inuse_init_net(struct net *net)
2949 {
2950 	net->core.inuse = alloc_percpu(struct prot_inuse);
2951 	return net->core.inuse ? 0 : -ENOMEM;
2952 }
2953 
2954 static void __net_exit sock_inuse_exit_net(struct net *net)
2955 {
2956 	free_percpu(net->core.inuse);
2957 }
2958 
2959 static struct pernet_operations net_inuse_ops = {
2960 	.init = sock_inuse_init_net,
2961 	.exit = sock_inuse_exit_net,
2962 };
2963 
2964 static __init int net_inuse_init(void)
2965 {
2966 	if (register_pernet_subsys(&net_inuse_ops))
2967 		panic("Cannot initialize net inuse counters");
2968 
2969 	return 0;
2970 }
2971 
2972 core_initcall(net_inuse_init);
2973 #else
2974 static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
2975 
2976 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2977 {
2978 	__this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
2979 }
2980 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2981 
2982 int sock_prot_inuse_get(struct net *net, struct proto *prot)
2983 {
2984 	int cpu, idx = prot->inuse_idx;
2985 	int res = 0;
2986 
2987 	for_each_possible_cpu(cpu)
2988 		res += per_cpu(prot_inuse, cpu).val[idx];
2989 
2990 	return res >= 0 ? res : 0;
2991 }
2992 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2993 #endif
2994 
2995 static void assign_proto_idx(struct proto *prot)
2996 {
2997 	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
2998 
2999 	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
3000 		pr_err("PROTO_INUSE_NR exhausted\n");
3001 		return;
3002 	}
3003 
3004 	set_bit(prot->inuse_idx, proto_inuse_idx);
3005 }
3006 
3007 static void release_proto_idx(struct proto *prot)
3008 {
3009 	if (prot->inuse_idx != PROTO_INUSE_NR - 1)
3010 		clear_bit(prot->inuse_idx, proto_inuse_idx);
3011 }
3012 #else
3013 static inline void assign_proto_idx(struct proto *prot)
3014 {
3015 }
3016 
3017 static inline void release_proto_idx(struct proto *prot)
3018 {
3019 }
3020 #endif
3021 
3022 static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
3023 {
3024 	if (!rsk_prot)
3025 		return;
3026 	kfree(rsk_prot->slab_name);
3027 	rsk_prot->slab_name = NULL;
3028 	kmem_cache_destroy(rsk_prot->slab);
3029 	rsk_prot->slab = NULL;
3030 }
3031 
3032 static int req_prot_init(const struct proto *prot)
3033 {
3034 	struct request_sock_ops *rsk_prot = prot->rsk_prot;
3035 
3036 	if (!rsk_prot)
3037 		return 0;
3038 
3039 	rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s",
3040 					prot->name);
3041 	if (!rsk_prot->slab_name)
3042 		return -ENOMEM;
3043 
3044 	rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
3045 					   rsk_prot->obj_size, 0,
3046 					   prot->slab_flags, NULL);
3047 
3048 	if (!rsk_prot->slab) {
3049 		pr_crit("%s: Can't create request sock SLAB cache!\n",
3050 			prot->name);
3051 		return -ENOMEM;
3052 	}
3053 	return 0;
3054 }
3055 
3056 int proto_register(struct proto *prot, int alloc_slab)
3057 {
3058 	if (alloc_slab) {
3059 		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
3060 					SLAB_HWCACHE_ALIGN | prot->slab_flags,
3061 					NULL);
3062 
3063 		if (prot->slab == NULL) {
3064 			pr_crit("%s: Can't create sock SLAB cache!\n",
3065 				prot->name);
3066 			goto out;
3067 		}
3068 
3069 		if (req_prot_init(prot))
3070 			goto out_free_request_sock_slab;
3071 
3072 		if (prot->twsk_prot != NULL) {
3073 			prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
3074 
3075 			if (prot->twsk_prot->twsk_slab_name == NULL)
3076 				goto out_free_request_sock_slab;
3077 
3078 			prot->twsk_prot->twsk_slab =
3079 				kmem_cache_create(prot->twsk_prot->twsk_slab_name,
3080 						  prot->twsk_prot->twsk_obj_size,
3081 						  0,
3082 						  prot->slab_flags,
3083 						  NULL);
3084 			if (prot->twsk_prot->twsk_slab == NULL)
3085 				goto out_free_timewait_sock_slab_name;
3086 		}
3087 	}
3088 
3089 	mutex_lock(&proto_list_mutex);
3090 	list_add(&prot->node, &proto_list);
3091 	assign_proto_idx(prot);
3092 	mutex_unlock(&proto_list_mutex);
3093 	return 0;
3094 
3095 out_free_timewait_sock_slab_name:
3096 	kfree(prot->twsk_prot->twsk_slab_name);
3097 out_free_request_sock_slab:
3098 	req_prot_cleanup(prot->rsk_prot);
3099 
3100 	kmem_cache_destroy(prot->slab);
3101 	prot->slab = NULL;
3102 out:
3103 	return -ENOBUFS;
3104 }
3105 EXPORT_SYMBOL(proto_register);
3106 
3107 void proto_unregister(struct proto *prot)
3108 {
3109 	mutex_lock(&proto_list_mutex);
3110 	release_proto_idx(prot);
3111 	list_del(&prot->node);
3112 	mutex_unlock(&proto_list_mutex);
3113 
3114 	kmem_cache_destroy(prot->slab);
3115 	prot->slab = NULL;
3116 
3117 	req_prot_cleanup(prot->rsk_prot);
3118 
3119 	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
3120 		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
3121 		kfree(prot->twsk_prot->twsk_slab_name);
3122 		prot->twsk_prot->twsk_slab = NULL;
3123 	}
3124 }
3125 EXPORT_SYMBOL(proto_unregister);
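
/* Illustrative sketch: a module pairing proto_register() at init with
 * proto_unregister() at exit; all example_* names are hypothetical and the
 * sock_register()/sock_unregister() calls for the family are elided.
 */
#if 0
static struct proto example_proto = {
	.name	  = "EXAMPLE",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct sock),
};

static int __init example_init(void)
{
	int rc = proto_register(&example_proto, 1 /* alloc_slab */);

	if (rc)
		return rc;
	/* ... sock_register(&example_family_ops) would follow ... */
	return 0;
}

static void __exit example_exit(void)
{
	/* ... sock_unregister() for the family comes first ... */
	proto_unregister(&example_proto);
}
#endif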
3126 
3127 #ifdef CONFIG_PROC_FS
3128 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
3129 	__acquires(proto_list_mutex)
3130 {
3131 	mutex_lock(&proto_list_mutex);
3132 	return seq_list_start_head(&proto_list, *pos);
3133 }
3134 
3135 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3136 {
3137 	return seq_list_next(v, &proto_list, pos);
3138 }
3139 
3140 static void proto_seq_stop(struct seq_file *seq, void *v)
3141 	__releases(proto_list_mutex)
3142 {
3143 	mutex_unlock(&proto_list_mutex);
3144 }
3145 
3146 static char proto_method_implemented(const void *method)
3147 {
3148 	return method == NULL ? 'n' : 'y';
3149 }
3150 static long sock_prot_memory_allocated(struct proto *proto)
3151 {
3152 	return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
3153 }
3154 
3155 static char *sock_prot_memory_pressure(struct proto *proto)
3156 {
3157 	return proto->memory_pressure != NULL ?
3158 	proto_memory_pressure(proto) ? "yes" : "no" : "NI";
3159 }
3160 
3161 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
3162 {
3163 
3164 	seq_printf(seq, "%-9s %4u %6d  %6ld   %-3s %6u   %-3s  %-10s "
3165 			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
3166 		   proto->name,
3167 		   proto->obj_size,
3168 		   sock_prot_inuse_get(seq_file_net(seq), proto),
3169 		   sock_prot_memory_allocated(proto),
3170 		   sock_prot_memory_pressure(proto),
3171 		   proto->max_header,
3172 		   proto->slab == NULL ? "no" : "yes",
3173 		   module_name(proto->owner),
3174 		   proto_method_implemented(proto->close),
3175 		   proto_method_implemented(proto->connect),
3176 		   proto_method_implemented(proto->disconnect),
3177 		   proto_method_implemented(proto->accept),
3178 		   proto_method_implemented(proto->ioctl),
3179 		   proto_method_implemented(proto->init),
3180 		   proto_method_implemented(proto->destroy),
3181 		   proto_method_implemented(proto->shutdown),
3182 		   proto_method_implemented(proto->setsockopt),
3183 		   proto_method_implemented(proto->getsockopt),
3184 		   proto_method_implemented(proto->sendmsg),
3185 		   proto_method_implemented(proto->recvmsg),
3186 		   proto_method_implemented(proto->sendpage),
3187 		   proto_method_implemented(proto->bind),
3188 		   proto_method_implemented(proto->backlog_rcv),
3189 		   proto_method_implemented(proto->hash),
3190 		   proto_method_implemented(proto->unhash),
3191 		   proto_method_implemented(proto->get_port),
3192 		   proto_method_implemented(proto->enter_memory_pressure));
3193 }
3194 
3195 static int proto_seq_show(struct seq_file *seq, void *v)
3196 {
3197 	if (v == &proto_list)
3198 		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
3199 			   "protocol",
3200 			   "size",
3201 			   "sockets",
3202 			   "memory",
3203 			   "press",
3204 			   "maxhdr",
3205 			   "slab",
3206 			   "module",
3207 			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
3208 	else
3209 		proto_seq_printf(seq, list_entry(v, struct proto, node));
3210 	return 0;
3211 }
3212 
3213 static const struct seq_operations proto_seq_ops = {
3214 	.start  = proto_seq_start,
3215 	.next   = proto_seq_next,
3216 	.stop   = proto_seq_stop,
3217 	.show   = proto_seq_show,
3218 };
3219 
3220 static int proto_seq_open(struct inode *inode, struct file *file)
3221 {
3222 	return seq_open_net(inode, file, &proto_seq_ops,
3223 			    sizeof(struct seq_net_private));
3224 }
3225 
3226 static const struct file_operations proto_seq_fops = {
3227 	.owner		= THIS_MODULE,
3228 	.open		= proto_seq_open,
3229 	.read		= seq_read,
3230 	.llseek		= seq_lseek,
3231 	.release	= seq_release_net,
3232 };
3233 
3234 static __net_init int proto_init_net(struct net *net)
3235 {
3236 	if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
3237 		return -ENOMEM;
3238 
3239 	return 0;
3240 }
3241 
3242 static __net_exit void proto_exit_net(struct net *net)
3243 {
3244 	remove_proc_entry("protocols", net->proc_net);
3245 }
3246 
3247 
3248 static __net_initdata struct pernet_operations proto_net_ops = {
3249 	.init = proto_init_net,
3250 	.exit = proto_exit_net,
3251 };
3252 
3253 static int __init proto_init(void)
3254 {
3255 	return register_pernet_subsys(&proto_net_ops);
3256 }
3257 
3258 subsys_initcall(proto_init);
3259 
3260 #endif /* PROC_FS */
3261 
3262 #ifdef CONFIG_NET_RX_BUSY_POLL
3263 bool sk_busy_loop_end(void *p, unsigned long start_time)
3264 {
3265 	struct sock *sk = p;
3266 
3267 	return !skb_queue_empty(&sk->sk_receive_queue) ||
3268 	       sk_busy_loop_timeout(sk, start_time);
3269 }
3270 EXPORT_SYMBOL(sk_busy_loop_end);
3271 #endif /* CONFIG_NET_RX_BUSY_POLL */
3272