xref: /openbmc/linux/net/core/sock.c (revision 4f139972b489f8bc2c821aa25ac65018d92af3f7)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Generic socket support routines. Memory allocators, socket lock/release
7  *		handler for protocols to use and generic option handler.
8  *
9  *
10  * Authors:	Ross Biro
11  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *		Florian La Roche, <flla@stud.uni-sb.de>
13  *		Alan Cox, <A.Cox@swansea.ac.uk>
14  *
15  * Fixes:
16  *		Alan Cox	: 	Numerous verify_area() problems
17  *		Alan Cox	:	Connecting on a connecting socket
18  *					now returns an error for tcp.
19  *		Alan Cox	:	sock->protocol is set correctly.
20  *					and is not sometimes left as 0.
21  *		Alan Cox	:	connect handles icmp errors on a
22  *					connect properly. Unfortunately there
23  *					is a restart syscall nasty there. I
24  *					can't match BSD without hacking the C
25  *					library. Ideas urgently sought!
26  *		Alan Cox	:	Disallow bind() to addresses that are
27  *					not ours - especially broadcast ones!!
28  *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
29  *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
30  *					instead they leave that for the DESTROY timer.
31  *		Alan Cox	:	Clean up error flag in accept
32  *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
33  *					was buggy. Put a remove_sock() in the handler
34  *					for memory when we hit 0. Also altered the timer
35  *					code. The ACK stuff can wait and needs major
36  *					TCP layer surgery.
37  *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
38  *					and fixed timer/inet_bh race.
39  *		Alan Cox	:	Added zapped flag for TCP
40  *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
41  *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
42  *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
43  *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
44  *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
45  *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
46  *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
47  *	Pauline Middelink	:	identd support
48  *		Alan Cox	:	Fixed connect() taking signals I think.
49  *		Alan Cox	:	SO_LINGER supported
50  *		Alan Cox	:	Error reporting fixes
51  *		Anonymous	:	inet_create tidied up (sk->reuse setting)
52  *		Alan Cox	:	inet sockets don't set sk->type!
53  *		Alan Cox	:	Split socket option code
54  *		Alan Cox	:	Callbacks
55  *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
56  *		Alex		:	Removed restriction on inet fioctl
57  *		Alan Cox	:	Splitting INET from NET core
58  *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
59  *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
60  *		Alan Cox	:	Split IP from generic code
61  *		Alan Cox	:	New kfree_skbmem()
62  *		Alan Cox	:	Make SO_DEBUG superuser only.
63  *		Alan Cox	:	Allow anyone to clear SO_DEBUG
64  *					(compatibility fix)
65  *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
66  *		Alan Cox	:	Allocator for a socket is settable.
67  *		Alan Cox	:	SO_ERROR includes soft errors.
68  *		Alan Cox	:	Allow NULL arguments on some SO_ opts
69  *		Alan Cox	: 	Generic socket allocation to make hooks
70  *					easier (suggested by Craig Metz).
71  *		Michael Pall	:	SO_ERROR returns positive errno again
72  *              Steve Whitehouse:       Added default destructor to free
73  *                                      protocol private data.
74  *              Steve Whitehouse:       Added various other default routines
75  *                                      common to several socket families.
76  *              Chris Evans     :       Call suser() check last on F_SETOWN
77  *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
78  *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
79  *		Andi Kleen	:	Fix write_space callback
80  *		Chris Evans	:	Security fixes - signedness again
81  *		Arnaldo C. Melo :       cleanups, use skb_queue_purge
82  *
83  * To Fix:
84  *
85  *
86  *		This program is free software; you can redistribute it and/or
87  *		modify it under the terms of the GNU General Public License
88  *		as published by the Free Software Foundation; either version
89  *		2 of the License, or (at your option) any later version.
90  */
91 
92 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
93 
94 #include <linux/capability.h>
95 #include <linux/errno.h>
96 #include <linux/errqueue.h>
97 #include <linux/types.h>
98 #include <linux/socket.h>
99 #include <linux/in.h>
100 #include <linux/kernel.h>
101 #include <linux/module.h>
102 #include <linux/proc_fs.h>
103 #include <linux/seq_file.h>
104 #include <linux/sched.h>
105 #include <linux/timer.h>
106 #include <linux/string.h>
107 #include <linux/sockios.h>
108 #include <linux/net.h>
109 #include <linux/mm.h>
110 #include <linux/slab.h>
111 #include <linux/interrupt.h>
112 #include <linux/poll.h>
113 #include <linux/tcp.h>
114 #include <linux/init.h>
115 #include <linux/highmem.h>
116 #include <linux/user_namespace.h>
117 #include <linux/static_key.h>
118 #include <linux/memcontrol.h>
119 #include <linux/prefetch.h>
120 
121 #include <linux/uaccess.h>
122 
123 #include <linux/netdevice.h>
124 #include <net/protocol.h>
125 #include <linux/skbuff.h>
126 #include <net/net_namespace.h>
127 #include <net/request_sock.h>
128 #include <net/sock.h>
129 #include <linux/net_tstamp.h>
130 #include <net/xfrm.h>
131 #include <linux/ipsec.h>
132 #include <net/cls_cgroup.h>
133 #include <net/netprio_cgroup.h>
134 #include <linux/sock_diag.h>
135 
136 #include <linux/filter.h>
137 #include <net/sock_reuseport.h>
138 
139 #include <trace/events/sock.h>
140 
141 #ifdef CONFIG_INET
142 #include <net/tcp.h>
143 #endif
144 
145 #include <net/busy_poll.h>
146 
147 static DEFINE_MUTEX(proto_list_mutex);
148 static LIST_HEAD(proto_list);
149 
150 /**
151  * sk_ns_capable - General socket capability test
152  * @sk: Socket to use a capability on or through
153  * @user_ns: The user namespace of the capability to use
154  * @cap: The capability to use
155  *
156  * Test whether the opener of the socket had the capability @cap in the
157  * user namespace @user_ns when the socket was created, and whether the
158  * current process has it as well.
159  */
160 bool sk_ns_capable(const struct sock *sk,
161 		   struct user_namespace *user_ns, int cap)
162 {
163 	return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
164 		ns_capable(user_ns, cap);
165 }
166 EXPORT_SYMBOL(sk_ns_capable);
167 
168 /**
169  * sk_capable - Socket global capability test
170  * @sk: Socket to use a capability on or through
171  * @cap: The global capability to use
172  *
173  * Test whether the opener of the socket had the capability @cap when the
174  * socket was created, and whether the current process has it in all user
175  * namespaces (i.e. in the initial user namespace).
176  */
177 bool sk_capable(const struct sock *sk, int cap)
178 {
179 	return sk_ns_capable(sk, &init_user_ns, cap);
180 }
181 EXPORT_SYMBOL(sk_capable);
182 
183 /**
184  * sk_net_capable - Network namespace socket capability test
185  * @sk: Socket to use a capability on or through
186  * @cap: The capability to use
187  *
188  * Test whether the opener of the socket had the capability @cap over the
189  * network namespace the socket is a member of when the socket was created,
190  * and whether the current process has it as well.
191  */
192 bool sk_net_capable(const struct sock *sk, int cap)
193 {
194 	return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
195 }
196 EXPORT_SYMBOL(sk_net_capable);
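/*
 * Illustrative sketch: the three helpers above are typically used to gate
 * privileged socket operations.  A hypothetical protocol handler might
 * combine them like this (the function and its semantics are made up
 * purely for illustration):
 *
 *	static int example_set_privileged_opt(struct sock *sk, int val)
 *	{
 *		// CAP_NET_ADMIN must have been usable by the socket's opener
 *		// at creation time and must be held by the current task, in
 *		// the user namespace owning the socket's network namespace.
 *		if (!sk_net_capable(sk, CAP_NET_ADMIN))
 *			return -EPERM;
 *		sk->sk_priority = val;
 *		return 0;
 *	}
 */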
197 
198 /*
199  * Each address family might have different locking rules, so we have
200  * one slock key per address family and separate keys for internal and
201  * userspace sockets.
202  */
203 static struct lock_class_key af_family_keys[AF_MAX];
204 static struct lock_class_key af_family_kern_keys[AF_MAX];
205 static struct lock_class_key af_family_slock_keys[AF_MAX];
206 static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
207 
208 /*
209  * Make lock validator output more readable. (we pre-construct these
210  * strings build-time, so that runtime initialization of socket
211  * locks is fast):
212  */
213 
214 #define _sock_locks(x)						  \
215   x "AF_UNSPEC",	x "AF_UNIX"     ,	x "AF_INET"     , \
216   x "AF_AX25"  ,	x "AF_IPX"      ,	x "AF_APPLETALK", \
217   x "AF_NETROM",	x "AF_BRIDGE"   ,	x "AF_ATMPVC"   , \
218   x "AF_X25"   ,	x "AF_INET6"    ,	x "AF_ROSE"     , \
219   x "AF_DECnet",	x "AF_NETBEUI"  ,	x "AF_SECURITY" , \
220   x "AF_KEY"   ,	x "AF_NETLINK"  ,	x "AF_PACKET"   , \
221   x "AF_ASH"   ,	x "AF_ECONET"   ,	x "AF_ATMSVC"   , \
222   x "AF_RDS"   ,	x "AF_SNA"      ,	x "AF_IRDA"     , \
223   x "AF_PPPOX" ,	x "AF_WANPIPE"  ,	x "AF_LLC"      , \
224   x "27"       ,	x "28"          ,	x "AF_CAN"      , \
225   x "AF_TIPC"  ,	x "AF_BLUETOOTH",	x "IUCV"        , \
226   x "AF_RXRPC" ,	x "AF_ISDN"     ,	x "AF_PHONET"   , \
227   x "AF_IEEE802154",	x "AF_CAIF"	,	x "AF_ALG"      , \
228   x "AF_NFC"   ,	x "AF_VSOCK"    ,	x "AF_KCM"      , \
229   x "AF_QIPCRTR",	x "AF_SMC"	,	x "AF_MAX"
230 
231 static const char *const af_family_key_strings[AF_MAX+1] = {
232 	_sock_locks("sk_lock-")
233 };
234 static const char *const af_family_slock_key_strings[AF_MAX+1] = {
235 	_sock_locks("slock-")
236 };
237 static const char *const af_family_clock_key_strings[AF_MAX+1] = {
238 	_sock_locks("clock-")
239 };
240 
241 static const char *const af_family_kern_key_strings[AF_MAX+1] = {
242 	_sock_locks("k-sk_lock-")
243 };
244 static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
245 	_sock_locks("k-slock-")
246 };
247 static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
248 	_sock_locks("k-clock-")
249 };
250 static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
251   "rlock-AF_UNSPEC", "rlock-AF_UNIX"     , "rlock-AF_INET"     ,
252   "rlock-AF_AX25"  , "rlock-AF_IPX"      , "rlock-AF_APPLETALK",
253   "rlock-AF_NETROM", "rlock-AF_BRIDGE"   , "rlock-AF_ATMPVC"   ,
254   "rlock-AF_X25"   , "rlock-AF_INET6"    , "rlock-AF_ROSE"     ,
255   "rlock-AF_DECnet", "rlock-AF_NETBEUI"  , "rlock-AF_SECURITY" ,
256   "rlock-AF_KEY"   , "rlock-AF_NETLINK"  , "rlock-AF_PACKET"   ,
257   "rlock-AF_ASH"   , "rlock-AF_ECONET"   , "rlock-AF_ATMSVC"   ,
258   "rlock-AF_RDS"   , "rlock-AF_SNA"      , "rlock-AF_IRDA"     ,
259   "rlock-AF_PPPOX" , "rlock-AF_WANPIPE"  , "rlock-AF_LLC"      ,
260   "rlock-27"       , "rlock-28"          , "rlock-AF_CAN"      ,
261   "rlock-AF_TIPC"  , "rlock-AF_BLUETOOTH", "rlock-AF_IUCV"     ,
262   "rlock-AF_RXRPC" , "rlock-AF_ISDN"     , "rlock-AF_PHONET"   ,
263   "rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG"      ,
264   "rlock-AF_NFC"   , "rlock-AF_VSOCK"    , "rlock-AF_KCM"      ,
265   "rlock-AF_QIPCRTR", "rlock-AF_SMC"     , "rlock-AF_MAX"
266 };
267 static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
268   "wlock-AF_UNSPEC", "wlock-AF_UNIX"     , "wlock-AF_INET"     ,
269   "wlock-AF_AX25"  , "wlock-AF_IPX"      , "wlock-AF_APPLETALK",
270   "wlock-AF_NETROM", "wlock-AF_BRIDGE"   , "wlock-AF_ATMPVC"   ,
271   "wlock-AF_X25"   , "wlock-AF_INET6"    , "wlock-AF_ROSE"     ,
272   "wlock-AF_DECnet", "wlock-AF_NETBEUI"  , "wlock-AF_SECURITY" ,
273   "wlock-AF_KEY"   , "wlock-AF_NETLINK"  , "wlock-AF_PACKET"   ,
274   "wlock-AF_ASH"   , "wlock-AF_ECONET"   , "wlock-AF_ATMSVC"   ,
275   "wlock-AF_RDS"   , "wlock-AF_SNA"      , "wlock-AF_IRDA"     ,
276   "wlock-AF_PPPOX" , "wlock-AF_WANPIPE"  , "wlock-AF_LLC"      ,
277   "wlock-27"       , "wlock-28"          , "wlock-AF_CAN"      ,
278   "wlock-AF_TIPC"  , "wlock-AF_BLUETOOTH", "wlock-AF_IUCV"     ,
279   "wlock-AF_RXRPC" , "wlock-AF_ISDN"     , "wlock-AF_PHONET"   ,
280   "wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG"      ,
281   "wlock-AF_NFC"   , "wlock-AF_VSOCK"    , "wlock-AF_KCM"      ,
282   "wlock-AF_QIPCRTR", "wlock-AF_SMC"     , "wlock-AF_MAX"
283 };
284 static const char *const af_family_elock_key_strings[AF_MAX+1] = {
285   "elock-AF_UNSPEC", "elock-AF_UNIX"     , "elock-AF_INET"     ,
286   "elock-AF_AX25"  , "elock-AF_IPX"      , "elock-AF_APPLETALK",
287   "elock-AF_NETROM", "elock-AF_BRIDGE"   , "elock-AF_ATMPVC"   ,
288   "elock-AF_X25"   , "elock-AF_INET6"    , "elock-AF_ROSE"     ,
289   "elock-AF_DECnet", "elock-AF_NETBEUI"  , "elock-AF_SECURITY" ,
290   "elock-AF_KEY"   , "elock-AF_NETLINK"  , "elock-AF_PACKET"   ,
291   "elock-AF_ASH"   , "elock-AF_ECONET"   , "elock-AF_ATMSVC"   ,
292   "elock-AF_RDS"   , "elock-AF_SNA"      , "elock-AF_IRDA"     ,
293   "elock-AF_PPPOX" , "elock-AF_WANPIPE"  , "elock-AF_LLC"      ,
294   "elock-27"       , "elock-28"          , "elock-AF_CAN"      ,
295   "elock-AF_TIPC"  , "elock-AF_BLUETOOTH", "elock-AF_IUCV"     ,
296   "elock-AF_RXRPC" , "elock-AF_ISDN"     , "elock-AF_PHONET"   ,
297   "elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG"      ,
298   "elock-AF_NFC"   , "elock-AF_VSOCK"    , "elock-AF_KCM"      ,
299   "elock-AF_QIPCRTR", "elock-AF_SMC"     , "elock-AF_MAX"
300 };
301 
302 /*
303  * sk_callback_lock and sk queues locking rules are per-address-family,
304  * so split the lock classes by using a per-AF key:
305  */
306 static struct lock_class_key af_callback_keys[AF_MAX];
307 static struct lock_class_key af_rlock_keys[AF_MAX];
308 static struct lock_class_key af_wlock_keys[AF_MAX];
309 static struct lock_class_key af_elock_keys[AF_MAX];
310 static struct lock_class_key af_kern_callback_keys[AF_MAX];
311 
312 /* Take the size of the struct sk_buff overhead into consideration when
313  * determining these values, since that overhead is not constant across
314  * platforms.  This keeps socket queueing behavior and performance
315  * independent of such differences.
316  */
317 #define _SK_MEM_PACKETS		256
318 #define _SK_MEM_OVERHEAD	SKB_TRUESIZE(256)
319 #define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
320 #define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
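/*
 * Worked example (figures are approximate and platform dependent):
 * SKB_TRUESIZE(256) accounts for a 256 byte payload plus the aligned
 * struct sk_buff and struct skb_shared_info, so the defaults above scale
 * with those structure sizes.  On a typical x86_64 build of this era
 * SKB_TRUESIZE(256) works out to roughly 832 bytes, giving
 *
 *	SK_WMEM_MAX = SK_RMEM_MAX ~= 256 * 832 = 212992 bytes (~208 KiB),
 *
 * which is the familiar default reported by
 * /proc/sys/net/core/{rmem,wmem}_default on such systems.  The exact
 * figure varies with struct layout and cache-line alignment, which is
 * why the defines are expressed via SKB_TRUESIZE() instead of a fixed
 * constant.
 */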
321 
322 /* Run time adjustable parameters. */
323 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
324 EXPORT_SYMBOL(sysctl_wmem_max);
325 __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
326 EXPORT_SYMBOL(sysctl_rmem_max);
327 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
328 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
329 
330 /* Maximal space eaten by iovec or ancillary data plus some space */
331 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
332 EXPORT_SYMBOL(sysctl_optmem_max);
333 
334 int sysctl_tstamp_allow_data __read_mostly = 1;
335 
336 struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
337 EXPORT_SYMBOL_GPL(memalloc_socks);
338 
339 /**
340  * sk_set_memalloc - sets %SOCK_MEMALLOC
341  * @sk: socket to set it on
342  *
343  * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
344  * It's the responsibility of the admin to adjust min_free_kbytes
345  * to meet the requirements.
346  */
347 void sk_set_memalloc(struct sock *sk)
348 {
349 	sock_set_flag(sk, SOCK_MEMALLOC);
350 	sk->sk_allocation |= __GFP_MEMALLOC;
351 	static_key_slow_inc(&memalloc_socks);
352 }
353 EXPORT_SYMBOL_GPL(sk_set_memalloc);
354 
355 void sk_clear_memalloc(struct sock *sk)
356 {
357 	sock_reset_flag(sk, SOCK_MEMALLOC);
358 	sk->sk_allocation &= ~__GFP_MEMALLOC;
359 	static_key_slow_dec(&memalloc_socks);
360 
361 	/*
362 	 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
363 	 * progress of swapping. SOCK_MEMALLOC may be cleared while the socket
364 	 * still has rmem allocations (e.g. when the last swapfile is deactivated),
365 	 * but then the socket risks being unusable because it exceeds the rmem
366 	 * limits. Reclaim the reserves and obey the rmem limits again.
367 	 */
368 	sk_mem_reclaim(sk);
369 }
370 EXPORT_SYMBOL_GPL(sk_clear_memalloc);
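/*
 * Illustrative sketch of how the two helpers above are meant to be used.
 * SOCK_MEMALLOC exists so that sockets backing swap (swap over NFS or a
 * network block device, for instance) can keep making progress under
 * memory pressure.  A hypothetical transport would bracket the socket's
 * "carries swap traffic" lifetime like this:
 *
 *	// socket starts backing swap: allow use of emergency reserves
 *	sk_set_memalloc(sk);
 *	...
 *	// last swapfile on this transport went away: drop the privilege
 *	// and fall back to normal rmem accounting
 *	sk_clear_memalloc(sk);
 *
 * The admin is still expected to raise min_free_kbytes so that the
 * reserves such sockets dip into actually exist.
 */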
371 
372 int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
373 {
374 	int ret;
375 	unsigned long pflags = current->flags;
376 
377 	/* these should have been dropped before queueing */
378 	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
379 
380 	current->flags |= PF_MEMALLOC;
381 	ret = sk->sk_backlog_rcv(sk, skb);
382 	tsk_restore_flags(current, pflags, PF_MEMALLOC);
383 
384 	return ret;
385 }
386 EXPORT_SYMBOL(__sk_backlog_rcv);
387 
388 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
389 {
390 	struct timeval tv;
391 
392 	if (optlen < sizeof(tv))
393 		return -EINVAL;
394 	if (copy_from_user(&tv, optval, sizeof(tv)))
395 		return -EFAULT;
396 	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
397 		return -EDOM;
398 
399 	if (tv.tv_sec < 0) {
400 		static int warned __read_mostly;
401 
402 		*timeo_p = 0;
403 		if (warned < 10 && net_ratelimit()) {
404 			warned++;
405 			pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
406 				__func__, current->comm, task_pid_nr(current));
407 		}
408 		return 0;
409 	}
410 	*timeo_p = MAX_SCHEDULE_TIMEOUT;
411 	if (tv.tv_sec == 0 && tv.tv_usec == 0)
412 		return 0;
413 	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
414 		*timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC / HZ);
415 	return 0;
416 }
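/*
 * Illustrative example of the conversion done above.  With HZ == 250, a
 * userspace call such as
 *
 *	struct timeval tv = { .tv_sec = 2, .tv_usec = 500000 };
 *	setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
 *
 * reaches this helper and yields
 *
 *	*timeo_p = 2 * 250 + DIV_ROUND_UP(500000, 4000) = 625 jiffies,
 *
 * while an all-zero timeval means "block forever" (MAX_SCHEDULE_TIMEOUT)
 * and a negative tv_sec is clamped to 0 with a rate-limited warning.
 */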
417 
418 static void sock_warn_obsolete_bsdism(const char *name)
419 {
420 	static int warned;
421 	static char warncomm[TASK_COMM_LEN];
422 	if (strcmp(warncomm, current->comm) && warned < 5) {
423 		strcpy(warncomm,  current->comm);
424 		pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
425 			warncomm, name);
426 		warned++;
427 	}
428 }
429 
430 static bool sock_needs_netstamp(const struct sock *sk)
431 {
432 	switch (sk->sk_family) {
433 	case AF_UNSPEC:
434 	case AF_UNIX:
435 		return false;
436 	default:
437 		return true;
438 	}
439 }
440 
441 static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
442 {
443 	if (sk->sk_flags & flags) {
444 		sk->sk_flags &= ~flags;
445 		if (sock_needs_netstamp(sk) &&
446 		    !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
447 			net_disable_timestamp();
448 	}
449 }
450 
451 
452 int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
453 {
454 	unsigned long flags;
455 	struct sk_buff_head *list = &sk->sk_receive_queue;
456 
457 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
458 		atomic_inc(&sk->sk_drops);
459 		trace_sock_rcvqueue_full(sk, skb);
460 		return -ENOMEM;
461 	}
462 
463 	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
464 		atomic_inc(&sk->sk_drops);
465 		return -ENOBUFS;
466 	}
467 
468 	skb->dev = NULL;
469 	skb_set_owner_r(skb, sk);
470 
471 	/* We escape from the RCU-protected region; make sure we don't leak
472 	 * a non-refcounted dst.
473 	 */
474 	skb_dst_force(skb);
475 
476 	spin_lock_irqsave(&list->lock, flags);
477 	sock_skb_set_dropcount(sk, skb);
478 	__skb_queue_tail(list, skb);
479 	spin_unlock_irqrestore(&list->lock, flags);
480 
481 	if (!sock_flag(sk, SOCK_DEAD))
482 		sk->sk_data_ready(sk);
483 	return 0;
484 }
485 EXPORT_SYMBOL(__sock_queue_rcv_skb);
486 
487 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
488 {
489 	int err;
490 
491 	err = sk_filter(sk, skb);
492 	if (err)
493 		return err;
494 
495 	return __sock_queue_rcv_skb(sk, skb);
496 }
497 EXPORT_SYMBOL(sock_queue_rcv_skb);
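/*
 * Illustrative sketch: sock_queue_rcv_skb() is the generic "hand this
 * packet to the socket" helper used by simpler protocols (raw sockets,
 * for example).  A hypothetical protocol rcv handler would use it
 * roughly like this (example_proto_rcv is made up for illustration):
 *
 *	static int example_proto_rcv(struct sock *sk, struct sk_buff *skb)
 *	{
 *		// sk_filter() runs first; on success the skb is charged to
 *		// sk_rmem_alloc, queued, and sk_data_ready() wakes readers.
 *		if (sock_queue_rcv_skb(sk, skb) < 0) {
 *			kfree_skb(skb);
 *			return NET_RX_DROP;
 *		}
 *		return NET_RX_SUCCESS;
 *	}
 */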
498 
499 int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
500 		     const int nested, unsigned int trim_cap, bool refcounted)
501 {
502 	int rc = NET_RX_SUCCESS;
503 
504 	if (sk_filter_trim_cap(sk, skb, trim_cap))
505 		goto discard_and_relse;
506 
507 	skb->dev = NULL;
508 
509 	if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
510 		atomic_inc(&sk->sk_drops);
511 		goto discard_and_relse;
512 	}
513 	if (nested)
514 		bh_lock_sock_nested(sk);
515 	else
516 		bh_lock_sock(sk);
517 	if (!sock_owned_by_user(sk)) {
518 		/*
519 		 * trylock + unlock semantics:
520 		 */
521 		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
522 
523 		rc = sk_backlog_rcv(sk, skb);
524 
525 		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
526 	} else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
527 		bh_unlock_sock(sk);
528 		atomic_inc(&sk->sk_drops);
529 		goto discard_and_relse;
530 	}
531 
532 	bh_unlock_sock(sk);
533 out:
534 	if (refcounted)
535 		sock_put(sk);
536 	return rc;
537 discard_and_relse:
538 	kfree_skb(skb);
539 	goto out;
540 }
541 EXPORT_SYMBOL(__sk_receive_skb);
542 
543 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
544 {
545 	struct dst_entry *dst = __sk_dst_get(sk);
546 
547 	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
548 		sk_tx_queue_clear(sk);
549 		sk->sk_dst_pending_confirm = 0;
550 		RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
551 		dst_release(dst);
552 		return NULL;
553 	}
554 
555 	return dst;
556 }
557 EXPORT_SYMBOL(__sk_dst_check);
558 
559 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
560 {
561 	struct dst_entry *dst = sk_dst_get(sk);
562 
563 	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
564 		sk_dst_reset(sk);
565 		dst_release(dst);
566 		return NULL;
567 	}
568 
569 	return dst;
570 }
571 EXPORT_SYMBOL(sk_dst_check);
572 
573 static int sock_setbindtodevice(struct sock *sk, char __user *optval,
574 				int optlen)
575 {
576 	int ret = -ENOPROTOOPT;
577 #ifdef CONFIG_NETDEVICES
578 	struct net *net = sock_net(sk);
579 	char devname[IFNAMSIZ];
580 	int index;
581 
582 	/* Sorry... */
583 	ret = -EPERM;
584 	if (!ns_capable(net->user_ns, CAP_NET_RAW))
585 		goto out;
586 
587 	ret = -EINVAL;
588 	if (optlen < 0)
589 		goto out;
590 
591 	/* Bind this socket to a particular device like "eth0",
592 	 * as specified in the passed interface name. If the
593 	 * name is "" or the option length is zero the socket
594 	 * is not bound.
595 	 */
596 	if (optlen > IFNAMSIZ - 1)
597 		optlen = IFNAMSIZ - 1;
598 	memset(devname, 0, sizeof(devname));
599 
600 	ret = -EFAULT;
601 	if (copy_from_user(devname, optval, optlen))
602 		goto out;
603 
604 	index = 0;
605 	if (devname[0] != '\0') {
606 		struct net_device *dev;
607 
608 		rcu_read_lock();
609 		dev = dev_get_by_name_rcu(net, devname);
610 		if (dev)
611 			index = dev->ifindex;
612 		rcu_read_unlock();
613 		ret = -ENODEV;
614 		if (!dev)
615 			goto out;
616 	}
617 
618 	lock_sock(sk);
619 	sk->sk_bound_dev_if = index;
620 	sk_dst_reset(sk);
621 	release_sock(sk);
622 
623 	ret = 0;
624 
625 out:
626 #endif
627 
628 	return ret;
629 }
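/*
 * Illustrative userspace counterpart of the helper above: the option is
 * reached via SO_BINDTODEVICE, and an empty name (or zero length) clears
 * the binding.  Something like
 *
 *	setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, "eth0", strlen("eth0"));
 *
 * requires CAP_NET_RAW in the socket's network namespace and simply
 * records the interface index in sk->sk_bound_dev_if, resetting any
 * cached route so the next lookup honours the new binding.
 */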
630 
631 static int sock_getbindtodevice(struct sock *sk, char __user *optval,
632 				int __user *optlen, int len)
633 {
634 	int ret = -ENOPROTOOPT;
635 #ifdef CONFIG_NETDEVICES
636 	struct net *net = sock_net(sk);
637 	char devname[IFNAMSIZ];
638 
639 	if (sk->sk_bound_dev_if == 0) {
640 		len = 0;
641 		goto zero;
642 	}
643 
644 	ret = -EINVAL;
645 	if (len < IFNAMSIZ)
646 		goto out;
647 
648 	ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
649 	if (ret)
650 		goto out;
651 
652 	len = strlen(devname) + 1;
653 
654 	ret = -EFAULT;
655 	if (copy_to_user(optval, devname, len))
656 		goto out;
657 
658 zero:
659 	ret = -EFAULT;
660 	if (put_user(len, optlen))
661 		goto out;
662 
663 	ret = 0;
664 
665 out:
666 #endif
667 
668 	return ret;
669 }
670 
671 static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
672 {
673 	if (valbool)
674 		sock_set_flag(sk, bit);
675 	else
676 		sock_reset_flag(sk, bit);
677 }
678 
679 bool sk_mc_loop(struct sock *sk)
680 {
681 	if (dev_recursion_level())
682 		return false;
683 	if (!sk)
684 		return true;
685 	switch (sk->sk_family) {
686 	case AF_INET:
687 		return inet_sk(sk)->mc_loop;
688 #if IS_ENABLED(CONFIG_IPV6)
689 	case AF_INET6:
690 		return inet6_sk(sk)->mc_loop;
691 #endif
692 	}
693 	WARN_ON(1);
694 	return true;
695 }
696 EXPORT_SYMBOL(sk_mc_loop);
697 
698 /*
699  *	This is meant for all protocols to use and covers goings on
700  *	at the socket level. Everything here is generic.
701  */
702 
703 int sock_setsockopt(struct socket *sock, int level, int optname,
704 		    char __user *optval, unsigned int optlen)
705 {
706 	struct sock *sk = sock->sk;
707 	int val;
708 	int valbool;
709 	struct linger ling;
710 	int ret = 0;
711 
712 	/*
713 	 *	Options without arguments
714 	 */
715 
716 	if (optname == SO_BINDTODEVICE)
717 		return sock_setbindtodevice(sk, optval, optlen);
718 
719 	if (optlen < sizeof(int))
720 		return -EINVAL;
721 
722 	if (get_user(val, (int __user *)optval))
723 		return -EFAULT;
724 
725 	valbool = val ? 1 : 0;
726 
727 	lock_sock(sk);
728 
729 	switch (optname) {
730 	case SO_DEBUG:
731 		if (val && !capable(CAP_NET_ADMIN))
732 			ret = -EACCES;
733 		else
734 			sock_valbool_flag(sk, SOCK_DBG, valbool);
735 		break;
736 	case SO_REUSEADDR:
737 		sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
738 		break;
739 	case SO_REUSEPORT:
740 		sk->sk_reuseport = valbool;
741 		break;
742 	case SO_TYPE:
743 	case SO_PROTOCOL:
744 	case SO_DOMAIN:
745 	case SO_ERROR:
746 		ret = -ENOPROTOOPT;
747 		break;
748 	case SO_DONTROUTE:
749 		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
750 		break;
751 	case SO_BROADCAST:
752 		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
753 		break;
754 	case SO_SNDBUF:
755 		/* Don't error on this; BSD doesn't, and if you think
756 		 * about it this is right. Otherwise apps have to
757 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
758 		 * are treated in BSD as hints.
759 		 */
760 		val = min_t(u32, val, sysctl_wmem_max);
761 set_sndbuf:
762 		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
763 		sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
764 		/* Wake up sending tasks if we upped the value. */
765 		sk->sk_write_space(sk);
766 		break;
767 
768 	case SO_SNDBUFFORCE:
769 		if (!capable(CAP_NET_ADMIN)) {
770 			ret = -EPERM;
771 			break;
772 		}
773 		goto set_sndbuf;
774 
775 	case SO_RCVBUF:
776 		/* Don't error on this; BSD doesn't, and if you think
777 		 * about it this is right. Otherwise apps have to
778 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
779 		 * are treated in BSD as hints.
780 		 */
781 		val = min_t(u32, val, sysctl_rmem_max);
782 set_rcvbuf:
783 		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
784 		/*
785 		 * We double it on the way in to account for
786 		 * "struct sk_buff" etc. overhead.   Applications
787 		 * assume that the SO_RCVBUF setting they make will
788 		 * allow that much actual data to be received on that
789 		 * socket.
790 		 *
791 		 * Applications are unaware that "struct sk_buff" and
792 		 * other overheads allocate from the receive buffer
793 		 * during socket buffer allocation.
794 		 *
795 		 * And after considering the possible alternatives,
796 		 * returning the value we actually used in getsockopt
797 		 * is the most desirable behavior.
798 		 */
799 		sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
800 		break;
801 
802 	case SO_RCVBUFFORCE:
803 		if (!capable(CAP_NET_ADMIN)) {
804 			ret = -EPERM;
805 			break;
806 		}
807 		goto set_rcvbuf;
808 
809 	case SO_KEEPALIVE:
810 		if (sk->sk_prot->keepalive)
811 			sk->sk_prot->keepalive(sk, valbool);
812 		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
813 		break;
814 
815 	case SO_OOBINLINE:
816 		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
817 		break;
818 
819 	case SO_NO_CHECK:
820 		sk->sk_no_check_tx = valbool;
821 		break;
822 
823 	case SO_PRIORITY:
824 		if ((val >= 0 && val <= 6) ||
825 		    ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
826 			sk->sk_priority = val;
827 		else
828 			ret = -EPERM;
829 		break;
830 
831 	case SO_LINGER:
832 		if (optlen < sizeof(ling)) {
833 			ret = -EINVAL;	/* 1003.1g */
834 			break;
835 		}
836 		if (copy_from_user(&ling, optval, sizeof(ling))) {
837 			ret = -EFAULT;
838 			break;
839 		}
840 		if (!ling.l_onoff)
841 			sock_reset_flag(sk, SOCK_LINGER);
842 		else {
843 #if (BITS_PER_LONG == 32)
844 			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
845 				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
846 			else
847 #endif
848 				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
849 			sock_set_flag(sk, SOCK_LINGER);
850 		}
851 		break;
852 
853 	case SO_BSDCOMPAT:
854 		sock_warn_obsolete_bsdism("setsockopt");
855 		break;
856 
857 	case SO_PASSCRED:
858 		if (valbool)
859 			set_bit(SOCK_PASSCRED, &sock->flags);
860 		else
861 			clear_bit(SOCK_PASSCRED, &sock->flags);
862 		break;
863 
864 	case SO_TIMESTAMP:
865 	case SO_TIMESTAMPNS:
866 		if (valbool)  {
867 			if (optname == SO_TIMESTAMP)
868 				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
869 			else
870 				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
871 			sock_set_flag(sk, SOCK_RCVTSTAMP);
872 			sock_enable_timestamp(sk, SOCK_TIMESTAMP);
873 		} else {
874 			sock_reset_flag(sk, SOCK_RCVTSTAMP);
875 			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
876 		}
877 		break;
878 
879 	case SO_TIMESTAMPING:
880 		if (val & ~SOF_TIMESTAMPING_MASK) {
881 			ret = -EINVAL;
882 			break;
883 		}
884 
885 		if (val & SOF_TIMESTAMPING_OPT_ID &&
886 		    !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
887 			if (sk->sk_protocol == IPPROTO_TCP &&
888 			    sk->sk_type == SOCK_STREAM) {
889 				if ((1 << sk->sk_state) &
890 				    (TCPF_CLOSE | TCPF_LISTEN)) {
891 					ret = -EINVAL;
892 					break;
893 				}
894 				sk->sk_tskey = tcp_sk(sk)->snd_una;
895 			} else {
896 				sk->sk_tskey = 0;
897 			}
898 		}
899 
900 		if (val & SOF_TIMESTAMPING_OPT_STATS &&
901 		    !(val & SOF_TIMESTAMPING_OPT_TSONLY)) {
902 			ret = -EINVAL;
903 			break;
904 		}
905 
906 		sk->sk_tsflags = val;
907 		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
908 			sock_enable_timestamp(sk,
909 					      SOCK_TIMESTAMPING_RX_SOFTWARE);
910 		else
911 			sock_disable_timestamp(sk,
912 					       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
913 		break;
914 
915 	case SO_RCVLOWAT:
916 		if (val < 0)
917 			val = INT_MAX;
918 		sk->sk_rcvlowat = val ? : 1;
919 		break;
920 
921 	case SO_RCVTIMEO:
922 		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
923 		break;
924 
925 	case SO_SNDTIMEO:
926 		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
927 		break;
928 
929 	case SO_ATTACH_FILTER:
930 		ret = -EINVAL;
931 		if (optlen == sizeof(struct sock_fprog)) {
932 			struct sock_fprog fprog;
933 
934 			ret = -EFAULT;
935 			if (copy_from_user(&fprog, optval, sizeof(fprog)))
936 				break;
937 
938 			ret = sk_attach_filter(&fprog, sk);
939 		}
940 		break;
941 
942 	case SO_ATTACH_BPF:
943 		ret = -EINVAL;
944 		if (optlen == sizeof(u32)) {
945 			u32 ufd;
946 
947 			ret = -EFAULT;
948 			if (copy_from_user(&ufd, optval, sizeof(ufd)))
949 				break;
950 
951 			ret = sk_attach_bpf(ufd, sk);
952 		}
953 		break;
954 
955 	case SO_ATTACH_REUSEPORT_CBPF:
956 		ret = -EINVAL;
957 		if (optlen == sizeof(struct sock_fprog)) {
958 			struct sock_fprog fprog;
959 
960 			ret = -EFAULT;
961 			if (copy_from_user(&fprog, optval, sizeof(fprog)))
962 				break;
963 
964 			ret = sk_reuseport_attach_filter(&fprog, sk);
965 		}
966 		break;
967 
968 	case SO_ATTACH_REUSEPORT_EBPF:
969 		ret = -EINVAL;
970 		if (optlen == sizeof(u32)) {
971 			u32 ufd;
972 
973 			ret = -EFAULT;
974 			if (copy_from_user(&ufd, optval, sizeof(ufd)))
975 				break;
976 
977 			ret = sk_reuseport_attach_bpf(ufd, sk);
978 		}
979 		break;
980 
981 	case SO_DETACH_FILTER:
982 		ret = sk_detach_filter(sk);
983 		break;
984 
985 	case SO_LOCK_FILTER:
986 		if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
987 			ret = -EPERM;
988 		else
989 			sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
990 		break;
991 
992 	case SO_PASSSEC:
993 		if (valbool)
994 			set_bit(SOCK_PASSSEC, &sock->flags);
995 		else
996 			clear_bit(SOCK_PASSSEC, &sock->flags);
997 		break;
998 	case SO_MARK:
999 		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1000 			ret = -EPERM;
1001 		else
1002 			sk->sk_mark = val;
1003 		break;
1004 
1005 	case SO_RXQ_OVFL:
1006 		sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
1007 		break;
1008 
1009 	case SO_WIFI_STATUS:
1010 		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
1011 		break;
1012 
1013 	case SO_PEEK_OFF:
1014 		if (sock->ops->set_peek_off)
1015 			ret = sock->ops->set_peek_off(sk, val);
1016 		else
1017 			ret = -EOPNOTSUPP;
1018 		break;
1019 
1020 	case SO_NOFCS:
1021 		sock_valbool_flag(sk, SOCK_NOFCS, valbool);
1022 		break;
1023 
1024 	case SO_SELECT_ERR_QUEUE:
1025 		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
1026 		break;
1027 
1028 #ifdef CONFIG_NET_RX_BUSY_POLL
1029 	case SO_BUSY_POLL:
1030 		/* allow unprivileged users to decrease the value */
1031 		if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
1032 			ret = -EPERM;
1033 		else {
1034 			if (val < 0)
1035 				ret = -EINVAL;
1036 			else
1037 				sk->sk_ll_usec = val;
1038 		}
1039 		break;
1040 #endif
1041 
1042 	case SO_MAX_PACING_RATE:
1043 		sk->sk_max_pacing_rate = val;
1044 		sk->sk_pacing_rate = min(sk->sk_pacing_rate,
1045 					 sk->sk_max_pacing_rate);
1046 		break;
1047 
1048 	case SO_INCOMING_CPU:
1049 		sk->sk_incoming_cpu = val;
1050 		break;
1051 
1052 	case SO_CNX_ADVICE:
1053 		if (val == 1)
1054 			dst_negative_advice(sk);
1055 		break;
1056 	default:
1057 		ret = -ENOPROTOOPT;
1058 		break;
1059 	}
1060 	release_sock(sk);
1061 	return ret;
1062 }
1063 EXPORT_SYMBOL(sock_setsockopt);
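/*
 * Illustrative userspace example of two behaviours implemented above:
 * SO_SNDBUF/SO_RCVBUF requests are treated as hints (clamped to
 * sysctl_wmem_max / sysctl_rmem_max, never an error), and the value is
 * doubled internally to cover struct sk_buff overhead, which is what
 * getsockopt() then reports back:
 *
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *
 *	int main(void)
 *	{
 *		int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *		int val = 65536, out = 0;
 *		socklen_t len = sizeof(out);
 *
 *		setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
 *		getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &out, &len);
 *		printf("asked for %d, kernel reports %d\n", val, out);
 *		// typically prints "asked for 65536, kernel reports 131072"
 *		return 0;
 *	}
 */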
1064 
1065 
1066 static void cred_to_ucred(struct pid *pid, const struct cred *cred,
1067 			  struct ucred *ucred)
1068 {
1069 	ucred->pid = pid_vnr(pid);
1070 	ucred->uid = ucred->gid = -1;
1071 	if (cred) {
1072 		struct user_namespace *current_ns = current_user_ns();
1073 
1074 		ucred->uid = from_kuid_munged(current_ns, cred->euid);
1075 		ucred->gid = from_kgid_munged(current_ns, cred->egid);
1076 	}
1077 }
1078 
1079 int sock_getsockopt(struct socket *sock, int level, int optname,
1080 		    char __user *optval, int __user *optlen)
1081 {
1082 	struct sock *sk = sock->sk;
1083 
1084 	union {
1085 		int val;
1086 		struct linger ling;
1087 		struct timeval tm;
1088 	} v;
1089 
1090 	int lv = sizeof(int);
1091 	int len;
1092 
1093 	if (get_user(len, optlen))
1094 		return -EFAULT;
1095 	if (len < 0)
1096 		return -EINVAL;
1097 
1098 	memset(&v, 0, sizeof(v));
1099 
1100 	switch (optname) {
1101 	case SO_DEBUG:
1102 		v.val = sock_flag(sk, SOCK_DBG);
1103 		break;
1104 
1105 	case SO_DONTROUTE:
1106 		v.val = sock_flag(sk, SOCK_LOCALROUTE);
1107 		break;
1108 
1109 	case SO_BROADCAST:
1110 		v.val = sock_flag(sk, SOCK_BROADCAST);
1111 		break;
1112 
1113 	case SO_SNDBUF:
1114 		v.val = sk->sk_sndbuf;
1115 		break;
1116 
1117 	case SO_RCVBUF:
1118 		v.val = sk->sk_rcvbuf;
1119 		break;
1120 
1121 	case SO_REUSEADDR:
1122 		v.val = sk->sk_reuse;
1123 		break;
1124 
1125 	case SO_REUSEPORT:
1126 		v.val = sk->sk_reuseport;
1127 		break;
1128 
1129 	case SO_KEEPALIVE:
1130 		v.val = sock_flag(sk, SOCK_KEEPOPEN);
1131 		break;
1132 
1133 	case SO_TYPE:
1134 		v.val = sk->sk_type;
1135 		break;
1136 
1137 	case SO_PROTOCOL:
1138 		v.val = sk->sk_protocol;
1139 		break;
1140 
1141 	case SO_DOMAIN:
1142 		v.val = sk->sk_family;
1143 		break;
1144 
1145 	case SO_ERROR:
1146 		v.val = -sock_error(sk);
1147 		if (v.val == 0)
1148 			v.val = xchg(&sk->sk_err_soft, 0);
1149 		break;
1150 
1151 	case SO_OOBINLINE:
1152 		v.val = sock_flag(sk, SOCK_URGINLINE);
1153 		break;
1154 
1155 	case SO_NO_CHECK:
1156 		v.val = sk->sk_no_check_tx;
1157 		break;
1158 
1159 	case SO_PRIORITY:
1160 		v.val = sk->sk_priority;
1161 		break;
1162 
1163 	case SO_LINGER:
1164 		lv		= sizeof(v.ling);
1165 		v.ling.l_onoff	= sock_flag(sk, SOCK_LINGER);
1166 		v.ling.l_linger	= sk->sk_lingertime / HZ;
1167 		break;
1168 
1169 	case SO_BSDCOMPAT:
1170 		sock_warn_obsolete_bsdism("getsockopt");
1171 		break;
1172 
1173 	case SO_TIMESTAMP:
1174 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1175 				!sock_flag(sk, SOCK_RCVTSTAMPNS);
1176 		break;
1177 
1178 	case SO_TIMESTAMPNS:
1179 		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
1180 		break;
1181 
1182 	case SO_TIMESTAMPING:
1183 		v.val = sk->sk_tsflags;
1184 		break;
1185 
1186 	case SO_RCVTIMEO:
1187 		lv = sizeof(struct timeval);
1188 		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
1189 			v.tm.tv_sec = 0;
1190 			v.tm.tv_usec = 0;
1191 		} else {
1192 			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
1193 			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * USEC_PER_SEC) / HZ;
1194 		}
1195 		break;
1196 
1197 	case SO_SNDTIMEO:
1198 		lv = sizeof(struct timeval);
1199 		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
1200 			v.tm.tv_sec = 0;
1201 			v.tm.tv_usec = 0;
1202 		} else {
1203 			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
1204 			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * USEC_PER_SEC) / HZ;
1205 		}
1206 		break;
1207 
1208 	case SO_RCVLOWAT:
1209 		v.val = sk->sk_rcvlowat;
1210 		break;
1211 
1212 	case SO_SNDLOWAT:
1213 		v.val = 1;
1214 		break;
1215 
1216 	case SO_PASSCRED:
1217 		v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
1218 		break;
1219 
1220 	case SO_PEERCRED:
1221 	{
1222 		struct ucred peercred;
1223 		if (len > sizeof(peercred))
1224 			len = sizeof(peercred);
1225 		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
1226 		if (copy_to_user(optval, &peercred, len))
1227 			return -EFAULT;
1228 		goto lenout;
1229 	}
1230 
1231 	case SO_PEERNAME:
1232 	{
1233 		char address[128];
1234 
1235 		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
1236 			return -ENOTCONN;
1237 		if (lv < len)
1238 			return -EINVAL;
1239 		if (copy_to_user(optval, address, len))
1240 			return -EFAULT;
1241 		goto lenout;
1242 	}
1243 
1244 	/* Dubious BSD thing... Probably nobody even uses it, but
1245 	 * the UNIX standard wants it for whatever reason... -DaveM
1246 	 */
1247 	case SO_ACCEPTCONN:
1248 		v.val = sk->sk_state == TCP_LISTEN;
1249 		break;
1250 
1251 	case SO_PASSSEC:
1252 		v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
1253 		break;
1254 
1255 	case SO_PEERSEC:
1256 		return security_socket_getpeersec_stream(sock, optval, optlen, len);
1257 
1258 	case SO_MARK:
1259 		v.val = sk->sk_mark;
1260 		break;
1261 
1262 	case SO_RXQ_OVFL:
1263 		v.val = sock_flag(sk, SOCK_RXQ_OVFL);
1264 		break;
1265 
1266 	case SO_WIFI_STATUS:
1267 		v.val = sock_flag(sk, SOCK_WIFI_STATUS);
1268 		break;
1269 
1270 	case SO_PEEK_OFF:
1271 		if (!sock->ops->set_peek_off)
1272 			return -EOPNOTSUPP;
1273 
1274 		v.val = sk->sk_peek_off;
1275 		break;
1276 	case SO_NOFCS:
1277 		v.val = sock_flag(sk, SOCK_NOFCS);
1278 		break;
1279 
1280 	case SO_BINDTODEVICE:
1281 		return sock_getbindtodevice(sk, optval, optlen, len);
1282 
1283 	case SO_GET_FILTER:
1284 		len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
1285 		if (len < 0)
1286 			return len;
1287 
1288 		goto lenout;
1289 
1290 	case SO_LOCK_FILTER:
1291 		v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1292 		break;
1293 
1294 	case SO_BPF_EXTENSIONS:
1295 		v.val = bpf_tell_extensions();
1296 		break;
1297 
1298 	case SO_SELECT_ERR_QUEUE:
1299 		v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1300 		break;
1301 
1302 #ifdef CONFIG_NET_RX_BUSY_POLL
1303 	case SO_BUSY_POLL:
1304 		v.val = sk->sk_ll_usec;
1305 		break;
1306 #endif
1307 
1308 	case SO_MAX_PACING_RATE:
1309 		v.val = sk->sk_max_pacing_rate;
1310 		break;
1311 
1312 	case SO_INCOMING_CPU:
1313 		v.val = sk->sk_incoming_cpu;
1314 		break;
1315 
1316 	case SO_MEMINFO:
1317 	{
1318 		u32 meminfo[SK_MEMINFO_VARS];
1319 
1320 		if (get_user(len, optlen))
1321 			return -EFAULT;
1322 
1323 		sk_get_meminfo(sk, meminfo);
1324 
1325 		len = min_t(unsigned int, len, sizeof(meminfo));
1326 		if (copy_to_user(optval, &meminfo, len))
1327 			return -EFAULT;
1328 
1329 		goto lenout;
1330 	}
1331 
1332 #ifdef CONFIG_NET_RX_BUSY_POLL
1333 	case SO_INCOMING_NAPI_ID:
1334 		v.val = READ_ONCE(sk->sk_napi_id);
1335 
1336 		/* aggregate non-NAPI IDs down to 0 */
1337 		if (v.val < MIN_NAPI_ID)
1338 			v.val = 0;
1339 
1340 		break;
1341 #endif
1342 
1343 	default:
1344 		/* We implement the SO_SNDLOWAT etc to not be settable
1345 		 * (1003.1g 7).
1346 		 */
1347 		return -ENOPROTOOPT;
1348 	}
1349 
1350 	if (len > lv)
1351 		len = lv;
1352 	if (copy_to_user(optval, &v, len))
1353 		return -EFAULT;
1354 lenout:
1355 	if (put_user(len, optlen))
1356 		return -EFAULT;
1357 	return 0;
1358 }
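/*
 * Illustrative userspace example: SO_PEERCRED above fills a struct ucred
 * from the credentials recorded at connect()/socketpair() time,
 * translated into the caller's user namespace by cred_to_ucred().
 * A sketch (print_peer is made up; glibc needs _GNU_SOURCE for
 * struct ucred):
 *
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *
 *	static void print_peer(int fd)	// fd: a connected AF_UNIX socket
 *	{
 *		struct ucred peer;
 *		socklen_t len = sizeof(peer);
 *
 *		if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &peer, &len) == 0)
 *			printf("peer pid=%ld uid=%u gid=%u\n",
 *			       (long)peer.pid, peer.uid, peer.gid);
 *	}
 */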
1359 
1360 /*
1361  * Initialize an sk_lock.
1362  *
1363  * (We also register the sk_lock with the lock validator.)
1364  */
1365 static inline void sock_lock_init(struct sock *sk)
1366 {
1367 	if (sk->sk_kern_sock)
1368 		sock_lock_init_class_and_name(
1369 			sk,
1370 			af_family_kern_slock_key_strings[sk->sk_family],
1371 			af_family_kern_slock_keys + sk->sk_family,
1372 			af_family_kern_key_strings[sk->sk_family],
1373 			af_family_kern_keys + sk->sk_family);
1374 	else
1375 		sock_lock_init_class_and_name(
1376 			sk,
1377 			af_family_slock_key_strings[sk->sk_family],
1378 			af_family_slock_keys + sk->sk_family,
1379 			af_family_key_strings[sk->sk_family],
1380 			af_family_keys + sk->sk_family);
1381 }
1382 
1383 /*
1384  * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
1385  * even temporarly, because of RCU lookups. sk_node should also be left as is.
1386  * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
1387  */
1388 static void sock_copy(struct sock *nsk, const struct sock *osk)
1389 {
1390 #ifdef CONFIG_SECURITY_NETWORK
1391 	void *sptr = nsk->sk_security;
1392 #endif
1393 	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
1394 
1395 	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
1396 	       osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
1397 
1398 #ifdef CONFIG_SECURITY_NETWORK
1399 	nsk->sk_security = sptr;
1400 	security_sk_clone(osk, nsk);
1401 #endif
1402 }
1403 
1404 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1405 		int family)
1406 {
1407 	struct sock *sk;
1408 	struct kmem_cache *slab;
1409 
1410 	slab = prot->slab;
1411 	if (slab != NULL) {
1412 		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
1413 		if (!sk)
1414 			return sk;
1415 		if (priority & __GFP_ZERO)
1416 			sk_prot_clear_nulls(sk, prot->obj_size);
1417 	} else
1418 		sk = kmalloc(prot->obj_size, priority);
1419 
1420 	if (sk != NULL) {
1421 		kmemcheck_annotate_bitfield(sk, flags);
1422 
1423 		if (security_sk_alloc(sk, family, priority))
1424 			goto out_free;
1425 
1426 		if (!try_module_get(prot->owner))
1427 			goto out_free_sec;
1428 		sk_tx_queue_clear(sk);
1429 	}
1430 
1431 	return sk;
1432 
1433 out_free_sec:
1434 	security_sk_free(sk);
1435 out_free:
1436 	if (slab != NULL)
1437 		kmem_cache_free(slab, sk);
1438 	else
1439 		kfree(sk);
1440 	return NULL;
1441 }
1442 
1443 static void sk_prot_free(struct proto *prot, struct sock *sk)
1444 {
1445 	struct kmem_cache *slab;
1446 	struct module *owner;
1447 
1448 	owner = prot->owner;
1449 	slab = prot->slab;
1450 
1451 	cgroup_sk_free(&sk->sk_cgrp_data);
1452 	mem_cgroup_sk_free(sk);
1453 	security_sk_free(sk);
1454 	if (slab != NULL)
1455 		kmem_cache_free(slab, sk);
1456 	else
1457 		kfree(sk);
1458 	module_put(owner);
1459 }
1460 
1461 /**
1462  *	sk_alloc - All socket objects are allocated here
1463  *	@net: the applicable net namespace
1464  *	@family: protocol family
1465  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1466  *	@prot: struct proto associated with this new sock instance
1467  *	@kern: is this to be a kernel socket?
1468  */
1469 struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1470 		      struct proto *prot, int kern)
1471 {
1472 	struct sock *sk;
1473 
1474 	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
1475 	if (sk) {
1476 		sk->sk_family = family;
1477 		/*
1478 		 * See comment in struct sock definition to understand
1479 		 * why we need sk_prot_creator -acme
1480 		 */
1481 		sk->sk_prot = sk->sk_prot_creator = prot;
1482 		sk->sk_kern_sock = kern;
1483 		sock_lock_init(sk);
1484 		sk->sk_net_refcnt = kern ? 0 : 1;
1485 		if (likely(sk->sk_net_refcnt))
1486 			get_net(net);
1487 		sock_net_set(sk, net);
1488 		atomic_set(&sk->sk_wmem_alloc, 1);
1489 
1490 		mem_cgroup_sk_alloc(sk);
1491 		cgroup_sk_alloc(&sk->sk_cgrp_data);
1492 		sock_update_classid(&sk->sk_cgrp_data);
1493 		sock_update_netprioidx(&sk->sk_cgrp_data);
1494 	}
1495 
1496 	return sk;
1497 }
1498 EXPORT_SYMBOL(sk_alloc);
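/*
 * Illustrative sketch: protocol ->create() handlers are the usual
 * callers of sk_alloc().  A minimal hypothetical family would do
 * something along these lines (PF_EXAMPLE and example_proto are
 * placeholders):
 *
 *	static int example_create(struct net *net, struct socket *sock,
 *				  int protocol, int kern)
 *	{
 *		struct sock *sk;
 *
 *		sk = sk_alloc(net, PF_EXAMPLE, GFP_KERNEL, &example_proto, kern);
 *		if (!sk)
 *			return -ENOMEM;
 *		sock_init_data(sock, sk);	// attach sk to the socket
 *		return 0;
 *	}
 *
 * The kern argument selects the kernel-socket lockdep classes set up by
 * sock_lock_init() above.
 */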
1499 
1500 /* Sockets having SOCK_RCU_FREE will call this function after one RCU
1501  * grace period. This is the case for UDP sockets and TCP listeners.
1502  */
1503 static void __sk_destruct(struct rcu_head *head)
1504 {
1505 	struct sock *sk = container_of(head, struct sock, sk_rcu);
1506 	struct sk_filter *filter;
1507 
1508 	if (sk->sk_destruct)
1509 		sk->sk_destruct(sk);
1510 
1511 	filter = rcu_dereference_check(sk->sk_filter,
1512 				       atomic_read(&sk->sk_wmem_alloc) == 0);
1513 	if (filter) {
1514 		sk_filter_uncharge(sk, filter);
1515 		RCU_INIT_POINTER(sk->sk_filter, NULL);
1516 	}
1517 	if (rcu_access_pointer(sk->sk_reuseport_cb))
1518 		reuseport_detach_sock(sk);
1519 
1520 	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
1521 
1522 	if (atomic_read(&sk->sk_omem_alloc))
1523 		pr_debug("%s: optmem leakage (%d bytes) detected\n",
1524 			 __func__, atomic_read(&sk->sk_omem_alloc));
1525 
1526 	if (sk->sk_frag.page) {
1527 		put_page(sk->sk_frag.page);
1528 		sk->sk_frag.page = NULL;
1529 	}
1530 
1531 	if (sk->sk_peer_cred)
1532 		put_cred(sk->sk_peer_cred);
1533 	put_pid(sk->sk_peer_pid);
1534 	if (likely(sk->sk_net_refcnt))
1535 		put_net(sock_net(sk));
1536 	sk_prot_free(sk->sk_prot_creator, sk);
1537 }
1538 
1539 void sk_destruct(struct sock *sk)
1540 {
1541 	if (sock_flag(sk, SOCK_RCU_FREE))
1542 		call_rcu(&sk->sk_rcu, __sk_destruct);
1543 	else
1544 		__sk_destruct(&sk->sk_rcu);
1545 }
1546 
1547 static void __sk_free(struct sock *sk)
1548 {
1549 	if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
1550 		sock_diag_broadcast_destroy(sk);
1551 	else
1552 		sk_destruct(sk);
1553 }
1554 
1555 void sk_free(struct sock *sk)
1556 {
1557 	/*
1558 	 * We subtract one from sk_wmem_alloc and can know if
1559 	 * some packets are still in some tx queue.
1560 	 * If not null, sock_wfree() will call __sk_free(sk) later
1561 	 */
1562 	if (atomic_dec_and_test(&sk->sk_wmem_alloc))
1563 		__sk_free(sk);
1564 }
1565 EXPORT_SYMBOL(sk_free);
1566 
1567 static void sk_init_common(struct sock *sk)
1568 {
1569 	skb_queue_head_init(&sk->sk_receive_queue);
1570 	skb_queue_head_init(&sk->sk_write_queue);
1571 	skb_queue_head_init(&sk->sk_error_queue);
1572 
1573 	rwlock_init(&sk->sk_callback_lock);
1574 	lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
1575 			af_rlock_keys + sk->sk_family,
1576 			af_family_rlock_key_strings[sk->sk_family]);
1577 	lockdep_set_class_and_name(&sk->sk_write_queue.lock,
1578 			af_wlock_keys + sk->sk_family,
1579 			af_family_wlock_key_strings[sk->sk_family]);
1580 	lockdep_set_class_and_name(&sk->sk_error_queue.lock,
1581 			af_elock_keys + sk->sk_family,
1582 			af_family_elock_key_strings[sk->sk_family]);
1583 	lockdep_set_class_and_name(&sk->sk_callback_lock,
1584 			af_callback_keys + sk->sk_family,
1585 			af_family_clock_key_strings[sk->sk_family]);
1586 }
1587 
1588 /**
1589  *	sk_clone_lock - clone a socket, and lock its clone
1590  *	@sk: the socket to clone
1591  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1592  *
1593  *	Caller must unlock socket even in error path (bh_unlock_sock(newsk))
1594  */
1595 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1596 {
1597 	struct sock *newsk;
1598 	bool is_charged = true;
1599 
1600 	newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
1601 	if (newsk != NULL) {
1602 		struct sk_filter *filter;
1603 
1604 		sock_copy(newsk, sk);
1605 
1606 		/* SANITY */
1607 		if (likely(newsk->sk_net_refcnt))
1608 			get_net(sock_net(newsk));
1609 		sk_node_init(&newsk->sk_node);
1610 		sock_lock_init(newsk);
1611 		bh_lock_sock(newsk);
1612 		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
1613 		newsk->sk_backlog.len = 0;
1614 
1615 		atomic_set(&newsk->sk_rmem_alloc, 0);
1616 		/*
1617 		 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
1618 		 */
1619 		atomic_set(&newsk->sk_wmem_alloc, 1);
1620 		atomic_set(&newsk->sk_omem_alloc, 0);
1621 		sk_init_common(newsk);
1622 
1623 		newsk->sk_dst_cache	= NULL;
1624 		newsk->sk_dst_pending_confirm = 0;
1625 		newsk->sk_wmem_queued	= 0;
1626 		newsk->sk_forward_alloc = 0;
1627 		atomic_set(&newsk->sk_drops, 0);
1628 		newsk->sk_send_head	= NULL;
1629 		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
1630 
1631 		sock_reset_flag(newsk, SOCK_DONE);
1632 
1633 		filter = rcu_dereference_protected(newsk->sk_filter, 1);
1634 		if (filter != NULL)
1635 			/* Though it's an empty new sock, the charging may fail
1636 			 * if sysctl_optmem_max was changed between creation of
1637 			 * the original socket and cloning.
1638 			 */
1639 			is_charged = sk_filter_charge(newsk, filter);
1640 
1641 		if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
1642 			/* We need to make sure that we don't uncharge the new
1643 			 * socket if we couldn't charge it in the first place
1644 			 * as otherwise we uncharge the parent's filter.
1645 			 */
1646 			if (!is_charged)
1647 				RCU_INIT_POINTER(newsk->sk_filter, NULL);
1648 			sk_free_unlock_clone(newsk);
1649 			newsk = NULL;
1650 			goto out;
1651 		}
1652 		RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
1653 
1654 		newsk->sk_err	   = 0;
1655 		newsk->sk_err_soft = 0;
1656 		newsk->sk_priority = 0;
1657 		newsk->sk_incoming_cpu = raw_smp_processor_id();
1658 		atomic64_set(&newsk->sk_cookie, 0);
1659 
1660 		mem_cgroup_sk_alloc(newsk);
1661 		cgroup_sk_alloc(&newsk->sk_cgrp_data);
1662 
1663 		/*
1664 		 * Before updating sk_refcnt, we must commit prior changes to memory
1665 		 * (Documentation/RCU/rculist_nulls.txt for details)
1666 		 */
1667 		smp_wmb();
1668 		atomic_set(&newsk->sk_refcnt, 2);
1669 
1670 		/*
1671 		 * Increment the counter in the same struct proto as the master
1672 		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
1673 		 * is the same as sk->sk_prot->socks, as this field was copied
1674 		 * with memcpy).
1675 		 *
1676 		 * This _changes_ the previous behaviour, where
1677 		 * tcp_create_openreq_child was always incrementing the
1678 		 * equivalent of tcp_prot->socks (inet_sock_nr), so this has
1679 		 * to be taken into account in all callers. -acme
1680 		 */
1681 		sk_refcnt_debug_inc(newsk);
1682 		sk_set_socket(newsk, NULL);
1683 		newsk->sk_wq = NULL;
1684 
1685 		if (newsk->sk_prot->sockets_allocated)
1686 			sk_sockets_allocated_inc(newsk);
1687 
1688 		if (sock_needs_netstamp(sk) &&
1689 		    newsk->sk_flags & SK_FLAGS_TIMESTAMP)
1690 			net_enable_timestamp();
1691 	}
1692 out:
1693 	return newsk;
1694 }
1695 EXPORT_SYMBOL_GPL(sk_clone_lock);
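/*
 * Illustrative sketch: as the kernel-doc above says, the clone comes
 * back bh-locked and the caller must unlock it on every path.  Callers
 * follow roughly this pattern:
 *
 *	newsk = sk_clone_lock(sk, GFP_ATOMIC);
 *	if (!newsk)
 *		return NULL;
 *	// ... protocol specific initialisation of newsk ...
 *	bh_unlock_sock(newsk);		// always, even on error paths
 *	return newsk;
 *
 * If that initialisation fails, sk_free_unlock_clone() below both
 * unlocks and frees the half-built clone.
 */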
1696 
1697 void sk_free_unlock_clone(struct sock *sk)
1698 {
1699 	/* It is still a raw copy of the parent, so invalidate
1700 	 * its destructor and do a plain sk_free(). */
1701 	sk->sk_destruct = NULL;
1702 	bh_unlock_sock(sk);
1703 	sk_free(sk);
1704 }
1705 EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
1706 
1707 void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1708 {
1709 	u32 max_segs = 1;
1710 
1711 	sk_dst_set(sk, dst);
1712 	sk->sk_route_caps = dst->dev->features;
1713 	if (sk->sk_route_caps & NETIF_F_GSO)
1714 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
1715 	sk->sk_route_caps &= ~sk->sk_route_nocaps;
1716 	if (sk_can_gso(sk)) {
1717 		if (dst->header_len) {
1718 			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1719 		} else {
1720 			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
1721 			sk->sk_gso_max_size = dst->dev->gso_max_size;
1722 			max_segs = max_t(u32, dst->dev->gso_max_segs, 1);
1723 		}
1724 	}
1725 	sk->sk_gso_max_segs = max_segs;
1726 }
1727 EXPORT_SYMBOL_GPL(sk_setup_caps);
1728 
1729 /*
1730  *	Simple resource managers for sockets.
1731  */
1732 
1733 
1734 /*
1735  * Write buffer destructor automatically called from kfree_skb.
1736  */
1737 void sock_wfree(struct sk_buff *skb)
1738 {
1739 	struct sock *sk = skb->sk;
1740 	unsigned int len = skb->truesize;
1741 
1742 	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
1743 		/*
1744 		 * Keep a reference on sk_wmem_alloc; it will be released
1745 		 * after the sk_write_space() call.
1746 		 */
1747 		atomic_sub(len - 1, &sk->sk_wmem_alloc);
1748 		sk->sk_write_space(sk);
1749 		len = 1;
1750 	}
1751 	/*
1752 	 * If sk_wmem_alloc reaches 0, we must finish what sk_free()
1753 	 * could not do because of in-flight packets.
1754 	 */
1755 	if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
1756 		__sk_free(sk);
1757 }
1758 EXPORT_SYMBOL(sock_wfree);
1759 
1760 /* This variant of sock_wfree() is used by TCP,
1761  * since it sets SOCK_USE_WRITE_QUEUE.
1762  */
1763 void __sock_wfree(struct sk_buff *skb)
1764 {
1765 	struct sock *sk = skb->sk;
1766 
1767 	if (atomic_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
1768 		__sk_free(sk);
1769 }
1770 
1771 void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
1772 {
1773 	skb_orphan(skb);
1774 	skb->sk = sk;
1775 #ifdef CONFIG_INET
1776 	if (unlikely(!sk_fullsock(sk))) {
1777 		skb->destructor = sock_edemux;
1778 		sock_hold(sk);
1779 		return;
1780 	}
1781 #endif
1782 	skb->destructor = sock_wfree;
1783 	skb_set_hash_from_sk(skb, sk);
1784 	/*
1785 	 * We used to take a refcount on sk, but the following operation
1786 	 * is enough to guarantee sk_free() won't free this sock until
1787 	 * all in-flight packets are completed.
1788 	 */
1789 	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
1790 }
1791 EXPORT_SYMBOL(skb_set_owner_w);
1792 
1793 /* This helper is used by netem, as it can hold packets in its
1794  * delay queue. We want to allow the owner socket to send more
1795  * packets, as if they were already TX completed by a typical driver.
1796  * But we also want to keep skb->sk set because some packet schedulers
1797  * rely on it (sch_fq for example). So we set skb->truesize to a small
1798  * amount (1) and decrease sk_wmem_alloc accordingly.
1799  */
1800 void skb_orphan_partial(struct sk_buff *skb)
1801 {
1802 	/* If this skb is a TCP pure ACK or already went here,
1803 	 * we have nothing to do. 2 is already a very small truesize.
1804 	 */
1805 	if (skb->truesize <= 2)
1806 		return;
1807 
1808 	/* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
1809 	 * so we do not completely orphan the skb, but transfer all
1810 	 * accounted bytes but one, to avoid unexpected reorders.
1811 	 */
1812 	if (skb->destructor == sock_wfree
1813 #ifdef CONFIG_INET
1814 	    || skb->destructor == tcp_wfree
1815 #endif
1816 		) {
1817 		atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc);
1818 		skb->truesize = 1;
1819 	} else {
1820 		skb_orphan(skb);
1821 	}
1822 }
1823 EXPORT_SYMBOL(skb_orphan_partial);
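/*
 * Worked example for the helper above: suppose a socket has a 3000 byte
 * skb sitting in netem's delay queue.
 *
 *	before:	skb->truesize == 3000, all charged to sk->sk_wmem_alloc
 *	after:	skb->truesize == 1,    sk_wmem_alloc reduced by 2999
 *
 * The socket can therefore keep sending as if the packet had already
 * been TX-completed, yet skb->sk stays set so schedulers like sch_fq can
 * still attribute the packet to its flow.
 */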
1824 
1825 /*
1826  * Read buffer destructor automatically called from kfree_skb.
1827  */
1828 void sock_rfree(struct sk_buff *skb)
1829 {
1830 	struct sock *sk = skb->sk;
1831 	unsigned int len = skb->truesize;
1832 
1833 	atomic_sub(len, &sk->sk_rmem_alloc);
1834 	sk_mem_uncharge(sk, len);
1835 }
1836 EXPORT_SYMBOL(sock_rfree);
1837 
1838 /*
1839  * Buffer destructor for skbs that are not used directly in read or write
1840  * path, e.g. for error handler skbs. Automatically called from kfree_skb.
1841  */
1842 void sock_efree(struct sk_buff *skb)
1843 {
1844 	sock_put(skb->sk);
1845 }
1846 EXPORT_SYMBOL(sock_efree);
1847 
1848 kuid_t sock_i_uid(struct sock *sk)
1849 {
1850 	kuid_t uid;
1851 
1852 	read_lock_bh(&sk->sk_callback_lock);
1853 	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
1854 	read_unlock_bh(&sk->sk_callback_lock);
1855 	return uid;
1856 }
1857 EXPORT_SYMBOL(sock_i_uid);
1858 
1859 unsigned long sock_i_ino(struct sock *sk)
1860 {
1861 	unsigned long ino;
1862 
1863 	read_lock_bh(&sk->sk_callback_lock);
1864 	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1865 	read_unlock_bh(&sk->sk_callback_lock);
1866 	return ino;
1867 }
1868 EXPORT_SYMBOL(sock_i_ino);
1869 
1870 /*
1871  * Allocate a skb from the socket's send buffer.
1872  */
1873 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1874 			     gfp_t priority)
1875 {
1876 	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1877 		struct sk_buff *skb = alloc_skb(size, priority);
1878 		if (skb) {
1879 			skb_set_owner_w(skb, sk);
1880 			return skb;
1881 		}
1882 	}
1883 	return NULL;
1884 }
1885 EXPORT_SYMBOL(sock_wmalloc);
1886 
1887 /*
1888  * Allocate a memory block from the socket's option memory buffer.
1889  */
1890 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1891 {
1892 	if ((unsigned int)size <= sysctl_optmem_max &&
1893 	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1894 		void *mem;
1895 		/* First do the add, to avoid a race if kmalloc
1896 		 * might sleep.
1897 		 */
1898 		atomic_add(size, &sk->sk_omem_alloc);
1899 		mem = kmalloc(size, priority);
1900 		if (mem)
1901 			return mem;
1902 		atomic_sub(size, &sk->sk_omem_alloc);
1903 	}
1904 	return NULL;
1905 }
1906 EXPORT_SYMBOL(sock_kmalloc);
1907 
1908 /* Free an option memory block. Note, we actually want the inline
1909  * here as this allows gcc to detect the nullify and fold away the
1910  * condition entirely.
1911  */
1912 static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
1913 				  const bool nullify)
1914 {
1915 	if (WARN_ON_ONCE(!mem))
1916 		return;
1917 	if (nullify)
1918 		kzfree(mem);
1919 	else
1920 		kfree(mem);
1921 	atomic_sub(size, &sk->sk_omem_alloc);
1922 }
1923 
1924 void sock_kfree_s(struct sock *sk, void *mem, int size)
1925 {
1926 	__sock_kfree_s(sk, mem, size, false);
1927 }
1928 EXPORT_SYMBOL(sock_kfree_s);
1929 
1930 void sock_kzfree_s(struct sock *sk, void *mem, int size)
1931 {
1932 	__sock_kfree_s(sk, mem, size, true);
1933 }
1934 EXPORT_SYMBOL(sock_kzfree_s);
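
/*
 * Illustrative sketch (not part of this file): option memory is normally
 * allocated with sock_kmalloc() and released with sock_kfree_s(), or with
 * sock_kzfree_s() when it held key material.  The helper name and the
 * size limit below are hypothetical.
 */
static int example_set_secret_option(struct sock *sk, char __user *optval,
				     int optlen)
{
	u8 *secret;

	if (optlen <= 0 || optlen > PAGE_SIZE)
		return -EINVAL;

	secret = sock_kmalloc(sk, optlen, GFP_KERNEL);
	if (!secret)
		return -ENOBUFS;

	if (copy_from_user(secret, optval, optlen)) {
		sock_kzfree_s(sk, secret, optlen);	/* zero before freeing */
		return -EFAULT;
	}

	/* ... install the secret somewhere ... */

	sock_kzfree_s(sk, secret, optlen);
	return 0;
}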
1935 
1936 /* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
1937    I think these locks should be removed for datagram sockets.
1938  */
1939 static long sock_wait_for_wmem(struct sock *sk, long timeo)
1940 {
1941 	DEFINE_WAIT(wait);
1942 
1943 	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1944 	for (;;) {
1945 		if (!timeo)
1946 			break;
1947 		if (signal_pending(current))
1948 			break;
1949 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1950 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1951 		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1952 			break;
1953 		if (sk->sk_shutdown & SEND_SHUTDOWN)
1954 			break;
1955 		if (sk->sk_err)
1956 			break;
1957 		timeo = schedule_timeout(timeo);
1958 	}
1959 	finish_wait(sk_sleep(sk), &wait);
1960 	return timeo;
1961 }
1962 
1963 
1964 /*
1965  *	Generic send/receive buffer handlers
1966  */
1967 
1968 struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1969 				     unsigned long data_len, int noblock,
1970 				     int *errcode, int max_page_order)
1971 {
1972 	struct sk_buff *skb;
1973 	long timeo;
1974 	int err;
1975 
1976 	timeo = sock_sndtimeo(sk, noblock);
1977 	for (;;) {
1978 		err = sock_error(sk);
1979 		if (err != 0)
1980 			goto failure;
1981 
1982 		err = -EPIPE;
1983 		if (sk->sk_shutdown & SEND_SHUTDOWN)
1984 			goto failure;
1985 
1986 		if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
1987 			break;
1988 
1989 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1990 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1991 		err = -EAGAIN;
1992 		if (!timeo)
1993 			goto failure;
1994 		if (signal_pending(current))
1995 			goto interrupted;
1996 		timeo = sock_wait_for_wmem(sk, timeo);
1997 	}
1998 	skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
1999 				   errcode, sk->sk_allocation);
2000 	if (skb)
2001 		skb_set_owner_w(skb, sk);
2002 	return skb;
2003 
2004 interrupted:
2005 	err = sock_intr_errno(timeo);
2006 failure:
2007 	*errcode = err;
2008 	return NULL;
2009 }
2010 EXPORT_SYMBOL(sock_alloc_send_pskb);
2011 
2012 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
2013 				    int noblock, int *errcode)
2014 {
2015 	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
2016 }
2017 EXPORT_SYMBOL(sock_alloc_send_skb);
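
/*
 * Illustrative sketch (not part of this file): a typical datagram send
 * path allocates its skb with sock_alloc_send_skb(), which waits for
 * sndbuf space unless MSG_DONTWAIT was passed.  The helper name, the
 * fixed header reserve and the "transmit" stand-in are made up.
 */
static int example_xmit(struct sock *sk, struct msghdr *msg, size_t len)
{
	unsigned int reserve = 128;	/* assumed worst-case header space */
	struct sk_buff *skb;
	int err;

	skb = sock_alloc_send_skb(sk, len + reserve,
				  msg->msg_flags & MSG_DONTWAIT, &err);
	if (!skb)
		return err;

	skb_reserve(skb, reserve);
	err = memcpy_from_msg(skb_put(skb, len), msg, len);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	/* ... hand the skb to the lower layer here ... */
	consume_skb(skb);	/* stand-in for the real transmit call */
	return len;
}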
2018 
2019 int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
2020 		     struct sockcm_cookie *sockc)
2021 {
2022 	u32 tsflags;
2023 
2024 	switch (cmsg->cmsg_type) {
2025 	case SO_MARK:
2026 		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2027 			return -EPERM;
2028 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
2029 			return -EINVAL;
2030 		sockc->mark = *(u32 *)CMSG_DATA(cmsg);
2031 		break;
2032 	case SO_TIMESTAMPING:
2033 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
2034 			return -EINVAL;
2035 
2036 		tsflags = *(u32 *)CMSG_DATA(cmsg);
2037 		if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
2038 			return -EINVAL;
2039 
2040 		sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
2041 		sockc->tsflags |= tsflags;
2042 		break;
2043 	/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
2044 	case SCM_RIGHTS:
2045 	case SCM_CREDENTIALS:
2046 		break;
2047 	default:
2048 		return -EINVAL;
2049 	}
2050 	return 0;
2051 }
2052 EXPORT_SYMBOL(__sock_cmsg_send);
2053 
2054 int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
2055 		   struct sockcm_cookie *sockc)
2056 {
2057 	struct cmsghdr *cmsg;
2058 	int ret;
2059 
2060 	for_each_cmsghdr(cmsg, msg) {
2061 		if (!CMSG_OK(msg, cmsg))
2062 			return -EINVAL;
2063 		if (cmsg->cmsg_level != SOL_SOCKET)
2064 			continue;
2065 		ret = __sock_cmsg_send(sk, msg, cmsg, sockc);
2066 		if (ret)
2067 			return ret;
2068 	}
2069 	return 0;
2070 }
2071 EXPORT_SYMBOL(sock_cmsg_send);
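
/*
 * Illustrative sketch (not part of this file): sendmsg() implementations
 * usually seed a sockcm_cookie from the socket defaults and then let
 * sock_cmsg_send() override it from SOL_SOCKET control messages.  The
 * helper name is hypothetical.
 */
static int example_parse_cmsgs(struct sock *sk, struct msghdr *msg,
			       struct sockcm_cookie *sockc)
{
	sockc->tsflags = sk->sk_tsflags;
	sockc->mark = sk->sk_mark;

	if (msg->msg_controllen)
		return sock_cmsg_send(sk, msg, sockc);
	return 0;
}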
2072 
2073 /* On 32bit arches, an skb frag is limited to 2^15 */
2074 #define SKB_FRAG_PAGE_ORDER	get_order(32768)
2075 
2076 /**
2077  * skb_page_frag_refill - check that a page_frag contains enough room
2078  * @sz: minimum size of the fragment we want to get
2079  * @pfrag: pointer to page_frag
2080  * @gfp: priority for memory allocation
2081  *
2082  * Note: While this allocator tries to use high order pages, there is
2083  * no guarantee that allocations succeed. Therefore, @sz MUST be
2084  * less than or equal to PAGE_SIZE.
2085  */
2086 bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
2087 {
2088 	if (pfrag->page) {
2089 		if (page_ref_count(pfrag->page) == 1) {
2090 			pfrag->offset = 0;
2091 			return true;
2092 		}
2093 		if (pfrag->offset + sz <= pfrag->size)
2094 			return true;
2095 		put_page(pfrag->page);
2096 	}
2097 
2098 	pfrag->offset = 0;
2099 	if (SKB_FRAG_PAGE_ORDER) {
2100 		/* Avoid direct reclaim but allow kswapd to wake */
2101 		pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
2102 					  __GFP_COMP | __GFP_NOWARN |
2103 					  __GFP_NORETRY,
2104 					  SKB_FRAG_PAGE_ORDER);
2105 		if (likely(pfrag->page)) {
2106 			pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
2107 			return true;
2108 		}
2109 	}
2110 	pfrag->page = alloc_page(gfp);
2111 	if (likely(pfrag->page)) {
2112 		pfrag->size = PAGE_SIZE;
2113 		return true;
2114 	}
2115 	return false;
2116 }
2117 EXPORT_SYMBOL(skb_page_frag_refill);
2118 
2119 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
2120 {
2121 	if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
2122 		return true;
2123 
2124 	sk_enter_memory_pressure(sk);
2125 	sk_stream_moderate_sndbuf(sk);
2126 	return false;
2127 }
2128 EXPORT_SYMBOL(sk_page_frag_refill);
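
/*
 * Illustrative sketch (not part of this file): copying user data into the
 * per-socket (or per-task) page fragment, roughly as stream protocols do
 * when building payload fragments.  Real users also charge the bytes via
 * sk_wmem_schedule()/sk_mem_charge(); that part is omitted here and the
 * helper name is made up.
 */
static int example_copy_to_page_frag(struct sock *sk, struct msghdr *msg,
				     int copy)
{
	struct page_frag *pfrag = sk_page_frag(sk);

	if (!sk_page_frag_refill(sk, pfrag))
		return -ENOBUFS;	/* socket is under memory pressure */

	copy = min_t(int, copy, pfrag->size - pfrag->offset);
	if (copy_from_iter(page_address(pfrag->page) + pfrag->offset,
			   copy, &msg->msg_iter) != copy)
		return -EFAULT;

	pfrag->offset += copy;
	return copy;
}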
2129 
2130 static void __lock_sock(struct sock *sk)
2131 	__releases(&sk->sk_lock.slock)
2132 	__acquires(&sk->sk_lock.slock)
2133 {
2134 	DEFINE_WAIT(wait);
2135 
2136 	for (;;) {
2137 		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
2138 					TASK_UNINTERRUPTIBLE);
2139 		spin_unlock_bh(&sk->sk_lock.slock);
2140 		schedule();
2141 		spin_lock_bh(&sk->sk_lock.slock);
2142 		if (!sock_owned_by_user(sk))
2143 			break;
2144 	}
2145 	finish_wait(&sk->sk_lock.wq, &wait);
2146 }
2147 
2148 static void __release_sock(struct sock *sk)
2149 	__releases(&sk->sk_lock.slock)
2150 	__acquires(&sk->sk_lock.slock)
2151 {
2152 	struct sk_buff *skb, *next;
2153 
2154 	while ((skb = sk->sk_backlog.head) != NULL) {
2155 		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
2156 
2157 		spin_unlock_bh(&sk->sk_lock.slock);
2158 
2159 		do {
2160 			next = skb->next;
2161 			prefetch(next);
2162 			WARN_ON_ONCE(skb_dst_is_noref(skb));
2163 			skb->next = NULL;
2164 			sk_backlog_rcv(sk, skb);
2165 
2166 			cond_resched();
2167 
2168 			skb = next;
2169 		} while (skb != NULL);
2170 
2171 		spin_lock_bh(&sk->sk_lock.slock);
2172 	}
2173 
2174 	/*
2175 	 * Doing the zeroing here guarantees we cannot loop forever
2176 	 * while a wild producer attempts to flood us.
2177 	 */
2178 	sk->sk_backlog.len = 0;
2179 }
2180 
2181 void __sk_flush_backlog(struct sock *sk)
2182 {
2183 	spin_lock_bh(&sk->sk_lock.slock);
2184 	__release_sock(sk);
2185 	spin_unlock_bh(&sk->sk_lock.slock);
2186 }
2187 
2188 /**
2189  * sk_wait_data - wait for data to arrive at sk_receive_queue
2190  * @sk:    sock to wait on
2191  * @timeo: for how long
2192  * @skb:   last skb seen on sk_receive_queue
2193  *
2194  * Now the socket state, including sk->sk_err, is changed only under the
2195  * lock, hence we may omit checks after joining the wait queue.
2196  * We check the receive queue before schedule() only as an optimization;
2197  * it is very likely that release_sock() added new data.
2198  */
2199 int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
2200 {
2201 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
2202 	int rc;
2203 
2204 	add_wait_queue(sk_sleep(sk), &wait);
2205 	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2206 	rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
2207 	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2208 	remove_wait_queue(sk_sleep(sk), &wait);
2209 	return rc;
2210 }
2211 EXPORT_SYMBOL(sk_wait_data);
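
/*
 * Illustrative sketch (not part of this file): a minimal blocking receive
 * loop built on sk_wait_data().  A real recvmsg() would also honour
 * sk_shutdown, peeking and memory accounting; the helper name is made up.
 */
static struct sk_buff *example_recv_one_skb(struct sock *sk, int noblock,
					    int *err)
{
	long timeo = sock_rcvtimeo(sk, noblock);
	struct sk_buff *skb;

	lock_sock(sk);
	for (;;) {
		skb = skb_dequeue(&sk->sk_receive_queue);
		if (skb) {
			*err = 0;
			break;
		}
		*err = sock_error(sk);
		if (*err)
			break;
		*err = -EAGAIN;
		if (!timeo)
			break;
		*err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
		/* sk_wait_data() drops and re-takes the socket lock */
		sk_wait_data(sk, &timeo, NULL);
	}
	release_sock(sk);
	return skb;
}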
2212 
2213 /**
2214  *	__sk_mem_raise_allocated - increase memory_allocated
2215  *	@sk: socket
2216  *	@size: memory size to allocate
2217  *	@amt: pages to allocate
2218  *	@kind: allocation type
2219  *
2220  *	Similar to __sk_mem_schedule(), but does not update sk_forward_alloc
2221  */
2222 int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
2223 {
2224 	struct proto *prot = sk->sk_prot;
2225 	long allocated = sk_memory_allocated_add(sk, amt);
2226 
2227 	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
2228 	    !mem_cgroup_charge_skmem(sk->sk_memcg, amt))
2229 		goto suppress_allocation;
2230 
2231 	/* Under limit. */
2232 	if (allocated <= sk_prot_mem_limits(sk, 0)) {
2233 		sk_leave_memory_pressure(sk);
2234 		return 1;
2235 	}
2236 
2237 	/* Under pressure. */
2238 	if (allocated > sk_prot_mem_limits(sk, 1))
2239 		sk_enter_memory_pressure(sk);
2240 
2241 	/* Over hard limit. */
2242 	if (allocated > sk_prot_mem_limits(sk, 2))
2243 		goto suppress_allocation;
2244 
2245 	/* guarantee minimum buffer size under pressure */
2246 	if (kind == SK_MEM_RECV) {
2247 		if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
2248 			return 1;
2249 
2250 	} else { /* SK_MEM_SEND */
2251 		if (sk->sk_type == SOCK_STREAM) {
2252 			if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
2253 				return 1;
2254 		} else if (atomic_read(&sk->sk_wmem_alloc) <
2255 			   prot->sysctl_wmem[0])
2256 				return 1;
2257 	}
2258 
2259 	if (sk_has_memory_pressure(sk)) {
2260 		int alloc;
2261 
2262 		if (!sk_under_memory_pressure(sk))
2263 			return 1;
2264 		alloc = sk_sockets_allocated_read_positive(sk);
2265 		if (sk_prot_mem_limits(sk, 2) > alloc *
2266 		    sk_mem_pages(sk->sk_wmem_queued +
2267 				 atomic_read(&sk->sk_rmem_alloc) +
2268 				 sk->sk_forward_alloc))
2269 			return 1;
2270 	}
2271 
2272 suppress_allocation:
2273 
2274 	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
2275 		sk_stream_moderate_sndbuf(sk);
2276 
2277 		/* Fail only if the socket is _under_ its sndbuf.
2278 		 * In this case we cannot block, so we have to fail.
2279 		 */
2280 		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
2281 			return 1;
2282 	}
2283 
2284 	trace_sock_exceed_buf_limit(sk, prot, allocated);
2285 
2286 	sk_memory_allocated_sub(sk, amt);
2287 
2288 	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
2289 		mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);
2290 
2291 	return 0;
2292 }
2293 EXPORT_SYMBOL(__sk_mem_raise_allocated);
2294 
2295 /**
2296  *	__sk_mem_schedule - increase sk_forward_alloc and memory_allocated
2297  *	@sk: socket
2298  *	@size: memory size to allocate
2299  *	@kind: allocation type
2300  *
2301  *	If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
2302  *	rmem allocation. This function assumes that protocols which have
2303  *	memory_pressure use sk_wmem_queued as write buffer accounting.
2304  */
2305 int __sk_mem_schedule(struct sock *sk, int size, int kind)
2306 {
2307 	int ret, amt = sk_mem_pages(size);
2308 
2309 	sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT;
2310 	ret = __sk_mem_raise_allocated(sk, size, amt, kind);
2311 	if (!ret)
2312 		sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT;
2313 	return ret;
2314 }
2315 EXPORT_SYMBOL(__sk_mem_schedule);
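
/*
 * Illustrative sketch (not part of this file): protocols normally do not
 * call __sk_mem_schedule() directly but go through the sk_rmem_schedule()
 * and sk_wmem_schedule() wrappers in include/net/sock.h, which only fall
 * back to it when sk_forward_alloc is too small.  The helper name below
 * is hypothetical.
 */
static int example_charge_rmem(struct sock *sk, struct sk_buff *skb)
{
	if (!sk_rmem_schedule(sk, skb, skb->truesize))
		return -ENOBUFS;

	/* accounts skb->truesize to sk_rmem_alloc and sets sock_rfree */
	skb_set_owner_r(skb, sk);
	return 0;
}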
2316 
2317 /**
2318  *	__sk_mem_reduce_allocated - reclaim memory_allocated
2319  *	@sk: socket
2320  *	@amount: number of quanta
2321  *
2322  *	Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc
2323  */
2324 void __sk_mem_reduce_allocated(struct sock *sk, int amount)
2325 {
2326 	sk_memory_allocated_sub(sk, amount);
2327 
2328 	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
2329 		mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
2330 
2331 	if (sk_under_memory_pressure(sk) &&
2332 	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
2333 		sk_leave_memory_pressure(sk);
2334 }
2335 EXPORT_SYMBOL(__sk_mem_reduce_allocated);
2336 
2337 /**
2338  *	__sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
2339  *	@sk: socket
2340  *	@amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
2341  */
2342 void __sk_mem_reclaim(struct sock *sk, int amount)
2343 {
2344 	amount >>= SK_MEM_QUANTUM_SHIFT;
2345 	sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
2346 	__sk_mem_reduce_allocated(sk, amount);
2347 }
2348 EXPORT_SYMBOL(__sk_mem_reclaim);
2349 
2350 int sk_set_peek_off(struct sock *sk, int val)
2351 {
2352 	if (val < 0)
2353 		return -EINVAL;
2354 
2355 	sk->sk_peek_off = val;
2356 	return 0;
2357 }
2358 EXPORT_SYMBOL_GPL(sk_set_peek_off);
2359 
2360 /*
2361  * Set of default routines for initialising struct proto_ops when
2362  * the protocol does not support a particular function. In certain
2363  * cases where it makes no sense for a protocol to have a "do nothing"
2364  * function, some default processing is provided.
2365  */
2366 
2367 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
2368 {
2369 	return -EOPNOTSUPP;
2370 }
2371 EXPORT_SYMBOL(sock_no_bind);
2372 
2373 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
2374 		    int len, int flags)
2375 {
2376 	return -EOPNOTSUPP;
2377 }
2378 EXPORT_SYMBOL(sock_no_connect);
2379 
2380 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
2381 {
2382 	return -EOPNOTSUPP;
2383 }
2384 EXPORT_SYMBOL(sock_no_socketpair);
2385 
2386 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
2387 		   bool kern)
2388 {
2389 	return -EOPNOTSUPP;
2390 }
2391 EXPORT_SYMBOL(sock_no_accept);
2392 
2393 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
2394 		    int *len, int peer)
2395 {
2396 	return -EOPNOTSUPP;
2397 }
2398 EXPORT_SYMBOL(sock_no_getname);
2399 
2400 unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
2401 {
2402 	return 0;
2403 }
2404 EXPORT_SYMBOL(sock_no_poll);
2405 
2406 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2407 {
2408 	return -EOPNOTSUPP;
2409 }
2410 EXPORT_SYMBOL(sock_no_ioctl);
2411 
2412 int sock_no_listen(struct socket *sock, int backlog)
2413 {
2414 	return -EOPNOTSUPP;
2415 }
2416 EXPORT_SYMBOL(sock_no_listen);
2417 
2418 int sock_no_shutdown(struct socket *sock, int how)
2419 {
2420 	return -EOPNOTSUPP;
2421 }
2422 EXPORT_SYMBOL(sock_no_shutdown);
2423 
2424 int sock_no_setsockopt(struct socket *sock, int level, int optname,
2425 		    char __user *optval, unsigned int optlen)
2426 {
2427 	return -EOPNOTSUPP;
2428 }
2429 EXPORT_SYMBOL(sock_no_setsockopt);
2430 
2431 int sock_no_getsockopt(struct socket *sock, int level, int optname,
2432 		    char __user *optval, int __user *optlen)
2433 {
2434 	return -EOPNOTSUPP;
2435 }
2436 EXPORT_SYMBOL(sock_no_getsockopt);
2437 
2438 int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
2439 {
2440 	return -EOPNOTSUPP;
2441 }
2442 EXPORT_SYMBOL(sock_no_sendmsg);
2443 
2444 int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
2445 		    int flags)
2446 {
2447 	return -EOPNOTSUPP;
2448 }
2449 EXPORT_SYMBOL(sock_no_recvmsg);
2450 
2451 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
2452 {
2453 	/* Mirror missing mmap method error code */
2454 	return -ENODEV;
2455 }
2456 EXPORT_SYMBOL(sock_no_mmap);
2457 
2458 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
2459 {
2460 	ssize_t res;
2461 	struct msghdr msg = {.msg_flags = flags};
2462 	struct kvec iov;
2463 	char *kaddr = kmap(page);
2464 	iov.iov_base = kaddr + offset;
2465 	iov.iov_len = size;
2466 	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
2467 	kunmap(page);
2468 	return res;
2469 }
2470 EXPORT_SYMBOL(sock_no_sendpage);
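
/*
 * Illustrative sketch (not part of this file): how a minimal address
 * family might wire the sock_no_* stubs above into its proto_ops.
 * example_release and example_proto_ops are made up, and PF_UNSPEC is
 * only a placeholder family; there is no sock_no_release(), every
 * family must supply its own release handler.
 */
static int example_release(struct socket *sock)
{
	return 0;
}

static const struct proto_ops example_proto_ops = {
	.family		= PF_UNSPEC,		/* placeholder */
	.owner		= THIS_MODULE,
	.release	= example_release,
	.bind		= sock_no_bind,
	.connect	= sock_no_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= sock_no_accept,
	.getname	= sock_no_getname,
	.poll		= sock_no_poll,
	.ioctl		= sock_no_ioctl,
	.listen		= sock_no_listen,
	.shutdown	= sock_no_shutdown,
	.setsockopt	= sock_no_setsockopt,
	.getsockopt	= sock_no_getsockopt,
	.sendmsg	= sock_no_sendmsg,
	.recvmsg	= sock_no_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage,
};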
2471 
2472 /*
2473  *	Default Socket Callbacks
2474  */
2475 
2476 static void sock_def_wakeup(struct sock *sk)
2477 {
2478 	struct socket_wq *wq;
2479 
2480 	rcu_read_lock();
2481 	wq = rcu_dereference(sk->sk_wq);
2482 	if (skwq_has_sleeper(wq))
2483 		wake_up_interruptible_all(&wq->wait);
2484 	rcu_read_unlock();
2485 }
2486 
2487 static void sock_def_error_report(struct sock *sk)
2488 {
2489 	struct socket_wq *wq;
2490 
2491 	rcu_read_lock();
2492 	wq = rcu_dereference(sk->sk_wq);
2493 	if (skwq_has_sleeper(wq))
2494 		wake_up_interruptible_poll(&wq->wait, POLLERR);
2495 	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
2496 	rcu_read_unlock();
2497 }
2498 
2499 static void sock_def_readable(struct sock *sk)
2500 {
2501 	struct socket_wq *wq;
2502 
2503 	rcu_read_lock();
2504 	wq = rcu_dereference(sk->sk_wq);
2505 	if (skwq_has_sleeper(wq))
2506 		wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
2507 						POLLRDNORM | POLLRDBAND);
2508 	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
2509 	rcu_read_unlock();
2510 }
2511 
2512 static void sock_def_write_space(struct sock *sk)
2513 {
2514 	struct socket_wq *wq;
2515 
2516 	rcu_read_lock();
2517 
2518 	/* Do not wake up a writer until he can make "significant"
2519 	 * progress.  --DaveM
2520 	 */
2521 	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
2522 		wq = rcu_dereference(sk->sk_wq);
2523 		if (skwq_has_sleeper(wq))
2524 			wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
2525 						POLLWRNORM | POLLWRBAND);
2526 
2527 		/* Should agree with poll, otherwise some programs break */
2528 		if (sock_writeable(sk))
2529 			sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
2530 	}
2531 
2532 	rcu_read_unlock();
2533 }
2534 
2535 static void sock_def_destruct(struct sock *sk)
2536 {
2537 }
2538 
2539 void sk_send_sigurg(struct sock *sk)
2540 {
2541 	if (sk->sk_socket && sk->sk_socket->file)
2542 		if (send_sigurg(&sk->sk_socket->file->f_owner))
2543 			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
2544 }
2545 EXPORT_SYMBOL(sk_send_sigurg);
2546 
2547 void sk_reset_timer(struct sock *sk, struct timer_list* timer,
2548 		    unsigned long expires)
2549 {
2550 	if (!mod_timer(timer, expires))
2551 		sock_hold(sk);
2552 }
2553 EXPORT_SYMBOL(sk_reset_timer);
2554 
2555 void sk_stop_timer(struct sock *sk, struct timer_list* timer)
2556 {
2557 	if (del_timer(timer))
2558 		__sock_put(sk);
2559 }
2560 EXPORT_SYMBOL(sk_stop_timer);
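
/*
 * Illustrative sketch (not part of this file): sk_reset_timer() takes a
 * reference on the socket when it arms the timer, so the timer callback
 * (or sk_stop_timer()) is responsible for dropping it.  The callback and
 * helper names are made up, and the sketch assumes sk->sk_timer was set
 * up with setup_timer(&sk->sk_timer, example_timer_handler,
 * (unsigned long)sk) at init time.
 */
static void example_timer_handler(unsigned long data)
{
	struct sock *sk = (struct sock *)data;

	/* ... protocol specific work ... */

	sock_put(sk);	/* pairs with the hold taken by sk_reset_timer() */
}

static void example_arm_timer(struct sock *sk)
{
	sk_reset_timer(sk, &sk->sk_timer, jiffies + msecs_to_jiffies(100));
}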
2561 
2562 void sock_init_data(struct socket *sock, struct sock *sk)
2563 {
2564 	sk_init_common(sk);
2565 	sk->sk_send_head	=	NULL;
2566 
2567 	init_timer(&sk->sk_timer);
2568 
2569 	sk->sk_allocation	=	GFP_KERNEL;
2570 	sk->sk_rcvbuf		=	sysctl_rmem_default;
2571 	sk->sk_sndbuf		=	sysctl_wmem_default;
2572 	sk->sk_state		=	TCP_CLOSE;
2573 	sk_set_socket(sk, sock);
2574 
2575 	sock_set_flag(sk, SOCK_ZAPPED);
2576 
2577 	if (sock) {
2578 		sk->sk_type	=	sock->type;
2579 		sk->sk_wq	=	sock->wq;
2580 		sock->sk	=	sk;
2581 		sk->sk_uid	=	SOCK_INODE(sock)->i_uid;
2582 	} else {
2583 		sk->sk_wq	=	NULL;
2584 		sk->sk_uid	=	make_kuid(sock_net(sk)->user_ns, 0);
2585 	}
2586 
2587 	rwlock_init(&sk->sk_callback_lock);
2588 	if (sk->sk_kern_sock)
2589 		lockdep_set_class_and_name(
2590 			&sk->sk_callback_lock,
2591 			af_kern_callback_keys + sk->sk_family,
2592 			af_family_kern_clock_key_strings[sk->sk_family]);
2593 	else
2594 		lockdep_set_class_and_name(
2595 			&sk->sk_callback_lock,
2596 			af_callback_keys + sk->sk_family,
2597 			af_family_clock_key_strings[sk->sk_family]);
2598 
2599 	sk->sk_state_change	=	sock_def_wakeup;
2600 	sk->sk_data_ready	=	sock_def_readable;
2601 	sk->sk_write_space	=	sock_def_write_space;
2602 	sk->sk_error_report	=	sock_def_error_report;
2603 	sk->sk_destruct		=	sock_def_destruct;
2604 
2605 	sk->sk_frag.page	=	NULL;
2606 	sk->sk_frag.offset	=	0;
2607 	sk->sk_peek_off		=	-1;
2608 
2609 	sk->sk_peer_pid 	=	NULL;
2610 	sk->sk_peer_cred	=	NULL;
2611 	sk->sk_write_pending	=	0;
2612 	sk->sk_rcvlowat		=	1;
2613 	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
2614 	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
2615 
2616 	sk->sk_stamp = SK_DEFAULT_STAMP;
2617 
2618 #ifdef CONFIG_NET_RX_BUSY_POLL
2619 	sk->sk_napi_id		=	0;
2620 	sk->sk_ll_usec		=	sysctl_net_busy_read;
2621 #endif
2622 
2623 	sk->sk_max_pacing_rate = ~0U;
2624 	sk->sk_pacing_rate = ~0U;
2625 	sk->sk_incoming_cpu = -1;
2626 	/*
2627 	 * Before updating sk_refcnt, we must commit prior changes to memory
2628 	 * (Documentation/RCU/rculist_nulls.txt for details)
2629 	 */
2630 	smp_wmb();
2631 	atomic_set(&sk->sk_refcnt, 1);
2632 	atomic_set(&sk->sk_drops, 0);
2633 }
2634 EXPORT_SYMBOL(sock_init_data);
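
/*
 * Illustrative sketch (not part of this file): the usual create() path of
 * an address family allocates the sock with sk_alloc() and then lets
 * sock_init_data() attach it to the struct socket and fill in the
 * defaults above.  The proto comes from the caller here, and PF_UNSPEC
 * is only a placeholder family for the sketch.
 */
static int example_create(struct net *net, struct socket *sock,
			  struct proto *prot, int kern)
{
	struct sock *sk;

	sk = sk_alloc(net, PF_UNSPEC, GFP_KERNEL, prot, kern);
	if (!sk)
		return -ENOMEM;

	sock_init_data(sock, sk);
	/* a real family would also set sock->ops and its private state here */
	return 0;
}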
2635 
2636 void lock_sock_nested(struct sock *sk, int subclass)
2637 {
2638 	might_sleep();
2639 	spin_lock_bh(&sk->sk_lock.slock);
2640 	if (sk->sk_lock.owned)
2641 		__lock_sock(sk);
2642 	sk->sk_lock.owned = 1;
2643 	spin_unlock(&sk->sk_lock.slock);
2644 	/*
2645 	 * The sk_lock has mutex_lock() semantics here:
2646 	 */
2647 	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
2648 	local_bh_enable();
2649 }
2650 EXPORT_SYMBOL(lock_sock_nested);
2651 
2652 void release_sock(struct sock *sk)
2653 {
2654 	spin_lock_bh(&sk->sk_lock.slock);
2655 	if (sk->sk_backlog.tail)
2656 		__release_sock(sk);
2657 
2658 	/* Warning: release_cb() might need to release sk ownership,
2659 	 * i.e. call sock_release_ownership(sk) before us.
2660 	 */
2661 	if (sk->sk_prot->release_cb)
2662 		sk->sk_prot->release_cb(sk);
2663 
2664 	sock_release_ownership(sk);
2665 	if (waitqueue_active(&sk->sk_lock.wq))
2666 		wake_up(&sk->sk_lock.wq);
2667 	spin_unlock_bh(&sk->sk_lock.slock);
2668 }
2669 EXPORT_SYMBOL(release_sock);
2670 
2671 /**
2672  * lock_sock_fast - fast version of lock_sock
2673  * @sk: socket
2674  *
2675  * This version should be used for very small sections, where the process won't block.
2676  * return false if fast path is taken
2677  *   sk_lock.slock locked, owned = 0, BH disabled
2678  * return true if slow path is taken
2679  *   sk_lock.slock unlocked, owned = 1, BH enabled
2680  */
2681 bool lock_sock_fast(struct sock *sk)
2682 {
2683 	might_sleep();
2684 	spin_lock_bh(&sk->sk_lock.slock);
2685 
2686 	if (!sk->sk_lock.owned)
2687 		/*
2688 		 * Note: we return with BH still disabled
2689 		 */
2690 		return false;
2691 
2692 	__lock_sock(sk);
2693 	sk->sk_lock.owned = 1;
2694 	spin_unlock(&sk->sk_lock.slock);
2695 	/*
2696 	 * The sk_lock has mutex_lock() semantics here:
2697 	 */
2698 	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
2699 	local_bh_enable();
2700 	return true;
2701 }
2702 EXPORT_SYMBOL(lock_sock_fast);
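
/*
 * Illustrative sketch (not part of this file): lock_sock_fast() pairs with
 * unlock_sock_fast() from include/net/sock.h, which releases whichever
 * form of the lock was actually taken.  The helper name is made up.
 */
static void example_update_rcvlowat(struct sock *sk, int val)
{
	bool slow = lock_sock_fast(sk);

	/* short, non-blocking critical section */
	sk->sk_rcvlowat = val ? : 1;

	unlock_sock_fast(sk, slow);
}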
2703 
2704 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
2705 {
2706 	struct timeval tv;
2707 	if (!sock_flag(sk, SOCK_TIMESTAMP))
2708 		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2709 	tv = ktime_to_timeval(sk->sk_stamp);
2710 	if (tv.tv_sec == -1)
2711 		return -ENOENT;
2712 	if (tv.tv_sec == 0) {
2713 		sk->sk_stamp = ktime_get_real();
2714 		tv = ktime_to_timeval(sk->sk_stamp);
2715 	}
2716 	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
2717 }
2718 EXPORT_SYMBOL(sock_get_timestamp);
2719 
2720 int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
2721 {
2722 	struct timespec ts;
2723 	if (!sock_flag(sk, SOCK_TIMESTAMP))
2724 		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2725 	ts = ktime_to_timespec(sk->sk_stamp);
2726 	if (ts.tv_sec == -1)
2727 		return -ENOENT;
2728 	if (ts.tv_sec == 0) {
2729 		sk->sk_stamp = ktime_get_real();
2730 		ts = ktime_to_timespec(sk->sk_stamp);
2731 	}
2732 	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
2733 }
2734 EXPORT_SYMBOL(sock_get_timestampns);
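
/*
 * Illustrative sketch (not part of this file): these two helpers back the
 * SIOCGSTAMP/SIOCGSTAMPNS ioctls in several address families.  The ioctl
 * handler name is hypothetical.
 */
static int example_ioctl(struct socket *sock, unsigned int cmd,
			 unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch (cmd) {
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);
	case SIOCGSTAMPNS:
		return sock_get_timestampns(sk, (struct timespec __user *)arg);
	default:
		return -ENOIOCTLCMD;
	}
}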
2735 
2736 void sock_enable_timestamp(struct sock *sk, int flag)
2737 {
2738 	if (!sock_flag(sk, flag)) {
2739 		unsigned long previous_flags = sk->sk_flags;
2740 
2741 		sock_set_flag(sk, flag);
2742 		/*
2743 		 * we just set one of the two flags which require net
2744 		 * time stamping, but time stamping might have been on
2745 		 * already because of the other one
2746 		 */
2747 		if (sock_needs_netstamp(sk) &&
2748 		    !(previous_flags & SK_FLAGS_TIMESTAMP))
2749 			net_enable_timestamp();
2750 	}
2751 }
2752 
2753 int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
2754 		       int level, int type)
2755 {
2756 	struct sock_exterr_skb *serr;
2757 	struct sk_buff *skb;
2758 	int copied, err;
2759 
2760 	err = -EAGAIN;
2761 	skb = sock_dequeue_err_skb(sk);
2762 	if (skb == NULL)
2763 		goto out;
2764 
2765 	copied = skb->len;
2766 	if (copied > len) {
2767 		msg->msg_flags |= MSG_TRUNC;
2768 		copied = len;
2769 	}
2770 	err = skb_copy_datagram_msg(skb, 0, msg, copied);
2771 	if (err)
2772 		goto out_free_skb;
2773 
2774 	sock_recv_timestamp(msg, sk, skb);
2775 
2776 	serr = SKB_EXT_ERR(skb);
2777 	put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);
2778 
2779 	msg->msg_flags |= MSG_ERRQUEUE;
2780 	err = copied;
2781 
2782 out_free_skb:
2783 	kfree_skb(skb);
2784 out:
2785 	return err;
2786 }
2787 EXPORT_SYMBOL(sock_recv_errqueue);
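
/*
 * Illustrative sketch (not part of this file): recvmsg() handlers that
 * support MSG_ERRQUEUE typically branch to sock_recv_errqueue() before
 * touching the normal receive queue.  The level/type pair below is only
 * a placeholder; real callers pass whatever cmsg level/type their family
 * reports extended errors under, and the helper name is made up.
 */
static int example_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
			   int flags)
{
	if (flags & MSG_ERRQUEUE)
		return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);

	/* ... normal receive path ... */
	return -EAGAIN;
}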
2788 
2789 /*
2790  *	Get a socket option on a socket.
2791  *
2792  *	FIX: POSIX 1003.1g is very ambiguous here. It states that
2793  *	asynchronous errors should be reported by getsockopt. We assume
2794  *	this means if you specify SO_ERROR (otherwise, what's the point of it?).
2795  */
2796 int sock_common_getsockopt(struct socket *sock, int level, int optname,
2797 			   char __user *optval, int __user *optlen)
2798 {
2799 	struct sock *sk = sock->sk;
2800 
2801 	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2802 }
2803 EXPORT_SYMBOL(sock_common_getsockopt);
2804 
2805 #ifdef CONFIG_COMPAT
2806 int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
2807 				  char __user *optval, int __user *optlen)
2808 {
2809 	struct sock *sk = sock->sk;
2810 
2811 	if (sk->sk_prot->compat_getsockopt != NULL)
2812 		return sk->sk_prot->compat_getsockopt(sk, level, optname,
2813 						      optval, optlen);
2814 	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2815 }
2816 EXPORT_SYMBOL(compat_sock_common_getsockopt);
2817 #endif
2818 
2819 int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2820 			int flags)
2821 {
2822 	struct sock *sk = sock->sk;
2823 	int addr_len = 0;
2824 	int err;
2825 
2826 	err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
2827 				   flags & ~MSG_DONTWAIT, &addr_len);
2828 	if (err >= 0)
2829 		msg->msg_namelen = addr_len;
2830 	return err;
2831 }
2832 EXPORT_SYMBOL(sock_common_recvmsg);
2833 
2834 /*
2835  *	Set socket options on an inet socket.
2836  */
2837 int sock_common_setsockopt(struct socket *sock, int level, int optname,
2838 			   char __user *optval, unsigned int optlen)
2839 {
2840 	struct sock *sk = sock->sk;
2841 
2842 	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2843 }
2844 EXPORT_SYMBOL(sock_common_setsockopt);
2845 
2846 #ifdef CONFIG_COMPAT
2847 int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
2848 				  char __user *optval, unsigned int optlen)
2849 {
2850 	struct sock *sk = sock->sk;
2851 
2852 	if (sk->sk_prot->compat_setsockopt != NULL)
2853 		return sk->sk_prot->compat_setsockopt(sk, level, optname,
2854 						      optval, optlen);
2855 	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2856 }
2857 EXPORT_SYMBOL(compat_sock_common_setsockopt);
2858 #endif
2859 
2860 void sk_common_release(struct sock *sk)
2861 {
2862 	if (sk->sk_prot->destroy)
2863 		sk->sk_prot->destroy(sk);
2864 
2865 	/*
2866 	 * Observation: when sk_common_release() is called, processes have
2867 	 * no access to the socket, but the network still does.
2868 	 * Step one, detach it from networking:
2869 	 *
2870 	 * A. Remove it from the hash tables.
2871 	 */
2872 
2873 	sk->sk_prot->unhash(sk);
2874 
2875 	/*
2876 	 * At this point the socket cannot receive new packets, but some packets
2877 	 * may still be in flight because some CPU ran the receiver and did the
2878 	 * hash table lookup before we unhashed the socket. They will reach the
2879 	 * receive queue and be purged by the socket destructor.
2880 	 *
2881 	 * Also we still have packets pending on the receive queue and, probably,
2882 	 * our own packets waiting in device queues. sock_destroy will drain the
2883 	 * receive queue, but transmitted packets will delay socket destruction
2884 	 * until the last reference is released.
2885 	 */
2886 
2887 	sock_orphan(sk);
2888 
2889 	xfrm_sk_free_policy(sk);
2890 
2891 	sk_refcnt_debug_release(sk);
2892 
2893 	sock_put(sk);
2894 }
2895 EXPORT_SYMBOL(sk_common_release);
2896 
2897 void sk_get_meminfo(const struct sock *sk, u32 *mem)
2898 {
2899 	memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
2900 
2901 	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
2902 	mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
2903 	mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
2904 	mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
2905 	mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
2906 	mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
2907 	mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
2908 	mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
2909 	mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
2910 }
2911 
2912 #ifdef CONFIG_PROC_FS
2913 #define PROTO_INUSE_NR	64	/* should be enough for the first time */
2914 struct prot_inuse {
2915 	int val[PROTO_INUSE_NR];
2916 };
2917 
2918 static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
2919 
2920 #ifdef CONFIG_NET_NS
2921 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2922 {
2923 	__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
2924 }
2925 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2926 
2927 int sock_prot_inuse_get(struct net *net, struct proto *prot)
2928 {
2929 	int cpu, idx = prot->inuse_idx;
2930 	int res = 0;
2931 
2932 	for_each_possible_cpu(cpu)
2933 		res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
2934 
2935 	return res >= 0 ? res : 0;
2936 }
2937 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2938 
2939 static int __net_init sock_inuse_init_net(struct net *net)
2940 {
2941 	net->core.inuse = alloc_percpu(struct prot_inuse);
2942 	return net->core.inuse ? 0 : -ENOMEM;
2943 }
2944 
2945 static void __net_exit sock_inuse_exit_net(struct net *net)
2946 {
2947 	free_percpu(net->core.inuse);
2948 }
2949 
2950 static struct pernet_operations net_inuse_ops = {
2951 	.init = sock_inuse_init_net,
2952 	.exit = sock_inuse_exit_net,
2953 };
2954 
2955 static __init int net_inuse_init(void)
2956 {
2957 	if (register_pernet_subsys(&net_inuse_ops))
2958 		panic("Cannot initialize net inuse counters");
2959 
2960 	return 0;
2961 }
2962 
2963 core_initcall(net_inuse_init);
2964 #else
2965 static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
2966 
2967 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2968 {
2969 	__this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
2970 }
2971 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2972 
2973 int sock_prot_inuse_get(struct net *net, struct proto *prot)
2974 {
2975 	int cpu, idx = prot->inuse_idx;
2976 	int res = 0;
2977 
2978 	for_each_possible_cpu(cpu)
2979 		res += per_cpu(prot_inuse, cpu).val[idx];
2980 
2981 	return res >= 0 ? res : 0;
2982 }
2983 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2984 #endif
2985 
2986 static void assign_proto_idx(struct proto *prot)
2987 {
2988 	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
2989 
2990 	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
2991 		pr_err("PROTO_INUSE_NR exhausted\n");
2992 		return;
2993 	}
2994 
2995 	set_bit(prot->inuse_idx, proto_inuse_idx);
2996 }
2997 
2998 static void release_proto_idx(struct proto *prot)
2999 {
3000 	if (prot->inuse_idx != PROTO_INUSE_NR - 1)
3001 		clear_bit(prot->inuse_idx, proto_inuse_idx);
3002 }
3003 #else
3004 static inline void assign_proto_idx(struct proto *prot)
3005 {
3006 }
3007 
3008 static inline void release_proto_idx(struct proto *prot)
3009 {
3010 }
3011 #endif
3012 
3013 static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
3014 {
3015 	if (!rsk_prot)
3016 		return;
3017 	kfree(rsk_prot->slab_name);
3018 	rsk_prot->slab_name = NULL;
3019 	kmem_cache_destroy(rsk_prot->slab);
3020 	rsk_prot->slab = NULL;
3021 }
3022 
3023 static int req_prot_init(const struct proto *prot)
3024 {
3025 	struct request_sock_ops *rsk_prot = prot->rsk_prot;
3026 
3027 	if (!rsk_prot)
3028 		return 0;
3029 
3030 	rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s",
3031 					prot->name);
3032 	if (!rsk_prot->slab_name)
3033 		return -ENOMEM;
3034 
3035 	rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
3036 					   rsk_prot->obj_size, 0,
3037 					   prot->slab_flags, NULL);
3038 
3039 	if (!rsk_prot->slab) {
3040 		pr_crit("%s: Can't create request sock SLAB cache!\n",
3041 			prot->name);
3042 		return -ENOMEM;
3043 	}
3044 	return 0;
3045 }
3046 
3047 int proto_register(struct proto *prot, int alloc_slab)
3048 {
3049 	if (alloc_slab) {
3050 		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
3051 					SLAB_HWCACHE_ALIGN | prot->slab_flags,
3052 					NULL);
3053 
3054 		if (prot->slab == NULL) {
3055 			pr_crit("%s: Can't create sock SLAB cache!\n",
3056 				prot->name);
3057 			goto out;
3058 		}
3059 
3060 		if (req_prot_init(prot))
3061 			goto out_free_request_sock_slab;
3062 
3063 		if (prot->twsk_prot != NULL) {
3064 			prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
3065 
3066 			if (prot->twsk_prot->twsk_slab_name == NULL)
3067 				goto out_free_request_sock_slab;
3068 
3069 			prot->twsk_prot->twsk_slab =
3070 				kmem_cache_create(prot->twsk_prot->twsk_slab_name,
3071 						  prot->twsk_prot->twsk_obj_size,
3072 						  0,
3073 						  prot->slab_flags,
3074 						  NULL);
3075 			if (prot->twsk_prot->twsk_slab == NULL)
3076 				goto out_free_timewait_sock_slab_name;
3077 		}
3078 	}
3079 
3080 	mutex_lock(&proto_list_mutex);
3081 	list_add(&prot->node, &proto_list);
3082 	assign_proto_idx(prot);
3083 	mutex_unlock(&proto_list_mutex);
3084 	return 0;
3085 
3086 out_free_timewait_sock_slab_name:
3087 	kfree(prot->twsk_prot->twsk_slab_name);
3088 out_free_request_sock_slab:
3089 	req_prot_cleanup(prot->rsk_prot);
3090 
3091 	kmem_cache_destroy(prot->slab);
3092 	prot->slab = NULL;
3093 out:
3094 	return -ENOBUFS;
3095 }
3096 EXPORT_SYMBOL(proto_register);
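
/*
 * Illustrative sketch (not part of this file): what a module registering a
 * hypothetical protocol would do; example_proto and the init/exit names
 * are made up, and a real module would reference them from module_init()
 * and module_exit().  With alloc_slab == 1 the sock objects come from a
 * dedicated kmem_cache sized by .obj_size.
 */
static struct proto example_proto = {
	.name		= "EXAMPLE",
	.owner		= THIS_MODULE,
	.obj_size	= sizeof(struct sock),
};

static int __init example_proto_init(void)
{
	return proto_register(&example_proto, 1);
}

static void __exit example_proto_exit(void)
{
	proto_unregister(&example_proto);
}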
3097 
3098 void proto_unregister(struct proto *prot)
3099 {
3100 	mutex_lock(&proto_list_mutex);
3101 	release_proto_idx(prot);
3102 	list_del(&prot->node);
3103 	mutex_unlock(&proto_list_mutex);
3104 
3105 	kmem_cache_destroy(prot->slab);
3106 	prot->slab = NULL;
3107 
3108 	req_prot_cleanup(prot->rsk_prot);
3109 
3110 	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
3111 		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
3112 		kfree(prot->twsk_prot->twsk_slab_name);
3113 		prot->twsk_prot->twsk_slab = NULL;
3114 	}
3115 }
3116 EXPORT_SYMBOL(proto_unregister);
3117 
3118 #ifdef CONFIG_PROC_FS
3119 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
3120 	__acquires(proto_list_mutex)
3121 {
3122 	mutex_lock(&proto_list_mutex);
3123 	return seq_list_start_head(&proto_list, *pos);
3124 }
3125 
3126 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3127 {
3128 	return seq_list_next(v, &proto_list, pos);
3129 }
3130 
3131 static void proto_seq_stop(struct seq_file *seq, void *v)
3132 	__releases(proto_list_mutex)
3133 {
3134 	mutex_unlock(&proto_list_mutex);
3135 }
3136 
3137 static char proto_method_implemented(const void *method)
3138 {
3139 	return method == NULL ? 'n' : 'y';
3140 }
3141 static long sock_prot_memory_allocated(struct proto *proto)
3142 {
3143 	return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
3144 }
3145 
3146 static char *sock_prot_memory_pressure(struct proto *proto)
3147 {
3148 	return proto->memory_pressure != NULL ?
3149 	proto_memory_pressure(proto) ? "yes" : "no" : "NI";
3150 }
3151 
3152 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
3153 {
3154 
3155 	seq_printf(seq, "%-9s %4u %6d  %6ld   %-3s %6u   %-3s  %-10s "
3156 			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
3157 		   proto->name,
3158 		   proto->obj_size,
3159 		   sock_prot_inuse_get(seq_file_net(seq), proto),
3160 		   sock_prot_memory_allocated(proto),
3161 		   sock_prot_memory_pressure(proto),
3162 		   proto->max_header,
3163 		   proto->slab == NULL ? "no" : "yes",
3164 		   module_name(proto->owner),
3165 		   proto_method_implemented(proto->close),
3166 		   proto_method_implemented(proto->connect),
3167 		   proto_method_implemented(proto->disconnect),
3168 		   proto_method_implemented(proto->accept),
3169 		   proto_method_implemented(proto->ioctl),
3170 		   proto_method_implemented(proto->init),
3171 		   proto_method_implemented(proto->destroy),
3172 		   proto_method_implemented(proto->shutdown),
3173 		   proto_method_implemented(proto->setsockopt),
3174 		   proto_method_implemented(proto->getsockopt),
3175 		   proto_method_implemented(proto->sendmsg),
3176 		   proto_method_implemented(proto->recvmsg),
3177 		   proto_method_implemented(proto->sendpage),
3178 		   proto_method_implemented(proto->bind),
3179 		   proto_method_implemented(proto->backlog_rcv),
3180 		   proto_method_implemented(proto->hash),
3181 		   proto_method_implemented(proto->unhash),
3182 		   proto_method_implemented(proto->get_port),
3183 		   proto_method_implemented(proto->enter_memory_pressure));
3184 }
3185 
3186 static int proto_seq_show(struct seq_file *seq, void *v)
3187 {
3188 	if (v == &proto_list)
3189 		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
3190 			   "protocol",
3191 			   "size",
3192 			   "sockets",
3193 			   "memory",
3194 			   "press",
3195 			   "maxhdr",
3196 			   "slab",
3197 			   "module",
3198 			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
3199 	else
3200 		proto_seq_printf(seq, list_entry(v, struct proto, node));
3201 	return 0;
3202 }
3203 
3204 static const struct seq_operations proto_seq_ops = {
3205 	.start  = proto_seq_start,
3206 	.next   = proto_seq_next,
3207 	.stop   = proto_seq_stop,
3208 	.show   = proto_seq_show,
3209 };
3210 
3211 static int proto_seq_open(struct inode *inode, struct file *file)
3212 {
3213 	return seq_open_net(inode, file, &proto_seq_ops,
3214 			    sizeof(struct seq_net_private));
3215 }
3216 
3217 static const struct file_operations proto_seq_fops = {
3218 	.owner		= THIS_MODULE,
3219 	.open		= proto_seq_open,
3220 	.read		= seq_read,
3221 	.llseek		= seq_lseek,
3222 	.release	= seq_release_net,
3223 };
3224 
3225 static __net_init int proto_init_net(struct net *net)
3226 {
3227 	if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
3228 		return -ENOMEM;
3229 
3230 	return 0;
3231 }
3232 
3233 static __net_exit void proto_exit_net(struct net *net)
3234 {
3235 	remove_proc_entry("protocols", net->proc_net);
3236 }
3237 
3238 
3239 static __net_initdata struct pernet_operations proto_net_ops = {
3240 	.init = proto_init_net,
3241 	.exit = proto_exit_net,
3242 };
3243 
3244 static int __init proto_init(void)
3245 {
3246 	return register_pernet_subsys(&proto_net_ops);
3247 }
3248 
3249 subsys_initcall(proto_init);
3250 
3251 #endif /* PROC_FS */
3252 
3253 #ifdef CONFIG_NET_RX_BUSY_POLL
3254 bool sk_busy_loop_end(void *p, unsigned long start_time)
3255 {
3256 	struct sock *sk = p;
3257 
3258 	return !skb_queue_empty(&sk->sk_receive_queue) ||
3259 	       sk_busy_loop_timeout(sk, start_time);
3260 }
3261 EXPORT_SYMBOL(sk_busy_loop_end);
3262 #endif /* CONFIG_NET_RX_BUSY_POLL */
3263