xref: /openbmc/linux/net/core/sock.c (revision 1c2dd16a)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Generic socket support routines. Memory allocators, socket lock/release
7  *		handler for protocols to use and generic option handler.
8  *
9  *
10  * Authors:	Ross Biro
11  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *		Florian La Roche, <flla@stud.uni-sb.de>
13  *		Alan Cox, <A.Cox@swansea.ac.uk>
14  *
15  * Fixes:
16  *		Alan Cox	: 	Numerous verify_area() problems
17  *		Alan Cox	:	Connecting on a connecting socket
18  *					now returns an error for tcp.
19  *		Alan Cox	:	sock->protocol is set correctly.
20  *					and is not sometimes left as 0.
21  *		Alan Cox	:	connect handles icmp errors on a
22  *					connect properly. Unfortunately there
23  *					is a restart syscall nasty there. I
24  *					can't match BSD without hacking the C
25  *					library. Ideas urgently sought!
26  *		Alan Cox	:	Disallow bind() to addresses that are
27  *					not ours - especially broadcast ones!!
28  *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
29  *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
30  *					instead they leave that for the DESTROY timer.
31  *		Alan Cox	:	Clean up error flag in accept
32  *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
33  *					was buggy. Put a remove_sock() in the handler
34  *					for memory when we hit 0. Also altered the timer
35  *					code. The ACK stuff can wait and needs major
36  *					TCP layer surgery.
37  *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
38  *					and fixed timer/inet_bh race.
39  *		Alan Cox	:	Added zapped flag for TCP
40  *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
41  *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
42  *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
43  *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
44  *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
45  *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
46  *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
47  *	Pauline Middelink	:	identd support
48  *		Alan Cox	:	Fixed connect() taking signals I think.
49  *		Alan Cox	:	SO_LINGER supported
50  *		Alan Cox	:	Error reporting fixes
51  *		Anonymous	:	inet_create tidied up (sk->reuse setting)
52  *		Alan Cox	:	inet sockets don't set sk->type!
53  *		Alan Cox	:	Split socket option code
54  *		Alan Cox	:	Callbacks
55  *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
56  *		Alex		:	Removed restriction on inet fioctl
57  *		Alan Cox	:	Splitting INET from NET core
58  *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
59  *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
60  *		Alan Cox	:	Split IP from generic code
61  *		Alan Cox	:	New kfree_skbmem()
62  *		Alan Cox	:	Make SO_DEBUG superuser only.
63  *		Alan Cox	:	Allow anyone to clear SO_DEBUG
64  *					(compatibility fix)
65  *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
66  *		Alan Cox	:	Allocator for a socket is settable.
67  *		Alan Cox	:	SO_ERROR includes soft errors.
68  *		Alan Cox	:	Allow NULL arguments on some SO_ opts
69  *		Alan Cox	: 	Generic socket allocation to make hooks
70  *					easier (suggested by Craig Metz).
71  *		Michael Pall	:	SO_ERROR returns positive errno again
 72  *		Steve Whitehouse:	Added default destructor to free
 73  *					protocol private data.
 74  *		Steve Whitehouse:	Added various other default routines
 75  *					common to several socket families.
 76  *		Chris Evans	:	Call suser() check last on F_SETOWN
77  *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
78  *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
79  *		Andi Kleen	:	Fix write_space callback
80  *		Chris Evans	:	Security fixes - signedness again
81  *		Arnaldo C. Melo :       cleanups, use skb_queue_purge
82  *
83  * To Fix:
84  *
85  *
86  *		This program is free software; you can redistribute it and/or
87  *		modify it under the terms of the GNU General Public License
88  *		as published by the Free Software Foundation; either version
89  *		2 of the License, or (at your option) any later version.
90  */
91 
92 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
93 
94 #include <linux/capability.h>
95 #include <linux/errno.h>
96 #include <linux/errqueue.h>
97 #include <linux/types.h>
98 #include <linux/socket.h>
99 #include <linux/in.h>
100 #include <linux/kernel.h>
101 #include <linux/module.h>
102 #include <linux/proc_fs.h>
103 #include <linux/seq_file.h>
104 #include <linux/sched.h>
105 #include <linux/timer.h>
106 #include <linux/string.h>
107 #include <linux/sockios.h>
108 #include <linux/net.h>
109 #include <linux/mm.h>
110 #include <linux/slab.h>
111 #include <linux/interrupt.h>
112 #include <linux/poll.h>
113 #include <linux/tcp.h>
114 #include <linux/init.h>
115 #include <linux/highmem.h>
116 #include <linux/user_namespace.h>
117 #include <linux/static_key.h>
118 #include <linux/memcontrol.h>
119 #include <linux/prefetch.h>
120 
121 #include <linux/uaccess.h>
122 
123 #include <linux/netdevice.h>
124 #include <net/protocol.h>
125 #include <linux/skbuff.h>
126 #include <net/net_namespace.h>
127 #include <net/request_sock.h>
128 #include <net/sock.h>
129 #include <linux/net_tstamp.h>
130 #include <net/xfrm.h>
131 #include <linux/ipsec.h>
132 #include <net/cls_cgroup.h>
133 #include <net/netprio_cgroup.h>
134 #include <linux/sock_diag.h>
135 
136 #include <linux/filter.h>
137 #include <net/sock_reuseport.h>
138 
139 #include <trace/events/sock.h>
140 
141 #ifdef CONFIG_INET
142 #include <net/tcp.h>
143 #endif
144 
145 #include <net/busy_poll.h>
146 
147 static DEFINE_MUTEX(proto_list_mutex);
148 static LIST_HEAD(proto_list);
149 
150 /**
151  * sk_ns_capable - General socket capability test
152  * @sk: Socket to use a capability on or through
153  * @user_ns: The user namespace of the capability to use
154  * @cap: The capability to use
155  *
 156  * Test to see if the opener of the socket had the capability @cap when
 157  * the socket was created and that the current process has the capability
 158  * @cap in the user namespace @user_ns.
159  */
160 bool sk_ns_capable(const struct sock *sk,
161 		   struct user_namespace *user_ns, int cap)
162 {
163 	return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
164 		ns_capable(user_ns, cap);
165 }
166 EXPORT_SYMBOL(sk_ns_capable);
167 
168 /**
169  * sk_capable - Socket global capability test
170  * @sk: Socket to use a capability on or through
171  * @cap: The global capability to use
172  *
 173  * Test to see if the opener of the socket had the capability @cap when
 174  * the socket was created and that the current process has the capability
 175  * @cap in all user namespaces.
176  */
177 bool sk_capable(const struct sock *sk, int cap)
178 {
179 	return sk_ns_capable(sk, &init_user_ns, cap);
180 }
181 EXPORT_SYMBOL(sk_capable);
182 
183 /**
184  * sk_net_capable - Network namespace socket capability test
185  * @sk: Socket to use a capability on or through
186  * @cap: The capability to use
187  *
 188  * Test to see if the opener of the socket had the capability @cap when the
 189  * socket was created and that the current process has the capability @cap
 190  * over the network namespace the socket is a member of.
191  */
192 bool sk_net_capable(const struct sock *sk, int cap)
193 {
194 	return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
195 }
196 EXPORT_SYMBOL(sk_net_capable);
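
/*
 * Illustrative sketch, not part of the original file: a protocol's handler
 * for a privileged option might gate access on the socket opener's
 * capabilities using the helpers above. The function name is hypothetical;
 * sk_net_capable() and CAP_NET_ADMIN are the real API.
 */
static int __maybe_unused example_check_priv_option(const struct sock *sk)
{
	/* The opener must have had CAP_NET_ADMIN in the socket's network
	 * namespace, and the current task must still have it.
	 */
	if (!sk_net_capable(sk, CAP_NET_ADMIN))
		return -EPERM;
	return 0;
}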
197 
198 /*
199  * Each address family might have different locking rules, so we have
200  * one slock key per address family and separate keys for internal and
201  * userspace sockets.
202  */
203 static struct lock_class_key af_family_keys[AF_MAX];
204 static struct lock_class_key af_family_kern_keys[AF_MAX];
205 static struct lock_class_key af_family_slock_keys[AF_MAX];
206 static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
207 
208 /*
 209  * Make lock validator output more readable. (We pre-construct these
 210  * strings at build time, so that runtime initialization of socket
211  * locks is fast):
212  */
213 
214 #define _sock_locks(x)						  \
215   x "AF_UNSPEC",	x "AF_UNIX"     ,	x "AF_INET"     , \
216   x "AF_AX25"  ,	x "AF_IPX"      ,	x "AF_APPLETALK", \
217   x "AF_NETROM",	x "AF_BRIDGE"   ,	x "AF_ATMPVC"   , \
218   x "AF_X25"   ,	x "AF_INET6"    ,	x "AF_ROSE"     , \
219   x "AF_DECnet",	x "AF_NETBEUI"  ,	x "AF_SECURITY" , \
220   x "AF_KEY"   ,	x "AF_NETLINK"  ,	x "AF_PACKET"   , \
221   x "AF_ASH"   ,	x "AF_ECONET"   ,	x "AF_ATMSVC"   , \
222   x "AF_RDS"   ,	x "AF_SNA"      ,	x "AF_IRDA"     , \
223   x "AF_PPPOX" ,	x "AF_WANPIPE"  ,	x "AF_LLC"      , \
224   x "27"       ,	x "28"          ,	x "AF_CAN"      , \
225   x "AF_TIPC"  ,	x "AF_BLUETOOTH",	x "IUCV"        , \
226   x "AF_RXRPC" ,	x "AF_ISDN"     ,	x "AF_PHONET"   , \
227   x "AF_IEEE802154",	x "AF_CAIF"	,	x "AF_ALG"      , \
228   x "AF_NFC"   ,	x "AF_VSOCK"    ,	x "AF_KCM"      , \
229   x "AF_QIPCRTR",	x "AF_SMC"	,	x "AF_MAX"
230 
231 static const char *const af_family_key_strings[AF_MAX+1] = {
232 	_sock_locks("sk_lock-")
233 };
234 static const char *const af_family_slock_key_strings[AF_MAX+1] = {
235 	_sock_locks("slock-")
236 };
237 static const char *const af_family_clock_key_strings[AF_MAX+1] = {
238 	_sock_locks("clock-")
239 };
240 
241 static const char *const af_family_kern_key_strings[AF_MAX+1] = {
242 	_sock_locks("k-sk_lock-")
243 };
244 static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
245 	_sock_locks("k-slock-")
246 };
247 static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
248 	_sock_locks("k-clock-")
249 };
250 static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
251   "rlock-AF_UNSPEC", "rlock-AF_UNIX"     , "rlock-AF_INET"     ,
252   "rlock-AF_AX25"  , "rlock-AF_IPX"      , "rlock-AF_APPLETALK",
253   "rlock-AF_NETROM", "rlock-AF_BRIDGE"   , "rlock-AF_ATMPVC"   ,
254   "rlock-AF_X25"   , "rlock-AF_INET6"    , "rlock-AF_ROSE"     ,
255   "rlock-AF_DECnet", "rlock-AF_NETBEUI"  , "rlock-AF_SECURITY" ,
256   "rlock-AF_KEY"   , "rlock-AF_NETLINK"  , "rlock-AF_PACKET"   ,
257   "rlock-AF_ASH"   , "rlock-AF_ECONET"   , "rlock-AF_ATMSVC"   ,
258   "rlock-AF_RDS"   , "rlock-AF_SNA"      , "rlock-AF_IRDA"     ,
259   "rlock-AF_PPPOX" , "rlock-AF_WANPIPE"  , "rlock-AF_LLC"      ,
260   "rlock-27"       , "rlock-28"          , "rlock-AF_CAN"      ,
261   "rlock-AF_TIPC"  , "rlock-AF_BLUETOOTH", "rlock-AF_IUCV"     ,
262   "rlock-AF_RXRPC" , "rlock-AF_ISDN"     , "rlock-AF_PHONET"   ,
263   "rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG"      ,
264   "rlock-AF_NFC"   , "rlock-AF_VSOCK"    , "rlock-AF_KCM"      ,
265   "rlock-AF_QIPCRTR", "rlock-AF_SMC"     , "rlock-AF_MAX"
266 };
267 static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
268   "wlock-AF_UNSPEC", "wlock-AF_UNIX"     , "wlock-AF_INET"     ,
269   "wlock-AF_AX25"  , "wlock-AF_IPX"      , "wlock-AF_APPLETALK",
270   "wlock-AF_NETROM", "wlock-AF_BRIDGE"   , "wlock-AF_ATMPVC"   ,
271   "wlock-AF_X25"   , "wlock-AF_INET6"    , "wlock-AF_ROSE"     ,
272   "wlock-AF_DECnet", "wlock-AF_NETBEUI"  , "wlock-AF_SECURITY" ,
273   "wlock-AF_KEY"   , "wlock-AF_NETLINK"  , "wlock-AF_PACKET"   ,
274   "wlock-AF_ASH"   , "wlock-AF_ECONET"   , "wlock-AF_ATMSVC"   ,
275   "wlock-AF_RDS"   , "wlock-AF_SNA"      , "wlock-AF_IRDA"     ,
276   "wlock-AF_PPPOX" , "wlock-AF_WANPIPE"  , "wlock-AF_LLC"      ,
277   "wlock-27"       , "wlock-28"          , "wlock-AF_CAN"      ,
278   "wlock-AF_TIPC"  , "wlock-AF_BLUETOOTH", "wlock-AF_IUCV"     ,
279   "wlock-AF_RXRPC" , "wlock-AF_ISDN"     , "wlock-AF_PHONET"   ,
280   "wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG"      ,
281   "wlock-AF_NFC"   , "wlock-AF_VSOCK"    , "wlock-AF_KCM"      ,
282   "wlock-AF_QIPCRTR", "wlock-AF_SMC"     , "wlock-AF_MAX"
283 };
284 static const char *const af_family_elock_key_strings[AF_MAX+1] = {
285   "elock-AF_UNSPEC", "elock-AF_UNIX"     , "elock-AF_INET"     ,
286   "elock-AF_AX25"  , "elock-AF_IPX"      , "elock-AF_APPLETALK",
287   "elock-AF_NETROM", "elock-AF_BRIDGE"   , "elock-AF_ATMPVC"   ,
288   "elock-AF_X25"   , "elock-AF_INET6"    , "elock-AF_ROSE"     ,
289   "elock-AF_DECnet", "elock-AF_NETBEUI"  , "elock-AF_SECURITY" ,
290   "elock-AF_KEY"   , "elock-AF_NETLINK"  , "elock-AF_PACKET"   ,
291   "elock-AF_ASH"   , "elock-AF_ECONET"   , "elock-AF_ATMSVC"   ,
292   "elock-AF_RDS"   , "elock-AF_SNA"      , "elock-AF_IRDA"     ,
293   "elock-AF_PPPOX" , "elock-AF_WANPIPE"  , "elock-AF_LLC"      ,
294   "elock-27"       , "elock-28"          , "elock-AF_CAN"      ,
295   "elock-AF_TIPC"  , "elock-AF_BLUETOOTH", "elock-AF_IUCV"     ,
296   "elock-AF_RXRPC" , "elock-AF_ISDN"     , "elock-AF_PHONET"   ,
297   "elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG"      ,
298   "elock-AF_NFC"   , "elock-AF_VSOCK"    , "elock-AF_KCM"      ,
299   "elock-AF_QIPCRTR", "elock-AF_SMC"     , "elock-AF_MAX"
300 };
301 
302 /*
303  * sk_callback_lock and sk queues locking rules are per-address-family,
304  * so split the lock classes by using a per-AF key:
305  */
306 static struct lock_class_key af_callback_keys[AF_MAX];
307 static struct lock_class_key af_rlock_keys[AF_MAX];
308 static struct lock_class_key af_wlock_keys[AF_MAX];
309 static struct lock_class_key af_elock_keys[AF_MAX];
310 static struct lock_class_key af_kern_callback_keys[AF_MAX];
311 
 312 /* Take the size of the struct sk_buff overhead into account when
 313  * determining these values, since it is not constant across platforms.
 314  * This keeps socket queueing behavior and performance independent of
 315  * such differences.
316  */
317 #define _SK_MEM_PACKETS		256
318 #define _SK_MEM_OVERHEAD	SKB_TRUESIZE(256)
319 #define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
320 #define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
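
/* Rough arithmetic, assuming a typical 64-bit configuration: SKB_TRUESIZE()
 * adds the aligned sizes of struct sk_buff and struct skb_shared_info, so
 * each 256-byte packet accounts for roughly 832 bytes of truesize, and
 * SK_WMEM_MAX / SK_RMEM_MAX come out to about 256 * 832 = 212992 bytes
 * (~208 KiB), the value usually seen in /proc/sys/net/core/[rw]mem_default.
 * Exact figures depend on the architecture and kernel configuration.
 */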
321 
322 /* Run time adjustable parameters. */
323 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
324 EXPORT_SYMBOL(sysctl_wmem_max);
325 __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
326 EXPORT_SYMBOL(sysctl_rmem_max);
327 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
328 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
329 
330 /* Maximal space eaten by iovec or ancillary data plus some space */
331 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
332 EXPORT_SYMBOL(sysctl_optmem_max);
333 
334 int sysctl_tstamp_allow_data __read_mostly = 1;
335 
336 struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
337 EXPORT_SYMBOL_GPL(memalloc_socks);
338 
339 /**
340  * sk_set_memalloc - sets %SOCK_MEMALLOC
341  * @sk: socket to set it on
342  *
343  * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
344  * It's the responsibility of the admin to adjust min_free_kbytes
 345  * to meet the requirements.
346  */
347 void sk_set_memalloc(struct sock *sk)
348 {
349 	sock_set_flag(sk, SOCK_MEMALLOC);
350 	sk->sk_allocation |= __GFP_MEMALLOC;
351 	static_key_slow_inc(&memalloc_socks);
352 }
353 EXPORT_SYMBOL_GPL(sk_set_memalloc);
354 
355 void sk_clear_memalloc(struct sock *sk)
356 {
357 	sock_reset_flag(sk, SOCK_MEMALLOC);
358 	sk->sk_allocation &= ~__GFP_MEMALLOC;
359 	static_key_slow_dec(&memalloc_socks);
360 
361 	/*
362 	 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
363 	 * progress of swapping. SOCK_MEMALLOC may be cleared while
364 	 * it has rmem allocations due to the last swapfile being deactivated
365 	 * but there is a risk that the socket is unusable due to exceeding
366 	 * the rmem limits. Reclaim the reserves and obey rmem limits again.
367 	 */
368 	sk_mem_reclaim(sk);
369 }
370 EXPORT_SYMBOL_GPL(sk_clear_memalloc);
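
/*
 * Illustrative sketch, not part of the original file: a transport that
 * backs swap (swap-over-NFS, for example) would pair the two helpers above
 * around swapfile activation and deactivation. The wrapper names below are
 * hypothetical.
 */
static void __maybe_unused example_socket_swap_on(struct sock *sk)
{
	sk_set_memalloc(sk);	/* may now dip into emergency reserves */
}

static void __maybe_unused example_socket_swap_off(struct sock *sk)
{
	sk_clear_memalloc(sk);	/* reclaim reserves, obey limits again */
}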
371 
372 int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
373 {
374 	int ret;
375 	unsigned long pflags = current->flags;
376 
377 	/* these should have been dropped before queueing */
378 	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
379 
380 	current->flags |= PF_MEMALLOC;
381 	ret = sk->sk_backlog_rcv(sk, skb);
382 	current_restore_flags(pflags, PF_MEMALLOC);
383 
384 	return ret;
385 }
386 EXPORT_SYMBOL(__sk_backlog_rcv);
387 
388 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
389 {
390 	struct timeval tv;
391 
392 	if (optlen < sizeof(tv))
393 		return -EINVAL;
394 	if (copy_from_user(&tv, optval, sizeof(tv)))
395 		return -EFAULT;
396 	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
397 		return -EDOM;
398 
399 	if (tv.tv_sec < 0) {
400 		static int warned __read_mostly;
401 
402 		*timeo_p = 0;
403 		if (warned < 10 && net_ratelimit()) {
404 			warned++;
405 			pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
406 				__func__, current->comm, task_pid_nr(current));
407 		}
408 		return 0;
409 	}
410 	*timeo_p = MAX_SCHEDULE_TIMEOUT;
411 	if (tv.tv_sec == 0 && tv.tv_usec == 0)
412 		return 0;
413 	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
414 		*timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC / HZ);
415 	return 0;
416 }
417 
418 static void sock_warn_obsolete_bsdism(const char *name)
419 {
420 	static int warned;
421 	static char warncomm[TASK_COMM_LEN];
422 	if (strcmp(warncomm, current->comm) && warned < 5) {
423 		strcpy(warncomm,  current->comm);
424 		pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
425 			warncomm, name);
426 		warned++;
427 	}
428 }
429 
430 static bool sock_needs_netstamp(const struct sock *sk)
431 {
432 	switch (sk->sk_family) {
433 	case AF_UNSPEC:
434 	case AF_UNIX:
435 		return false;
436 	default:
437 		return true;
438 	}
439 }
440 
441 static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
442 {
443 	if (sk->sk_flags & flags) {
444 		sk->sk_flags &= ~flags;
445 		if (sock_needs_netstamp(sk) &&
446 		    !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
447 			net_disable_timestamp();
448 	}
449 }
450 
451 
452 int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
453 {
454 	unsigned long flags;
455 	struct sk_buff_head *list = &sk->sk_receive_queue;
456 
457 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
458 		atomic_inc(&sk->sk_drops);
459 		trace_sock_rcvqueue_full(sk, skb);
460 		return -ENOMEM;
461 	}
462 
463 	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
464 		atomic_inc(&sk->sk_drops);
465 		return -ENOBUFS;
466 	}
467 
468 	skb->dev = NULL;
469 	skb_set_owner_r(skb, sk);
470 
 471 	/* We escape from the RCU-protected region, so make sure we don't leak
 472 	 * a non-refcounted dst.
473 	 */
474 	skb_dst_force(skb);
475 
476 	spin_lock_irqsave(&list->lock, flags);
477 	sock_skb_set_dropcount(sk, skb);
478 	__skb_queue_tail(list, skb);
479 	spin_unlock_irqrestore(&list->lock, flags);
480 
481 	if (!sock_flag(sk, SOCK_DEAD))
482 		sk->sk_data_ready(sk);
483 	return 0;
484 }
485 EXPORT_SYMBOL(__sock_queue_rcv_skb);
486 
487 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
488 {
489 	int err;
490 
491 	err = sk_filter(sk, skb);
492 	if (err)
493 		return err;
494 
495 	return __sock_queue_rcv_skb(sk, skb);
496 }
497 EXPORT_SYMBOL(sock_queue_rcv_skb);
498 
499 int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
500 		     const int nested, unsigned int trim_cap, bool refcounted)
501 {
502 	int rc = NET_RX_SUCCESS;
503 
504 	if (sk_filter_trim_cap(sk, skb, trim_cap))
505 		goto discard_and_relse;
506 
507 	skb->dev = NULL;
508 
509 	if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
510 		atomic_inc(&sk->sk_drops);
511 		goto discard_and_relse;
512 	}
513 	if (nested)
514 		bh_lock_sock_nested(sk);
515 	else
516 		bh_lock_sock(sk);
517 	if (!sock_owned_by_user(sk)) {
518 		/*
519 		 * trylock + unlock semantics:
520 		 */
521 		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
522 
523 		rc = sk_backlog_rcv(sk, skb);
524 
525 		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
526 	} else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
527 		bh_unlock_sock(sk);
528 		atomic_inc(&sk->sk_drops);
529 		goto discard_and_relse;
530 	}
531 
532 	bh_unlock_sock(sk);
533 out:
534 	if (refcounted)
535 		sock_put(sk);
536 	return rc;
537 discard_and_relse:
538 	kfree_skb(skb);
539 	goto out;
540 }
541 EXPORT_SYMBOL(__sk_receive_skb);
542 
543 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
544 {
545 	struct dst_entry *dst = __sk_dst_get(sk);
546 
547 	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
548 		sk_tx_queue_clear(sk);
549 		sk->sk_dst_pending_confirm = 0;
550 		RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
551 		dst_release(dst);
552 		return NULL;
553 	}
554 
555 	return dst;
556 }
557 EXPORT_SYMBOL(__sk_dst_check);
558 
559 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
560 {
561 	struct dst_entry *dst = sk_dst_get(sk);
562 
563 	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
564 		sk_dst_reset(sk);
565 		dst_release(dst);
566 		return NULL;
567 	}
568 
569 	return dst;
570 }
571 EXPORT_SYMBOL(sk_dst_check);
572 
573 static int sock_setbindtodevice(struct sock *sk, char __user *optval,
574 				int optlen)
575 {
576 	int ret = -ENOPROTOOPT;
577 #ifdef CONFIG_NETDEVICES
578 	struct net *net = sock_net(sk);
579 	char devname[IFNAMSIZ];
580 	int index;
581 
582 	/* Sorry... */
583 	ret = -EPERM;
584 	if (!ns_capable(net->user_ns, CAP_NET_RAW))
585 		goto out;
586 
587 	ret = -EINVAL;
588 	if (optlen < 0)
589 		goto out;
590 
591 	/* Bind this socket to a particular device like "eth0",
592 	 * as specified in the passed interface name. If the
 593 	 * name is "" or the option length is zero, the socket
 594 	 * is not bound.
595 	 */
596 	if (optlen > IFNAMSIZ - 1)
597 		optlen = IFNAMSIZ - 1;
598 	memset(devname, 0, sizeof(devname));
599 
600 	ret = -EFAULT;
601 	if (copy_from_user(devname, optval, optlen))
602 		goto out;
603 
604 	index = 0;
605 	if (devname[0] != '\0') {
606 		struct net_device *dev;
607 
608 		rcu_read_lock();
609 		dev = dev_get_by_name_rcu(net, devname);
610 		if (dev)
611 			index = dev->ifindex;
612 		rcu_read_unlock();
613 		ret = -ENODEV;
614 		if (!dev)
615 			goto out;
616 	}
617 
618 	lock_sock(sk);
619 	sk->sk_bound_dev_if = index;
620 	sk_dst_reset(sk);
621 	release_sock(sk);
622 
623 	ret = 0;
624 
625 out:
626 #endif
627 
628 	return ret;
629 }
630 
631 static int sock_getbindtodevice(struct sock *sk, char __user *optval,
632 				int __user *optlen, int len)
633 {
634 	int ret = -ENOPROTOOPT;
635 #ifdef CONFIG_NETDEVICES
636 	struct net *net = sock_net(sk);
637 	char devname[IFNAMSIZ];
638 
639 	if (sk->sk_bound_dev_if == 0) {
640 		len = 0;
641 		goto zero;
642 	}
643 
644 	ret = -EINVAL;
645 	if (len < IFNAMSIZ)
646 		goto out;
647 
648 	ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
649 	if (ret)
650 		goto out;
651 
652 	len = strlen(devname) + 1;
653 
654 	ret = -EFAULT;
655 	if (copy_to_user(optval, devname, len))
656 		goto out;
657 
658 zero:
659 	ret = -EFAULT;
660 	if (put_user(len, optlen))
661 		goto out;
662 
663 	ret = 0;
664 
665 out:
666 #endif
667 
668 	return ret;
669 }
670 
671 static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
672 {
673 	if (valbool)
674 		sock_set_flag(sk, bit);
675 	else
676 		sock_reset_flag(sk, bit);
677 }
678 
679 bool sk_mc_loop(struct sock *sk)
680 {
681 	if (dev_recursion_level())
682 		return false;
683 	if (!sk)
684 		return true;
685 	switch (sk->sk_family) {
686 	case AF_INET:
687 		return inet_sk(sk)->mc_loop;
688 #if IS_ENABLED(CONFIG_IPV6)
689 	case AF_INET6:
690 		return inet6_sk(sk)->mc_loop;
691 #endif
692 	}
693 	WARN_ON(1);
694 	return true;
695 }
696 EXPORT_SYMBOL(sk_mc_loop);
697 
698 /*
699  *	This is meant for all protocols to use and covers goings on
700  *	at the socket level. Everything here is generic.
701  */
702 
703 int sock_setsockopt(struct socket *sock, int level, int optname,
704 		    char __user *optval, unsigned int optlen)
705 {
706 	struct sock *sk = sock->sk;
707 	int val;
708 	int valbool;
709 	struct linger ling;
710 	int ret = 0;
711 
712 	/*
713 	 *	Options without arguments
714 	 */
715 
716 	if (optname == SO_BINDTODEVICE)
717 		return sock_setbindtodevice(sk, optval, optlen);
718 
719 	if (optlen < sizeof(int))
720 		return -EINVAL;
721 
722 	if (get_user(val, (int __user *)optval))
723 		return -EFAULT;
724 
725 	valbool = val ? 1 : 0;
726 
727 	lock_sock(sk);
728 
729 	switch (optname) {
730 	case SO_DEBUG:
731 		if (val && !capable(CAP_NET_ADMIN))
732 			ret = -EACCES;
733 		else
734 			sock_valbool_flag(sk, SOCK_DBG, valbool);
735 		break;
736 	case SO_REUSEADDR:
737 		sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
738 		break;
739 	case SO_REUSEPORT:
740 		sk->sk_reuseport = valbool;
741 		break;
742 	case SO_TYPE:
743 	case SO_PROTOCOL:
744 	case SO_DOMAIN:
745 	case SO_ERROR:
746 		ret = -ENOPROTOOPT;
747 		break;
748 	case SO_DONTROUTE:
749 		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
750 		break;
751 	case SO_BROADCAST:
752 		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
753 		break;
754 	case SO_SNDBUF:
 755 		/* Don't error on this; BSD doesn't, and if you think
 756 		 * about it, this is right. Otherwise apps have to
 757 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
 758 		 * are treated in BSD as hints.
759 		 */
760 		val = min_t(u32, val, sysctl_wmem_max);
761 set_sndbuf:
762 		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
763 		sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
764 		/* Wake up sending tasks if we upped the value. */
765 		sk->sk_write_space(sk);
766 		break;
767 
768 	case SO_SNDBUFFORCE:
769 		if (!capable(CAP_NET_ADMIN)) {
770 			ret = -EPERM;
771 			break;
772 		}
773 		goto set_sndbuf;
774 
775 	case SO_RCVBUF:
 776 		/* Don't error on this; BSD doesn't, and if you think
 777 		 * about it, this is right. Otherwise apps have to
 778 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
 779 		 * are treated in BSD as hints.
780 		 */
781 		val = min_t(u32, val, sysctl_rmem_max);
782 set_rcvbuf:
783 		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
784 		/*
785 		 * We double it on the way in to account for
786 		 * "struct sk_buff" etc. overhead.   Applications
787 		 * assume that the SO_RCVBUF setting they make will
788 		 * allow that much actual data to be received on that
789 		 * socket.
790 		 *
791 		 * Applications are unaware that "struct sk_buff" and
792 		 * other overheads allocate from the receive buffer
793 		 * during socket buffer allocation.
794 		 *
795 		 * And after considering the possible alternatives,
796 		 * returning the value we actually used in getsockopt
797 		 * is the most desirable behavior.
798 		 */
799 		sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
800 		break;
801 
802 	case SO_RCVBUFFORCE:
803 		if (!capable(CAP_NET_ADMIN)) {
804 			ret = -EPERM;
805 			break;
806 		}
807 		goto set_rcvbuf;
808 
809 	case SO_KEEPALIVE:
810 		if (sk->sk_prot->keepalive)
811 			sk->sk_prot->keepalive(sk, valbool);
812 		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
813 		break;
814 
815 	case SO_OOBINLINE:
816 		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
817 		break;
818 
819 	case SO_NO_CHECK:
820 		sk->sk_no_check_tx = valbool;
821 		break;
822 
823 	case SO_PRIORITY:
824 		if ((val >= 0 && val <= 6) ||
825 		    ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
826 			sk->sk_priority = val;
827 		else
828 			ret = -EPERM;
829 		break;
830 
831 	case SO_LINGER:
832 		if (optlen < sizeof(ling)) {
833 			ret = -EINVAL;	/* 1003.1g */
834 			break;
835 		}
836 		if (copy_from_user(&ling, optval, sizeof(ling))) {
837 			ret = -EFAULT;
838 			break;
839 		}
840 		if (!ling.l_onoff)
841 			sock_reset_flag(sk, SOCK_LINGER);
842 		else {
843 #if (BITS_PER_LONG == 32)
844 			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
845 				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
846 			else
847 #endif
848 				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
849 			sock_set_flag(sk, SOCK_LINGER);
850 		}
851 		break;
852 
853 	case SO_BSDCOMPAT:
854 		sock_warn_obsolete_bsdism("setsockopt");
855 		break;
856 
857 	case SO_PASSCRED:
858 		if (valbool)
859 			set_bit(SOCK_PASSCRED, &sock->flags);
860 		else
861 			clear_bit(SOCK_PASSCRED, &sock->flags);
862 		break;
863 
864 	case SO_TIMESTAMP:
865 	case SO_TIMESTAMPNS:
866 		if (valbool)  {
867 			if (optname == SO_TIMESTAMP)
868 				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
869 			else
870 				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
871 			sock_set_flag(sk, SOCK_RCVTSTAMP);
872 			sock_enable_timestamp(sk, SOCK_TIMESTAMP);
873 		} else {
874 			sock_reset_flag(sk, SOCK_RCVTSTAMP);
875 			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
876 		}
877 		break;
878 
879 	case SO_TIMESTAMPING:
880 		if (val & ~SOF_TIMESTAMPING_MASK) {
881 			ret = -EINVAL;
882 			break;
883 		}
884 
885 		if (val & SOF_TIMESTAMPING_OPT_ID &&
886 		    !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
887 			if (sk->sk_protocol == IPPROTO_TCP &&
888 			    sk->sk_type == SOCK_STREAM) {
889 				if ((1 << sk->sk_state) &
890 				    (TCPF_CLOSE | TCPF_LISTEN)) {
891 					ret = -EINVAL;
892 					break;
893 				}
894 				sk->sk_tskey = tcp_sk(sk)->snd_una;
895 			} else {
896 				sk->sk_tskey = 0;
897 			}
898 		}
899 
900 		if (val & SOF_TIMESTAMPING_OPT_STATS &&
901 		    !(val & SOF_TIMESTAMPING_OPT_TSONLY)) {
902 			ret = -EINVAL;
903 			break;
904 		}
905 
906 		sk->sk_tsflags = val;
907 		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
908 			sock_enable_timestamp(sk,
909 					      SOCK_TIMESTAMPING_RX_SOFTWARE);
910 		else
911 			sock_disable_timestamp(sk,
912 					       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
913 		break;
914 
915 	case SO_RCVLOWAT:
916 		if (val < 0)
917 			val = INT_MAX;
918 		sk->sk_rcvlowat = val ? : 1;
919 		break;
920 
921 	case SO_RCVTIMEO:
922 		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
923 		break;
924 
925 	case SO_SNDTIMEO:
926 		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
927 		break;
928 
929 	case SO_ATTACH_FILTER:
930 		ret = -EINVAL;
931 		if (optlen == sizeof(struct sock_fprog)) {
932 			struct sock_fprog fprog;
933 
934 			ret = -EFAULT;
935 			if (copy_from_user(&fprog, optval, sizeof(fprog)))
936 				break;
937 
938 			ret = sk_attach_filter(&fprog, sk);
939 		}
940 		break;
941 
942 	case SO_ATTACH_BPF:
943 		ret = -EINVAL;
944 		if (optlen == sizeof(u32)) {
945 			u32 ufd;
946 
947 			ret = -EFAULT;
948 			if (copy_from_user(&ufd, optval, sizeof(ufd)))
949 				break;
950 
951 			ret = sk_attach_bpf(ufd, sk);
952 		}
953 		break;
954 
955 	case SO_ATTACH_REUSEPORT_CBPF:
956 		ret = -EINVAL;
957 		if (optlen == sizeof(struct sock_fprog)) {
958 			struct sock_fprog fprog;
959 
960 			ret = -EFAULT;
961 			if (copy_from_user(&fprog, optval, sizeof(fprog)))
962 				break;
963 
964 			ret = sk_reuseport_attach_filter(&fprog, sk);
965 		}
966 		break;
967 
968 	case SO_ATTACH_REUSEPORT_EBPF:
969 		ret = -EINVAL;
970 		if (optlen == sizeof(u32)) {
971 			u32 ufd;
972 
973 			ret = -EFAULT;
974 			if (copy_from_user(&ufd, optval, sizeof(ufd)))
975 				break;
976 
977 			ret = sk_reuseport_attach_bpf(ufd, sk);
978 		}
979 		break;
980 
981 	case SO_DETACH_FILTER:
982 		ret = sk_detach_filter(sk);
983 		break;
984 
985 	case SO_LOCK_FILTER:
986 		if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
987 			ret = -EPERM;
988 		else
989 			sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
990 		break;
991 
992 	case SO_PASSSEC:
993 		if (valbool)
994 			set_bit(SOCK_PASSSEC, &sock->flags);
995 		else
996 			clear_bit(SOCK_PASSSEC, &sock->flags);
997 		break;
998 	case SO_MARK:
999 		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1000 			ret = -EPERM;
1001 		else
1002 			sk->sk_mark = val;
1003 		break;
1004 
1005 	case SO_RXQ_OVFL:
1006 		sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
1007 		break;
1008 
1009 	case SO_WIFI_STATUS:
1010 		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
1011 		break;
1012 
1013 	case SO_PEEK_OFF:
1014 		if (sock->ops->set_peek_off)
1015 			ret = sock->ops->set_peek_off(sk, val);
1016 		else
1017 			ret = -EOPNOTSUPP;
1018 		break;
1019 
1020 	case SO_NOFCS:
1021 		sock_valbool_flag(sk, SOCK_NOFCS, valbool);
1022 		break;
1023 
1024 	case SO_SELECT_ERR_QUEUE:
1025 		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
1026 		break;
1027 
1028 #ifdef CONFIG_NET_RX_BUSY_POLL
1029 	case SO_BUSY_POLL:
1030 		/* allow unprivileged users to decrease the value */
1031 		if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
1032 			ret = -EPERM;
1033 		else {
1034 			if (val < 0)
1035 				ret = -EINVAL;
1036 			else
1037 				sk->sk_ll_usec = val;
1038 		}
1039 		break;
1040 #endif
1041 
1042 	case SO_MAX_PACING_RATE:
1043 		sk->sk_max_pacing_rate = val;
1044 		sk->sk_pacing_rate = min(sk->sk_pacing_rate,
1045 					 sk->sk_max_pacing_rate);
1046 		break;
1047 
1048 	case SO_INCOMING_CPU:
1049 		sk->sk_incoming_cpu = val;
1050 		break;
1051 
1052 	case SO_CNX_ADVICE:
1053 		if (val == 1)
1054 			dst_negative_advice(sk);
1055 		break;
1056 	default:
1057 		ret = -ENOPROTOOPT;
1058 		break;
1059 	}
1060 	release_sock(sk);
1061 	return ret;
1062 }
1063 EXPORT_SYMBOL(sock_setsockopt);
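
/*
 * Illustrative userspace sketch (hypothetical snippet, not part of this
 * file): because SO_RCVBUF/SO_SNDBUF values are doubled on the way in to
 * cover struct sk_buff overhead, reading the option back returns roughly
 * twice the requested value, subject to the sysctl_rmem_max/sysctl_wmem_max
 * clamp and the SOCK_MIN_* floor:
 *
 *	int req = 65536, eff;
 *	socklen_t len = sizeof(eff);
 *
 *	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &req, sizeof(req));
 *	getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &eff, &len);
 *	(eff is now typically 2 * req, i.e. 131072 here.)
 */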
1064 
1065 
1066 static void cred_to_ucred(struct pid *pid, const struct cred *cred,
1067 			  struct ucred *ucred)
1068 {
1069 	ucred->pid = pid_vnr(pid);
1070 	ucred->uid = ucred->gid = -1;
1071 	if (cred) {
1072 		struct user_namespace *current_ns = current_user_ns();
1073 
1074 		ucred->uid = from_kuid_munged(current_ns, cred->euid);
1075 		ucred->gid = from_kgid_munged(current_ns, cred->egid);
1076 	}
1077 }
1078 
1079 int sock_getsockopt(struct socket *sock, int level, int optname,
1080 		    char __user *optval, int __user *optlen)
1081 {
1082 	struct sock *sk = sock->sk;
1083 
1084 	union {
1085 		int val;
1086 		u64 val64;
1087 		struct linger ling;
1088 		struct timeval tm;
1089 	} v;
1090 
1091 	int lv = sizeof(int);
1092 	int len;
1093 
1094 	if (get_user(len, optlen))
1095 		return -EFAULT;
1096 	if (len < 0)
1097 		return -EINVAL;
1098 
1099 	memset(&v, 0, sizeof(v));
1100 
1101 	switch (optname) {
1102 	case SO_DEBUG:
1103 		v.val = sock_flag(sk, SOCK_DBG);
1104 		break;
1105 
1106 	case SO_DONTROUTE:
1107 		v.val = sock_flag(sk, SOCK_LOCALROUTE);
1108 		break;
1109 
1110 	case SO_BROADCAST:
1111 		v.val = sock_flag(sk, SOCK_BROADCAST);
1112 		break;
1113 
1114 	case SO_SNDBUF:
1115 		v.val = sk->sk_sndbuf;
1116 		break;
1117 
1118 	case SO_RCVBUF:
1119 		v.val = sk->sk_rcvbuf;
1120 		break;
1121 
1122 	case SO_REUSEADDR:
1123 		v.val = sk->sk_reuse;
1124 		break;
1125 
1126 	case SO_REUSEPORT:
1127 		v.val = sk->sk_reuseport;
1128 		break;
1129 
1130 	case SO_KEEPALIVE:
1131 		v.val = sock_flag(sk, SOCK_KEEPOPEN);
1132 		break;
1133 
1134 	case SO_TYPE:
1135 		v.val = sk->sk_type;
1136 		break;
1137 
1138 	case SO_PROTOCOL:
1139 		v.val = sk->sk_protocol;
1140 		break;
1141 
1142 	case SO_DOMAIN:
1143 		v.val = sk->sk_family;
1144 		break;
1145 
1146 	case SO_ERROR:
1147 		v.val = -sock_error(sk);
1148 		if (v.val == 0)
1149 			v.val = xchg(&sk->sk_err_soft, 0);
1150 		break;
1151 
1152 	case SO_OOBINLINE:
1153 		v.val = sock_flag(sk, SOCK_URGINLINE);
1154 		break;
1155 
1156 	case SO_NO_CHECK:
1157 		v.val = sk->sk_no_check_tx;
1158 		break;
1159 
1160 	case SO_PRIORITY:
1161 		v.val = sk->sk_priority;
1162 		break;
1163 
1164 	case SO_LINGER:
1165 		lv		= sizeof(v.ling);
1166 		v.ling.l_onoff	= sock_flag(sk, SOCK_LINGER);
1167 		v.ling.l_linger	= sk->sk_lingertime / HZ;
1168 		break;
1169 
1170 	case SO_BSDCOMPAT:
1171 		sock_warn_obsolete_bsdism("getsockopt");
1172 		break;
1173 
1174 	case SO_TIMESTAMP:
1175 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1176 				!sock_flag(sk, SOCK_RCVTSTAMPNS);
1177 		break;
1178 
1179 	case SO_TIMESTAMPNS:
1180 		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
1181 		break;
1182 
1183 	case SO_TIMESTAMPING:
1184 		v.val = sk->sk_tsflags;
1185 		break;
1186 
1187 	case SO_RCVTIMEO:
1188 		lv = sizeof(struct timeval);
1189 		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
1190 			v.tm.tv_sec = 0;
1191 			v.tm.tv_usec = 0;
1192 		} else {
1193 			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
1194 			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * USEC_PER_SEC) / HZ;
1195 		}
1196 		break;
1197 
1198 	case SO_SNDTIMEO:
1199 		lv = sizeof(struct timeval);
1200 		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
1201 			v.tm.tv_sec = 0;
1202 			v.tm.tv_usec = 0;
1203 		} else {
1204 			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
1205 			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * USEC_PER_SEC) / HZ;
1206 		}
1207 		break;
1208 
1209 	case SO_RCVLOWAT:
1210 		v.val = sk->sk_rcvlowat;
1211 		break;
1212 
1213 	case SO_SNDLOWAT:
1214 		v.val = 1;
1215 		break;
1216 
1217 	case SO_PASSCRED:
1218 		v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
1219 		break;
1220 
1221 	case SO_PEERCRED:
1222 	{
1223 		struct ucred peercred;
1224 		if (len > sizeof(peercred))
1225 			len = sizeof(peercred);
1226 		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
1227 		if (copy_to_user(optval, &peercred, len))
1228 			return -EFAULT;
1229 		goto lenout;
1230 	}
1231 
1232 	case SO_PEERNAME:
1233 	{
1234 		char address[128];
1235 
1236 		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
1237 			return -ENOTCONN;
1238 		if (lv < len)
1239 			return -EINVAL;
1240 		if (copy_to_user(optval, address, len))
1241 			return -EFAULT;
1242 		goto lenout;
1243 	}
1244 
1245 	/* Dubious BSD thing... Probably nobody even uses it, but
1246 	 * the UNIX standard wants it for whatever reason... -DaveM
1247 	 */
1248 	case SO_ACCEPTCONN:
1249 		v.val = sk->sk_state == TCP_LISTEN;
1250 		break;
1251 
1252 	case SO_PASSSEC:
1253 		v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
1254 		break;
1255 
1256 	case SO_PEERSEC:
1257 		return security_socket_getpeersec_stream(sock, optval, optlen, len);
1258 
1259 	case SO_MARK:
1260 		v.val = sk->sk_mark;
1261 		break;
1262 
1263 	case SO_RXQ_OVFL:
1264 		v.val = sock_flag(sk, SOCK_RXQ_OVFL);
1265 		break;
1266 
1267 	case SO_WIFI_STATUS:
1268 		v.val = sock_flag(sk, SOCK_WIFI_STATUS);
1269 		break;
1270 
1271 	case SO_PEEK_OFF:
1272 		if (!sock->ops->set_peek_off)
1273 			return -EOPNOTSUPP;
1274 
1275 		v.val = sk->sk_peek_off;
1276 		break;
1277 	case SO_NOFCS:
1278 		v.val = sock_flag(sk, SOCK_NOFCS);
1279 		break;
1280 
1281 	case SO_BINDTODEVICE:
1282 		return sock_getbindtodevice(sk, optval, optlen, len);
1283 
1284 	case SO_GET_FILTER:
1285 		len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
1286 		if (len < 0)
1287 			return len;
1288 
1289 		goto lenout;
1290 
1291 	case SO_LOCK_FILTER:
1292 		v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1293 		break;
1294 
1295 	case SO_BPF_EXTENSIONS:
1296 		v.val = bpf_tell_extensions();
1297 		break;
1298 
1299 	case SO_SELECT_ERR_QUEUE:
1300 		v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1301 		break;
1302 
1303 #ifdef CONFIG_NET_RX_BUSY_POLL
1304 	case SO_BUSY_POLL:
1305 		v.val = sk->sk_ll_usec;
1306 		break;
1307 #endif
1308 
1309 	case SO_MAX_PACING_RATE:
1310 		v.val = sk->sk_max_pacing_rate;
1311 		break;
1312 
1313 	case SO_INCOMING_CPU:
1314 		v.val = sk->sk_incoming_cpu;
1315 		break;
1316 
1317 	case SO_MEMINFO:
1318 	{
1319 		u32 meminfo[SK_MEMINFO_VARS];
1320 
1321 		if (get_user(len, optlen))
1322 			return -EFAULT;
1323 
1324 		sk_get_meminfo(sk, meminfo);
1325 
1326 		len = min_t(unsigned int, len, sizeof(meminfo));
1327 		if (copy_to_user(optval, &meminfo, len))
1328 			return -EFAULT;
1329 
1330 		goto lenout;
1331 	}
1332 
1333 #ifdef CONFIG_NET_RX_BUSY_POLL
1334 	case SO_INCOMING_NAPI_ID:
1335 		v.val = READ_ONCE(sk->sk_napi_id);
1336 
1337 		/* aggregate non-NAPI IDs down to 0 */
1338 		if (v.val < MIN_NAPI_ID)
1339 			v.val = 0;
1340 
1341 		break;
1342 #endif
1343 
1344 	case SO_COOKIE:
1345 		lv = sizeof(u64);
1346 		if (len < lv)
1347 			return -EINVAL;
1348 		v.val64 = sock_gen_cookie(sk);
1349 		break;
1350 
1351 	default:
1352 		/* We implement the SO_SNDLOWAT etc to not be settable
1353 		 * (1003.1g 7).
1354 		 */
1355 		return -ENOPROTOOPT;
1356 	}
1357 
1358 	if (len > lv)
1359 		len = lv;
1360 	if (copy_to_user(optval, &v, len))
1361 		return -EFAULT;
1362 lenout:
1363 	if (put_user(len, optlen))
1364 		return -EFAULT;
1365 	return 0;
1366 }
1367 
1368 /*
1369  * Initialize an sk_lock.
1370  *
1371  * (We also register the sk_lock with the lock validator.)
1372  */
1373 static inline void sock_lock_init(struct sock *sk)
1374 {
1375 	if (sk->sk_kern_sock)
1376 		sock_lock_init_class_and_name(
1377 			sk,
1378 			af_family_kern_slock_key_strings[sk->sk_family],
1379 			af_family_kern_slock_keys + sk->sk_family,
1380 			af_family_kern_key_strings[sk->sk_family],
1381 			af_family_kern_keys + sk->sk_family);
1382 	else
1383 		sock_lock_init_class_and_name(
1384 			sk,
1385 			af_family_slock_key_strings[sk->sk_family],
1386 			af_family_slock_keys + sk->sk_family,
1387 			af_family_key_strings[sk->sk_family],
1388 			af_family_keys + sk->sk_family);
1389 }
1390 
1391 /*
1392  * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
 1393  * even temporarily, because of RCU lookups. sk_node should also be left as is.
 1394  * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end.
1395  */
1396 static void sock_copy(struct sock *nsk, const struct sock *osk)
1397 {
1398 #ifdef CONFIG_SECURITY_NETWORK
1399 	void *sptr = nsk->sk_security;
1400 #endif
1401 	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
1402 
1403 	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
1404 	       osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
1405 
1406 #ifdef CONFIG_SECURITY_NETWORK
1407 	nsk->sk_security = sptr;
1408 	security_sk_clone(osk, nsk);
1409 #endif
1410 }
1411 
1412 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1413 		int family)
1414 {
1415 	struct sock *sk;
1416 	struct kmem_cache *slab;
1417 
1418 	slab = prot->slab;
1419 	if (slab != NULL) {
1420 		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
1421 		if (!sk)
1422 			return sk;
1423 		if (priority & __GFP_ZERO)
1424 			sk_prot_clear_nulls(sk, prot->obj_size);
1425 	} else
1426 		sk = kmalloc(prot->obj_size, priority);
1427 
1428 	if (sk != NULL) {
1429 		kmemcheck_annotate_bitfield(sk, flags);
1430 
1431 		if (security_sk_alloc(sk, family, priority))
1432 			goto out_free;
1433 
1434 		if (!try_module_get(prot->owner))
1435 			goto out_free_sec;
1436 		sk_tx_queue_clear(sk);
1437 	}
1438 
1439 	return sk;
1440 
1441 out_free_sec:
1442 	security_sk_free(sk);
1443 out_free:
1444 	if (slab != NULL)
1445 		kmem_cache_free(slab, sk);
1446 	else
1447 		kfree(sk);
1448 	return NULL;
1449 }
1450 
1451 static void sk_prot_free(struct proto *prot, struct sock *sk)
1452 {
1453 	struct kmem_cache *slab;
1454 	struct module *owner;
1455 
1456 	owner = prot->owner;
1457 	slab = prot->slab;
1458 
1459 	cgroup_sk_free(&sk->sk_cgrp_data);
1460 	mem_cgroup_sk_free(sk);
1461 	security_sk_free(sk);
1462 	if (slab != NULL)
1463 		kmem_cache_free(slab, sk);
1464 	else
1465 		kfree(sk);
1466 	module_put(owner);
1467 }
1468 
1469 /**
1470  *	sk_alloc - All socket objects are allocated here
1471  *	@net: the applicable net namespace
1472  *	@family: protocol family
1473  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1474  *	@prot: struct proto associated with this new sock instance
1475  *	@kern: is this to be a kernel socket?
1476  */
1477 struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1478 		      struct proto *prot, int kern)
1479 {
1480 	struct sock *sk;
1481 
1482 	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
1483 	if (sk) {
1484 		sk->sk_family = family;
1485 		/*
1486 		 * See comment in struct sock definition to understand
1487 		 * why we need sk_prot_creator -acme
1488 		 */
1489 		sk->sk_prot = sk->sk_prot_creator = prot;
1490 		sk->sk_kern_sock = kern;
1491 		sock_lock_init(sk);
1492 		sk->sk_net_refcnt = kern ? 0 : 1;
1493 		if (likely(sk->sk_net_refcnt))
1494 			get_net(net);
1495 		sock_net_set(sk, net);
1496 		atomic_set(&sk->sk_wmem_alloc, 1);
1497 
1498 		mem_cgroup_sk_alloc(sk);
1499 		cgroup_sk_alloc(&sk->sk_cgrp_data);
1500 		sock_update_classid(&sk->sk_cgrp_data);
1501 		sock_update_netprioidx(&sk->sk_cgrp_data);
1502 	}
1503 
1504 	return sk;
1505 }
1506 EXPORT_SYMBOL(sk_alloc);
1507 
1508 /* Sockets having SOCK_RCU_FREE will call this function after one RCU
1509  * grace period. This is the case for UDP sockets and TCP listeners.
1510  */
1511 static void __sk_destruct(struct rcu_head *head)
1512 {
1513 	struct sock *sk = container_of(head, struct sock, sk_rcu);
1514 	struct sk_filter *filter;
1515 
1516 	if (sk->sk_destruct)
1517 		sk->sk_destruct(sk);
1518 
1519 	filter = rcu_dereference_check(sk->sk_filter,
1520 				       atomic_read(&sk->sk_wmem_alloc) == 0);
1521 	if (filter) {
1522 		sk_filter_uncharge(sk, filter);
1523 		RCU_INIT_POINTER(sk->sk_filter, NULL);
1524 	}
1525 	if (rcu_access_pointer(sk->sk_reuseport_cb))
1526 		reuseport_detach_sock(sk);
1527 
1528 	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
1529 
1530 	if (atomic_read(&sk->sk_omem_alloc))
1531 		pr_debug("%s: optmem leakage (%d bytes) detected\n",
1532 			 __func__, atomic_read(&sk->sk_omem_alloc));
1533 
1534 	if (sk->sk_frag.page) {
1535 		put_page(sk->sk_frag.page);
1536 		sk->sk_frag.page = NULL;
1537 	}
1538 
1539 	if (sk->sk_peer_cred)
1540 		put_cred(sk->sk_peer_cred);
1541 	put_pid(sk->sk_peer_pid);
1542 	if (likely(sk->sk_net_refcnt))
1543 		put_net(sock_net(sk));
1544 	sk_prot_free(sk->sk_prot_creator, sk);
1545 }
1546 
1547 void sk_destruct(struct sock *sk)
1548 {
1549 	if (sock_flag(sk, SOCK_RCU_FREE))
1550 		call_rcu(&sk->sk_rcu, __sk_destruct);
1551 	else
1552 		__sk_destruct(&sk->sk_rcu);
1553 }
1554 
1555 static void __sk_free(struct sock *sk)
1556 {
1557 	if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
1558 		sock_diag_broadcast_destroy(sk);
1559 	else
1560 		sk_destruct(sk);
1561 }
1562 
1563 void sk_free(struct sock *sk)
1564 {
1565 	/*
 1566 	 * We subtract one from sk_wmem_alloc so we can tell whether
 1567 	 * some packets are still in some tx queue.
 1568 	 * If the count is not zero, sock_wfree() will call __sk_free(sk) later.
1569 	 */
1570 	if (atomic_dec_and_test(&sk->sk_wmem_alloc))
1571 		__sk_free(sk);
1572 }
1573 EXPORT_SYMBOL(sk_free);
1574 
1575 static void sk_init_common(struct sock *sk)
1576 {
1577 	skb_queue_head_init(&sk->sk_receive_queue);
1578 	skb_queue_head_init(&sk->sk_write_queue);
1579 	skb_queue_head_init(&sk->sk_error_queue);
1580 
1581 	rwlock_init(&sk->sk_callback_lock);
1582 	lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
1583 			af_rlock_keys + sk->sk_family,
1584 			af_family_rlock_key_strings[sk->sk_family]);
1585 	lockdep_set_class_and_name(&sk->sk_write_queue.lock,
1586 			af_wlock_keys + sk->sk_family,
1587 			af_family_wlock_key_strings[sk->sk_family]);
1588 	lockdep_set_class_and_name(&sk->sk_error_queue.lock,
1589 			af_elock_keys + sk->sk_family,
1590 			af_family_elock_key_strings[sk->sk_family]);
1591 	lockdep_set_class_and_name(&sk->sk_callback_lock,
1592 			af_callback_keys + sk->sk_family,
1593 			af_family_clock_key_strings[sk->sk_family]);
1594 }
1595 
1596 /**
1597  *	sk_clone_lock - clone a socket, and lock its clone
1598  *	@sk: the socket to clone
1599  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1600  *
1601  *	Caller must unlock socket even in error path (bh_unlock_sock(newsk))
1602  */
1603 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1604 {
1605 	struct sock *newsk;
1606 	bool is_charged = true;
1607 
1608 	newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
1609 	if (newsk != NULL) {
1610 		struct sk_filter *filter;
1611 
1612 		sock_copy(newsk, sk);
1613 
1614 		/* SANITY */
1615 		if (likely(newsk->sk_net_refcnt))
1616 			get_net(sock_net(newsk));
1617 		sk_node_init(&newsk->sk_node);
1618 		sock_lock_init(newsk);
1619 		bh_lock_sock(newsk);
1620 		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
1621 		newsk->sk_backlog.len = 0;
1622 
1623 		atomic_set(&newsk->sk_rmem_alloc, 0);
1624 		/*
1625 		 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
1626 		 */
1627 		atomic_set(&newsk->sk_wmem_alloc, 1);
1628 		atomic_set(&newsk->sk_omem_alloc, 0);
1629 		sk_init_common(newsk);
1630 
1631 		newsk->sk_dst_cache	= NULL;
1632 		newsk->sk_dst_pending_confirm = 0;
1633 		newsk->sk_wmem_queued	= 0;
1634 		newsk->sk_forward_alloc = 0;
1635 		atomic_set(&newsk->sk_drops, 0);
1636 		newsk->sk_send_head	= NULL;
1637 		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
1638 
1639 		sock_reset_flag(newsk, SOCK_DONE);
1640 
1641 		filter = rcu_dereference_protected(newsk->sk_filter, 1);
1642 		if (filter != NULL)
 1643 			/* Though it's an empty new sock, the charging may fail
 1644 			 * if sysctl_optmem_max was changed between creation of
 1645 			 * the original socket and cloning.
1646 			 */
1647 			is_charged = sk_filter_charge(newsk, filter);
1648 
1649 		if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
1650 			/* We need to make sure that we don't uncharge the new
1651 			 * socket if we couldn't charge it in the first place
1652 			 * as otherwise we uncharge the parent's filter.
1653 			 */
1654 			if (!is_charged)
1655 				RCU_INIT_POINTER(newsk->sk_filter, NULL);
1656 			sk_free_unlock_clone(newsk);
1657 			newsk = NULL;
1658 			goto out;
1659 		}
1660 		RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
1661 
1662 		newsk->sk_err	   = 0;
1663 		newsk->sk_err_soft = 0;
1664 		newsk->sk_priority = 0;
1665 		newsk->sk_incoming_cpu = raw_smp_processor_id();
1666 		atomic64_set(&newsk->sk_cookie, 0);
1667 
1668 		mem_cgroup_sk_alloc(newsk);
1669 		cgroup_sk_alloc(&newsk->sk_cgrp_data);
1670 
1671 		/*
1672 		 * Before updating sk_refcnt, we must commit prior changes to memory
1673 		 * (Documentation/RCU/rculist_nulls.txt for details)
1674 		 */
1675 		smp_wmb();
1676 		atomic_set(&newsk->sk_refcnt, 2);
1677 
1678 		/*
1679 		 * Increment the counter in the same struct proto as the master
1680 		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
1681 		 * is the same as sk->sk_prot->socks, as this field was copied
1682 		 * with memcpy).
1683 		 *
1684 		 * This _changes_ the previous behaviour, where
1685 		 * tcp_create_openreq_child always was incrementing the
 1686 		 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
 1687 		 * to be taken into account in all callers. -acme
1688 		 */
1689 		sk_refcnt_debug_inc(newsk);
1690 		sk_set_socket(newsk, NULL);
1691 		newsk->sk_wq = NULL;
1692 
1693 		if (newsk->sk_prot->sockets_allocated)
1694 			sk_sockets_allocated_inc(newsk);
1695 
1696 		if (sock_needs_netstamp(sk) &&
1697 		    newsk->sk_flags & SK_FLAGS_TIMESTAMP)
1698 			net_enable_timestamp();
1699 	}
1700 out:
1701 	return newsk;
1702 }
1703 EXPORT_SYMBOL_GPL(sk_clone_lock);
1704 
1705 void sk_free_unlock_clone(struct sock *sk)
1706 {
 1707 	/* It is still a raw copy of the parent, so invalidate
 1708 	 * the destructor and do a plain sk_free(). */
1709 	sk->sk_destruct = NULL;
1710 	bh_unlock_sock(sk);
1711 	sk_free(sk);
1712 }
1713 EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
1714 
1715 void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1716 {
1717 	u32 max_segs = 1;
1718 
1719 	sk_dst_set(sk, dst);
1720 	sk->sk_route_caps = dst->dev->features;
1721 	if (sk->sk_route_caps & NETIF_F_GSO)
1722 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
1723 	sk->sk_route_caps &= ~sk->sk_route_nocaps;
1724 	if (sk_can_gso(sk)) {
1725 		if (dst->header_len) {
1726 			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1727 		} else {
1728 			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
1729 			sk->sk_gso_max_size = dst->dev->gso_max_size;
1730 			max_segs = max_t(u32, dst->dev->gso_max_segs, 1);
1731 		}
1732 	}
1733 	sk->sk_gso_max_segs = max_segs;
1734 }
1735 EXPORT_SYMBOL_GPL(sk_setup_caps);
1736 
1737 /*
1738  *	Simple resource managers for sockets.
1739  */
1740 
1741 
1742 /*
1743  * Write buffer destructor automatically called from kfree_skb.
1744  */
1745 void sock_wfree(struct sk_buff *skb)
1746 {
1747 	struct sock *sk = skb->sk;
1748 	unsigned int len = skb->truesize;
1749 
1750 	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
1751 		/*
 1752 		 * Keep a reference on sk_wmem_alloc; it will be released
 1753 		 * after the sk_write_space() call.
1754 		 */
1755 		atomic_sub(len - 1, &sk->sk_wmem_alloc);
1756 		sk->sk_write_space(sk);
1757 		len = 1;
1758 	}
1759 	/*
 1760 	 * If sk_wmem_alloc reaches 0, we must finish what sk_free()
 1761 	 * could not do because of in-flight packets.
1762 	 */
1763 	if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
1764 		__sk_free(sk);
1765 }
1766 EXPORT_SYMBOL(sock_wfree);
1767 
1768 /* This variant of sock_wfree() is used by TCP,
1769  * since it sets SOCK_USE_WRITE_QUEUE.
1770  */
1771 void __sock_wfree(struct sk_buff *skb)
1772 {
1773 	struct sock *sk = skb->sk;
1774 
1775 	if (atomic_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
1776 		__sk_free(sk);
1777 }
1778 
1779 void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
1780 {
1781 	skb_orphan(skb);
1782 	skb->sk = sk;
1783 #ifdef CONFIG_INET
1784 	if (unlikely(!sk_fullsock(sk))) {
1785 		skb->destructor = sock_edemux;
1786 		sock_hold(sk);
1787 		return;
1788 	}
1789 #endif
1790 	skb->destructor = sock_wfree;
1791 	skb_set_hash_from_sk(skb, sk);
1792 	/*
 1793 	 * We used to take a refcount on sk, but the following operation
 1794 	 * is enough to guarantee sk_free() won't free this sock until
 1795 	 * all in-flight packets are completed.
1796 	 */
1797 	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
1798 }
1799 EXPORT_SYMBOL(skb_set_owner_w);
1800 
1801 /* This helper is used by netem, as it can hold packets in its
1802  * delay queue. We want to allow the owner socket to send more
1803  * packets, as if they were already TX completed by a typical driver.
1804  * But we also want to keep skb->sk set because some packet schedulers
1805  * rely on it (sch_fq for example). So we set skb->truesize to a small
1806  * amount (1) and decrease sk_wmem_alloc accordingly.
1807  */
1808 void skb_orphan_partial(struct sk_buff *skb)
1809 {
 1810 	/* If this skb is a TCP pure ACK or has already been through here,
1811 	 * we have nothing to do. 2 is already a very small truesize.
1812 	 */
1813 	if (skb->truesize <= 2)
1814 		return;
1815 
 1816 	/* The TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
 1817 	 * so we do not completely orphan the skb, but transfer all
 1818 	 * accounted bytes but one, to avoid unexpected reorders.
1819 	 */
1820 	if (skb->destructor == sock_wfree
1821 #ifdef CONFIG_INET
1822 	    || skb->destructor == tcp_wfree
1823 #endif
1824 		) {
1825 		atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc);
1826 		skb->truesize = 1;
1827 	} else {
1828 		skb_orphan(skb);
1829 	}
1830 }
1831 EXPORT_SYMBOL(skb_orphan_partial);
1832 
1833 /*
1834  * Read buffer destructor automatically called from kfree_skb.
1835  */
1836 void sock_rfree(struct sk_buff *skb)
1837 {
1838 	struct sock *sk = skb->sk;
1839 	unsigned int len = skb->truesize;
1840 
1841 	atomic_sub(len, &sk->sk_rmem_alloc);
1842 	sk_mem_uncharge(sk, len);
1843 }
1844 EXPORT_SYMBOL(sock_rfree);
1845 
1846 /*
1847  * Buffer destructor for skbs that are not used directly in read or write
1848  * path, e.g. for error handler skbs. Automatically called from kfree_skb.
1849  */
1850 void sock_efree(struct sk_buff *skb)
1851 {
1852 	sock_put(skb->sk);
1853 }
1854 EXPORT_SYMBOL(sock_efree);
1855 
1856 kuid_t sock_i_uid(struct sock *sk)
1857 {
1858 	kuid_t uid;
1859 
1860 	read_lock_bh(&sk->sk_callback_lock);
1861 	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
1862 	read_unlock_bh(&sk->sk_callback_lock);
1863 	return uid;
1864 }
1865 EXPORT_SYMBOL(sock_i_uid);
1866 
1867 unsigned long sock_i_ino(struct sock *sk)
1868 {
1869 	unsigned long ino;
1870 
1871 	read_lock_bh(&sk->sk_callback_lock);
1872 	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1873 	read_unlock_bh(&sk->sk_callback_lock);
1874 	return ino;
1875 }
1876 EXPORT_SYMBOL(sock_i_ino);
1877 
1878 /*
 1879  * Allocate an skb from the socket's send buffer.
1880  */
1881 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1882 			     gfp_t priority)
1883 {
1884 	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1885 		struct sk_buff *skb = alloc_skb(size, priority);
1886 		if (skb) {
1887 			skb_set_owner_w(skb, sk);
1888 			return skb;
1889 		}
1890 	}
1891 	return NULL;
1892 }
1893 EXPORT_SYMBOL(sock_wmalloc);
1894 
1895 /*
1896  * Allocate a memory block from the socket's option memory buffer.
1897  */
1898 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1899 {
1900 	if ((unsigned int)size <= sysctl_optmem_max &&
1901 	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1902 		void *mem;
1903 		/* First do the add, to avoid the race if kmalloc
1904 		 * might sleep.
1905 		 */
1906 		atomic_add(size, &sk->sk_omem_alloc);
1907 		mem = kmalloc(size, priority);
1908 		if (mem)
1909 			return mem;
1910 		atomic_sub(size, &sk->sk_omem_alloc);
1911 	}
1912 	return NULL;
1913 }
1914 EXPORT_SYMBOL(sock_kmalloc);
1915 
1916 /* Free an option memory block. Note that we actually want the inline
1917  * here, as it allows gcc to detect the nullify and fold away the
1918  * condition entirely.
1919  */
1920 static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
1921 				  const bool nullify)
1922 {
1923 	if (WARN_ON_ONCE(!mem))
1924 		return;
1925 	if (nullify)
1926 		kzfree(mem);
1927 	else
1928 		kfree(mem);
1929 	atomic_sub(size, &sk->sk_omem_alloc);
1930 }
1931 
1932 void sock_kfree_s(struct sock *sk, void *mem, int size)
1933 {
1934 	__sock_kfree_s(sk, mem, size, false);
1935 }
1936 EXPORT_SYMBOL(sock_kfree_s);
1937 
1938 void sock_kzfree_s(struct sock *sk, void *mem, int size)
1939 {
1940 	__sock_kfree_s(sk, mem, size, true);
1941 }
1942 EXPORT_SYMBOL(sock_kzfree_s);
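
/*
 * A hypothetical sketch (not from this file) of the usual pairing of
 * sock_kmalloc()/sock_kfree_s() for per-socket option data, as a
 * setsockopt-style helper might do; example_store_option() is an
 * assumption made only for illustration.
 */
#if 0
static int example_store_option(struct sock *sk, const void __user *optval,
				int optlen)
{
	void *buf;

	if (optlen <= 0)
		return -EINVAL;

	buf = sock_kmalloc(sk, optlen, GFP_KERNEL);
	if (!buf)
		return -ENOBUFS;	/* optmem limit hit or kmalloc failed */

	if (copy_from_user(buf, optval, optlen)) {
		sock_kfree_s(sk, buf, optlen);	/* undo the omem charge */
		return -EFAULT;
	}
	/* ... attach buf to the socket; free it later with sock_kfree_s()
	 * (or sock_kzfree_s() if it holds key material) ...
	 */
	return 0;
}
#endif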
1943 
1944 /* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
1945    I think these locks should be removed for datagram sockets.
1946  */
1947 static long sock_wait_for_wmem(struct sock *sk, long timeo)
1948 {
1949 	DEFINE_WAIT(wait);
1950 
1951 	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1952 	for (;;) {
1953 		if (!timeo)
1954 			break;
1955 		if (signal_pending(current))
1956 			break;
1957 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1958 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1959 		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1960 			break;
1961 		if (sk->sk_shutdown & SEND_SHUTDOWN)
1962 			break;
1963 		if (sk->sk_err)
1964 			break;
1965 		timeo = schedule_timeout(timeo);
1966 	}
1967 	finish_wait(sk_sleep(sk), &wait);
1968 	return timeo;
1969 }
1970 
1971 
1972 /*
1973  *	Generic send/receive buffer handlers
1974  */
1975 
1976 struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1977 				     unsigned long data_len, int noblock,
1978 				     int *errcode, int max_page_order)
1979 {
1980 	struct sk_buff *skb;
1981 	long timeo;
1982 	int err;
1983 
1984 	timeo = sock_sndtimeo(sk, noblock);
1985 	for (;;) {
1986 		err = sock_error(sk);
1987 		if (err != 0)
1988 			goto failure;
1989 
1990 		err = -EPIPE;
1991 		if (sk->sk_shutdown & SEND_SHUTDOWN)
1992 			goto failure;
1993 
1994 		if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
1995 			break;
1996 
1997 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1998 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1999 		err = -EAGAIN;
2000 		if (!timeo)
2001 			goto failure;
2002 		if (signal_pending(current))
2003 			goto interrupted;
2004 		timeo = sock_wait_for_wmem(sk, timeo);
2005 	}
2006 	skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
2007 				   errcode, sk->sk_allocation);
2008 	if (skb)
2009 		skb_set_owner_w(skb, sk);
2010 	return skb;
2011 
2012 interrupted:
2013 	err = sock_intr_errno(timeo);
2014 failure:
2015 	*errcode = err;
2016 	return NULL;
2017 }
2018 EXPORT_SYMBOL(sock_alloc_send_pskb);
2019 
2020 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
2021 				    int noblock, int *errcode)
2022 {
2023 	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
2024 }
2025 EXPORT_SYMBOL(sock_alloc_send_skb);
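
/*
 * A hypothetical sketch (not from this file) of a minimal datagram sendmsg
 * helper built on sock_alloc_send_skb(), which blocks (subject to
 * SO_SNDTIMEO) until send-buffer space is available; example_sendmsg() and
 * example_xmit() are assumptions made only for illustration.
 */
#if 0
static int example_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	struct sk_buff *skb;
	int err;

	skb = sock_alloc_send_skb(sk, len, msg->msg_flags & MSG_DONTWAIT,
				  &err);
	if (!skb)
		return err;		/* -EAGAIN, -EPIPE, -EINTR, ... */

	err = memcpy_from_msg(skb_put(skb, len), msg, len);
	if (err) {
		kfree_skb(skb);
		return err;
	}
	return example_xmit(sk, skb);	/* hypothetical transmit hook */
}
#endif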
2026 
2027 int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
2028 		     struct sockcm_cookie *sockc)
2029 {
2030 	u32 tsflags;
2031 
2032 	switch (cmsg->cmsg_type) {
2033 	case SO_MARK:
2034 		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2035 			return -EPERM;
2036 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
2037 			return -EINVAL;
2038 		sockc->mark = *(u32 *)CMSG_DATA(cmsg);
2039 		break;
2040 	case SO_TIMESTAMPING:
2041 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
2042 			return -EINVAL;
2043 
2044 		tsflags = *(u32 *)CMSG_DATA(cmsg);
2045 		if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
2046 			return -EINVAL;
2047 
2048 		sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
2049 		sockc->tsflags |= tsflags;
2050 		break;
2051 	/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
2052 	case SCM_RIGHTS:
2053 	case SCM_CREDENTIALS:
2054 		break;
2055 	default:
2056 		return -EINVAL;
2057 	}
2058 	return 0;
2059 }
2060 EXPORT_SYMBOL(__sock_cmsg_send);
2061 
2062 int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
2063 		   struct sockcm_cookie *sockc)
2064 {
2065 	struct cmsghdr *cmsg;
2066 	int ret;
2067 
2068 	for_each_cmsghdr(cmsg, msg) {
2069 		if (!CMSG_OK(msg, cmsg))
2070 			return -EINVAL;
2071 		if (cmsg->cmsg_level != SOL_SOCKET)
2072 			continue;
2073 		ret = __sock_cmsg_send(sk, msg, cmsg, sockc);
2074 		if (ret)
2075 			return ret;
2076 	}
2077 	return 0;
2078 }
2079 EXPORT_SYMBOL(sock_cmsg_send);
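
/*
 * A hypothetical sketch (not from this file) of how a sendmsg implementation
 * typically seeds a sockcm_cookie from the socket defaults and then lets
 * sock_cmsg_send() override it from SOL_SOCKET control messages;
 * example_parse_cmsg() is an assumption made only for illustration.
 */
#if 0
static int example_parse_cmsg(struct sock *sk, struct msghdr *msg)
{
	struct sockcm_cookie sockc = { .tsflags = sk->sk_tsflags };
	int err;

	sockc.mark = sk->sk_mark;
	if (msg->msg_controllen) {
		err = sock_cmsg_send(sk, msg, &sockc);
		if (err)
			return err;
	}
	/* sockc.mark / sockc.tsflags are now ready to be applied to skbs */
	return 0;
}
#endif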
2080 
2081 /* On 32bit arches, an skb frag is limited to 2^15 */
2082 #define SKB_FRAG_PAGE_ORDER	get_order(32768)
2083 
2084 /**
2085  * skb_page_frag_refill - check that a page_frag contains enough room
2086  * @sz: minimum size of the fragment we want to get
2087  * @pfrag: pointer to page_frag
2088  * @gfp: priority for memory allocation
2089  *
2090  * Note: While this allocator tries to use high order pages, there is
2091  * no guarantee that allocations succeed. Therefore, @sz MUST be
2092  * less than or equal to PAGE_SIZE.
2093  */
2094 bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
2095 {
2096 	if (pfrag->page) {
2097 		if (page_ref_count(pfrag->page) == 1) {
2098 			pfrag->offset = 0;
2099 			return true;
2100 		}
2101 		if (pfrag->offset + sz <= pfrag->size)
2102 			return true;
2103 		put_page(pfrag->page);
2104 	}
2105 
2106 	pfrag->offset = 0;
2107 	if (SKB_FRAG_PAGE_ORDER) {
2108 		/* Avoid direct reclaim but allow kswapd to wake */
2109 		pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
2110 					  __GFP_COMP | __GFP_NOWARN |
2111 					  __GFP_NORETRY,
2112 					  SKB_FRAG_PAGE_ORDER);
2113 		if (likely(pfrag->page)) {
2114 			pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
2115 			return true;
2116 		}
2117 	}
2118 	pfrag->page = alloc_page(gfp);
2119 	if (likely(pfrag->page)) {
2120 		pfrag->size = PAGE_SIZE;
2121 		return true;
2122 	}
2123 	return false;
2124 }
2125 EXPORT_SYMBOL(skb_page_frag_refill);
2126 
2127 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
2128 {
2129 	if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
2130 		return true;
2131 
2132 	sk_enter_memory_pressure(sk);
2133 	sk_stream_moderate_sndbuf(sk);
2134 	return false;
2135 }
2136 EXPORT_SYMBOL(sk_page_frag_refill);
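
/*
 * A hypothetical sketch (not from this file) of the page-frag pattern used
 * by stream/datagram send paths: refill the per-socket (or per-task) frag,
 * copy user data into it, and attach it to an skb. example_append_frag() is
 * an assumption; memory accounting (sk_wmem_queued/forward_alloc) is elided.
 */
#if 0
static int example_append_frag(struct sock *sk, struct sk_buff *skb,
			       struct iov_iter *from, int copy)
{
	struct page_frag *pfrag = sk_page_frag(sk);

	if (!sk_page_frag_refill(sk, pfrag))
		return -ENOBUFS;	/* under memory pressure */

	if (copy_page_from_iter(pfrag->page, pfrag->offset, copy, from) != copy)
		return -EFAULT;

	skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
			   pfrag->page, pfrag->offset, copy);
	get_page(pfrag->page);		/* the skb now holds a page reference */
	pfrag->offset += copy;
	skb->len += copy;
	skb->data_len += copy;
	skb->truesize += copy;
	return 0;
}
#endif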
2137 
2138 static void __lock_sock(struct sock *sk)
2139 	__releases(&sk->sk_lock.slock)
2140 	__acquires(&sk->sk_lock.slock)
2141 {
2142 	DEFINE_WAIT(wait);
2143 
2144 	for (;;) {
2145 		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
2146 					TASK_UNINTERRUPTIBLE);
2147 		spin_unlock_bh(&sk->sk_lock.slock);
2148 		schedule();
2149 		spin_lock_bh(&sk->sk_lock.slock);
2150 		if (!sock_owned_by_user(sk))
2151 			break;
2152 	}
2153 	finish_wait(&sk->sk_lock.wq, &wait);
2154 }
2155 
2156 static void __release_sock(struct sock *sk)
2157 	__releases(&sk->sk_lock.slock)
2158 	__acquires(&sk->sk_lock.slock)
2159 {
2160 	struct sk_buff *skb, *next;
2161 
2162 	while ((skb = sk->sk_backlog.head) != NULL) {
2163 		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
2164 
2165 		spin_unlock_bh(&sk->sk_lock.slock);
2166 
2167 		do {
2168 			next = skb->next;
2169 			prefetch(next);
2170 			WARN_ON_ONCE(skb_dst_is_noref(skb));
2171 			skb->next = NULL;
2172 			sk_backlog_rcv(sk, skb);
2173 
2174 			cond_resched();
2175 
2176 			skb = next;
2177 		} while (skb != NULL);
2178 
2179 		spin_lock_bh(&sk->sk_lock.slock);
2180 	}
2181 
2182 	/*
2183 	 * Doing the zeroing here guarantees we cannot loop forever
2184 	 * while a wild producer attempts to flood us.
2185 	 */
2186 	sk->sk_backlog.len = 0;
2187 }
2188 
2189 void __sk_flush_backlog(struct sock *sk)
2190 {
2191 	spin_lock_bh(&sk->sk_lock.slock);
2192 	__release_sock(sk);
2193 	spin_unlock_bh(&sk->sk_lock.slock);
2194 }
2195 
2196 /**
2197  * sk_wait_data - wait for data to arrive at sk_receive_queue
2198  * @sk:    sock to wait on
2199  * @timeo: for how long
2200  * @skb:   last skb seen on sk_receive_queue
2201  *
2202  * Now socket state including sk->sk_err is changed only under the lock,
2203  * hence we may omit checks after joining the wait queue.
2204  * We check the receive queue before schedule() only as an optimization;
2205  * it is very likely that release_sock() added new data.
2206  */
2207 int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
2208 {
2209 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
2210 	int rc;
2211 
2212 	add_wait_queue(sk_sleep(sk), &wait);
2213 	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2214 	rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
2215 	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2216 	remove_wait_queue(sk_sleep(sk), &wait);
2217 	return rc;
2218 }
2219 EXPORT_SYMBOL(sk_wait_data);
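
/*
 * A hypothetical sketch (not from this file) of the canonical receive-side
 * wait loop around sk_wait_data(), as a simple recvmsg path might use it;
 * example_wait_for_data() is an assumption made only for illustration.
 */
#if 0
static int example_wait_for_data(struct sock *sk, int noblock)
{
	long timeo = sock_rcvtimeo(sk, noblock);
	int err = 0;

	lock_sock(sk);
	while (skb_queue_empty(&sk->sk_receive_queue)) {
		err = sock_error(sk);
		if (err)
			break;
		if (sk->sk_shutdown & RCV_SHUTDOWN)
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
		err = 0;
		/* Drops and re-acquires the socket lock while sleeping */
		sk_wait_data(sk, &timeo, NULL);
	}
	release_sock(sk);
	return err;	/* 0: data (or EOF/shutdown) is ready to read */
}
#endif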
2220 
2221 /**
2222  *	__sk_mem_raise_allocated - increase memory_allocated
2223  *	@sk: socket
2224  *	@size: memory size to allocate
2225  *	@amt: pages to allocate
2226  *	@kind: allocation type
2227  *
2228  *	Similar to __sk_mem_schedule(), but does not update sk_forward_alloc
2229  */
2230 int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
2231 {
2232 	struct proto *prot = sk->sk_prot;
2233 	long allocated = sk_memory_allocated_add(sk, amt);
2234 
2235 	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
2236 	    !mem_cgroup_charge_skmem(sk->sk_memcg, amt))
2237 		goto suppress_allocation;
2238 
2239 	/* Under limit. */
2240 	if (allocated <= sk_prot_mem_limits(sk, 0)) {
2241 		sk_leave_memory_pressure(sk);
2242 		return 1;
2243 	}
2244 
2245 	/* Under pressure. */
2246 	if (allocated > sk_prot_mem_limits(sk, 1))
2247 		sk_enter_memory_pressure(sk);
2248 
2249 	/* Over hard limit. */
2250 	if (allocated > sk_prot_mem_limits(sk, 2))
2251 		goto suppress_allocation;
2252 
2253 	/* guarantee minimum buffer size under pressure */
2254 	if (kind == SK_MEM_RECV) {
2255 		if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
2256 			return 1;
2257 
2258 	} else { /* SK_MEM_SEND */
2259 		if (sk->sk_type == SOCK_STREAM) {
2260 			if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
2261 				return 1;
2262 		} else if (atomic_read(&sk->sk_wmem_alloc) <
2263 			   prot->sysctl_wmem[0])
2264 				return 1;
2265 	}
2266 
2267 	if (sk_has_memory_pressure(sk)) {
2268 		int alloc;
2269 
2270 		if (!sk_under_memory_pressure(sk))
2271 			return 1;
2272 		alloc = sk_sockets_allocated_read_positive(sk);
2273 		if (sk_prot_mem_limits(sk, 2) > alloc *
2274 		    sk_mem_pages(sk->sk_wmem_queued +
2275 				 atomic_read(&sk->sk_rmem_alloc) +
2276 				 sk->sk_forward_alloc))
2277 			return 1;
2278 	}
2279 
2280 suppress_allocation:
2281 
2282 	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
2283 		sk_stream_moderate_sndbuf(sk);
2284 
2285 		/* Fail only if socket is _under_ its sndbuf.
2286 		 * In this case we cannot block, so we have to fail.
2287 		 */
2288 		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
2289 			return 1;
2290 	}
2291 
2292 	trace_sock_exceed_buf_limit(sk, prot, allocated);
2293 
2294 	sk_memory_allocated_sub(sk, amt);
2295 
2296 	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
2297 		mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);
2298 
2299 	return 0;
2300 }
2301 EXPORT_SYMBOL(__sk_mem_raise_allocated);
2302 
2303 /**
2304  *	__sk_mem_schedule - increase sk_forward_alloc and memory_allocated
2305  *	@sk: socket
2306  *	@size: memory size to allocate
2307  *	@kind: allocation type
2308  *
2309  *	If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
2310  *	rmem allocation. This function assumes that protocols which have
2311  *	memory_pressure use sk_wmem_queued as write buffer accounting.
2312  */
2313 int __sk_mem_schedule(struct sock *sk, int size, int kind)
2314 {
2315 	int ret, amt = sk_mem_pages(size);
2316 
2317 	sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT;
2318 	ret = __sk_mem_raise_allocated(sk, size, amt, kind);
2319 	if (!ret)
2320 		sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT;
2321 	return ret;
2322 }
2323 EXPORT_SYMBOL(__sk_mem_schedule);
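
/*
 * A hypothetical sketch (not from this file) of how a protocol's queueing
 * path consumes the forward allocation managed above, via the
 * sk_wmem_schedule()/sk_mem_charge() helpers from <net/sock.h>;
 * example_queue_for_send() is an assumption made only for illustration.
 */
#if 0
static int example_queue_for_send(struct sock *sk, struct sk_buff *skb)
{
	/* Reserve accounting (falls back to __sk_mem_schedule() on a miss) */
	if (!sk_wmem_schedule(sk, skb->truesize))
		return -ENOBUFS;

	skb_set_owner_w(skb, sk);		/* charges sk_wmem_alloc */
	sk->sk_wmem_queued += skb->truesize;
	sk_mem_charge(sk, skb->truesize);	/* consume sk_forward_alloc */
	skb_queue_tail(&sk->sk_write_queue, skb);
	return 0;
}
#endif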
2324 
2325 /**
2326  *	__sk_mem_reduce_allocated - reclaim memory_allocated
2327  *	@sk: socket
2328  *	@amount: number of quanta
2329  *
2330  *	Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc
2331  */
2332 void __sk_mem_reduce_allocated(struct sock *sk, int amount)
2333 {
2334 	sk_memory_allocated_sub(sk, amount);
2335 
2336 	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
2337 		mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
2338 
2339 	if (sk_under_memory_pressure(sk) &&
2340 	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
2341 		sk_leave_memory_pressure(sk);
2342 }
2343 EXPORT_SYMBOL(__sk_mem_reduce_allocated);
2344 
2345 /**
2346  *	__sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
2347  *	@sk: socket
2348  *	@amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
2349  */
2350 void __sk_mem_reclaim(struct sock *sk, int amount)
2351 {
2352 	amount >>= SK_MEM_QUANTUM_SHIFT;
2353 	sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
2354 	__sk_mem_reduce_allocated(sk, amount);
2355 }
2356 EXPORT_SYMBOL(__sk_mem_reclaim);
2357 
2358 int sk_set_peek_off(struct sock *sk, int val)
2359 {
2360 	if (val < 0)
2361 		return -EINVAL;
2362 
2363 	sk->sk_peek_off = val;
2364 	return 0;
2365 }
2366 EXPORT_SYMBOL_GPL(sk_set_peek_off);
2367 
2368 /*
2369  * Set of default routines for initialising struct proto_ops when
2370  * the protocol does not support a particular function. In certain
2371  * cases where it makes no sense for a protocol to have a "do nothing"
2372  * function, some default processing is provided.
2373  */
2374 
2375 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
2376 {
2377 	return -EOPNOTSUPP;
2378 }
2379 EXPORT_SYMBOL(sock_no_bind);
2380 
2381 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
2382 		    int len, int flags)
2383 {
2384 	return -EOPNOTSUPP;
2385 }
2386 EXPORT_SYMBOL(sock_no_connect);
2387 
2388 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
2389 {
2390 	return -EOPNOTSUPP;
2391 }
2392 EXPORT_SYMBOL(sock_no_socketpair);
2393 
2394 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
2395 		   bool kern)
2396 {
2397 	return -EOPNOTSUPP;
2398 }
2399 EXPORT_SYMBOL(sock_no_accept);
2400 
2401 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
2402 		    int *len, int peer)
2403 {
2404 	return -EOPNOTSUPP;
2405 }
2406 EXPORT_SYMBOL(sock_no_getname);
2407 
2408 unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
2409 {
2410 	return 0;
2411 }
2412 EXPORT_SYMBOL(sock_no_poll);
2413 
2414 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2415 {
2416 	return -EOPNOTSUPP;
2417 }
2418 EXPORT_SYMBOL(sock_no_ioctl);
2419 
2420 int sock_no_listen(struct socket *sock, int backlog)
2421 {
2422 	return -EOPNOTSUPP;
2423 }
2424 EXPORT_SYMBOL(sock_no_listen);
2425 
2426 int sock_no_shutdown(struct socket *sock, int how)
2427 {
2428 	return -EOPNOTSUPP;
2429 }
2430 EXPORT_SYMBOL(sock_no_shutdown);
2431 
2432 int sock_no_setsockopt(struct socket *sock, int level, int optname,
2433 		    char __user *optval, unsigned int optlen)
2434 {
2435 	return -EOPNOTSUPP;
2436 }
2437 EXPORT_SYMBOL(sock_no_setsockopt);
2438 
2439 int sock_no_getsockopt(struct socket *sock, int level, int optname,
2440 		    char __user *optval, int __user *optlen)
2441 {
2442 	return -EOPNOTSUPP;
2443 }
2444 EXPORT_SYMBOL(sock_no_getsockopt);
2445 
2446 int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
2447 {
2448 	return -EOPNOTSUPP;
2449 }
2450 EXPORT_SYMBOL(sock_no_sendmsg);
2451 
2452 int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
2453 		    int flags)
2454 {
2455 	return -EOPNOTSUPP;
2456 }
2457 EXPORT_SYMBOL(sock_no_recvmsg);
2458 
2459 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
2460 {
2461 	/* Mirror missing mmap method error code */
2462 	return -ENODEV;
2463 }
2464 EXPORT_SYMBOL(sock_no_mmap);
2465 
2466 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
2467 {
2468 	ssize_t res;
2469 	struct msghdr msg = {.msg_flags = flags};
2470 	struct kvec iov;
2471 	char *kaddr = kmap(page);
2472 	iov.iov_base = kaddr + offset;
2473 	iov.iov_len = size;
2474 	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
2475 	kunmap(page);
2476 	return res;
2477 }
2478 EXPORT_SYMBOL(sock_no_sendpage);
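
/*
 * A hypothetical sketch (not from this file) showing how a protocol that
 * only implements a few methods can wire everything else to the sock_no_*()
 * defaults above; PF_EXAMPLE and the example_* callbacks are assumptions
 * made only for illustration.
 */
#if 0
static const struct proto_ops example_dgram_ops = {
	.family		= PF_EXAMPLE,		/* hypothetical */
	.owner		= THIS_MODULE,
	.release	= example_release,	/* hypothetical */
	.bind		= example_bind,		/* hypothetical */
	.connect	= sock_no_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= sock_no_accept,
	.getname	= example_getname,	/* hypothetical */
	.poll		= datagram_poll,
	.ioctl		= sock_no_ioctl,
	.listen		= sock_no_listen,
	.shutdown	= sock_no_shutdown,
	.setsockopt	= sock_common_setsockopt,
	.getsockopt	= sock_common_getsockopt,
	.sendmsg	= example_sendmsg,	/* hypothetical */
	.recvmsg	= sock_common_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage,
};
#endif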
2479 
2480 /*
2481  *	Default Socket Callbacks
2482  */
2483 
2484 static void sock_def_wakeup(struct sock *sk)
2485 {
2486 	struct socket_wq *wq;
2487 
2488 	rcu_read_lock();
2489 	wq = rcu_dereference(sk->sk_wq);
2490 	if (skwq_has_sleeper(wq))
2491 		wake_up_interruptible_all(&wq->wait);
2492 	rcu_read_unlock();
2493 }
2494 
2495 static void sock_def_error_report(struct sock *sk)
2496 {
2497 	struct socket_wq *wq;
2498 
2499 	rcu_read_lock();
2500 	wq = rcu_dereference(sk->sk_wq);
2501 	if (skwq_has_sleeper(wq))
2502 		wake_up_interruptible_poll(&wq->wait, POLLERR);
2503 	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
2504 	rcu_read_unlock();
2505 }
2506 
2507 static void sock_def_readable(struct sock *sk)
2508 {
2509 	struct socket_wq *wq;
2510 
2511 	rcu_read_lock();
2512 	wq = rcu_dereference(sk->sk_wq);
2513 	if (skwq_has_sleeper(wq))
2514 		wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
2515 						POLLRDNORM | POLLRDBAND);
2516 	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
2517 	rcu_read_unlock();
2518 }
2519 
2520 static void sock_def_write_space(struct sock *sk)
2521 {
2522 	struct socket_wq *wq;
2523 
2524 	rcu_read_lock();
2525 
2526 	/* Do not wake up a writer until he can make "significant"
2527 	 * progress.  --DaveM
2528 	 */
2529 	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
2530 		wq = rcu_dereference(sk->sk_wq);
2531 		if (skwq_has_sleeper(wq))
2532 			wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
2533 						POLLWRNORM | POLLWRBAND);
2534 
2535 		/* Should agree with poll, otherwise some programs break */
2536 		if (sock_writeable(sk))
2537 			sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
2538 	}
2539 
2540 	rcu_read_unlock();
2541 }
2542 
2543 static void sock_def_destruct(struct sock *sk)
2544 {
2545 }
2546 
2547 void sk_send_sigurg(struct sock *sk)
2548 {
2549 	if (sk->sk_socket && sk->sk_socket->file)
2550 		if (send_sigurg(&sk->sk_socket->file->f_owner))
2551 			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
2552 }
2553 EXPORT_SYMBOL(sk_send_sigurg);
2554 
2555 void sk_reset_timer(struct sock *sk, struct timer_list* timer,
2556 		    unsigned long expires)
2557 {
2558 	if (!mod_timer(timer, expires))
2559 		sock_hold(sk);
2560 }
2561 EXPORT_SYMBOL(sk_reset_timer);
2562 
2563 void sk_stop_timer(struct sock *sk, struct timer_list* timer)
2564 {
2565 	if (del_timer(timer))
2566 		__sock_put(sk);
2567 }
2568 EXPORT_SYMBOL(sk_stop_timer);
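
/*
 * A hypothetical sketch (not from this file) of the sk_reset_timer()
 * reference-counting contract: the handler owns the reference taken when the
 * timer was armed and must drop it with sock_put(); the example_* names are
 * assumptions made only for illustration.
 */
#if 0
/* Normally registered once at socket init time, e.g. with
 * setup_timer(&sk->sk_timer, example_timer_handler, (unsigned long)sk);
 */
static void example_timer_handler(unsigned long data)
{
	struct sock *sk = (struct sock *)data;

	bh_lock_sock(sk);
	/* ... timed protocol work ... */
	bh_unlock_sock(sk);
	sock_put(sk);		/* drop the reference taken by sk_reset_timer() */
}

static void example_arm_timer(struct sock *sk, unsigned long delay)
{
	/* Takes a reference only if the timer was not already pending */
	sk_reset_timer(sk, &sk->sk_timer, jiffies + delay);
}
#endif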
2569 
2570 void sock_init_data(struct socket *sock, struct sock *sk)
2571 {
2572 	sk_init_common(sk);
2573 	sk->sk_send_head	=	NULL;
2574 
2575 	init_timer(&sk->sk_timer);
2576 
2577 	sk->sk_allocation	=	GFP_KERNEL;
2578 	sk->sk_rcvbuf		=	sysctl_rmem_default;
2579 	sk->sk_sndbuf		=	sysctl_wmem_default;
2580 	sk->sk_state		=	TCP_CLOSE;
2581 	sk_set_socket(sk, sock);
2582 
2583 	sock_set_flag(sk, SOCK_ZAPPED);
2584 
2585 	if (sock) {
2586 		sk->sk_type	=	sock->type;
2587 		sk->sk_wq	=	sock->wq;
2588 		sock->sk	=	sk;
2589 		sk->sk_uid	=	SOCK_INODE(sock)->i_uid;
2590 	} else {
2591 		sk->sk_wq	=	NULL;
2592 		sk->sk_uid	=	make_kuid(sock_net(sk)->user_ns, 0);
2593 	}
2594 
2595 	rwlock_init(&sk->sk_callback_lock);
2596 	if (sk->sk_kern_sock)
2597 		lockdep_set_class_and_name(
2598 			&sk->sk_callback_lock,
2599 			af_kern_callback_keys + sk->sk_family,
2600 			af_family_kern_clock_key_strings[sk->sk_family]);
2601 	else
2602 		lockdep_set_class_and_name(
2603 			&sk->sk_callback_lock,
2604 			af_callback_keys + sk->sk_family,
2605 			af_family_clock_key_strings[sk->sk_family]);
2606 
2607 	sk->sk_state_change	=	sock_def_wakeup;
2608 	sk->sk_data_ready	=	sock_def_readable;
2609 	sk->sk_write_space	=	sock_def_write_space;
2610 	sk->sk_error_report	=	sock_def_error_report;
2611 	sk->sk_destruct		=	sock_def_destruct;
2612 
2613 	sk->sk_frag.page	=	NULL;
2614 	sk->sk_frag.offset	=	0;
2615 	sk->sk_peek_off		=	-1;
2616 
2617 	sk->sk_peer_pid 	=	NULL;
2618 	sk->sk_peer_cred	=	NULL;
2619 	sk->sk_write_pending	=	0;
2620 	sk->sk_rcvlowat		=	1;
2621 	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
2622 	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
2623 
2624 	sk->sk_stamp = SK_DEFAULT_STAMP;
2625 
2626 #ifdef CONFIG_NET_RX_BUSY_POLL
2627 	sk->sk_napi_id		=	0;
2628 	sk->sk_ll_usec		=	sysctl_net_busy_read;
2629 #endif
2630 
2631 	sk->sk_max_pacing_rate = ~0U;
2632 	sk->sk_pacing_rate = ~0U;
2633 	sk->sk_incoming_cpu = -1;
2634 	/*
2635 	 * Before updating sk_refcnt, we must commit prior changes to memory
2636 	 * (Documentation/RCU/rculist_nulls.txt for details)
2637 	 */
2638 	smp_wmb();
2639 	atomic_set(&sk->sk_refcnt, 1);
2640 	atomic_set(&sk->sk_drops, 0);
2641 }
2642 EXPORT_SYMBOL(sock_init_data);
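
/*
 * A hypothetical sketch (not from this file) of the usual shape of a
 * protocol-family create() hook: allocate the sock, let sock_init_data()
 * install the defaults above, then override what the protocol needs.
 * PF_EXAMPLE, example_proto and example_data_ready are assumptions.
 */
#if 0
static int example_create(struct net *net, struct socket *sock,
			  int protocol, int kern)
{
	struct sock *sk;

	sk = sk_alloc(net, PF_EXAMPLE, GFP_KERNEL, &example_proto, kern);
	if (!sk)
		return -ENOMEM;

	sock_init_data(sock, sk);		/* buffers, timer, callbacks */
	sk->sk_protocol = protocol;
	sk->sk_data_ready = example_data_ready;	/* override one default */
	return 0;
}
#endif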
2643 
2644 void lock_sock_nested(struct sock *sk, int subclass)
2645 {
2646 	might_sleep();
2647 	spin_lock_bh(&sk->sk_lock.slock);
2648 	if (sk->sk_lock.owned)
2649 		__lock_sock(sk);
2650 	sk->sk_lock.owned = 1;
2651 	spin_unlock(&sk->sk_lock.slock);
2652 	/*
2653 	 * The sk_lock has mutex_lock() semantics here:
2654 	 */
2655 	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
2656 	local_bh_enable();
2657 }
2658 EXPORT_SYMBOL(lock_sock_nested);
2659 
2660 void release_sock(struct sock *sk)
2661 {
2662 	spin_lock_bh(&sk->sk_lock.slock);
2663 	if (sk->sk_backlog.tail)
2664 		__release_sock(sk);
2665 
2666 	/* Warning : release_cb() might need to release sk ownership,
2667 	 * i.e. call sock_release_ownership(sk) before us.
2668 	 */
2669 	if (sk->sk_prot->release_cb)
2670 		sk->sk_prot->release_cb(sk);
2671 
2672 	sock_release_ownership(sk);
2673 	if (waitqueue_active(&sk->sk_lock.wq))
2674 		wake_up(&sk->sk_lock.wq);
2675 	spin_unlock_bh(&sk->sk_lock.slock);
2676 }
2677 EXPORT_SYMBOL(release_sock);
2678 
2679 /**
2680  * lock_sock_fast - fast version of lock_sock
2681  * @sk: socket
2682  *
2683  * This version should be used for very small sections, where the process won't block.
2684  * return false if fast path is taken
2685  *   sk_lock.slock locked, owned = 0, BH disabled
2686  * return true if slow path is taken
2687  *   sk_lock.slock unlocked, owned = 1, BH enabled
2688  */
2689 bool lock_sock_fast(struct sock *sk)
2690 {
2691 	might_sleep();
2692 	spin_lock_bh(&sk->sk_lock.slock);
2693 
2694 	if (!sk->sk_lock.owned)
2695 		/*
2696 		 * Note: BH must stay disabled on this fast path.
2697 		 */
2698 		return false;
2699 
2700 	__lock_sock(sk);
2701 	sk->sk_lock.owned = 1;
2702 	spin_unlock(&sk->sk_lock.slock);
2703 	/*
2704 	 * The sk_lock has mutex_lock() semantics here:
2705 	 */
2706 	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
2707 	local_bh_enable();
2708 	return true;
2709 }
2710 EXPORT_SYMBOL(lock_sock_fast);
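
/*
 * A hypothetical sketch (not from this file) of the lock_sock_fast() /
 * unlock_sock_fast() pairing for a short, non-sleeping critical section;
 * example_snapshot_backlog() is an assumption and only illustrates the
 * locking pattern, not a real use case.
 */
#if 0
static int example_snapshot_backlog(struct sock *sk)
{
	bool slow = lock_sock_fast(sk);	/* false: slock held, BH disabled */
	int len = sk->sk_backlog.len;	/* short section, no sleeping */

	unlock_sock_fast(sk, slow);	/* undoes whichever path was taken */
	return len;
}
#endif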
2711 
2712 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
2713 {
2714 	struct timeval tv;
2715 	if (!sock_flag(sk, SOCK_TIMESTAMP))
2716 		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2717 	tv = ktime_to_timeval(sk->sk_stamp);
2718 	if (tv.tv_sec == -1)
2719 		return -ENOENT;
2720 	if (tv.tv_sec == 0) {
2721 		sk->sk_stamp = ktime_get_real();
2722 		tv = ktime_to_timeval(sk->sk_stamp);
2723 	}
2724 	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
2725 }
2726 EXPORT_SYMBOL(sock_get_timestamp);
2727 
2728 int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
2729 {
2730 	struct timespec ts;
2731 	if (!sock_flag(sk, SOCK_TIMESTAMP))
2732 		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2733 	ts = ktime_to_timespec(sk->sk_stamp);
2734 	if (ts.tv_sec == -1)
2735 		return -ENOENT;
2736 	if (ts.tv_sec == 0) {
2737 		sk->sk_stamp = ktime_get_real();
2738 		ts = ktime_to_timespec(sk->sk_stamp);
2739 	}
2740 	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
2741 }
2742 EXPORT_SYMBOL(sock_get_timestampns);
2743 
2744 void sock_enable_timestamp(struct sock *sk, int flag)
2745 {
2746 	if (!sock_flag(sk, flag)) {
2747 		unsigned long previous_flags = sk->sk_flags;
2748 
2749 		sock_set_flag(sk, flag);
2750 		/*
2751 		 * we just set one of the two flags which require net
2752 		 * time stamping, but time stamping might have been on
2753 		 * already because of the other one
2754 		 */
2755 		if (sock_needs_netstamp(sk) &&
2756 		    !(previous_flags & SK_FLAGS_TIMESTAMP))
2757 			net_enable_timestamp();
2758 	}
2759 }
2760 
2761 int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
2762 		       int level, int type)
2763 {
2764 	struct sock_exterr_skb *serr;
2765 	struct sk_buff *skb;
2766 	int copied, err;
2767 
2768 	err = -EAGAIN;
2769 	skb = sock_dequeue_err_skb(sk);
2770 	if (skb == NULL)
2771 		goto out;
2772 
2773 	copied = skb->len;
2774 	if (copied > len) {
2775 		msg->msg_flags |= MSG_TRUNC;
2776 		copied = len;
2777 	}
2778 	err = skb_copy_datagram_msg(skb, 0, msg, copied);
2779 	if (err)
2780 		goto out_free_skb;
2781 
2782 	sock_recv_timestamp(msg, sk, skb);
2783 
2784 	serr = SKB_EXT_ERR(skb);
2785 	put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);
2786 
2787 	msg->msg_flags |= MSG_ERRQUEUE;
2788 	err = copied;
2789 
2790 out_free_skb:
2791 	kfree_skb(skb);
2792 out:
2793 	return err;
2794 }
2795 EXPORT_SYMBOL(sock_recv_errqueue);
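
/*
 * A hypothetical sketch (not from this file) of how a protocol's recvmsg
 * typically dispatches MSG_ERRQUEUE reads to sock_recv_errqueue(); the
 * level/type shown follow the IPv4 convention (SOL_IP/IP_RECVERR), and
 * example_recvmsg() is an assumption made only for illustration.
 */
#if 0
static int example_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
			   int flags)
{
	if (flags & MSG_ERRQUEUE)
		return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);

	/* ... normal receive path ... */
	return -EOPNOTSUPP;	/* placeholder */
}
#endif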
2796 
2797 /*
2798  *	Get a socket option on a socket.
2799  *
2800  *	FIX: POSIX 1003.1g is very ambiguous here. It states that
2801  *	asynchronous errors should be reported by getsockopt. We assume
2802  *	this means if you specify SO_ERROR (otherwise whats the point of it).
2803  */
2804 int sock_common_getsockopt(struct socket *sock, int level, int optname,
2805 			   char __user *optval, int __user *optlen)
2806 {
2807 	struct sock *sk = sock->sk;
2808 
2809 	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2810 }
2811 EXPORT_SYMBOL(sock_common_getsockopt);
2812 
2813 #ifdef CONFIG_COMPAT
2814 int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
2815 				  char __user *optval, int __user *optlen)
2816 {
2817 	struct sock *sk = sock->sk;
2818 
2819 	if (sk->sk_prot->compat_getsockopt != NULL)
2820 		return sk->sk_prot->compat_getsockopt(sk, level, optname,
2821 						      optval, optlen);
2822 	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2823 }
2824 EXPORT_SYMBOL(compat_sock_common_getsockopt);
2825 #endif
2826 
2827 int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2828 			int flags)
2829 {
2830 	struct sock *sk = sock->sk;
2831 	int addr_len = 0;
2832 	int err;
2833 
2834 	err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
2835 				   flags & ~MSG_DONTWAIT, &addr_len);
2836 	if (err >= 0)
2837 		msg->msg_namelen = addr_len;
2838 	return err;
2839 }
2840 EXPORT_SYMBOL(sock_common_recvmsg);
2841 
2842 /*
2843  *	Set socket options on an inet socket.
2844  */
2845 int sock_common_setsockopt(struct socket *sock, int level, int optname,
2846 			   char __user *optval, unsigned int optlen)
2847 {
2848 	struct sock *sk = sock->sk;
2849 
2850 	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2851 }
2852 EXPORT_SYMBOL(sock_common_setsockopt);
2853 
2854 #ifdef CONFIG_COMPAT
2855 int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
2856 				  char __user *optval, unsigned int optlen)
2857 {
2858 	struct sock *sk = sock->sk;
2859 
2860 	if (sk->sk_prot->compat_setsockopt != NULL)
2861 		return sk->sk_prot->compat_setsockopt(sk, level, optname,
2862 						      optval, optlen);
2863 	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2864 }
2865 EXPORT_SYMBOL(compat_sock_common_setsockopt);
2866 #endif
2867 
2868 void sk_common_release(struct sock *sk)
2869 {
2870 	if (sk->sk_prot->destroy)
2871 		sk->sk_prot->destroy(sk);
2872 
2873 	/*
2874 	 * Observation: when sk_common_release() is called, processes have
2875 	 * no access to the socket, but the network stack still does.
2876 	 * Step one, detach it from networking:
2877 	 *
2878 	 * A. Remove from hash tables.
2879 	 */
2880 
2881 	sk->sk_prot->unhash(sk);
2882 
2883 	/*
2884 	 * At this point the socket cannot receive new packets, but it is
2885 	 * possible that some packets are in flight because some CPU runs the
2886 	 * receiver and did the hash table lookup before we unhashed the socket.
2887 	 * They will reach the receive queue and be purged by the socket destructor.
2888 	 *
2889 	 * Also we still have packets pending on the receive queue and, probably,
2890 	 * our own packets waiting in device queues. sock_destroy will drain the
2891 	 * receive queue, but transmitted packets will delay socket destruction
2892 	 * until the last reference is released.
2893 	 */
2894 
2895 	sock_orphan(sk);
2896 
2897 	xfrm_sk_free_policy(sk);
2898 
2899 	sk_refcnt_debug_release(sk);
2900 
2901 	sock_put(sk);
2902 }
2903 EXPORT_SYMBOL(sk_common_release);
2904 
2905 void sk_get_meminfo(const struct sock *sk, u32 *mem)
2906 {
2907 	memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
2908 
2909 	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
2910 	mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
2911 	mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
2912 	mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
2913 	mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
2914 	mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
2915 	mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
2916 	mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
2917 	mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
2918 }
2919 
2920 #ifdef CONFIG_PROC_FS
2921 #define PROTO_INUSE_NR	64	/* should be enough for the first time */
2922 struct prot_inuse {
2923 	int val[PROTO_INUSE_NR];
2924 };
2925 
2926 static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
2927 
2928 #ifdef CONFIG_NET_NS
2929 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2930 {
2931 	__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
2932 }
2933 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2934 
2935 int sock_prot_inuse_get(struct net *net, struct proto *prot)
2936 {
2937 	int cpu, idx = prot->inuse_idx;
2938 	int res = 0;
2939 
2940 	for_each_possible_cpu(cpu)
2941 		res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
2942 
2943 	return res >= 0 ? res : 0;
2944 }
2945 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2946 
2947 static int __net_init sock_inuse_init_net(struct net *net)
2948 {
2949 	net->core.inuse = alloc_percpu(struct prot_inuse);
2950 	return net->core.inuse ? 0 : -ENOMEM;
2951 }
2952 
2953 static void __net_exit sock_inuse_exit_net(struct net *net)
2954 {
2955 	free_percpu(net->core.inuse);
2956 }
2957 
2958 static struct pernet_operations net_inuse_ops = {
2959 	.init = sock_inuse_init_net,
2960 	.exit = sock_inuse_exit_net,
2961 };
2962 
2963 static __init int net_inuse_init(void)
2964 {
2965 	if (register_pernet_subsys(&net_inuse_ops))
2966 		panic("Cannot initialize net inuse counters");
2967 
2968 	return 0;
2969 }
2970 
2971 core_initcall(net_inuse_init);
2972 #else
2973 static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
2974 
2975 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2976 {
2977 	__this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
2978 }
2979 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2980 
2981 int sock_prot_inuse_get(struct net *net, struct proto *prot)
2982 {
2983 	int cpu, idx = prot->inuse_idx;
2984 	int res = 0;
2985 
2986 	for_each_possible_cpu(cpu)
2987 		res += per_cpu(prot_inuse, cpu).val[idx];
2988 
2989 	return res >= 0 ? res : 0;
2990 }
2991 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2992 #endif
2993 
2994 static void assign_proto_idx(struct proto *prot)
2995 {
2996 	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
2997 
2998 	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
2999 		pr_err("PROTO_INUSE_NR exhausted\n");
3000 		return;
3001 	}
3002 
3003 	set_bit(prot->inuse_idx, proto_inuse_idx);
3004 }
3005 
3006 static void release_proto_idx(struct proto *prot)
3007 {
3008 	if (prot->inuse_idx != PROTO_INUSE_NR - 1)
3009 		clear_bit(prot->inuse_idx, proto_inuse_idx);
3010 }
3011 #else
3012 static inline void assign_proto_idx(struct proto *prot)
3013 {
3014 }
3015 
3016 static inline void release_proto_idx(struct proto *prot)
3017 {
3018 }
3019 #endif
3020 
3021 static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
3022 {
3023 	if (!rsk_prot)
3024 		return;
3025 	kfree(rsk_prot->slab_name);
3026 	rsk_prot->slab_name = NULL;
3027 	kmem_cache_destroy(rsk_prot->slab);
3028 	rsk_prot->slab = NULL;
3029 }
3030 
3031 static int req_prot_init(const struct proto *prot)
3032 {
3033 	struct request_sock_ops *rsk_prot = prot->rsk_prot;
3034 
3035 	if (!rsk_prot)
3036 		return 0;
3037 
3038 	rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s",
3039 					prot->name);
3040 	if (!rsk_prot->slab_name)
3041 		return -ENOMEM;
3042 
3043 	rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
3044 					   rsk_prot->obj_size, 0,
3045 					   prot->slab_flags, NULL);
3046 
3047 	if (!rsk_prot->slab) {
3048 		pr_crit("%s: Can't create request sock SLAB cache!\n",
3049 			prot->name);
3050 		return -ENOMEM;
3051 	}
3052 	return 0;
3053 }
3054 
3055 int proto_register(struct proto *prot, int alloc_slab)
3056 {
3057 	if (alloc_slab) {
3058 		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
3059 					SLAB_HWCACHE_ALIGN | prot->slab_flags,
3060 					NULL);
3061 
3062 		if (prot->slab == NULL) {
3063 			pr_crit("%s: Can't create sock SLAB cache!\n",
3064 				prot->name);
3065 			goto out;
3066 		}
3067 
3068 		if (req_prot_init(prot))
3069 			goto out_free_request_sock_slab;
3070 
3071 		if (prot->twsk_prot != NULL) {
3072 			prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
3073 
3074 			if (prot->twsk_prot->twsk_slab_name == NULL)
3075 				goto out_free_request_sock_slab;
3076 
3077 			prot->twsk_prot->twsk_slab =
3078 				kmem_cache_create(prot->twsk_prot->twsk_slab_name,
3079 						  prot->twsk_prot->twsk_obj_size,
3080 						  0,
3081 						  prot->slab_flags,
3082 						  NULL);
3083 			if (prot->twsk_prot->twsk_slab == NULL)
3084 				goto out_free_timewait_sock_slab_name;
3085 		}
3086 	}
3087 
3088 	mutex_lock(&proto_list_mutex);
3089 	list_add(&prot->node, &proto_list);
3090 	assign_proto_idx(prot);
3091 	mutex_unlock(&proto_list_mutex);
3092 	return 0;
3093 
3094 out_free_timewait_sock_slab_name:
3095 	kfree(prot->twsk_prot->twsk_slab_name);
3096 out_free_request_sock_slab:
3097 	req_prot_cleanup(prot->rsk_prot);
3098 
3099 	kmem_cache_destroy(prot->slab);
3100 	prot->slab = NULL;
3101 out:
3102 	return -ENOBUFS;
3103 }
3104 EXPORT_SYMBOL(proto_register);
3105 
3106 void proto_unregister(struct proto *prot)
3107 {
3108 	mutex_lock(&proto_list_mutex);
3109 	release_proto_idx(prot);
3110 	list_del(&prot->node);
3111 	mutex_unlock(&proto_list_mutex);
3112 
3113 	kmem_cache_destroy(prot->slab);
3114 	prot->slab = NULL;
3115 
3116 	req_prot_cleanup(prot->rsk_prot);
3117 
3118 	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
3119 		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
3120 		kfree(prot->twsk_prot->twsk_slab_name);
3121 		prot->twsk_prot->twsk_slab = NULL;
3122 	}
3123 }
3124 EXPORT_SYMBOL(proto_unregister);
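
/*
 * A hypothetical sketch (not from this file) of minimal module init/exit
 * code around proto_register()/proto_unregister(); example_prot and
 * struct example_sock are assumptions. alloc_slab=1 requests a dedicated
 * kmem_cache sized by .obj_size.
 */
#if 0
struct example_sock {
	struct sock	sk;	/* must be first */
};

static struct proto example_prot = {
	.name		= "EXAMPLE",
	.owner		= THIS_MODULE,
	.obj_size	= sizeof(struct example_sock),
};

static int __init example_init(void)
{
	return proto_register(&example_prot, 1);
}

static void __exit example_exit(void)
{
	proto_unregister(&example_prot);
}
#endif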
3125 
3126 #ifdef CONFIG_PROC_FS
3127 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
3128 	__acquires(proto_list_mutex)
3129 {
3130 	mutex_lock(&proto_list_mutex);
3131 	return seq_list_start_head(&proto_list, *pos);
3132 }
3133 
3134 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3135 {
3136 	return seq_list_next(v, &proto_list, pos);
3137 }
3138 
3139 static void proto_seq_stop(struct seq_file *seq, void *v)
3140 	__releases(proto_list_mutex)
3141 {
3142 	mutex_unlock(&proto_list_mutex);
3143 }
3144 
3145 static char proto_method_implemented(const void *method)
3146 {
3147 	return method == NULL ? 'n' : 'y';
3148 }
3149 static long sock_prot_memory_allocated(struct proto *proto)
3150 {
3151 	return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
3152 }
3153 
3154 static char *sock_prot_memory_pressure(struct proto *proto)
3155 {
3156 	return proto->memory_pressure != NULL ?
3157 	proto_memory_pressure(proto) ? "yes" : "no" : "NI";
3158 }
3159 
3160 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
3161 {
3162 
3163 	seq_printf(seq, "%-9s %4u %6d  %6ld   %-3s %6u   %-3s  %-10s "
3164 			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
3165 		   proto->name,
3166 		   proto->obj_size,
3167 		   sock_prot_inuse_get(seq_file_net(seq), proto),
3168 		   sock_prot_memory_allocated(proto),
3169 		   sock_prot_memory_pressure(proto),
3170 		   proto->max_header,
3171 		   proto->slab == NULL ? "no" : "yes",
3172 		   module_name(proto->owner),
3173 		   proto_method_implemented(proto->close),
3174 		   proto_method_implemented(proto->connect),
3175 		   proto_method_implemented(proto->disconnect),
3176 		   proto_method_implemented(proto->accept),
3177 		   proto_method_implemented(proto->ioctl),
3178 		   proto_method_implemented(proto->init),
3179 		   proto_method_implemented(proto->destroy),
3180 		   proto_method_implemented(proto->shutdown),
3181 		   proto_method_implemented(proto->setsockopt),
3182 		   proto_method_implemented(proto->getsockopt),
3183 		   proto_method_implemented(proto->sendmsg),
3184 		   proto_method_implemented(proto->recvmsg),
3185 		   proto_method_implemented(proto->sendpage),
3186 		   proto_method_implemented(proto->bind),
3187 		   proto_method_implemented(proto->backlog_rcv),
3188 		   proto_method_implemented(proto->hash),
3189 		   proto_method_implemented(proto->unhash),
3190 		   proto_method_implemented(proto->get_port),
3191 		   proto_method_implemented(proto->enter_memory_pressure));
3192 }
3193 
3194 static int proto_seq_show(struct seq_file *seq, void *v)
3195 {
3196 	if (v == &proto_list)
3197 		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
3198 			   "protocol",
3199 			   "size",
3200 			   "sockets",
3201 			   "memory",
3202 			   "press",
3203 			   "maxhdr",
3204 			   "slab",
3205 			   "module",
3206 			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
3207 	else
3208 		proto_seq_printf(seq, list_entry(v, struct proto, node));
3209 	return 0;
3210 }
3211 
3212 static const struct seq_operations proto_seq_ops = {
3213 	.start  = proto_seq_start,
3214 	.next   = proto_seq_next,
3215 	.stop   = proto_seq_stop,
3216 	.show   = proto_seq_show,
3217 };
3218 
3219 static int proto_seq_open(struct inode *inode, struct file *file)
3220 {
3221 	return seq_open_net(inode, file, &proto_seq_ops,
3222 			    sizeof(struct seq_net_private));
3223 }
3224 
3225 static const struct file_operations proto_seq_fops = {
3226 	.owner		= THIS_MODULE,
3227 	.open		= proto_seq_open,
3228 	.read		= seq_read,
3229 	.llseek		= seq_lseek,
3230 	.release	= seq_release_net,
3231 };
3232 
3233 static __net_init int proto_init_net(struct net *net)
3234 {
3235 	if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
3236 		return -ENOMEM;
3237 
3238 	return 0;
3239 }
3240 
3241 static __net_exit void proto_exit_net(struct net *net)
3242 {
3243 	remove_proc_entry("protocols", net->proc_net);
3244 }
3245 
3246 
3247 static __net_initdata struct pernet_operations proto_net_ops = {
3248 	.init = proto_init_net,
3249 	.exit = proto_exit_net,
3250 };
3251 
3252 static int __init proto_init(void)
3253 {
3254 	return register_pernet_subsys(&proto_net_ops);
3255 }
3256 
3257 subsys_initcall(proto_init);
3258 
3259 #endif /* PROC_FS */
3260 
3261 #ifdef CONFIG_NET_RX_BUSY_POLL
3262 bool sk_busy_loop_end(void *p, unsigned long start_time)
3263 {
3264 	struct sock *sk = p;
3265 
3266 	return !skb_queue_empty(&sk->sk_receive_queue) ||
3267 	       sk_busy_loop_timeout(sk, start_time);
3268 }
3269 EXPORT_SYMBOL(sk_busy_loop_end);
3270 #endif /* CONFIG_NET_RX_BUSY_POLL */
3271