1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * INET		An implementation of the TCP/IP protocol suite for the LINUX
4  *		operating system.  INET is implemented using the  BSD Socket
5  *		interface as the means of communication with the user level.
6  *
7  *		Generic socket support routines. Memory allocators, socket lock/release
8  *		handler for protocols to use and generic option handler.
9  *
10  * Authors:	Ross Biro
11  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *		Florian La Roche, <flla@stud.uni-sb.de>
13  *		Alan Cox, <A.Cox@swansea.ac.uk>
14  *
15  * Fixes:
16  *		Alan Cox	: 	Numerous verify_area() problems
17  *		Alan Cox	:	Connecting on a connecting socket
18  *					now returns an error for tcp.
19  *		Alan Cox	:	sock->protocol is set correctly.
20  *					and is not sometimes left as 0.
21  *		Alan Cox	:	connect handles icmp errors on a
22  *					connect properly. Unfortunately there
23  *					is a restart syscall nasty there. I
24  *					can't match BSD without hacking the C
25  *					library. Ideas urgently sought!
26  *		Alan Cox	:	Disallow bind() to addresses that are
27  *					not ours - especially broadcast ones!!
28  *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
29  *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
30  *					instead they leave that for the DESTROY timer.
31  *		Alan Cox	:	Clean up error flag in accept
32  *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
33  *					was buggy. Put a remove_sock() in the handler
34  *					for memory when we hit 0. Also altered the timer
35  *					code. The ACK stuff can wait and needs major
36  *					TCP layer surgery.
37  *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
38  *					and fixed timer/inet_bh race.
39  *		Alan Cox	:	Added zapped flag for TCP
40  *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
41  *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
42  *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
43  *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
44  *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
45  *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
46  *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
47  *	Pauline Middelink	:	identd support
48  *		Alan Cox	:	Fixed connect() taking signals I think.
49  *		Alan Cox	:	SO_LINGER supported
50  *		Alan Cox	:	Error reporting fixes
51  *		Anonymous	:	inet_create tidied up (sk->reuse setting)
52  *		Alan Cox	:	inet sockets don't set sk->type!
53  *		Alan Cox	:	Split socket option code
54  *		Alan Cox	:	Callbacks
55  *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
56  *		Alex		:	Removed restriction on inet fioctl
57  *		Alan Cox	:	Splitting INET from NET core
58  *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
59  *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
60  *		Alan Cox	:	Split IP from generic code
61  *		Alan Cox	:	New kfree_skbmem()
62  *		Alan Cox	:	Make SO_DEBUG superuser only.
63  *		Alan Cox	:	Allow anyone to clear SO_DEBUG
64  *					(compatibility fix)
65  *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
66  *		Alan Cox	:	Allocator for a socket is settable.
67  *		Alan Cox	:	SO_ERROR includes soft errors.
68  *		Alan Cox	:	Allow NULL arguments on some SO_ opts
69  *		Alan Cox	: 	Generic socket allocation to make hooks
70  *					easier (suggested by Craig Metz).
71  *		Michael Pall	:	SO_ERROR returns positive errno again
72  *              Steve Whitehouse:       Added default destructor to free
73  *                                      protocol private data.
74  *              Steve Whitehouse:       Added various other default routines
75  *                                      common to several socket families.
76  *              Chris Evans     :       Call suser() check last on F_SETOWN
77  *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
78  *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
79  *		Andi Kleen	:	Fix write_space callback
80  *		Chris Evans	:	Security fixes - signedness again
81  *		Arnaldo C. Melo :       cleanups, use skb_queue_purge
82  *
83  * To Fix:
84  */
85 
86 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
87 
88 #include <asm/unaligned.h>
89 #include <linux/capability.h>
90 #include <linux/errno.h>
91 #include <linux/errqueue.h>
92 #include <linux/types.h>
93 #include <linux/socket.h>
94 #include <linux/in.h>
95 #include <linux/kernel.h>
96 #include <linux/module.h>
97 #include <linux/proc_fs.h>
98 #include <linux/seq_file.h>
99 #include <linux/sched.h>
100 #include <linux/sched/mm.h>
101 #include <linux/timer.h>
102 #include <linux/string.h>
103 #include <linux/sockios.h>
104 #include <linux/net.h>
105 #include <linux/mm.h>
106 #include <linux/slab.h>
107 #include <linux/interrupt.h>
108 #include <linux/poll.h>
109 #include <linux/tcp.h>
110 #include <linux/init.h>
111 #include <linux/highmem.h>
112 #include <linux/user_namespace.h>
113 #include <linux/static_key.h>
114 #include <linux/memcontrol.h>
115 #include <linux/prefetch.h>
116 #include <linux/compat.h>
117 
118 #include <linux/uaccess.h>
119 
120 #include <linux/netdevice.h>
121 #include <net/protocol.h>
122 #include <linux/skbuff.h>
123 #include <net/net_namespace.h>
124 #include <net/request_sock.h>
125 #include <net/sock.h>
126 #include <linux/net_tstamp.h>
127 #include <net/xfrm.h>
128 #include <linux/ipsec.h>
129 #include <net/cls_cgroup.h>
130 #include <net/netprio_cgroup.h>
131 #include <linux/sock_diag.h>
132 
133 #include <linux/filter.h>
134 #include <net/sock_reuseport.h>
135 #include <net/bpf_sk_storage.h>
136 
137 #include <trace/events/sock.h>
138 
139 #include <net/tcp.h>
140 #include <net/busy_poll.h>
141 
142 static DEFINE_MUTEX(proto_list_mutex);
143 static LIST_HEAD(proto_list);
144 
145 static void sock_inuse_add(struct net *net, int val);
146 
147 /**
148  * sk_ns_capable - General socket capability test
149  * @sk: Socket to use a capability on or through
150  * @user_ns: The user namespace of the capability to use
151  * @cap: The capability to use
152  *
153  * Test to see if the opener of the socket had the capability @cap when
154  * the socket was created and that the current process has the capability
155  * @cap in the user namespace @user_ns.
156  */
157 bool sk_ns_capable(const struct sock *sk,
158 		   struct user_namespace *user_ns, int cap)
159 {
160 	return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
161 		ns_capable(user_ns, cap);
162 }
163 EXPORT_SYMBOL(sk_ns_capable);
164 
165 /**
166  * sk_capable - Socket global capability test
167  * @sk: Socket to use a capability on or through
168  * @cap: The global capability to use
169  *
170  * Test to see if the opener of the socket had the capability @cap when
171  * the socket was created and that the current process has the capability
172  * @cap in all user namespaces.
173  */
174 bool sk_capable(const struct sock *sk, int cap)
175 {
176 	return sk_ns_capable(sk, &init_user_ns, cap);
177 }
178 EXPORT_SYMBOL(sk_capable);
179 
180 /**
181  * sk_net_capable - Network namespace socket capability test
182  * @sk: Socket to use a capability on or through
183  * @cap: The capability to use
184  *
185  * Test to see if the opener of the socket had the capability @cap when the
186  * socket was created and that the current process has the capability @cap
187  * over the network namespace the socket is a member of.
188  */
189 bool sk_net_capable(const struct sock *sk, int cap)
190 {
191 	return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
192 }
193 EXPORT_SYMBOL(sk_net_capable);
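/* Illustrative usage (editor's sketch, not part of the original file): a
 * protocol handler that wants to gate a privileged action on both the
 * opener's and the current task's privileges could write
 *
 *	if (!sk_net_capable(sk, CAP_NET_ADMIN))
 *		return -EPERM;
 *
 * which, per the helpers above, checks CAP_NET_ADMIN against the user
 * namespace that owns the socket's network namespace.
 */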
194 
195 /*
196  * Each address family might have different locking rules, so we have
197  * one slock key per address family and separate keys for internal and
198  * userspace sockets.
199  */
200 static struct lock_class_key af_family_keys[AF_MAX];
201 static struct lock_class_key af_family_kern_keys[AF_MAX];
202 static struct lock_class_key af_family_slock_keys[AF_MAX];
203 static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
204 
205 /*
206  * Make lock validator output more readable. (We pre-construct these
207  * strings at build time, so that runtime initialization of socket
208  * locks is fast):
209  */
210 
211 #define _sock_locks(x)						  \
212   x "AF_UNSPEC",	x "AF_UNIX"     ,	x "AF_INET"     , \
213   x "AF_AX25"  ,	x "AF_IPX"      ,	x "AF_APPLETALK", \
214   x "AF_NETROM",	x "AF_BRIDGE"   ,	x "AF_ATMPVC"   , \
215   x "AF_X25"   ,	x "AF_INET6"    ,	x "AF_ROSE"     , \
216   x "AF_DECnet",	x "AF_NETBEUI"  ,	x "AF_SECURITY" , \
217   x "AF_KEY"   ,	x "AF_NETLINK"  ,	x "AF_PACKET"   , \
218   x "AF_ASH"   ,	x "AF_ECONET"   ,	x "AF_ATMSVC"   , \
219   x "AF_RDS"   ,	x "AF_SNA"      ,	x "AF_IRDA"     , \
220   x "AF_PPPOX" ,	x "AF_WANPIPE"  ,	x "AF_LLC"      , \
221   x "27"       ,	x "28"          ,	x "AF_CAN"      , \
222   x "AF_TIPC"  ,	x "AF_BLUETOOTH",	x "IUCV"        , \
223   x "AF_RXRPC" ,	x "AF_ISDN"     ,	x "AF_PHONET"   , \
224   x "AF_IEEE802154",	x "AF_CAIF"	,	x "AF_ALG"      , \
225   x "AF_NFC"   ,	x "AF_VSOCK"    ,	x "AF_KCM"      , \
226   x "AF_QIPCRTR",	x "AF_SMC"	,	x "AF_XDP"	, \
227   x "AF_MAX"
228 
229 static const char *const af_family_key_strings[AF_MAX+1] = {
230 	_sock_locks("sk_lock-")
231 };
232 static const char *const af_family_slock_key_strings[AF_MAX+1] = {
233 	_sock_locks("slock-")
234 };
235 static const char *const af_family_clock_key_strings[AF_MAX+1] = {
236 	_sock_locks("clock-")
237 };
238 
239 static const char *const af_family_kern_key_strings[AF_MAX+1] = {
240 	_sock_locks("k-sk_lock-")
241 };
242 static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
243 	_sock_locks("k-slock-")
244 };
245 static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
246 	_sock_locks("k-clock-")
247 };
248 static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
249 	_sock_locks("rlock-")
250 };
251 static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
252 	_sock_locks("wlock-")
253 };
254 static const char *const af_family_elock_key_strings[AF_MAX+1] = {
255 	_sock_locks("elock-")
256 };
257 
258 /*
259  * sk_callback_lock and sk queues locking rules are per-address-family,
260  * so split the lock classes by using a per-AF key:
261  */
262 static struct lock_class_key af_callback_keys[AF_MAX];
263 static struct lock_class_key af_rlock_keys[AF_MAX];
264 static struct lock_class_key af_wlock_keys[AF_MAX];
265 static struct lock_class_key af_elock_keys[AF_MAX];
266 static struct lock_class_key af_kern_callback_keys[AF_MAX];
267 
268 /* Run time adjustable parameters. */
269 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
270 EXPORT_SYMBOL(sysctl_wmem_max);
271 __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
272 EXPORT_SYMBOL(sysctl_rmem_max);
273 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
274 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
275 
276 /* Maximal space eaten by iovec or ancillary data plus some space */
277 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
278 EXPORT_SYMBOL(sysctl_optmem_max);
279 
280 int sysctl_tstamp_allow_data __read_mostly = 1;
281 
282 DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
283 EXPORT_SYMBOL_GPL(memalloc_socks_key);
284 
285 /**
286  * sk_set_memalloc - sets %SOCK_MEMALLOC
287  * @sk: socket to set it on
288  *
289  * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
290  * It's the responsibility of the admin to adjust min_free_kbytes
291  * to meet the requirements.
292  */
293 void sk_set_memalloc(struct sock *sk)
294 {
295 	sock_set_flag(sk, SOCK_MEMALLOC);
296 	sk->sk_allocation |= __GFP_MEMALLOC;
297 	static_branch_inc(&memalloc_socks_key);
298 }
299 EXPORT_SYMBOL_GPL(sk_set_memalloc);
300 
301 void sk_clear_memalloc(struct sock *sk)
302 {
303 	sock_reset_flag(sk, SOCK_MEMALLOC);
304 	sk->sk_allocation &= ~__GFP_MEMALLOC;
305 	static_branch_dec(&memalloc_socks_key);
306 
307 	/*
308 	 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
309 	 * progress of swapping. SOCK_MEMALLOC may be cleared while
310 	 * it has rmem allocations due to the last swapfile being deactivated
311 	 * but there is a risk that the socket is unusable due to exceeding
312 	 * the rmem limits. Reclaim the reserves and obey rmem limits again.
313 	 */
314 	sk_mem_reclaim(sk);
315 }
316 EXPORT_SYMBOL_GPL(sk_clear_memalloc);
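/* Illustrative usage (editor's sketch, the caller is hypothetical): a
 * subsystem that carries memory-reclaim traffic over the network, such as
 * swap over a network block device, brackets the socket's lifetime with
 * these helpers so its transmissions may dip into the emergency reserves
 * only while that is actually needed:
 *
 *	sk_set_memalloc(sock->sk);
 *	...do the reclaim-related I/O...
 *	sk_clear_memalloc(sock->sk);
 */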
317 
318 int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
319 {
320 	int ret;
321 	unsigned int noreclaim_flag;
322 
323 	/* these should have been dropped before queueing */
324 	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
325 
326 	noreclaim_flag = memalloc_noreclaim_save();
327 	ret = sk->sk_backlog_rcv(sk, skb);
328 	memalloc_noreclaim_restore(noreclaim_flag);
329 
330 	return ret;
331 }
332 EXPORT_SYMBOL(__sk_backlog_rcv);
333 
334 void sk_error_report(struct sock *sk)
335 {
336 	sk->sk_error_report(sk);
337 
338 	switch (sk->sk_family) {
339 	case AF_INET:
340 		fallthrough;
341 	case AF_INET6:
342 		trace_inet_sk_error_report(sk);
343 		break;
344 	default:
345 		break;
346 	}
347 }
348 EXPORT_SYMBOL(sk_error_report);
349 
350 static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
351 {
352 	struct __kernel_sock_timeval tv;
353 
354 	if (timeo == MAX_SCHEDULE_TIMEOUT) {
355 		tv.tv_sec = 0;
356 		tv.tv_usec = 0;
357 	} else {
358 		tv.tv_sec = timeo / HZ;
359 		tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ;
360 	}
361 
362 	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
363 		struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec };
364 		*(struct old_timeval32 *)optval = tv32;
365 		return sizeof(tv32);
366 	}
367 
368 	if (old_timeval) {
369 		struct __kernel_old_timeval old_tv;
370 		old_tv.tv_sec = tv.tv_sec;
371 		old_tv.tv_usec = tv.tv_usec;
372 		*(struct __kernel_old_timeval *)optval = old_tv;
373 		return sizeof(old_tv);
374 	}
375 
376 	*(struct __kernel_sock_timeval *)optval = tv;
377 	return sizeof(tv);
378 }
379 
380 static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
381 			    bool old_timeval)
382 {
383 	struct __kernel_sock_timeval tv;
384 
385 	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
386 		struct old_timeval32 tv32;
387 
388 		if (optlen < sizeof(tv32))
389 			return -EINVAL;
390 
391 		if (copy_from_sockptr(&tv32, optval, sizeof(tv32)))
392 			return -EFAULT;
393 		tv.tv_sec = tv32.tv_sec;
394 		tv.tv_usec = tv32.tv_usec;
395 	} else if (old_timeval) {
396 		struct __kernel_old_timeval old_tv;
397 
398 		if (optlen < sizeof(old_tv))
399 			return -EINVAL;
400 		if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv)))
401 			return -EFAULT;
402 		tv.tv_sec = old_tv.tv_sec;
403 		tv.tv_usec = old_tv.tv_usec;
404 	} else {
405 		if (optlen < sizeof(tv))
406 			return -EINVAL;
407 		if (copy_from_sockptr(&tv, optval, sizeof(tv)))
408 			return -EFAULT;
409 	}
410 	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
411 		return -EDOM;
412 
413 	if (tv.tv_sec < 0) {
414 		static int warned __read_mostly;
415 
416 		*timeo_p = 0;
417 		if (warned < 10 && net_ratelimit()) {
418 			warned++;
419 			pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
420 				__func__, current->comm, task_pid_nr(current));
421 		}
422 		return 0;
423 	}
424 	*timeo_p = MAX_SCHEDULE_TIMEOUT;
425 	if (tv.tv_sec == 0 && tv.tv_usec == 0)
426 		return 0;
427 	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1))
428 		*timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec, USEC_PER_SEC / HZ);
429 	return 0;
430 }
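/* Worked example for the conversion above (editor's note, assuming
 * HZ == 1000): a user-supplied timeout of { .tv_sec = 2, .tv_usec = 500000 }
 * yields
 *
 *	*timeo_p = 2 * HZ + DIV_ROUND_UP(500000, USEC_PER_SEC / HZ)
 *		 = 2000 + 500 = 2500 jiffies
 *
 * while { 0, 0 } leaves MAX_SCHEDULE_TIMEOUT, i.e. "wait forever".
 */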
431 
432 static bool sock_needs_netstamp(const struct sock *sk)
433 {
434 	switch (sk->sk_family) {
435 	case AF_UNSPEC:
436 	case AF_UNIX:
437 		return false;
438 	default:
439 		return true;
440 	}
441 }
442 
443 static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
444 {
445 	if (sk->sk_flags & flags) {
446 		sk->sk_flags &= ~flags;
447 		if (sock_needs_netstamp(sk) &&
448 		    !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
449 			net_disable_timestamp();
450 	}
451 }
452 
453 
454 int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
455 {
456 	unsigned long flags;
457 	struct sk_buff_head *list = &sk->sk_receive_queue;
458 
459 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
460 		atomic_inc(&sk->sk_drops);
461 		trace_sock_rcvqueue_full(sk, skb);
462 		return -ENOMEM;
463 	}
464 
465 	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
466 		atomic_inc(&sk->sk_drops);
467 		return -ENOBUFS;
468 	}
469 
470 	skb->dev = NULL;
471 	skb_set_owner_r(skb, sk);
472 
473	/* We escape from the RCU-protected region here, so make sure we
474	 * don't leak a non-refcounted dst.
475	 */
476 	skb_dst_force(skb);
477 
478 	spin_lock_irqsave(&list->lock, flags);
479 	sock_skb_set_dropcount(sk, skb);
480 	__skb_queue_tail(list, skb);
481 	spin_unlock_irqrestore(&list->lock, flags);
482 
483 	if (!sock_flag(sk, SOCK_DEAD))
484 		sk->sk_data_ready(sk);
485 	return 0;
486 }
487 EXPORT_SYMBOL(__sock_queue_rcv_skb);
488 
489 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
490 {
491 	int err;
492 
493 	err = sk_filter(sk, skb);
494 	if (err)
495 		return err;
496 
497 	return __sock_queue_rcv_skb(sk, skb);
498 }
499 EXPORT_SYMBOL(sock_queue_rcv_skb);
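/* Typical caller pattern (editor's sketch): a protocol's receive handler
 * queues the skb and is responsible for freeing it when queueing fails,
 * since sock_queue_rcv_skb() does not consume the skb on error:
 *
 *	err = sock_queue_rcv_skb(sk, skb);
 *	if (err < 0)
 *		kfree_skb(skb);
 */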
500 
501 int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
502 		     const int nested, unsigned int trim_cap, bool refcounted)
503 {
504 	int rc = NET_RX_SUCCESS;
505 
506 	if (sk_filter_trim_cap(sk, skb, trim_cap))
507 		goto discard_and_relse;
508 
509 	skb->dev = NULL;
510 
511 	if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
512 		atomic_inc(&sk->sk_drops);
513 		goto discard_and_relse;
514 	}
515 	if (nested)
516 		bh_lock_sock_nested(sk);
517 	else
518 		bh_lock_sock(sk);
519 	if (!sock_owned_by_user(sk)) {
520 		/*
521 		 * trylock + unlock semantics:
522 		 */
523 		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
524 
525 		rc = sk_backlog_rcv(sk, skb);
526 
527 		mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
528 	} else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
529 		bh_unlock_sock(sk);
530 		atomic_inc(&sk->sk_drops);
531 		goto discard_and_relse;
532 	}
533 
534 	bh_unlock_sock(sk);
535 out:
536 	if (refcounted)
537 		sock_put(sk);
538 	return rc;
539 discard_and_relse:
540 	kfree_skb(skb);
541 	goto out;
542 }
543 EXPORT_SYMBOL(__sk_receive_skb);
544 
545 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *,
546 							  u32));
547 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
548 							   u32));
549 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
550 {
551 	struct dst_entry *dst = __sk_dst_get(sk);
552 
553 	if (dst && dst->obsolete &&
554 	    INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
555 			       dst, cookie) == NULL) {
556 		sk_tx_queue_clear(sk);
557 		sk->sk_dst_pending_confirm = 0;
558 		RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
559 		dst_release(dst);
560 		return NULL;
561 	}
562 
563 	return dst;
564 }
565 EXPORT_SYMBOL(__sk_dst_check);
566 
567 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
568 {
569 	struct dst_entry *dst = sk_dst_get(sk);
570 
571 	if (dst && dst->obsolete &&
572 	    INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
573 			       dst, cookie) == NULL) {
574 		sk_dst_reset(sk);
575 		dst_release(dst);
576 		return NULL;
577 	}
578 
579 	return dst;
580 }
581 EXPORT_SYMBOL(sk_dst_check);
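/* Illustrative usage (editor's sketch): callers revalidate a cached route
 * before using it and fall back to a fresh lookup when the cached entry
 * has been obsoleted:
 *
 *	dst = sk_dst_check(sk, cookie);
 *	if (!dst)
 *		...do a new route lookup and sk_dst_set() the result...
 *
 * The cookie is protocol specific; callers that do not track one
 * typically pass 0.
 */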
582 
583 static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
584 {
585 	int ret = -ENOPROTOOPT;
586 #ifdef CONFIG_NETDEVICES
587 	struct net *net = sock_net(sk);
588 
589 	/* Sorry... */
590 	ret = -EPERM;
591 	if (sk->sk_bound_dev_if && !ns_capable(net->user_ns, CAP_NET_RAW))
592 		goto out;
593 
594 	ret = -EINVAL;
595 	if (ifindex < 0)
596 		goto out;
597 
598 	sk->sk_bound_dev_if = ifindex;
599 	if (sk->sk_prot->rehash)
600 		sk->sk_prot->rehash(sk);
601 	sk_dst_reset(sk);
602 
603 	ret = 0;
604 
605 out:
606 #endif
607 
608 	return ret;
609 }
610 
611 int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk)
612 {
613 	int ret;
614 
615 	if (lock_sk)
616 		lock_sock(sk);
617 	ret = sock_bindtoindex_locked(sk, ifindex);
618 	if (lock_sk)
619 		release_sock(sk);
620 
621 	return ret;
622 }
623 EXPORT_SYMBOL(sock_bindtoindex);
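/* Illustrative usage (editor's sketch): an in-kernel socket user that
 * already knows the interface index can bind without going through the
 * SO_BINDTODEVICE name lookup below:
 *
 *	err = sock_bindtoindex(sock->sk, dev->ifindex, true);
 *
 * Passing lock_sk == false is for callers that already hold the socket
 * lock.
 */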
624 
625 static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
626 {
627 	int ret = -ENOPROTOOPT;
628 #ifdef CONFIG_NETDEVICES
629 	struct net *net = sock_net(sk);
630 	char devname[IFNAMSIZ];
631 	int index;
632 
633 	ret = -EINVAL;
634 	if (optlen < 0)
635 		goto out;
636 
637 	/* Bind this socket to a particular device like "eth0",
638 	 * as specified in the passed interface name. If the
639 	 * name is "" or the option length is zero the socket
640 	 * is not bound.
641 	 */
642 	if (optlen > IFNAMSIZ - 1)
643 		optlen = IFNAMSIZ - 1;
644 	memset(devname, 0, sizeof(devname));
645 
646 	ret = -EFAULT;
647 	if (copy_from_sockptr(devname, optval, optlen))
648 		goto out;
649 
650 	index = 0;
651 	if (devname[0] != '\0') {
652 		struct net_device *dev;
653 
654 		rcu_read_lock();
655 		dev = dev_get_by_name_rcu(net, devname);
656 		if (dev)
657 			index = dev->ifindex;
658 		rcu_read_unlock();
659 		ret = -ENODEV;
660 		if (!dev)
661 			goto out;
662 	}
663 
664 	return sock_bindtoindex(sk, index, true);
665 out:
666 #endif
667 
668 	return ret;
669 }
670 
671 static int sock_getbindtodevice(struct sock *sk, char __user *optval,
672 				int __user *optlen, int len)
673 {
674 	int ret = -ENOPROTOOPT;
675 #ifdef CONFIG_NETDEVICES
676 	struct net *net = sock_net(sk);
677 	char devname[IFNAMSIZ];
678 
679 	if (sk->sk_bound_dev_if == 0) {
680 		len = 0;
681 		goto zero;
682 	}
683 
684 	ret = -EINVAL;
685 	if (len < IFNAMSIZ)
686 		goto out;
687 
688 	ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
689 	if (ret)
690 		goto out;
691 
692 	len = strlen(devname) + 1;
693 
694 	ret = -EFAULT;
695 	if (copy_to_user(optval, devname, len))
696 		goto out;
697 
698 zero:
699 	ret = -EFAULT;
700 	if (put_user(len, optlen))
701 		goto out;
702 
703 	ret = 0;
704 
705 out:
706 #endif
707 
708 	return ret;
709 }
710 
711 bool sk_mc_loop(struct sock *sk)
712 {
713 	if (dev_recursion_level())
714 		return false;
715 	if (!sk)
716 		return true;
717 	switch (sk->sk_family) {
718 	case AF_INET:
719 		return inet_sk(sk)->mc_loop;
720 #if IS_ENABLED(CONFIG_IPV6)
721 	case AF_INET6:
722 		return inet6_sk(sk)->mc_loop;
723 #endif
724 	}
725 	WARN_ON_ONCE(1);
726 	return true;
727 }
728 EXPORT_SYMBOL(sk_mc_loop);
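/* Illustrative usage (editor's sketch, daddr and the delivery step are
 * placeholders): an output path consults this helper to decide whether a
 * multicast packet sent on the socket should also be looped back for
 * local delivery:
 *
 *	if (ipv4_is_multicast(daddr) && sk_mc_loop(sk))
 *		...deliver a local copy as well...
 */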
729 
730 void sock_set_reuseaddr(struct sock *sk)
731 {
732 	lock_sock(sk);
733 	sk->sk_reuse = SK_CAN_REUSE;
734 	release_sock(sk);
735 }
736 EXPORT_SYMBOL(sock_set_reuseaddr);
737 
738 void sock_set_reuseport(struct sock *sk)
739 {
740 	lock_sock(sk);
741 	sk->sk_reuseport = true;
742 	release_sock(sk);
743 }
744 EXPORT_SYMBOL(sock_set_reuseport);
745 
746 void sock_no_linger(struct sock *sk)
747 {
748 	lock_sock(sk);
749 	sk->sk_lingertime = 0;
750 	sock_set_flag(sk, SOCK_LINGER);
751 	release_sock(sk);
752 }
753 EXPORT_SYMBOL(sock_no_linger);
754 
755 void sock_set_priority(struct sock *sk, u32 priority)
756 {
757 	lock_sock(sk);
758 	sk->sk_priority = priority;
759 	release_sock(sk);
760 }
761 EXPORT_SYMBOL(sock_set_priority);
762 
763 void sock_set_sndtimeo(struct sock *sk, s64 secs)
764 {
765 	lock_sock(sk);
766 	if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
767 		sk->sk_sndtimeo = secs * HZ;
768 	else
769 		sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
770 	release_sock(sk);
771 }
772 EXPORT_SYMBOL(sock_set_sndtimeo);
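/* Illustrative usage (editor's sketch): the helpers above exist so that
 * in-kernel socket users can tune a socket without emulating
 * setsockopt(2).  A hypothetical kernel client might do:
 *
 *	sock_set_reuseaddr(sock->sk);
 *	sock_no_linger(sock->sk);
 *	sock_set_sndtimeo(sock->sk, 5);
 *
 * where the last call requests a five second send timeout.
 */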
773 
774 static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
775 {
776 	if (val)  {
777 		sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new);
778 		sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns);
779 		sock_set_flag(sk, SOCK_RCVTSTAMP);
780 		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
781 	} else {
782 		sock_reset_flag(sk, SOCK_RCVTSTAMP);
783 		sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
784 	}
785 }
786 
787 void sock_enable_timestamps(struct sock *sk)
788 {
789 	lock_sock(sk);
790 	__sock_set_timestamps(sk, true, false, true);
791 	release_sock(sk);
792 }
793 EXPORT_SYMBOL(sock_enable_timestamps);
794 
795 void sock_set_timestamp(struct sock *sk, int optname, bool valbool)
796 {
797 	switch (optname) {
798 	case SO_TIMESTAMP_OLD:
799 		__sock_set_timestamps(sk, valbool, false, false);
800 		break;
801 	case SO_TIMESTAMP_NEW:
802 		__sock_set_timestamps(sk, valbool, true, false);
803 		break;
804 	case SO_TIMESTAMPNS_OLD:
805 		__sock_set_timestamps(sk, valbool, false, true);
806 		break;
807 	case SO_TIMESTAMPNS_NEW:
808 		__sock_set_timestamps(sk, valbool, true, true);
809 		break;
810 	}
811 }
812 
813 int sock_set_timestamping(struct sock *sk, int optname, int val)
814 {
815 	if (val & ~SOF_TIMESTAMPING_MASK)
816 		return -EINVAL;
817 
818 	if (val & SOF_TIMESTAMPING_OPT_ID &&
819 	    !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
820 		if (sk->sk_protocol == IPPROTO_TCP &&
821 		    sk->sk_type == SOCK_STREAM) {
822 			if ((1 << sk->sk_state) &
823 			    (TCPF_CLOSE | TCPF_LISTEN))
824 				return -EINVAL;
825 			sk->sk_tskey = tcp_sk(sk)->snd_una;
826 		} else {
827 			sk->sk_tskey = 0;
828 		}
829 	}
830 
831 	if (val & SOF_TIMESTAMPING_OPT_STATS &&
832 	    !(val & SOF_TIMESTAMPING_OPT_TSONLY))
833 		return -EINVAL;
834 
835 	sk->sk_tsflags = val;
836 	sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
837 
838 	if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
839 		sock_enable_timestamp(sk,
840 				      SOCK_TIMESTAMPING_RX_SOFTWARE);
841 	else
842 		sock_disable_timestamp(sk,
843 				       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
844 	return 0;
845 }
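/* Illustrative usage (editor's sketch): a caller enabling software TX
 * timestamps with per-packet IDs might combine flags like this:
 *
 *	val = SOF_TIMESTAMPING_TX_SOFTWARE |
 *	      SOF_TIMESTAMPING_SOFTWARE |
 *	      SOF_TIMESTAMPING_OPT_ID;
 *	err = sock_set_timestamping(sk, SO_TIMESTAMPING_NEW, val);
 *
 * Any bit outside SOF_TIMESTAMPING_MASK is rejected with -EINVAL above.
 */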
846 
847 void sock_set_keepalive(struct sock *sk)
848 {
849 	lock_sock(sk);
850 	if (sk->sk_prot->keepalive)
851 		sk->sk_prot->keepalive(sk, true);
852 	sock_valbool_flag(sk, SOCK_KEEPOPEN, true);
853 	release_sock(sk);
854 }
855 EXPORT_SYMBOL(sock_set_keepalive);
856 
857 static void __sock_set_rcvbuf(struct sock *sk, int val)
858 {
859 	/* Ensure val * 2 fits into an int, to prevent max_t() from treating it
860 	 * as a negative value.
861 	 */
862 	val = min_t(int, val, INT_MAX / 2);
863 	sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
864 
865 	/* We double it on the way in to account for "struct sk_buff" etc.
866 	 * overhead.   Applications assume that the SO_RCVBUF setting they make
867 	 * will allow that much actual data to be received on that socket.
868 	 *
869 	 * Applications are unaware that "struct sk_buff" and other overheads
870 	 * allocate from the receive buffer during socket buffer allocation.
871 	 *
872 	 * And after considering the possible alternatives, returning the value
873 	 * we actually used in getsockopt is the most desirable behavior.
874 	 */
875 	WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF));
876 }
877 
878 void sock_set_rcvbuf(struct sock *sk, int val)
879 {
880 	lock_sock(sk);
881 	__sock_set_rcvbuf(sk, val);
882 	release_sock(sk);
883 }
884 EXPORT_SYMBOL(sock_set_rcvbuf);
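/* Worked example (editor's note): because of the doubling above,
 * sock_set_rcvbuf(sk, 65536) results in sk->sk_rcvbuf == 131072, and that
 * doubled value is what a later getsockopt(SO_RCVBUF) reports.
 */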
885 
886 static void __sock_set_mark(struct sock *sk, u32 val)
887 {
888 	if (val != sk->sk_mark) {
889 		sk->sk_mark = val;
890 		sk_dst_reset(sk);
891 	}
892 }
893 
894 void sock_set_mark(struct sock *sk, u32 val)
895 {
896 	lock_sock(sk);
897 	__sock_set_mark(sk, val);
898 	release_sock(sk);
899 }
900 EXPORT_SYMBOL(sock_set_mark);
901 
902 /*
903  *	This is meant for all protocols to use and covers goings on
904  *	at the socket level. Everything here is generic.
905  */
906 
907 int sock_setsockopt(struct socket *sock, int level, int optname,
908 		    sockptr_t optval, unsigned int optlen)
909 {
910 	struct sock_txtime sk_txtime;
911 	struct sock *sk = sock->sk;
912 	int val;
913 	int valbool;
914 	struct linger ling;
915 	int ret = 0;
916 
917 	/*
918 	 *	Options without arguments
919 	 */
920 
921 	if (optname == SO_BINDTODEVICE)
922 		return sock_setbindtodevice(sk, optval, optlen);
923 
924 	if (optlen < sizeof(int))
925 		return -EINVAL;
926 
927 	if (copy_from_sockptr(&val, optval, sizeof(val)))
928 		return -EFAULT;
929 
930 	valbool = val ? 1 : 0;
931 
932 	lock_sock(sk);
933 
934 	switch (optname) {
935 	case SO_DEBUG:
936 		if (val && !capable(CAP_NET_ADMIN))
937 			ret = -EACCES;
938 		else
939 			sock_valbool_flag(sk, SOCK_DBG, valbool);
940 		break;
941 	case SO_REUSEADDR:
942 		sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
943 		break;
944 	case SO_REUSEPORT:
945 		sk->sk_reuseport = valbool;
946 		break;
947 	case SO_TYPE:
948 	case SO_PROTOCOL:
949 	case SO_DOMAIN:
950 	case SO_ERROR:
951 		ret = -ENOPROTOOPT;
952 		break;
953 	case SO_DONTROUTE:
954 		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
955 		sk_dst_reset(sk);
956 		break;
957 	case SO_BROADCAST:
958 		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
959 		break;
960 	case SO_SNDBUF:
961		/* Don't return an error here; BSD doesn't, and if you
962		 * think about it, this is right. Otherwise apps have to
963		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
964		 * are treated in BSD as hints.
965		 */
966 		val = min_t(u32, val, sysctl_wmem_max);
967 set_sndbuf:
968 		/* Ensure val * 2 fits into an int, to prevent max_t()
969 		 * from treating it as a negative value.
970 		 */
971 		val = min_t(int, val, INT_MAX / 2);
972 		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
973 		WRITE_ONCE(sk->sk_sndbuf,
974 			   max_t(int, val * 2, SOCK_MIN_SNDBUF));
975 		/* Wake up sending tasks if we upped the value. */
976 		sk->sk_write_space(sk);
977 		break;
978 
979 	case SO_SNDBUFFORCE:
980 		if (!capable(CAP_NET_ADMIN)) {
981 			ret = -EPERM;
982 			break;
983 		}
984 
985 		/* No negative values (to prevent underflow, as val will be
986 		 * multiplied by 2).
987 		 */
988 		if (val < 0)
989 			val = 0;
990 		goto set_sndbuf;
991 
992 	case SO_RCVBUF:
993		/* Don't return an error here; BSD doesn't, and if you
994		 * think about it, this is right. Otherwise apps have to
995		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
996		 * are treated in BSD as hints.
997		 */
998 		__sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max));
999 		break;
1000 
1001 	case SO_RCVBUFFORCE:
1002 		if (!capable(CAP_NET_ADMIN)) {
1003 			ret = -EPERM;
1004 			break;
1005 		}
1006 
1007 		/* No negative values (to prevent underflow, as val will be
1008 		 * multiplied by 2).
1009 		 */
1010 		__sock_set_rcvbuf(sk, max(val, 0));
1011 		break;
1012 
1013 	case SO_KEEPALIVE:
1014 		if (sk->sk_prot->keepalive)
1015 			sk->sk_prot->keepalive(sk, valbool);
1016 		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
1017 		break;
1018 
1019 	case SO_OOBINLINE:
1020 		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
1021 		break;
1022 
1023 	case SO_NO_CHECK:
1024 		sk->sk_no_check_tx = valbool;
1025 		break;
1026 
1027 	case SO_PRIORITY:
1028 		if ((val >= 0 && val <= 6) ||
1029 		    ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1030 			sk->sk_priority = val;
1031 		else
1032 			ret = -EPERM;
1033 		break;
1034 
1035 	case SO_LINGER:
1036 		if (optlen < sizeof(ling)) {
1037 			ret = -EINVAL;	/* 1003.1g */
1038 			break;
1039 		}
1040 		if (copy_from_sockptr(&ling, optval, sizeof(ling))) {
1041 			ret = -EFAULT;
1042 			break;
1043 		}
1044 		if (!ling.l_onoff)
1045 			sock_reset_flag(sk, SOCK_LINGER);
1046 		else {
1047 #if (BITS_PER_LONG == 32)
1048 			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
1049 				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
1050 			else
1051 #endif
1052 				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
1053 			sock_set_flag(sk, SOCK_LINGER);
1054 		}
1055 		break;
1056 
1057 	case SO_BSDCOMPAT:
1058 		break;
1059 
1060 	case SO_PASSCRED:
1061 		if (valbool)
1062 			set_bit(SOCK_PASSCRED, &sock->flags);
1063 		else
1064 			clear_bit(SOCK_PASSCRED, &sock->flags);
1065 		break;
1066 
1067 	case SO_TIMESTAMP_OLD:
1068 	case SO_TIMESTAMP_NEW:
1069 	case SO_TIMESTAMPNS_OLD:
1070 	case SO_TIMESTAMPNS_NEW:
1071 		sock_set_timestamp(sk, valbool, optname);
1072 		break;
1073 
1074 	case SO_TIMESTAMPING_NEW:
1075 	case SO_TIMESTAMPING_OLD:
1076 		ret = sock_set_timestamping(sk, optname, val);
1077 		break;
1078 
1079 	case SO_RCVLOWAT:
1080 		if (val < 0)
1081 			val = INT_MAX;
1082 		if (sock->ops->set_rcvlowat)
1083 			ret = sock->ops->set_rcvlowat(sk, val);
1084 		else
1085 			WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
1086 		break;
1087 
1088 	case SO_RCVTIMEO_OLD:
1089 	case SO_RCVTIMEO_NEW:
1090 		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
1091 				       optlen, optname == SO_RCVTIMEO_OLD);
1092 		break;
1093 
1094 	case SO_SNDTIMEO_OLD:
1095 	case SO_SNDTIMEO_NEW:
1096 		ret = sock_set_timeout(&sk->sk_sndtimeo, optval,
1097 				       optlen, optname == SO_SNDTIMEO_OLD);
1098 		break;
1099 
1100 	case SO_ATTACH_FILTER: {
1101 		struct sock_fprog fprog;
1102 
1103 		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
1104 		if (!ret)
1105 			ret = sk_attach_filter(&fprog, sk);
1106 		break;
1107 	}
1108 	case SO_ATTACH_BPF:
1109 		ret = -EINVAL;
1110 		if (optlen == sizeof(u32)) {
1111 			u32 ufd;
1112 
1113 			ret = -EFAULT;
1114 			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
1115 				break;
1116 
1117 			ret = sk_attach_bpf(ufd, sk);
1118 		}
1119 		break;
1120 
1121 	case SO_ATTACH_REUSEPORT_CBPF: {
1122 		struct sock_fprog fprog;
1123 
1124 		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
1125 		if (!ret)
1126 			ret = sk_reuseport_attach_filter(&fprog, sk);
1127 		break;
1128 	}
1129 	case SO_ATTACH_REUSEPORT_EBPF:
1130 		ret = -EINVAL;
1131 		if (optlen == sizeof(u32)) {
1132 			u32 ufd;
1133 
1134 			ret = -EFAULT;
1135 			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
1136 				break;
1137 
1138 			ret = sk_reuseport_attach_bpf(ufd, sk);
1139 		}
1140 		break;
1141 
1142 	case SO_DETACH_REUSEPORT_BPF:
1143 		ret = reuseport_detach_prog(sk);
1144 		break;
1145 
1146 	case SO_DETACH_FILTER:
1147 		ret = sk_detach_filter(sk);
1148 		break;
1149 
1150 	case SO_LOCK_FILTER:
1151 		if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
1152 			ret = -EPERM;
1153 		else
1154 			sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
1155 		break;
1156 
1157 	case SO_PASSSEC:
1158 		if (valbool)
1159 			set_bit(SOCK_PASSSEC, &sock->flags);
1160 		else
1161 			clear_bit(SOCK_PASSSEC, &sock->flags);
1162 		break;
1163 	case SO_MARK:
1164 		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1165 			ret = -EPERM;
1166 			break;
1167 		}
1168 
1169 		__sock_set_mark(sk, val);
1170 		break;
1171 
1172 	case SO_RXQ_OVFL:
1173 		sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
1174 		break;
1175 
1176 	case SO_WIFI_STATUS:
1177 		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
1178 		break;
1179 
1180 	case SO_PEEK_OFF:
1181 		if (sock->ops->set_peek_off)
1182 			ret = sock->ops->set_peek_off(sk, val);
1183 		else
1184 			ret = -EOPNOTSUPP;
1185 		break;
1186 
1187 	case SO_NOFCS:
1188 		sock_valbool_flag(sk, SOCK_NOFCS, valbool);
1189 		break;
1190 
1191 	case SO_SELECT_ERR_QUEUE:
1192 		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
1193 		break;
1194 
1195 #ifdef CONFIG_NET_RX_BUSY_POLL
1196 	case SO_BUSY_POLL:
1197 		/* allow unprivileged users to decrease the value */
1198 		if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
1199 			ret = -EPERM;
1200 		else {
1201 			if (val < 0)
1202 				ret = -EINVAL;
1203 			else
1204 				sk->sk_ll_usec = val;
1205 		}
1206 		break;
1207 	case SO_PREFER_BUSY_POLL:
1208 		if (valbool && !capable(CAP_NET_ADMIN))
1209 			ret = -EPERM;
1210 		else
1211 			WRITE_ONCE(sk->sk_prefer_busy_poll, valbool);
1212 		break;
1213 	case SO_BUSY_POLL_BUDGET:
1214 		if (val > READ_ONCE(sk->sk_busy_poll_budget) && !capable(CAP_NET_ADMIN)) {
1215 			ret = -EPERM;
1216 		} else {
1217 			if (val < 0 || val > U16_MAX)
1218 				ret = -EINVAL;
1219 			else
1220 				WRITE_ONCE(sk->sk_busy_poll_budget, val);
1221 		}
1222 		break;
1223 #endif
1224 
1225 	case SO_MAX_PACING_RATE:
1226 		{
1227 		unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val;
1228 
1229 		if (sizeof(ulval) != sizeof(val) &&
1230 		    optlen >= sizeof(ulval) &&
1231 		    copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
1232 			ret = -EFAULT;
1233 			break;
1234 		}
1235 		if (ulval != ~0UL)
1236 			cmpxchg(&sk->sk_pacing_status,
1237 				SK_PACING_NONE,
1238 				SK_PACING_NEEDED);
1239 		sk->sk_max_pacing_rate = ulval;
1240 		sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
1241 		break;
1242 		}
1243 	case SO_INCOMING_CPU:
1244 		WRITE_ONCE(sk->sk_incoming_cpu, val);
1245 		break;
1246 
1247 	case SO_CNX_ADVICE:
1248 		if (val == 1)
1249 			dst_negative_advice(sk);
1250 		break;
1251 
1252 	case SO_ZEROCOPY:
1253 		if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
1254 			if (!((sk->sk_type == SOCK_STREAM &&
1255 			       sk->sk_protocol == IPPROTO_TCP) ||
1256 			      (sk->sk_type == SOCK_DGRAM &&
1257 			       sk->sk_protocol == IPPROTO_UDP)))
1258 				ret = -ENOTSUPP;
1259 		} else if (sk->sk_family != PF_RDS) {
1260 			ret = -ENOTSUPP;
1261 		}
1262 		if (!ret) {
1263 			if (val < 0 || val > 1)
1264 				ret = -EINVAL;
1265 			else
1266 				sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
1267 		}
1268 		break;
1269 
1270 	case SO_TXTIME:
1271 		if (optlen != sizeof(struct sock_txtime)) {
1272 			ret = -EINVAL;
1273 			break;
1274 		} else if (copy_from_sockptr(&sk_txtime, optval,
1275 			   sizeof(struct sock_txtime))) {
1276 			ret = -EFAULT;
1277 			break;
1278 		} else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
1279 			ret = -EINVAL;
1280 			break;
1281 		}
1282 		/* CLOCK_MONOTONIC is only used by sch_fq, and this packet
1283		 * scheduler has enough safeguards.
1284 		 */
1285 		if (sk_txtime.clockid != CLOCK_MONOTONIC &&
1286 		    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1287 			ret = -EPERM;
1288 			break;
1289 		}
1290 		sock_valbool_flag(sk, SOCK_TXTIME, true);
1291 		sk->sk_clockid = sk_txtime.clockid;
1292 		sk->sk_txtime_deadline_mode =
1293 			!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
1294 		sk->sk_txtime_report_errors =
1295 			!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
1296 		break;
1297 
1298 	case SO_BINDTOIFINDEX:
1299 		ret = sock_bindtoindex_locked(sk, val);
1300 		break;
1301 
1302 	default:
1303 		ret = -ENOPROTOOPT;
1304 		break;
1305 	}
1306 	release_sock(sk);
1307 	return ret;
1308 }
1309 EXPORT_SYMBOL(sock_setsockopt);
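/* For reference (editor's note): this function is what a userspace
 * setsockopt(2) call at SOL_SOCKET level ultimately lands in, e.g.
 *
 *	int one = 1;
 *	setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
 *
 * maps to the SO_REUSEADDR case above with val == 1.
 */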
1310 
1311 
1312 static void cred_to_ucred(struct pid *pid, const struct cred *cred,
1313 			  struct ucred *ucred)
1314 {
1315 	ucred->pid = pid_vnr(pid);
1316 	ucred->uid = ucred->gid = -1;
1317 	if (cred) {
1318 		struct user_namespace *current_ns = current_user_ns();
1319 
1320 		ucred->uid = from_kuid_munged(current_ns, cred->euid);
1321 		ucred->gid = from_kgid_munged(current_ns, cred->egid);
1322 	}
1323 }
1324 
1325 static int groups_to_user(gid_t __user *dst, const struct group_info *src)
1326 {
1327 	struct user_namespace *user_ns = current_user_ns();
1328 	int i;
1329 
1330 	for (i = 0; i < src->ngroups; i++)
1331 		if (put_user(from_kgid_munged(user_ns, src->gid[i]), dst + i))
1332 			return -EFAULT;
1333 
1334 	return 0;
1335 }
1336 
1337 int sock_getsockopt(struct socket *sock, int level, int optname,
1338 		    char __user *optval, int __user *optlen)
1339 {
1340 	struct sock *sk = sock->sk;
1341 
1342 	union {
1343 		int val;
1344 		u64 val64;
1345 		unsigned long ulval;
1346 		struct linger ling;
1347 		struct old_timeval32 tm32;
1348 		struct __kernel_old_timeval tm;
1349 		struct  __kernel_sock_timeval stm;
1350 		struct sock_txtime txtime;
1351 	} v;
1352 
1353 	int lv = sizeof(int);
1354 	int len;
1355 
1356 	if (get_user(len, optlen))
1357 		return -EFAULT;
1358 	if (len < 0)
1359 		return -EINVAL;
1360 
1361 	memset(&v, 0, sizeof(v));
1362 
1363 	switch (optname) {
1364 	case SO_DEBUG:
1365 		v.val = sock_flag(sk, SOCK_DBG);
1366 		break;
1367 
1368 	case SO_DONTROUTE:
1369 		v.val = sock_flag(sk, SOCK_LOCALROUTE);
1370 		break;
1371 
1372 	case SO_BROADCAST:
1373 		v.val = sock_flag(sk, SOCK_BROADCAST);
1374 		break;
1375 
1376 	case SO_SNDBUF:
1377 		v.val = sk->sk_sndbuf;
1378 		break;
1379 
1380 	case SO_RCVBUF:
1381 		v.val = sk->sk_rcvbuf;
1382 		break;
1383 
1384 	case SO_REUSEADDR:
1385 		v.val = sk->sk_reuse;
1386 		break;
1387 
1388 	case SO_REUSEPORT:
1389 		v.val = sk->sk_reuseport;
1390 		break;
1391 
1392 	case SO_KEEPALIVE:
1393 		v.val = sock_flag(sk, SOCK_KEEPOPEN);
1394 		break;
1395 
1396 	case SO_TYPE:
1397 		v.val = sk->sk_type;
1398 		break;
1399 
1400 	case SO_PROTOCOL:
1401 		v.val = sk->sk_protocol;
1402 		break;
1403 
1404 	case SO_DOMAIN:
1405 		v.val = sk->sk_family;
1406 		break;
1407 
1408 	case SO_ERROR:
1409 		v.val = -sock_error(sk);
1410 		if (v.val == 0)
1411 			v.val = xchg(&sk->sk_err_soft, 0);
1412 		break;
1413 
1414 	case SO_OOBINLINE:
1415 		v.val = sock_flag(sk, SOCK_URGINLINE);
1416 		break;
1417 
1418 	case SO_NO_CHECK:
1419 		v.val = sk->sk_no_check_tx;
1420 		break;
1421 
1422 	case SO_PRIORITY:
1423 		v.val = sk->sk_priority;
1424 		break;
1425 
1426 	case SO_LINGER:
1427 		lv		= sizeof(v.ling);
1428 		v.ling.l_onoff	= sock_flag(sk, SOCK_LINGER);
1429 		v.ling.l_linger	= sk->sk_lingertime / HZ;
1430 		break;
1431 
1432 	case SO_BSDCOMPAT:
1433 		break;
1434 
1435 	case SO_TIMESTAMP_OLD:
1436 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1437 				!sock_flag(sk, SOCK_TSTAMP_NEW) &&
1438 				!sock_flag(sk, SOCK_RCVTSTAMPNS);
1439 		break;
1440 
1441 	case SO_TIMESTAMPNS_OLD:
1442 		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW);
1443 		break;
1444 
1445 	case SO_TIMESTAMP_NEW:
1446 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW);
1447 		break;
1448 
1449 	case SO_TIMESTAMPNS_NEW:
1450 		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW);
1451 		break;
1452 
1453 	case SO_TIMESTAMPING_OLD:
1454 		v.val = sk->sk_tsflags;
1455 		break;
1456 
1457 	case SO_RCVTIMEO_OLD:
1458 	case SO_RCVTIMEO_NEW:
1459 		lv = sock_get_timeout(sk->sk_rcvtimeo, &v, SO_RCVTIMEO_OLD == optname);
1460 		break;
1461 
1462 	case SO_SNDTIMEO_OLD:
1463 	case SO_SNDTIMEO_NEW:
1464 		lv = sock_get_timeout(sk->sk_sndtimeo, &v, SO_SNDTIMEO_OLD == optname);
1465 		break;
1466 
1467 	case SO_RCVLOWAT:
1468 		v.val = sk->sk_rcvlowat;
1469 		break;
1470 
1471 	case SO_SNDLOWAT:
1472 		v.val = 1;
1473 		break;
1474 
1475 	case SO_PASSCRED:
1476 		v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
1477 		break;
1478 
1479 	case SO_PEERCRED:
1480 	{
1481 		struct ucred peercred;
1482 		if (len > sizeof(peercred))
1483 			len = sizeof(peercred);
1484 		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
1485 		if (copy_to_user(optval, &peercred, len))
1486 			return -EFAULT;
1487 		goto lenout;
1488 	}
1489 
1490 	case SO_PEERGROUPS:
1491 	{
1492 		int ret, n;
1493 
1494 		if (!sk->sk_peer_cred)
1495 			return -ENODATA;
1496 
1497 		n = sk->sk_peer_cred->group_info->ngroups;
1498 		if (len < n * sizeof(gid_t)) {
1499 			len = n * sizeof(gid_t);
1500 			return put_user(len, optlen) ? -EFAULT : -ERANGE;
1501 		}
1502 		len = n * sizeof(gid_t);
1503 
1504 		ret = groups_to_user((gid_t __user *)optval,
1505 				     sk->sk_peer_cred->group_info);
1506 		if (ret)
1507 			return ret;
1508 		goto lenout;
1509 	}
1510 
1511 	case SO_PEERNAME:
1512 	{
1513 		char address[128];
1514 
1515 		lv = sock->ops->getname(sock, (struct sockaddr *)address, 2);
1516 		if (lv < 0)
1517 			return -ENOTCONN;
1518 		if (lv < len)
1519 			return -EINVAL;
1520 		if (copy_to_user(optval, address, len))
1521 			return -EFAULT;
1522 		goto lenout;
1523 	}
1524 
1525 	/* Dubious BSD thing... Probably nobody even uses it, but
1526 	 * the UNIX standard wants it for whatever reason... -DaveM
1527 	 */
1528 	case SO_ACCEPTCONN:
1529 		v.val = sk->sk_state == TCP_LISTEN;
1530 		break;
1531 
1532 	case SO_PASSSEC:
1533 		v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
1534 		break;
1535 
1536 	case SO_PEERSEC:
1537 		return security_socket_getpeersec_stream(sock, optval, optlen, len);
1538 
1539 	case SO_MARK:
1540 		v.val = sk->sk_mark;
1541 		break;
1542 
1543 	case SO_RXQ_OVFL:
1544 		v.val = sock_flag(sk, SOCK_RXQ_OVFL);
1545 		break;
1546 
1547 	case SO_WIFI_STATUS:
1548 		v.val = sock_flag(sk, SOCK_WIFI_STATUS);
1549 		break;
1550 
1551 	case SO_PEEK_OFF:
1552 		if (!sock->ops->set_peek_off)
1553 			return -EOPNOTSUPP;
1554 
1555 		v.val = sk->sk_peek_off;
1556 		break;
1557 	case SO_NOFCS:
1558 		v.val = sock_flag(sk, SOCK_NOFCS);
1559 		break;
1560 
1561 	case SO_BINDTODEVICE:
1562 		return sock_getbindtodevice(sk, optval, optlen, len);
1563 
1564 	case SO_GET_FILTER:
1565 		len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
1566 		if (len < 0)
1567 			return len;
1568 
1569 		goto lenout;
1570 
1571 	case SO_LOCK_FILTER:
1572 		v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1573 		break;
1574 
1575 	case SO_BPF_EXTENSIONS:
1576 		v.val = bpf_tell_extensions();
1577 		break;
1578 
1579 	case SO_SELECT_ERR_QUEUE:
1580 		v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1581 		break;
1582 
1583 #ifdef CONFIG_NET_RX_BUSY_POLL
1584 	case SO_BUSY_POLL:
1585 		v.val = sk->sk_ll_usec;
1586 		break;
1587 	case SO_PREFER_BUSY_POLL:
1588 		v.val = READ_ONCE(sk->sk_prefer_busy_poll);
1589 		break;
1590 #endif
1591 
1592 	case SO_MAX_PACING_RATE:
1593 		if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
1594 			lv = sizeof(v.ulval);
1595 			v.ulval = sk->sk_max_pacing_rate;
1596 		} else {
1597 			/* 32bit version */
1598 			v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U);
1599 		}
1600 		break;
1601 
1602 	case SO_INCOMING_CPU:
1603 		v.val = READ_ONCE(sk->sk_incoming_cpu);
1604 		break;
1605 
1606 	case SO_MEMINFO:
1607 	{
1608 		u32 meminfo[SK_MEMINFO_VARS];
1609 
1610 		sk_get_meminfo(sk, meminfo);
1611 
1612 		len = min_t(unsigned int, len, sizeof(meminfo));
1613 		if (copy_to_user(optval, &meminfo, len))
1614 			return -EFAULT;
1615 
1616 		goto lenout;
1617 	}
1618 
1619 #ifdef CONFIG_NET_RX_BUSY_POLL
1620 	case SO_INCOMING_NAPI_ID:
1621 		v.val = READ_ONCE(sk->sk_napi_id);
1622 
1623 		/* aggregate non-NAPI IDs down to 0 */
1624 		if (v.val < MIN_NAPI_ID)
1625 			v.val = 0;
1626 
1627 		break;
1628 #endif
1629 
1630 	case SO_COOKIE:
1631 		lv = sizeof(u64);
1632 		if (len < lv)
1633 			return -EINVAL;
1634 		v.val64 = sock_gen_cookie(sk);
1635 		break;
1636 
1637 	case SO_ZEROCOPY:
1638 		v.val = sock_flag(sk, SOCK_ZEROCOPY);
1639 		break;
1640 
1641 	case SO_TXTIME:
1642 		lv = sizeof(v.txtime);
1643 		v.txtime.clockid = sk->sk_clockid;
1644 		v.txtime.flags |= sk->sk_txtime_deadline_mode ?
1645 				  SOF_TXTIME_DEADLINE_MODE : 0;
1646 		v.txtime.flags |= sk->sk_txtime_report_errors ?
1647 				  SOF_TXTIME_REPORT_ERRORS : 0;
1648 		break;
1649 
1650 	case SO_BINDTOIFINDEX:
1651 		v.val = sk->sk_bound_dev_if;
1652 		break;
1653 
1654 	case SO_NETNS_COOKIE:
1655 		lv = sizeof(u64);
1656 		if (len != lv)
1657 			return -EINVAL;
1658 		v.val64 = sock_net(sk)->net_cookie;
1659 		break;
1660 
1661 	default:
1662		/* We implement SO_SNDLOWAT etc. as not settable
1663 		 * (1003.1g 7).
1664 		 */
1665 		return -ENOPROTOOPT;
1666 	}
1667 
1668 	if (len > lv)
1669 		len = lv;
1670 	if (copy_to_user(optval, &v, len))
1671 		return -EFAULT;
1672 lenout:
1673 	if (put_user(len, optlen))
1674 		return -EFAULT;
1675 	return 0;
1676 }
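/* For reference (editor's note): the matching userspace read looks like
 *
 *	int val;
 *	socklen_t len = sizeof(val);
 *	getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, &len);
 *
 * and returns the (doubled) sk_rcvbuf value handled above.
 */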
1677 
1678 /*
1679  * Initialize an sk_lock.
1680  *
1681  * (We also register the sk_lock with the lock validator.)
1682  */
1683 static inline void sock_lock_init(struct sock *sk)
1684 {
1685 	if (sk->sk_kern_sock)
1686 		sock_lock_init_class_and_name(
1687 			sk,
1688 			af_family_kern_slock_key_strings[sk->sk_family],
1689 			af_family_kern_slock_keys + sk->sk_family,
1690 			af_family_kern_key_strings[sk->sk_family],
1691 			af_family_kern_keys + sk->sk_family);
1692 	else
1693 		sock_lock_init_class_and_name(
1694 			sk,
1695 			af_family_slock_key_strings[sk->sk_family],
1696 			af_family_slock_keys + sk->sk_family,
1697 			af_family_key_strings[sk->sk_family],
1698 			af_family_keys + sk->sk_family);
1699 }
1700 
1701 /*
1702  * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
1703  * even temporarily, because of RCU lookups. sk_node should also be left as is.
1704  * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end.
1705  */
1706 static void sock_copy(struct sock *nsk, const struct sock *osk)
1707 {
1708 	const struct proto *prot = READ_ONCE(osk->sk_prot);
1709 #ifdef CONFIG_SECURITY_NETWORK
1710 	void *sptr = nsk->sk_security;
1711 #endif
1712 
1713 	/* If we move sk_tx_queue_mapping out of the private section,
1714 	 * we must check if sk_tx_queue_clear() is called after
1715 	 * sock_copy() in sk_clone_lock().
1716 	 */
1717 	BUILD_BUG_ON(offsetof(struct sock, sk_tx_queue_mapping) <
1718 		     offsetof(struct sock, sk_dontcopy_begin) ||
1719 		     offsetof(struct sock, sk_tx_queue_mapping) >=
1720 		     offsetof(struct sock, sk_dontcopy_end));
1721 
1722 	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
1723 
1724 	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
1725 	       prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
1726 
1727 #ifdef CONFIG_SECURITY_NETWORK
1728 	nsk->sk_security = sptr;
1729 	security_sk_clone(osk, nsk);
1730 #endif
1731 }
1732 
1733 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1734 		int family)
1735 {
1736 	struct sock *sk;
1737 	struct kmem_cache *slab;
1738 
1739 	slab = prot->slab;
1740 	if (slab != NULL) {
1741 		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
1742 		if (!sk)
1743 			return sk;
1744 		if (want_init_on_alloc(priority))
1745 			sk_prot_clear_nulls(sk, prot->obj_size);
1746 	} else
1747 		sk = kmalloc(prot->obj_size, priority);
1748 
1749 	if (sk != NULL) {
1750 		if (security_sk_alloc(sk, family, priority))
1751 			goto out_free;
1752 
1753 		if (!try_module_get(prot->owner))
1754 			goto out_free_sec;
1755 	}
1756 
1757 	return sk;
1758 
1759 out_free_sec:
1760 	security_sk_free(sk);
1761 out_free:
1762 	if (slab != NULL)
1763 		kmem_cache_free(slab, sk);
1764 	else
1765 		kfree(sk);
1766 	return NULL;
1767 }
1768 
1769 static void sk_prot_free(struct proto *prot, struct sock *sk)
1770 {
1771 	struct kmem_cache *slab;
1772 	struct module *owner;
1773 
1774 	owner = prot->owner;
1775 	slab = prot->slab;
1776 
1777 	cgroup_sk_free(&sk->sk_cgrp_data);
1778 	mem_cgroup_sk_free(sk);
1779 	security_sk_free(sk);
1780 	if (slab != NULL)
1781 		kmem_cache_free(slab, sk);
1782 	else
1783 		kfree(sk);
1784 	module_put(owner);
1785 }
1786 
1787 /**
1788  *	sk_alloc - All socket objects are allocated here
1789  *	@net: the applicable net namespace
1790  *	@family: protocol family
1791  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1792  *	@prot: struct proto associated with this new sock instance
1793  *	@kern: is this to be a kernel socket?
1794  */
1795 struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1796 		      struct proto *prot, int kern)
1797 {
1798 	struct sock *sk;
1799 
1800 	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
1801 	if (sk) {
1802 		sk->sk_family = family;
1803 		/*
1804 		 * See comment in struct sock definition to understand
1805 		 * why we need sk_prot_creator -acme
1806 		 */
1807 		sk->sk_prot = sk->sk_prot_creator = prot;
1808 		sk->sk_kern_sock = kern;
1809 		sock_lock_init(sk);
1810 		sk->sk_net_refcnt = kern ? 0 : 1;
1811 		if (likely(sk->sk_net_refcnt)) {
1812 			get_net(net);
1813 			sock_inuse_add(net, 1);
1814 		}
1815 
1816 		sock_net_set(sk, net);
1817 		refcount_set(&sk->sk_wmem_alloc, 1);
1818 
1819 		mem_cgroup_sk_alloc(sk);
1820 		cgroup_sk_alloc(&sk->sk_cgrp_data);
1821 		sock_update_classid(&sk->sk_cgrp_data);
1822 		sock_update_netprioidx(&sk->sk_cgrp_data);
1823 		sk_tx_queue_clear(sk);
1824 	}
1825 
1826 	return sk;
1827 }
1828 EXPORT_SYMBOL(sk_alloc);
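/* Illustrative usage (editor's sketch): an address family's create() hook
 * typically allocates its sock like
 *
 *	sk = sk_alloc(net, PF_INET, GFP_KERNEL, &my_proto, kern);
 *	if (!sk)
 *		return -ENOBUFS;
 *
 * where my_proto stands in for the family's real struct proto.
 */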
1829 
1830 /* Sockets having SOCK_RCU_FREE will call this function after one RCU
1831  * grace period. This is the case for UDP sockets and TCP listeners.
1832  */
1833 static void __sk_destruct(struct rcu_head *head)
1834 {
1835 	struct sock *sk = container_of(head, struct sock, sk_rcu);
1836 	struct sk_filter *filter;
1837 
1838 	if (sk->sk_destruct)
1839 		sk->sk_destruct(sk);
1840 
1841 	filter = rcu_dereference_check(sk->sk_filter,
1842 				       refcount_read(&sk->sk_wmem_alloc) == 0);
1843 	if (filter) {
1844 		sk_filter_uncharge(sk, filter);
1845 		RCU_INIT_POINTER(sk->sk_filter, NULL);
1846 	}
1847 
1848 	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
1849 
1850 #ifdef CONFIG_BPF_SYSCALL
1851 	bpf_sk_storage_free(sk);
1852 #endif
1853 
1854 	if (atomic_read(&sk->sk_omem_alloc))
1855 		pr_debug("%s: optmem leakage (%d bytes) detected\n",
1856 			 __func__, atomic_read(&sk->sk_omem_alloc));
1857 
1858 	if (sk->sk_frag.page) {
1859 		put_page(sk->sk_frag.page);
1860 		sk->sk_frag.page = NULL;
1861 	}
1862 
1863 	if (sk->sk_peer_cred)
1864 		put_cred(sk->sk_peer_cred);
1865 	put_pid(sk->sk_peer_pid);
1866 	if (likely(sk->sk_net_refcnt))
1867 		put_net(sock_net(sk));
1868 	sk_prot_free(sk->sk_prot_creator, sk);
1869 }
1870 
1871 void sk_destruct(struct sock *sk)
1872 {
1873 	bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
1874 
1875 	if (rcu_access_pointer(sk->sk_reuseport_cb)) {
1876 		reuseport_detach_sock(sk);
1877 		use_call_rcu = true;
1878 	}
1879 
1880 	if (use_call_rcu)
1881 		call_rcu(&sk->sk_rcu, __sk_destruct);
1882 	else
1883 		__sk_destruct(&sk->sk_rcu);
1884 }
1885 
1886 static void __sk_free(struct sock *sk)
1887 {
1888 	if (likely(sk->sk_net_refcnt))
1889 		sock_inuse_add(sock_net(sk), -1);
1890 
1891 	if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk)))
1892 		sock_diag_broadcast_destroy(sk);
1893 	else
1894 		sk_destruct(sk);
1895 }
1896 
1897 void sk_free(struct sock *sk)
1898 {
1899 	/*
1900	 * We subtract one from sk_wmem_alloc so we can tell whether
1901	 * some packets are still in some tx queue.
1902	 * If the count is not zero, sock_wfree() will call __sk_free(sk) later.
1903 	 */
1904 	if (refcount_dec_and_test(&sk->sk_wmem_alloc))
1905 		__sk_free(sk);
1906 }
1907 EXPORT_SYMBOL(sk_free);
1908 
1909 static void sk_init_common(struct sock *sk)
1910 {
1911 	skb_queue_head_init(&sk->sk_receive_queue);
1912 	skb_queue_head_init(&sk->sk_write_queue);
1913 	skb_queue_head_init(&sk->sk_error_queue);
1914 
1915 	rwlock_init(&sk->sk_callback_lock);
1916 	lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
1917 			af_rlock_keys + sk->sk_family,
1918 			af_family_rlock_key_strings[sk->sk_family]);
1919 	lockdep_set_class_and_name(&sk->sk_write_queue.lock,
1920 			af_wlock_keys + sk->sk_family,
1921 			af_family_wlock_key_strings[sk->sk_family]);
1922 	lockdep_set_class_and_name(&sk->sk_error_queue.lock,
1923 			af_elock_keys + sk->sk_family,
1924 			af_family_elock_key_strings[sk->sk_family]);
1925 	lockdep_set_class_and_name(&sk->sk_callback_lock,
1926 			af_callback_keys + sk->sk_family,
1927 			af_family_clock_key_strings[sk->sk_family]);
1928 }
1929 
1930 /**
1931  *	sk_clone_lock - clone a socket, and lock its clone
1932  *	@sk: the socket to clone
1933  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1934  *
1935  *	Caller must unlock socket even in error path (bh_unlock_sock(newsk))
1936  */
1937 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1938 {
1939 	struct proto *prot = READ_ONCE(sk->sk_prot);
1940 	struct sk_filter *filter;
1941 	bool is_charged = true;
1942 	struct sock *newsk;
1943 
1944 	newsk = sk_prot_alloc(prot, priority, sk->sk_family);
1945 	if (!newsk)
1946 		goto out;
1947 
1948 	sock_copy(newsk, sk);
1949 
1950 	newsk->sk_prot_creator = prot;
1951 
1952 	/* SANITY */
1953 	if (likely(newsk->sk_net_refcnt))
1954 		get_net(sock_net(newsk));
1955 	sk_node_init(&newsk->sk_node);
1956 	sock_lock_init(newsk);
1957 	bh_lock_sock(newsk);
1958 	newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
1959 	newsk->sk_backlog.len = 0;
1960 
1961 	atomic_set(&newsk->sk_rmem_alloc, 0);
1962 
1963 	/* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */
1964 	refcount_set(&newsk->sk_wmem_alloc, 1);
1965 
1966 	atomic_set(&newsk->sk_omem_alloc, 0);
1967 	sk_init_common(newsk);
1968 
1969 	newsk->sk_dst_cache	= NULL;
1970 	newsk->sk_dst_pending_confirm = 0;
1971 	newsk->sk_wmem_queued	= 0;
1972 	newsk->sk_forward_alloc = 0;
1973 	atomic_set(&newsk->sk_drops, 0);
1974 	newsk->sk_send_head	= NULL;
1975 	newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
1976 	atomic_set(&newsk->sk_zckey, 0);
1977 
1978 	sock_reset_flag(newsk, SOCK_DONE);
1979 
1980 	/* sk->sk_memcg will be populated at accept() time */
1981 	newsk->sk_memcg = NULL;
1982 
1983 	cgroup_sk_clone(&newsk->sk_cgrp_data);
1984 
1985 	rcu_read_lock();
1986 	filter = rcu_dereference(sk->sk_filter);
1987 	if (filter != NULL)
1988 		/* though it's an empty new sock, the charging may fail
1989 		 * if sysctl_optmem_max was changed between the creation of
1990 		 * the original socket and the cloning
1991 		 */
1992 		is_charged = sk_filter_charge(newsk, filter);
1993 	RCU_INIT_POINTER(newsk->sk_filter, filter);
1994 	rcu_read_unlock();
1995 
1996 	if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
1997 		/* We need to make sure that we don't uncharge the new
1998 		 * socket if we couldn't charge it in the first place
1999 		 * as otherwise we uncharge the parent's filter.
2000 		 */
2001 		if (!is_charged)
2002 			RCU_INIT_POINTER(newsk->sk_filter, NULL);
2003 		sk_free_unlock_clone(newsk);
2004 		newsk = NULL;
2005 		goto out;
2006 	}
2007 	RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
2008 
2009 	if (bpf_sk_storage_clone(sk, newsk)) {
2010 		sk_free_unlock_clone(newsk);
2011 		newsk = NULL;
2012 		goto out;
2013 	}
2014 
2015 	/* Clear sk_user_data if parent had the pointer tagged
2016 	 * as not suitable for copying when cloning.
2017 	 */
2018 	if (sk_user_data_is_nocopy(newsk))
2019 		newsk->sk_user_data = NULL;
2020 
2021 	newsk->sk_err	   = 0;
2022 	newsk->sk_err_soft = 0;
2023 	newsk->sk_priority = 0;
2024 	newsk->sk_incoming_cpu = raw_smp_processor_id();
2025 	if (likely(newsk->sk_net_refcnt))
2026 		sock_inuse_add(sock_net(newsk), 1);
2027 
2028 	/* Before updating sk_refcnt, we must commit prior changes to memory
2029 	 * (Documentation/RCU/rculist_nulls.rst for details)
2030 	 */
2031 	smp_wmb();
2032 	refcount_set(&newsk->sk_refcnt, 2);
2033 
2034 	/* Increment the counter in the same struct proto as the master
2035 	 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
2036 	 * is the same as sk->sk_prot->socks, as this field was copied
2037 	 * with memcpy).
2038 	 *
2039 	 * This _changes_ the previous behaviour, where
2040 	 * tcp_create_openreq_child always incremented the
2041 	 * equivalent of tcp_prot->socks (inet_sock_nr), so this has
2042 	 * to be taken into account in all callers. -acme
2043 	 */
2044 	sk_refcnt_debug_inc(newsk);
2045 	sk_set_socket(newsk, NULL);
2046 	sk_tx_queue_clear(newsk);
2047 	RCU_INIT_POINTER(newsk->sk_wq, NULL);
2048 
2049 	if (newsk->sk_prot->sockets_allocated)
2050 		sk_sockets_allocated_inc(newsk);
2051 
2052 	if (sock_needs_netstamp(sk) && newsk->sk_flags & SK_FLAGS_TIMESTAMP)
2053 		net_enable_timestamp();
2054 out:
2055 	return newsk;
2056 }
2057 EXPORT_SYMBOL_GPL(sk_clone_lock);
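
/*
 * Illustrative sketch (not part of the original file): a hypothetical caller
 * of sk_clone_lock() owns the BH lock on the clone and must drop it itself
 * once protocol specific setup is done, roughly like:
 *
 *	struct sock *child = sk_clone_lock(parent, GFP_ATOMIC);
 *
 *	if (child) {
 *		... protocol specific initialisation of child ...
 *		bh_unlock_sock(child);
 *	}
 */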
2058 
2059 void sk_free_unlock_clone(struct sock *sk)
2060 {
2061 	/* It is still a raw copy of the parent, so invalidate
2062 	 * the destructor and do a plain sk_free() */
2063 	sk->sk_destruct = NULL;
2064 	bh_unlock_sock(sk);
2065 	sk_free(sk);
2066 }
2067 EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
2068 
2069 void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
2070 {
2071 	u32 max_segs = 1;
2072 
2073 	sk_dst_set(sk, dst);
2074 	sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps;
2075 	if (sk->sk_route_caps & NETIF_F_GSO)
2076 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
2077 	sk->sk_route_caps &= ~sk->sk_route_nocaps;
2078 	if (sk_can_gso(sk)) {
2079 		if (dst->header_len && !xfrm_dst_offload_ok(dst)) {
2080 			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
2081 		} else {
2082 			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
2083 			sk->sk_gso_max_size = dst->dev->gso_max_size;
2084 			max_segs = max_t(u32, dst->dev->gso_max_segs, 1);
2085 		}
2086 	}
2087 	sk->sk_gso_max_segs = max_segs;
2088 }
2089 EXPORT_SYMBOL_GPL(sk_setup_caps);
2090 
2091 /*
2092  *	Simple resource managers for sockets.
2093  */
2094 
2095 
2096 /*
2097  * Write buffer destructor automatically called from kfree_skb.
2098  */
2099 void sock_wfree(struct sk_buff *skb)
2100 {
2101 	struct sock *sk = skb->sk;
2102 	unsigned int len = skb->truesize;
2103 
2104 	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
2105 		/*
2106 		 * Keep a reference on sk_wmem_alloc; it will be released
2107 		 * after the sk_write_space() call
2108 		 */
2109 		WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc));
2110 		sk->sk_write_space(sk);
2111 		len = 1;
2112 	}
2113 	/*
2114 	 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
2115 	 * could not do because of in-flight packets
2116 	 */
2117 	if (refcount_sub_and_test(len, &sk->sk_wmem_alloc))
2118 		__sk_free(sk);
2119 }
2120 EXPORT_SYMBOL(sock_wfree);
2121 
2122 /* This variant of sock_wfree() is used by TCP,
2123  * since it sets SOCK_USE_WRITE_QUEUE.
2124  */
2125 void __sock_wfree(struct sk_buff *skb)
2126 {
2127 	struct sock *sk = skb->sk;
2128 
2129 	if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
2130 		__sk_free(sk);
2131 }
2132 
2133 void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
2134 {
2135 	skb_orphan(skb);
2136 	skb->sk = sk;
2137 #ifdef CONFIG_INET
2138 	if (unlikely(!sk_fullsock(sk))) {
2139 		skb->destructor = sock_edemux;
2140 		sock_hold(sk);
2141 		return;
2142 	}
2143 #endif
2144 	skb->destructor = sock_wfree;
2145 	skb_set_hash_from_sk(skb, sk);
2146 	/*
2147 	 * We used to take a refcount on sk, but the following operation
2148 	 * is enough to guarantee sk_free() won't free this sock until
2149 	 * all in-flight packets have completed
2150 	 */
2151 	refcount_add(skb->truesize, &sk->sk_wmem_alloc);
2152 }
2153 EXPORT_SYMBOL(skb_set_owner_w);
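
/*
 * Illustrative sketch (an assumption, not taken from this file): an skb
 * built with sock_wmalloc() is charged to sk_wmem_alloc automatically, while
 * one built with a plain alloc_skb() can be charged explicitly:
 *
 *	skb = alloc_skb(len, GFP_KERNEL);
 *	if (skb)
 *		skb_set_owner_w(skb, sk);
 *
 * After this, skb->truesize is accounted in sk_wmem_alloc and sock_wfree()
 * will release the charge when the skb is freed.
 */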
2154 
2155 static bool can_skb_orphan_partial(const struct sk_buff *skb)
2156 {
2157 #ifdef CONFIG_TLS_DEVICE
2158 	/* Drivers depend on in-order delivery for crypto offload;
2159 	 * a partial orphan breaks the out-of-order-OK logic.
2160 	 */
2161 	if (skb->decrypted)
2162 		return false;
2163 #endif
2164 	return (skb->destructor == sock_wfree ||
2165 		(IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
2166 }
2167 
2168 /* This helper is used by netem, as it can hold packets in its
2169  * delay queue. We want to allow the owner socket to send more
2170  * packets, as if they were already TX completed by a typical driver.
2171  * But we also want to keep skb->sk set because some packet schedulers
2172  * rely on it (sch_fq for example).
2173  */
2174 void skb_orphan_partial(struct sk_buff *skb)
2175 {
2176 	if (skb_is_tcp_pure_ack(skb))
2177 		return;
2178 
2179 	if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk))
2180 		return;
2181 
2182 	skb_orphan(skb);
2183 }
2184 EXPORT_SYMBOL(skb_orphan_partial);
2185 
2186 /*
2187  * Read buffer destructor automatically called from kfree_skb.
2188  */
2189 void sock_rfree(struct sk_buff *skb)
2190 {
2191 	struct sock *sk = skb->sk;
2192 	unsigned int len = skb->truesize;
2193 
2194 	atomic_sub(len, &sk->sk_rmem_alloc);
2195 	sk_mem_uncharge(sk, len);
2196 }
2197 EXPORT_SYMBOL(sock_rfree);
2198 
2199 /*
2200  * Buffer destructor for skbs that are not used directly in read or write
2201  * path, e.g. for error handler skbs. Automatically called from kfree_skb.
2202  */
2203 void sock_efree(struct sk_buff *skb)
2204 {
2205 	sock_put(skb->sk);
2206 }
2207 EXPORT_SYMBOL(sock_efree);
2208 
2209 /* Buffer destructor for prefetch/receive path where reference count may
2210  * not be held, e.g. for listen sockets.
2211  */
2212 #ifdef CONFIG_INET
2213 void sock_pfree(struct sk_buff *skb)
2214 {
2215 	if (sk_is_refcounted(skb->sk))
2216 		sock_gen_put(skb->sk);
2217 }
2218 EXPORT_SYMBOL(sock_pfree);
2219 #endif /* CONFIG_INET */
2220 
2221 kuid_t sock_i_uid(struct sock *sk)
2222 {
2223 	kuid_t uid;
2224 
2225 	read_lock_bh(&sk->sk_callback_lock);
2226 	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
2227 	read_unlock_bh(&sk->sk_callback_lock);
2228 	return uid;
2229 }
2230 EXPORT_SYMBOL(sock_i_uid);
2231 
2232 unsigned long sock_i_ino(struct sock *sk)
2233 {
2234 	unsigned long ino;
2235 
2236 	read_lock_bh(&sk->sk_callback_lock);
2237 	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
2238 	read_unlock_bh(&sk->sk_callback_lock);
2239 	return ino;
2240 }
2241 EXPORT_SYMBOL(sock_i_ino);
2242 
2243 /*
2244  * Allocate an skb from the socket's send buffer.
2245  */
2246 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
2247 			     gfp_t priority)
2248 {
2249 	if (force ||
2250 	    refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) {
2251 		struct sk_buff *skb = alloc_skb(size, priority);
2252 
2253 		if (skb) {
2254 			skb_set_owner_w(skb, sk);
2255 			return skb;
2256 		}
2257 	}
2258 	return NULL;
2259 }
2260 EXPORT_SYMBOL(sock_wmalloc);
2261 
2262 static void sock_ofree(struct sk_buff *skb)
2263 {
2264 	struct sock *sk = skb->sk;
2265 
2266 	atomic_sub(skb->truesize, &sk->sk_omem_alloc);
2267 }
2268 
2269 struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
2270 			     gfp_t priority)
2271 {
2272 	struct sk_buff *skb;
2273 
2274 	/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
2275 	if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
2276 	    sysctl_optmem_max)
2277 		return NULL;
2278 
2279 	skb = alloc_skb(size, priority);
2280 	if (!skb)
2281 		return NULL;
2282 
2283 	atomic_add(skb->truesize, &sk->sk_omem_alloc);
2284 	skb->sk = sk;
2285 	skb->destructor = sock_ofree;
2286 	return skb;
2287 }
2288 
2289 /*
2290  * Allocate a memory block from the socket's option memory buffer.
2291  */
2292 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
2293 {
2294 	if ((unsigned int)size <= sysctl_optmem_max &&
2295 	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
2296 		void *mem;
2297 		/* First do the add, to avoid the race if kmalloc
2298 		 * might sleep.
2299 		 */
2300 		atomic_add(size, &sk->sk_omem_alloc);
2301 		mem = kmalloc(size, priority);
2302 		if (mem)
2303 			return mem;
2304 		atomic_sub(size, &sk->sk_omem_alloc);
2305 	}
2306 	return NULL;
2307 }
2308 EXPORT_SYMBOL(sock_kmalloc);
2309 
2310 /* Free an option memory block. Note, we actually want the inline
2311  * here as this allows gcc to detect the nullify and fold away the
2312  * condition entirely.
2313  */
2314 static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
2315 				  const bool nullify)
2316 {
2317 	if (WARN_ON_ONCE(!mem))
2318 		return;
2319 	if (nullify)
2320 		kfree_sensitive(mem);
2321 	else
2322 		kfree(mem);
2323 	atomic_sub(size, &sk->sk_omem_alloc);
2324 }
2325 
2326 void sock_kfree_s(struct sock *sk, void *mem, int size)
2327 {
2328 	__sock_kfree_s(sk, mem, size, false);
2329 }
2330 EXPORT_SYMBOL(sock_kfree_s);
2331 
2332 void sock_kzfree_s(struct sock *sk, void *mem, int size)
2333 {
2334 	__sock_kfree_s(sk, mem, size, true);
2335 }
2336 EXPORT_SYMBOL(sock_kzfree_s);
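
/*
 * Illustrative sketch (assumption): option memory is accounted in
 * sk_omem_alloc, so a hypothetical setsockopt() handler would pair
 * sock_kmalloc() with sock_kfree_s() (or sock_kzfree_s() for key material),
 * passing the same size to both:
 *
 *	struct my_opt *opt = sock_kmalloc(sk, sizeof(*opt), GFP_KERNEL);
 *
 *	if (!opt)
 *		return -ENOBUFS;
 *	...
 *	sock_kfree_s(sk, opt, sizeof(*opt));
 */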
2337 
2338 /* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
2339    I think these locks should be removed for datagram sockets.
2340  */
2341 static long sock_wait_for_wmem(struct sock *sk, long timeo)
2342 {
2343 	DEFINE_WAIT(wait);
2344 
2345 	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2346 	for (;;) {
2347 		if (!timeo)
2348 			break;
2349 		if (signal_pending(current))
2350 			break;
2351 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
2352 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2353 		if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
2354 			break;
2355 		if (sk->sk_shutdown & SEND_SHUTDOWN)
2356 			break;
2357 		if (sk->sk_err)
2358 			break;
2359 		timeo = schedule_timeout(timeo);
2360 	}
2361 	finish_wait(sk_sleep(sk), &wait);
2362 	return timeo;
2363 }
2364 
2365 
2366 /*
2367  *	Generic send/receive buffer handlers
2368  */
2369 
2370 struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
2371 				     unsigned long data_len, int noblock,
2372 				     int *errcode, int max_page_order)
2373 {
2374 	struct sk_buff *skb;
2375 	long timeo;
2376 	int err;
2377 
2378 	timeo = sock_sndtimeo(sk, noblock);
2379 	for (;;) {
2380 		err = sock_error(sk);
2381 		if (err != 0)
2382 			goto failure;
2383 
2384 		err = -EPIPE;
2385 		if (sk->sk_shutdown & SEND_SHUTDOWN)
2386 			goto failure;
2387 
2388 		if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
2389 			break;
2390 
2391 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2392 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
2393 		err = -EAGAIN;
2394 		if (!timeo)
2395 			goto failure;
2396 		if (signal_pending(current))
2397 			goto interrupted;
2398 		timeo = sock_wait_for_wmem(sk, timeo);
2399 	}
2400 	skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
2401 				   errcode, sk->sk_allocation);
2402 	if (skb)
2403 		skb_set_owner_w(skb, sk);
2404 	return skb;
2405 
2406 interrupted:
2407 	err = sock_intr_errno(timeo);
2408 failure:
2409 	*errcode = err;
2410 	return NULL;
2411 }
2412 EXPORT_SYMBOL(sock_alloc_send_pskb);
2413 
2414 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
2415 				    int noblock, int *errcode)
2416 {
2417 	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
2418 }
2419 EXPORT_SYMBOL(sock_alloc_send_skb);
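
/*
 * Illustrative sketch (assumption): a typical datagram sendmsg()
 * implementation would call sock_alloc_send_skb() roughly like this,
 * propagating the error code it fills in:
 *
 *	struct sk_buff *skb;
 *	int err;
 *
 *	skb = sock_alloc_send_skb(sk, hlen + len,
 *				  msg->msg_flags & MSG_DONTWAIT, &err);
 *	if (!skb)
 *		return err;
 */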
2420 
2421 int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
2422 		     struct sockcm_cookie *sockc)
2423 {
2424 	u32 tsflags;
2425 
2426 	switch (cmsg->cmsg_type) {
2427 	case SO_MARK:
2428 		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2429 			return -EPERM;
2430 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
2431 			return -EINVAL;
2432 		sockc->mark = *(u32 *)CMSG_DATA(cmsg);
2433 		break;
2434 	case SO_TIMESTAMPING_OLD:
2435 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
2436 			return -EINVAL;
2437 
2438 		tsflags = *(u32 *)CMSG_DATA(cmsg);
2439 		if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
2440 			return -EINVAL;
2441 
2442 		sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
2443 		sockc->tsflags |= tsflags;
2444 		break;
2445 	case SCM_TXTIME:
2446 		if (!sock_flag(sk, SOCK_TXTIME))
2447 			return -EINVAL;
2448 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64)))
2449 			return -EINVAL;
2450 		sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
2451 		break;
2452 	/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
2453 	case SCM_RIGHTS:
2454 	case SCM_CREDENTIALS:
2455 		break;
2456 	default:
2457 		return -EINVAL;
2458 	}
2459 	return 0;
2460 }
2461 EXPORT_SYMBOL(__sock_cmsg_send);
2462 
2463 int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
2464 		   struct sockcm_cookie *sockc)
2465 {
2466 	struct cmsghdr *cmsg;
2467 	int ret;
2468 
2469 	for_each_cmsghdr(cmsg, msg) {
2470 		if (!CMSG_OK(msg, cmsg))
2471 			return -EINVAL;
2472 		if (cmsg->cmsg_level != SOL_SOCKET)
2473 			continue;
2474 		ret = __sock_cmsg_send(sk, msg, cmsg, sockc);
2475 		if (ret)
2476 			return ret;
2477 	}
2478 	return 0;
2479 }
2480 EXPORT_SYMBOL(sock_cmsg_send);
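
/*
 * Illustrative sketch (assumption): protocols normally seed the cookie from
 * the socket defaults with sockcm_init() and then let SOL_SOCKET control
 * messages override it:
 *
 *	struct sockcm_cookie sockc;
 *
 *	sockcm_init(&sockc, sk);
 *	if (msg->msg_controllen) {
 *		err = sock_cmsg_send(sk, msg, &sockc);
 *		if (unlikely(err))
 *			return err;
 *	}
 */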
2481 
2482 static void sk_enter_memory_pressure(struct sock *sk)
2483 {
2484 	if (!sk->sk_prot->enter_memory_pressure)
2485 		return;
2486 
2487 	sk->sk_prot->enter_memory_pressure(sk);
2488 }
2489 
2490 static void sk_leave_memory_pressure(struct sock *sk)
2491 {
2492 	if (sk->sk_prot->leave_memory_pressure) {
2493 		sk->sk_prot->leave_memory_pressure(sk);
2494 	} else {
2495 		unsigned long *memory_pressure = sk->sk_prot->memory_pressure;
2496 
2497 		if (memory_pressure && READ_ONCE(*memory_pressure))
2498 			WRITE_ONCE(*memory_pressure, 0);
2499 	}
2500 }
2501 
2502 #define SKB_FRAG_PAGE_ORDER	get_order(32768)
2503 DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
2504 
2505 /**
2506  * skb_page_frag_refill - check that a page_frag contains enough room
2507  * @sz: minimum size of the fragment we want to get
2508  * @pfrag: pointer to page_frag
2509  * @gfp: priority for memory allocation
2510  *
2511  * Note: While this allocator tries to use high order pages, there is
2512  * no guarantee that allocations succeed. Therefore, @sz MUST be
2513  * less than or equal to PAGE_SIZE.
2514  */
2515 bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
2516 {
2517 	if (pfrag->page) {
2518 		if (page_ref_count(pfrag->page) == 1) {
2519 			pfrag->offset = 0;
2520 			return true;
2521 		}
2522 		if (pfrag->offset + sz <= pfrag->size)
2523 			return true;
2524 		put_page(pfrag->page);
2525 	}
2526 
2527 	pfrag->offset = 0;
2528 	if (SKB_FRAG_PAGE_ORDER &&
2529 	    !static_branch_unlikely(&net_high_order_alloc_disable_key)) {
2530 		/* Avoid direct reclaim but allow kswapd to wake */
2531 		pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
2532 					  __GFP_COMP | __GFP_NOWARN |
2533 					  __GFP_NORETRY,
2534 					  SKB_FRAG_PAGE_ORDER);
2535 		if (likely(pfrag->page)) {
2536 			pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
2537 			return true;
2538 		}
2539 	}
2540 	pfrag->page = alloc_page(gfp);
2541 	if (likely(pfrag->page)) {
2542 		pfrag->size = PAGE_SIZE;
2543 		return true;
2544 	}
2545 	return false;
2546 }
2547 EXPORT_SYMBOL(skb_page_frag_refill);
2548 
2549 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
2550 {
2551 	if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
2552 		return true;
2553 
2554 	sk_enter_memory_pressure(sk);
2555 	sk_stream_moderate_sndbuf(sk);
2556 	return false;
2557 }
2558 EXPORT_SYMBOL(sk_page_frag_refill);
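
/*
 * Illustrative sketch (assumption): a sender would typically combine
 * sk_page_frag() with sk_page_frag_refill() before copying user data into
 * the per-socket (or per-task) page fragment, advancing pfrag->offset by the
 * amount copied:
 *
 *	struct page_frag *pfrag = sk_page_frag(sk);
 *
 *	if (!sk_page_frag_refill(sk, pfrag))
 *		goto wait_for_memory;
 */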
2559 
2560 void __lock_sock(struct sock *sk)
2561 	__releases(&sk->sk_lock.slock)
2562 	__acquires(&sk->sk_lock.slock)
2563 {
2564 	DEFINE_WAIT(wait);
2565 
2566 	for (;;) {
2567 		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
2568 					TASK_UNINTERRUPTIBLE);
2569 		spin_unlock_bh(&sk->sk_lock.slock);
2570 		schedule();
2571 		spin_lock_bh(&sk->sk_lock.slock);
2572 		if (!sock_owned_by_user(sk))
2573 			break;
2574 	}
2575 	finish_wait(&sk->sk_lock.wq, &wait);
2576 }
2577 
2578 void __release_sock(struct sock *sk)
2579 	__releases(&sk->sk_lock.slock)
2580 	__acquires(&sk->sk_lock.slock)
2581 {
2582 	struct sk_buff *skb, *next;
2583 
2584 	while ((skb = sk->sk_backlog.head) != NULL) {
2585 		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
2586 
2587 		spin_unlock_bh(&sk->sk_lock.slock);
2588 
2589 		do {
2590 			next = skb->next;
2591 			prefetch(next);
2592 			WARN_ON_ONCE(skb_dst_is_noref(skb));
2593 			skb_mark_not_on_list(skb);
2594 			sk_backlog_rcv(sk, skb);
2595 
2596 			cond_resched();
2597 
2598 			skb = next;
2599 		} while (skb != NULL);
2600 
2601 		spin_lock_bh(&sk->sk_lock.slock);
2602 	}
2603 
2604 	/*
2605 	 * Doing the zeroing here guarantees we cannot loop forever
2606 	 * while a wild producer attempts to flood us.
2607 	 */
2608 	sk->sk_backlog.len = 0;
2609 }
2610 
2611 void __sk_flush_backlog(struct sock *sk)
2612 {
2613 	spin_lock_bh(&sk->sk_lock.slock);
2614 	__release_sock(sk);
2615 	spin_unlock_bh(&sk->sk_lock.slock);
2616 }
2617 
2618 /**
2619  * sk_wait_data - wait for data to arrive at sk_receive_queue
2620  * @sk:    sock to wait on
2621  * @timeo: for how long
2622  * @skb:   last skb seen on sk_receive_queue
2623  *
2624  * Now socket state, including sk->sk_err, is changed only under the lock,
2625  * hence we may omit checks after joining the wait queue.
2626  * We check the receive queue before schedule() only as an optimization;
2627  * it is very likely that release_sock() added new data.
2628  */
2629 int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
2630 {
2631 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
2632 	int rc;
2633 
2634 	add_wait_queue(sk_sleep(sk), &wait);
2635 	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2636 	rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
2637 	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2638 	remove_wait_queue(sk_sleep(sk), &wait);
2639 	return rc;
2640 }
2641 EXPORT_SYMBOL(sk_wait_data);
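
/*
 * Illustrative sketch (assumption): a blocking recvmsg() implementation
 * would typically loop on sk_wait_data() under the socket lock:
 *
 *	long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 *
 *	while (!(skb = skb_peek(&sk->sk_receive_queue))) {
 *		if (!timeo || signal_pending(current))
 *			break;
 *		sk_wait_data(sk, &timeo, NULL);
 *	}
 */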
2642 
2643 /**
2644  *	__sk_mem_raise_allocated - increase memory_allocated
2645  *	@sk: socket
2646  *	@size: memory size to allocate
2647  *	@amt: pages to allocate
2648  *	@kind: allocation type
2649  *
2650  *	Similar to __sk_mem_schedule(), but does not update sk_forward_alloc
2651  */
2652 int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
2653 {
2654 	struct proto *prot = sk->sk_prot;
2655 	long allocated = sk_memory_allocated_add(sk, amt);
2656 	bool charged = true;
2657 
2658 	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
2659 	    !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt)))
2660 		goto suppress_allocation;
2661 
2662 	/* Under limit. */
2663 	if (allocated <= sk_prot_mem_limits(sk, 0)) {
2664 		sk_leave_memory_pressure(sk);
2665 		return 1;
2666 	}
2667 
2668 	/* Under pressure. */
2669 	if (allocated > sk_prot_mem_limits(sk, 1))
2670 		sk_enter_memory_pressure(sk);
2671 
2672 	/* Over hard limit. */
2673 	if (allocated > sk_prot_mem_limits(sk, 2))
2674 		goto suppress_allocation;
2675 
2676 	/* guarantee minimum buffer size under pressure */
2677 	if (kind == SK_MEM_RECV) {
2678 		if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot))
2679 			return 1;
2680 
2681 	} else { /* SK_MEM_SEND */
2682 		int wmem0 = sk_get_wmem0(sk, prot);
2683 
2684 		if (sk->sk_type == SOCK_STREAM) {
2685 			if (sk->sk_wmem_queued < wmem0)
2686 				return 1;
2687 		} else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) {
2688 			return 1;
2689 		}
2690 	}
2691 
2692 	if (sk_has_memory_pressure(sk)) {
2693 		u64 alloc;
2694 
2695 		if (!sk_under_memory_pressure(sk))
2696 			return 1;
2697 		alloc = sk_sockets_allocated_read_positive(sk);
2698 		if (sk_prot_mem_limits(sk, 2) > alloc *
2699 		    sk_mem_pages(sk->sk_wmem_queued +
2700 				 atomic_read(&sk->sk_rmem_alloc) +
2701 				 sk->sk_forward_alloc))
2702 			return 1;
2703 	}
2704 
2705 suppress_allocation:
2706 
2707 	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
2708 		sk_stream_moderate_sndbuf(sk);
2709 
2710 		/* Fail only if the socket is _under_ its sndbuf.
2711 		 * In this case we cannot block, so we have to fail.
2712 		 */
2713 		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
2714 			return 1;
2715 	}
2716 
2717 	if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
2718 		trace_sock_exceed_buf_limit(sk, prot, allocated, kind);
2719 
2720 	sk_memory_allocated_sub(sk, amt);
2721 
2722 	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
2723 		mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);
2724 
2725 	return 0;
2726 }
2727 EXPORT_SYMBOL(__sk_mem_raise_allocated);
2728 
2729 /**
2730  *	__sk_mem_schedule - increase sk_forward_alloc and memory_allocated
2731  *	@sk: socket
2732  *	@size: memory size to allocate
2733  *	@kind: allocation type
2734  *
2735  *	If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
2736  *	rmem allocation. This function assumes that protocols which have
2737  *	memory_pressure use sk_wmem_queued as write buffer accounting.
2738  */
2739 int __sk_mem_schedule(struct sock *sk, int size, int kind)
2740 {
2741 	int ret, amt = sk_mem_pages(size);
2742 
2743 	sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT;
2744 	ret = __sk_mem_raise_allocated(sk, size, amt, kind);
2745 	if (!ret)
2746 		sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT;
2747 	return ret;
2748 }
2749 EXPORT_SYMBOL(__sk_mem_schedule);
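
/*
 * Illustrative sketch (assumption): protocols usually reach this through the
 * sk_rmem_schedule()/sk_wmem_schedule() wrappers before queueing an skb,
 * e.g. on the receive side:
 *
 *	if (!sk_rmem_schedule(sk, skb, skb->truesize))
 *		goto drop;
 *	skb_set_owner_r(skb, sk);
 */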
2750 
2751 /**
2752  *	__sk_mem_reduce_allocated - reclaim memory_allocated
2753  *	@sk: socket
2754  *	@amount: number of quanta
2755  *
2756  *	Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc
2757  */
2758 void __sk_mem_reduce_allocated(struct sock *sk, int amount)
2759 {
2760 	sk_memory_allocated_sub(sk, amount);
2761 
2762 	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
2763 		mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
2764 
2765 	if (sk_under_memory_pressure(sk) &&
2766 	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
2767 		sk_leave_memory_pressure(sk);
2768 }
2769 EXPORT_SYMBOL(__sk_mem_reduce_allocated);
2770 
2771 /**
2772  *	__sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
2773  *	@sk: socket
2774  *	@amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
2775  */
2776 void __sk_mem_reclaim(struct sock *sk, int amount)
2777 {
2778 	amount >>= SK_MEM_QUANTUM_SHIFT;
2779 	sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
2780 	__sk_mem_reduce_allocated(sk, amount);
2781 }
2782 EXPORT_SYMBOL(__sk_mem_reclaim);
2783 
2784 int sk_set_peek_off(struct sock *sk, int val)
2785 {
2786 	sk->sk_peek_off = val;
2787 	return 0;
2788 }
2789 EXPORT_SYMBOL_GPL(sk_set_peek_off);
2790 
2791 /*
2792  * Set of default routines for initialising struct proto_ops when
2793  * the protocol does not support a particular function. In certain
2794  * cases where it makes no sense for a protocol to have a "do nothing"
2795  * function, some default processing is provided.
2796  */
2797 
2798 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
2799 {
2800 	return -EOPNOTSUPP;
2801 }
2802 EXPORT_SYMBOL(sock_no_bind);
2803 
2804 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
2805 		    int len, int flags)
2806 {
2807 	return -EOPNOTSUPP;
2808 }
2809 EXPORT_SYMBOL(sock_no_connect);
2810 
2811 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
2812 {
2813 	return -EOPNOTSUPP;
2814 }
2815 EXPORT_SYMBOL(sock_no_socketpair);
2816 
2817 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
2818 		   bool kern)
2819 {
2820 	return -EOPNOTSUPP;
2821 }
2822 EXPORT_SYMBOL(sock_no_accept);
2823 
2824 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
2825 		    int peer)
2826 {
2827 	return -EOPNOTSUPP;
2828 }
2829 EXPORT_SYMBOL(sock_no_getname);
2830 
2831 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2832 {
2833 	return -EOPNOTSUPP;
2834 }
2835 EXPORT_SYMBOL(sock_no_ioctl);
2836 
2837 int sock_no_listen(struct socket *sock, int backlog)
2838 {
2839 	return -EOPNOTSUPP;
2840 }
2841 EXPORT_SYMBOL(sock_no_listen);
2842 
2843 int sock_no_shutdown(struct socket *sock, int how)
2844 {
2845 	return -EOPNOTSUPP;
2846 }
2847 EXPORT_SYMBOL(sock_no_shutdown);
2848 
2849 int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
2850 {
2851 	return -EOPNOTSUPP;
2852 }
2853 EXPORT_SYMBOL(sock_no_sendmsg);
2854 
2855 int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len)
2856 {
2857 	return -EOPNOTSUPP;
2858 }
2859 EXPORT_SYMBOL(sock_no_sendmsg_locked);
2860 
2861 int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
2862 		    int flags)
2863 {
2864 	return -EOPNOTSUPP;
2865 }
2866 EXPORT_SYMBOL(sock_no_recvmsg);
2867 
2868 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
2869 {
2870 	/* Mirror missing mmap method error code */
2871 	return -ENODEV;
2872 }
2873 EXPORT_SYMBOL(sock_no_mmap);
2874 
2875 /*
2876  * When a file is received (via SCM_RIGHTS, etc), we must bump the
2877  * various sock-based usage counts.
2878  */
2879 void __receive_sock(struct file *file)
2880 {
2881 	struct socket *sock;
2882 
2883 	sock = sock_from_file(file);
2884 	if (sock) {
2885 		sock_update_netprioidx(&sock->sk->sk_cgrp_data);
2886 		sock_update_classid(&sock->sk->sk_cgrp_data);
2887 	}
2888 }
2889 
2890 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
2891 {
2892 	ssize_t res;
2893 	struct msghdr msg = {.msg_flags = flags};
2894 	struct kvec iov;
2895 	char *kaddr = kmap(page);
2896 	iov.iov_base = kaddr + offset;
2897 	iov.iov_len = size;
2898 	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
2899 	kunmap(page);
2900 	return res;
2901 }
2902 EXPORT_SYMBOL(sock_no_sendpage);
2903 
2904 ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page,
2905 				int offset, size_t size, int flags)
2906 {
2907 	ssize_t res;
2908 	struct msghdr msg = {.msg_flags = flags};
2909 	struct kvec iov;
2910 	char *kaddr = kmap(page);
2911 
2912 	iov.iov_base = kaddr + offset;
2913 	iov.iov_len = size;
2914 	res = kernel_sendmsg_locked(sk, &msg, &iov, 1, size);
2915 	kunmap(page);
2916 	return res;
2917 }
2918 EXPORT_SYMBOL(sock_no_sendpage_locked);
2919 
2920 /*
2921  *	Default Socket Callbacks
2922  */
2923 
2924 static void sock_def_wakeup(struct sock *sk)
2925 {
2926 	struct socket_wq *wq;
2927 
2928 	rcu_read_lock();
2929 	wq = rcu_dereference(sk->sk_wq);
2930 	if (skwq_has_sleeper(wq))
2931 		wake_up_interruptible_all(&wq->wait);
2932 	rcu_read_unlock();
2933 }
2934 
2935 static void sock_def_error_report(struct sock *sk)
2936 {
2937 	struct socket_wq *wq;
2938 
2939 	rcu_read_lock();
2940 	wq = rcu_dereference(sk->sk_wq);
2941 	if (skwq_has_sleeper(wq))
2942 		wake_up_interruptible_poll(&wq->wait, EPOLLERR);
2943 	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
2944 	rcu_read_unlock();
2945 }
2946 
2947 void sock_def_readable(struct sock *sk)
2948 {
2949 	struct socket_wq *wq;
2950 
2951 	rcu_read_lock();
2952 	wq = rcu_dereference(sk->sk_wq);
2953 	if (skwq_has_sleeper(wq))
2954 		wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
2955 						EPOLLRDNORM | EPOLLRDBAND);
2956 	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
2957 	rcu_read_unlock();
2958 }
2959 
2960 static void sock_def_write_space(struct sock *sk)
2961 {
2962 	struct socket_wq *wq;
2963 
2964 	rcu_read_lock();
2965 
2966 	/* Do not wake up a writer until he can make "significant"
2967 	 * progress.  --DaveM
2968 	 */
2969 	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= READ_ONCE(sk->sk_sndbuf)) {
2970 		wq = rcu_dereference(sk->sk_wq);
2971 		if (skwq_has_sleeper(wq))
2972 			wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
2973 						EPOLLWRNORM | EPOLLWRBAND);
2974 
2975 		/* Should agree with poll, otherwise some programs break */
2976 		if (sock_writeable(sk))
2977 			sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
2978 	}
2979 
2980 	rcu_read_unlock();
2981 }
2982 
2983 static void sock_def_destruct(struct sock *sk)
2984 {
2985 }
2986 
2987 void sk_send_sigurg(struct sock *sk)
2988 {
2989 	if (sk->sk_socket && sk->sk_socket->file)
2990 		if (send_sigurg(&sk->sk_socket->file->f_owner))
2991 			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
2992 }
2993 EXPORT_SYMBOL(sk_send_sigurg);
2994 
2995 void sk_reset_timer(struct sock *sk, struct timer_list* timer,
2996 		    unsigned long expires)
2997 {
2998 	if (!mod_timer(timer, expires))
2999 		sock_hold(sk);
3000 }
3001 EXPORT_SYMBOL(sk_reset_timer);
3002 
3003 void sk_stop_timer(struct sock *sk, struct timer_list* timer)
3004 {
3005 	if (del_timer(timer))
3006 		__sock_put(sk);
3007 }
3008 EXPORT_SYMBOL(sk_stop_timer);
3009 
3010 void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer)
3011 {
3012 	if (del_timer_sync(timer))
3013 		__sock_put(sk);
3014 }
3015 EXPORT_SYMBOL(sk_stop_timer_sync);
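
/*
 * Illustrative sketch (assumption): sk_reset_timer() takes a reference on the
 * socket unless the timer was already pending, and sk_stop_timer() drops it
 * if the timer was still queued, so a hypothetical protocol timer (with
 * "delay" being a made-up variable) is handled like:
 *
 *	sk_reset_timer(sk, &sk->sk_timer, jiffies + delay);
 *	...
 *	sk_stop_timer(sk, &sk->sk_timer);
 */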
3016 
3017 void sock_init_data(struct socket *sock, struct sock *sk)
3018 {
3019 	sk_init_common(sk);
3020 	sk->sk_send_head	=	NULL;
3021 
3022 	timer_setup(&sk->sk_timer, NULL, 0);
3023 
3024 	sk->sk_allocation	=	GFP_KERNEL;
3025 	sk->sk_rcvbuf		=	sysctl_rmem_default;
3026 	sk->sk_sndbuf		=	sysctl_wmem_default;
3027 	sk->sk_state		=	TCP_CLOSE;
3028 	sk_set_socket(sk, sock);
3029 
3030 	sock_set_flag(sk, SOCK_ZAPPED);
3031 
3032 	if (sock) {
3033 		sk->sk_type	=	sock->type;
3034 		RCU_INIT_POINTER(sk->sk_wq, &sock->wq);
3035 		sock->sk	=	sk;
3036 		sk->sk_uid	=	SOCK_INODE(sock)->i_uid;
3037 	} else {
3038 		RCU_INIT_POINTER(sk->sk_wq, NULL);
3039 		sk->sk_uid	=	make_kuid(sock_net(sk)->user_ns, 0);
3040 	}
3041 
3042 	rwlock_init(&sk->sk_callback_lock);
3043 	if (sk->sk_kern_sock)
3044 		lockdep_set_class_and_name(
3045 			&sk->sk_callback_lock,
3046 			af_kern_callback_keys + sk->sk_family,
3047 			af_family_kern_clock_key_strings[sk->sk_family]);
3048 	else
3049 		lockdep_set_class_and_name(
3050 			&sk->sk_callback_lock,
3051 			af_callback_keys + sk->sk_family,
3052 			af_family_clock_key_strings[sk->sk_family]);
3053 
3054 	sk->sk_state_change	=	sock_def_wakeup;
3055 	sk->sk_data_ready	=	sock_def_readable;
3056 	sk->sk_write_space	=	sock_def_write_space;
3057 	sk->sk_error_report	=	sock_def_error_report;
3058 	sk->sk_destruct		=	sock_def_destruct;
3059 
3060 	sk->sk_frag.page	=	NULL;
3061 	sk->sk_frag.offset	=	0;
3062 	sk->sk_peek_off		=	-1;
3063 
3064 	sk->sk_peer_pid 	=	NULL;
3065 	sk->sk_peer_cred	=	NULL;
3066 	sk->sk_write_pending	=	0;
3067 	sk->sk_rcvlowat		=	1;
3068 	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
3069 	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
3070 
3071 	sk->sk_stamp = SK_DEFAULT_STAMP;
3072 #if BITS_PER_LONG==32
3073 	seqlock_init(&sk->sk_stamp_seq);
3074 #endif
3075 	atomic_set(&sk->sk_zckey, 0);
3076 
3077 #ifdef CONFIG_NET_RX_BUSY_POLL
3078 	sk->sk_napi_id		=	0;
3079 	sk->sk_ll_usec		=	sysctl_net_busy_read;
3080 #endif
3081 
3082 	sk->sk_max_pacing_rate = ~0UL;
3083 	sk->sk_pacing_rate = ~0UL;
3084 	WRITE_ONCE(sk->sk_pacing_shift, 10);
3085 	sk->sk_incoming_cpu = -1;
3086 
3087 	sk_rx_queue_clear(sk);
3088 	/*
3089 	 * Before updating sk_refcnt, we must commit prior changes to memory
3090 	 * (Documentation/RCU/rculist_nulls.rst for details)
3091 	 */
3092 	smp_wmb();
3093 	refcount_set(&sk->sk_refcnt, 1);
3094 	atomic_set(&sk->sk_drops, 0);
3095 }
3096 EXPORT_SYMBOL(sock_init_data);
3097 
3098 void lock_sock_nested(struct sock *sk, int subclass)
3099 {
3100 	might_sleep();
3101 	spin_lock_bh(&sk->sk_lock.slock);
3102 	if (sk->sk_lock.owned)
3103 		__lock_sock(sk);
3104 	sk->sk_lock.owned = 1;
3105 	spin_unlock(&sk->sk_lock.slock);
3106 	/*
3107 	 * The sk_lock has mutex_lock() semantics here:
3108 	 */
3109 	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
3110 	local_bh_enable();
3111 }
3112 EXPORT_SYMBOL(lock_sock_nested);
3113 
3114 void release_sock(struct sock *sk)
3115 {
3116 	spin_lock_bh(&sk->sk_lock.slock);
3117 	if (sk->sk_backlog.tail)
3118 		__release_sock(sk);
3119 
3120 	/* Warning : release_cb() might need to release sk ownership,
3121 	 * ie call sock_release_ownership(sk) before us.
3122 	 */
3123 	if (sk->sk_prot->release_cb)
3124 		sk->sk_prot->release_cb(sk);
3125 
3126 	sock_release_ownership(sk);
3127 	if (waitqueue_active(&sk->sk_lock.wq))
3128 		wake_up(&sk->sk_lock.wq);
3129 	spin_unlock_bh(&sk->sk_lock.slock);
3130 }
3131 EXPORT_SYMBOL(release_sock);
3132 
3133 /**
3134  * lock_sock_fast - fast version of lock_sock
3135  * @sk: socket
3136  *
3137  * This version should be used for very small sections, where the process won't block.
3138  * Return false if the fast path is taken:
3139  *
3140  *   sk_lock.slock locked, owned = 0, BH disabled
3141  *
3142  * Return true if the slow path is taken:
3143  *
3144  *   sk_lock.slock unlocked, owned = 1, BH enabled
3145  */
3146 bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
3147 {
3148 	might_sleep();
3149 	spin_lock_bh(&sk->sk_lock.slock);
3150 
3151 	if (!sk->sk_lock.owned)
3152 		/*
3153 		 * Note : We must disable BH
3154 		 */
3155 		return false;
3156 
3157 	__lock_sock(sk);
3158 	sk->sk_lock.owned = 1;
3159 	spin_unlock(&sk->sk_lock.slock);
3160 	/*
3161 	 * The sk_lock has mutex_lock() semantics here:
3162 	 */
3163 	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
3164 	__acquire(&sk->sk_lock.slock);
3165 	local_bh_enable();
3166 	return true;
3167 }
3168 EXPORT_SYMBOL(lock_sock_fast);
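
/*
 * Illustrative sketch (assumption): callers pair lock_sock_fast() with
 * unlock_sock_fast(), handing back the returned "slow" flag so the right
 * unlock path is taken:
 *
 *	bool slow = lock_sock_fast(sk);
 *
 *	... short, non-blocking critical section ...
 *	unlock_sock_fast(sk, slow);
 */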
3169 
3170 int sock_gettstamp(struct socket *sock, void __user *userstamp,
3171 		   bool timeval, bool time32)
3172 {
3173 	struct sock *sk = sock->sk;
3174 	struct timespec64 ts;
3175 
3176 	sock_enable_timestamp(sk, SOCK_TIMESTAMP);
3177 	ts = ktime_to_timespec64(sock_read_timestamp(sk));
3178 	if (ts.tv_sec == -1)
3179 		return -ENOENT;
3180 	if (ts.tv_sec == 0) {
3181 		ktime_t kt = ktime_get_real();
3182 		sock_write_timestamp(sk, kt);
3183 		ts = ktime_to_timespec64(kt);
3184 	}
3185 
3186 	if (timeval)
3187 		ts.tv_nsec /= 1000;
3188 
3189 #ifdef CONFIG_COMPAT_32BIT_TIME
3190 	if (time32)
3191 		return put_old_timespec32(&ts, userstamp);
3192 #endif
3193 #ifdef CONFIG_SPARC64
3194 	/* beware of padding in sparc64 timeval */
3195 	if (timeval && !in_compat_syscall()) {
3196 		struct __kernel_old_timeval __user tv = {
3197 			.tv_sec = ts.tv_sec,
3198 			.tv_usec = ts.tv_nsec,
3199 		};
3200 		if (copy_to_user(userstamp, &tv, sizeof(tv)))
3201 			return -EFAULT;
3202 		return 0;
3203 	}
3204 #endif
3205 	return put_timespec64(&ts, userstamp);
3206 }
3207 EXPORT_SYMBOL(sock_gettstamp);
3208 
3209 void sock_enable_timestamp(struct sock *sk, enum sock_flags flag)
3210 {
3211 	if (!sock_flag(sk, flag)) {
3212 		unsigned long previous_flags = sk->sk_flags;
3213 
3214 		sock_set_flag(sk, flag);
3215 		/*
3216 		 * we just set one of the two flags which require net
3217 		 * time stamping, but time stamping might have been on
3218 		 * already because of the other one
3219 		 */
3220 		if (sock_needs_netstamp(sk) &&
3221 		    !(previous_flags & SK_FLAGS_TIMESTAMP))
3222 			net_enable_timestamp();
3223 	}
3224 }
3225 
3226 int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
3227 		       int level, int type)
3228 {
3229 	struct sock_exterr_skb *serr;
3230 	struct sk_buff *skb;
3231 	int copied, err;
3232 
3233 	err = -EAGAIN;
3234 	skb = sock_dequeue_err_skb(sk);
3235 	if (skb == NULL)
3236 		goto out;
3237 
3238 	copied = skb->len;
3239 	if (copied > len) {
3240 		msg->msg_flags |= MSG_TRUNC;
3241 		copied = len;
3242 	}
3243 	err = skb_copy_datagram_msg(skb, 0, msg, copied);
3244 	if (err)
3245 		goto out_free_skb;
3246 
3247 	sock_recv_timestamp(msg, sk, skb);
3248 
3249 	serr = SKB_EXT_ERR(skb);
3250 	put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);
3251 
3252 	msg->msg_flags |= MSG_ERRQUEUE;
3253 	err = copied;
3254 
3255 out_free_skb:
3256 	kfree_skb(skb);
3257 out:
3258 	return err;
3259 }
3260 EXPORT_SYMBOL(sock_recv_errqueue);
3261 
3262 /*
3263  *	Get a socket option on a socket.
3264  *
3265  *	FIX: POSIX 1003.1g is very ambiguous here. It states that
3266  *	asynchronous errors should be reported by getsockopt. We assume
3267  *	this means if you specify SO_ERROR (otherwise whats the point of it).
3268  */
3269 int sock_common_getsockopt(struct socket *sock, int level, int optname,
3270 			   char __user *optval, int __user *optlen)
3271 {
3272 	struct sock *sk = sock->sk;
3273 
3274 	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
3275 }
3276 EXPORT_SYMBOL(sock_common_getsockopt);
3277 
3278 int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
3279 			int flags)
3280 {
3281 	struct sock *sk = sock->sk;
3282 	int addr_len = 0;
3283 	int err;
3284 
3285 	err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
3286 				   flags & ~MSG_DONTWAIT, &addr_len);
3287 	if (err >= 0)
3288 		msg->msg_namelen = addr_len;
3289 	return err;
3290 }
3291 EXPORT_SYMBOL(sock_common_recvmsg);
3292 
3293 /*
3294  *	Set socket options on an inet socket.
3295  */
3296 int sock_common_setsockopt(struct socket *sock, int level, int optname,
3297 			   sockptr_t optval, unsigned int optlen)
3298 {
3299 	struct sock *sk = sock->sk;
3300 
3301 	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
3302 }
3303 EXPORT_SYMBOL(sock_common_setsockopt);
3304 
3305 void sk_common_release(struct sock *sk)
3306 {
3307 	if (sk->sk_prot->destroy)
3308 		sk->sk_prot->destroy(sk);
3309 
3310 	/*
3311 	 * Observation: when sk_common_release is called, processes have
3312 	 * no access to the socket. But the network stack still does.
3313 	 * Step one, detach it from networking:
3314 	 *
3315 	 * A. Remove from hash tables.
3316 	 */
3317 
3318 	sk->sk_prot->unhash(sk);
3319 
3320 	/*
3321 	 * At this point the socket cannot receive new packets, but it is possible
3322 	 * that some packets are in flight because some CPU is running the receiver
3323 	 * and did a hash table lookup before we unhashed the socket. They will
3324 	 * reach the receive queue and will be purged by the socket destructor.
3325 	 *
3326 	 * Also we still have packets pending on the receive queue and, probably,
3327 	 * our own packets waiting in device queues. sock_destroy will drain the
3328 	 * receive queue, but transmitted packets will delay socket destruction
3329 	 * until the last reference is released.
3330 	 */
3331 
3332 	sock_orphan(sk);
3333 
3334 	xfrm_sk_free_policy(sk);
3335 
3336 	sk_refcnt_debug_release(sk);
3337 
3338 	sock_put(sk);
3339 }
3340 EXPORT_SYMBOL(sk_common_release);
3341 
3342 void sk_get_meminfo(const struct sock *sk, u32 *mem)
3343 {
3344 	memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
3345 
3346 	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
3347 	mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
3348 	mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
3349 	mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
3350 	mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
3351 	mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
3352 	mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
3353 	mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
3354 	mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
3355 }
3356 
3357 #ifdef CONFIG_PROC_FS
3358 #define PROTO_INUSE_NR	64	/* should be enough for the first time */
3359 struct prot_inuse {
3360 	int val[PROTO_INUSE_NR];
3361 };
3362 
3363 static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
3364 
3365 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
3366 {
3367 	__this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val);
3368 }
3369 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
3370 
3371 int sock_prot_inuse_get(struct net *net, struct proto *prot)
3372 {
3373 	int cpu, idx = prot->inuse_idx;
3374 	int res = 0;
3375 
3376 	for_each_possible_cpu(cpu)
3377 		res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx];
3378 
3379 	return res >= 0 ? res : 0;
3380 }
3381 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
3382 
3383 static void sock_inuse_add(struct net *net, int val)
3384 {
3385 	this_cpu_add(*net->core.sock_inuse, val);
3386 }
3387 
3388 int sock_inuse_get(struct net *net)
3389 {
3390 	int cpu, res = 0;
3391 
3392 	for_each_possible_cpu(cpu)
3393 		res += *per_cpu_ptr(net->core.sock_inuse, cpu);
3394 
3395 	return res;
3396 }
3397 
3398 EXPORT_SYMBOL_GPL(sock_inuse_get);
3399 
3400 static int __net_init sock_inuse_init_net(struct net *net)
3401 {
3402 	net->core.prot_inuse = alloc_percpu(struct prot_inuse);
3403 	if (net->core.prot_inuse == NULL)
3404 		return -ENOMEM;
3405 
3406 	net->core.sock_inuse = alloc_percpu(int);
3407 	if (net->core.sock_inuse == NULL)
3408 		goto out;
3409 
3410 	return 0;
3411 
3412 out:
3413 	free_percpu(net->core.prot_inuse);
3414 	return -ENOMEM;
3415 }
3416 
3417 static void __net_exit sock_inuse_exit_net(struct net *net)
3418 {
3419 	free_percpu(net->core.prot_inuse);
3420 	free_percpu(net->core.sock_inuse);
3421 }
3422 
3423 static struct pernet_operations net_inuse_ops = {
3424 	.init = sock_inuse_init_net,
3425 	.exit = sock_inuse_exit_net,
3426 };
3427 
3428 static __init int net_inuse_init(void)
3429 {
3430 	if (register_pernet_subsys(&net_inuse_ops))
3431 		panic("Cannot initialize net inuse counters");
3432 
3433 	return 0;
3434 }
3435 
3436 core_initcall(net_inuse_init);
3437 
3438 static int assign_proto_idx(struct proto *prot)
3439 {
3440 	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
3441 
3442 	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
3443 		pr_err("PROTO_INUSE_NR exhausted\n");
3444 		return -ENOSPC;
3445 	}
3446 
3447 	set_bit(prot->inuse_idx, proto_inuse_idx);
3448 	return 0;
3449 }
3450 
3451 static void release_proto_idx(struct proto *prot)
3452 {
3453 	if (prot->inuse_idx != PROTO_INUSE_NR - 1)
3454 		clear_bit(prot->inuse_idx, proto_inuse_idx);
3455 }
3456 #else
3457 static inline int assign_proto_idx(struct proto *prot)
3458 {
3459 	return 0;
3460 }
3461 
3462 static inline void release_proto_idx(struct proto *prot)
3463 {
3464 }
3465 
3466 static void sock_inuse_add(struct net *net, int val)
3467 {
3468 }
3469 #endif
3470 
3471 static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot)
3472 {
3473 	if (!twsk_prot)
3474 		return;
3475 	kfree(twsk_prot->twsk_slab_name);
3476 	twsk_prot->twsk_slab_name = NULL;
3477 	kmem_cache_destroy(twsk_prot->twsk_slab);
3478 	twsk_prot->twsk_slab = NULL;
3479 }
3480 
3481 static int tw_prot_init(const struct proto *prot)
3482 {
3483 	struct timewait_sock_ops *twsk_prot = prot->twsk_prot;
3484 
3485 	if (!twsk_prot)
3486 		return 0;
3487 
3488 	twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s",
3489 					      prot->name);
3490 	if (!twsk_prot->twsk_slab_name)
3491 		return -ENOMEM;
3492 
3493 	twsk_prot->twsk_slab =
3494 		kmem_cache_create(twsk_prot->twsk_slab_name,
3495 				  twsk_prot->twsk_obj_size, 0,
3496 				  SLAB_ACCOUNT | prot->slab_flags,
3497 				  NULL);
3498 	if (!twsk_prot->twsk_slab) {
3499 		pr_crit("%s: Can't create timewait sock SLAB cache!\n",
3500 			prot->name);
3501 		return -ENOMEM;
3502 	}
3503 
3504 	return 0;
3505 }
3506 
3507 static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
3508 {
3509 	if (!rsk_prot)
3510 		return;
3511 	kfree(rsk_prot->slab_name);
3512 	rsk_prot->slab_name = NULL;
3513 	kmem_cache_destroy(rsk_prot->slab);
3514 	rsk_prot->slab = NULL;
3515 }
3516 
3517 static int req_prot_init(const struct proto *prot)
3518 {
3519 	struct request_sock_ops *rsk_prot = prot->rsk_prot;
3520 
3521 	if (!rsk_prot)
3522 		return 0;
3523 
3524 	rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s",
3525 					prot->name);
3526 	if (!rsk_prot->slab_name)
3527 		return -ENOMEM;
3528 
3529 	rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
3530 					   rsk_prot->obj_size, 0,
3531 					   SLAB_ACCOUNT | prot->slab_flags,
3532 					   NULL);
3533 
3534 	if (!rsk_prot->slab) {
3535 		pr_crit("%s: Can't create request sock SLAB cache!\n",
3536 			prot->name);
3537 		return -ENOMEM;
3538 	}
3539 	return 0;
3540 }
3541 
3542 int proto_register(struct proto *prot, int alloc_slab)
3543 {
3544 	int ret = -ENOBUFS;
3545 
3546 	if (alloc_slab) {
3547 		prot->slab = kmem_cache_create_usercopy(prot->name,
3548 					prot->obj_size, 0,
3549 					SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT |
3550 					prot->slab_flags,
3551 					prot->useroffset, prot->usersize,
3552 					NULL);
3553 
3554 		if (prot->slab == NULL) {
3555 			pr_crit("%s: Can't create sock SLAB cache!\n",
3556 				prot->name);
3557 			goto out;
3558 		}
3559 
3560 		if (req_prot_init(prot))
3561 			goto out_free_request_sock_slab;
3562 
3563 		if (tw_prot_init(prot))
3564 			goto out_free_timewait_sock_slab;
3565 	}
3566 
3567 	mutex_lock(&proto_list_mutex);
3568 	ret = assign_proto_idx(prot);
3569 	if (ret) {
3570 		mutex_unlock(&proto_list_mutex);
3571 		goto out_free_timewait_sock_slab;
3572 	}
3573 	list_add(&prot->node, &proto_list);
3574 	mutex_unlock(&proto_list_mutex);
3575 	return ret;
3576 
3577 out_free_timewait_sock_slab:
3578 	if (alloc_slab)
3579 		tw_prot_cleanup(prot->twsk_prot);
3580 out_free_request_sock_slab:
3581 	if (alloc_slab) {
3582 		req_prot_cleanup(prot->rsk_prot);
3583 
3584 		kmem_cache_destroy(prot->slab);
3585 		prot->slab = NULL;
3586 	}
3587 out:
3588 	return ret;
3589 }
3590 EXPORT_SYMBOL(proto_register);
3591 
3592 void proto_unregister(struct proto *prot)
3593 {
3594 	mutex_lock(&proto_list_mutex);
3595 	release_proto_idx(prot);
3596 	list_del(&prot->node);
3597 	mutex_unlock(&proto_list_mutex);
3598 
3599 	kmem_cache_destroy(prot->slab);
3600 	prot->slab = NULL;
3601 
3602 	req_prot_cleanup(prot->rsk_prot);
3603 	tw_prot_cleanup(prot->twsk_prot);
3604 }
3605 EXPORT_SYMBOL(proto_unregister);
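
/*
 * Illustrative sketch (assumption, with made-up my_proto/my_sock names):
 * a protocol module would typically register its struct proto from module
 * init and unregister it on exit:
 *
 *	static struct proto my_proto = {
 *		.name		= "MYPROTO",
 *		.owner		= THIS_MODULE,
 *		.obj_size	= sizeof(struct my_sock),
 *	};
 *
 *	err = proto_register(&my_proto, 1);	(1 = also create a slab cache)
 *	...
 *	proto_unregister(&my_proto);
 */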
3606 
3607 int sock_load_diag_module(int family, int protocol)
3608 {
3609 	if (!protocol) {
3610 		if (!sock_is_registered(family))
3611 			return -ENOENT;
3612 
3613 		return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
3614 				      NETLINK_SOCK_DIAG, family);
3615 	}
3616 
3617 #ifdef CONFIG_INET
3618 	if (family == AF_INET &&
3619 	    protocol != IPPROTO_RAW &&
3620 	    protocol < MAX_INET_PROTOS &&
3621 	    !rcu_access_pointer(inet_protos[protocol]))
3622 		return -ENOENT;
3623 #endif
3624 
3625 	return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
3626 			      NETLINK_SOCK_DIAG, family, protocol);
3627 }
3628 EXPORT_SYMBOL(sock_load_diag_module);
3629 
3630 #ifdef CONFIG_PROC_FS
3631 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
3632 	__acquires(proto_list_mutex)
3633 {
3634 	mutex_lock(&proto_list_mutex);
3635 	return seq_list_start_head(&proto_list, *pos);
3636 }
3637 
3638 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3639 {
3640 	return seq_list_next(v, &proto_list, pos);
3641 }
3642 
3643 static void proto_seq_stop(struct seq_file *seq, void *v)
3644 	__releases(proto_list_mutex)
3645 {
3646 	mutex_unlock(&proto_list_mutex);
3647 }
3648 
3649 static char proto_method_implemented(const void *method)
3650 {
3651 	return method == NULL ? 'n' : 'y';
3652 }
3653 static long sock_prot_memory_allocated(struct proto *proto)
3654 {
3655 	return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
3656 }
3657 
3658 static const char *sock_prot_memory_pressure(struct proto *proto)
3659 {
3660 	return proto->memory_pressure != NULL ?
3661 	proto_memory_pressure(proto) ? "yes" : "no" : "NI";
3662 }
3663 
3664 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
3665 {
3666 
3667 	seq_printf(seq, "%-9s %4u %6d  %6ld   %-3s %6u   %-3s  %-10s "
3668 			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
3669 		   proto->name,
3670 		   proto->obj_size,
3671 		   sock_prot_inuse_get(seq_file_net(seq), proto),
3672 		   sock_prot_memory_allocated(proto),
3673 		   sock_prot_memory_pressure(proto),
3674 		   proto->max_header,
3675 		   proto->slab == NULL ? "no" : "yes",
3676 		   module_name(proto->owner),
3677 		   proto_method_implemented(proto->close),
3678 		   proto_method_implemented(proto->connect),
3679 		   proto_method_implemented(proto->disconnect),
3680 		   proto_method_implemented(proto->accept),
3681 		   proto_method_implemented(proto->ioctl),
3682 		   proto_method_implemented(proto->init),
3683 		   proto_method_implemented(proto->destroy),
3684 		   proto_method_implemented(proto->shutdown),
3685 		   proto_method_implemented(proto->setsockopt),
3686 		   proto_method_implemented(proto->getsockopt),
3687 		   proto_method_implemented(proto->sendmsg),
3688 		   proto_method_implemented(proto->recvmsg),
3689 		   proto_method_implemented(proto->sendpage),
3690 		   proto_method_implemented(proto->bind),
3691 		   proto_method_implemented(proto->backlog_rcv),
3692 		   proto_method_implemented(proto->hash),
3693 		   proto_method_implemented(proto->unhash),
3694 		   proto_method_implemented(proto->get_port),
3695 		   proto_method_implemented(proto->enter_memory_pressure));
3696 }
3697 
3698 static int proto_seq_show(struct seq_file *seq, void *v)
3699 {
3700 	if (v == &proto_list)
3701 		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
3702 			   "protocol",
3703 			   "size",
3704 			   "sockets",
3705 			   "memory",
3706 			   "press",
3707 			   "maxhdr",
3708 			   "slab",
3709 			   "module",
3710 			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
3711 	else
3712 		proto_seq_printf(seq, list_entry(v, struct proto, node));
3713 	return 0;
3714 }
3715 
3716 static const struct seq_operations proto_seq_ops = {
3717 	.start  = proto_seq_start,
3718 	.next   = proto_seq_next,
3719 	.stop   = proto_seq_stop,
3720 	.show   = proto_seq_show,
3721 };
3722 
3723 static __net_init int proto_init_net(struct net *net)
3724 {
3725 	if (!proc_create_net("protocols", 0444, net->proc_net, &proto_seq_ops,
3726 			sizeof(struct seq_net_private)))
3727 		return -ENOMEM;
3728 
3729 	return 0;
3730 }
3731 
3732 static __net_exit void proto_exit_net(struct net *net)
3733 {
3734 	remove_proc_entry("protocols", net->proc_net);
3735 }
3736 
3737 
3738 static __net_initdata struct pernet_operations proto_net_ops = {
3739 	.init = proto_init_net,
3740 	.exit = proto_exit_net,
3741 };
3742 
3743 static int __init proto_init(void)
3744 {
3745 	return register_pernet_subsys(&proto_net_ops);
3746 }
3747 
3748 subsys_initcall(proto_init);
3749 
3750 #endif /* PROC_FS */
3751 
3752 #ifdef CONFIG_NET_RX_BUSY_POLL
3753 bool sk_busy_loop_end(void *p, unsigned long start_time)
3754 {
3755 	struct sock *sk = p;
3756 
3757 	return !skb_queue_empty_lockless(&sk->sk_receive_queue) ||
3758 	       sk_busy_loop_timeout(sk, start_time);
3759 }
3760 EXPORT_SYMBOL(sk_busy_loop_end);
3761 #endif /* CONFIG_NET_RX_BUSY_POLL */
3762 
3763 int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len)
3764 {
3765 	if (!sk->sk_prot->bind_add)
3766 		return -EOPNOTSUPP;
3767 	return sk->sk_prot->bind_add(sk, addr, addr_len);
3768 }
3769 EXPORT_SYMBOL(sock_bind_add);
3770