xref: /openbmc/linux/net/core/sock.c (revision e868d61272caa648214046a096e5a6bfc068dc8c)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Generic socket support routines. Memory allocators, socket lock/release
7  *		handler for protocols to use and generic option handler.
8  *
9  *
10  * Version:	$Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
11  *
12  * Authors:	Ross Biro
13  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *		Florian La Roche, <flla@stud.uni-sb.de>
15  *		Alan Cox, <A.Cox@swansea.ac.uk>
16  *
17  * Fixes:
18  *		Alan Cox	: 	Numerous verify_area() problems
19  *		Alan Cox	:	Connecting on a connecting socket
20  *					now returns an error for tcp.
21  *		Alan Cox	:	sock->protocol is set correctly.
22  *					and is not sometimes left as 0.
23  *		Alan Cox	:	connect handles icmp errors on a
24  *					connect properly. Unfortunately there
25  *					is a restart syscall nasty there. I
26  *					can't match BSD without hacking the C
27  *					library. Ideas urgently sought!
28  *		Alan Cox	:	Disallow bind() to addresses that are
29  *					not ours - especially broadcast ones!!
30  *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
31  *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
32  *					instead they leave that for the DESTROY timer.
33  *		Alan Cox	:	Clean up error flag in accept
34  *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
35  *					was buggy. Put a remove_sock() in the handler
36  *					for memory when we hit 0. Also altered the timer
37  *					code. The ACK stuff can wait and needs major
38  *					TCP layer surgery.
39  *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
40  *					and fixed timer/inet_bh race.
41  *		Alan Cox	:	Added zapped flag for TCP
42  *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
43  *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
44  *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
45  *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
46  *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
47  *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
48  *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
49  *	Pauline Middelink	:	identd support
50  *		Alan Cox	:	Fixed connect() taking signals I think.
51  *		Alan Cox	:	SO_LINGER supported
52  *		Alan Cox	:	Error reporting fixes
53  *		Anonymous	:	inet_create tidied up (sk->reuse setting)
54  *		Alan Cox	:	inet sockets don't set sk->type!
55  *		Alan Cox	:	Split socket option code
56  *		Alan Cox	:	Callbacks
57  *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
58  *		Alex		:	Removed restriction on inet fioctl
59  *		Alan Cox	:	Splitting INET from NET core
60  *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
61  *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
62  *		Alan Cox	:	Split IP from generic code
63  *		Alan Cox	:	New kfree_skbmem()
64  *		Alan Cox	:	Make SO_DEBUG superuser only.
65  *		Alan Cox	:	Allow anyone to clear SO_DEBUG
66  *					(compatibility fix)
67  *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
68  *		Alan Cox	:	Allocator for a socket is settable.
69  *		Alan Cox	:	SO_ERROR includes soft errors.
70  *		Alan Cox	:	Allow NULL arguments on some SO_ opts
71  *		Alan Cox	: 	Generic socket allocation to make hooks
72  *					easier (suggested by Craig Metz).
73  *		Michael Pall	:	SO_ERROR returns positive errno again
74  *              Steve Whitehouse:       Added default destructor to free
75  *                                      protocol private data.
76  *              Steve Whitehouse:       Added various other default routines
77  *                                      common to several socket families.
78  *              Chris Evans     :       Call suser() check last on F_SETOWN
79  *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
80  *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
81  *		Andi Kleen	:	Fix write_space callback
82  *		Chris Evans	:	Security fixes - signedness again
83  *		Arnaldo C. Melo :       cleanups, use skb_queue_purge
84  *
85  * To Fix:
86  *
87  *
88  *		This program is free software; you can redistribute it and/or
89  *		modify it under the terms of the GNU General Public License
90  *		as published by the Free Software Foundation; either version
91  *		2 of the License, or (at your option) any later version.
92  */
93 
94 #include <linux/capability.h>
95 #include <linux/errno.h>
96 #include <linux/types.h>
97 #include <linux/socket.h>
98 #include <linux/in.h>
99 #include <linux/kernel.h>
100 #include <linux/module.h>
101 #include <linux/proc_fs.h>
102 #include <linux/seq_file.h>
103 #include <linux/sched.h>
104 #include <linux/timer.h>
105 #include <linux/string.h>
106 #include <linux/sockios.h>
107 #include <linux/net.h>
108 #include <linux/mm.h>
109 #include <linux/slab.h>
110 #include <linux/interrupt.h>
111 #include <linux/poll.h>
112 #include <linux/tcp.h>
113 #include <linux/init.h>
114 #include <linux/highmem.h>
115 
116 #include <asm/uaccess.h>
117 #include <asm/system.h>
118 
119 #include <linux/netdevice.h>
120 #include <net/protocol.h>
121 #include <linux/skbuff.h>
122 #include <net/request_sock.h>
123 #include <net/sock.h>
124 #include <net/xfrm.h>
125 #include <linux/ipsec.h>
126 
127 #include <linux/filter.h>
128 
129 #ifdef CONFIG_INET
130 #include <net/tcp.h>
131 #endif
132 
133 /*
134  * Each address family might have different locking rules, so we have
135  * one slock key per address family:
136  */
137 static struct lock_class_key af_family_keys[AF_MAX];
138 static struct lock_class_key af_family_slock_keys[AF_MAX];
139 
140 #ifdef CONFIG_DEBUG_LOCK_ALLOC
141 /*
142  * Make lock validator output more readable. (we pre-construct these
143  * strings build-time, so that runtime initialization of socket
144  * locks is fast):
145  */
146 static const char *af_family_key_strings[AF_MAX+1] = {
147   "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
148   "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
149   "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
150   "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
151   "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
152   "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
153   "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
154   "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
155   "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
156   "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
157   "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
158   "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
159 };
160 static const char *af_family_slock_key_strings[AF_MAX+1] = {
161   "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
162   "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
163   "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
164   "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
165   "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
166   "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
167   "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
168   "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
169   "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
170   "slock-27"       , "slock-28"          , "slock-29"          ,
171   "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
172   "slock-AF_RXRPC" , "slock-AF_MAX"
173 };
174 #endif
175 
176 /*
177  * sk_callback_lock locking rules are per-address-family,
178  * so split the lock classes by using a per-AF key:
179  */
180 static struct lock_class_key af_callback_keys[AF_MAX];
181 
182 /* Take into consideration the size of the struct sk_buff overhead in the
183  * determination of these values, since that is non-constant across
184  * platforms.  This makes socket queueing behavior and performance
185  * not depend upon such differences.
186  */
187 #define _SK_MEM_PACKETS		256
188 #define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
189 #define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
190 #define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
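
/*
 * Illustrative arithmetic (not part of the build): on a platform where
 * sizeof(struct sk_buff) happened to be 256 bytes, the defaults above
 * would work out to
 *
 *	_SK_MEM_OVERHEAD = 256 + 256         = 512 bytes per packet
 *	SK_WMEM_MAX      = 512 * 256 packets = 131072 bytes (128 KiB)
 *
 * i.e. room for _SK_MEM_PACKETS worst-case packets regardless of the
 * per-platform sk_buff size.
 */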
191 
192 /* Run time adjustable parameters. */
193 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
194 __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
195 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
196 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
197 
198 /* Maximal space eaten by iovec or ancillary data plus some space */
199 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
200 
201 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
202 {
203 	struct timeval tv;
204 
205 	if (optlen < sizeof(tv))
206 		return -EINVAL;
207 	if (copy_from_user(&tv, optval, sizeof(tv)))
208 		return -EFAULT;
209 
210 	*timeo_p = MAX_SCHEDULE_TIMEOUT;
211 	if (tv.tv_sec == 0 && tv.tv_usec == 0)
212 		return 0;
213 	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
214 		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
215 	return 0;
216 }
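
/*
 * Worked example for the conversion above (illustrative): with HZ == 1000,
 * a user-supplied timeval of { .tv_sec = 1, .tv_usec = 500000 } becomes
 *
 *	*timeo_p = 1 * 1000 + (500000 + 999) / 1000 = 1500 jiffies
 *
 * while { 0, 0 } leaves *timeo_p at MAX_SCHEDULE_TIMEOUT, i.e. "wait
 * forever". Rounding tv_usec up ensures the timeout is never shorter than
 * requested.
 */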
217 
218 static void sock_warn_obsolete_bsdism(const char *name)
219 {
220 	static int warned;
221 	static char warncomm[TASK_COMM_LEN];
222 	if (strcmp(warncomm, current->comm) && warned < 5) {
223 		strcpy(warncomm,  current->comm);
224 		printk(KERN_WARNING "process `%s' is using obsolete "
225 		       "%s SO_BSDCOMPAT\n", warncomm, name);
226 		warned++;
227 	}
228 }
229 
230 static void sock_disable_timestamp(struct sock *sk)
231 {
232 	if (sock_flag(sk, SOCK_TIMESTAMP)) {
233 		sock_reset_flag(sk, SOCK_TIMESTAMP);
234 		net_disable_timestamp();
235 	}
236 }
237 
238 
239 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
240 {
241 	int err = 0;
242 	int skb_len;
243 
244 	/* Cast sk->sk_rcvbuf to unsigned... It's pointless, but reduces
245 	   the number of warnings when compiling with -W --ANK
246 	 */
247 	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
248 	    (unsigned)sk->sk_rcvbuf) {
249 		err = -ENOMEM;
250 		goto out;
251 	}
252 
253 	err = sk_filter(sk, skb);
254 	if (err)
255 		goto out;
256 
257 	skb->dev = NULL;
258 	skb_set_owner_r(skb, sk);
259 
260 	/* Cache the SKB length before we tack it onto the receive
261 	 * queue.  Once it is added it no longer belongs to us and
262 	 * may be freed by other threads of control pulling packets
263 	 * from the queue.
264 	 */
265 	skb_len = skb->len;
266 
267 	skb_queue_tail(&sk->sk_receive_queue, skb);
268 
269 	if (!sock_flag(sk, SOCK_DEAD))
270 		sk->sk_data_ready(sk, skb_len);
271 out:
272 	return err;
273 }
274 EXPORT_SYMBOL(sock_queue_rcv_skb);
275 
276 int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
277 {
278 	int rc = NET_RX_SUCCESS;
279 
280 	if (sk_filter(sk, skb))
281 		goto discard_and_relse;
282 
283 	skb->dev = NULL;
284 
285 	if (nested)
286 		bh_lock_sock_nested(sk);
287 	else
288 		bh_lock_sock(sk);
289 	if (!sock_owned_by_user(sk)) {
290 		/*
291 		 * trylock + unlock semantics:
292 		 */
293 		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
294 
295 		rc = sk->sk_backlog_rcv(sk, skb);
296 
297 		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
298 	} else
299 		sk_add_backlog(sk, skb);
300 	bh_unlock_sock(sk);
301 out:
302 	sock_put(sk);
303 	return rc;
304 discard_and_relse:
305 	kfree_skb(skb);
306 	goto out;
307 }
308 EXPORT_SYMBOL(sk_receive_skb);
309 
310 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
311 {
312 	struct dst_entry *dst = sk->sk_dst_cache;
313 
314 	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
315 		sk->sk_dst_cache = NULL;
316 		dst_release(dst);
317 		return NULL;
318 	}
319 
320 	return dst;
321 }
322 EXPORT_SYMBOL(__sk_dst_check);
323 
324 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
325 {
326 	struct dst_entry *dst = sk_dst_get(sk);
327 
328 	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
329 		sk_dst_reset(sk);
330 		dst_release(dst);
331 		return NULL;
332 	}
333 
334 	return dst;
335 }
336 EXPORT_SYMBOL(sk_dst_check);
337 
338 /*
339  *	This is meant for all protocols to use and covers goings on
340  *	at the socket level. Everything here is generic.
341  */
342 
343 int sock_setsockopt(struct socket *sock, int level, int optname,
344 		    char __user *optval, int optlen)
345 {
346 	struct sock *sk=sock->sk;
347 	struct sk_filter *filter;
348 	int val;
349 	int valbool;
350 	struct linger ling;
351 	int ret = 0;
352 
353 	/*
354 	 *	Options without arguments
355 	 */
356 
357 #ifdef SO_DONTLINGER		/* Compatibility item... */
358 	if (optname == SO_DONTLINGER) {
359 		lock_sock(sk);
360 		sock_reset_flag(sk, SOCK_LINGER);
361 		release_sock(sk);
362 		return 0;
363 	}
364 #endif
365 
366 	if (optlen < sizeof(int))
367 		return -EINVAL;
368 
369 	if (get_user(val, (int __user *)optval))
370 		return -EFAULT;
371 
372 	valbool = val?1:0;
373 
374 	lock_sock(sk);
375 
376 	switch(optname) {
377 	case SO_DEBUG:
378 		if (val && !capable(CAP_NET_ADMIN)) {
379 			ret = -EACCES;
380 		}
381 		else if (valbool)
382 			sock_set_flag(sk, SOCK_DBG);
383 		else
384 			sock_reset_flag(sk, SOCK_DBG);
385 		break;
386 	case SO_REUSEADDR:
387 		sk->sk_reuse = valbool;
388 		break;
389 	case SO_TYPE:
390 	case SO_ERROR:
391 		ret = -ENOPROTOOPT;
392 		break;
393 	case SO_DONTROUTE:
394 		if (valbool)
395 			sock_set_flag(sk, SOCK_LOCALROUTE);
396 		else
397 			sock_reset_flag(sk, SOCK_LOCALROUTE);
398 		break;
399 	case SO_BROADCAST:
400 		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
401 		break;
402 	case SO_SNDBUF:
403 		/* Don't error on this; BSD doesn't, and if you think
404 		   about it this is right. Otherwise apps have to
405 		   play 'guess the biggest size' games. RCVBUF/SNDBUF
406 		   are treated in BSD as hints */
407 
408 		if (val > sysctl_wmem_max)
409 			val = sysctl_wmem_max;
410 set_sndbuf:
411 		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
412 		if ((val * 2) < SOCK_MIN_SNDBUF)
413 			sk->sk_sndbuf = SOCK_MIN_SNDBUF;
414 		else
415 			sk->sk_sndbuf = val * 2;
416 
417 		/*
418 		 *	Wake up sending tasks if we
419 		 *	upped the value.
420 		 */
421 		sk->sk_write_space(sk);
422 		break;
423 
424 	case SO_SNDBUFFORCE:
425 		if (!capable(CAP_NET_ADMIN)) {
426 			ret = -EPERM;
427 			break;
428 		}
429 		goto set_sndbuf;
430 
431 	case SO_RCVBUF:
432 		/* Don't error on this; BSD doesn't, and if you think
433 		   about it this is right. Otherwise apps have to
434 		   play 'guess the biggest size' games. RCVBUF/SNDBUF
435 		   are treated in BSD as hints */
436 
437 		if (val > sysctl_rmem_max)
438 			val = sysctl_rmem_max;
439 set_rcvbuf:
440 		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
441 		/*
442 		 * We double it on the way in to account for
443 		 * "struct sk_buff" etc. overhead.   Applications
444 		 * assume that the SO_RCVBUF setting they make will
445 		 * allow that much actual data to be received on that
446 		 * socket.
447 		 *
448 		 * Applications are unaware that "struct sk_buff" and
449 		 * other overheads allocate from the receive buffer
450 		 * during socket buffer allocation.
451 		 *
452 		 * And after considering the possible alternatives,
453 		 * returning the value we actually used in getsockopt
454 		 * is the most desirable behavior.
455 		 */
456 		if ((val * 2) < SOCK_MIN_RCVBUF)
457 			sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
458 		else
459 			sk->sk_rcvbuf = val * 2;
460 		break;
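
		/*
		 * Illustrative user-space view of the doubling above (a
		 * sketch, not kernel code), assuming 65536 does not exceed
		 * sysctl_rmem_max:
		 *
		 *	int val = 65536;
		 *	socklen_t len = sizeof(val);
		 *	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
		 *	getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, &len);
		 *
		 * reads val back as 131072, the doubled value actually stored
		 * in sk_rcvbuf.
		 */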
461 
462 	case SO_RCVBUFFORCE:
463 		if (!capable(CAP_NET_ADMIN)) {
464 			ret = -EPERM;
465 			break;
466 		}
467 		goto set_rcvbuf;
468 
469 	case SO_KEEPALIVE:
470 #ifdef CONFIG_INET
471 		if (sk->sk_protocol == IPPROTO_TCP)
472 			tcp_set_keepalive(sk, valbool);
473 #endif
474 		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
475 		break;
476 
477 	case SO_OOBINLINE:
478 		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
479 		break;
480 
481 	case SO_NO_CHECK:
482 		sk->sk_no_check = valbool;
483 		break;
484 
485 	case SO_PRIORITY:
486 		if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
487 			sk->sk_priority = val;
488 		else
489 			ret = -EPERM;
490 		break;
491 
492 	case SO_LINGER:
493 		if (optlen < sizeof(ling)) {
494 			ret = -EINVAL;	/* 1003.1g */
495 			break;
496 		}
497 		if (copy_from_user(&ling,optval,sizeof(ling))) {
498 			ret = -EFAULT;
499 			break;
500 		}
501 		if (!ling.l_onoff)
502 			sock_reset_flag(sk, SOCK_LINGER);
503 		else {
504 #if (BITS_PER_LONG == 32)
505 			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
506 				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
507 			else
508 #endif
509 				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
510 			sock_set_flag(sk, SOCK_LINGER);
511 		}
512 		break;
513 
514 	case SO_BSDCOMPAT:
515 		sock_warn_obsolete_bsdism("setsockopt");
516 		break;
517 
518 	case SO_PASSCRED:
519 		if (valbool)
520 			set_bit(SOCK_PASSCRED, &sock->flags);
521 		else
522 			clear_bit(SOCK_PASSCRED, &sock->flags);
523 		break;
524 
525 	case SO_TIMESTAMP:
526 	case SO_TIMESTAMPNS:
527 		if (valbool)  {
528 			if (optname == SO_TIMESTAMP)
529 				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
530 			else
531 				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
532 			sock_set_flag(sk, SOCK_RCVTSTAMP);
533 			sock_enable_timestamp(sk);
534 		} else {
535 			sock_reset_flag(sk, SOCK_RCVTSTAMP);
536 			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
537 		}
538 		break;
539 
540 	case SO_RCVLOWAT:
541 		if (val < 0)
542 			val = INT_MAX;
543 		sk->sk_rcvlowat = val ? : 1;
544 		break;
545 
546 	case SO_RCVTIMEO:
547 		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
548 		break;
549 
550 	case SO_SNDTIMEO:
551 		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
552 		break;
553 
554 #ifdef CONFIG_NETDEVICES
555 	case SO_BINDTODEVICE:
556 	{
557 		char devname[IFNAMSIZ];
558 
559 		/* Sorry... */
560 		if (!capable(CAP_NET_RAW)) {
561 			ret = -EPERM;
562 			break;
563 		}
564 
565 		/* Bind this socket to a particular device like "eth0",
566 		 * as specified in the passed interface name. If the
567 		 * name is "" or the option length is zero the socket
568 		 * is not bound.
569 		 */
570 
571 		if (!valbool) {
572 			sk->sk_bound_dev_if = 0;
573 		} else {
574 			if (optlen > IFNAMSIZ - 1)
575 				optlen = IFNAMSIZ - 1;
576 			memset(devname, 0, sizeof(devname));
577 			if (copy_from_user(devname, optval, optlen)) {
578 				ret = -EFAULT;
579 				break;
580 			}
581 
582 			/* Remove any cached route for this socket. */
583 			sk_dst_reset(sk);
584 
585 			if (devname[0] == '\0') {
586 				sk->sk_bound_dev_if = 0;
587 			} else {
588 				struct net_device *dev = dev_get_by_name(devname);
589 				if (!dev) {
590 					ret = -ENODEV;
591 					break;
592 				}
593 				sk->sk_bound_dev_if = dev->ifindex;
594 				dev_put(dev);
595 			}
596 		}
597 		break;
598 	}
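
	/*
	 * Usage sketch (user space, illustrative only), assuming the caller
	 * has CAP_NET_RAW and an interface named "eth0" exists:
	 *
	 *	setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, "eth0", 5);
	 *
	 * Binding to the empty name afterwards clears sk_bound_dev_if again.
	 */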
599 #endif
600 
601 
602 	case SO_ATTACH_FILTER:
603 		ret = -EINVAL;
604 		if (optlen == sizeof(struct sock_fprog)) {
605 			struct sock_fprog fprog;
606 
607 			ret = -EFAULT;
608 			if (copy_from_user(&fprog, optval, sizeof(fprog)))
609 				break;
610 
611 			ret = sk_attach_filter(&fprog, sk);
612 		}
613 		break;
614 
615 	case SO_DETACH_FILTER:
616 		rcu_read_lock_bh();
617 		filter = rcu_dereference(sk->sk_filter);
618 		if (filter) {
619 			rcu_assign_pointer(sk->sk_filter, NULL);
620 			sk_filter_release(sk, filter);
621 			rcu_read_unlock_bh();
622 			break;
623 		}
624 		rcu_read_unlock_bh();
625 		ret = -ENONET;
626 		break;
627 
628 	case SO_PASSSEC:
629 		if (valbool)
630 			set_bit(SOCK_PASSSEC, &sock->flags);
631 		else
632 			clear_bit(SOCK_PASSSEC, &sock->flags);
633 		break;
634 
635 		/* We implement SO_SNDLOWAT etc. as not
636 		   settable (1003.1g 5.3) */
637 	default:
638 		ret = -ENOPROTOOPT;
639 		break;
640 	}
641 	release_sock(sk);
642 	return ret;
643 }
644 
645 
646 int sock_getsockopt(struct socket *sock, int level, int optname,
647 		    char __user *optval, int __user *optlen)
648 {
649 	struct sock *sk = sock->sk;
650 
651 	union {
652 		int val;
653 		struct linger ling;
654 		struct timeval tm;
655 	} v;
656 
657 	unsigned int lv = sizeof(int);
658 	int len;
659 
660 	if (get_user(len, optlen))
661 		return -EFAULT;
662 	if (len < 0)
663 		return -EINVAL;
664 
665 	switch(optname) {
666 	case SO_DEBUG:
667 		v.val = sock_flag(sk, SOCK_DBG);
668 		break;
669 
670 	case SO_DONTROUTE:
671 		v.val = sock_flag(sk, SOCK_LOCALROUTE);
672 		break;
673 
674 	case SO_BROADCAST:
675 		v.val = !!sock_flag(sk, SOCK_BROADCAST);
676 		break;
677 
678 	case SO_SNDBUF:
679 		v.val = sk->sk_sndbuf;
680 		break;
681 
682 	case SO_RCVBUF:
683 		v.val = sk->sk_rcvbuf;
684 		break;
685 
686 	case SO_REUSEADDR:
687 		v.val = sk->sk_reuse;
688 		break;
689 
690 	case SO_KEEPALIVE:
691 		v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
692 		break;
693 
694 	case SO_TYPE:
695 		v.val = sk->sk_type;
696 		break;
697 
698 	case SO_ERROR:
699 		v.val = -sock_error(sk);
700 		if (v.val==0)
701 			v.val = xchg(&sk->sk_err_soft, 0);
702 		break;
703 
704 	case SO_OOBINLINE:
705 		v.val = !!sock_flag(sk, SOCK_URGINLINE);
706 		break;
707 
708 	case SO_NO_CHECK:
709 		v.val = sk->sk_no_check;
710 		break;
711 
712 	case SO_PRIORITY:
713 		v.val = sk->sk_priority;
714 		break;
715 
716 	case SO_LINGER:
717 		lv		= sizeof(v.ling);
718 		v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
719 		v.ling.l_linger	= sk->sk_lingertime / HZ;
720 		break;
721 
722 	case SO_BSDCOMPAT:
723 		sock_warn_obsolete_bsdism("getsockopt");
724 		break;
725 
726 	case SO_TIMESTAMP:
727 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
728 				!sock_flag(sk, SOCK_RCVTSTAMPNS);
729 		break;
730 
731 	case SO_TIMESTAMPNS:
732 		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
733 		break;
734 
735 	case SO_RCVTIMEO:
736 		lv=sizeof(struct timeval);
737 		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
738 			v.tm.tv_sec = 0;
739 			v.tm.tv_usec = 0;
740 		} else {
741 			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
742 			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
743 		}
744 		break;
745 
746 	case SO_SNDTIMEO:
747 		lv=sizeof(struct timeval);
748 		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
749 			v.tm.tv_sec = 0;
750 			v.tm.tv_usec = 0;
751 		} else {
752 			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
753 			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
754 		}
755 		break;
756 
757 	case SO_RCVLOWAT:
758 		v.val = sk->sk_rcvlowat;
759 		break;
760 
761 	case SO_SNDLOWAT:
762 		v.val=1;
763 		break;
764 
765 	case SO_PASSCRED:
766 		v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
767 		break;
768 
769 	case SO_PEERCRED:
770 		if (len > sizeof(sk->sk_peercred))
771 			len = sizeof(sk->sk_peercred);
772 		if (copy_to_user(optval, &sk->sk_peercred, len))
773 			return -EFAULT;
774 		goto lenout;
775 
776 	case SO_PEERNAME:
777 	{
778 		char address[128];
779 
780 		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
781 			return -ENOTCONN;
782 		if (lv < len)
783 			return -EINVAL;
784 		if (copy_to_user(optval, address, len))
785 			return -EFAULT;
786 		goto lenout;
787 	}
788 
789 	/* Dubious BSD thing... Probably nobody even uses it, but
790 	 * the UNIX standard wants it for whatever reason... -DaveM
791 	 */
792 	case SO_ACCEPTCONN:
793 		v.val = sk->sk_state == TCP_LISTEN;
794 		break;
795 
796 	case SO_PASSSEC:
797 		v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
798 		break;
799 
800 	case SO_PEERSEC:
801 		return security_socket_getpeersec_stream(sock, optval, optlen, len);
802 
803 	default:
804 		return -ENOPROTOOPT;
805 	}
806 
807 	if (len > lv)
808 		len = lv;
809 	if (copy_to_user(optval, &v, len))
810 		return -EFAULT;
811 lenout:
812 	if (put_user(len, optlen))
813 		return -EFAULT;
814 	return 0;
815 }
816 
817 /*
818  * Initialize an sk_lock.
819  *
820  * (We also register the sk_lock with the lock validator.)
821  */
822 static inline void sock_lock_init(struct sock *sk)
823 {
824 	sock_lock_init_class_and_name(sk,
825 			af_family_slock_key_strings[sk->sk_family],
826 			af_family_slock_keys + sk->sk_family,
827 			af_family_key_strings[sk->sk_family],
828 			af_family_keys + sk->sk_family);
829 }
830 
831 /**
832  *	sk_alloc - All socket objects are allocated here
833  *	@family: protocol family
834  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
835  *	@prot: struct proto associated with this new sock instance
836  *	@zero_it: if we should zero the newly allocated sock
837  */
838 struct sock *sk_alloc(int family, gfp_t priority,
839 		      struct proto *prot, int zero_it)
840 {
841 	struct sock *sk = NULL;
842 	struct kmem_cache *slab = prot->slab;
843 
844 	if (slab != NULL)
845 		sk = kmem_cache_alloc(slab, priority);
846 	else
847 		sk = kmalloc(prot->obj_size, priority);
848 
849 	if (sk) {
850 		if (zero_it) {
851 			memset(sk, 0, prot->obj_size);
852 			sk->sk_family = family;
853 			/*
854 			 * See comment in struct sock definition to understand
855 			 * why we need sk_prot_creator -acme
856 			 */
857 			sk->sk_prot = sk->sk_prot_creator = prot;
858 			sock_lock_init(sk);
859 		}
860 
861 		if (security_sk_alloc(sk, family, priority))
862 			goto out_free;
863 
864 		if (!try_module_get(prot->owner))
865 			goto out_free;
866 	}
867 	return sk;
868 
869 out_free:
870 	if (slab != NULL)
871 		kmem_cache_free(slab, sk);
872 	else
873 		kfree(sk);
874 	return NULL;
875 }
876 
877 void sk_free(struct sock *sk)
878 {
879 	struct sk_filter *filter;
880 	struct module *owner = sk->sk_prot_creator->owner;
881 
882 	if (sk->sk_destruct)
883 		sk->sk_destruct(sk);
884 
885 	filter = rcu_dereference(sk->sk_filter);
886 	if (filter) {
887 		sk_filter_release(sk, filter);
888 		rcu_assign_pointer(sk->sk_filter, NULL);
889 	}
890 
891 	sock_disable_timestamp(sk);
892 
893 	if (atomic_read(&sk->sk_omem_alloc))
894 		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
895 		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
896 
897 	security_sk_free(sk);
898 	if (sk->sk_prot_creator->slab != NULL)
899 		kmem_cache_free(sk->sk_prot_creator->slab, sk);
900 	else
901 		kfree(sk);
902 	module_put(owner);
903 }
904 
905 struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
906 {
907 	struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);
908 
909 	if (newsk != NULL) {
910 		struct sk_filter *filter;
911 
912 		sock_copy(newsk, sk);
913 
914 		/* SANITY */
915 		sk_node_init(&newsk->sk_node);
916 		sock_lock_init(newsk);
917 		bh_lock_sock(newsk);
918 		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
919 
920 		atomic_set(&newsk->sk_rmem_alloc, 0);
921 		atomic_set(&newsk->sk_wmem_alloc, 0);
922 		atomic_set(&newsk->sk_omem_alloc, 0);
923 		skb_queue_head_init(&newsk->sk_receive_queue);
924 		skb_queue_head_init(&newsk->sk_write_queue);
925 #ifdef CONFIG_NET_DMA
926 		skb_queue_head_init(&newsk->sk_async_wait_queue);
927 #endif
928 
929 		rwlock_init(&newsk->sk_dst_lock);
930 		rwlock_init(&newsk->sk_callback_lock);
931 		lockdep_set_class(&newsk->sk_callback_lock,
932 				   af_callback_keys + newsk->sk_family);
933 
934 		newsk->sk_dst_cache	= NULL;
935 		newsk->sk_wmem_queued	= 0;
936 		newsk->sk_forward_alloc = 0;
937 		newsk->sk_send_head	= NULL;
938 		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
939 
940 		sock_reset_flag(newsk, SOCK_DONE);
941 		skb_queue_head_init(&newsk->sk_error_queue);
942 
943 		filter = newsk->sk_filter;
944 		if (filter != NULL)
945 			sk_filter_charge(newsk, filter);
946 
947 		if (unlikely(xfrm_sk_clone_policy(newsk))) {
948 			/* It is still a raw copy of the parent, so invalidate
949 			 * the destructor and do a plain sk_free() */
950 			newsk->sk_destruct = NULL;
951 			sk_free(newsk);
952 			newsk = NULL;
953 			goto out;
954 		}
955 
956 		newsk->sk_err	   = 0;
957 		newsk->sk_priority = 0;
958 		atomic_set(&newsk->sk_refcnt, 2);
959 
960 		/*
961 		 * Increment the counter in the same struct proto as the master
962 		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
963 		 * is the same as sk->sk_prot->socks, as this field was copied
964 		 * with memcpy).
965 		 *
966 		 * This _changes_ the previous behaviour, where
967 		 * tcp_create_openreq_child was always incrementing the
968 		 * equivalent of tcp_prot->socks (inet_sock_nr), so this has
969 		 * to be taken into account in all callers. -acme
970 		 */
971 		sk_refcnt_debug_inc(newsk);
972 		newsk->sk_socket = NULL;
973 		newsk->sk_sleep	 = NULL;
974 
975 		if (newsk->sk_prot->sockets_allocated)
976 			atomic_inc(newsk->sk_prot->sockets_allocated);
977 	}
978 out:
979 	return newsk;
980 }
981 
982 EXPORT_SYMBOL_GPL(sk_clone);
983 
984 void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
985 {
986 	__sk_dst_set(sk, dst);
987 	sk->sk_route_caps = dst->dev->features;
988 	if (sk->sk_route_caps & NETIF_F_GSO)
989 		sk->sk_route_caps |= NETIF_F_GSO_MASK;
990 	if (sk_can_gso(sk)) {
991 		if (dst->header_len)
992 			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
993 		else
994 			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
995 	}
996 }
997 EXPORT_SYMBOL_GPL(sk_setup_caps);
998 
999 void __init sk_init(void)
1000 {
1001 	if (num_physpages <= 4096) {
1002 		sysctl_wmem_max = 32767;
1003 		sysctl_rmem_max = 32767;
1004 		sysctl_wmem_default = 32767;
1005 		sysctl_rmem_default = 32767;
1006 	} else if (num_physpages >= 131072) {
1007 		sysctl_wmem_max = 131071;
1008 		sysctl_rmem_max = 131071;
1009 	}
1010 }
1011 
1012 /*
1013  *	Simple resource managers for sockets.
1014  */
1015 
1016 
1017 /*
1018  * Write buffer destructor automatically called from kfree_skb.
1019  */
1020 void sock_wfree(struct sk_buff *skb)
1021 {
1022 	struct sock *sk = skb->sk;
1023 
1024 	/* In case it might be waiting for more memory. */
1025 	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
1026 	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
1027 		sk->sk_write_space(sk);
1028 	sock_put(sk);
1029 }
1030 
1031 /*
1032  * Read buffer destructor automatically called from kfree_skb.
1033  */
1034 void sock_rfree(struct sk_buff *skb)
1035 {
1036 	struct sock *sk = skb->sk;
1037 
1038 	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
1039 }
1040 
1041 
1042 int sock_i_uid(struct sock *sk)
1043 {
1044 	int uid;
1045 
1046 	read_lock(&sk->sk_callback_lock);
1047 	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
1048 	read_unlock(&sk->sk_callback_lock);
1049 	return uid;
1050 }
1051 
1052 unsigned long sock_i_ino(struct sock *sk)
1053 {
1054 	unsigned long ino;
1055 
1056 	read_lock(&sk->sk_callback_lock);
1057 	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1058 	read_unlock(&sk->sk_callback_lock);
1059 	return ino;
1060 }
1061 
1062 /*
1063  * Allocate a skb from the socket's send buffer.
1064  */
1065 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1066 			     gfp_t priority)
1067 {
1068 	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1069 		struct sk_buff * skb = alloc_skb(size, priority);
1070 		if (skb) {
1071 			skb_set_owner_w(skb, sk);
1072 			return skb;
1073 		}
1074 	}
1075 	return NULL;
1076 }
1077 
1078 /*
1079  * Allocate a skb from the socket's receive buffer.
1080  */
1081 struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
1082 			     gfp_t priority)
1083 {
1084 	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1085 		struct sk_buff *skb = alloc_skb(size, priority);
1086 		if (skb) {
1087 			skb_set_owner_r(skb, sk);
1088 			return skb;
1089 		}
1090 	}
1091 	return NULL;
1092 }
1093 
1094 /*
1095  * Allocate a memory block from the socket's option memory buffer.
1096  */
1097 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1098 {
1099 	if ((unsigned)size <= sysctl_optmem_max &&
1100 	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1101 		void *mem;
1102 		/* First do the add, to avoid the race if kmalloc
1103 		 * might sleep.
1104 		 */
1105 		atomic_add(size, &sk->sk_omem_alloc);
1106 		mem = kmalloc(size, priority);
1107 		if (mem)
1108 			return mem;
1109 		atomic_sub(size, &sk->sk_omem_alloc);
1110 	}
1111 	return NULL;
1112 }
1113 
1114 /*
1115  * Free an option memory block.
1116  */
1117 void sock_kfree_s(struct sock *sk, void *mem, int size)
1118 {
1119 	kfree(mem);
1120 	atomic_sub(size, &sk->sk_omem_alloc);
1121 }
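
/*
 * Usage sketch (illustrative, not taken from a real caller): memory
 * obtained with sock_kmalloc() is charged against sk->sk_omem_alloc and
 * must be returned with sock_kfree_s() using the same size,
 *
 *	buf = sock_kmalloc(sk, len, GFP_KERNEL);
 *	if (!buf)
 *		return -ENOBUFS;
 *	...
 *	sock_kfree_s(sk, buf, len);
 *
 * so that the per-socket accounting against sysctl_optmem_max stays
 * balanced.
 */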
1122 
1123 /* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
1124    I think these locks should be removed for datagram sockets.
1125  */
1126 static long sock_wait_for_wmem(struct sock * sk, long timeo)
1127 {
1128 	DEFINE_WAIT(wait);
1129 
1130 	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1131 	for (;;) {
1132 		if (!timeo)
1133 			break;
1134 		if (signal_pending(current))
1135 			break;
1136 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1137 		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1138 		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1139 			break;
1140 		if (sk->sk_shutdown & SEND_SHUTDOWN)
1141 			break;
1142 		if (sk->sk_err)
1143 			break;
1144 		timeo = schedule_timeout(timeo);
1145 	}
1146 	finish_wait(sk->sk_sleep, &wait);
1147 	return timeo;
1148 }
1149 
1150 
1151 /*
1152  *	Generic send/receive buffer handlers
1153  */
1154 
1155 static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
1156 					    unsigned long header_len,
1157 					    unsigned long data_len,
1158 					    int noblock, int *errcode)
1159 {
1160 	struct sk_buff *skb;
1161 	gfp_t gfp_mask;
1162 	long timeo;
1163 	int err;
1164 
1165 	gfp_mask = sk->sk_allocation;
1166 	if (gfp_mask & __GFP_WAIT)
1167 		gfp_mask |= __GFP_REPEAT;
1168 
1169 	timeo = sock_sndtimeo(sk, noblock);
1170 	while (1) {
1171 		err = sock_error(sk);
1172 		if (err != 0)
1173 			goto failure;
1174 
1175 		err = -EPIPE;
1176 		if (sk->sk_shutdown & SEND_SHUTDOWN)
1177 			goto failure;
1178 
1179 		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1180 			skb = alloc_skb(header_len, gfp_mask);
1181 			if (skb) {
1182 				int npages;
1183 				int i;
1184 
1185 				/* No pages, we're done... */
1186 				if (!data_len)
1187 					break;
1188 
1189 				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1190 				skb->truesize += data_len;
1191 				skb_shinfo(skb)->nr_frags = npages;
1192 				for (i = 0; i < npages; i++) {
1193 					struct page *page;
1194 					skb_frag_t *frag;
1195 
1196 					page = alloc_pages(sk->sk_allocation, 0);
1197 					if (!page) {
1198 						err = -ENOBUFS;
1199 						skb_shinfo(skb)->nr_frags = i;
1200 						kfree_skb(skb);
1201 						goto failure;
1202 					}
1203 
1204 					frag = &skb_shinfo(skb)->frags[i];
1205 					frag->page = page;
1206 					frag->page_offset = 0;
1207 					frag->size = (data_len >= PAGE_SIZE ?
1208 						      PAGE_SIZE :
1209 						      data_len);
1210 					data_len -= PAGE_SIZE;
1211 				}
1212 
1213 				/* Full success... */
1214 				break;
1215 			}
1216 			err = -ENOBUFS;
1217 			goto failure;
1218 		}
1219 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1220 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1221 		err = -EAGAIN;
1222 		if (!timeo)
1223 			goto failure;
1224 		if (signal_pending(current))
1225 			goto interrupted;
1226 		timeo = sock_wait_for_wmem(sk, timeo);
1227 	}
1228 
1229 	skb_set_owner_w(skb, sk);
1230 	return skb;
1231 
1232 interrupted:
1233 	err = sock_intr_errno(timeo);
1234 failure:
1235 	*errcode = err;
1236 	return NULL;
1237 }
1238 
1239 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1240 				    int noblock, int *errcode)
1241 {
1242 	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
1243 }
1244 
1245 static void __lock_sock(struct sock *sk)
1246 {
1247 	DEFINE_WAIT(wait);
1248 
1249 	for (;;) {
1250 		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
1251 					TASK_UNINTERRUPTIBLE);
1252 		spin_unlock_bh(&sk->sk_lock.slock);
1253 		schedule();
1254 		spin_lock_bh(&sk->sk_lock.slock);
1255 		if (!sock_owned_by_user(sk))
1256 			break;
1257 	}
1258 	finish_wait(&sk->sk_lock.wq, &wait);
1259 }
1260 
1261 static void __release_sock(struct sock *sk)
1262 {
1263 	struct sk_buff *skb = sk->sk_backlog.head;
1264 
1265 	do {
1266 		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
1267 		bh_unlock_sock(sk);
1268 
1269 		do {
1270 			struct sk_buff *next = skb->next;
1271 
1272 			skb->next = NULL;
1273 			sk->sk_backlog_rcv(sk, skb);
1274 
1275 			/*
1276 			 * We are in process context here with softirqs
1277 			 * disabled, use cond_resched_softirq() to preempt.
1278 			 * This is safe to do because we've taken the backlog
1279 			 * queue private:
1280 			 */
1281 			cond_resched_softirq();
1282 
1283 			skb = next;
1284 		} while (skb != NULL);
1285 
1286 		bh_lock_sock(sk);
1287 	} while ((skb = sk->sk_backlog.head) != NULL);
1288 }
1289 
1290 /**
1291  * sk_wait_data - wait for data to arrive at sk_receive_queue
1292  * @sk:    sock to wait on
1293  * @timeo: for how long
1294  *
1295  * Now socket state, including sk->sk_err, is changed only under the lock,
1296  * hence we may omit checks after joining the wait queue.
1297  * We check the receive queue before schedule() only as an optimization;
1298  * it is very likely that release_sock() added new data.
1299  */
1300 int sk_wait_data(struct sock *sk, long *timeo)
1301 {
1302 	int rc;
1303 	DEFINE_WAIT(wait);
1304 
1305 	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1306 	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1307 	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
1308 	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1309 	finish_wait(sk->sk_sleep, &wait);
1310 	return rc;
1311 }
1312 
1313 EXPORT_SYMBOL(sk_wait_data);
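
/*
 * Typical usage sketch (illustrative): a protocol's recvmsg path blocks
 * for data roughly like
 *
 *	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 *	while (skb_queue_empty(&sk->sk_receive_queue)) {
 *		if (!timeo || signal_pending(current))
 *			break;
 *		sk_wait_data(sk, &timeo);
 *	}
 *
 * with the socket lock held; sk_wait_data() drops and retakes it around
 * the actual sleep via sk_wait_event().
 */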
1314 
1315 /*
1316  * Set of default routines for initialising struct proto_ops when
1317  * the protocol does not support a particular function. In certain
1318  * cases where it makes no sense for a protocol to have a "do nothing"
1319  * function, some default processing is provided.
1320  */
1321 
1322 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
1323 {
1324 	return -EOPNOTSUPP;
1325 }
1326 
1327 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
1328 		    int len, int flags)
1329 {
1330 	return -EOPNOTSUPP;
1331 }
1332 
1333 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
1334 {
1335 	return -EOPNOTSUPP;
1336 }
1337 
1338 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
1339 {
1340 	return -EOPNOTSUPP;
1341 }
1342 
1343 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
1344 		    int *len, int peer)
1345 {
1346 	return -EOPNOTSUPP;
1347 }
1348 
1349 unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
1350 {
1351 	return 0;
1352 }
1353 
1354 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1355 {
1356 	return -EOPNOTSUPP;
1357 }
1358 
1359 int sock_no_listen(struct socket *sock, int backlog)
1360 {
1361 	return -EOPNOTSUPP;
1362 }
1363 
1364 int sock_no_shutdown(struct socket *sock, int how)
1365 {
1366 	return -EOPNOTSUPP;
1367 }
1368 
1369 int sock_no_setsockopt(struct socket *sock, int level, int optname,
1370 		    char __user *optval, int optlen)
1371 {
1372 	return -EOPNOTSUPP;
1373 }
1374 
1375 int sock_no_getsockopt(struct socket *sock, int level, int optname,
1376 		    char __user *optval, int __user *optlen)
1377 {
1378 	return -EOPNOTSUPP;
1379 }
1380 
1381 int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1382 		    size_t len)
1383 {
1384 	return -EOPNOTSUPP;
1385 }
1386 
1387 int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1388 		    size_t len, int flags)
1389 {
1390 	return -EOPNOTSUPP;
1391 }
1392 
1393 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1394 {
1395 	/* Mirror missing mmap method error code */
1396 	return -ENODEV;
1397 }
1398 
1399 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
1400 {
1401 	ssize_t res;
1402 	struct msghdr msg = {.msg_flags = flags};
1403 	struct kvec iov;
1404 	char *kaddr = kmap(page);
1405 	iov.iov_base = kaddr + offset;
1406 	iov.iov_len = size;
1407 	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
1408 	kunmap(page);
1409 	return res;
1410 }
1411 
1412 /*
1413  *	Default Socket Callbacks
1414  */
1415 
1416 static void sock_def_wakeup(struct sock *sk)
1417 {
1418 	read_lock(&sk->sk_callback_lock);
1419 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1420 		wake_up_interruptible_all(sk->sk_sleep);
1421 	read_unlock(&sk->sk_callback_lock);
1422 }
1423 
1424 static void sock_def_error_report(struct sock *sk)
1425 {
1426 	read_lock(&sk->sk_callback_lock);
1427 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1428 		wake_up_interruptible(sk->sk_sleep);
1429 	sk_wake_async(sk,0,POLL_ERR);
1430 	read_unlock(&sk->sk_callback_lock);
1431 }
1432 
1433 static void sock_def_readable(struct sock *sk, int len)
1434 {
1435 	read_lock(&sk->sk_callback_lock);
1436 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1437 		wake_up_interruptible(sk->sk_sleep);
1438 	sk_wake_async(sk,1,POLL_IN);
1439 	read_unlock(&sk->sk_callback_lock);
1440 }
1441 
1442 static void sock_def_write_space(struct sock *sk)
1443 {
1444 	read_lock(&sk->sk_callback_lock);
1445 
1446 	/* Do not wake up a writer until he can make "significant"
1447 	 * progress.  --DaveM
1448 	 */
1449 	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1450 		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1451 			wake_up_interruptible(sk->sk_sleep);
1452 
1453 		/* Should agree with poll, otherwise some programs break */
1454 		if (sock_writeable(sk))
1455 			sk_wake_async(sk, 2, POLL_OUT);
1456 	}
1457 
1458 	read_unlock(&sk->sk_callback_lock);
1459 }
1460 
1461 static void sock_def_destruct(struct sock *sk)
1462 {
1463 	kfree(sk->sk_protinfo);
1464 }
1465 
1466 void sk_send_sigurg(struct sock *sk)
1467 {
1468 	if (sk->sk_socket && sk->sk_socket->file)
1469 		if (send_sigurg(&sk->sk_socket->file->f_owner))
1470 			sk_wake_async(sk, 3, POLL_PRI);
1471 }
1472 
1473 void sk_reset_timer(struct sock *sk, struct timer_list* timer,
1474 		    unsigned long expires)
1475 {
1476 	if (!mod_timer(timer, expires))
1477 		sock_hold(sk);
1478 }
1479 
1480 EXPORT_SYMBOL(sk_reset_timer);
1481 
1482 void sk_stop_timer(struct sock *sk, struct timer_list* timer)
1483 {
1484 	if (timer_pending(timer) && del_timer(timer))
1485 		__sock_put(sk);
1486 }
1487 
1488 EXPORT_SYMBOL(sk_stop_timer);
1489 
1490 void sock_init_data(struct socket *sock, struct sock *sk)
1491 {
1492 	skb_queue_head_init(&sk->sk_receive_queue);
1493 	skb_queue_head_init(&sk->sk_write_queue);
1494 	skb_queue_head_init(&sk->sk_error_queue);
1495 #ifdef CONFIG_NET_DMA
1496 	skb_queue_head_init(&sk->sk_async_wait_queue);
1497 #endif
1498 
1499 	sk->sk_send_head	=	NULL;
1500 
1501 	init_timer(&sk->sk_timer);
1502 
1503 	sk->sk_allocation	=	GFP_KERNEL;
1504 	sk->sk_rcvbuf		=	sysctl_rmem_default;
1505 	sk->sk_sndbuf		=	sysctl_wmem_default;
1506 	sk->sk_state		=	TCP_CLOSE;
1507 	sk->sk_socket		=	sock;
1508 
1509 	sock_set_flag(sk, SOCK_ZAPPED);
1510 
1511 	if (sock) {
1512 		sk->sk_type	=	sock->type;
1513 		sk->sk_sleep	=	&sock->wait;
1514 		sock->sk	=	sk;
1515 	} else
1516 		sk->sk_sleep	=	NULL;
1517 
1518 	rwlock_init(&sk->sk_dst_lock);
1519 	rwlock_init(&sk->sk_callback_lock);
1520 	lockdep_set_class(&sk->sk_callback_lock,
1521 			   af_callback_keys + sk->sk_family);
1522 
1523 	sk->sk_state_change	=	sock_def_wakeup;
1524 	sk->sk_data_ready	=	sock_def_readable;
1525 	sk->sk_write_space	=	sock_def_write_space;
1526 	sk->sk_error_report	=	sock_def_error_report;
1527 	sk->sk_destruct		=	sock_def_destruct;
1528 
1529 	sk->sk_sndmsg_page	=	NULL;
1530 	sk->sk_sndmsg_off	=	0;
1531 
1532 	sk->sk_peercred.pid 	=	0;
1533 	sk->sk_peercred.uid	=	-1;
1534 	sk->sk_peercred.gid	=	-1;
1535 	sk->sk_write_pending	=	0;
1536 	sk->sk_rcvlowat		=	1;
1537 	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
1538 	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
1539 
1540 	sk->sk_stamp = ktime_set(-1L, -1L);
1541 
1542 	atomic_set(&sk->sk_refcnt, 1);
1543 }
1544 
1545 void fastcall lock_sock_nested(struct sock *sk, int subclass)
1546 {
1547 	might_sleep();
1548 	spin_lock_bh(&sk->sk_lock.slock);
1549 	if (sk->sk_lock.owner)
1550 		__lock_sock(sk);
1551 	sk->sk_lock.owner = (void *)1;
1552 	spin_unlock(&sk->sk_lock.slock);
1553 	/*
1554 	 * The sk_lock has mutex_lock() semantics here:
1555 	 */
1556 	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
1557 	local_bh_enable();
1558 }
1559 
1560 EXPORT_SYMBOL(lock_sock_nested);
1561 
1562 void fastcall release_sock(struct sock *sk)
1563 {
1564 	/*
1565 	 * The sk_lock has mutex_unlock() semantics:
1566 	 */
1567 	mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
1568 
1569 	spin_lock_bh(&sk->sk_lock.slock);
1570 	if (sk->sk_backlog.tail)
1571 		__release_sock(sk);
1572 	sk->sk_lock.owner = NULL;
1573 	if (waitqueue_active(&sk->sk_lock.wq))
1574 		wake_up(&sk->sk_lock.wq);
1575 	spin_unlock_bh(&sk->sk_lock.slock);
1576 }
1577 EXPORT_SYMBOL(release_sock);
1578 
1579 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1580 {
1581 	struct timeval tv;
1582 	if (!sock_flag(sk, SOCK_TIMESTAMP))
1583 		sock_enable_timestamp(sk);
1584 	tv = ktime_to_timeval(sk->sk_stamp);
1585 	if (tv.tv_sec == -1)
1586 		return -ENOENT;
1587 	if (tv.tv_sec == 0) {
1588 		sk->sk_stamp = ktime_get_real();
1589 		tv = ktime_to_timeval(sk->sk_stamp);
1590 	}
1591 	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
1592 }
1593 EXPORT_SYMBOL(sock_get_timestamp);
1594 
1595 int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
1596 {
1597 	struct timespec ts;
1598 	if (!sock_flag(sk, SOCK_TIMESTAMP))
1599 		sock_enable_timestamp(sk);
1600 	ts = ktime_to_timespec(sk->sk_stamp);
1601 	if (ts.tv_sec == -1)
1602 		return -ENOENT;
1603 	if (ts.tv_sec == 0) {
1604 		sk->sk_stamp = ktime_get_real();
1605 		ts = ktime_to_timespec(sk->sk_stamp);
1606 	}
1607 	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
1608 }
1609 EXPORT_SYMBOL(sock_get_timestampns);
1610 
1611 void sock_enable_timestamp(struct sock *sk)
1612 {
1613 	if (!sock_flag(sk, SOCK_TIMESTAMP)) {
1614 		sock_set_flag(sk, SOCK_TIMESTAMP);
1615 		net_enable_timestamp();
1616 	}
1617 }
1618 EXPORT_SYMBOL(sock_enable_timestamp);
1619 
1620 /*
1621  *	Get a socket option on a socket.
1622  *
1623  *	FIX: POSIX 1003.1g is very ambiguous here. It states that
1624  *	asynchronous errors should be reported by getsockopt. We assume
1625  *	this means if you specify SO_ERROR (otherwise what's the point of it).
1626  */
1627 int sock_common_getsockopt(struct socket *sock, int level, int optname,
1628 			   char __user *optval, int __user *optlen)
1629 {
1630 	struct sock *sk = sock->sk;
1631 
1632 	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1633 }
1634 
1635 EXPORT_SYMBOL(sock_common_getsockopt);
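
/*
 * Illustrative user-space counterpart of the SO_ERROR convention noted
 * above (a sketch, not kernel code): after a non-blocking connect() has
 * signalled writability, the pending asynchronous error is collected with
 *
 *	int err;
 *	socklen_t len = sizeof(err);
 *	getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len);
 *
 * err is 0 on success or a positive errno value, and reading it clears
 * the error on the socket.
 */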
1636 
1637 #ifdef CONFIG_COMPAT
1638 int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
1639 				  char __user *optval, int __user *optlen)
1640 {
1641 	struct sock *sk = sock->sk;
1642 
1643 	if (sk->sk_prot->compat_getsockopt != NULL)
1644 		return sk->sk_prot->compat_getsockopt(sk, level, optname,
1645 						      optval, optlen);
1646 	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1647 }
1648 EXPORT_SYMBOL(compat_sock_common_getsockopt);
1649 #endif
1650 
1651 int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
1652 			struct msghdr *msg, size_t size, int flags)
1653 {
1654 	struct sock *sk = sock->sk;
1655 	int addr_len = 0;
1656 	int err;
1657 
1658 	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
1659 				   flags & ~MSG_DONTWAIT, &addr_len);
1660 	if (err >= 0)
1661 		msg->msg_namelen = addr_len;
1662 	return err;
1663 }
1664 
1665 EXPORT_SYMBOL(sock_common_recvmsg);
1666 
1667 /*
1668  *	Set socket options on an inet socket.
1669  */
1670 int sock_common_setsockopt(struct socket *sock, int level, int optname,
1671 			   char __user *optval, int optlen)
1672 {
1673 	struct sock *sk = sock->sk;
1674 
1675 	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1676 }
1677 
1678 EXPORT_SYMBOL(sock_common_setsockopt);
1679 
1680 #ifdef CONFIG_COMPAT
1681 int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
1682 				  char __user *optval, int optlen)
1683 {
1684 	struct sock *sk = sock->sk;
1685 
1686 	if (sk->sk_prot->compat_setsockopt != NULL)
1687 		return sk->sk_prot->compat_setsockopt(sk, level, optname,
1688 						      optval, optlen);
1689 	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1690 }
1691 EXPORT_SYMBOL(compat_sock_common_setsockopt);
1692 #endif
1693 
1694 void sk_common_release(struct sock *sk)
1695 {
1696 	if (sk->sk_prot->destroy)
1697 		sk->sk_prot->destroy(sk);
1698 
1699 	/*
1700 	 * Observation: when sk_common_release is called, processes have
1701 	 * no access to the socket any more, but the network still does.
1702 	 * Step one, detach it from networking:
1703 	 *
1704 	 * A. Remove from hash tables.
1705 	 */
1706 
1707 	sk->sk_prot->unhash(sk);
1708 
1709 	/*
1710 	 * At this point the socket cannot receive new packets, but some packets
1711 	 * may still be in flight because some CPU is running the receiver and
1712 	 * did the hash table lookup before we unhashed the socket. They will
1713 	 * reach the receive queue and be purged by the socket destructor.
1714 	 *
1715 	 * Also we still have packets pending on the receive queue and probably
1716 	 * our own packets waiting in device queues. sock_destroy will drain the
1717 	 * receive queue, but transmitted packets will delay socket destruction
1718 	 * until the last reference is released.
1719 	 */
1720 
1721 	sock_orphan(sk);
1722 
1723 	xfrm_sk_free_policy(sk);
1724 
1725 	sk_refcnt_debug_release(sk);
1726 	sock_put(sk);
1727 }
1728 
1729 EXPORT_SYMBOL(sk_common_release);
1730 
1731 static DEFINE_RWLOCK(proto_list_lock);
1732 static LIST_HEAD(proto_list);
1733 
1734 int proto_register(struct proto *prot, int alloc_slab)
1735 {
1736 	char *request_sock_slab_name = NULL;
1737 	char *timewait_sock_slab_name;
1738 	int rc = -ENOBUFS;
1739 
1740 	if (alloc_slab) {
1741 		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
1742 					       SLAB_HWCACHE_ALIGN, NULL, NULL);
1743 
1744 		if (prot->slab == NULL) {
1745 			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
1746 			       prot->name);
1747 			goto out;
1748 		}
1749 
1750 		if (prot->rsk_prot != NULL) {
1751 			static const char mask[] = "request_sock_%s";
1752 
1753 			request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1754 			if (request_sock_slab_name == NULL)
1755 				goto out_free_sock_slab;
1756 
1757 			sprintf(request_sock_slab_name, mask, prot->name);
1758 			prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
1759 								 prot->rsk_prot->obj_size, 0,
1760 								 SLAB_HWCACHE_ALIGN, NULL, NULL);
1761 
1762 			if (prot->rsk_prot->slab == NULL) {
1763 				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
1764 				       prot->name);
1765 				goto out_free_request_sock_slab_name;
1766 			}
1767 		}
1768 
1769 		if (prot->twsk_prot != NULL) {
1770 			static const char mask[] = "tw_sock_%s";
1771 
1772 			timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1773 
1774 			if (timewait_sock_slab_name == NULL)
1775 				goto out_free_request_sock_slab;
1776 
1777 			sprintf(timewait_sock_slab_name, mask, prot->name);
1778 			prot->twsk_prot->twsk_slab =
1779 				kmem_cache_create(timewait_sock_slab_name,
1780 						  prot->twsk_prot->twsk_obj_size,
1781 						  0, SLAB_HWCACHE_ALIGN,
1782 						  NULL, NULL);
1783 			if (prot->twsk_prot->twsk_slab == NULL)
1784 				goto out_free_timewait_sock_slab_name;
1785 		}
1786 	}
1787 
1788 	write_lock(&proto_list_lock);
1789 	list_add(&prot->node, &proto_list);
1790 	write_unlock(&proto_list_lock);
1791 	rc = 0;
1792 out:
1793 	return rc;
1794 out_free_timewait_sock_slab_name:
1795 	kfree(timewait_sock_slab_name);
1796 out_free_request_sock_slab:
1797 	if (prot->rsk_prot && prot->rsk_prot->slab) {
1798 		kmem_cache_destroy(prot->rsk_prot->slab);
1799 		prot->rsk_prot->slab = NULL;
1800 	}
1801 out_free_request_sock_slab_name:
1802 	kfree(request_sock_slab_name);
1803 out_free_sock_slab:
1804 	kmem_cache_destroy(prot->slab);
1805 	prot->slab = NULL;
1806 	goto out;
1807 }
1808 
1809 EXPORT_SYMBOL(proto_register);
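
/*
 * Registration sketch (illustrative; "foo" is a made-up protocol, the
 * field names come from struct proto): a protocol module typically does
 *
 *	static struct proto foo_proto = {
 *		.name		= "FOO",
 *		.owner		= THIS_MODULE,
 *		.obj_size	= sizeof(struct foo_sock),
 *	};
 *
 *	err = proto_register(&foo_proto, 1);	(1 => create a slab cache)
 *
 * in its init path and calls proto_unregister(&foo_proto) on unload so
 * the slab caches created here are destroyed again.
 */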
1810 
1811 void proto_unregister(struct proto *prot)
1812 {
1813 	write_lock(&proto_list_lock);
1814 	list_del(&prot->node);
1815 	write_unlock(&proto_list_lock);
1816 
1817 	if (prot->slab != NULL) {
1818 		kmem_cache_destroy(prot->slab);
1819 		prot->slab = NULL;
1820 	}
1821 
1822 	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
1823 		const char *name = kmem_cache_name(prot->rsk_prot->slab);
1824 
1825 		kmem_cache_destroy(prot->rsk_prot->slab);
1826 		kfree(name);
1827 		prot->rsk_prot->slab = NULL;
1828 	}
1829 
1830 	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
1831 		const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
1832 
1833 		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
1834 		kfree(name);
1835 		prot->twsk_prot->twsk_slab = NULL;
1836 	}
1837 }
1838 
1839 EXPORT_SYMBOL(proto_unregister);
1840 
1841 #ifdef CONFIG_PROC_FS
1842 static inline struct proto *__proto_head(void)
1843 {
1844 	return list_entry(proto_list.next, struct proto, node);
1845 }
1846 
1847 static inline struct proto *proto_head(void)
1848 {
1849 	return list_empty(&proto_list) ? NULL : __proto_head();
1850 }
1851 
1852 static inline struct proto *proto_next(struct proto *proto)
1853 {
1854 	return proto->node.next == &proto_list ? NULL :
1855 		list_entry(proto->node.next, struct proto, node);
1856 }
1857 
1858 static inline struct proto *proto_get_idx(loff_t pos)
1859 {
1860 	struct proto *proto;
1861 	loff_t i = 0;
1862 
1863 	list_for_each_entry(proto, &proto_list, node)
1864 		if (i++ == pos)
1865 			goto out;
1866 
1867 	proto = NULL;
1868 out:
1869 	return proto;
1870 }
1871 
1872 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
1873 {
1874 	read_lock(&proto_list_lock);
1875 	return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN;
1876 }
1877 
1878 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1879 {
1880 	++*pos;
1881 	return v == SEQ_START_TOKEN ? proto_head() : proto_next(v);
1882 }
1883 
1884 static void proto_seq_stop(struct seq_file *seq, void *v)
1885 {
1886 	read_unlock(&proto_list_lock);
1887 }
1888 
1889 static char proto_method_implemented(const void *method)
1890 {
1891 	return method == NULL ? 'n' : 'y';
1892 }
1893 
1894 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
1895 {
1896 	seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
1897 			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
1898 		   proto->name,
1899 		   proto->obj_size,
1900 		   proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
1901 		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
1902 		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
1903 		   proto->max_header,
1904 		   proto->slab == NULL ? "no" : "yes",
1905 		   module_name(proto->owner),
1906 		   proto_method_implemented(proto->close),
1907 		   proto_method_implemented(proto->connect),
1908 		   proto_method_implemented(proto->disconnect),
1909 		   proto_method_implemented(proto->accept),
1910 		   proto_method_implemented(proto->ioctl),
1911 		   proto_method_implemented(proto->init),
1912 		   proto_method_implemented(proto->destroy),
1913 		   proto_method_implemented(proto->shutdown),
1914 		   proto_method_implemented(proto->setsockopt),
1915 		   proto_method_implemented(proto->getsockopt),
1916 		   proto_method_implemented(proto->sendmsg),
1917 		   proto_method_implemented(proto->recvmsg),
1918 		   proto_method_implemented(proto->sendpage),
1919 		   proto_method_implemented(proto->bind),
1920 		   proto_method_implemented(proto->backlog_rcv),
1921 		   proto_method_implemented(proto->hash),
1922 		   proto_method_implemented(proto->unhash),
1923 		   proto_method_implemented(proto->get_port),
1924 		   proto_method_implemented(proto->enter_memory_pressure));
1925 }
1926 
1927 static int proto_seq_show(struct seq_file *seq, void *v)
1928 {
1929 	if (v == SEQ_START_TOKEN)
1930 		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
1931 			   "protocol",
1932 			   "size",
1933 			   "sockets",
1934 			   "memory",
1935 			   "press",
1936 			   "maxhdr",
1937 			   "slab",
1938 			   "module",
1939 			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
1940 	else
1941 		proto_seq_printf(seq, v);
1942 	return 0;
1943 }
1944 
1945 static const struct seq_operations proto_seq_ops = {
1946 	.start  = proto_seq_start,
1947 	.next   = proto_seq_next,
1948 	.stop   = proto_seq_stop,
1949 	.show   = proto_seq_show,
1950 };
1951 
1952 static int proto_seq_open(struct inode *inode, struct file *file)
1953 {
1954 	return seq_open(file, &proto_seq_ops);
1955 }
1956 
1957 static const struct file_operations proto_seq_fops = {
1958 	.owner		= THIS_MODULE,
1959 	.open		= proto_seq_open,
1960 	.read		= seq_read,
1961 	.llseek		= seq_lseek,
1962 	.release	= seq_release,
1963 };
1964 
1965 static int __init proto_init(void)
1966 {
1967 	/* register /proc/net/protocols */
1968 	return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
1969 }
1970 
1971 subsys_initcall(proto_init);
1972 
1973 #endif /* PROC_FS */
1974 
1975 EXPORT_SYMBOL(sk_alloc);
1976 EXPORT_SYMBOL(sk_free);
1977 EXPORT_SYMBOL(sk_send_sigurg);
1978 EXPORT_SYMBOL(sock_alloc_send_skb);
1979 EXPORT_SYMBOL(sock_init_data);
1980 EXPORT_SYMBOL(sock_kfree_s);
1981 EXPORT_SYMBOL(sock_kmalloc);
1982 EXPORT_SYMBOL(sock_no_accept);
1983 EXPORT_SYMBOL(sock_no_bind);
1984 EXPORT_SYMBOL(sock_no_connect);
1985 EXPORT_SYMBOL(sock_no_getname);
1986 EXPORT_SYMBOL(sock_no_getsockopt);
1987 EXPORT_SYMBOL(sock_no_ioctl);
1988 EXPORT_SYMBOL(sock_no_listen);
1989 EXPORT_SYMBOL(sock_no_mmap);
1990 EXPORT_SYMBOL(sock_no_poll);
1991 EXPORT_SYMBOL(sock_no_recvmsg);
1992 EXPORT_SYMBOL(sock_no_sendmsg);
1993 EXPORT_SYMBOL(sock_no_sendpage);
1994 EXPORT_SYMBOL(sock_no_setsockopt);
1995 EXPORT_SYMBOL(sock_no_shutdown);
1996 EXPORT_SYMBOL(sock_no_socketpair);
1997 EXPORT_SYMBOL(sock_rfree);
1998 EXPORT_SYMBOL(sock_setsockopt);
1999 EXPORT_SYMBOL(sock_wfree);
2000 EXPORT_SYMBOL(sock_wmalloc);
2001 EXPORT_SYMBOL(sock_i_uid);
2002 EXPORT_SYMBOL(sock_i_ino);
2003 EXPORT_SYMBOL(sysctl_optmem_max);
2004 #ifdef CONFIG_SYSCTL
2005 EXPORT_SYMBOL(sysctl_rmem_max);
2006 EXPORT_SYMBOL(sysctl_wmem_max);
2007 #endif
2008