/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic socket support routines. Memory allocators, socket lock/release
 *		handler for protocols to use and generic option handler.
 *
 *
 * Version:	$Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *		Alan Cox	: 	Numerous verify_area() problems
 *		Alan Cox	:	Connecting on a connecting socket
 *					now returns an error for tcp.
 *		Alan Cox	:	sock->protocol is set correctly.
 *					and is not sometimes left as 0.
 *		Alan Cox	:	connect handles icmp errors on a
 *					connect properly. Unfortunately there
 *					is a restart syscall nasty there. I
 *					can't match BSD without hacking the C
 *					library. Ideas urgently sought!
 *		Alan Cox	:	Disallow bind() to addresses that are
 *					not ours - especially broadcast ones!!
 *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
 *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
 *					instead they leave that for the DESTROY timer.
 *		Alan Cox	:	Clean up error flag in accept
 *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
 *					was buggy. Put a remove_sock() in the handler
 *					for memory when we hit 0. Also altered the timer
 *					code. The ACK stuff can wait and needs major
 *					TCP layer surgery.
 *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
 *					and fixed timer/inet_bh race.
 *		Alan Cox	:	Added zapped flag for TCP
 *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
 *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
 *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
 *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
 *	Pauline Middelink	:	identd support
 *		Alan Cox	:	Fixed connect() taking signals I think.
 *		Alan Cox	:	SO_LINGER supported
 *		Alan Cox	:	Error reporting fixes
 *		Anonymous	:	inet_create tidied up (sk->reuse setting)
 *		Alan Cox	:	inet sockets don't set sk->type!
 *		Alan Cox	:	Split socket option code
 *		Alan Cox	:	Callbacks
 *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
 *		Alex		:	Removed restriction on inet fioctl
 *		Alan Cox	:	Splitting INET from NET core
 *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
 *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
 *		Alan Cox	:	Split IP from generic code
 *		Alan Cox	:	New kfree_skbmem()
 *		Alan Cox	:	Make SO_DEBUG superuser only.
 *		Alan Cox	:	Allow anyone to clear SO_DEBUG
 *					(compatibility fix)
 *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
 *		Alan Cox	:	Allocator for a socket is settable.
 *		Alan Cox	:	SO_ERROR includes soft errors.
 *		Alan Cox	:	Allow NULL arguments on some SO_ opts
 *		Alan Cox	: 	Generic socket allocation to make hooks
 *					easier (suggested by Craig Metz).
 *		Michael Pall	:	SO_ERROR returns positive errno again
 *		Steve Whitehouse:	Added default destructor to free
 *					protocol private data.
 *		Steve Whitehouse:	Added various other default routines
 *					common to several socket families.
 *		Chris Evans	:	Call suser() check last on F_SETOWN
 *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
 *		Andi Kleen	:	Fix write_space callback
 *		Chris Evans	:	Security fixes - signedness again
 *		Arnaldo C. Melo :	cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>

#include <linux/filter.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
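
/*
 * Worked example (illustrative only; the real number depends on the
 * platform): if sizeof(struct sk_buff) happened to be 240 bytes,
 * _SK_MEM_OVERHEAD would be 240 + 256 = 496 bytes, and SK_WMEM_MAX and
 * SK_RMEM_MAX would default to 496 * 256 = 126976 bytes (~124 KiB) per
 * socket.  Because the sk_buff size is folded into the budget, the same
 * number of queued packets fits regardless of how large the metadata
 * structure is on a given architecture.
 */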

/* Run time adjustable parameters. */
__u32 sysctl_wmem_max = SK_WMEM_MAX;
__u32 sysctl_rmem_max = SK_RMEM_MAX;
__u32 sysctl_wmem_default = SK_WMEM_MAX;
__u32 sysctl_rmem_default = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);

static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
	struct timeval tv;

	if (optlen < sizeof(tv))
		return -EINVAL;
	if (copy_from_user(&tv, optval, sizeof(tv)))
		return -EFAULT;

	*timeo_p = MAX_SCHEDULE_TIMEOUT;
	if (tv.tv_sec == 0 && tv.tv_usec == 0)
		return 0;
	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
	return 0;
}
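
/*
 * Conversion sketch (assuming HZ == 1000 purely for illustration): a user
 * timeout of { .tv_sec = 2, .tv_usec = 500000 } becomes
 * 2*1000 + (500000 + 999)/1000 = 2500 jiffies.  The microsecond part is
 * rounded up, so even a 1 us request yields 1 jiffy and a timeout is
 * never silently shortened.  A zero timeval, and any value too large to
 * represent in jiffies, both map to MAX_SCHEDULE_TIMEOUT, i.e. "block
 * forever".
 */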

static void sock_warn_obsolete_bsdism(const char *name)
{
	static int warned;
	static char warncomm[TASK_COMM_LEN];
	if (strcmp(warncomm, current->comm) && warned < 5) {
		strcpy(warncomm, current->comm);
		printk(KERN_WARNING "process `%s' is using obsolete "
		       "%s SO_BSDCOMPAT\n", warncomm, name);
		warned++;
	}
}

static void sock_disable_timestamp(struct sock *sk)
{
	if (sock_flag(sk, SOCK_TIMESTAMP)) {
		sock_reset_flag(sk, SOCK_TIMESTAMP);
		net_disable_timestamp();
	}
}


/*
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;
	struct sk_filter *filter;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */

#ifdef SO_DONTLINGER		/* Compatibility item... */
	if (optname == SO_DONTLINGER) {
		lock_sock(sk);
		sock_reset_flag(sk, SOCK_LINGER);
		release_sock(sk);
		return 0;
	}
#endif

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	valbool = val ? 1 : 0;

	lock_sock(sk);

	switch (optname) {
		case SO_DEBUG:
			if (val && !capable(CAP_NET_ADMIN))
				ret = -EACCES;
			else if (valbool)
				sock_set_flag(sk, SOCK_DBG);
			else
				sock_reset_flag(sk, SOCK_DBG);
			break;
		case SO_REUSEADDR:
			sk->sk_reuse = valbool;
			break;
		case SO_TYPE:
		case SO_ERROR:
			ret = -ENOPROTOOPT;
			break;
		case SO_DONTROUTE:
			if (valbool)
				sock_set_flag(sk, SOCK_LOCALROUTE);
			else
				sock_reset_flag(sk, SOCK_LOCALROUTE);
			break;
		case SO_BROADCAST:
			sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
			break;
		case SO_SNDBUF:
			/* Don't error on this; BSD doesn't, and if you think
			   about it this is right. Otherwise apps have to
			   play 'guess the biggest size' games. RCVBUF/SNDBUF
			   are treated in BSD as hints. */

			if (val > sysctl_wmem_max)
				val = sysctl_wmem_max;
set_sndbuf:
			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
			if ((val * 2) < SOCK_MIN_SNDBUF)
				sk->sk_sndbuf = SOCK_MIN_SNDBUF;
			else
				sk->sk_sndbuf = val * 2;

			/*
			 *	Wake up sending tasks if we
			 *	upped the value.
			 */
			sk->sk_write_space(sk);
			break;

		case SO_SNDBUFFORCE:
			if (!capable(CAP_NET_ADMIN)) {
				ret = -EPERM;
				break;
			}
			goto set_sndbuf;

		case SO_RCVBUF:
			/* Don't error on this; BSD doesn't, and if you think
			   about it this is right. Otherwise apps have to
			   play 'guess the biggest size' games. RCVBUF/SNDBUF
			   are treated in BSD as hints. */

			if (val > sysctl_rmem_max)
				val = sysctl_rmem_max;
set_rcvbuf:
			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
			/* FIXME: is this lower bound the right one? */
			if ((val * 2) < SOCK_MIN_RCVBUF)
				sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
			else
				sk->sk_rcvbuf = val * 2;
			break;

		case SO_RCVBUFFORCE:
			if (!capable(CAP_NET_ADMIN)) {
				ret = -EPERM;
				break;
			}
			goto set_rcvbuf;

		case SO_KEEPALIVE:
#ifdef CONFIG_INET
			if (sk->sk_protocol == IPPROTO_TCP)
				tcp_set_keepalive(sk, valbool);
#endif
			sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
			break;

		case SO_OOBINLINE:
			sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
			break;

		case SO_NO_CHECK:
			sk->sk_no_check = valbool;
			break;

		case SO_PRIORITY:
			if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
				sk->sk_priority = val;
			else
				ret = -EPERM;
			break;

		case SO_LINGER:
			if (optlen < sizeof(ling)) {
				ret = -EINVAL;	/* 1003.1g */
				break;
			}
			if (copy_from_user(&ling, optval, sizeof(ling))) {
				ret = -EFAULT;
				break;
			}
			if (!ling.l_onoff)
				sock_reset_flag(sk, SOCK_LINGER);
			else {
#if (BITS_PER_LONG == 32)
				if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
					sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
				else
#endif
					sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
				sock_set_flag(sk, SOCK_LINGER);
			}
			break;

		case SO_BSDCOMPAT:
			sock_warn_obsolete_bsdism("setsockopt");
			break;

		case SO_PASSCRED:
			if (valbool)
				set_bit(SOCK_PASSCRED, &sock->flags);
			else
				clear_bit(SOCK_PASSCRED, &sock->flags);
			break;

		case SO_TIMESTAMP:
			if (valbool) {
				sock_set_flag(sk, SOCK_RCVTSTAMP);
				sock_enable_timestamp(sk);
			} else
				sock_reset_flag(sk, SOCK_RCVTSTAMP);
			break;

		case SO_RCVLOWAT:
			if (val < 0)
				val = INT_MAX;
			sk->sk_rcvlowat = val ? : 1;
			break;

		case SO_RCVTIMEO:
			ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
			break;

		case SO_SNDTIMEO:
			ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
			break;

#ifdef CONFIG_NETDEVICES
		case SO_BINDTODEVICE:
		{
			char devname[IFNAMSIZ];

			/* Sorry... */
			if (!capable(CAP_NET_RAW)) {
				ret = -EPERM;
				break;
			}

			/* Bind this socket to a particular device like "eth0",
			 * as specified in the passed interface name. If the
			 * name is "" or the option length is zero the socket
			 * is not bound.
			 */

			if (!valbool) {
				sk->sk_bound_dev_if = 0;
			} else {
				if (optlen > IFNAMSIZ)
					optlen = IFNAMSIZ;
				if (copy_from_user(devname, optval, optlen)) {
					ret = -EFAULT;
					break;
				}

				/* Remove any cached route for this socket. */
				sk_dst_reset(sk);

				if (devname[0] == '\0') {
					sk->sk_bound_dev_if = 0;
				} else {
					struct net_device *dev = dev_get_by_name(devname);
					if (!dev) {
						ret = -ENODEV;
						break;
					}
					sk->sk_bound_dev_if = dev->ifindex;
					dev_put(dev);
				}
			}
			break;
		}
#endif


		case SO_ATTACH_FILTER:
			ret = -EINVAL;
			if (optlen == sizeof(struct sock_fprog)) {
				struct sock_fprog fprog;

				ret = -EFAULT;
				if (copy_from_user(&fprog, optval, sizeof(fprog)))
					break;

				ret = sk_attach_filter(&fprog, sk);
			}
			break;

		case SO_DETACH_FILTER:
			spin_lock_bh(&sk->sk_lock.slock);
			filter = sk->sk_filter;
			if (filter) {
				sk->sk_filter = NULL;
				spin_unlock_bh(&sk->sk_lock.slock);
				sk_filter_release(sk, filter);
				break;
			}
			spin_unlock_bh(&sk->sk_lock.slock);
			ret = -ENONET;
			break;

		/* We implement the SO_SNDLOWAT etc to
		   not be settable (1003.1g 5.3) */
		default:
			ret = -ENOPROTOOPT;
			break;
	}
	release_sock(sk);
	return ret;
}
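
/*
 * Usage sketch: all of the SOL_SOCKET options handled above originate
 * from an ordinary userspace setsockopt(2) call, e.g.:
 *
 *	int one = 1;
 *	if (setsockopt(fd, SOL_SOCKET, SO_BROADCAST, &one, sizeof(one)) < 0)
 *		perror("setsockopt");
 *
 * The core copies the value in from user memory, takes the socket lock,
 * and lands in the matching case of the switch above.
 */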


int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	union {
		int val;
		struct linger ling;
		struct timeval tm;
	} v;

	unsigned int lv = sizeof(int);
	int len;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	switch (optname) {
		case SO_DEBUG:
			v.val = sock_flag(sk, SOCK_DBG);
			break;

		case SO_DONTROUTE:
			v.val = sock_flag(sk, SOCK_LOCALROUTE);
			break;

		case SO_BROADCAST:
			v.val = !!sock_flag(sk, SOCK_BROADCAST);
			break;

		case SO_SNDBUF:
			v.val = sk->sk_sndbuf;
			break;

		case SO_RCVBUF:
			v.val = sk->sk_rcvbuf;
			break;

		case SO_REUSEADDR:
			v.val = sk->sk_reuse;
			break;

		case SO_KEEPALIVE:
			v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
			break;

		case SO_TYPE:
			v.val = sk->sk_type;
			break;

		case SO_ERROR:
			v.val = -sock_error(sk);
			if (v.val == 0)
				v.val = xchg(&sk->sk_err_soft, 0);
			break;

		case SO_OOBINLINE:
			v.val = !!sock_flag(sk, SOCK_URGINLINE);
			break;

		case SO_NO_CHECK:
			v.val = sk->sk_no_check;
			break;

		case SO_PRIORITY:
			v.val = sk->sk_priority;
			break;

		case SO_LINGER:
			lv		= sizeof(v.ling);
			v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
			v.ling.l_linger	= sk->sk_lingertime / HZ;
			break;

		case SO_BSDCOMPAT:
			sock_warn_obsolete_bsdism("getsockopt");
			break;

		case SO_TIMESTAMP:
			v.val = sock_flag(sk, SOCK_RCVTSTAMP);
			break;

		case SO_RCVTIMEO:
			lv = sizeof(struct timeval);
			if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
				v.tm.tv_sec = 0;
				v.tm.tv_usec = 0;
			} else {
				v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
				v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
			}
			break;

		case SO_SNDTIMEO:
			lv = sizeof(struct timeval);
			if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
				v.tm.tv_sec = 0;
				v.tm.tv_usec = 0;
			} else {
				v.tm.tv_sec = sk->sk_sndtimeo / HZ;
				v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
			}
			break;

		case SO_RCVLOWAT:
			v.val = sk->sk_rcvlowat;
			break;

		case SO_SNDLOWAT:
			v.val = 1;
			break;

		case SO_PASSCRED:
			v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
			break;

		case SO_PEERCRED:
			if (len > sizeof(sk->sk_peercred))
				len = sizeof(sk->sk_peercred);
			if (copy_to_user(optval, &sk->sk_peercred, len))
				return -EFAULT;
			goto lenout;

		case SO_PEERNAME:
		{
			char address[128];

			if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
				return -ENOTCONN;
			if (lv < len)
				return -EINVAL;
			if (copy_to_user(optval, address, len))
				return -EFAULT;
			goto lenout;
		}

		/* Dubious BSD thing... Probably nobody even uses it, but
		 * the UNIX standard wants it for whatever reason... -DaveM
		 */
		case SO_ACCEPTCONN:
			v.val = sk->sk_state == TCP_LISTEN;
			break;

		case SO_PEERSEC:
			return security_socket_getpeersec(sock, optval, optlen, len);

		default:
			return -ENOPROTOOPT;
	}
	if (len > lv)
		len = lv;
	if (copy_to_user(optval, &v, len))
		return -EFAULT;
lenout:
	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}
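
/*
 * Usage sketch: the SO_ERROR case above is how userspace collects a
 * pending asynchronous error, typically after poll() reports POLLERR or
 * a non-blocking connect() completes:
 *
 *	int err = 0;
 *	socklen_t len = sizeof(err);
 *	getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len);
 *
 * Note that reading the error also clears it: sock_error() and the
 * xchg() of sk_err_soft are destructive reads.
 */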

/**
 *	sk_alloc - All socket objects are allocated here
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 *	@zero_it: if we should zero the newly allocated sock
 */
struct sock *sk_alloc(int family, gfp_t priority,
		      struct proto *prot, int zero_it)
{
	struct sock *sk = NULL;
	kmem_cache_t *slab = prot->slab;

	if (slab != NULL)
		sk = kmem_cache_alloc(slab, priority);
	else
		sk = kmalloc(prot->obj_size, priority);

	if (sk) {
		if (zero_it) {
			memset(sk, 0, prot->obj_size);
			sk->sk_family = family;
			/*
			 * See comment in struct sock definition to understand
			 * why we need sk_prot_creator -acme
			 */
			sk->sk_prot = sk->sk_prot_creator = prot;
			sock_lock_init(sk);
		}

		if (security_sk_alloc(sk, family, priority))
			goto out_free;

		if (!try_module_get(prot->owner))
			goto out_free;
	}
	return sk;

out_free:
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	return NULL;
}
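
/*
 * Typical call, roughly as an address-family create routine would issue
 * it (sketch; the proto argument is illustrative):
 *
 *	sk = sk_alloc(PF_INET, GFP_KERNEL, &some_proto, 1);
 *
 * Passing zero_it != 0 returns a zeroed sock with sk_family, sk_prot and
 * the socket lock already initialised; passing 0 is what sk_clone()
 * below does, since it overwrites the whole object with memcpy() anyway.
 */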

void sk_free(struct sock *sk)
{
	struct sk_filter *filter;
	struct module *owner = sk->sk_prot_creator->owner;

	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	filter = sk->sk_filter;
	if (filter) {
		sk_filter_release(sk, filter);
		sk->sk_filter = NULL;
	}

	sock_disable_timestamp(sk);

	if (atomic_read(&sk->sk_omem_alloc))
		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));

	security_sk_free(sk);
	if (sk->sk_prot_creator->slab != NULL)
		kmem_cache_free(sk->sk_prot_creator->slab, sk);
	else
		kfree(sk);
	module_put(owner);
}

struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
{
	struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);

	if (newsk != NULL) {
		struct sk_filter *filter;

		memcpy(newsk, sk, sk->sk_prot->obj_size);

		/* SANITY */
		sk_node_init(&newsk->sk_node);
		sock_lock_init(newsk);
		bh_lock_sock(newsk);

		atomic_set(&newsk->sk_rmem_alloc, 0);
		atomic_set(&newsk->sk_wmem_alloc, 0);
		atomic_set(&newsk->sk_omem_alloc, 0);
		skb_queue_head_init(&newsk->sk_receive_queue);
		skb_queue_head_init(&newsk->sk_write_queue);

		rwlock_init(&newsk->sk_dst_lock);
		rwlock_init(&newsk->sk_callback_lock);

		newsk->sk_dst_cache	= NULL;
		newsk->sk_wmem_queued	= 0;
		newsk->sk_forward_alloc = 0;
		newsk->sk_send_head	= NULL;
		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;

		sock_reset_flag(newsk, SOCK_DONE);
		skb_queue_head_init(&newsk->sk_error_queue);

		filter = newsk->sk_filter;
		if (filter != NULL)
			sk_filter_charge(newsk, filter);

		if (unlikely(xfrm_sk_clone_policy(newsk))) {
			/* It is still a raw copy of the parent, so invalidate
			 * the destructor and do a plain sk_free() */
			newsk->sk_destruct = NULL;
			sk_free(newsk);
			newsk = NULL;
			goto out;
		}

		newsk->sk_err	   = 0;
		newsk->sk_priority = 0;
		atomic_set(&newsk->sk_refcnt, 2);

		/*
		 * Increment the counter in the same struct proto as the master
		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
		 * is the same as sk->sk_prot->socks, as this field was copied
		 * with memcpy).
		 *
		 * This _changes_ the previous behaviour, where
		 * tcp_create_openreq_child always was incrementing the
		 * equivalent of tcp_prot->socks (inet_sock_nr), so this has
		 * to be taken into account in all callers. -acme
		 */
		sk_refcnt_debug_inc(newsk);
		newsk->sk_socket = NULL;
		newsk->sk_sleep	 = NULL;

		if (newsk->sk_prot->sockets_allocated)
			atomic_inc(newsk->sk_prot->sockets_allocated);
	}
out:
	return newsk;
}

EXPORT_SYMBOL_GPL(sk_clone);

void __init sk_init(void)
{
	if (num_physpages <= 4096) {
		sysctl_wmem_max = 32767;
		sysctl_rmem_max = 32767;
		sysctl_wmem_default = 32767;
		sysctl_rmem_default = 32767;
	} else if (num_physpages >= 131072) {
		sysctl_wmem_max = 131071;
		sysctl_rmem_max = 131071;
	}
}
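
/*
 * For scale (assuming 4 KiB pages): num_physpages <= 4096 corresponds to
 * at most 16 MiB of RAM, where both the maxima and the defaults shrink
 * to 32767 bytes; at 131072 pages (512 MiB) and above only the hard
 * maxima are raised to 131071 bytes, while the defaults keep their
 * compile-time values.
 */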

/*
 *	Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	/* In case it might be waiting for more memory. */
	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
		sk->sk_write_space(sk);
	sock_put(sk);
}

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}


int sock_i_uid(struct sock *sk)
{
	int uid;

	read_lock(&sk->sk_callback_lock);
	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
	read_unlock(&sk->sk_callback_lock);
	return uid;
}

unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	read_lock(&sk->sk_callback_lock);
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
	read_unlock(&sk->sk_callback_lock);
	return ino;
}

/*
 * Allocate a skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_w(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a skb from the socket's receive buffer.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_r(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
	if ((unsigned)size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
		void *mem;
		/* First do the add, to avoid the race if kmalloc
		 * might sleep.
		 */
		atomic_add(size, &sk->sk_omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		atomic_sub(size, &sk->sk_omem_alloc);
	}
	return NULL;
}

/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}
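
/*
 * The two routines above are meant to be used as a pair, with the caller
 * remembering the size so the accounting can be undone, e.g. (sketch):
 *
 *	opt = sock_kmalloc(sk, optlen, GFP_KERNEL);
 *	if (!opt)
 *		return -ENOBUFS;
 *	...
 *	sock_kfree_s(sk, opt, optlen);
 *
 * sk_omem_alloc is charged before the kmalloc() so that a sleeping
 * allocation cannot race another thread past the sysctl_optmem_max cap.
 */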

/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
			break;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			break;
		if (sk->sk_err)
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk->sk_sleep, &wait);
	return timeo;
}


/*
 *	Generic send/receive buffer handlers
 */

static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
					    unsigned long header_len,
					    unsigned long data_len,
					    int noblock, int *errcode)
{
	struct sk_buff *skb;
	gfp_t gfp_mask;
	long timeo;
	int err;

	gfp_mask = sk->sk_allocation;
	if (gfp_mask & __GFP_WAIT)
		gfp_mask |= __GFP_REPEAT;

	timeo = sock_sndtimeo(sk, noblock);
	while (1) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			goto failure;

		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
			skb = alloc_skb(header_len, sk->sk_allocation);
			if (skb) {
				int npages;
				int i;

				/* No pages, we're done... */
				if (!data_len)
					break;

				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
				skb->truesize += data_len;
				skb_shinfo(skb)->nr_frags = npages;
				for (i = 0; i < npages; i++) {
					struct page *page;
					skb_frag_t *frag;

					page = alloc_pages(sk->sk_allocation, 0);
					if (!page) {
						err = -ENOBUFS;
						skb_shinfo(skb)->nr_frags = i;
						kfree_skb(skb);
						goto failure;
					}

					frag = &skb_shinfo(skb)->frags[i];
					frag->page = page;
					frag->page_offset = 0;
					frag->size = (data_len >= PAGE_SIZE ?
						      PAGE_SIZE :
						      data_len);
					data_len -= PAGE_SIZE;
				}

				/* Full success... */
				break;
			}
			err = -ENOBUFS;
			goto failure;
		}
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}

	skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}
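
/*
 * Paging arithmetic, worked through (illustrative; PAGE_SIZE == 4096):
 * a request with data_len = 10000 gives npages = (10000 + 4095) >> 12 = 3;
 * the loop then attaches fragments of 4096, 4096 and finally
 * 10000 - 8192 = 1808 bytes, because the last iteration clamps
 * frag->size to the remaining data_len rather than a full page.
 */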

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}

static void __lock_sock(struct sock *sk)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}

static void __release_sock(struct sock *sk)
{
	struct sk_buff *skb = sk->sk_backlog.head;

	do {
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			skb->next = NULL;
			sk->sk_backlog_rcv(sk, skb);

			/*
			 * We are in process context here with softirqs
			 * disabled, use cond_resched_softirq() to preempt.
			 * This is safe to do because we've taken the backlog
			 * queue private:
			 */
			cond_resched_softirq();

			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
	} while ((skb = sk->sk_backlog.head) != NULL);
}

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
	int rc;
	DEFINE_WAIT(wait);

	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	finish_wait(sk->sk_sleep, &wait);
	return rc;
}

EXPORT_SYMBOL(sk_wait_data);

/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */

int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}

unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
	return 0;
}

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}

int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}

int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}

int sock_no_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int optlen)
{
	return -EOPNOTSUPP;
}

int sock_no_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	return -EOPNOTSUPP;
}

int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len)
{
	return -EOPNOTSUPP;
}

int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}

ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
	ssize_t res;
	struct msghdr msg = {.msg_flags = flags};
	struct kvec iov;
	char *kaddr = kmap(page);
	iov.iov_base = kaddr + offset;
	iov.iov_len = size;
	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
	kunmap(page);
	return res;
}

/*
 *	Default Socket Callbacks
 */

static void sock_def_wakeup(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible_all(sk->sk_sleep);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_error_report(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk, 0, POLL_ERR);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_readable(struct sock *sk, int len)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk, 1, POLL_IN);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
			wake_up_interruptible(sk->sk_sleep);

		/* Should agree with poll, otherwise some programs break */
		if (sock_writeable(sk))
			sk_wake_async(sk, 2, POLL_OUT);
	}

	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_destruct(struct sock *sk)
{
	if (sk->sk_protinfo)
		kfree(sk->sk_protinfo);
}

void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, 3, POLL_PRI);
}

void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}

EXPORT_SYMBOL(sk_reset_timer);

void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	if (timer_pending(timer) && del_timer(timer))
		__sock_put(sk);
}

EXPORT_SYMBOL(sk_stop_timer);

void sock_init_data(struct socket *sock, struct sock *sk)
{
	skb_queue_head_init(&sk->sk_receive_queue);
	skb_queue_head_init(&sk->sk_write_queue);
	skb_queue_head_init(&sk->sk_error_queue);

	sk->sk_send_head	=	NULL;

	init_timer(&sk->sk_timer);

	sk->sk_allocation	=	GFP_KERNEL;
	sk->sk_rcvbuf		=	sysctl_rmem_default;
	sk->sk_sndbuf		=	sysctl_wmem_default;
	sk->sk_state		=	TCP_CLOSE;
	sk->sk_socket		=	sock;

	sock_set_flag(sk, SOCK_ZAPPED);

	if (sock) {
		sk->sk_type	=	sock->type;
		sk->sk_sleep	=	&sock->wait;
		sock->sk	=	sk;
	} else
		sk->sk_sleep	=	NULL;

	rwlock_init(&sk->sk_dst_lock);
	rwlock_init(&sk->sk_callback_lock);

	sk->sk_state_change	=	sock_def_wakeup;
	sk->sk_data_ready	=	sock_def_readable;
	sk->sk_write_space	=	sock_def_write_space;
	sk->sk_error_report	=	sock_def_error_report;
	sk->sk_destruct		=	sock_def_destruct;

	sk->sk_sndmsg_page	=	NULL;
	sk->sk_sndmsg_off	=	0;

	sk->sk_peercred.pid	=	0;
	sk->sk_peercred.uid	=	-1;
	sk->sk_peercred.gid	=	-1;
	sk->sk_write_pending	=	0;
	sk->sk_rcvlowat		=	1;
	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;

	sk->sk_stamp.tv_sec	= -1L;
	sk->sk_stamp.tv_usec	= -1L;

	atomic_set(&sk->sk_refcnt, 1);
}

void fastcall lock_sock(struct sock *sk)
{
	might_sleep();
	spin_lock_bh(&(sk->sk_lock.slock));
	if (sk->sk_lock.owner)
		__lock_sock(sk);
	sk->sk_lock.owner = (void *)1;
	spin_unlock_bh(&(sk->sk_lock.slock));
}

EXPORT_SYMBOL(lock_sock);

void fastcall release_sock(struct sock *sk)
{
	spin_lock_bh(&(sk->sk_lock.slock));
	if (sk->sk_backlog.tail)
		__release_sock(sk);
	sk->sk_lock.owner = NULL;
	if (waitqueue_active(&(sk->sk_lock.wq)))
		wake_up(&(sk->sk_lock.wq));
	spin_unlock_bh(&(sk->sk_lock.slock));
}
EXPORT_SYMBOL(release_sock);
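
/*
 * Canonical process-context pattern for the lock_sock()/release_sock()
 * pair above (sketch):
 *
 *	lock_sock(sk);
 *	...	(touch socket state; softirq input is diverted to
 *		 sk_backlog while we own the lock)
 *	release_sock(sk);
 *
 * release_sock() first replays anything that piled up in the backlog
 * via __release_sock(), so packet processing is never lost, merely
 * deferred until the lock holder is done.
 */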

int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk);
	if (sk->sk_stamp.tv_sec == -1)
		return -ENOENT;
	if (sk->sk_stamp.tv_sec == 0)
		do_gettimeofday(&sk->sk_stamp);
	return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ?
		-EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);

void sock_enable_timestamp(struct sock *sk)
{
	if (!sock_flag(sk, SOCK_TIMESTAMP)) {
		sock_set_flag(sk, SOCK_TIMESTAMP);
		net_enable_timestamp();
	}
}
EXPORT_SYMBOL(sock_enable_timestamp);

/*
 *	Get a socket option on a socket.
 *
 *	FIX: POSIX 1003.1g is very ambiguous here. It states that
 *	asynchronous errors should be reported by getsockopt. We assume
 *	this means if you specify SO_ERROR (otherwise what's the point of it).
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_getsockopt);

int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
			struct msghdr *msg, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int err;

	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
				   flags & ~MSG_DONTWAIT, &addr_len);
	if (err >= 0)
		msg->msg_namelen = addr_len;
	return err;
}

EXPORT_SYMBOL(sock_common_recvmsg);

/*
 *	Set socket options on an inet socket.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_setsockopt);

void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes have
	 * no access to the socket, but the network still does.
	 * Step one, detach it from networking:
	 *
	 * A. Remove from hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * At this point the socket cannot receive new packets, but it is
	 * possible that some packets are in flight because some CPU runs
	 * the receiver and did the hash table lookup before we unhashed
	 * the socket. They will reach the receive queue and will be purged
	 * by the socket destructor.
	 *
	 * Also we still have packets pending on the receive queue and
	 * probably our own packets waiting in device queues. sock_destroy
	 * will drain the receive queue, but transmitted packets will delay
	 * socket destruction until the last reference is released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

	sk_refcnt_debug_release(sk);
	sock_put(sk);
}

EXPORT_SYMBOL(sk_common_release);

static DEFINE_RWLOCK(proto_list_lock);
static LIST_HEAD(proto_list);

int proto_register(struct proto *prot, int alloc_slab)
{
	char *request_sock_slab_name = NULL;
	char *timewait_sock_slab_name;
	int rc = -ENOBUFS;

	if (alloc_slab) {
		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
					       SLAB_HWCACHE_ALIGN, NULL, NULL);

		if (prot->slab == NULL) {
			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
			       prot->name);
			goto out;
		}

		if (prot->rsk_prot != NULL) {
			static const char mask[] = "request_sock_%s";

			request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
			if (request_sock_slab_name == NULL)
				goto out_free_sock_slab;

			sprintf(request_sock_slab_name, mask, prot->name);
			prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
								 prot->rsk_prot->obj_size, 0,
								 SLAB_HWCACHE_ALIGN, NULL, NULL);

			if (prot->rsk_prot->slab == NULL) {
				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
				       prot->name);
				goto out_free_request_sock_slab_name;
			}
		}

		if (prot->twsk_obj_size) {
			static const char mask[] = "tw_sock_%s";

			timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);

			if (timewait_sock_slab_name == NULL)
				goto out_free_request_sock_slab;

			sprintf(timewait_sock_slab_name, mask, prot->name);
			prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name,
							    prot->twsk_obj_size,
							    0, SLAB_HWCACHE_ALIGN,
							    NULL, NULL);
			if (prot->twsk_slab == NULL)
				goto out_free_timewait_sock_slab_name;
		}
	}

	write_lock(&proto_list_lock);
	list_add(&prot->node, &proto_list);
	write_unlock(&proto_list_lock);
	rc = 0;
out:
	return rc;
out_free_timewait_sock_slab_name:
	kfree(timewait_sock_slab_name);
out_free_request_sock_slab:
	if (prot->rsk_prot && prot->rsk_prot->slab) {
		kmem_cache_destroy(prot->rsk_prot->slab);
		prot->rsk_prot->slab = NULL;
	}
out_free_request_sock_slab_name:
	kfree(request_sock_slab_name);
out_free_sock_slab:
	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;
	goto out;
}

EXPORT_SYMBOL(proto_register);
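
/*
 * Registration sketch, as an address family init path might do it
 * (names illustrative):
 *
 *	static struct proto my_prot = {
 *		.name	  = "MYPROTO",
 *		.owner	  = THIS_MODULE,
 *		.obj_size = sizeof(struct my_sock),
 *	};
 *
 *	rc = proto_register(&my_prot, 1);
 *
 * With alloc_slab != 0 a dedicated kmem cache is created for the sock
 * objects, plus optional caches for the request_sock and timewait_sock
 * flavours when rsk_prot/twsk_obj_size are set.
 */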

void proto_unregister(struct proto *prot)
{
	write_lock(&proto_list_lock);
	list_del(&prot->node);
	write_unlock(&proto_list_lock);

	if (prot->slab != NULL) {
		kmem_cache_destroy(prot->slab);
		prot->slab = NULL;
	}

	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
		const char *name = kmem_cache_name(prot->rsk_prot->slab);

		kmem_cache_destroy(prot->rsk_prot->slab);
		kfree(name);
		prot->rsk_prot->slab = NULL;
	}

	if (prot->twsk_slab != NULL) {
		const char *name = kmem_cache_name(prot->twsk_slab);

		kmem_cache_destroy(prot->twsk_slab);
		kfree(name);
		prot->twsk_slab = NULL;
	}
}

EXPORT_SYMBOL(proto_unregister);

#ifdef CONFIG_PROC_FS
static inline struct proto *__proto_head(void)
{
	return list_entry(proto_list.next, struct proto, node);
}

static inline struct proto *proto_head(void)
{
	return list_empty(&proto_list) ? NULL : __proto_head();
}

static inline struct proto *proto_next(struct proto *proto)
{
	return proto->node.next == &proto_list ? NULL :
		list_entry(proto->node.next, struct proto, node);
}

static inline struct proto *proto_get_idx(loff_t pos)
{
	struct proto *proto;
	loff_t i = 0;

	list_for_each_entry(proto, &proto_list, node)
		if (i++ == pos)
			goto out;

	proto = NULL;
out:
	return proto;
}

static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock(&proto_list_lock);
	return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN;
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return v == SEQ_START_TOKEN ? proto_head() : proto_next(v);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&proto_list_lock);
}

static char proto_method_implemented(const void *method)
{
	return method == NULL ? 'n' : 'y';
}

static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
	seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->sendpage),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}

static int proto_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
			   "protocol",
			   "size",
			   "sockets",
			   "memory",
			   "press",
			   "maxhdr",
			   "slab",
			   "module",
			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
	else
		proto_seq_printf(seq, v);
	return 0;
}
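
/*
 * Example of a resulting /proc/net/protocols row (values are purely
 * illustrative; the real numbers depend on the running system):
 *
 * protocol  size sockets  memory press maxhdr  slab module     cl co di ...
 * TCP       1060      12       3   no     320   yes kernel      y  y  y ...
 *
 * Each single-letter column reports whether the proto method named in
 * the header line is implemented ('y') or left NULL ('n').
 */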

static struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};

static int proto_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &proto_seq_ops);
}

static struct file_operations proto_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= proto_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static int __init proto_init(void)
{
	/* register /proc/net/protocols */
	return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
}

subsys_initcall(proto_init);

#endif /* CONFIG_PROC_FS */

EXPORT_SYMBOL(sk_alloc);
EXPORT_SYMBOL(sk_free);
EXPORT_SYMBOL(sk_send_sigurg);
EXPORT_SYMBOL(sock_alloc_send_skb);
EXPORT_SYMBOL(sock_init_data);
EXPORT_SYMBOL(sock_kfree_s);
EXPORT_SYMBOL(sock_kmalloc);
EXPORT_SYMBOL(sock_no_accept);
EXPORT_SYMBOL(sock_no_bind);
EXPORT_SYMBOL(sock_no_connect);
EXPORT_SYMBOL(sock_no_getname);
EXPORT_SYMBOL(sock_no_getsockopt);
EXPORT_SYMBOL(sock_no_ioctl);
EXPORT_SYMBOL(sock_no_listen);
EXPORT_SYMBOL(sock_no_mmap);
EXPORT_SYMBOL(sock_no_poll);
EXPORT_SYMBOL(sock_no_recvmsg);
EXPORT_SYMBOL(sock_no_sendmsg);
EXPORT_SYMBOL(sock_no_sendpage);
EXPORT_SYMBOL(sock_no_setsockopt);
EXPORT_SYMBOL(sock_no_shutdown);
EXPORT_SYMBOL(sock_no_socketpair);
EXPORT_SYMBOL(sock_rfree);
EXPORT_SYMBOL(sock_setsockopt);
EXPORT_SYMBOL(sock_wfree);
EXPORT_SYMBOL(sock_wmalloc);
EXPORT_SYMBOL(sock_i_uid);
EXPORT_SYMBOL(sock_i_ino);
EXPORT_SYMBOL(sysctl_optmem_max);
#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(sysctl_rmem_max);
EXPORT_SYMBOL(sysctl_wmem_max);
#endif