/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic socket support routines. Memory allocators, socket lock/release
 *		handler for protocols to use and generic option handler.
 *
 *
 * Version:	$Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *		Alan Cox	: 	Numerous verify_area() problems
 *		Alan Cox	:	Connecting on a connecting socket
 *					now returns an error for tcp.
 *		Alan Cox	:	sock->protocol is set correctly.
 *					and is not sometimes left as 0.
 *		Alan Cox	:	connect handles icmp errors on a
 *					connect properly. Unfortunately there
 *					is a restart syscall nasty there. I
 *					can't match BSD without hacking the C
 *					library. Ideas urgently sought!
 *		Alan Cox	:	Disallow bind() to addresses that are
 *					not ours - especially broadcast ones!!
 *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
 *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
 *					instead they leave that for the DESTROY timer.
 *		Alan Cox	:	Clean up error flag in accept
 *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
 *					was buggy. Put a remove_sock() in the handler
 *					for memory when we hit 0. Also altered the timer
 *					code. The ACK stuff can wait and needs major
 *					TCP layer surgery.
 *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
 *					and fixed timer/inet_bh race.
 *		Alan Cox	:	Added zapped flag for TCP
 *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
 *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
 *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
 *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
 *	Pauline Middelink	:	identd support
 *		Alan Cox	:	Fixed connect() taking signals I think.
 *		Alan Cox	:	SO_LINGER supported
 *		Alan Cox	:	Error reporting fixes
 *		Anonymous	:	inet_create tidied up (sk->reuse setting)
 *		Alan Cox	:	inet sockets don't set sk->type!
 *		Alan Cox	:	Split socket option code
 *		Alan Cox	:	Callbacks
 *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
 *		Alex		:	Removed restriction on inet fioctl
 *		Alan Cox	:	Splitting INET from NET core
 *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
 *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
 *		Alan Cox	:	Split IP from generic code
 *		Alan Cox	:	New kfree_skbmem()
 *		Alan Cox	:	Make SO_DEBUG superuser only.
 *		Alan Cox	:	Allow anyone to clear SO_DEBUG
 *					(compatibility fix)
 *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
 *		Alan Cox	:	Allocator for a socket is settable.
 *		Alan Cox	:	SO_ERROR includes soft errors.
 *		Alan Cox	:	Allow NULL arguments on some SO_ opts
 *		Alan Cox	: 	Generic socket allocation to make hooks
 *					easier (suggested by Craig Metz).
 *		Michael Pall	:	SO_ERROR returns positive errno again
 *		Steve Whitehouse:	Added default destructor to free
 *					protocol private data.
 *		Steve Whitehouse:	Added various other default routines
 *					common to several socket families.
 *		Chris Evans	:	Call suser() check last on F_SETOWN
 *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
 *		Andi Kleen	:	Fix write_space callback
 *		Chris Evans	:	Security fixes - signedness again
 *		Arnaldo C. Melo :	cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>

#include <linux/filter.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
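
/* Illustrative arithmetic only (sizeof(struct sk_buff) varies by config
 * and architecture): with a hypothetical 256-byte sk_buff these come to
 * (256 + 256) * 256 = 128 KiB for both SK_WMEM_MAX and SK_RMEM_MAX.
 */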

/* Run time adjustable parameters. */
__u32 sysctl_wmem_max = SK_WMEM_MAX;
__u32 sysctl_rmem_max = SK_RMEM_MAX;
__u32 sysctl_wmem_default = SK_WMEM_MAX;
__u32 sysctl_rmem_default = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);
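
/* For example, on a 32-bit build (sizeof(unsigned long) == 4) with
 * UIO_MAXIOV == 1024 this works out to 4 * (2*1024 + 512) = 10240 bytes.
 */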

static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
	struct timeval tv;

	if (optlen < sizeof(tv))
		return -EINVAL;
	if (copy_from_user(&tv, optval, sizeof(tv)))
		return -EFAULT;

	*timeo_p = MAX_SCHEDULE_TIMEOUT;
	if (tv.tv_sec == 0 && tv.tv_usec == 0)
		return 0;
	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
	return 0;
}
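
/* Worked example of the conversion above (illustrative): with HZ == 100,
 * a timeout of { .tv_sec = 1, .tv_usec = 500000 } becomes
 * 1*100 + (500000 + 9999)/10000 = 150 jiffies.  The microsecond part is
 * rounded up, so a non-zero timeout never truncates to zero jiffies.
 */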

static void sock_warn_obsolete_bsdism(const char *name)
{
	static int warned;
	static char warncomm[TASK_COMM_LEN];
	if (strcmp(warncomm, current->comm) && warned < 5) {
		strcpy(warncomm, current->comm);
		printk(KERN_WARNING "process `%s' is using obsolete "
		       "%s SO_BSDCOMPAT\n", warncomm, name);
		warned++;
	}
}

static void sock_disable_timestamp(struct sock *sk)
{
	if (sock_flag(sk, SOCK_TIMESTAMP)) {
		sock_reset_flag(sk, SOCK_TIMESTAMP);
		net_disable_timestamp();
	}
}


/*
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;
	struct sk_filter *filter;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */

#ifdef SO_DONTLINGER		/* Compatibility item... */
	switch (optname) {
		case SO_DONTLINGER:
			sock_reset_flag(sk, SOCK_LINGER);
			return 0;
	}
#endif

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	valbool = val ? 1 : 0;

	lock_sock(sk);

	switch (optname) {
		case SO_DEBUG:
			if (val && !capable(CAP_NET_ADMIN))
				ret = -EACCES;
			else if (valbool)
				sock_set_flag(sk, SOCK_DBG);
			else
				sock_reset_flag(sk, SOCK_DBG);
			break;
		case SO_REUSEADDR:
			sk->sk_reuse = valbool;
			break;
		case SO_TYPE:
		case SO_ERROR:
			ret = -ENOPROTOOPT;
			break;
		case SO_DONTROUTE:
			if (valbool)
				sock_set_flag(sk, SOCK_LOCALROUTE);
			else
				sock_reset_flag(sk, SOCK_LOCALROUTE);
			break;
		case SO_BROADCAST:
			sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
			break;
		case SO_SNDBUF:
			/* Don't return an error: BSD doesn't, and if you
			   think about it this is right. Otherwise apps would
			   have to play 'guess the biggest size' games.
			   RCVBUF/SNDBUF are treated as hints in BSD. */

			if (val > sysctl_wmem_max)
				val = sysctl_wmem_max;

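			/* The doubling below (mirrored for SO_RCVBUF) leaves
			 * headroom for struct sk_buff and bookkeeping overhead,
			 * so getsockopt() reports twice the requested value.
			 */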
			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
			if ((val * 2) < SOCK_MIN_SNDBUF)
				sk->sk_sndbuf = SOCK_MIN_SNDBUF;
			else
				sk->sk_sndbuf = val * 2;

			/*
			 *	Wake up sending tasks if we
			 *	upped the value.
			 */
			sk->sk_write_space(sk);
			break;

		case SO_RCVBUF:
			/* Don't return an error: BSD doesn't, and if you
			   think about it this is right. Otherwise apps would
			   have to play 'guess the biggest size' games.
			   RCVBUF/SNDBUF are treated as hints in BSD. */

			if (val > sysctl_rmem_max)
				val = sysctl_rmem_max;

			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
			/* FIXME: is this lower bound the right one? */
			if ((val * 2) < SOCK_MIN_RCVBUF)
				sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
			else
				sk->sk_rcvbuf = val * 2;
			break;

		case SO_KEEPALIVE:
#ifdef CONFIG_INET
			if (sk->sk_protocol == IPPROTO_TCP)
				tcp_set_keepalive(sk, valbool);
#endif
			sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
			break;

		case SO_OOBINLINE:
			sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
			break;

		case SO_NO_CHECK:
			sk->sk_no_check = valbool;
			break;

		case SO_PRIORITY:
			if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
				sk->sk_priority = val;
			else
				ret = -EPERM;
			break;

		case SO_LINGER:
			if (optlen < sizeof(ling)) {
				ret = -EINVAL;	/* 1003.1g */
				break;
			}
			if (copy_from_user(&ling, optval, sizeof(ling))) {
				ret = -EFAULT;
				break;
			}
			if (!ling.l_onoff)
				sock_reset_flag(sk, SOCK_LINGER);
			else {
#if (BITS_PER_LONG == 32)
				if (ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
					sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
				else
#endif
					sk->sk_lingertime = ling.l_linger * HZ;
				sock_set_flag(sk, SOCK_LINGER);
			}
			break;

		case SO_BSDCOMPAT:
			sock_warn_obsolete_bsdism("setsockopt");
			break;

		case SO_PASSCRED:
			if (valbool)
				set_bit(SOCK_PASSCRED, &sock->flags);
			else
				clear_bit(SOCK_PASSCRED, &sock->flags);
			break;

		case SO_TIMESTAMP:
			if (valbool) {
				sock_set_flag(sk, SOCK_RCVTSTAMP);
				sock_enable_timestamp(sk);
			} else
				sock_reset_flag(sk, SOCK_RCVTSTAMP);
			break;

		case SO_RCVLOWAT:
			if (val < 0)
				val = INT_MAX;
			sk->sk_rcvlowat = val ? : 1;
			break;

		case SO_RCVTIMEO:
			ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
			break;

		case SO_SNDTIMEO:
			ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
			break;

#ifdef CONFIG_NETDEVICES
		case SO_BINDTODEVICE:
		{
			char devname[IFNAMSIZ];

			/* Sorry... */
			if (!capable(CAP_NET_RAW)) {
				ret = -EPERM;
				break;
			}

			/* Bind this socket to a particular device like "eth0",
			 * as specified in the passed interface name. If the
			 * name is "" or the option length is zero the socket
			 * is not bound.
			 */

			if (!valbool) {
				sk->sk_bound_dev_if = 0;
			} else {
				if (optlen > IFNAMSIZ)
					optlen = IFNAMSIZ;
				if (copy_from_user(devname, optval, optlen)) {
					ret = -EFAULT;
					break;
				}

				/* Remove any cached route for this socket. */
				sk_dst_reset(sk);

				if (devname[0] == '\0') {
					sk->sk_bound_dev_if = 0;
				} else {
					struct net_device *dev = dev_get_by_name(devname);
					if (!dev) {
						ret = -ENODEV;
						break;
					}
					sk->sk_bound_dev_if = dev->ifindex;
					dev_put(dev);
				}
			}
			break;
		}
#endif


		case SO_ATTACH_FILTER:
			ret = -EINVAL;
			if (optlen == sizeof(struct sock_fprog)) {
				struct sock_fprog fprog;

				ret = -EFAULT;
				if (copy_from_user(&fprog, optval, sizeof(fprog)))
					break;

				ret = sk_attach_filter(&fprog, sk);
			}
			break;

		case SO_DETACH_FILTER:
			spin_lock_bh(&sk->sk_lock.slock);
			filter = sk->sk_filter;
			if (filter) {
				sk->sk_filter = NULL;
				spin_unlock_bh(&sk->sk_lock.slock);
				sk_filter_release(sk, filter);
				break;
			}
			spin_unlock_bh(&sk->sk_lock.slock);
			ret = -ENONET;
			break;

		/* We implement the SO_SNDLOWAT etc to
		   not be settable (1003.1g 5.3) */
		default:
			ret = -ENOPROTOOPT;
			break;
	}
	release_sock(sk);
	return ret;
}


int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	union {
		int val;
		struct linger ling;
		struct timeval tm;
	} v;

	unsigned int lv = sizeof(int);
	int len;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	switch (optname) {
		case SO_DEBUG:
			v.val = sock_flag(sk, SOCK_DBG);
			break;

		case SO_DONTROUTE:
			v.val = sock_flag(sk, SOCK_LOCALROUTE);
			break;

		case SO_BROADCAST:
			v.val = !!sock_flag(sk, SOCK_BROADCAST);
			break;

		case SO_SNDBUF:
			v.val = sk->sk_sndbuf;
			break;

		case SO_RCVBUF:
			v.val = sk->sk_rcvbuf;
			break;

		case SO_REUSEADDR:
			v.val = sk->sk_reuse;
			break;

		case SO_KEEPALIVE:
			v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
			break;

		case SO_TYPE:
			v.val = sk->sk_type;
			break;

		case SO_ERROR:
			v.val = -sock_error(sk);
			if (v.val == 0)
				v.val = xchg(&sk->sk_err_soft, 0);
			break;

		case SO_OOBINLINE:
			v.val = !!sock_flag(sk, SOCK_URGINLINE);
			break;

		case SO_NO_CHECK:
			v.val = sk->sk_no_check;
			break;

		case SO_PRIORITY:
			v.val = sk->sk_priority;
			break;

		case SO_LINGER:
			lv		= sizeof(v.ling);
			v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
			v.ling.l_linger	= sk->sk_lingertime / HZ;
			break;

		case SO_BSDCOMPAT:
			sock_warn_obsolete_bsdism("getsockopt");
			break;

		case SO_TIMESTAMP:
			v.val = sock_flag(sk, SOCK_RCVTSTAMP);
			break;

		case SO_RCVTIMEO:
			lv = sizeof(struct timeval);
			if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
				v.tm.tv_sec = 0;
				v.tm.tv_usec = 0;
			} else {
				v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
				v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
			}
			break;

		case SO_SNDTIMEO:
			lv = sizeof(struct timeval);
			if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
				v.tm.tv_sec = 0;
				v.tm.tv_usec = 0;
			} else {
				v.tm.tv_sec = sk->sk_sndtimeo / HZ;
				v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
			}
			break;

		case SO_RCVLOWAT:
			v.val = sk->sk_rcvlowat;
			break;

		case SO_SNDLOWAT:
			v.val = 1;
			break;

		case SO_PASSCRED:
			v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
			break;

		case SO_PEERCRED:
			if (len > sizeof(sk->sk_peercred))
				len = sizeof(sk->sk_peercred);
			if (copy_to_user(optval, &sk->sk_peercred, len))
				return -EFAULT;
			goto lenout;

		case SO_PEERNAME:
		{
			char address[128];

			if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
				return -ENOTCONN;
			if (lv < len)
				return -EINVAL;
			if (copy_to_user(optval, address, len))
				return -EFAULT;
			goto lenout;
		}

		/* Dubious BSD thing... Probably nobody even uses it, but
		 * the UNIX standard wants it for whatever reason... -DaveM
		 */
		case SO_ACCEPTCONN:
			v.val = sk->sk_state == TCP_LISTEN;
			break;

		case SO_PEERSEC:
			return security_socket_getpeersec(sock, optval, optlen, len);

		default:
			return -ENOPROTOOPT;
	}
	if (len > lv)
		len = lv;
	if (copy_to_user(optval, &v, len))
		return -EFAULT;
lenout:
	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}

/**
 *	sk_alloc - All socket objects are allocated here
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 *	@zero_it: if we should zero the newly allocated sock
 */
struct sock *sk_alloc(int family, int priority, struct proto *prot, int zero_it)
{
	struct sock *sk = NULL;
	kmem_cache_t *slab = prot->slab;

	if (slab != NULL)
		sk = kmem_cache_alloc(slab, priority);
	else
		sk = kmalloc(prot->obj_size, priority);

	if (sk) {
		if (zero_it) {
			memset(sk, 0, prot->obj_size);
			sk->sk_family = family;
			sk->sk_prot = prot;
			sock_lock_init(sk);
		}

		if (security_sk_alloc(sk, family, priority)) {
			/* Free with the matching allocator: the sock may have
			 * come from kmalloc() when the proto has no slab cache.
			 */
			if (slab != NULL)
				kmem_cache_free(slab, sk);
			else
				kfree(sk);
			sk = NULL;
		} else
			__module_get(prot->owner);
	}
	return sk;
}

void sk_free(struct sock *sk)
{
	struct sk_filter *filter;
	struct module *owner = sk->sk_prot->owner;

	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	filter = sk->sk_filter;
	if (filter) {
		sk_filter_release(sk, filter);
		sk->sk_filter = NULL;
	}

	sock_disable_timestamp(sk);

	if (atomic_read(&sk->sk_omem_alloc))
		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));

	security_sk_free(sk);
	if (sk->sk_prot->slab != NULL)
		kmem_cache_free(sk->sk_prot->slab, sk);
	else
		kfree(sk);
	module_put(owner);
}

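/*
 * Scale the default buffer limits to available memory.  Assuming 4 KiB
 * pages, the thresholds below correspond to roughly 16 MiB and 512 MiB
 * of RAM.
 */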
void __init sk_init(void)
{
	if (num_physpages <= 4096) {
		sysctl_wmem_max = 32767;
		sysctl_rmem_max = 32767;
		sysctl_wmem_default = 32767;
		sysctl_rmem_default = 32767;
	} else if (num_physpages >= 131072) {
		sysctl_wmem_max = 131071;
		sysctl_rmem_max = 131071;
	}
}

/*
 *	Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	/* In case it might be waiting for more memory. */
	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
		sk->sk_write_space(sk);
	sock_put(sk);
}

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}


int sock_i_uid(struct sock *sk)
{
	int uid;

	read_lock(&sk->sk_callback_lock);
	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
	read_unlock(&sk->sk_callback_lock);
	return uid;
}

unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	read_lock(&sk->sk_callback_lock);
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
	read_unlock(&sk->sk_callback_lock);
	return ino;
}

/*
 * Allocate a skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority)
{
	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_w(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a skb from the socket's receive buffer.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority)
{
	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_r(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, int priority)
{
	if ((unsigned)size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
		void *mem;
		/* First do the add, to avoid the race if kmalloc
		 * might sleep.
		 */
		atomic_add(size, &sk->sk_omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		atomic_sub(size, &sk->sk_omem_alloc);
	}
	return NULL;
}

/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}
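
/* Typical pairing (illustrative sketch; the caller must pass the same
 * size to both calls so the sk_omem_alloc accounting balances):
 *
 *	struct my_opts *opts = sock_kmalloc(sk, sizeof(*opts), GFP_KERNEL);
 *	if (opts) {
 *		...
 *		sock_kfree_s(sk, opts, sizeof(*opts));
 *	}
 */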

/* This is almost wait_for_tcp_memory() minus release_sock/lock_sock;
 * I think these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
			break;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			break;
		if (sk->sk_err)
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk->sk_sleep, &wait);
	return timeo;
}


/*
 *	Generic send/receive buffer handlers
 */

static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
					    unsigned long header_len,
					    unsigned long data_len,
					    int noblock, int *errcode)
{
	struct sk_buff *skb;
	unsigned int gfp_mask;
	long timeo;
	int err;

	gfp_mask = sk->sk_allocation;
	if (gfp_mask & __GFP_WAIT)
		gfp_mask |= __GFP_REPEAT;

	timeo = sock_sndtimeo(sk, noblock);
	while (1) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			goto failure;

		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
			skb = alloc_skb(header_len, sk->sk_allocation);
			if (skb) {
				int npages;
				int i;

				/* No pages, we're done... */
				if (!data_len)
					break;

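				/* Round the payload up to whole pages; the
				 * final fragment's size is capped below so it
				 * never exceeds the remaining data_len.
				 */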
				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
				skb->truesize += data_len;
				skb_shinfo(skb)->nr_frags = npages;
				for (i = 0; i < npages; i++) {
					struct page *page;
					skb_frag_t *frag;

					page = alloc_pages(sk->sk_allocation, 0);
					if (!page) {
						err = -ENOBUFS;
						skb_shinfo(skb)->nr_frags = i;
						kfree_skb(skb);
						goto failure;
					}

					frag = &skb_shinfo(skb)->frags[i];
					frag->page = page;
					frag->page_offset = 0;
					frag->size = (data_len >= PAGE_SIZE ?
						      PAGE_SIZE :
						      data_len);
					data_len -= PAGE_SIZE;
				}

				/* Full success... */
				break;
			}
			err = -ENOBUFS;
			goto failure;
		}
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}

	skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}

static void __lock_sock(struct sock *sk)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}

static void __release_sock(struct sock *sk)
{
	struct sk_buff *skb = sk->sk_backlog.head;

	do {
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			skb->next = NULL;
			sk->sk_backlog_rcv(sk, skb);

			/*
			 * We are in process context here with softirqs
			 * disabled, use cond_resched_softirq() to preempt.
			 * This is safe to do because we've taken the backlog
			 * queue private:
			 */
			cond_resched_softirq();

			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
	} while ((skb = sk->sk_backlog.head) != NULL);
}

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk: sock to wait on
 * @timeo: for how long
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
	int rc;
	DEFINE_WAIT(wait);

	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	finish_wait(sk->sk_sleep, &wait);
	return rc;
}

EXPORT_SYMBOL(sk_wait_data);

/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */

int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}

unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
	return 0;
}

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}

int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}

int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}

int sock_no_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int optlen)
{
	return -EOPNOTSUPP;
}

int sock_no_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	return -EOPNOTSUPP;
}

int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len)
{
	return -EOPNOTSUPP;
}

int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}

ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
	ssize_t res;
	struct msghdr msg = { .msg_flags = flags };
	struct kvec iov;
	char *kaddr = kmap(page);

	iov.iov_base = kaddr + offset;
	iov.iov_len = size;
	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
	kunmap(page);
	return res;
}

/*
 *	Default Socket Callbacks
 */

static void sock_def_wakeup(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible_all(sk->sk_sleep);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_error_report(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk, 0, POLL_ERR);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_readable(struct sock *sk, int len)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk, 1, POLL_IN);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
			wake_up_interruptible(sk->sk_sleep);

		/* Should agree with poll, otherwise some programs break */
		if (sock_writeable(sk))
			sk_wake_async(sk, 2, POLL_OUT);
	}

	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_destruct(struct sock *sk)
{
	if (sk->sk_protinfo)
		kfree(sk->sk_protinfo);
}

void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, 3, POLL_PRI);
}

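/* The timer helpers below pair an armed timer with a socket reference:
 * mod_timer() returning zero means the timer was not already pending, so
 * a reference is taken; a successful del_timer() drops it.  The timer
 * handler is expected to drop its reference when it fires.
 */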
void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}

EXPORT_SYMBOL(sk_reset_timer);

void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	if (timer_pending(timer) && del_timer(timer))
		__sock_put(sk);
}

EXPORT_SYMBOL(sk_stop_timer);

void sock_init_data(struct socket *sock, struct sock *sk)
{
	skb_queue_head_init(&sk->sk_receive_queue);
	skb_queue_head_init(&sk->sk_write_queue);
	skb_queue_head_init(&sk->sk_error_queue);

	sk->sk_send_head	=	NULL;

	init_timer(&sk->sk_timer);

	sk->sk_allocation	=	GFP_KERNEL;
	sk->sk_rcvbuf		=	sysctl_rmem_default;
	sk->sk_sndbuf		=	sysctl_wmem_default;
	sk->sk_state		=	TCP_CLOSE;
	sk->sk_socket		=	sock;

	sock_set_flag(sk, SOCK_ZAPPED);

	if (sock) {
		sk->sk_type	=	sock->type;
		sk->sk_sleep	=	&sock->wait;
		sock->sk	=	sk;
	} else
		sk->sk_sleep	=	NULL;

	rwlock_init(&sk->sk_dst_lock);
	rwlock_init(&sk->sk_callback_lock);

	sk->sk_state_change	=	sock_def_wakeup;
	sk->sk_data_ready	=	sock_def_readable;
	sk->sk_write_space	=	sock_def_write_space;
	sk->sk_error_report	=	sock_def_error_report;
	sk->sk_destruct		=	sock_def_destruct;

	sk->sk_sndmsg_page	=	NULL;
	sk->sk_sndmsg_off	=	0;

	sk->sk_peercred.pid	=	0;
	sk->sk_peercred.uid	=	-1;
	sk->sk_peercred.gid	=	-1;
	sk->sk_write_pending	=	0;
	sk->sk_rcvlowat		=	1;
	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;

	sk->sk_stamp.tv_sec	= -1L;
	sk->sk_stamp.tv_usec	= -1L;

	atomic_set(&sk->sk_refcnt, 1);
}

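/*
 * The socket lock is two-tier: sk_lock.slock is a spinlock protecting
 * the owner flag and the backlog queue, while the owner flag itself
 * marks process-context ownership.  Softirq receive paths that find the
 * socket owned queue packets on sk_backlog instead of processing them;
 * release_sock() replays that backlog in process context before handing
 * the lock back.
 */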
void fastcall lock_sock(struct sock *sk)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_lock.owner)
		__lock_sock(sk);
	sk->sk_lock.owner = (void *)1;
	spin_unlock_bh(&sk->sk_lock.slock);
}

EXPORT_SYMBOL(lock_sock);

void fastcall release_sock(struct sock *sk)
{
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);
	sk->sk_lock.owner = NULL;
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);

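/* Report the timestamp of the last received packet to userspace; this
 * is the backend the protocols use for the SIOCGSTAMP ioctl.
 */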
int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk);
	if (sk->sk_stamp.tv_sec == -1)
		return -ENOENT;
	if (sk->sk_stamp.tv_sec == 0)
		do_gettimeofday(&sk->sk_stamp);
	return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ?
		-EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);

void sock_enable_timestamp(struct sock *sk)
{
	if (!sock_flag(sk, SOCK_TIMESTAMP)) {
		sock_set_flag(sk, SOCK_TIMESTAMP);
		net_enable_timestamp();
	}
}
EXPORT_SYMBOL(sock_enable_timestamp);

/*
 *	Get a socket option on a socket.
 *
 *	FIX: POSIX 1003.1g is very ambiguous here. It states that
 *	asynchronous errors should be reported by getsockopt. We assume
 *	this means if you specify SO_ERROR (otherwise what's the point of it).
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_getsockopt);

int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
			struct msghdr *msg, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int err;

	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
				   flags & ~MSG_DONTWAIT, &addr_len);
	if (err >= 0)
		msg->msg_namelen = addr_len;
	return err;
}

EXPORT_SYMBOL(sock_common_recvmsg);

/*
 *	Set socket options on an inet socket.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_setsockopt);

void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes have
	 * no access to the socket, but the network still does.
	 * Step one, detach it from networking:
	 *
	 * A. Remove from hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * At this point the socket cannot receive new packets, but it is
	 * possible that some packets are in flight because some CPU runs
	 * the receiver and did a hash table lookup before we unhashed the
	 * socket. They will reach the receive queue and be purged by the
	 * socket destructor.
	 *
	 * Also we still have packets pending on the receive queue and,
	 * probably, our own packets waiting in device queues. sock_destroy
	 * will drain the receive queue, but transmitted packets will delay
	 * socket destruction until the last reference is released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

#ifdef INET_REFCNT_DEBUG
	if (atomic_read(&sk->sk_refcnt) != 1)
		printk(KERN_DEBUG "Destruction of the socket %p delayed, c=%d\n",
		       sk, atomic_read(&sk->sk_refcnt));
#endif
	sock_put(sk);
}

EXPORT_SYMBOL(sk_common_release);

static DEFINE_RWLOCK(proto_list_lock);
static LIST_HEAD(proto_list);

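/*
 * Protocols register here, typically from module init, and unregister
 * again on module exit.  Illustrative sketch only -- "my_proto" is a
 * hypothetical struct proto, and passing 1 requests a slab cache:
 *
 *	err = proto_register(&my_proto, 1);
 *	...
 *	proto_unregister(&my_proto);
 */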
int proto_register(struct proto *prot, int alloc_slab)
{
	int rc = -ENOBUFS;

	write_lock(&proto_list_lock);

	if (alloc_slab) {
		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
					       SLAB_HWCACHE_ALIGN, NULL, NULL);

		if (prot->slab == NULL) {
			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
			       prot->name);
			goto out_unlock;
		}
	}

	list_add(&prot->node, &proto_list);
	rc = 0;
out_unlock:
	write_unlock(&proto_list_lock);
	return rc;
}

EXPORT_SYMBOL(proto_register);

void proto_unregister(struct proto *prot)
{
	write_lock(&proto_list_lock);

	if (prot->slab != NULL) {
		kmem_cache_destroy(prot->slab);
		prot->slab = NULL;
	}

	list_del(&prot->node);
	write_unlock(&proto_list_lock);
}

EXPORT_SYMBOL(proto_unregister);

#ifdef CONFIG_PROC_FS
static inline struct proto *__proto_head(void)
{
	return list_entry(proto_list.next, struct proto, node);
}

static inline struct proto *proto_head(void)
{
	return list_empty(&proto_list) ? NULL : __proto_head();
}

static inline struct proto *proto_next(struct proto *proto)
{
	return proto->node.next == &proto_list ? NULL :
		list_entry(proto->node.next, struct proto, node);
}

static inline struct proto *proto_get_idx(loff_t pos)
{
	struct proto *proto;
	loff_t i = 0;

	list_for_each_entry(proto, &proto_list, node)
		if (i++ == pos)
			goto out;

	proto = NULL;
out:
	return proto;
}

static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock(&proto_list_lock);
	return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN;
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return v == SEQ_START_TOKEN ? proto_head() : proto_next(v);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&proto_list_lock);
}

static char proto_method_implemented(const void *method)
{
	return method == NULL ? 'n' : 'y';
}

static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
	seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->sendpage),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}

static int proto_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
			   "protocol",
			   "size",
			   "sockets",
			   "memory",
			   "press",
			   "maxhdr",
			   "slab",
			   "module",
			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
	else
		proto_seq_printf(seq, v);
	return 0;
}

static struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};

static int proto_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &proto_seq_ops);
}

static struct file_operations proto_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= proto_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static int __init proto_init(void)
{
	/* register /proc/net/protocols */
	return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
}

subsys_initcall(proto_init);

#endif /* CONFIG_PROC_FS */

EXPORT_SYMBOL(sk_alloc);
EXPORT_SYMBOL(sk_free);
EXPORT_SYMBOL(sk_send_sigurg);
EXPORT_SYMBOL(sock_alloc_send_skb);
EXPORT_SYMBOL(sock_init_data);
EXPORT_SYMBOL(sock_kfree_s);
EXPORT_SYMBOL(sock_kmalloc);
EXPORT_SYMBOL(sock_no_accept);
EXPORT_SYMBOL(sock_no_bind);
EXPORT_SYMBOL(sock_no_connect);
EXPORT_SYMBOL(sock_no_getname);
EXPORT_SYMBOL(sock_no_getsockopt);
EXPORT_SYMBOL(sock_no_ioctl);
EXPORT_SYMBOL(sock_no_listen);
EXPORT_SYMBOL(sock_no_mmap);
EXPORT_SYMBOL(sock_no_poll);
EXPORT_SYMBOL(sock_no_recvmsg);
EXPORT_SYMBOL(sock_no_sendmsg);
EXPORT_SYMBOL(sock_no_sendpage);
EXPORT_SYMBOL(sock_no_setsockopt);
EXPORT_SYMBOL(sock_no_shutdown);
EXPORT_SYMBOL(sock_no_socketpair);
EXPORT_SYMBOL(sock_rfree);
EXPORT_SYMBOL(sock_setsockopt);
EXPORT_SYMBOL(sock_wfree);
EXPORT_SYMBOL(sock_wmalloc);
EXPORT_SYMBOL(sock_i_uid);
EXPORT_SYMBOL(sock_i_ino);
#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(sysctl_optmem_max);
EXPORT_SYMBOL(sysctl_rmem_max);
EXPORT_SYMBOL(sysctl_wmem_max);
#endif