/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 */

#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_common.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

static struct net_protocol dccp_protocol = {
	.handler	= dccp_v4_rcv,
	.err_handler	= dccp_v4_err,
};

const char *dccp_packet_name(const int type)
{
	static const char *dccp_packet_names[] = {
		[DCCP_PKT_REQUEST]  = "REQUEST",
		[DCCP_PKT_RESPONSE] = "RESPONSE",
		[DCCP_PKT_DATA]	    = "DATA",
		[DCCP_PKT_ACK]	    = "ACK",
		[DCCP_PKT_DATAACK]  = "DATAACK",
		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
		[DCCP_PKT_CLOSE]    = "CLOSE",
		[DCCP_PKT_RESET]    = "RESET",
		[DCCP_PKT_SYNC]	    = "SYNC",
		[DCCP_PKT_SYNCACK]  = "SYNCACK",
	};

	if (type >= DCCP_NR_PKT_TYPES)
		return "INVALID";
	else
		return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
	static char *dccp_state_names[] = {
	[DCCP_OPEN]	  = "OPEN",
	[DCCP_REQUESTING] = "REQUESTING",
	[DCCP_PARTOPEN]	  = "PARTOPEN",
	[DCCP_LISTEN]	  = "LISTEN",
	[DCCP_RESPOND]	  = "RESPOND",
	[DCCP_CLOSING]	  = "CLOSING",
	[DCCP_TIME_WAIT]  = "TIME_WAIT",
	[DCCP_CLOSED]	  = "CLOSED",
	};

	if (state >= DCCP_MAX_STATES)
		return "INVALID STATE!";
	else
		return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

static inline int dccp_listen_start(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	/*
	 * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
	 * before calling listen(); see the usage sketch after
	 * this function.
	 */
	if (dccp_service_not_initialized(sk))
		return -EPROTO;
	return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
}
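
/*
 * Usage sketch (illustrative only, not part of this file's build): a
 * server is expected to register its service code(s) before listen().
 * The file descriptor handling, service code and backlog below are
 * assumptions about a minimal userspace caller.
 *
 *	int fd = socket(PF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	__u32 service = 42;	// application-chosen service code
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   &service, sizeof(service));
 *	// bind() to a local address/port as usual, then:
 *	if (listen(fd, 5) < 0)
 *		perror("listen");	// errno == EPROTO if no service set
 */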

int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/* ABORT function of RFC 793 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	/* FIXME: do the active reset thing */
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
static unsigned int dccp_poll(struct file *file, struct socket *sock,
			      poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by other threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	dccp_pr_debug("entry\n");
	return -ENOIOCTLCMD;
}

static int dccp_setsockopt_service(struct sock *sk, const u32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
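
/*
 * Layout assumed by dccp_setsockopt_service() above: optval is an array
 * of __u32 service codes.  The first element arrives as 'service' (read
 * by dccp_setsockopt() via get_user()) and becomes dccps_service; any
 * remaining elements become dccps_service_list.  A hypothetical caller
 * offering one primary and two alternative codes:
 *
 *	__u32 services[3] = { 42, 43, 44 };
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   services, sizeof(services));
 *
 * With optlen == 12 this yields dccpsl_nr == 12 / 4 - 1 == 2 list
 * entries, copied from optval + sizeof(service).
 */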

int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int optlen)
{
	struct dccp_sock *dp;
	int err;
	int val;

	if (level != SOL_DCCP)
		return ip_setsockopt(sk, level, optname, optval, optlen);

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	dp = dccp_sk(sk);
	err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		dp->dccps_packet_size = val;
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}

	release_sock(sk);
	return err;
}

static int dccp_getsockopt_service(struct sock *sk, int len,
				   u32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if (dccp_service_not_initialized(sk))
		goto out;

	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (level != SOL_DCCP)
		return ip_getsockopt(sk, level, optname, optval, optlen);

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		val = dp->dccps_packet_size;
		len = sizeof(dp->dccps_packet_size);
		break;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (u32 __user *)optval, optlen);
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}
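
/*
 * Option numbers 128..191 above are handed to the RX CCID and 192..255
 * to the TX CCID, so congestion-control-specific state can be queried
 * without this file knowing the individual option names.  A minimal
 * userspace sketch of such a query (the option number 192 is only a
 * placeholder; real numbers are defined by the CCID in use):
 *
 *	__u32 val;
 *	socklen_t len = sizeof(val);
 *
 *	getsockopt(fd, SOL_DCCP, 192, &val, &len);
 */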

int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);
	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_write_xmit(sk, skb, &timeo);
	/*
	 * XXX we don't use sk_write_queue, so just discard the packet.
	 *     Current plan however is to _use_ sk_write_queue with
	 *     an algorithm similar to tcp_sendmsg, where the main difference
	 *     is that in DCCP we have to respect packet boundaries, so
	 *     no coalescing of skbs.
	 *
	 *     This bug was _quickly_ found & fixed by just looking at an OSTRA
	 *     generated callgraph 8) -acme
	 */
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}
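
/*
 * Note the datagram semantics above: each sendmsg() maps to exactly one
 * DCCP-Data packet, so anything larger than dccps_mss_cache is rejected
 * with -EMSGSIZE instead of being segmented.  A hedged userspace sketch
 * (buf and buflen are placeholders):
 *
 *	ssize_t n = send(fd, buf, buflen, 0);
 *
 *	if (n < 0 && errno == EMSGSIZE)
 *		;	// shrink the message and retry
 */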

int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		if (dh->dccph_type == DCCP_PKT_DATA ||
		    dh->dccph_type == DCCP_PKT_DATAACK)
			goto found_ok_skb;

		if (dh->dccph_type == DCCP_PKT_RESET ||
		    dh->dccph_type == DCCP_PKT_CLOSE) {
			dccp_pr_debug("found fin ok!\n");
			len = 0;
			goto found_fin_ok;
		}
		dccp_pr_debug("packet_type=%s\n",
			      dccp_packet_name(dh->dccph_type));
		sk_eat_skb(sk, skb);
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when the user tries to read
				 * from a socket that was never connected.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

static int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

static const unsigned char dccp_new_state[] = {
	/* current state:   new state:      action:	*/
	[0]		  = DCCP_CLOSED,
	[DCCP_OPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_REQUESTING] = DCCP_CLOSED,
	[DCCP_PARTOPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_LISTEN]	  = DCCP_CLOSED,
	[DCCP_RESPOND]	  = DCCP_CLOSED,
	[DCCP_CLOSING]	  = DCCP_CLOSED,
	[DCCP_TIME_WAIT]  = DCCP_CLOSED,
	[DCCP_CLOSED]	  = DCCP_CLOSED,
};

static int dccp_close_state(struct sock *sk)
{
	const int next = dccp_new_state[sk->sk_state];
	const int ns = next & DCCP_STATE_MASK;

	if (ns != sk->sk_state)
		dccp_set_state(sk, ns);

	return next & DCCP_ACTION_FIN;
}

void dccp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	/* FIXME: check for unread data */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		__kfree_skb(skb);
	}

	if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (dccp_close_state(sk)) {
		dccp_send_close(sk, 1);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * The last release_sock may have processed the CLOSE or RESET
	 * packet moving sock to CLOSED state, if not we have to fire
	 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
	 * in draft-ietf-dccp-spec-11. -acme
	 */
	if (sk->sk_state == DCCP_CLOSING) {
		/* FIXME: should start at 2 * RTT */
		/* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  inet_csk(sk)->icsk_rto,
					  DCCP_RTO_MAX);
#if 0
		/* Yeah, we should use sk->sk_prot->orphan_count, etc */
		dccp_set_state(sk, DCCP_CLOSED);
#endif
	}

	atomic_inc(sk->sk_prot->orphan_count);
	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}

static struct proto_ops inet_dccp_ops = {
	.family		= PF_INET,
	.owner		= THIS_MODULE,
	.release	= inet_release,
	.bind		= inet_bind,
	.connect	= inet_stream_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= inet_accept,
	.getname	= inet_getname,
	/* FIXME: work on tcp_poll to rename it to inet_csk_poll */
	.poll		= dccp_poll,
	.ioctl		= inet_ioctl,
	/* FIXME: work on inet_listen to rename it to sock_common_listen */
	.listen		= inet_dccp_listen,
	.shutdown	= inet_shutdown,
	.setsockopt	= sock_common_setsockopt,
	.getsockopt	= sock_common_getsockopt,
	.sendmsg	= inet_sendmsg,
	.recvmsg	= sock_common_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage,
};

extern struct net_proto_family inet_family_ops;

static struct inet_protosw dccp_v4_protosw = {
	.type		= SOCK_DCCP,
	.protocol	= IPPROTO_DCCP,
	.prot		= &dccp_v4_prot,
	.ops		= &inet_dccp_ops,
	.capability	= -1,
	.no_check	= 0,
	.flags		= 0,
};

/*
 * This is the global socket data structure used for responding to
 * the Out-of-the-blue (OOTB) packets. A control sock will be created
 * for this socket at the initialization time.
 */
struct socket *dccp_ctl_socket;

static char dccp_ctl_socket_err_msg[] __initdata =
	KERN_ERR "DCCP: Failed to create the control socket.\n";

static int __init dccp_ctl_sock_init(void)
{
	int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
				  &dccp_ctl_socket);
	if (rc < 0)
		printk(dccp_ctl_socket_err_msg);
	else {
		dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
		inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;

		/* Unhash it so that IP input processing does not even
		 * see it, we do not wish this socket to see incoming
		 * packets.
		 */
		dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
	}

	return rc;
}

#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
void dccp_ctl_sock_exit(void)
{
	if (dccp_ctl_socket != NULL) {
		sock_release(dccp_ctl_socket);
		dccp_ctl_socket = NULL;
	}
}

EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
#endif

static int __init init_dccp_v4_mibs(void)
{
	int rc = -ENOMEM;

	dccp_statistics[0] = alloc_percpu(struct dccp_mib);
	if (dccp_statistics[0] == NULL)
		goto out;

	dccp_statistics[1] = alloc_percpu(struct dccp_mib);
	if (dccp_statistics[1] == NULL)
		goto out_free_one;

	rc = 0;
out:
	return rc;
out_free_one:
	free_percpu(dccp_statistics[0]);
	dccp_statistics[0] = NULL;
	goto out;
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
#endif

static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc = proto_register(&dccp_v4_prot, 1);

	if (rc)
		goto out;

	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out_proto_unregister;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);
		dccp_hashinfo.ehash_size >>= 1;
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);
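
	/*
	 * Worked sizing example (assuming 4KB pages, PAGE_SHIFT == 12):
	 * with 512MB of RAM, num_physpages == 131072, so the branch above
	 * gives goal = 131072 >> 9 = 256 pages, and the for loop leaves
	 * ehash_order == 8 (1 << 8 == 256).  The do-while then fits as
	 * many inet_ehash_bucket entries as those pages hold, halves the
	 * count, and rounds it down to a power of two.
	 */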

	if (!dccp_hashinfo.ehash) {
		printk(KERN_CRIT "Failed to allocate DCCP "
				 "established hash table\n");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
		rwlock_init(&dccp_hashinfo.ehash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
	}

	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
		goto out_free_dccp_ehash;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	if (init_dccp_v4_mibs())
		goto out_free_dccp_bhash;

	rc = -EAGAIN;
	if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
		goto out_free_dccp_v4_mibs;

	inet_register_protosw(&dccp_v4_protosw);

	rc = dccp_ctl_sock_init();
	if (rc)
		goto out_unregister_protosw;
out:
	return rc;
out_unregister_protosw:
	inet_unregister_protosw(&dccp_v4_protosw);
	inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
out_free_dccp_v4_mibs:
	free_percpu(dccp_statistics[0]);
	free_percpu(dccp_statistics[1]);
	dccp_statistics[0] = dccp_statistics[1] = NULL;
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
out_proto_unregister:
	proto_unregister(&dccp_v4_prot);
	goto out;
}

static const char dccp_del_proto_err_msg[] __exitdata =
	KERN_ERR "can't remove dccp net_protocol\n";

static void __exit dccp_fini(void)
{
	inet_unregister_protosw(&dccp_v4_protosw);

	if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
		printk(dccp_del_proto_err_msg);

	free_percpu(dccp_statistics[0]);
	free_percpu(dccp_statistics[1]);
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	proto_unregister(&dccp_v4_prot);
}

module_init(dccp_init);
module_exit(dccp_fini);

/*
 * __stringify doesn't like enums, so use the SOCK_DCCP (6) and
 * IPPROTO_DCCP (33) values directly.  Also cover the case where the
 * protocol is not specified, i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP.
 */
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
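
/*
 * Autoload sketch: a userspace call such as
 *
 *	socket(PF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *
 * is expected to make the socket layer request the module
 * "net-pf-2-proto-33-type-6" (PF_INET == 2), which the first alias
 * above matches; the second alias covers the unspecified-protocol case.
 */
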
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");