xref: /openbmc/linux/net/dccp/proto.c (revision 64c70b1c)
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *	This program is free software; you can redistribute it and/or modify it
8  *	under the terms of the GNU General Public License version 2 as
9  *	published by the Free Software Foundation.
10  */
11 
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24 
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28 
29 #include <asm/semaphore.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34 
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38 
39 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
40 
41 EXPORT_SYMBOL_GPL(dccp_statistics);
42 
43 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
44 
45 EXPORT_SYMBOL_GPL(dccp_orphan_count);
46 
47 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
48 	.lhash_lock	= RW_LOCK_UNLOCKED,
49 	.lhash_users	= ATOMIC_INIT(0),
50 	.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
51 };
52 
53 EXPORT_SYMBOL_GPL(dccp_hashinfo);
54 
55 /* the maximum queue length for tx in packets. 0 is no limit */
56 int sysctl_dccp_tx_qlen __read_mostly = 5;
57 
58 void dccp_set_state(struct sock *sk, const int state)
59 {
60 	const int oldstate = sk->sk_state;
61 
62 	dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
63 		      dccp_role(sk), sk,
64 		      dccp_state_name(oldstate), dccp_state_name(state));
65 	WARN_ON(state == oldstate);
66 
67 	switch (state) {
68 	case DCCP_OPEN:
69 		if (oldstate != DCCP_OPEN)
70 			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
71 		break;
72 
73 	case DCCP_CLOSED:
74 		if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
75 			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
76 
77 		sk->sk_prot->unhash(sk);
78 		if (inet_csk(sk)->icsk_bind_hash != NULL &&
79 		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
80 			inet_put_port(&dccp_hashinfo, sk);
81 		/* fall through */
82 	default:
83 		if (oldstate == DCCP_OPEN)
84 			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
85 	}
86 
87 	/* Change state AFTER socket is unhashed to avoid closed
88 	 * socket sitting in hash tables.
89 	 */
90 	sk->sk_state = state;
91 }
92 
93 EXPORT_SYMBOL_GPL(dccp_set_state);
94 
95 void dccp_done(struct sock *sk)
96 {
97 	dccp_set_state(sk, DCCP_CLOSED);
98 	dccp_clear_xmit_timers(sk);
99 
100 	sk->sk_shutdown = SHUTDOWN_MASK;
101 
102 	if (!sock_flag(sk, SOCK_DEAD))
103 		sk->sk_state_change(sk);
104 	else
105 		inet_csk_destroy_sock(sk);
106 }
107 
108 EXPORT_SYMBOL_GPL(dccp_done);
109 
110 const char *dccp_packet_name(const int type)
111 {
112 	static const char *dccp_packet_names[] = {
113 		[DCCP_PKT_REQUEST]  = "REQUEST",
114 		[DCCP_PKT_RESPONSE] = "RESPONSE",
115 		[DCCP_PKT_DATA]	    = "DATA",
116 		[DCCP_PKT_ACK]	    = "ACK",
117 		[DCCP_PKT_DATAACK]  = "DATAACK",
118 		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
119 		[DCCP_PKT_CLOSE]    = "CLOSE",
120 		[DCCP_PKT_RESET]    = "RESET",
121 		[DCCP_PKT_SYNC]	    = "SYNC",
122 		[DCCP_PKT_SYNCACK]  = "SYNCACK",
123 	};
124 
125 	if (type >= DCCP_NR_PKT_TYPES)
126 		return "INVALID";
127 	else
128 		return dccp_packet_names[type];
129 }
130 
131 EXPORT_SYMBOL_GPL(dccp_packet_name);
132 
133 const char *dccp_state_name(const int state)
134 {
135 	static char *dccp_state_names[] = {
136 	[DCCP_OPEN]	  = "OPEN",
137 	[DCCP_REQUESTING] = "REQUESTING",
138 	[DCCP_PARTOPEN]	  = "PARTOPEN",
139 	[DCCP_LISTEN]	  = "LISTEN",
140 	[DCCP_RESPOND]	  = "RESPOND",
141 	[DCCP_CLOSING]	  = "CLOSING",
142 	[DCCP_TIME_WAIT]  = "TIME_WAIT",
143 	[DCCP_CLOSED]	  = "CLOSED",
144 	};
145 
146 	if (state >= DCCP_MAX_STATES)
147 		return "INVALID STATE!";
148 	else
149 		return dccp_state_names[state];
150 }
151 
152 EXPORT_SYMBOL_GPL(dccp_state_name);
153 
154 void dccp_hash(struct sock *sk)
155 {
156 	inet_hash(&dccp_hashinfo, sk);
157 }
158 
159 EXPORT_SYMBOL_GPL(dccp_hash);
160 
161 void dccp_unhash(struct sock *sk)
162 {
163 	inet_unhash(&dccp_hashinfo, sk);
164 }
165 
166 EXPORT_SYMBOL_GPL(dccp_unhash);
167 
168 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
169 {
170 	struct dccp_sock *dp = dccp_sk(sk);
171 	struct dccp_minisock *dmsk = dccp_msk(sk);
172 	struct inet_connection_sock *icsk = inet_csk(sk);
173 
174 	dccp_minisock_init(&dp->dccps_minisock);
175 	do_gettimeofday(&dp->dccps_epoch);
176 
177 	/*
178 	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
179 	 * the listening (master) sock get CCID control blocks, which is not
180 	 * necessary, but for now, to not mess with the test userspace apps,
181 	 * lets leave it here, later the real solution is to do this in a
182 	 * setsockopt(CCIDs-I-want/accept). -acme
183 	 */
184 	if (likely(ctl_sock_initialized)) {
185 		int rc = dccp_feat_init(dmsk);
186 
187 		if (rc)
188 			return rc;
189 
190 		if (dmsk->dccpms_send_ack_vector) {
191 			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
192 			if (dp->dccps_hc_rx_ackvec == NULL)
193 				return -ENOMEM;
194 		}
195 		dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
196 						      sk, GFP_KERNEL);
197 		dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
198 						      sk, GFP_KERNEL);
199 		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
200 			     dp->dccps_hc_tx_ccid == NULL)) {
201 			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
202 			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
203 			if (dmsk->dccpms_send_ack_vector) {
204 				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
205 				dp->dccps_hc_rx_ackvec = NULL;
206 			}
207 			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
208 			return -ENOMEM;
209 		}
210 	} else {
211 		/* control socket doesn't need feat nego */
212 		INIT_LIST_HEAD(&dmsk->dccpms_pending);
213 		INIT_LIST_HEAD(&dmsk->dccpms_conf);
214 	}
215 
216 	dccp_init_xmit_timers(sk);
217 	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
218 	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
219 	sk->sk_state		= DCCP_CLOSED;
220 	sk->sk_write_space	= dccp_write_space;
221 	icsk->icsk_sync_mss	= dccp_sync_mss;
222 	dp->dccps_mss_cache	= 536;
223 	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
224 	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
225 	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;
226 
227 	return 0;
228 }
229 
230 EXPORT_SYMBOL_GPL(dccp_init_sock);
231 
232 int dccp_destroy_sock(struct sock *sk)
233 {
234 	struct dccp_sock *dp = dccp_sk(sk);
235 	struct dccp_minisock *dmsk = dccp_msk(sk);
236 
237 	/*
238 	 * DCCP doesn't use sk_write_queue, just sk_send_head
239 	 * for retransmissions
240 	 */
241 	if (sk->sk_send_head != NULL) {
242 		kfree_skb(sk->sk_send_head);
243 		sk->sk_send_head = NULL;
244 	}
245 
246 	/* Clean up a referenced DCCP bind bucket. */
247 	if (inet_csk(sk)->icsk_bind_hash != NULL)
248 		inet_put_port(&dccp_hashinfo, sk);
249 
250 	kfree(dp->dccps_service_list);
251 	dp->dccps_service_list = NULL;
252 
253 	if (dmsk->dccpms_send_ack_vector) {
254 		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
255 		dp->dccps_hc_rx_ackvec = NULL;
256 	}
257 	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
258 	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
259 	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
260 
261 	/* clean up feature negotiation state */
262 	dccp_feat_clean(dmsk);
263 
264 	return 0;
265 }
266 
267 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
268 
269 static inline int dccp_listen_start(struct sock *sk, int backlog)
270 {
271 	struct dccp_sock *dp = dccp_sk(sk);
272 
273 	dp->dccps_role = DCCP_ROLE_LISTEN;
274 	return inet_csk_listen_start(sk, backlog);
275 }
276 
277 int dccp_disconnect(struct sock *sk, int flags)
278 {
279 	struct inet_connection_sock *icsk = inet_csk(sk);
280 	struct inet_sock *inet = inet_sk(sk);
281 	int err = 0;
282 	const int old_state = sk->sk_state;
283 
284 	if (old_state != DCCP_CLOSED)
285 		dccp_set_state(sk, DCCP_CLOSED);
286 
287 	/* ABORT function of RFC793 */
288 	if (old_state == DCCP_LISTEN) {
289 		inet_csk_listen_stop(sk);
290 	/* FIXME: do the active reset thing */
291 	} else if (old_state == DCCP_REQUESTING)
292 		sk->sk_err = ECONNRESET;
293 
294 	dccp_clear_xmit_timers(sk);
295 	__skb_queue_purge(&sk->sk_receive_queue);
296 	if (sk->sk_send_head != NULL) {
297 		__kfree_skb(sk->sk_send_head);
298 		sk->sk_send_head = NULL;
299 	}
300 
301 	inet->dport = 0;
302 
303 	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
304 		inet_reset_saddr(sk);
305 
306 	sk->sk_shutdown = 0;
307 	sock_reset_flag(sk, SOCK_DONE);
308 
309 	icsk->icsk_backoff = 0;
310 	inet_csk_delack_init(sk);
311 	__sk_dst_reset(sk);
312 
313 	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
314 
315 	sk->sk_error_report(sk);
316 	return err;
317 }
318 
319 EXPORT_SYMBOL_GPL(dccp_disconnect);
320 
321 /*
322  *	Wait for a DCCP event.
323  *
324  *	Note that we don't need to lock the socket, as the upper poll layers
325  *	take care of normal races (between the test and the event) and we don't
326  *	go look at any of the socket buffers directly.
327  */
328 unsigned int dccp_poll(struct file *file, struct socket *sock,
329 		       poll_table *wait)
330 {
331 	unsigned int mask;
332 	struct sock *sk = sock->sk;
333 
334 	poll_wait(file, sk->sk_sleep, wait);
335 	if (sk->sk_state == DCCP_LISTEN)
336 		return inet_csk_listen_poll(sk);
337 
338 	/* Socket is not locked. We are protected from async events
339 	   by poll logic and correct handling of state changes
340 	   made by another threads is impossible in any case.
341 	 */
342 
343 	mask = 0;
344 	if (sk->sk_err)
345 		mask = POLLERR;
346 
347 	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
348 		mask |= POLLHUP;
349 	if (sk->sk_shutdown & RCV_SHUTDOWN)
350 		mask |= POLLIN | POLLRDNORM | POLLRDHUP;
351 
352 	/* Connected? */
353 	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
354 		if (atomic_read(&sk->sk_rmem_alloc) > 0)
355 			mask |= POLLIN | POLLRDNORM;
356 
357 		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
358 			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
359 				mask |= POLLOUT | POLLWRNORM;
360 			} else {  /* send SIGIO later */
361 				set_bit(SOCK_ASYNC_NOSPACE,
362 					&sk->sk_socket->flags);
363 				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
364 
365 				/* Race breaker. If space is freed after
366 				 * wspace test but before the flags are set,
367 				 * IO signal will be lost.
368 				 */
369 				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
370 					mask |= POLLOUT | POLLWRNORM;
371 			}
372 		}
373 	}
374 	return mask;
375 }
376 
377 EXPORT_SYMBOL_GPL(dccp_poll);
378 
379 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
380 {
381 	dccp_pr_debug("entry\n");
382 	return -ENOIOCTLCMD;
383 }
384 
385 EXPORT_SYMBOL_GPL(dccp_ioctl);
386 
387 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
388 				   char __user *optval, int optlen)
389 {
390 	struct dccp_sock *dp = dccp_sk(sk);
391 	struct dccp_service_list *sl = NULL;
392 
393 	if (service == DCCP_SERVICE_INVALID_VALUE ||
394 	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
395 		return -EINVAL;
396 
397 	if (optlen > sizeof(service)) {
398 		sl = kmalloc(optlen, GFP_KERNEL);
399 		if (sl == NULL)
400 			return -ENOMEM;
401 
402 		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
403 		if (copy_from_user(sl->dccpsl_list,
404 				   optval + sizeof(service),
405 				   optlen - sizeof(service)) ||
406 		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
407 			kfree(sl);
408 			return -EFAULT;
409 		}
410 	}
411 
412 	lock_sock(sk);
413 	dp->dccps_service = service;
414 
415 	kfree(dp->dccps_service_list);
416 
417 	dp->dccps_service_list = sl;
418 	release_sock(sk);
419 	return 0;
420 }
421 
422 /* byte 1 is feature.  the rest is the preference list */
423 static int dccp_setsockopt_change(struct sock *sk, int type,
424 				  struct dccp_so_feat __user *optval)
425 {
426 	struct dccp_so_feat opt;
427 	u8 *val;
428 	int rc;
429 
430 	if (copy_from_user(&opt, optval, sizeof(opt)))
431 		return -EFAULT;
432 
433 	val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
434 	if (!val)
435 		return -ENOMEM;
436 
437 	if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
438 		rc = -EFAULT;
439 		goto out_free_val;
440 	}
441 
442 	rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
443 			      val, opt.dccpsf_len, GFP_KERNEL);
444 	if (rc)
445 		goto out_free_val;
446 
447 out:
448 	return rc;
449 
450 out_free_val:
451 	kfree(val);
452 	goto out;
453 }
454 
455 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
456 		char __user *optval, int optlen)
457 {
458 	struct dccp_sock *dp = dccp_sk(sk);
459 	int val, err = 0;
460 
461 	if (optlen < sizeof(int))
462 		return -EINVAL;
463 
464 	if (get_user(val, (int __user *)optval))
465 		return -EFAULT;
466 
467 	if (optname == DCCP_SOCKOPT_SERVICE)
468 		return dccp_setsockopt_service(sk, val, optval, optlen);
469 
470 	lock_sock(sk);
471 	switch (optname) {
472 	case DCCP_SOCKOPT_PACKET_SIZE:
473 		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
474 		err = 0;
475 		break;
476 	case DCCP_SOCKOPT_CHANGE_L:
477 		if (optlen != sizeof(struct dccp_so_feat))
478 			err = -EINVAL;
479 		else
480 			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
481 						     (struct dccp_so_feat __user *)
482 						     optval);
483 		break;
484 	case DCCP_SOCKOPT_CHANGE_R:
485 		if (optlen != sizeof(struct dccp_so_feat))
486 			err = -EINVAL;
487 		else
488 			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
489 						     (struct dccp_so_feat __user *)
490 						     optval);
491 		break;
492 	case DCCP_SOCKOPT_SEND_CSCOV:	/* sender side, RFC 4340, sec. 9.2 */
493 		if (val < 0 || val > 15)
494 			err = -EINVAL;
495 		else
496 			dp->dccps_pcslen = val;
497 		break;
498 	case DCCP_SOCKOPT_RECV_CSCOV:	/* receiver side, RFC 4340 sec. 9.2.1 */
499 		if (val < 0 || val > 15)
500 			err = -EINVAL;
501 		else {
502 			dp->dccps_pcrlen = val;
503 			/* FIXME: add feature negotiation,
504 			 * ChangeL(MinimumChecksumCoverage, val) */
505 		}
506 		break;
507 	default:
508 		err = -ENOPROTOOPT;
509 		break;
510 	}
511 
512 	release_sock(sk);
513 	return err;
514 }
515 
516 int dccp_setsockopt(struct sock *sk, int level, int optname,
517 		    char __user *optval, int optlen)
518 {
519 	if (level != SOL_DCCP)
520 		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
521 							     optname, optval,
522 							     optlen);
523 	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
524 }
525 
526 EXPORT_SYMBOL_GPL(dccp_setsockopt);
527 
528 #ifdef CONFIG_COMPAT
529 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
530 			   char __user *optval, int optlen)
531 {
532 	if (level != SOL_DCCP)
533 		return inet_csk_compat_setsockopt(sk, level, optname,
534 						  optval, optlen);
535 	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
536 }
537 
538 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
539 #endif
540 
541 static int dccp_getsockopt_service(struct sock *sk, int len,
542 				   __be32 __user *optval,
543 				   int __user *optlen)
544 {
545 	const struct dccp_sock *dp = dccp_sk(sk);
546 	const struct dccp_service_list *sl;
547 	int err = -ENOENT, slen = 0, total_len = sizeof(u32);
548 
549 	lock_sock(sk);
550 	if ((sl = dp->dccps_service_list) != NULL) {
551 		slen = sl->dccpsl_nr * sizeof(u32);
552 		total_len += slen;
553 	}
554 
555 	err = -EINVAL;
556 	if (total_len > len)
557 		goto out;
558 
559 	err = 0;
560 	if (put_user(total_len, optlen) ||
561 	    put_user(dp->dccps_service, optval) ||
562 	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
563 		err = -EFAULT;
564 out:
565 	release_sock(sk);
566 	return err;
567 }
568 
569 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
570 		    char __user *optval, int __user *optlen)
571 {
572 	struct dccp_sock *dp;
573 	int val, len;
574 
575 	if (get_user(len, optlen))
576 		return -EFAULT;
577 
578 	if (len < (int)sizeof(int))
579 		return -EINVAL;
580 
581 	dp = dccp_sk(sk);
582 
583 	switch (optname) {
584 	case DCCP_SOCKOPT_PACKET_SIZE:
585 		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
586 		return 0;
587 	case DCCP_SOCKOPT_SERVICE:
588 		return dccp_getsockopt_service(sk, len,
589 					       (__be32 __user *)optval, optlen);
590 	case DCCP_SOCKOPT_SEND_CSCOV:
591 		val = dp->dccps_pcslen;
592 		len = sizeof(val);
593 		break;
594 	case DCCP_SOCKOPT_RECV_CSCOV:
595 		val = dp->dccps_pcrlen;
596 		len = sizeof(val);
597 		break;
598 	case 128 ... 191:
599 		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
600 					     len, (u32 __user *)optval, optlen);
601 	case 192 ... 255:
602 		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
603 					     len, (u32 __user *)optval, optlen);
604 	default:
605 		return -ENOPROTOOPT;
606 	}
607 
608 	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
609 		return -EFAULT;
610 
611 	return 0;
612 }
613 
614 int dccp_getsockopt(struct sock *sk, int level, int optname,
615 		    char __user *optval, int __user *optlen)
616 {
617 	if (level != SOL_DCCP)
618 		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
619 							     optname, optval,
620 							     optlen);
621 	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
622 }
623 
624 EXPORT_SYMBOL_GPL(dccp_getsockopt);
625 
626 #ifdef CONFIG_COMPAT
627 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
628 			   char __user *optval, int __user *optlen)
629 {
630 	if (level != SOL_DCCP)
631 		return inet_csk_compat_getsockopt(sk, level, optname,
632 						  optval, optlen);
633 	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
634 }
635 
636 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
637 #endif
638 
639 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
640 		 size_t len)
641 {
642 	const struct dccp_sock *dp = dccp_sk(sk);
643 	const int flags = msg->msg_flags;
644 	const int noblock = flags & MSG_DONTWAIT;
645 	struct sk_buff *skb;
646 	int rc, size;
647 	long timeo;
648 
649 	if (len > dp->dccps_mss_cache)
650 		return -EMSGSIZE;
651 
652 	lock_sock(sk);
653 
654 	if (sysctl_dccp_tx_qlen &&
655 	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
656 		rc = -EAGAIN;
657 		goto out_release;
658 	}
659 
660 	timeo = sock_sndtimeo(sk, noblock);
661 
662 	/*
663 	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
664 	 * so that the trick in dccp_rcv_request_sent_state_process.
665 	 */
666 	/* Wait for a connection to finish. */
667 	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
668 		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
669 			goto out_release;
670 
671 	size = sk->sk_prot->max_header + len;
672 	release_sock(sk);
673 	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
674 	lock_sock(sk);
675 	if (skb == NULL)
676 		goto out_release;
677 
678 	skb_reserve(skb, sk->sk_prot->max_header);
679 	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
680 	if (rc != 0)
681 		goto out_discard;
682 
683 	skb_queue_tail(&sk->sk_write_queue, skb);
684 	dccp_write_xmit(sk,0);
685 out_release:
686 	release_sock(sk);
687 	return rc ? : len;
688 out_discard:
689 	kfree_skb(skb);
690 	goto out_release;
691 }
692 
693 EXPORT_SYMBOL_GPL(dccp_sendmsg);
694 
695 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
696 		 size_t len, int nonblock, int flags, int *addr_len)
697 {
698 	const struct dccp_hdr *dh;
699 	long timeo;
700 
701 	lock_sock(sk);
702 
703 	if (sk->sk_state == DCCP_LISTEN) {
704 		len = -ENOTCONN;
705 		goto out;
706 	}
707 
708 	timeo = sock_rcvtimeo(sk, nonblock);
709 
710 	do {
711 		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
712 
713 		if (skb == NULL)
714 			goto verify_sock_status;
715 
716 		dh = dccp_hdr(skb);
717 
718 		if (dh->dccph_type == DCCP_PKT_DATA ||
719 		    dh->dccph_type == DCCP_PKT_DATAACK)
720 			goto found_ok_skb;
721 
722 		if (dh->dccph_type == DCCP_PKT_RESET ||
723 		    dh->dccph_type == DCCP_PKT_CLOSE) {
724 			dccp_pr_debug("found fin ok!\n");
725 			len = 0;
726 			goto found_fin_ok;
727 		}
728 		dccp_pr_debug("packet_type=%s\n",
729 			      dccp_packet_name(dh->dccph_type));
730 		sk_eat_skb(sk, skb, 0);
731 verify_sock_status:
732 		if (sock_flag(sk, SOCK_DONE)) {
733 			len = 0;
734 			break;
735 		}
736 
737 		if (sk->sk_err) {
738 			len = sock_error(sk);
739 			break;
740 		}
741 
742 		if (sk->sk_shutdown & RCV_SHUTDOWN) {
743 			len = 0;
744 			break;
745 		}
746 
747 		if (sk->sk_state == DCCP_CLOSED) {
748 			if (!sock_flag(sk, SOCK_DONE)) {
749 				/* This occurs when user tries to read
750 				 * from never connected socket.
751 				 */
752 				len = -ENOTCONN;
753 				break;
754 			}
755 			len = 0;
756 			break;
757 		}
758 
759 		if (!timeo) {
760 			len = -EAGAIN;
761 			break;
762 		}
763 
764 		if (signal_pending(current)) {
765 			len = sock_intr_errno(timeo);
766 			break;
767 		}
768 
769 		sk_wait_data(sk, &timeo);
770 		continue;
771 	found_ok_skb:
772 		if (len > skb->len)
773 			len = skb->len;
774 		else if (len < skb->len)
775 			msg->msg_flags |= MSG_TRUNC;
776 
777 		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
778 			/* Exception. Bailout! */
779 			len = -EFAULT;
780 			break;
781 		}
782 	found_fin_ok:
783 		if (!(flags & MSG_PEEK))
784 			sk_eat_skb(sk, skb, 0);
785 		break;
786 	} while (1);
787 out:
788 	release_sock(sk);
789 	return len;
790 }
791 
792 EXPORT_SYMBOL_GPL(dccp_recvmsg);
793 
794 int inet_dccp_listen(struct socket *sock, int backlog)
795 {
796 	struct sock *sk = sock->sk;
797 	unsigned char old_state;
798 	int err;
799 
800 	lock_sock(sk);
801 
802 	err = -EINVAL;
803 	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
804 		goto out;
805 
806 	old_state = sk->sk_state;
807 	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
808 		goto out;
809 
810 	/* Really, if the socket is already in listen state
811 	 * we can only allow the backlog to be adjusted.
812 	 */
813 	if (old_state != DCCP_LISTEN) {
814 		/*
815 		 * FIXME: here it probably should be sk->sk_prot->listen_start
816 		 * see tcp_listen_start
817 		 */
818 		err = dccp_listen_start(sk, backlog);
819 		if (err)
820 			goto out;
821 	}
822 	sk->sk_max_ack_backlog = backlog;
823 	err = 0;
824 
825 out:
826 	release_sock(sk);
827 	return err;
828 }
829 
830 EXPORT_SYMBOL_GPL(inet_dccp_listen);
831 
832 static const unsigned char dccp_new_state[] = {
833 	/* current state:   new state:      action:	*/
834 	[0]		  = DCCP_CLOSED,
835 	[DCCP_OPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
836 	[DCCP_REQUESTING] = DCCP_CLOSED,
837 	[DCCP_PARTOPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
838 	[DCCP_LISTEN]	  = DCCP_CLOSED,
839 	[DCCP_RESPOND]	  = DCCP_CLOSED,
840 	[DCCP_CLOSING]	  = DCCP_CLOSED,
841 	[DCCP_TIME_WAIT]  = DCCP_CLOSED,
842 	[DCCP_CLOSED]	  = DCCP_CLOSED,
843 };
844 
845 static int dccp_close_state(struct sock *sk)
846 {
847 	const int next = dccp_new_state[sk->sk_state];
848 	const int ns = next & DCCP_STATE_MASK;
849 
850 	if (ns != sk->sk_state)
851 		dccp_set_state(sk, ns);
852 
853 	return next & DCCP_ACTION_FIN;
854 }
855 
856 void dccp_close(struct sock *sk, long timeout)
857 {
858 	struct dccp_sock *dp = dccp_sk(sk);
859 	struct sk_buff *skb;
860 	int state;
861 
862 	lock_sock(sk);
863 
864 	sk->sk_shutdown = SHUTDOWN_MASK;
865 
866 	if (sk->sk_state == DCCP_LISTEN) {
867 		dccp_set_state(sk, DCCP_CLOSED);
868 
869 		/* Special case. */
870 		inet_csk_listen_stop(sk);
871 
872 		goto adjudge_to_death;
873 	}
874 
875 	sk_stop_timer(sk, &dp->dccps_xmit_timer);
876 
877 	/*
878 	 * We need to flush the recv. buffs.  We do this only on the
879 	 * descriptor close, not protocol-sourced closes, because the
880 	  *reader process may not have drained the data yet!
881 	 */
882 	/* FIXME: check for unread data */
883 	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
884 		__kfree_skb(skb);
885 	}
886 
887 	if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
888 		/* Check zero linger _after_ checking for unread data. */
889 		sk->sk_prot->disconnect(sk, 0);
890 	} else if (dccp_close_state(sk)) {
891 		dccp_send_close(sk, 1);
892 	}
893 
894 	sk_stream_wait_close(sk, timeout);
895 
896 adjudge_to_death:
897 	state = sk->sk_state;
898 	sock_hold(sk);
899 	sock_orphan(sk);
900 	atomic_inc(sk->sk_prot->orphan_count);
901 
902 	/*
903 	 * It is the last release_sock in its life. It will remove backlog.
904 	 */
905 	release_sock(sk);
906 	/*
907 	 * Now socket is owned by kernel and we acquire BH lock
908 	 * to finish close. No need to check for user refs.
909 	 */
910 	local_bh_disable();
911 	bh_lock_sock(sk);
912 	BUG_TRAP(!sock_owned_by_user(sk));
913 
914 	/* Have we already been destroyed by a softirq or backlog? */
915 	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
916 		goto out;
917 
918 	/*
919 	 * The last release_sock may have processed the CLOSE or RESET
920 	 * packet moving sock to CLOSED state, if not we have to fire
921 	 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
922 	 * in draft-ietf-dccp-spec-11. -acme
923 	 */
924 	if (sk->sk_state == DCCP_CLOSING) {
925 		/* FIXME: should start at 2 * RTT */
926 		/* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
927 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
928 					  inet_csk(sk)->icsk_rto,
929 					  DCCP_RTO_MAX);
930 #if 0
931 		/* Yeah, we should use sk->sk_prot->orphan_count, etc */
932 		dccp_set_state(sk, DCCP_CLOSED);
933 #endif
934 	}
935 
936 	if (sk->sk_state == DCCP_CLOSED)
937 		inet_csk_destroy_sock(sk);
938 
939 	/* Otherwise, socket is reprieved until protocol close. */
940 
941 out:
942 	bh_unlock_sock(sk);
943 	local_bh_enable();
944 	sock_put(sk);
945 }
946 
947 EXPORT_SYMBOL_GPL(dccp_close);
948 
/*
 * dccp_shutdown  -  shutdown handler stub
 *
 * Only logs a debug line; @how is ignored and no socket state is changed.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
	return;
}
953 
954 EXPORT_SYMBOL_GPL(dccp_shutdown);
955 
956 static int __init dccp_mib_init(void)
957 {
958 	int rc = -ENOMEM;
959 
960 	dccp_statistics[0] = alloc_percpu(struct dccp_mib);
961 	if (dccp_statistics[0] == NULL)
962 		goto out;
963 
964 	dccp_statistics[1] = alloc_percpu(struct dccp_mib);
965 	if (dccp_statistics[1] == NULL)
966 		goto out_free_one;
967 
968 	rc = 0;
969 out:
970 	return rc;
971 out_free_one:
972 	free_percpu(dccp_statistics[0]);
973 	dccp_statistics[0] = NULL;
974 	goto out;
975 
976 }
977 
978 static void dccp_mib_exit(void)
979 {
980 	free_percpu(dccp_statistics[0]);
981 	free_percpu(dccp_statistics[1]);
982 	dccp_statistics[0] = dccp_statistics[1] = NULL;
983 }
984 
985 static int thash_entries;
986 module_param(thash_entries, int, 0444);
987 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
988 
989 #ifdef CONFIG_IP_DCCP_DEBUG
990 int dccp_debug;
991 module_param(dccp_debug, int, 0444);
992 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
993 
994 EXPORT_SYMBOL_GPL(dccp_debug);
995 #endif
996 
997 static int __init dccp_init(void)
998 {
999 	unsigned long goal;
1000 	int ehash_order, bhash_order, i;
1001 	int rc = -ENOBUFS;
1002 
1003 	dccp_hashinfo.bind_bucket_cachep =
1004 		kmem_cache_create("dccp_bind_bucket",
1005 				  sizeof(struct inet_bind_bucket), 0,
1006 				  SLAB_HWCACHE_ALIGN, NULL, NULL);
1007 	if (!dccp_hashinfo.bind_bucket_cachep)
1008 		goto out;
1009 
1010 	/*
1011 	 * Size and allocate the main established and bind bucket
1012 	 * hash tables.
1013 	 *
1014 	 * The methodology is similar to that of the buffer cache.
1015 	 */
1016 	if (num_physpages >= (128 * 1024))
1017 		goal = num_physpages >> (21 - PAGE_SHIFT);
1018 	else
1019 		goal = num_physpages >> (23 - PAGE_SHIFT);
1020 
1021 	if (thash_entries)
1022 		goal = (thash_entries *
1023 			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1024 	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1025 		;
1026 	do {
1027 		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1028 					sizeof(struct inet_ehash_bucket);
1029 		while (dccp_hashinfo.ehash_size &
1030 		       (dccp_hashinfo.ehash_size - 1))
1031 			dccp_hashinfo.ehash_size--;
1032 		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1033 			__get_free_pages(GFP_ATOMIC, ehash_order);
1034 	} while (!dccp_hashinfo.ehash && --ehash_order > 0);
1035 
1036 	if (!dccp_hashinfo.ehash) {
1037 		DCCP_CRIT("Failed to allocate DCCP established hash table");
1038 		goto out_free_bind_bucket_cachep;
1039 	}
1040 
1041 	for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1042 		rwlock_init(&dccp_hashinfo.ehash[i].lock);
1043 		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1044 		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1045 	}
1046 
1047 	bhash_order = ehash_order;
1048 
1049 	do {
1050 		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1051 					sizeof(struct inet_bind_hashbucket);
1052 		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1053 		    bhash_order > 0)
1054 			continue;
1055 		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1056 			__get_free_pages(GFP_ATOMIC, bhash_order);
1057 	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1058 
1059 	if (!dccp_hashinfo.bhash) {
1060 		DCCP_CRIT("Failed to allocate DCCP bind hash table");
1061 		goto out_free_dccp_ehash;
1062 	}
1063 
1064 	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1065 		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1066 		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1067 	}
1068 
1069 	rc = dccp_mib_init();
1070 	if (rc)
1071 		goto out_free_dccp_bhash;
1072 
1073 	rc = dccp_ackvec_init();
1074 	if (rc)
1075 		goto out_free_dccp_mib;
1076 
1077 	rc = dccp_sysctl_init();
1078 	if (rc)
1079 		goto out_ackvec_exit;
1080 out:
1081 	return rc;
1082 out_ackvec_exit:
1083 	dccp_ackvec_exit();
1084 out_free_dccp_mib:
1085 	dccp_mib_exit();
1086 out_free_dccp_bhash:
1087 	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1088 	dccp_hashinfo.bhash = NULL;
1089 out_free_dccp_ehash:
1090 	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1091 	dccp_hashinfo.ehash = NULL;
1092 out_free_bind_bucket_cachep:
1093 	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1094 	dccp_hashinfo.bind_bucket_cachep = NULL;
1095 	goto out;
1096 }
1097 
1098 static void __exit dccp_fini(void)
1099 {
1100 	dccp_mib_exit();
1101 	free_pages((unsigned long)dccp_hashinfo.bhash,
1102 		   get_order(dccp_hashinfo.bhash_size *
1103 			     sizeof(struct inet_bind_hashbucket)));
1104 	free_pages((unsigned long)dccp_hashinfo.ehash,
1105 		   get_order(dccp_hashinfo.ehash_size *
1106 			     sizeof(struct inet_ehash_bucket)));
1107 	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1108 	dccp_ackvec_exit();
1109 	dccp_sysctl_exit();
1110 }
1111 
1112 module_init(dccp_init);
1113 module_exit(dccp_fini);
1114 
1115 MODULE_LICENSE("GPL");
1116 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1117 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
1118