/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;

void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);

static void dccp_finish_passive_close(struct sock *sk)
{
	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
		/* Node (client or server) has received Close packet. */
		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
		dccp_set_state(sk, DCCP_CLOSED);
		break;
	case DCCP_PASSIVE_CLOSEREQ:
		/*
		 * Client received CloseReq. We set the `active' flag so that
		 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
		 */
		dccp_send_close(sk, 1);
		dccp_set_state(sk, DCCP_CLOSING);
	}
}

void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
	static const char *dccp_packet_names[] = {
		[DCCP_PKT_REQUEST]  = "REQUEST",
		[DCCP_PKT_RESPONSE] = "RESPONSE",
		[DCCP_PKT_DATA]	    = "DATA",
		[DCCP_PKT_ACK]	    = "ACK",
		[DCCP_PKT_DATAACK]  = "DATAACK",
		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
		[DCCP_PKT_CLOSE]    = "CLOSE",
		[DCCP_PKT_RESET]    = "RESET",
		[DCCP_PKT_SYNC]	    = "SYNC",
		[DCCP_PKT_SYNCACK]  = "SYNCACK",
	};

	if (type >= DCCP_NR_PKT_TYPES)
		return "INVALID";
	else
		return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
	static const char *dccp_state_names[] = {
	[DCCP_OPEN]		= "OPEN",
	[DCCP_REQUESTING]	= "REQUESTING",
	[DCCP_PARTOPEN]		= "PARTOPEN",
	[DCCP_LISTEN]		= "LISTEN",
	[DCCP_RESPOND]		= "RESPOND",
	[DCCP_CLOSING]		= "CLOSING",
	[DCCP_ACTIVE_CLOSEREQ]	= "CLOSEREQ",
	[DCCP_PASSIVE_CLOSE]	= "PASSIVE_CLOSE",
	[DCCP_PASSIVE_CLOSEREQ]	= "PASSIVE_CLOSEREQ",
	[DCCP_TIME_WAIT]	= "TIME_WAIT",
	[DCCP_CLOSED]		= "CLOSED",
	};

	if (state >= DCCP_MAX_STATES)
		return "INVALID STATE!";
	else
		return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_minisock_init(&dp->dccps_minisock);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;

	dccp_init_xmit_timers(sk);

	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * let's leave it here; later, the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(ctl_sock_initialized)) {
		int rc = dccp_feat_init(dmsk);

		if (rc)
			return rc;

		if (dmsk->dccpms_send_ack_vector) {
			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
			if (dp->dccps_hc_rx_ackvec == NULL)
				return -ENOMEM;
		}
		dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
						      sk, GFP_KERNEL);
		dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
						      sk, GFP_KERNEL);
		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
			     dp->dccps_hc_tx_ccid == NULL)) {
			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
			if (dmsk->dccpms_send_ack_vector) {
				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
				dp->dccps_hc_rx_ackvec = NULL;
			}
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else {
		/* control socket doesn't need feat nego */
		INIT_LIST_HEAD(&dmsk->dccpms_pending);
		INIT_LIST_HEAD(&dmsk->dccpms_conf);
	}

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dmsk->dccpms_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_clean(dmsk);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk, int backlog)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	return inet_csk_listen_start(sk, backlog);
}

static inline int dccp_need_reset(int state)
{
	return state != DCCP_CLOSED && state != DCCP_LISTEN &&
	       state != DCCP_REQUESTING;
}

int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC 793, sec. 3.8.
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING) {
		sk->sk_err = ECONNRESET;
	}

	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	WARN_ON(inet->num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

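/*
 * A minimal userspace sketch (the fd and helper below are illustrative
 * assumptions, not part of this file) of a poll loop that notices a
 * passive close via the POLLRDHUP bit set in dccp_poll():
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLRDHUP };
 *
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLRDHUP))
 *		handle_peer_close(fd);	(hypothetical helper)
 */
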
/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/*
	 * Socket is not locked.  We are protected from async events by
	 * poll logic, and correct handling of state changes made by
	 * other threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

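/*
 * Sketch of the SIOCINQ query handled below, as seen from userspace (fd is
 * assumed to be a connected DCCP socket):
 *
 *	int avail;
 *
 *	if (ioctl(fd, SIOCINQ, &avail) == 0)
 *		printf("next packet carries %d bytes\n", avail);
 *
 * Unlike TCP, only the length of the packet at the head of the receive
 * queue is reported, since one recvmsg() consumes exactly one packet.
 */
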
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		rc = put_user(amount, (int __user *)arg);
		break;
	}
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);
	return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

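/*
 * Userspace sketch for the service-code option handled below (the two
 * service codes are made-up examples):
 *
 *	__be32 sc[2] = { htonl(4242), htonl(4243) };
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, sc, sizeof(sc));
 *
 * The first word becomes dccps_service; any further words form the
 * dccps_service_list against which incoming requests are matched.
 */
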
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}

/* Byte 1 is the feature number; the rest is the preference list. */
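/*
 * Sketch of a change request from userspace (the feature value array is
 * an illustrative assumption):
 *
 *	__u8 ccids[] = { 3, 2 };
 *	struct dccp_so_feat f = {
 *		.dccpsf_feat = DCCPF_CCID,
 *		.dccpsf_val  = ccids,
 *		.dccpsf_len  = sizeof(ccids),
 *	};
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CHANGE_L, &f, sizeof(f));
 */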
static int dccp_setsockopt_change(struct sock *sk, int type,
				  struct dccp_so_feat __user *optval)
{
	struct dccp_so_feat opt;
	u8 *val;
	int rc;

	if (copy_from_user(&opt, optval, sizeof(opt)))
		return -EFAULT;

	val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
	if (!val)
		return -ENOMEM;

	if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
		rc = -EFAULT;
		goto out_free_val;
	}

	rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
			      val, opt.dccpsf_len, GFP_KERNEL);
	if (rc)
		goto out_free_val;

out:
	return rc;

out_free_val:
	kfree(val);
	goto out;
}

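/*
 * Example of requesting partial checksum coverage (RFC 4340, 9.2), as
 * handled by the DCCP_SOCKOPT_SEND_CSCOV case below; the value 4 is an
 * arbitrary illustration and covers the header plus the first 12 bytes
 * of payload:
 *
 *	int cscov = 4;
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SEND_CSCOV,
 *		   &cscov, sizeof(cscov));
 */
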
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		err = 0;
		break;
	case DCCP_SOCKOPT_CHANGE_L:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_CHANGE_R:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:	/* sender side, RFC 4340, sec. 9.2 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else
			dp->dccps_pcslen = val;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:	/* receiver side, RFC 4340, sec. 9.2.1 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else {
			dp->dccps_pcrlen = val;
			/* FIXME: add feature negotiation,
			 * ChangeL(MinimumChecksumCoverage, val) */
		}
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}

	release_sock(sk);
	return err;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_setsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif

static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
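	/*
	 * Option numbers 128..255 are reserved for CCID-specific use:
	 * the lower half is routed to the RX CCID, the upper half to
	 * the TX CCID.
	 */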
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_getsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif

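/*
 * DCCP preserves message boundaries: each sendmsg() becomes at most one
 * packet, and anything larger than the current maximum packet size is
 * rejected with -EMSGSIZE rather than fragmented.  A userspace sketch
 * (error handling abbreviated):
 *
 *	if (send(fd, buf, len, 0) < 0 && errno == EMSGSIZE)
 *		(retry with a chunk no larger than the value reported
 *		 by getsockopt(DCCP_SOCKOPT_GET_CUR_MPS))
 */
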
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	if (sysctl_dccp_tx_qlen &&
	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	skb_queue_tail(&sk->sk_write_queue, skb);
	dccp_write_xmit(sk, 0);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

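/*
 * Reads are packet-oriented as well: one recvmsg() consumes one packet,
 * and a buffer shorter than the packet sets MSG_TRUNC and discards the
 * excess, as the found_ok_skb path below shows.  Userspace sketch:
 *
 *	n = recvmsg(fd, &msg, 0);
 *	if (n >= 0 && (msg.msg_flags & MSG_TRUNC))
 *		(the datagram was longer than the supplied buffer)
 */
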
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb, 0);
		}
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when the user tries to read
				 * from a never-connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb, 0);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

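/*
 * Sketch of the usual server-side sequence that ends up here (address
 * setup and error handling omitted):
 *
 *	fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	listen(fd, 5);
 */
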
int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);

static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		dccp_send_close(sk, 1);

		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}

void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		dccp_terminate_connection(sk);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);
	atomic_inc(sk->sk_prot->orphan_count);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static inline int dccp_mib_init(void)
{
	return snmp_mib_init((void **)dccp_statistics, sizeof(struct dccp_mib));
}

static inline void dccp_mib_exit(void)
{
	snmp_mib_free((void **)dccp_statistics);
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
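
/*
 * Worked example of the ehash sizing done in dccp_init() below, under the
 * illustrative assumptions of 4 KiB pages and thash_entries = 8192: the
 * goal becomes 8192 * sizeof(struct inet_ehash_bucket) bytes, rounded up
 * to a power-of-two number of pages, after which ehash_size is trimmed
 * back down to the largest power-of-two bucket count that fits in that
 * allocation.
 */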

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, bool, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc = -ENOBUFS;

	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
		     FIELD_SIZEOF(struct sk_buff, cb));

	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
	}

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
		goto out_free_dccp_ehash;

	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	dccp_timestamping_init();
out:
	return rc;
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
	goto out;
}

static void __exit dccp_fini(void)
{
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");