xref: /openbmc/linux/net/dccp/proto.c (revision 643d1f7f)
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *	This program is free software; you can redistribute it and/or modify it
8  *	under the terms of the GNU General Public License version 2 as
9  *	published by the Free Software Foundation.
10  */
11 
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24 
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28 
29 #include <asm/ioctls.h>
30 #include <asm/semaphore.h>
31 #include <linux/spinlock.h>
32 #include <linux/timer.h>
33 #include <linux/delay.h>
34 #include <linux/poll.h>
35 
36 #include "ccid.h"
37 #include "dccp.h"
38 #include "feat.h"
39 
/* Per-CPU DCCP MIB (SNMP) counters; storage is allocated in dccp_mib_init() */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* Count of orphaned DCCP sockets (closed by the owner, still shutting
 * down in the protocol); incremented in dccp_close(). */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

/* Shared listen/bind/established hash tables for all DCCP sockets;
 * ehash and bhash are sized and allocated at module load in dccp_init(). */
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
58 
/*
 * dccp_set_state  -  move socket @sk into state @state, keeping MIBs in sync
 *
 * Maintains DCCP_MIB_CURRESTAB (number of sockets currently in DCCP_OPEN)
 * and DCCP_MIB_ESTABRESETS.  On entry to DCCP_CLOSED the socket is also
 * unhashed and, unless the user holds a bind-port lock, its local port
 * is released.
 */
void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);	/* callers must not re-enter the same state */

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		break;

	case DCCP_CLOSED:
		/* Tearing down an established/closing connection counts as
		 * an "established reset" for SNMP purposes. */
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(&dccp_hashinfo, sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);
95 
/*
 * Complete a passive close: the peer initiated teardown (we received a
 * Close or CloseReq) and the local side now answers it and advances the
 * state machine.  States other than the two passive-close states are
 * left untouched.
 */
static void dccp_finish_passive_close(struct sock *sk)
{
	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
		/* Node (client or server) has received Close packet. */
		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
		dccp_set_state(sk, DCCP_CLOSED);
		break;
	case DCCP_PASSIVE_CLOSEREQ:
		/*
		 * Client received CloseReq. We set the `active' flag so that
		 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
		 */
		dccp_send_close(sk, 1);
		dccp_set_state(sk, DCCP_CLOSING);
	}
}
113 
/*
 * dccp_done  -  the connection is over: enter DCCP_CLOSED and stop timers
 *
 * A socket that still has a user is woken via sk_state_change(); one
 * already marked SOCK_DEAD (orphaned) is destroyed right away.
 */
void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;	/* neither rx nor tx possible any more */

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);
128 
129 const char *dccp_packet_name(const int type)
130 {
131 	static const char *dccp_packet_names[] = {
132 		[DCCP_PKT_REQUEST]  = "REQUEST",
133 		[DCCP_PKT_RESPONSE] = "RESPONSE",
134 		[DCCP_PKT_DATA]	    = "DATA",
135 		[DCCP_PKT_ACK]	    = "ACK",
136 		[DCCP_PKT_DATAACK]  = "DATAACK",
137 		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
138 		[DCCP_PKT_CLOSE]    = "CLOSE",
139 		[DCCP_PKT_RESET]    = "RESET",
140 		[DCCP_PKT_SYNC]	    = "SYNC",
141 		[DCCP_PKT_SYNCACK]  = "SYNCACK",
142 	};
143 
144 	if (type >= DCCP_NR_PKT_TYPES)
145 		return "INVALID";
146 	else
147 		return dccp_packet_names[type];
148 }
149 
150 EXPORT_SYMBOL_GPL(dccp_packet_name);
151 
152 const char *dccp_state_name(const int state)
153 {
154 	static char *dccp_state_names[] = {
155 	[DCCP_OPEN]		= "OPEN",
156 	[DCCP_REQUESTING]	= "REQUESTING",
157 	[DCCP_PARTOPEN]		= "PARTOPEN",
158 	[DCCP_LISTEN]		= "LISTEN",
159 	[DCCP_RESPOND]		= "RESPOND",
160 	[DCCP_CLOSING]		= "CLOSING",
161 	[DCCP_ACTIVE_CLOSEREQ]	= "CLOSEREQ",
162 	[DCCP_PASSIVE_CLOSE]	= "PASSIVE_CLOSE",
163 	[DCCP_PASSIVE_CLOSEREQ]	= "PASSIVE_CLOSEREQ",
164 	[DCCP_TIME_WAIT]	= "TIME_WAIT",
165 	[DCCP_CLOSED]		= "CLOSED",
166 	};
167 
168 	if (state >= DCCP_MAX_STATES)
169 		return "INVALID STATE!";
170 	else
171 		return dccp_state_names[state];
172 }
173 
174 EXPORT_SYMBOL_GPL(dccp_state_name);
175 
/* Insert @sk into the shared DCCP established-socket hash table. */
void dccp_hash(struct sock *sk)
{
	inet_hash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_hash);
182 
/* Remove @sk from the shared DCCP established-socket hash table. */
void dccp_unhash(struct sock *sk)
{
	inet_unhash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_unhash);
189 
/*
 * dccp_init_sock  -  set up the DCCP-specific state of a new socket
 * @sk: socket to initialise
 * @ctl_sock_initialized: zero only while the per-module control socket is
 *	itself being created; for that socket feature negotiation and CCID
 *	instantiation are skipped
 *
 * Returns 0 on success or a negative errno (e.g. -ENOMEM); on failure
 * any partially allocated CCID/ack-vector state is rolled back.
 */
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_minisock_init(&dp->dccps_minisock);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;	/* conservative initial value */
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;

	dccp_init_xmit_timers(sk);

	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * lets leave it here, later the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(ctl_sock_initialized)) {
		int rc = dccp_feat_init(dmsk);

		if (rc)
			return rc;

		if (dmsk->dccpms_send_ack_vector) {
			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
			if (dp->dccps_hc_rx_ackvec == NULL)
				return -ENOMEM;
		}
		dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
						      sk, GFP_KERNEL);
		dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
						      sk, GFP_KERNEL);
		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
			     dp->dccps_hc_tx_ccid == NULL)) {
			/* Roll back whichever half did get allocated */
			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
			if (dmsk->dccpms_send_ack_vector) {
				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
				dp->dccps_hc_rx_ackvec = NULL;
			}
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else {
		/* control socket doesn't need feat nego */
		INIT_LIST_HEAD(&dmsk->dccpms_pending);
		INIT_LIST_HEAD(&dmsk->dccpms_conf);
	}

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);
254 
/*
 * dccp_destroy_sock  -  release all DCCP-private resources of @sk
 *
 * Counterpart of dccp_init_sock(): frees the pending retransmit skb,
 * the bound port, the service list, the ack vector, both CCID blocks
 * and the feature-negotiation state.  Always returns 0.
 */
int dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(&dccp_hashinfo, sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dmsk->dccpms_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_clean(dmsk);

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);
291 
/* Enter the listening state: record the LISTEN role, then hand over to
 * the generic inet connection-socket listen machinery. */
static inline int dccp_listen_start(struct sock *sk, int backlog)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	return inet_csk_listen_start(sk, backlog);
}
299 
300 static inline int dccp_need_reset(int state)
301 {
302 	return state != DCCP_CLOSED && state != DCCP_LISTEN &&
303 	       state != DCCP_REQUESTING;
304 }
305 
/*
 * dccp_disconnect  -  abort the connection, returning @sk to DCCP_CLOSED
 * @sk:    socket to disconnect
 * @flags: unused here; kept for the sk_prot->disconnect signature
 *
 * Stops a listening socket, resets a connected one, then drops pending
 * rx/tx data and clears timers, delayed-ack state and the cached route.
 * Always returns 0.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC793, sec. 3.8
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	/* DCCP keeps at most one packet for retransmission in sk_send_head */
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	/* forget the local address unless the user explicitly bound it */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);
354 
/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by other threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? (i.e. not still in the handshake states) */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);
412 
413 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
414 {
415 	int rc = -ENOTCONN;
416 
417 	lock_sock(sk);
418 
419 	if (sk->sk_state == DCCP_LISTEN)
420 		goto out;
421 
422 	switch (cmd) {
423 	case SIOCINQ: {
424 		struct sk_buff *skb;
425 		unsigned long amount = 0;
426 
427 		skb = skb_peek(&sk->sk_receive_queue);
428 		if (skb != NULL) {
429 			/*
430 			 * We will only return the amount of this packet since
431 			 * that is all that will be read.
432 			 */
433 			amount = skb->len;
434 		}
435 		rc = put_user(amount, (int __user *)arg);
436 	}
437 		break;
438 	default:
439 		rc = -ENOIOCTLCMD;
440 		break;
441 	}
442 out:
443 	release_sock(sk);
444 	return rc;
445 }
446 
447 EXPORT_SYMBOL_GPL(dccp_ioctl);
448 
/*
 * dccp_setsockopt_service  -  handle setsockopt(DCCP_SOCKOPT_SERVICE)
 * @service: first service code, already fetched from user memory by the caller
 * @optval:  user buffer holding the complete __be32 service-code list
 * @optlen:  size of @optval in bytes
 *
 * The first code becomes dccps_service; any remaining codes are stored
 * in a freshly allocated dccps_service_list, replacing a previous list.
 * The reserved invalid code and over-long lists are rejected.
 */
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		/* list holds every code after the first one */
		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	/* drop any previously installed list before installing the new one */
	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
483 
484 /* byte 1 is feature.  the rest is the preference list */
485 static int dccp_setsockopt_change(struct sock *sk, int type,
486 				  struct dccp_so_feat __user *optval)
487 {
488 	struct dccp_so_feat opt;
489 	u8 *val;
490 	int rc;
491 
492 	if (copy_from_user(&opt, optval, sizeof(opt)))
493 		return -EFAULT;
494 
495 	val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
496 	if (!val)
497 		return -ENOMEM;
498 
499 	if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
500 		rc = -EFAULT;
501 		goto out_free_val;
502 	}
503 
504 	rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
505 			      val, opt.dccpsf_len, GFP_KERNEL);
506 	if (rc)
507 		goto out_free_val;
508 
509 out:
510 	return rc;
511 
512 out_free_val:
513 	kfree(val);
514 	goto out;
515 }
516 
/*
 * do_dccp_setsockopt  -  handle SOL_DCCP socket options
 *
 * DCCP_SOCKOPT_SERVICE is dispatched before taking the socket lock:
 * dccp_setsockopt_service() copies from user space and locks the socket
 * itself.  All other options are simple int values handled under the lock.
 */
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		err = 0;
		break;
	case DCCP_SOCKOPT_CHANGE_L:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_CHANGE_R:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		/* only meaningful on the server side of a connection */
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:	/* sender side, RFC 4340, sec. 9.2 */
		if (val < 0 || val > 15)	/* coverage is a 4-bit value */
			err = -EINVAL;
		else
			dp->dccps_pcslen = val;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:	/* receiver side, RFC 4340 sec. 9.2.1 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else {
			dp->dccps_pcrlen = val;
			/* FIXME: add feature negotiation,
			 * ChangeL(MinimumChecksumCoverage, val) */
		}
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}

	release_sock(sk);
	return err;
}
583 
584 int dccp_setsockopt(struct sock *sk, int level, int optname,
585 		    char __user *optval, int optlen)
586 {
587 	if (level != SOL_DCCP)
588 		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
589 							     optname, optval,
590 							     optlen);
591 	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
592 }
593 
594 EXPORT_SYMBOL_GPL(dccp_setsockopt);
595 
#ifdef CONFIG_COMPAT
/* 32-bit compat setsockopt: SOL_DCCP options are layout-compatible and
 * handled natively; other levels go through the compat helper. */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_setsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
608 
609 static int dccp_getsockopt_service(struct sock *sk, int len,
610 				   __be32 __user *optval,
611 				   int __user *optlen)
612 {
613 	const struct dccp_sock *dp = dccp_sk(sk);
614 	const struct dccp_service_list *sl;
615 	int err = -ENOENT, slen = 0, total_len = sizeof(u32);
616 
617 	lock_sock(sk);
618 	if ((sl = dp->dccps_service_list) != NULL) {
619 		slen = sl->dccpsl_nr * sizeof(u32);
620 		total_len += slen;
621 	}
622 
623 	err = -EINVAL;
624 	if (total_len > len)
625 		goto out;
626 
627 	err = 0;
628 	if (put_user(total_len, optlen) ||
629 	    put_user(dp->dccps_service, optval) ||
630 	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
631 		err = -EFAULT;
632 out:
633 	release_sock(sk);
634 	return err;
635 }
636 
/*
 * do_dccp_getsockopt  -  handle SOL_DCCP socket option queries
 *
 * Option numbers 128..191 are forwarded to the RX CCID and 192..255 to
 * the TX CCID; everything else is answered here as a plain int.
 */
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case 128 ... 191:	/* RX-CCID-specific options */
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:	/* TX-CCID-specific options */
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}
686 
687 int dccp_getsockopt(struct sock *sk, int level, int optname,
688 		    char __user *optval, int __user *optlen)
689 {
690 	if (level != SOL_DCCP)
691 		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
692 							     optname, optval,
693 							     optlen);
694 	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
695 }
696 
697 EXPORT_SYMBOL_GPL(dccp_getsockopt);
698 
#ifdef CONFIG_COMPAT
/* 32-bit compat getsockopt: SOL_DCCP options are layout-compatible and
 * handled natively; other levels go through the compat helper. */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_getsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
711 
/*
 * dccp_sendmsg  -  queue a single datagram for transmission
 *
 * DCCP is datagram-based: each call produces at most one data packet,
 * so a message longer than the current MSS is rejected with -EMSGSIZE
 * rather than fragmented.  The packet is appended to sk_write_queue and
 * pushed out via dccp_write_xmit().
 *
 * Returns the queued length on success or a negative errno.
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	/* honour the sysctl bound on the tx queue length (0 = unlimited) */
	if (sysctl_dccp_tx_qlen &&
	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	/* NOTE(review): the socket lock is dropped across the allocation
	 * below, so the socket state may change in the meantime -- confirm
	 * this is safe for every state reachable at this point. */
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	skb_queue_tail(&sk->sk_write_queue, skb);
	dccp_write_xmit(sk,0);
out_release:
	release_sock(sk);
	return rc ? : len;	/* rc == 0 means success: report the full length */
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
767 
/*
 * dccp_recvmsg  -  receive a single datagram from a DCCP socket
 *
 * DCCP preserves record boundaries: at most one packet's payload is
 * delivered per call; if the buffer is smaller than the packet, the
 * excess is discarded and MSG_TRUNC is set.  Close/CloseReq/Reset
 * packets end reception (return 0); other non-data packets are eaten
 * silently and the wait resumes.  With MSG_PEEK the packet remains
 * queued.
 *
 * Returns the number of bytes copied, 0 at end of connection, or a
 * negative errno.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			/* answer the peer's close unless we are only peeking */
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			/* any other packet type is dropped from the queue */
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb, 0);
		}
verify_sock_status:
		/* no data available: decide whether to report or keep waiting */
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		/* deliver at most one packet; excess is truncated */
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb, 0);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
873 
874 int inet_dccp_listen(struct socket *sock, int backlog)
875 {
876 	struct sock *sk = sock->sk;
877 	unsigned char old_state;
878 	int err;
879 
880 	lock_sock(sk);
881 
882 	err = -EINVAL;
883 	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
884 		goto out;
885 
886 	old_state = sk->sk_state;
887 	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
888 		goto out;
889 
890 	/* Really, if the socket is already in listen state
891 	 * we can only allow the backlog to be adjusted.
892 	 */
893 	if (old_state != DCCP_LISTEN) {
894 		/*
895 		 * FIXME: here it probably should be sk->sk_prot->listen_start
896 		 * see tcp_listen_start
897 		 */
898 		err = dccp_listen_start(sk, backlog);
899 		if (err)
900 			goto out;
901 	}
902 	sk->sk_max_ack_backlog = backlog;
903 	err = 0;
904 
905 out:
906 	release_sock(sk);
907 	return err;
908 }
909 
910 EXPORT_SYMBOL_GPL(inet_dccp_listen);
911 
/*
 * dccp_terminate_connection  -  actively terminate a live connection
 *
 * Called from dccp_close() for a socket that still holds a connection:
 * completes a passive close if one is pending; otherwise sends Close
 * (stopping a PARTOPEN delayed-ack timer first) and enters
 * ACTIVE_CLOSEREQ for a server not using server-timewait mode,
 * CLOSING otherwise.  Any remaining state goes straight to CLOSED.
 */
static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		dccp_send_close(sk, 1);

		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}
938 
/*
 * dccp_close  -  close(2)/release handler for DCCP sockets
 * @sk:      socket being closed
 * @timeout: linger time, passed on to sk_stream_wait_close()
 *
 * Flushes unread receive data (aborting with a Reset if any was
 * discarded), initiates the active-close handshake for live
 * connections, then orphans the socket; final destruction happens
 * here only if the socket has already reached DCCP_CLOSED.
 */
void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		dccp_terminate_connection(sk);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);
	atomic_inc(sk->sk_prot->orphan_count);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);
1019 
/*
 * dccp_shutdown  -  shutdown(2) handler
 * @how: SHUT_RD/SHUT_WR/SHUT_RDWR selector from the caller
 *
 * Currently a no-op apart from a debug message; teardown work is done
 * in dccp_close().
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1026 
1027 static int __init dccp_mib_init(void)
1028 {
1029 	int rc = -ENOMEM;
1030 
1031 	dccp_statistics[0] = alloc_percpu(struct dccp_mib);
1032 	if (dccp_statistics[0] == NULL)
1033 		goto out;
1034 
1035 	dccp_statistics[1] = alloc_percpu(struct dccp_mib);
1036 	if (dccp_statistics[1] == NULL)
1037 		goto out_free_one;
1038 
1039 	rc = 0;
1040 out:
1041 	return rc;
1042 out_free_one:
1043 	free_percpu(dccp_statistics[0]);
1044 	dccp_statistics[0] = NULL;
1045 	goto out;
1046 
1047 }
1048 
1049 static void dccp_mib_exit(void)
1050 {
1051 	free_percpu(dccp_statistics[0]);
1052 	free_percpu(dccp_statistics[1]);
1053 	dccp_statistics[0] = dccp_statistics[1] = NULL;
1054 }
1055 
/* Optional size override for the established hash table; 0 (default)
 * lets dccp_init() size it from available memory. */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/* Global switch for dccp_pr_debug() output, settable at load time */
int dccp_debug;
module_param(dccp_debug, bool, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1067 
/*
 * dccp_init  -  module initialisation
 *
 * Allocates, in order: the bind-bucket slab cache, the established
 * ('ehash') and bind ('bhash') hash tables (sized from available memory
 * or the 'thash_entries' parameter, retrying at smaller page orders on
 * allocation failure), the per-CPU MIBs, the ack-vector cache and the
 * sysctl table.  On any failure everything allocated so far is undone.
 */
static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc = -ENOBUFS;

	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	/* retry at smaller orders until the page allocation succeeds */
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);
		/* round the bucket count down to a power of two */
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
	}

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
			goto out_free_dccp_ehash;

	/* bind table: start at the same order as the established table */
	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	dccp_timestamping_init();
out:
	return rc;
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
	goto out;
}
1174 
/* Module unload: release everything allocated in dccp_init(). */
static void __exit dccp_fini(void)
{
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
}
1189 
/* Module entry/exit points and metadata */
module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
1196