xref: /openbmc/linux/net/dccp/proto.c (revision 3805e6a1)
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *	This program is free software; you can redistribute it and/or modify it
8  *	under the terms of the GNU General Public License version 2 as
9  *	published by the Free Software Foundation.
10  */
11 
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <linux/slab.h>
24 #include <net/checksum.h>
25 
26 #include <net/inet_sock.h>
27 #include <net/sock.h>
28 #include <net/xfrm.h>
29 
30 #include <asm/ioctls.h>
31 #include <linux/spinlock.h>
32 #include <linux/timer.h>
33 #include <linux/delay.h>
34 #include <linux/poll.h>
35 
36 #include "ccid.h"
37 #include "dccp.h"
38 #include "feat.h"
39 
/* Per-CPU DCCP MIB counters; allocated in dccp_mib_init(), freed in dccp_mib_exit() */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* Per-CPU counter set up in dccp_init() and destroyed in dccp_fini() */
struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

/* Established/bind hash tables for DCCP; sized and allocated in dccp_init() */
struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/*
 * The maximum queue length for tx in packets. 0 is no limit.
 * Copied into each socket's dccps_tx_qlen by dccp_init_sock().
 */
int sysctl_dccp_tx_qlen __read_mostly = 5;
52 
53 #ifdef CONFIG_IP_DCCP_DEBUG
54 static const char *dccp_state_name(const int state)
55 {
56 	static const char *const dccp_state_names[] = {
57 	[DCCP_OPEN]		= "OPEN",
58 	[DCCP_REQUESTING]	= "REQUESTING",
59 	[DCCP_PARTOPEN]		= "PARTOPEN",
60 	[DCCP_LISTEN]		= "LISTEN",
61 	[DCCP_RESPOND]		= "RESPOND",
62 	[DCCP_CLOSING]		= "CLOSING",
63 	[DCCP_ACTIVE_CLOSEREQ]	= "CLOSEREQ",
64 	[DCCP_PASSIVE_CLOSE]	= "PASSIVE_CLOSE",
65 	[DCCP_PASSIVE_CLOSEREQ]	= "PASSIVE_CLOSEREQ",
66 	[DCCP_TIME_WAIT]	= "TIME_WAIT",
67 	[DCCP_CLOSED]		= "CLOSED",
68 	};
69 
70 	if (state >= DCCP_MAX_STATES)
71 		return "INVALID STATE!";
72 	else
73 		return dccp_state_names[state];
74 }
75 #endif
76 
77 void dccp_set_state(struct sock *sk, const int state)
78 {
79 	const int oldstate = sk->sk_state;
80 
81 	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
82 		      dccp_state_name(oldstate), dccp_state_name(state));
83 	WARN_ON(state == oldstate);
84 
85 	switch (state) {
86 	case DCCP_OPEN:
87 		if (oldstate != DCCP_OPEN)
88 			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
89 		/* Client retransmits all Confirm options until entering OPEN */
90 		if (oldstate == DCCP_PARTOPEN)
91 			dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
92 		break;
93 
94 	case DCCP_CLOSED:
95 		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
96 		    oldstate == DCCP_CLOSING)
97 			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
98 
99 		sk->sk_prot->unhash(sk);
100 		if (inet_csk(sk)->icsk_bind_hash != NULL &&
101 		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
102 			inet_put_port(sk);
103 		/* fall through */
104 	default:
105 		if (oldstate == DCCP_OPEN)
106 			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
107 	}
108 
109 	/* Change state AFTER socket is unhashed to avoid closed
110 	 * socket sitting in hash tables.
111 	 */
112 	sk->sk_state = state;
113 }
114 
115 EXPORT_SYMBOL_GPL(dccp_set_state);
116 
117 static void dccp_finish_passive_close(struct sock *sk)
118 {
119 	switch (sk->sk_state) {
120 	case DCCP_PASSIVE_CLOSE:
121 		/* Node (client or server) has received Close packet. */
122 		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
123 		dccp_set_state(sk, DCCP_CLOSED);
124 		break;
125 	case DCCP_PASSIVE_CLOSEREQ:
126 		/*
127 		 * Client received CloseReq. We set the `active' flag so that
128 		 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
129 		 */
130 		dccp_send_close(sk, 1);
131 		dccp_set_state(sk, DCCP_CLOSING);
132 	}
133 }
134 
135 void dccp_done(struct sock *sk)
136 {
137 	dccp_set_state(sk, DCCP_CLOSED);
138 	dccp_clear_xmit_timers(sk);
139 
140 	sk->sk_shutdown = SHUTDOWN_MASK;
141 
142 	if (!sock_flag(sk, SOCK_DEAD))
143 		sk->sk_state_change(sk);
144 	else
145 		inet_csk_destroy_sock(sk);
146 }
147 
148 EXPORT_SYMBOL_GPL(dccp_done);
149 
150 const char *dccp_packet_name(const int type)
151 {
152 	static const char *const dccp_packet_names[] = {
153 		[DCCP_PKT_REQUEST]  = "REQUEST",
154 		[DCCP_PKT_RESPONSE] = "RESPONSE",
155 		[DCCP_PKT_DATA]	    = "DATA",
156 		[DCCP_PKT_ACK]	    = "ACK",
157 		[DCCP_PKT_DATAACK]  = "DATAACK",
158 		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
159 		[DCCP_PKT_CLOSE]    = "CLOSE",
160 		[DCCP_PKT_RESET]    = "RESET",
161 		[DCCP_PKT_SYNC]	    = "SYNC",
162 		[DCCP_PKT_SYNCACK]  = "SYNCACK",
163 	};
164 
165 	if (type >= DCCP_NR_PKT_TYPES)
166 		return "INVALID";
167 	else
168 		return dccp_packet_names[type];
169 }
170 
171 EXPORT_SYMBOL_GPL(dccp_packet_name);
172 
173 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
174 {
175 	struct dccp_sock *dp = dccp_sk(sk);
176 	struct inet_connection_sock *icsk = inet_csk(sk);
177 
178 	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
179 	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
180 	sk->sk_state		= DCCP_CLOSED;
181 	sk->sk_write_space	= dccp_write_space;
182 	icsk->icsk_sync_mss	= dccp_sync_mss;
183 	dp->dccps_mss_cache	= 536;
184 	dp->dccps_rate_last	= jiffies;
185 	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
186 	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
187 	dp->dccps_tx_qlen	= sysctl_dccp_tx_qlen;
188 
189 	dccp_init_xmit_timers(sk);
190 
191 	INIT_LIST_HEAD(&dp->dccps_featneg);
192 	/* control socket doesn't need feat nego */
193 	if (likely(ctl_sock_initialized))
194 		return dccp_feat_init(sk);
195 	return 0;
196 }
197 
198 EXPORT_SYMBOL_GPL(dccp_init_sock);
199 
200 void dccp_destroy_sock(struct sock *sk)
201 {
202 	struct dccp_sock *dp = dccp_sk(sk);
203 
204 	/*
205 	 * DCCP doesn't use sk_write_queue, just sk_send_head
206 	 * for retransmissions
207 	 */
208 	if (sk->sk_send_head != NULL) {
209 		kfree_skb(sk->sk_send_head);
210 		sk->sk_send_head = NULL;
211 	}
212 
213 	/* Clean up a referenced DCCP bind bucket. */
214 	if (inet_csk(sk)->icsk_bind_hash != NULL)
215 		inet_put_port(sk);
216 
217 	kfree(dp->dccps_service_list);
218 	dp->dccps_service_list = NULL;
219 
220 	if (dp->dccps_hc_rx_ackvec != NULL) {
221 		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
222 		dp->dccps_hc_rx_ackvec = NULL;
223 	}
224 	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
225 	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
226 	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
227 
228 	/* clean up feature negotiation state */
229 	dccp_feat_list_purge(&dp->dccps_featneg);
230 }
231 
232 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
233 
234 static inline int dccp_listen_start(struct sock *sk, int backlog)
235 {
236 	struct dccp_sock *dp = dccp_sk(sk);
237 
238 	dp->dccps_role = DCCP_ROLE_LISTEN;
239 	/* do not start to listen if feature negotiation setup fails */
240 	if (dccp_feat_finalise_settings(dp))
241 		return -EPROTO;
242 	return inet_csk_listen_start(sk, backlog);
243 }
244 
245 static inline int dccp_need_reset(int state)
246 {
247 	return state != DCCP_CLOSED && state != DCCP_LISTEN &&
248 	       state != DCCP_REQUESTING;
249 }
250 
251 int dccp_disconnect(struct sock *sk, int flags)
252 {
253 	struct inet_connection_sock *icsk = inet_csk(sk);
254 	struct inet_sock *inet = inet_sk(sk);
255 	int err = 0;
256 	const int old_state = sk->sk_state;
257 
258 	if (old_state != DCCP_CLOSED)
259 		dccp_set_state(sk, DCCP_CLOSED);
260 
261 	/*
262 	 * This corresponds to the ABORT function of RFC793, sec. 3.8
263 	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
264 	 */
265 	if (old_state == DCCP_LISTEN) {
266 		inet_csk_listen_stop(sk);
267 	} else if (dccp_need_reset(old_state)) {
268 		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
269 		sk->sk_err = ECONNRESET;
270 	} else if (old_state == DCCP_REQUESTING)
271 		sk->sk_err = ECONNRESET;
272 
273 	dccp_clear_xmit_timers(sk);
274 
275 	__skb_queue_purge(&sk->sk_receive_queue);
276 	__skb_queue_purge(&sk->sk_write_queue);
277 	if (sk->sk_send_head != NULL) {
278 		__kfree_skb(sk->sk_send_head);
279 		sk->sk_send_head = NULL;
280 	}
281 
282 	inet->inet_dport = 0;
283 
284 	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
285 		inet_reset_saddr(sk);
286 
287 	sk->sk_shutdown = 0;
288 	sock_reset_flag(sk, SOCK_DONE);
289 
290 	icsk->icsk_backoff = 0;
291 	inet_csk_delack_init(sk);
292 	__sk_dst_reset(sk);
293 
294 	WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
295 
296 	sk->sk_error_report(sk);
297 	return err;
298 }
299 
300 EXPORT_SYMBOL_GPL(dccp_disconnect);
301 
302 /*
303  *	Wait for a DCCP event.
304  *
305  *	Note that we don't need to lock the socket, as the upper poll layers
306  *	take care of normal races (between the test and the event) and we don't
307  *	go look at any of the socket buffers directly.
308  */
309 unsigned int dccp_poll(struct file *file, struct socket *sock,
310 		       poll_table *wait)
311 {
312 	unsigned int mask;
313 	struct sock *sk = sock->sk;
314 
315 	sock_poll_wait(file, sk_sleep(sk), wait);
316 	if (sk->sk_state == DCCP_LISTEN)
317 		return inet_csk_listen_poll(sk);
318 
319 	/* Socket is not locked. We are protected from async events
320 	   by poll logic and correct handling of state changes
321 	   made by another threads is impossible in any case.
322 	 */
323 
324 	mask = 0;
325 	if (sk->sk_err)
326 		mask = POLLERR;
327 
328 	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
329 		mask |= POLLHUP;
330 	if (sk->sk_shutdown & RCV_SHUTDOWN)
331 		mask |= POLLIN | POLLRDNORM | POLLRDHUP;
332 
333 	/* Connected? */
334 	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
335 		if (atomic_read(&sk->sk_rmem_alloc) > 0)
336 			mask |= POLLIN | POLLRDNORM;
337 
338 		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
339 			if (sk_stream_is_writeable(sk)) {
340 				mask |= POLLOUT | POLLWRNORM;
341 			} else {  /* send SIGIO later */
342 				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
343 				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
344 
345 				/* Race breaker. If space is freed after
346 				 * wspace test but before the flags are set,
347 				 * IO signal will be lost.
348 				 */
349 				if (sk_stream_is_writeable(sk))
350 					mask |= POLLOUT | POLLWRNORM;
351 			}
352 		}
353 	}
354 	return mask;
355 }
356 
357 EXPORT_SYMBOL_GPL(dccp_poll);
358 
359 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
360 {
361 	int rc = -ENOTCONN;
362 
363 	lock_sock(sk);
364 
365 	if (sk->sk_state == DCCP_LISTEN)
366 		goto out;
367 
368 	switch (cmd) {
369 	case SIOCINQ: {
370 		struct sk_buff *skb;
371 		unsigned long amount = 0;
372 
373 		skb = skb_peek(&sk->sk_receive_queue);
374 		if (skb != NULL) {
375 			/*
376 			 * We will only return the amount of this packet since
377 			 * that is all that will be read.
378 			 */
379 			amount = skb->len;
380 		}
381 		rc = put_user(amount, (int __user *)arg);
382 	}
383 		break;
384 	default:
385 		rc = -ENOIOCTLCMD;
386 		break;
387 	}
388 out:
389 	release_sock(sk);
390 	return rc;
391 }
392 
393 EXPORT_SYMBOL_GPL(dccp_ioctl);
394 
395 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
396 				   char __user *optval, unsigned int optlen)
397 {
398 	struct dccp_sock *dp = dccp_sk(sk);
399 	struct dccp_service_list *sl = NULL;
400 
401 	if (service == DCCP_SERVICE_INVALID_VALUE ||
402 	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
403 		return -EINVAL;
404 
405 	if (optlen > sizeof(service)) {
406 		sl = kmalloc(optlen, GFP_KERNEL);
407 		if (sl == NULL)
408 			return -ENOMEM;
409 
410 		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
411 		if (copy_from_user(sl->dccpsl_list,
412 				   optval + sizeof(service),
413 				   optlen - sizeof(service)) ||
414 		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
415 			kfree(sl);
416 			return -EFAULT;
417 		}
418 	}
419 
420 	lock_sock(sk);
421 	dp->dccps_service = service;
422 
423 	kfree(dp->dccps_service_list);
424 
425 	dp->dccps_service_list = sl;
426 	release_sock(sk);
427 	return 0;
428 }
429 
430 static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
431 {
432 	u8 *list, len;
433 	int i, rc;
434 
435 	if (cscov < 0 || cscov > 15)
436 		return -EINVAL;
437 	/*
438 	 * Populate a list of permissible values, in the range cscov...15. This
439 	 * is necessary since feature negotiation of single values only works if
440 	 * both sides incidentally choose the same value. Since the list starts
441 	 * lowest-value first, negotiation will pick the smallest shared value.
442 	 */
443 	if (cscov == 0)
444 		return 0;
445 	len = 16 - cscov;
446 
447 	list = kmalloc(len, GFP_KERNEL);
448 	if (list == NULL)
449 		return -ENOBUFS;
450 
451 	for (i = 0; i < len; i++)
452 		list[i] = cscov++;
453 
454 	rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
455 
456 	if (rc == 0) {
457 		if (rx)
458 			dccp_sk(sk)->dccps_pcrlen = cscov;
459 		else
460 			dccp_sk(sk)->dccps_pcslen = cscov;
461 	}
462 	kfree(list);
463 	return rc;
464 }
465 
466 static int dccp_setsockopt_ccid(struct sock *sk, int type,
467 				char __user *optval, unsigned int optlen)
468 {
469 	u8 *val;
470 	int rc = 0;
471 
472 	if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
473 		return -EINVAL;
474 
475 	val = memdup_user(optval, optlen);
476 	if (IS_ERR(val))
477 		return PTR_ERR(val);
478 
479 	lock_sock(sk);
480 	if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
481 		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
482 
483 	if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
484 		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
485 	release_sock(sk);
486 
487 	kfree(val);
488 	return rc;
489 }
490 
491 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
492 		char __user *optval, unsigned int optlen)
493 {
494 	struct dccp_sock *dp = dccp_sk(sk);
495 	int val, err = 0;
496 
497 	switch (optname) {
498 	case DCCP_SOCKOPT_PACKET_SIZE:
499 		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
500 		return 0;
501 	case DCCP_SOCKOPT_CHANGE_L:
502 	case DCCP_SOCKOPT_CHANGE_R:
503 		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
504 		return 0;
505 	case DCCP_SOCKOPT_CCID:
506 	case DCCP_SOCKOPT_RX_CCID:
507 	case DCCP_SOCKOPT_TX_CCID:
508 		return dccp_setsockopt_ccid(sk, optname, optval, optlen);
509 	}
510 
511 	if (optlen < (int)sizeof(int))
512 		return -EINVAL;
513 
514 	if (get_user(val, (int __user *)optval))
515 		return -EFAULT;
516 
517 	if (optname == DCCP_SOCKOPT_SERVICE)
518 		return dccp_setsockopt_service(sk, val, optval, optlen);
519 
520 	lock_sock(sk);
521 	switch (optname) {
522 	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
523 		if (dp->dccps_role != DCCP_ROLE_SERVER)
524 			err = -EOPNOTSUPP;
525 		else
526 			dp->dccps_server_timewait = (val != 0);
527 		break;
528 	case DCCP_SOCKOPT_SEND_CSCOV:
529 		err = dccp_setsockopt_cscov(sk, val, false);
530 		break;
531 	case DCCP_SOCKOPT_RECV_CSCOV:
532 		err = dccp_setsockopt_cscov(sk, val, true);
533 		break;
534 	case DCCP_SOCKOPT_QPOLICY_ID:
535 		if (sk->sk_state != DCCP_CLOSED)
536 			err = -EISCONN;
537 		else if (val < 0 || val >= DCCPQ_POLICY_MAX)
538 			err = -EINVAL;
539 		else
540 			dp->dccps_qpolicy = val;
541 		break;
542 	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
543 		if (val < 0)
544 			err = -EINVAL;
545 		else
546 			dp->dccps_tx_qlen = val;
547 		break;
548 	default:
549 		err = -ENOPROTOOPT;
550 		break;
551 	}
552 	release_sock(sk);
553 
554 	return err;
555 }
556 
557 int dccp_setsockopt(struct sock *sk, int level, int optname,
558 		    char __user *optval, unsigned int optlen)
559 {
560 	if (level != SOL_DCCP)
561 		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
562 							     optname, optval,
563 							     optlen);
564 	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
565 }
566 
567 EXPORT_SYMBOL_GPL(dccp_setsockopt);
568 
569 #ifdef CONFIG_COMPAT
570 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
571 			   char __user *optval, unsigned int optlen)
572 {
573 	if (level != SOL_DCCP)
574 		return inet_csk_compat_setsockopt(sk, level, optname,
575 						  optval, optlen);
576 	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
577 }
578 
579 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
580 #endif
581 
582 static int dccp_getsockopt_service(struct sock *sk, int len,
583 				   __be32 __user *optval,
584 				   int __user *optlen)
585 {
586 	const struct dccp_sock *dp = dccp_sk(sk);
587 	const struct dccp_service_list *sl;
588 	int err = -ENOENT, slen = 0, total_len = sizeof(u32);
589 
590 	lock_sock(sk);
591 	if ((sl = dp->dccps_service_list) != NULL) {
592 		slen = sl->dccpsl_nr * sizeof(u32);
593 		total_len += slen;
594 	}
595 
596 	err = -EINVAL;
597 	if (total_len > len)
598 		goto out;
599 
600 	err = 0;
601 	if (put_user(total_len, optlen) ||
602 	    put_user(dp->dccps_service, optval) ||
603 	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
604 		err = -EFAULT;
605 out:
606 	release_sock(sk);
607 	return err;
608 }
609 
610 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
611 		    char __user *optval, int __user *optlen)
612 {
613 	struct dccp_sock *dp;
614 	int val, len;
615 
616 	if (get_user(len, optlen))
617 		return -EFAULT;
618 
619 	if (len < (int)sizeof(int))
620 		return -EINVAL;
621 
622 	dp = dccp_sk(sk);
623 
624 	switch (optname) {
625 	case DCCP_SOCKOPT_PACKET_SIZE:
626 		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
627 		return 0;
628 	case DCCP_SOCKOPT_SERVICE:
629 		return dccp_getsockopt_service(sk, len,
630 					       (__be32 __user *)optval, optlen);
631 	case DCCP_SOCKOPT_GET_CUR_MPS:
632 		val = dp->dccps_mss_cache;
633 		break;
634 	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
635 		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
636 	case DCCP_SOCKOPT_TX_CCID:
637 		val = ccid_get_current_tx_ccid(dp);
638 		if (val < 0)
639 			return -ENOPROTOOPT;
640 		break;
641 	case DCCP_SOCKOPT_RX_CCID:
642 		val = ccid_get_current_rx_ccid(dp);
643 		if (val < 0)
644 			return -ENOPROTOOPT;
645 		break;
646 	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
647 		val = dp->dccps_server_timewait;
648 		break;
649 	case DCCP_SOCKOPT_SEND_CSCOV:
650 		val = dp->dccps_pcslen;
651 		break;
652 	case DCCP_SOCKOPT_RECV_CSCOV:
653 		val = dp->dccps_pcrlen;
654 		break;
655 	case DCCP_SOCKOPT_QPOLICY_ID:
656 		val = dp->dccps_qpolicy;
657 		break;
658 	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
659 		val = dp->dccps_tx_qlen;
660 		break;
661 	case 128 ... 191:
662 		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
663 					     len, (u32 __user *)optval, optlen);
664 	case 192 ... 255:
665 		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
666 					     len, (u32 __user *)optval, optlen);
667 	default:
668 		return -ENOPROTOOPT;
669 	}
670 
671 	len = sizeof(val);
672 	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
673 		return -EFAULT;
674 
675 	return 0;
676 }
677 
678 int dccp_getsockopt(struct sock *sk, int level, int optname,
679 		    char __user *optval, int __user *optlen)
680 {
681 	if (level != SOL_DCCP)
682 		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
683 							     optname, optval,
684 							     optlen);
685 	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
686 }
687 
688 EXPORT_SYMBOL_GPL(dccp_getsockopt);
689 
690 #ifdef CONFIG_COMPAT
691 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
692 			   char __user *optval, int __user *optlen)
693 {
694 	if (level != SOL_DCCP)
695 		return inet_csk_compat_getsockopt(sk, level, optname,
696 						  optval, optlen);
697 	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
698 }
699 
700 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
701 #endif
702 
703 static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
704 {
705 	struct cmsghdr *cmsg;
706 
707 	/*
708 	 * Assign an (opaque) qpolicy priority value to skb->priority.
709 	 *
710 	 * We are overloading this skb field for use with the qpolicy subystem.
711 	 * The skb->priority is normally used for the SO_PRIORITY option, which
712 	 * is initialised from sk_priority. Since the assignment of sk_priority
713 	 * to skb->priority happens later (on layer 3), we overload this field
714 	 * for use with queueing priorities as long as the skb is on layer 4.
715 	 * The default priority value (if nothing is set) is 0.
716 	 */
717 	skb->priority = 0;
718 
719 	for_each_cmsghdr(cmsg, msg) {
720 		if (!CMSG_OK(msg, cmsg))
721 			return -EINVAL;
722 
723 		if (cmsg->cmsg_level != SOL_DCCP)
724 			continue;
725 
726 		if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
727 		    !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
728 			return -EINVAL;
729 
730 		switch (cmsg->cmsg_type) {
731 		case DCCP_SCM_PRIORITY:
732 			if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
733 				return -EINVAL;
734 			skb->priority = *(__u32 *)CMSG_DATA(cmsg);
735 			break;
736 		default:
737 			return -EINVAL;
738 		}
739 	}
740 	return 0;
741 }
742 
743 int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
744 {
745 	const struct dccp_sock *dp = dccp_sk(sk);
746 	const int flags = msg->msg_flags;
747 	const int noblock = flags & MSG_DONTWAIT;
748 	struct sk_buff *skb;
749 	int rc, size;
750 	long timeo;
751 
752 	if (len > dp->dccps_mss_cache)
753 		return -EMSGSIZE;
754 
755 	lock_sock(sk);
756 
757 	if (dccp_qpolicy_full(sk)) {
758 		rc = -EAGAIN;
759 		goto out_release;
760 	}
761 
762 	timeo = sock_sndtimeo(sk, noblock);
763 
764 	/*
765 	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
766 	 * so that the trick in dccp_rcv_request_sent_state_process.
767 	 */
768 	/* Wait for a connection to finish. */
769 	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
770 		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
771 			goto out_release;
772 
773 	size = sk->sk_prot->max_header + len;
774 	release_sock(sk);
775 	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
776 	lock_sock(sk);
777 	if (skb == NULL)
778 		goto out_release;
779 
780 	skb_reserve(skb, sk->sk_prot->max_header);
781 	rc = memcpy_from_msg(skb_put(skb, len), msg, len);
782 	if (rc != 0)
783 		goto out_discard;
784 
785 	rc = dccp_msghdr_parse(msg, skb);
786 	if (rc != 0)
787 		goto out_discard;
788 
789 	dccp_qpolicy_push(sk, skb);
790 	/*
791 	 * The xmit_timer is set if the TX CCID is rate-based and will expire
792 	 * when congestion control permits to release further packets into the
793 	 * network. Window-based CCIDs do not use this timer.
794 	 */
795 	if (!timer_pending(&dp->dccps_xmit_timer))
796 		dccp_write_xmit(sk);
797 out_release:
798 	release_sock(sk);
799 	return rc ? : len;
800 out_discard:
801 	kfree_skb(skb);
802 	goto out_release;
803 }
804 
805 EXPORT_SYMBOL_GPL(dccp_sendmsg);
806 
/**
 * dccp_recvmsg  -  Receive one datagram from a DCCP socket
 * @sk:       receiving socket
 * @msg:      destination message
 * @len:      size of the user buffer in bytes
 * @nonblock: non-zero if the call must not block
 * @flags:    receive flags (MSG_PEEK and MSG_TRUNC are examined here)
 * @addr_len: not used by this function
 *
 * Returns the number of bytes copied (or, with MSG_TRUNC in @flags, the
 * full length of the packet), 0 when a Close/CloseReq/Reset is found or the
 * socket is done / shut down for reading, or a negative errno. Note that
 * the result is carried in the size_t @len and converted to int on return.
 */
int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
		 int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	/* A listening socket never carries data */
	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		/* Only the head of the queue is looked at in each round */
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			/* Peeking must not advance the connection state */
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			/* Any other packet type is dropped from the queue */
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb);
		}
verify_sock_status:
		/* Nothing deliverable: find out whether waiting makes sense */
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		/* Non-blocking call, or the receive timeout has been used up */
		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo, NULL);
		continue;
	found_ok_skb:
		/* Deliver at most one packet; flag truncation if it does not fit */
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_msg(skb, 0, msg, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
		/* With MSG_TRUNC the caller asked for the real packet length */
		if (flags & MSG_TRUNC)
			len = skb->len;
	found_fin_ok:
		/* The packet stays queued when the caller is only peeking */
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}
912 
913 EXPORT_SYMBOL_GPL(dccp_recvmsg);
914 
915 int inet_dccp_listen(struct socket *sock, int backlog)
916 {
917 	struct sock *sk = sock->sk;
918 	unsigned char old_state;
919 	int err;
920 
921 	lock_sock(sk);
922 
923 	err = -EINVAL;
924 	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
925 		goto out;
926 
927 	old_state = sk->sk_state;
928 	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
929 		goto out;
930 
931 	/* Really, if the socket is already in listen state
932 	 * we can only allow the backlog to be adjusted.
933 	 */
934 	if (old_state != DCCP_LISTEN) {
935 		/*
936 		 * FIXME: here it probably should be sk->sk_prot->listen_start
937 		 * see tcp_listen_start
938 		 */
939 		err = dccp_listen_start(sk, backlog);
940 		if (err)
941 			goto out;
942 	}
943 	sk->sk_max_ack_backlog = backlog;
944 	err = 0;
945 
946 out:
947 	release_sock(sk);
948 	return err;
949 }
950 
951 EXPORT_SYMBOL_GPL(inet_dccp_listen);
952 
953 static void dccp_terminate_connection(struct sock *sk)
954 {
955 	u8 next_state = DCCP_CLOSED;
956 
957 	switch (sk->sk_state) {
958 	case DCCP_PASSIVE_CLOSE:
959 	case DCCP_PASSIVE_CLOSEREQ:
960 		dccp_finish_passive_close(sk);
961 		break;
962 	case DCCP_PARTOPEN:
963 		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
964 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
965 		/* fall through */
966 	case DCCP_OPEN:
967 		dccp_send_close(sk, 1);
968 
969 		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
970 		    !dccp_sk(sk)->dccps_server_timewait)
971 			next_state = DCCP_ACTIVE_CLOSEREQ;
972 		else
973 			next_state = DCCP_CLOSING;
974 		/* fall through */
975 	default:
976 		dccp_set_state(sk, next_state);
977 	}
978 }
979 
/**
 * dccp_close  -  close() implementation for DCCP sockets
 * @sk:      socket being closed by its owner
 * @timeout: time the caller is prepared to wait; passed on to
 *           dccp_flush_write_queue() and sk_stream_wait_close()
 *
 * Marks the socket as fully shut down, discards unread receive data and
 * then either aborts the connection (unread data or zero linger time) or
 * starts a normal termination. Finally the socket is orphaned and, if it
 * has reached CLOSED, destroyed.
 */
void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		/*
		 * Normal connection termination. May need to wait if there are
		 * still packets in the TX queue that are delayed by the CCID.
		 */
		dccp_flush_write_queue(sk, &timeout);
		dccp_terminate_connection(sk);
	}

	/*
	 * Flush write queue. This may be necessary in several cases:
	 * - we have been closed by the peer but still have application data;
	 * - abortive termination (unread data or zero linger time),
	 * - normal termination but queue could not be flushed within time limit
	 */
	__skb_queue_purge(&sk->sk_write_queue);

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/* Remember the state we leave in; it is compared again further down */
	state = sk->sk_state;
	/* Extra reference for the remainder of this function, see sock_put() below */
	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	percpu_counter_inc(sk->sk_prot->orphan_count);

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}
1072 
1073 EXPORT_SYMBOL_GPL(dccp_close);
1074 
/**
 * dccp_shutdown  -  shutdown() hook for DCCP sockets
 * @sk:  socket the request is for
 * @how: shutdown mode requested by the caller
 *
 * Currently a stub: the request is only logged via dccp_pr_debug() and
 * the socket is left unchanged.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}
1079 
1080 EXPORT_SYMBOL_GPL(dccp_shutdown);
1081 
1082 static inline int __init dccp_mib_init(void)
1083 {
1084 	dccp_statistics = alloc_percpu(struct dccp_mib);
1085 	if (!dccp_statistics)
1086 		return -ENOMEM;
1087 	return 0;
1088 }
1089 
/* Release the per-CPU DCCP MIB counters allocated by dccp_mib_init(). */
static inline void dccp_mib_exit(void)
{
	free_percpu(dccp_statistics);
}
1094 
/*
 * Read-only (0444) module parameter. When non-zero, dccp_init() derives
 * the size of the established hash table from it instead of from the
 * amount of RAM.
 */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/* Writable (0644) module parameter, exported for use by other DCCP modules */
bool dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1105 #endif
1106 
/**
 * dccp_init  -  Module initialisation of the DCCP core
 *
 * Sets up, in this order: the orphan counter, the bind bucket slab cache,
 * the established hash table and its locks, the bind hash table, the MIB
 * counters, the Ack Vector code, the sysctls and the built-in CCIDs.
 * On failure everything set up so far is undone in reverse order.
 *
 * Returns 0 on success or a negative errno.
 */
static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc;

	/* The DCCP control block must fit into the skb's cb[] scratch area */
	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
		     FIELD_SIZEOF(struct sk_buff, cb));
	rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
	if (rc)
		goto out_fail;
	/* Error code for all allocation failures up to dccp_mib_init() */
	rc = -ENOBUFS;
	inet_hashinfo_init(&dccp_hashinfo);
	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out_free_percpu;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (totalram_pages >= (128 * 1024))
		goal = totalram_pages >> (21 - PAGE_SHIFT);
	else
		goal = totalram_pages >> (23 - PAGE_SHIFT);

	/* An explicit thash_entries module parameter overrides the heuristic */
	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	/* Smallest page order whose page count reaches the goal */
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	/*
	 * Try the allocation, halving the size on each failure.
	 * NOTE(review): because of the "> 0" test, order 0 is only attempted
	 * when it is the initial order - confirm that this is intended.
	 */
	do {
		unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);

		/* Round down to a power of two so that size - 1 is a mask */
		while (hash_size & (hash_size - 1))
			hash_size--;
		dccp_hashinfo.ehash_mask = hash_size - 1;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
			goto out_free_dccp_ehash;

	/* The bind hash starts out with the order that worked for the ehash */
	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		/*
		 * More than 64K buckets: skip the allocation. The 'continue'
		 * jumps to the loop condition, which lowers the order.
		 */
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	/* From here on rc carries the error code of the failing step */
	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	rc = ccid_initialize_builtins();
	if (rc)
		goto out_sysctl_exit;

	dccp_timestamping_init();

	return 0;

	/* Error unwinding: each label undoes one setup step, newest first */
out_sysctl_exit:
	dccp_sysctl_exit();
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_percpu:
	percpu_counter_destroy(&dccp_orphan_count);
out_fail:
	/* Leave no dangling pointers behind in the exported hashinfo */
	dccp_hashinfo.bhash = NULL;
	dccp_hashinfo.ehash = NULL;
	dccp_hashinfo.bind_bucket_cachep = NULL;
	return rc;
}
1229 
1230 static void __exit dccp_fini(void)
1231 {
1232 	ccid_cleanup_builtins();
1233 	dccp_mib_exit();
1234 	free_pages((unsigned long)dccp_hashinfo.bhash,
1235 		   get_order(dccp_hashinfo.bhash_size *
1236 			     sizeof(struct inet_bind_hashbucket)));
1237 	free_pages((unsigned long)dccp_hashinfo.ehash,
1238 		   get_order((dccp_hashinfo.ehash_mask + 1) *
1239 			     sizeof(struct inet_ehash_bucket)));
1240 	inet_ehash_locks_free(&dccp_hashinfo);
1241 	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1242 	dccp_ackvec_exit();
1243 	dccp_sysctl_exit();
1244 	percpu_counter_destroy(&dccp_orphan_count);
1245 }
1246 
1247 module_init(dccp_init);
1248 module_exit(dccp_fini);
1249 
1250 MODULE_LICENSE("GPL");
1251 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1252 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
1253