/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/inet_common.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

#define CREATE_TRACE_POINTS
#include "trace.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;

#ifdef CONFIG_IP_DCCP_DEBUG
static const char *dccp_state_name(const int state)
{
	static const char *const dccp_state_names[] = {
	[DCCP_OPEN]		= "OPEN",
	[DCCP_REQUESTING]	= "REQUESTING",
	[DCCP_PARTOPEN]		= "PARTOPEN",
	[DCCP_LISTEN]		= "LISTEN",
	[DCCP_RESPOND]		= "RESPOND",
	[DCCP_CLOSING]		= "CLOSING",
	[DCCP_ACTIVE_CLOSEREQ]	= "CLOSEREQ",
	[DCCP_PASSIVE_CLOSE]	= "PASSIVE_CLOSE",
	[DCCP_PASSIVE_CLOSEREQ]	= "PASSIVE_CLOSEREQ",
	[DCCP_TIME_WAIT]	= "TIME_WAIT",
	[DCCP_CLOSED]		= "CLOSED",
	};

	if (state >= DCCP_MAX_STATES)
		return "INVALID STATE!";
	else
		return dccp_state_names[state];
}
#endif

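/*
 * dccp_set_state  -  Move a socket to a new connection state
 *
 * Maintains the MIB counters (DCCP_MIB_CURRESTAB/DCCP_MIB_ESTABRESETS) and,
 * on the transition to DCCP_CLOSED, unhashes the socket and releases its
 * bound port before the state itself is changed.
 */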
void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		/* Client retransmits all Confirm options until entering OPEN */
		if (oldstate == DCCP_PARTOPEN)
			dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	inet_sk_set_state(sk, state);
}

EXPORT_SYMBOL_GPL(dccp_set_state);

static void dccp_finish_passive_close(struct sock *sk)
{
	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
		/* Node (client or server) has received Close packet. */
		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
		dccp_set_state(sk, DCCP_CLOSED);
		break;
	case DCCP_PASSIVE_CLOSEREQ:
		/*
		 * Client received CloseReq. We set the `active' flag so that
		 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
		 */
		dccp_send_close(sk, 1);
		dccp_set_state(sk, DCCP_CLOSING);
	}
}

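/*
 * dccp_done  -  Final transition to DCCP_CLOSED
 *
 * Stops the transmit timers, marks both directions as shut down, and
 * either wakes up a user still holding the socket or destroys it.
 */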
void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
	static const char *const dccp_packet_names[] = {
		[DCCP_PKT_REQUEST]  = "REQUEST",
		[DCCP_PKT_RESPONSE] = "RESPONSE",
		[DCCP_PKT_DATA]	    = "DATA",
		[DCCP_PKT_ACK]	    = "ACK",
		[DCCP_PKT_DATAACK]  = "DATAACK",
		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
		[DCCP_PKT_CLOSE]    = "CLOSE",
		[DCCP_PKT_RESET]    = "RESET",
		[DCCP_PKT_SYNC]	    = "SYNC",
		[DCCP_PKT_SYNCACK]  = "SYNCACK",
	};

	if (type >= DCCP_NR_PKT_TYPES)
		return "INVALID";
	else
		return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

static void dccp_sk_destruct(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_tx_ccid = NULL;
	inet_sock_destruct(sk);
}

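/*
 * dccp_init_sock  -  Initialise protocol-private socket state
 *
 * Sets the initial RTO, MSS cache, role and queueing defaults and, for all
 * sockets except the control socket, kicks off feature negotiation.
 */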
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	sk->sk_destruct		= dccp_sk_destruct;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_tx_qlen	= sysctl_dccp_tx_qlen;

	dccp_init_xmit_timers(sk);

	INIT_LIST_HEAD(&dp->dccps_featneg);
	/* the control socket doesn't need feature negotiation */
	if (likely(ctl_sock_initialized))
		return dccp_feat_init(sk);
	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

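/*
 * dccp_destroy_sock  -  Release all protocol-private resources
 *
 * Purges the write queue, releases the bound port, and frees the service
 * list, ack vector and RX CCID state before the socket goes away.
 */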
void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dp->dccps_hc_rx_ackvec != NULL) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	dp->dccps_hc_rx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk, int backlog)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	/* do not start to listen if feature negotiation setup fails */
	if (dccp_feat_finalise_settings(dp))
		return -EPROTO;
	return inet_csk_listen_start(sk, backlog);
}

static inline int dccp_need_reset(int state)
{
	return state != DCCP_CLOSED && state != DCCP_LISTEN &&
	       state != DCCP_REQUESTING;
}

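/*
 * dccp_disconnect  -  Abort the connection
 *
 * Sends a Reset where the state requires one, then drops all queued data
 * and returns the socket to DCCP_CLOSED with its addressing reset.
 */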
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct dccp_sock *dp = dccp_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC793, sec. 3.8
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	dp->dccps_hc_rx_ccid = NULL;

	__skb_queue_purge(&sk->sk_receive_queue);
	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->inet_dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

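/*
 * dccp_poll_mask  -  Compute the poll event mask
 *
 * Runs without the socket lock (see the comment below): reports EPOLLIN
 * once receive data is queued and EPOLLOUT while the connection is up and
 * send buffer space is available.
 */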
__poll_t dccp_poll_mask(struct socket *sock, __poll_t events)
{
	__poll_t mask;
	struct sock *sk = sock->sk;

	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/*
	 * Socket is not locked. We are protected from async events by poll
	 * logic, and correct handling of state changes made by other threads
	 * is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = EPOLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= EPOLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= EPOLLIN | EPOLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_is_writeable(sk)) {
				mask |= EPOLLOUT | EPOLLWRNORM;
			} else {  /* send SIGIO later */
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_is_writeable(sk))
					mask |= EPOLLOUT | EPOLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll_mask);

int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		rc = put_user(amount, (int __user *)arg);
	}
		break;
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);
	return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

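/*
 * dccp_setsockopt_service  -  Handle the DCCP_SOCKOPT_SERVICE option
 *
 * @service is the first __be32 word of optval; any remaining words form an
 * optional list of further service codes. The total length is capped at
 * DCCP_SERVICE_LIST_MAX_LEN codes.
 */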
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}

static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
	u8 *list, len;
	int i, rc;

	if (cscov < 0 || cscov > 15)
		return -EINVAL;
	/*
	 * Populate a list of permissible values, in the range cscov...15. This
	 * is necessary since feature negotiation of single values only works if
	 * both sides incidentally choose the same value. Since the list starts
	 * lowest-value first, negotiation will pick the smallest shared value.
	 */
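	/* e.g. cscov == 12 yields the preference list { 12, 13, 14, 15 } */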
	if (cscov == 0)
		return 0;
	len = 16 - cscov;

	list = kmalloc(len, GFP_KERNEL);
	if (list == NULL)
		return -ENOBUFS;

	for (i = 0; i < len; i++)
		list[i] = cscov++;

	rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);

	if (rc == 0) {
		if (rx)
			dccp_sk(sk)->dccps_pcrlen = cscov;
		else
			dccp_sk(sk)->dccps_pcslen = cscov;
	}
	kfree(list);
	return rc;
}

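/*
 * dccp_setsockopt_ccid  -  Register a CCID preference list via sockopt
 *
 * optval holds an array of 1..DCCP_FEAT_MAX_SP_VALS CCID numbers; the list
 * is registered for the TX and/or RX half-connection depending on @type.
 */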
static int dccp_setsockopt_ccid(struct sock *sk, int type,
				char __user *optval, unsigned int optlen)
{
	u8 *val;
	int rc = 0;

	if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
		return -EINVAL;

	val = memdup_user(optval, optlen);
	if (IS_ERR(val))
		return PTR_ERR(val);

	lock_sock(sk);
	if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);

	if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
	release_sock(sk);

	kfree(val);
	return rc;
}

static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		char __user *optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CHANGE_L:
	case DCCP_SOCKOPT_CHANGE_R:
		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CCID:
	case DCCP_SOCKOPT_RX_CCID:
	case DCCP_SOCKOPT_TX_CCID:
		return dccp_setsockopt_ccid(sk, optname, optval, optlen);
	}

	if (optlen < (int)sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, false);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, true);
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		if (sk->sk_state != DCCP_CLOSED)
			err = -EISCONN;
		else if (val < 0 || val >= DCCPQ_POLICY_MAX)
			err = -EINVAL;
		else
			dp->dccps_qpolicy = val;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		if (val < 0)
			err = -EINVAL;
		else
			dp->dccps_tx_qlen = val;
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);

	return err;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, unsigned int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, unsigned int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_setsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif

static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
	case DCCP_SOCKOPT_TX_CCID:
		val = ccid_get_current_tx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_RX_CCID:
		val = ccid_get_current_rx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		val = dp->dccps_qpolicy;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		val = dp->dccps_tx_qlen;
		break;
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_getsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif

static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
{
	struct cmsghdr *cmsg;

	/*
	 * Assign an (opaque) qpolicy priority value to skb->priority.
	 *
	 * We are overloading this skb field for use with the qpolicy subsystem.
	 * The skb->priority is normally used for the SO_PRIORITY option, which
	 * is initialised from sk_priority. Since the assignment of sk_priority
	 * to skb->priority happens later (on layer 3), we overload this field
	 * for use with queueing priorities as long as the skb is on layer 4.
	 * The default priority value (if nothing is set) is 0.
	 */
	skb->priority = 0;

	for_each_cmsghdr(cmsg, msg) {
		if (!CMSG_OK(msg, cmsg))
			return -EINVAL;

		if (cmsg->cmsg_level != SOL_DCCP)
			continue;

		if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
		    !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
			return -EINVAL;

		switch (cmsg->cmsg_type) {
		case DCCP_SCM_PRIORITY:
			if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
				return -EINVAL;
			skb->priority = *(__u32 *)CMSG_DATA(cmsg);
			break;
		default:
			return -EINVAL;
		}
	}
	return 0;
}

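/*
 * dccp_sendmsg  -  Queue one datagram for transmission
 *
 * DCCP preserves message boundaries, so a message larger than the current
 * MPS is rejected with -EMSGSIZE rather than fragmented. The skb is handed
 * to the qpolicy queue; actual transmission is paced by the TX CCID.
 */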
int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	trace_dccp_probe(sk, len);

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	if (dccp_qpolicy_full(sk)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * Wait for a connection to finish. We have to use
	 * sk_stream_wait_connect here to set sk_write_pending, so that the
	 * trick in dccp_rcv_request_sent_state_process works.
	 */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	if (sk->sk_state == DCCP_CLOSED) {
		rc = -ENOTCONN;
		goto out_discard;
	}

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_from_msg(skb_put(skb, len), msg, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_msghdr_parse(msg, skb);
	if (rc != 0)
		goto out_discard;

	dccp_qpolicy_push(sk, skb);
	/*
	 * The xmit_timer is set if the TX CCID is rate-based and will expire
	 * when congestion control permits releasing further packets into the
	 * network. Window-based CCIDs do not use this timer.
	 */
	if (!timer_pending(&dp->dccps_xmit_timer))
		dccp_write_xmit(sk);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

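/*
 * dccp_recvmsg  -  Receive one datagram from the socket
 *
 * Each skb is consumed whole: a message longer than @len is truncated and
 * MSG_TRUNC is set. Close, CloseReq and Reset packets are treated like a
 * FIN and end the receive loop with a return value of 0.
 */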
int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
		 int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb);
		}
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when the user tries to read
				 * from a socket that was never connected.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo, NULL);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_msg(skb, 0, msg, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
		if (flags & MSG_TRUNC)
			len = skb->len;
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);

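/*
 * dccp_terminate_connection  -  Initiate active close from the current state
 *
 * Completes a passive close if one is pending; otherwise sends a Close and
 * moves to CLOSING, or to ACTIVE_CLOSEREQ on a server that leaves holding
 * the TIMEWAIT state to the client.
 */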
static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		dccp_send_close(sk, 1);

		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}

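/*
 * dccp_close  -  close() entry point for DCCP sockets
 *
 * Flushes unread data (aborting the connection if any was tossed), waits
 * up to @timeout for queued packets to drain, then orphans the socket and
 * lets the protocol finish the close in the background.
 */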
void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the receive buffers. We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	/* If the socket has already been reset, kill it. */
	if (sk->sk_state == DCCP_CLOSED)
		goto adjudge_to_death;

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		/*
		 * Normal connection termination. May need to wait if there are
		 * still packets in the TX queue that are delayed by the CCID.
		 */
		dccp_flush_write_queue(sk, &timeout);
		dccp_terminate_connection(sk);
	}

	/*
	 * Flush the write queue. This may be necessary in several cases:
	 * - we have been closed by the peer but still have application data;
	 * - abortive termination (unread data or zero linger time);
	 * - normal termination, but the queue could not be flushed within the
	 *   time limit.
	 */
	__skb_queue_purge(&sk->sk_write_queue);

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now the socket is owned by the kernel and we acquire the BH lock
	 * to finish the close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	percpu_counter_inc(sk->sk_prot->orphan_count);

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, the socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static inline int __init dccp_mib_init(void)
{
	dccp_statistics = alloc_percpu(struct dccp_mib);
	if (!dccp_statistics)
		return -ENOMEM;
	return 0;
}

static inline void dccp_mib_exit(void)
{
	free_percpu(dccp_statistics);
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
bool dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc;

	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
		     FIELD_SIZEOF(struct sk_buff, cb));
	rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
	if (rc)
		goto out_fail;
	rc = -ENOBUFS;
	inet_hashinfo_init(&dccp_hashinfo);
	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out_free_percpu;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (totalram_pages >= (128 * 1024))
		goal = totalram_pages >> (21 - PAGE_SHIFT);
	else
		goal = totalram_pages >> (23 - PAGE_SHIFT);

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	do {
		unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);

		while (hash_size & (hash_size - 1))
			hash_size--;
		dccp_hashinfo.ehash_mask = hash_size - 1;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
		goto out_free_dccp_ehash;

	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	rc = ccid_initialize_builtins();
	if (rc)
		goto out_sysctl_exit;

	dccp_timestamping_init();

	return 0;

out_sysctl_exit:
	dccp_sysctl_exit();
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_percpu:
	percpu_counter_destroy(&dccp_orphan_count);
out_fail:
	dccp_hashinfo.bhash = NULL;
	dccp_hashinfo.ehash = NULL;
	dccp_hashinfo.bind_bucket_cachep = NULL;
	return rc;
}

static void __exit dccp_fini(void)
{
	ccid_cleanup_builtins();
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order((dccp_hashinfo.ehash_mask + 1) *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
	percpu_counter_destroy(&dccp_orphan_count);
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");