xref: /openbmc/linux/net/dccp/proto.c (revision 9b8321531a90c400e9c561d903926eee79639dcf)
/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* The maximum TX queue length, in packets; 0 means no limit. */
int sysctl_dccp_tx_qlen __read_mostly = 5;

#ifdef CONFIG_IP_DCCP_DEBUG
static const char *dccp_state_name(const int state)
{
	static const char *const dccp_state_names[] = {
	[DCCP_OPEN]		= "OPEN",
	[DCCP_REQUESTING]	= "REQUESTING",
	[DCCP_PARTOPEN]		= "PARTOPEN",
	[DCCP_LISTEN]		= "LISTEN",
	[DCCP_RESPOND]		= "RESPOND",
	[DCCP_CLOSING]		= "CLOSING",
	[DCCP_ACTIVE_CLOSEREQ]	= "CLOSEREQ",
	[DCCP_PASSIVE_CLOSE]	= "PASSIVE_CLOSE",
	[DCCP_PASSIVE_CLOSEREQ]	= "PASSIVE_CLOSEREQ",
	[DCCP_TIME_WAIT]	= "TIME_WAIT",
	[DCCP_CLOSED]		= "CLOSED",
	};

	if (state >= DCCP_MAX_STATES)
		return "INVALID STATE!";
	else
		return dccp_state_names[state];
}
#endif

void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		/* Client retransmits all Confirm options until entering OPEN */
		if (oldstate == DCCP_PARTOPEN)
			dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);

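/*
 * Passive-close handling (summary added for clarity): a received Close is
 * answered with a Reset ("Closed") and the socket goes to CLOSED; a received
 * CloseReq moves the client to CLOSING, from where dccp_send_close()
 * retransmits the Close as per RFC 4340, 8.3.
 */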
static void dccp_finish_passive_close(struct sock *sk)
{
	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
		/* Node (client or server) has received Close packet. */
		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
		dccp_set_state(sk, DCCP_CLOSED);
		break;
	case DCCP_PASSIVE_CLOSEREQ:
		/*
		 * Client received CloseReq. We set the `active' flag so that
		 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
		 */
		dccp_send_close(sk, 1);
		dccp_set_state(sk, DCCP_CLOSING);
	}
}

void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
	static const char *const dccp_packet_names[] = {
		[DCCP_PKT_REQUEST]  = "REQUEST",
		[DCCP_PKT_RESPONSE] = "RESPONSE",
		[DCCP_PKT_DATA]	    = "DATA",
		[DCCP_PKT_ACK]	    = "ACK",
		[DCCP_PKT_DATAACK]  = "DATAACK",
		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
		[DCCP_PKT_CLOSE]    = "CLOSE",
		[DCCP_PKT_RESET]    = "RESET",
		[DCCP_PKT_SYNC]	    = "SYNC",
		[DCCP_PKT_SYNCACK]  = "SYNCACK",
	};

	if (type >= DCCP_NR_PKT_TYPES)
		return "INVALID";
	else
		return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
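	/* Conservative initial value (TCP's classic 536-byte default MSS);
	 * dccp_sync_mss() recomputes it once a route is attached.
	 * (Note added for clarity.) */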
	dp->dccps_mss_cache	= 536;
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;

	dccp_init_xmit_timers(sk);

	INIT_LIST_HEAD(&dp->dccps_featneg);
	/* control socket doesn't need feat nego */
	if (likely(ctl_sock_initialized))
		return dccp_feat_init(sk);
	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dp->dccps_hc_rx_ackvec != NULL) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk, int backlog)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	/* do not start to listen if feature negotiation setup fails */
	if (dccp_feat_finalise_settings(dp))
		return -EPROTO;
	return inet_csk_listen_start(sk, backlog);
}

static inline int dccp_need_reset(int state)
{
	return state != DCCP_CLOSED && state != DCCP_LISTEN &&
	       state != DCCP_REQUESTING;
}

int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC793, sec. 3.8
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);

	__skb_queue_purge(&sk->sk_receive_queue);
	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->inet_dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	sock_poll_wait(file, sk_sleep(sk), wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/*
	 * Socket is not locked. We are protected from async events by the
	 * poll logic, and correct handling of state changes made by other
	 * threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

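/*
 * Example (userspace sketch, illustrative only; not part of this file):
 * SIOCINQ reports the payload length of the packet at the head of the
 * receive queue, i.e. the amount the next read will return:
 *
 *	int avail;
 *	if (ioctl(fd, SIOCINQ, &avail) == 0)
 *		printf("next packet carries %d bytes\n", avail);
 */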
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		rc = put_user(amount, (int __user *)arg);
	}
		break;
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);
	return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

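/*
 * Example (userspace sketch, illustrative only): a client announcing
 * service code 42 before connect():
 *
 *	__be32 service = htonl(42);
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   &service, sizeof(service));
 *
 * A listening socket may instead pass an array of service codes it is
 * willing to accept.
 */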
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}

static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
	u8 *list, len;
	int i, rc;

	if (cscov < 0 || cscov > 15)
		return -EINVAL;
	/*
	 * Populate a list of permissible values, in the range cscov...15. This
	 * is necessary since feature negotiation of single values only works if
	 * both sides incidentally choose the same value. Since the list starts
	 * lowest-value first, negotiation will pick the smallest shared value.
	 */
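	/* Example: cscov = 12 yields the preference list {12, 13, 14, 15}. */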
	if (cscov == 0)
		return 0;
	len = 16 - cscov;

	list = kmalloc(len, GFP_KERNEL);
	if (list == NULL)
		return -ENOBUFS;

	for (i = 0; i < len; i++)
		list[i] = cscov++;

	rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);

	if (rc == 0) {
		if (rx)
			dccp_sk(sk)->dccps_pcrlen = cscov;
		else
			dccp_sk(sk)->dccps_pcslen = cscov;
	}
	kfree(list);
	return rc;
}

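/*
 * Example (userspace sketch, illustrative only): requesting CCID-3 for the
 * TX half-connection with CCID-2 as fallback, most-preferred value first:
 *
 *	uint8_t prefs[] = { 3, 2 };
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_TX_CCID, prefs, sizeof(prefs));
 */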
static int dccp_setsockopt_ccid(struct sock *sk, int type,
				char __user *optval, unsigned int optlen)
{
	u8 *val;
	int rc = 0;

	if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
		return -EINVAL;

	val = memdup_user(optval, optlen);
	if (IS_ERR(val))
		return PTR_ERR(val);

	lock_sock(sk);
	if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);

	if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
	release_sock(sk);

	kfree(val);
	return rc;
}

static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		char __user *optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CHANGE_L:
	case DCCP_SOCKOPT_CHANGE_R:
		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CCID:
	case DCCP_SOCKOPT_RX_CCID:
	case DCCP_SOCKOPT_TX_CCID:
		return dccp_setsockopt_ccid(sk, optname, optval, optlen);
	}

	if (optlen < (int)sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, false);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, true);
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);

	return err;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, unsigned int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, unsigned int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_setsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif

static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}

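/*
 * Example (userspace sketch, illustrative only): querying the current
 * maximum packet size once the connection is set up:
 *
 *	int mps;
 *	socklen_t optlen = sizeof(mps);
 *	getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_GET_CUR_MPS, &mps, &optlen);
 */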
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
	case DCCP_SOCKOPT_TX_CCID:
		val = ccid_get_current_tx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_RX_CCID:
		val = ccid_get_current_rx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_getsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif

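/*
 * Note (added for clarity): DCCP is datagram-oriented, so each sendmsg()
 * call produces exactly one DCCP-Data packet; writes larger than the
 * current maximum packet size fail with -EMSGSIZE instead of being split
 * across packets.
 */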
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	if (sysctl_dccp_tx_qlen &&
	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process() works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	skb_queue_tail(&sk->sk_write_queue, skb);
	/*
	 * The xmit_timer is set if the TX CCID is rate-based and will expire
	 * when congestion control permits releasing further packets into the
	 * network. Window-based CCIDs do not use this timer.
	 */
	if (!timer_pending(&dp->dccps_xmit_timer))
		dccp_write_xmit(sk);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

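/*
 * Note (added for clarity): reception is likewise packet-oriented. Each
 * call returns at most one queued packet; if the caller's buffer is smaller
 * than the packet, the remainder is discarded and MSG_TRUNC is set in
 * msg->msg_flags.
 */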
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb, 0);
		}
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when the user tries to read
				 * from a never-connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
		if (flags & MSG_TRUNC)
			len = skb->len;
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb, 0);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);

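/*
 * Active-close helper (summary added for clarity): sends a Close (or
 * completes a passive close) and selects the next state: a server that
 * does not hold TIMEWAIT state moves to CLOSEREQ; all other sockets move
 * to CLOSING (cf. RFC 4340, 8.3).
 */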
static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		dccp_send_close(sk, 1);

		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}

void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the receive buffers. We do this only on the
	 * descriptor close, not protocol-sourced closes, because the reader
	 * process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		/*
		 * Normal connection termination. May need to wait if there are
		 * still packets in the TX queue that are delayed by the CCID.
		 */
		dccp_flush_write_queue(sk, &timeout);
		dccp_terminate_connection(sk);
	}

	/*
	 * Flush the write queue. This may be necessary in several cases:
	 * - we have been closed by the peer but still have application data;
	 * - abortive termination (unread data or zero linger time);
	 * - normal termination, but the queue could not be flushed within the
	 *   time limit.
	 */
	__skb_queue_purge(&sk->sk_write_queue);

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	percpu_counter_inc(sk->sk_prot->orphan_count);

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static inline int dccp_mib_init(void)
{
	return snmp_mib_init((void __percpu **)dccp_statistics,
			     sizeof(struct dccp_mib),
			     __alignof__(struct dccp_mib));
}

static inline void dccp_mib_exit(void)
{
	snmp_mib_free((void __percpu **)dccp_statistics);
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
bool dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc;

	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
		     FIELD_SIZEOF(struct sk_buff, cb));
	rc = percpu_counter_init(&dccp_orphan_count, 0);
	if (rc)
		goto out_fail;
	rc = -ENOBUFS;
	inet_hashinfo_init(&dccp_hashinfo);
	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out_free_percpu;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (totalram_pages >= (128 * 1024))
		goal = totalram_pages >> (21 - PAGE_SHIFT);
	else
		goal = totalram_pages >> (23 - PAGE_SHIFT);
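	/*
	 * Worked example (added for clarity): with 4 KiB pages
	 * (PAGE_SHIFT = 12) and 1 GiB of RAM, totalram_pages = 262144,
	 * which is >= 128 * 1024, so goal = 262144 >> 9 = 512 pages for
	 * the established hash table.
	 */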

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	do {
		unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);

		while (hash_size & (hash_size - 1))
			hash_size--;
		dccp_hashinfo.ehash_mask = hash_size - 1;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) {
		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i);
	}

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
		goto out_free_dccp_ehash;

	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	rc = ccid_initialize_builtins();
	if (rc)
		goto out_sysctl_exit;

	dccp_timestamping_init();

	return 0;

out_sysctl_exit:
	dccp_sysctl_exit();
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_percpu:
	percpu_counter_destroy(&dccp_orphan_count);
out_fail:
	dccp_hashinfo.bhash = NULL;
	dccp_hashinfo.ehash = NULL;
	dccp_hashinfo.bind_bucket_cachep = NULL;
	return rc;
}

static void __exit dccp_fini(void)
{
	ccid_cleanup_builtins();
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order((dccp_hashinfo.ehash_mask + 1) *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
	percpu_counter_destroy(&dccp_orphan_count);
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Control Protocol");