/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;

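/*
 * Note: this knob is exported via sysctl (see net/dccp/sysctl.c); on most
 * builds it shows up as /proc/sys/net/dccp/default/tx_qlen. The exact path
 * is an assumption here -- check the sysctl table registration.
 */
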
void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);

static void dccp_finish_passive_close(struct sock *sk)
{
	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
		/* Node (client or server) has received Close packet. */
		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
		dccp_set_state(sk, DCCP_CLOSED);
		break;
	case DCCP_PASSIVE_CLOSEREQ:
		/*
		 * Client received CloseReq. We set the `active' flag so that
		 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
		 */
		dccp_send_close(sk, 1);
		dccp_set_state(sk, DCCP_CLOSING);
	}
}

void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
	static const char *dccp_packet_names[] = {
		[DCCP_PKT_REQUEST]  = "REQUEST",
		[DCCP_PKT_RESPONSE] = "RESPONSE",
		[DCCP_PKT_DATA]	    = "DATA",
		[DCCP_PKT_ACK]	    = "ACK",
		[DCCP_PKT_DATAACK]  = "DATAACK",
		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
		[DCCP_PKT_CLOSE]    = "CLOSE",
		[DCCP_PKT_RESET]    = "RESET",
		[DCCP_PKT_SYNC]	    = "SYNC",
		[DCCP_PKT_SYNCACK]  = "SYNCACK",
	};

	if (type >= DCCP_NR_PKT_TYPES)
		return "INVALID";
	else
		return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
	static const char *dccp_state_names[] = {
	[DCCP_OPEN]		= "OPEN",
	[DCCP_REQUESTING]	= "REQUESTING",
	[DCCP_PARTOPEN]		= "PARTOPEN",
	[DCCP_LISTEN]		= "LISTEN",
	[DCCP_RESPOND]		= "RESPOND",
	[DCCP_CLOSING]		= "CLOSING",
	[DCCP_ACTIVE_CLOSEREQ]	= "CLOSEREQ",
	[DCCP_PASSIVE_CLOSE]	= "PASSIVE_CLOSE",
	[DCCP_PASSIVE_CLOSEREQ]	= "PASSIVE_CLOSEREQ",
	[DCCP_TIME_WAIT]	= "TIME_WAIT",
	[DCCP_CLOSED]		= "CLOSED",
	};

	if (state >= DCCP_MAX_STATES)
		return "INVALID STATE!";
	else
		return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_minisock_init(&dp->dccps_minisock);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;

	dccp_init_xmit_timers(sk);

	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * let's leave it here; later, the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(ctl_sock_initialized)) {
		int rc = dccp_feat_init(dmsk);

		if (rc)
			return rc;

		if (dmsk->dccpms_send_ack_vector) {
			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
			if (dp->dccps_hc_rx_ackvec == NULL)
				return -ENOMEM;
		}
		dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
						      sk, GFP_KERNEL);
		dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
						      sk, GFP_KERNEL);
		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
			     dp->dccps_hc_tx_ccid == NULL)) {
			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
			if (dmsk->dccpms_send_ack_vector) {
				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
				dp->dccps_hc_rx_ackvec = NULL;
			}
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else {
		/* The control socket doesn't need feature negotiation. */
		INIT_LIST_HEAD(&dmsk->dccpms_pending);
		INIT_LIST_HEAD(&dmsk->dccpms_conf);
	}

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dmsk->dccpms_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* Clean up feature-negotiation state. */
	dccp_feat_clean(dmsk);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk, int backlog)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	return inet_csk_listen_start(sk, backlog);
}

static inline int dccp_need_reset(int state)
{
	return state != DCCP_CLOSED && state != DCCP_LISTEN &&
	       state != DCCP_REQUESTING;
}

int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC 793, sec. 3.8.
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	WARN_ON(inet->num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by other threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

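/*
 * Example (userspace, illustrative only -- not part of the kernel build):
 * dccp_poll() above is what backs poll(2) on a DCCP socket, so applications
 * wait for readability/writability just as with TCP. A minimal sketch,
 * assuming a connected DCCP socket fd:
 *
 *	#include <poll.h>
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *
 *	if (poll(&pfd, 1, 5000) > 0 && (pfd.revents & POLLIN))
 *		;	// a whole packet (or EOF/reset) can now be read
 *	if (pfd.revents & POLLHUP)
 *		;	// both directions shut down, per the mask logic above
 */
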
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		rc = put_user(amount, (int __user *)arg);
	}
		break;
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);
	return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

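/*
 * Example (userspace, illustrative only): SIOCINQ on a DCCP socket reports
 * the length of the packet at the head of the receive queue -- not the total
 * of all queued data -- since a read never crosses packet boundaries.
 * A minimal sketch, assuming a connected DCCP socket fd:
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/sockios.h>
 *
 *	int avail = 0;
 *
 *	if (ioctl(fd, SIOCINQ, &avail) == 0)
 *		;	// avail == skb->len of the next queued packet, or 0
 */
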
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}

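/*
 * Example (userspace, illustrative only): the option value is one __be32
 * service code, optionally followed by further codes; as parsed above, the
 * extra codes form the service list, which matters on the passive side.
 * A minimal sketch -- the service codes 42 and 43 are arbitrary placeholders:
 *
 *	#include <sys/socket.h>
 *	#include <netinet/in.h>
 *	#include <linux/dccp.h>
 *
 *	__be32 codes[2] = { htonl(42), htonl(43) };
 *
 *	// must be set before connect()/listen()
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, codes, sizeof(codes));
 */
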
/* Byte 1 is the feature number; the rest is the preference list. */
static int dccp_setsockopt_change(struct sock *sk, int type,
				  struct dccp_so_feat __user *optval)
{
	struct dccp_so_feat opt;
	u8 *val;
	int rc;

	if (copy_from_user(&opt, optval, sizeof(opt)))
		return -EFAULT;
	/*
	 * RFC 4340, 6.1: Change Options
	 */
	if (opt.dccpsf_len < 1)
		return -EINVAL;

	val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
	if (!val)
		return -ENOMEM;

	if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
		rc = -EFAULT;
		goto out_free_val;
	}

	rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
			      val, opt.dccpsf_len, GFP_KERNEL);
	if (rc)
		goto out_free_val;

out:
	return rc;

out_free_val:
	kfree(val);
	goto out;
}

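/*
 * Example (userspace, illustrative only): struct dccp_so_feat carries the
 * feature number plus a preference list, exactly as parsed above. A sketch
 * that asks for CCID-3 with CCID-2 as fallback on the local half-connection;
 * whether the request sticks still depends on feature negotiation:
 *
 *	#include <sys/socket.h>
 *	#include <linux/dccp.h>
 *
 *	__u8 ccids[] = { 3, 2 };		// preference order
 *	struct dccp_so_feat feat = {
 *		.dccpsf_feat = DCCPF_CCID,
 *		.dccpsf_val  = ccids,
 *		.dccpsf_len  = sizeof(ccids),
 *	};
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CHANGE_L, &feat, sizeof(feat));
 */
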
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		err = 0;
		break;
	case DCCP_SOCKOPT_CHANGE_L:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_CHANGE_R:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:	/* sender side, RFC 4340, sec. 9.2 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else
			dp->dccps_pcslen = val;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:	/* receiver side, RFC 4340 sec. 9.2.1 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else {
			dp->dccps_pcrlen = val;
			/* FIXME: add feature negotiation,
			 * ChangeL(MinimumChecksumCoverage, val) */
		}
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}

	release_sock(sk);
	return err;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

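/*
 * Example (userspace, illustrative only): per RFC 4340, 9.2 a CsCov value
 * of 0 means the checksum covers the whole packet, while a value n in 1..15
 * covers the header plus the initial (n - 1) * 4 bytes of payload -- hence
 * the 0..15 range check above. A minimal sketch restricting sender-side
 * coverage:
 *
 *	#include <sys/socket.h>
 *	#include <linux/dccp.h>
 *
 *	int cscov = 1;	// checksum the header only, payload unprotected
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SEND_CSCOV,
 *		   &cscov, sizeof(cscov));
 */
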
#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_setsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif

static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_getsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif

int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	if (sysctl_dccp_tx_qlen &&
	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	skb_queue_tail(&sk->sk_write_queue, skb);
	dccp_write_xmit(sk, 0);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

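/*
 * Example (userspace, illustrative only): since dccp_sendmsg() above rejects
 * anything larger than the current MSS with -EMSGSIZE, an application can
 * query DCCP_SOCKOPT_GET_CUR_MPS first and size its writes to fit:
 *
 *	#include <sys/socket.h>
 *	#include <linux/dccp.h>
 *
 *	int mps = 0;
 *	socklen_t len = sizeof(mps);
 *
 *	if (getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_GET_CUR_MPS,
 *		       &mps, &len) == 0)
 *		;	// a send() of up to mps bytes won't hit EMSGSIZE
 */
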
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb, 0);
		}
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when the user tries to read
				 * from a never-connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb, 0);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);

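/*
 * Example (userspace, illustrative only): a minimal passive DCCP socket,
 * ending up in inet_dccp_listen() above via the listen(2) system call.
 * The service code 42 and port 5001 are arbitrary placeholders:
 *
 *	#include <sys/socket.h>
 *	#include <netinet/in.h>
 *	#include <linux/dccp.h>
 *
 *	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	__be32 service = htonl(42);
 *	struct sockaddr_in sin = {
 *		.sin_family = AF_INET,
 *		.sin_port   = htons(5001),
 *	};
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   &service, sizeof(service));
 *	bind(fd, (struct sockaddr *)&sin, sizeof(sin));
 *	listen(fd, 5);
 *	int conn = accept(fd, NULL, NULL);
 */
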
static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		dccp_send_close(sk, 1);

		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}

void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		dccp_terminate_connection(sk);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);
	atomic_inc(sk->sk_prot->orphan_count);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static inline int dccp_mib_init(void)
{
	return snmp_mib_init((void **)dccp_statistics, sizeof(struct dccp_mib));
}

static inline void dccp_mib_exit(void)
{
	snmp_mib_free((void **)dccp_statistics);
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, bool, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc = -ENOBUFS;

	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
		     FIELD_SIZEOF(struct sk_buff, cb));

	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);
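	/*
	 * Worked example (illustrative): with 4 KiB pages (PAGE_SHIFT == 12)
	 * and 1 GiB of memory, num_physpages == 256 * 1024 >= 128 * 1024,
	 * so goal = 262144 >> (21 - 12) = 512 pages, i.e. 2 MiB of table.
	 */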

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
	}

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
		goto out_free_dccp_ehash;

	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	dccp_timestamping_init();
out:
	return rc;
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
	goto out;
}

static void __exit dccp_fini(void)
{
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");