/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* The maximum TX queue length, in packets; 0 means no limit. */
int sysctl_dccp_tx_qlen __read_mostly = 5;
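
/*
 * Note: in kernels where the DCCP sysctl table is registered (see
 * dccp_sysctl_init() below), this default can typically be tuned at
 * runtime, e.g.:
 *
 *	sysctl -w net.dccp.default.tx_qlen=10
 *
 * The exact sysctl path is an assumption based on the net.dccp.default
 * table; check net/dccp/sysctl.c for the authoritative name.
 */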

void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
		      dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(&dccp_hashinfo, sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);

void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
	static const char *dccp_packet_names[] = {
		[DCCP_PKT_REQUEST]  = "REQUEST",
		[DCCP_PKT_RESPONSE] = "RESPONSE",
		[DCCP_PKT_DATA]	    = "DATA",
		[DCCP_PKT_ACK]	    = "ACK",
		[DCCP_PKT_DATAACK]  = "DATAACK",
		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
		[DCCP_PKT_CLOSE]    = "CLOSE",
		[DCCP_PKT_RESET]    = "RESET",
		[DCCP_PKT_SYNC]	    = "SYNC",
		[DCCP_PKT_SYNCACK]  = "SYNCACK",
	};

	if (type >= DCCP_NR_PKT_TYPES)
		return "INVALID";
	else
		return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
	static const char *dccp_state_names[] = {
	[DCCP_OPEN]	  = "OPEN",
	[DCCP_REQUESTING] = "REQUESTING",
	[DCCP_PARTOPEN]	  = "PARTOPEN",
	[DCCP_LISTEN]	  = "LISTEN",
	[DCCP_RESPOND]	  = "RESPOND",
	[DCCP_CLOSING]	  = "CLOSING",
	[DCCP_TIME_WAIT]  = "TIME_WAIT",
	[DCCP_CLOSED]	  = "CLOSED",
	};

	if (state >= DCCP_MAX_STATES)
		return "INVALID STATE!";
	else
		return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

void dccp_hash(struct sock *sk)
{
	inet_hash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_hash);

void dccp_unhash(struct sock *sk)
{
	inet_unhash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_unhash);

int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_minisock_init(&dp->dccps_minisock);

	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * let's leave it here; later, the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(ctl_sock_initialized)) {
		int rc = dccp_feat_init(dmsk);

		if (rc)
			return rc;

		if (dmsk->dccpms_send_ack_vector) {
			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
			if (dp->dccps_hc_rx_ackvec == NULL)
				return -ENOMEM;
		}
		dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
						      sk, GFP_KERNEL);
		dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
						      sk, GFP_KERNEL);
		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
			     dp->dccps_hc_tx_ccid == NULL)) {
			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
			if (dmsk->dccpms_send_ack_vector) {
				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
				dp->dccps_hc_rx_ackvec = NULL;
			}
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else {
		/* The control socket doesn't need feature negotiation. */
		INIT_LIST_HEAD(&dmsk->dccpms_pending);
		INIT_LIST_HEAD(&dmsk->dccpms_conf);
	}

	dccp_init_xmit_timers(sk);
	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

int dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(&dccp_hashinfo, sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dmsk->dccpms_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* Clean up feature negotiation state. */
	dccp_feat_clean(dmsk);

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk, int backlog)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	return inet_csk_listen_start(sk, backlog);
}

int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/* ABORT function of RFC 793 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	/* FIXME: do the active reset thing */
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);
/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by other threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	dccp_pr_debug("entry\n");
	return -ENOIOCTLCMD;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
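
/*
 * Illustrative userspace sketch (not part of this file): a client sets
 * its service code before connect(); a listening socket may instead
 * pass an array of __be32 codes, which is what the dccpsl_list
 * handling above implements. The value 42 is a made-up example.
 *
 *	__be32 service = htonl(42);
 *
 *	if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		       &service, sizeof(service)) < 0)
 *		perror("setsockopt(DCCP_SOCKOPT_SERVICE)");
 */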

/* Byte 1 is the feature number; the rest is the preference list. */
static int dccp_setsockopt_change(struct sock *sk, int type,
				  struct dccp_so_feat __user *optval)
{
	struct dccp_so_feat opt;
	u8 *val;
	int rc;

	if (copy_from_user(&opt, optval, sizeof(opt)))
		return -EFAULT;

	val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
	if (!val)
		return -ENOMEM;

	if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
		rc = -EFAULT;
		goto out_free_val;
	}

	/*
	 * dccp_feat_change() keeps the val buffer on success, so it is
	 * only freed on the error paths below.
	 */
	rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
			      val, opt.dccpsf_len, GFP_KERNEL);
	if (rc)
		goto out_free_val;

	return 0;

out_free_val:
	kfree(val);
	return rc;
}
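
/*
 * Illustrative userspace sketch (not part of this file): requesting
 * CCID3 as the local congestion-control ID via feature negotiation.
 * Whether DCCPF_CCID can be changed this way depends on the CCIDs
 * compiled into the kernel; treat this as an example, not a guarantee.
 *
 *	struct dccp_so_feat feat;
 *	__u8 ccid = 3;			// request CCID3 (TFRC)
 *
 *	feat.dccpsf_feat = DCCPF_CCID;
 *	feat.dccpsf_val  = &ccid;
 *	feat.dccpsf_len  = 1;
 *	if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CHANGE_L,
 *		       &feat, sizeof(feat)) < 0)
 *		perror("setsockopt(DCCP_SOCKOPT_CHANGE_L)");
 */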

static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		err = 0;
		break;
	case DCCP_SOCKOPT_CHANGE_L:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_CHANGE_R:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:	/* sender side, RFC 4340, sec. 9.2 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else
			dp->dccps_pcslen = val;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:	/* receiver side, RFC 4340, sec. 9.2.1 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else {
			dp->dccps_pcrlen = val;
			/* FIXME: add feature negotiation,
			 * ChangeL(MinimumChecksumCoverage, val) */
		}
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}

	release_sock(sk);
	return err;
}
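
/*
 * Illustrative userspace sketch (not part of this file): enabling
 * partial checksum coverage (RFC 4340, sec. 9.2). A CsCov value of 1
 * means the checksum covers only the DCCP header and options, leaving
 * the payload unprotected:
 *
 *	int cscov = 1;
 *
 *	if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SEND_CSCOV,
 *		       &cscov, sizeof(cscov)) < 0)
 *		perror("setsockopt(DCCP_SOCKOPT_SEND_CSCOV)");
 */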

int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_setsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif

static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		len = sizeof(val);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		len = sizeof(val);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		len = sizeof(val);
		break;
	case 128 ... 191:	/* CCID-specific options, RX half-connection */
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:	/* CCID-specific options, TX half-connection */
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_getsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif

int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	if (sysctl_dccp_tx_qlen &&
	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	skb_queue_tail(&sk->sk_write_queue, skb);
	dccp_write_xmit(sk, 0);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
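
/*
 * Illustrative userspace sketch (not part of this file): because
 * dccp_sendmsg() rejects anything larger than the current MPS with
 * -EMSGSIZE, applications can size their messages by querying
 * DCCP_SOCKOPT_GET_CUR_MPS first. buf/buflen are hypothetical.
 *
 *	int mps;
 *	socklen_t optlen = sizeof(mps);
 *
 *	if (getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_GET_CUR_MPS,
 *		       &mps, &optlen) == 0 && buflen <= (size_t)mps)
 *		send(fd, buf, buflen, 0);
 */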

int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		if (dh->dccph_type == DCCP_PKT_DATA ||
		    dh->dccph_type == DCCP_PKT_DATAACK)
			goto found_ok_skb;

		if (dh->dccph_type == DCCP_PKT_RESET ||
		    dh->dccph_type == DCCP_PKT_CLOSE) {
			dccp_pr_debug("found fin ok!\n");
			len = 0;
			goto found_fin_ok;
		}
		dccp_pr_debug("packet_type=%s\n",
			      dccp_packet_name(dh->dccph_type));
		sk_eat_skb(sk, skb, 0);
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when the user tries to read
				 * from a socket that was never connected.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb, 0);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
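
/*
 * Illustrative userspace sketch (not part of this file): DCCP keeps
 * datagram boundaries, and dccp_recvmsg() sets MSG_TRUNC when the
 * supplied buffer is smaller than the packet, so short reads are
 * detectable:
 *
 *	struct msghdr msg;	// with msg_iov pointing at a buffer
 *	ssize_t n = recvmsg(fd, &msg, 0);
 *
 *	if (n >= 0 && (msg.msg_flags & MSG_TRUNC))
 *		fprintf(stderr, "datagram truncated\n");
 */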

int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);
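
/*
 * Illustrative userspace sketch (not part of this file): the passive
 * open that ends up here. addr is a hypothetical, already filled-in
 * struct sockaddr_in; error handling is omitted for brevity.
 *
 *	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *
 *	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	listen(fd, 5);
 *	int conn = accept(fd, NULL, NULL);
 */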

static const unsigned char dccp_new_state[] = {
	/* current state:   new state:      action:	*/
	[0]		  = DCCP_CLOSED,
	[DCCP_OPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_REQUESTING] = DCCP_CLOSED,
	[DCCP_PARTOPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_LISTEN]	  = DCCP_CLOSED,
	[DCCP_RESPOND]	  = DCCP_CLOSED,
	[DCCP_CLOSING]	  = DCCP_CLOSED,
	[DCCP_TIME_WAIT]  = DCCP_CLOSED,
	[DCCP_CLOSED]	  = DCCP_CLOSED,
};

static int dccp_close_state(struct sock *sk)
{
	const int next = dccp_new_state[sk->sk_state];
	const int ns = next & DCCP_STATE_MASK;

	if (ns != sk->sk_state)
		dccp_set_state(sk, ns);

	return next & DCCP_ACTION_FIN;
}

void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	/* FIXME: check for unread data */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		__kfree_skb(skb);
	}

	if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (dccp_close_state(sk)) {
		dccp_send_close(sk, 1);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);
	atomic_inc(sk->sk_prot->orphan_count);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	/*
	 * The last release_sock may have processed the CLOSE or RESET
	 * packet moving sock to CLOSED state, if not we have to fire
	 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
	 * in draft-ietf-dccp-spec-11. -acme
	 */
	if (sk->sk_state == DCCP_CLOSING) {
		/* FIXME: should start at 2 * RTT */
		/* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  inet_csk(sk)->icsk_rto,
					  DCCP_RTO_MAX);
#if 0
		/* Yeah, we should use sk->sk_prot->orphan_count, etc */
		dccp_set_state(sk, DCCP_CLOSED);
#endif
	}

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static int __init dccp_mib_init(void)
{
	int rc = -ENOMEM;

	dccp_statistics[0] = alloc_percpu(struct dccp_mib);
	if (dccp_statistics[0] == NULL)
		goto out;

	dccp_statistics[1] = alloc_percpu(struct dccp_mib);
	if (dccp_statistics[1] == NULL)
		goto out_free_one;

	rc = 0;
out:
	return rc;
out_free_one:
	free_percpu(dccp_statistics[0]);
	dccp_statistics[0] = NULL;
	goto out;
}

static void dccp_mib_exit(void)
{
	free_percpu(dccp_statistics[0]);
	free_percpu(dccp_statistics[1]);
	dccp_statistics[0] = dccp_statistics[1] = NULL;
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
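
/*
 * When DCCP is built as a module (module name assumed to be "dccp"),
 * the bucket count can presumably be set at load time, e.g.:
 *
 *	modprobe dccp thash_entries=8192
 *
 * The 0444 permission makes the value read-only afterwards; it is
 * consumed once, in dccp_init() below.
 */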

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, bool, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc = -ENOBUFS;

	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);
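
	/*
	 * Worked example, for illustration only: with 4 KiB pages
	 * (PAGE_SHIFT == 12) and 512 MiB of memory, num_physpages is
	 * 131072 == 128 * 1024, so goal = 131072 >> (21 - 12) = 256
	 * pages, and the loop below picks ehash_order = 8, i.e. the
	 * established hash gets 2^8 pages (1 MiB).
	 */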

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
		rwlock_init(&dccp_hashinfo.ehash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
	}

	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_ehash;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	dccp_timestamping_init();
out:
	return rc;
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
	goto out;
}

static void __exit dccp_fini(void)
{
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");