/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;

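/*
 * Usage note (illustrative, not authoritative): this limit is exported as a
 * sysctl. Assuming the conventional proc path used for the DCCP defaults
 * table, it can be inspected and tuned at runtime:
 *
 *	# cat /proc/sys/net/dccp/default/tx_qlen
 *	5
 *	# echo 20 > /proc/sys/net/dccp/default/tx_qlen
 */
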
void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);

static void dccp_finish_passive_close(struct sock *sk)
{
	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
		/* Node (client or server) has received Close packet. */
		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
		dccp_set_state(sk, DCCP_CLOSED);
		break;
	case DCCP_PASSIVE_CLOSEREQ:
		/*
		 * Client received CloseReq. We set the `active' flag so that
		 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
		 */
		dccp_send_close(sk, 1);
		dccp_set_state(sk, DCCP_CLOSING);
	}
}

void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
	static const char *dccp_packet_names[] = {
		[DCCP_PKT_REQUEST]  = "REQUEST",
		[DCCP_PKT_RESPONSE] = "RESPONSE",
		[DCCP_PKT_DATA]	    = "DATA",
		[DCCP_PKT_ACK]	    = "ACK",
		[DCCP_PKT_DATAACK]  = "DATAACK",
		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
		[DCCP_PKT_CLOSE]    = "CLOSE",
		[DCCP_PKT_RESET]    = "RESET",
		[DCCP_PKT_SYNC]	    = "SYNC",
		[DCCP_PKT_SYNCACK]  = "SYNCACK",
	};

	if (type >= DCCP_NR_PKT_TYPES)
		return "INVALID";
	else
		return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
	static const char *dccp_state_names[] = {
	[DCCP_OPEN]		= "OPEN",
	[DCCP_REQUESTING]	= "REQUESTING",
	[DCCP_PARTOPEN]		= "PARTOPEN",
	[DCCP_LISTEN]		= "LISTEN",
	[DCCP_RESPOND]		= "RESPOND",
	[DCCP_CLOSING]		= "CLOSING",
	[DCCP_ACTIVE_CLOSEREQ]	= "CLOSEREQ",
	[DCCP_PASSIVE_CLOSE]	= "PASSIVE_CLOSE",
	[DCCP_PASSIVE_CLOSEREQ]	= "PASSIVE_CLOSEREQ",
	[DCCP_TIME_WAIT]	= "TIME_WAIT",
	[DCCP_CLOSED]		= "CLOSED",
	};

	if (state >= DCCP_MAX_STATES)
		return "INVALID STATE!";
	else
		return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_minisock_init(&dp->dccps_minisock);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;

	dccp_init_xmit_timers(sk);

	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * let's leave it here; later, the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(ctl_sock_initialized)) {
		int rc = dccp_feat_init(dmsk);

		if (rc)
			return rc;

		if (dmsk->dccpms_send_ack_vector) {
			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
			if (dp->dccps_hc_rx_ackvec == NULL)
				return -ENOMEM;
		}
		dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
						      sk, GFP_KERNEL);
		dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
						      sk, GFP_KERNEL);
		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
			     dp->dccps_hc_tx_ccid == NULL)) {
			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
			if (dmsk->dccpms_send_ack_vector) {
				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
				dp->dccps_hc_rx_ackvec = NULL;
			}
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else {
		/* control socket doesn't need feature negotiation */
		INIT_LIST_HEAD(&dmsk->dccpms_pending);
		INIT_LIST_HEAD(&dmsk->dccpms_conf);
	}

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

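/*
 * Illustrative userspace sketch (not part of the kernel build): creating the
 * kind of socket that dccp_init_sock() above initialises. SOCK_DCCP is 6 and
 * IPPROTO_DCCP is 33; very old libc headers may lack IPPROTO_DCCP, in which
 * case it has to be defined by hand.
 *
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *	#include <netinet/in.h>
 *
 *	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	if (fd < 0)
 *		perror("socket");	// e.g. EPROTONOSUPPORT if DCCP is absent
 */
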
void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dmsk->dccpms_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_clean(dmsk);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk, int backlog)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	return inet_csk_listen_start(sk, backlog);
}

static inline int dccp_need_reset(int state)
{
	return state != DCCP_CLOSED && state != DCCP_LISTEN &&
	       state != DCCP_REQUESTING;
}

int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC 793, sec. 3.8.
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);

	__skb_queue_purge(&sk->sk_receive_queue);
	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	WARN_ON(inet->num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	 * by poll logic and correct handling of state changes
	 * made by other threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

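/*
 * Illustrative userspace sketch: waiting for readability on a connected
 * DCCP socket with poll(), which lands in dccp_poll() above. 'fd' is
 * assumed to be a connected SOCK_DCCP descriptor.
 *
 *	#include <poll.h>
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *	if (poll(&pfd, 1, 5000) > 0 && (pfd.revents & POLLIN))
 *		;	// a packet (or a passive close) is queued for reading
 */
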
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		rc = put_user(amount, (int __user *)arg);
		break;
	}
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);
	return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

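/*
 * Illustrative userspace sketch: querying the size of the next queued
 * packet with SIOCINQ, serviced by dccp_ioctl() above. Note that, unlike
 * TCP, only the length of the packet at the head of the queue is reported.
 *
 *	#include <stdio.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/sockios.h>
 *
 *	int pending = 0;
 *	if (ioctl(fd, SIOCINQ, &pending) == 0)
 *		printf("next packet: %d bytes\n", pending);
 */
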
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}

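/*
 * Illustrative userspace sketch: setting the service code, handled by
 * dccp_setsockopt_service() above. A service code is required before
 * connect()/listen() (RFC 4340, 8.1.2); 42 is an arbitrary example value,
 * conventionally passed in network byte order. SOL_DCCP (269) may need to
 * come from <linux/socket.h> if the libc headers lack it.
 *
 *	#include <stdint.h>
 *	#include <arpa/inet.h>
 *	#include <linux/dccp.h>
 *
 *	uint32_t service = htonl(42);
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   &service, sizeof(service));
 */
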
/* Byte 1 is the feature; the rest is the preference list. */
static int dccp_setsockopt_change(struct sock *sk, int type,
				  struct dccp_so_feat __user *optval)
{
	struct dccp_so_feat opt;
	u8 *val;
	int rc;

	if (copy_from_user(&opt, optval, sizeof(opt)))
		return -EFAULT;
	/*
	 * RFC 4340, 6.1. Change Options
	 */
	if (opt.dccpsf_len < 1)
		return -EINVAL;

	val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
	if (!val)
		return -ENOMEM;

	if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
		rc = -EFAULT;
		goto out_free_val;
	}

	rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
			      val, opt.dccpsf_len, GFP_KERNEL);
	if (rc)
		goto out_free_val;

out:
	return rc;

out_free_val:
	kfree(val);
	goto out;
}

static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		err = 0;
		break;
	case DCCP_SOCKOPT_CHANGE_L:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_CHANGE_R:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:	/* sender side, RFC 4340, sec. 9.2 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else
			dp->dccps_pcslen = val;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:	/* receiver side, RFC 4340 sec. 9.2.1 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else {
			dp->dccps_pcrlen = val;
			/* FIXME: add feature negotiation,
			 * ChangeL(MinimumChecksumCoverage, val) */
		}
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}

	release_sock(sk);
	return err;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

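/*
 * Illustrative userspace sketch: restricting the sender's checksum coverage
 * (RFC 4340, 9.2) via the DCCP_SOCKOPT_SEND_CSCOV case handled in
 * do_dccp_setsockopt() above. Valid values are 0..15; 1 covers the header
 * only, leaving the payload unchecksummed.
 *
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *	#include <linux/dccp.h>
 *
 *	int cscov = 1;
 *	if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SEND_CSCOV,
 *		       &cscov, sizeof(cscov)) < 0)
 *		perror("DCCP_SOCKOPT_SEND_CSCOV");
 */
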
#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_setsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif

static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

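/*
 * Illustrative userspace sketch: reading the current maximum packet size
 * (the dccps_mss_cache returned by the DCCP_SOCKOPT_GET_CUR_MPS case in
 * do_dccp_getsockopt() above), useful for sizing application records.
 *
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *	#include <linux/dccp.h>
 *
 *	int mps = 0;
 *	socklen_t len = sizeof(mps);
 *	if (getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_GET_CUR_MPS,
 *		       &mps, &len) == 0)
 *		printf("current MPS: %d bytes\n", mps);
 */
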
#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_getsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif

int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	if (sysctl_dccp_tx_qlen &&
	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	skb_queue_tail(&sk->sk_write_queue, skb);
	dccp_write_xmit(sk, 0);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

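/*
 * Illustrative userspace sketch: DCCP preserves message boundaries, so each
 * send() maps onto one packet via dccp_sendmsg() above. Payloads larger
 * than the current MPS are rejected with EMSGSIZE rather than fragmented.
 *
 *	#include <errno.h>
 *	#include <sys/socket.h>
 *
 *	const char buf[] = "hello";
 *	ssize_t n = send(fd, buf, sizeof(buf), 0);
 *	if (n < 0 && errno == EMSGSIZE)
 *		;	// shrink the record to DCCP_SOCKOPT_GET_CUR_MPS and retry
 */
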
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb, 0);
		}
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when the user tries to read
				 * from a socket that was never connected.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb, 0);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

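/*
 * Illustrative userspace sketch: a read loop over dccp_recvmsg() above.
 * Each recv() returns at most one packet; a return of 0 signals that the
 * peer closed the connection (a Close/CloseReq/Reset was dequeued).
 *
 *	#include <errno.h>
 *	#include <sys/socket.h>
 *
 *	char buf[1500];
 *	for (;;) {
 *		ssize_t n = recv(fd, buf, sizeof(buf), 0);
 *		if (n > 0)
 *			continue;	// one complete packet is in buf
 *		if (n == 0)
 *			break;		// connection closed by peer
 *		if (errno != EINTR)
 *			break;		// real error
 *	}
 */
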
int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);

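/*
 * Illustrative userspace server sketch driving inet_dccp_listen() above.
 * A listening DCCP socket also needs a service code; port 5001 and service
 * code 42 are arbitrary example values.
 *
 *	#include <stdint.h>
 *	#include <arpa/inet.h>
 *	#include <netinet/in.h>
 *	#include <sys/socket.h>
 *	#include <linux/dccp.h>
 *
 *	struct sockaddr_in sa = {
 *		.sin_family = AF_INET,
 *		.sin_port   = htons(5001),
 *	};
 *	uint32_t service = htonl(42);
 *
 *	int srv = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	setsockopt(srv, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   &service, sizeof(service));
 *	bind(srv, (struct sockaddr *)&sa, sizeof(sa));
 *	listen(srv, 5);			// enters inet_dccp_listen()
 *	int conn = accept(srv, NULL, NULL);
 */
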
static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		dccp_send_close(sk, 1);

		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}

void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		dccp_terminate_connection(sk);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);
	atomic_inc(sk->sk_prot->orphan_count);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static inline int dccp_mib_init(void)
{
	return snmp_mib_init((void **)dccp_statistics, sizeof(struct dccp_mib));
}

static inline void dccp_mib_exit(void)
{
	snmp_mib_free((void **)dccp_statistics);
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

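/*
 * Usage note (illustrative): when DCCP is built as a module, the ehash size
 * can be overridden at load time, e.g.:
 *
 *	# modprobe dccp thash_entries=8192
 *
 * or, when built in, via "dccp.thash_entries=8192" on the kernel command
 * line.
 */
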
#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc = -ENOBUFS;

	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
		     FIELD_SIZEOF(struct sk_buff, cb));

	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
	}

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
		goto out_free_dccp_ehash;

	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	dccp_timestamping_init();
out:
	return rc;
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
	goto out;
}

static void __exit dccp_fini(void)
{
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");