xref: /openbmc/linux/net/dccp/proto.c (revision 4a3fad70)
/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/inet_common.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* The maximum TX queue length, in packets; 0 means no limit. */
int sysctl_dccp_tx_qlen __read_mostly = 5;

#ifdef CONFIG_IP_DCCP_DEBUG
static const char *dccp_state_name(const int state)
{
	static const char *const dccp_state_names[] = {
	[DCCP_OPEN]		= "OPEN",
	[DCCP_REQUESTING]	= "REQUESTING",
	[DCCP_PARTOPEN]		= "PARTOPEN",
	[DCCP_LISTEN]		= "LISTEN",
	[DCCP_RESPOND]		= "RESPOND",
	[DCCP_CLOSING]		= "CLOSING",
	[DCCP_ACTIVE_CLOSEREQ]	= "CLOSEREQ",
	[DCCP_PASSIVE_CLOSE]	= "PASSIVE_CLOSE",
	[DCCP_PASSIVE_CLOSEREQ]	= "PASSIVE_CLOSEREQ",
	[DCCP_TIME_WAIT]	= "TIME_WAIT",
	[DCCP_CLOSED]		= "CLOSED",
	};

	if (state >= DCCP_MAX_STATES)
		return "INVALID STATE!";
	else
		return dccp_state_names[state];
}
#endif

void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		/* Client retransmits all Confirm options until entering OPEN */
		if (oldstate == DCCP_PARTOPEN)
			dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);

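/*
 * Illustrative state progressions through dccp_set_state(), per RFC 4340,
 * sec. 4.3 (a sketch, not an exhaustive enumeration):
 *
 *   client active open:  CLOSED -> REQUESTING -> PARTOPEN -> OPEN
 *   server passive open: CLOSED -> LISTEN (-> RESPOND -> OPEN per child)
 */
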
static void dccp_finish_passive_close(struct sock *sk)
{
	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
		/* Node (client or server) has received Close packet. */
		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
		dccp_set_state(sk, DCCP_CLOSED);
		break;
	case DCCP_PASSIVE_CLOSEREQ:
		/*
		 * Client received CloseReq. We set the `active' flag so that
		 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
		 */
		dccp_send_close(sk, 1);
		dccp_set_state(sk, DCCP_CLOSING);
	}
}

void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
	static const char *const dccp_packet_names[] = {
		[DCCP_PKT_REQUEST]  = "REQUEST",
		[DCCP_PKT_RESPONSE] = "RESPONSE",
		[DCCP_PKT_DATA]	    = "DATA",
		[DCCP_PKT_ACK]	    = "ACK",
		[DCCP_PKT_DATAACK]  = "DATAACK",
		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
		[DCCP_PKT_CLOSE]    = "CLOSE",
		[DCCP_PKT_RESET]    = "RESET",
		[DCCP_PKT_SYNC]	    = "SYNC",
		[DCCP_PKT_SYNCACK]  = "SYNCACK",
	};

	if (type >= DCCP_NR_PKT_TYPES)
		return "INVALID";
	else
		return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

static void dccp_sk_destruct(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_tx_ccid = NULL;
	inet_sock_destruct(sk);
}

int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	sk->sk_destruct		= dccp_sk_destruct;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_tx_qlen	= sysctl_dccp_tx_qlen;

	dccp_init_xmit_timers(sk);

	INIT_LIST_HEAD(&dp->dccps_featneg);
	/* control socket doesn't need feat nego */
	if (likely(ctl_sock_initialized))
		return dccp_feat_init(sk);
	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dp->dccps_hc_rx_ackvec != NULL) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	dp->dccps_hc_rx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk, int backlog)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	/* do not start to listen if feature negotiation setup fails */
	if (dccp_feat_finalise_settings(dp))
		return -EPROTO;
	return inet_csk_listen_start(sk, backlog);
}

static inline int dccp_need_reset(int state)
{
	return state != DCCP_CLOSED && state != DCCP_LISTEN &&
	       state != DCCP_REQUESTING;
}

int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct dccp_sock *dp = dccp_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC793, sec. 3.8
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = NULL;
	dp->dccps_hc_tx_ccid = NULL;

	__skb_queue_purge(&sk->sk_receive_queue);
	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->inet_dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	sock_poll_wait(file, sk_sleep(sk), wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by other threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_is_writeable(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_is_writeable(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

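/*
 * Usage sketch (illustrative userspace code, not part of this file): a
 * poll(2) on a connected DCCP socket ends up in dccp_poll() above.
 *
 *	struct pollfd pfd = { .fd = dccp_fd, .events = POLLIN };
 *
 *	if (poll(&pfd, 1, 5000) > 0 && (pfd.revents & POLLIN))
 *		recv_one_packet(dccp_fd);	(hypothetical helper)
 */
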
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		rc = put_user(amount, (int __user *)arg);
	}
		break;
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);
	return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

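/*
 * Usage sketch (illustrative userspace code): since one recvmsg() reads at
 * most one packet, SIOCINQ as implemented above reports the length of the
 * packet at the head of the receive queue, not the total of queued bytes.
 *
 *	int pending;
 *
 *	if (ioctl(dccp_fd, SIOCINQ, &pending) == 0)
 *		printf("next packet carries %d bytes\n", pending);
 */
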
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}

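/*
 * Usage sketch (illustrative userspace code): the service code must be set
 * before connecting or listening; the value 42 is a made-up example.
 *
 *	uint32_t service = htonl(42);
 *
 *	setsockopt(dccp_fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   &service, sizeof(service));
 *
 * As parsed above, a longer optval is treated as an array of __be32: the
 * first entry becomes dccps_service, the remainder dccps_service_list.
 */
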
static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
	u8 *list, len;
	int i, rc;

	if (cscov < 0 || cscov > 15)
		return -EINVAL;
	/*
	 * Populate a list of permissible values, in the range cscov...15. This
	 * is necessary since feature negotiation of single values only works if
	 * both sides incidentally choose the same value. Since the list starts
	 * lowest-value first, negotiation will pick the smallest shared value.
	 */
	if (cscov == 0)
		return 0;
	len = 16 - cscov;

	list = kmalloc(len, GFP_KERNEL);
	if (list == NULL)
		return -ENOBUFS;

	for (i = 0; i < len; i++)
		list[i] = cscov++;

	rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);

	if (rc == 0) {
		if (rx)
			dccp_sk(sk)->dccps_pcrlen = cscov;
		else
			dccp_sk(sk)->dccps_pcslen = cscov;
	}
	kfree(list);
	return rc;
}

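/*
 * Usage sketch (illustrative userspace code): request that incoming
 * packets cover at least the first 4 words past the header, i.e. register
 * the value list {4, ..., 15} for the Minimum Checksum Coverage feature as
 * done above.
 *
 *	int cscov = 4;
 *
 *	setsockopt(dccp_fd, SOL_DCCP, DCCP_SOCKOPT_RECV_CSCOV,
 *		   &cscov, sizeof(cscov));
 */
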
static int dccp_setsockopt_ccid(struct sock *sk, int type,
				char __user *optval, unsigned int optlen)
{
	u8 *val;
	int rc = 0;

	if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
		return -EINVAL;

	val = memdup_user(optval, optlen);
	if (IS_ERR(val))
		return PTR_ERR(val);

	lock_sock(sk);
	if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);

	if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
	release_sock(sk);

	kfree(val);
	return rc;
}

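/*
 * Usage sketch (illustrative userspace code): register a priority-ordered
 * CCID preference list for both half-connections before connection setup
 * (here preferring CCID-3 over CCID-2, assuming both are built in).
 *
 *	uint8_t ccids[] = { 3, 2 };
 *
 *	setsockopt(dccp_fd, SOL_DCCP, DCCP_SOCKOPT_CCID,
 *		   ccids, sizeof(ccids));
 */
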
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		char __user *optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CHANGE_L:
	case DCCP_SOCKOPT_CHANGE_R:
		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CCID:
	case DCCP_SOCKOPT_RX_CCID:
	case DCCP_SOCKOPT_TX_CCID:
		return dccp_setsockopt_ccid(sk, optname, optval, optlen);
	}

	if (optlen < (int)sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, false);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, true);
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		if (sk->sk_state != DCCP_CLOSED)
			err = -EISCONN;
		else if (val < 0 || val >= DCCPQ_POLICY_MAX)
			err = -EINVAL;
		else
			dp->dccps_qpolicy = val;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		if (val < 0)
			err = -EINVAL;
		else
			dp->dccps_tx_qlen = val;
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);

	return err;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, unsigned int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, unsigned int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_setsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif

static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
	case DCCP_SOCKOPT_TX_CCID:
		val = ccid_get_current_tx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_RX_CCID:
		val = ccid_get_current_rx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		val = dp->dccps_qpolicy;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		val = dp->dccps_tx_qlen;
		break;
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_getsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif

static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
{
	struct cmsghdr *cmsg;

	/*
	 * Assign an (opaque) qpolicy priority value to skb->priority.
	 *
	 * We are overloading this skb field for use with the qpolicy subsystem.
	 * The skb->priority is normally used for the SO_PRIORITY option, which
	 * is initialised from sk_priority. Since the assignment of sk_priority
	 * to skb->priority happens later (on layer 3), we overload this field
	 * for use with queueing priorities as long as the skb is on layer 4.
	 * The default priority value (if nothing is set) is 0.
	 */
	skb->priority = 0;

	for_each_cmsghdr(cmsg, msg) {
		if (!CMSG_OK(msg, cmsg))
			return -EINVAL;

		if (cmsg->cmsg_level != SOL_DCCP)
			continue;

		if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
		    !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
			return -EINVAL;

		switch (cmsg->cmsg_type) {
		case DCCP_SCM_PRIORITY:
			if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
				return -EINVAL;
			skb->priority = *(__u32 *)CMSG_DATA(cmsg);
			break;
		default:
			return -EINVAL;
		}
	}
	return 0;
}

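/*
 * Usage sketch (illustrative userspace code): attaching a queuing priority
 * to one packet via the ancillary data parsed above.  This presupposes
 * that DCCP_SOCKOPT_QPOLICY_ID was set to DCCPQ_POLICY_PRIO beforehand.
 *
 *	char cbuf[CMSG_SPACE(sizeof(uint32_t))];
 *	struct iovec iov = { .iov_base = buf, .iov_len = buflen };
 *	struct msghdr mh = { .msg_iov = &iov, .msg_iovlen = 1,
 *			     .msg_control = cbuf,
 *			     .msg_controllen = sizeof(cbuf) };
 *	struct cmsghdr *cm = CMSG_FIRSTHDR(&mh);
 *
 *	cm->cmsg_level = SOL_DCCP;
 *	cm->cmsg_type  = DCCP_SCM_PRIORITY;
 *	cm->cmsg_len   = CMSG_LEN(sizeof(uint32_t));
 *	*(uint32_t *)CMSG_DATA(cm) = prio;	(opaque priority value)
 *
 *	sendmsg(dccp_fd, &mh, 0);
 */
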
int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	if (dccp_qpolicy_full(sk)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_from_msg(skb_put(skb, len), msg, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_msghdr_parse(msg, skb);
	if (rc != 0)
		goto out_discard;

	dccp_qpolicy_push(sk, skb);
	/*
	 * The xmit_timer is set if the TX CCID is rate-based and will expire
	 * when congestion control permits further packets to be released into
	 * the network. Window-based CCIDs do not use this timer.
	 */
	if (!timer_pending(&dp->dccps_xmit_timer))
		dccp_write_xmit(sk);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
		 int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb);
		}
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when the user tries to read
				 * from a socket that was never connected.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo, NULL);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_msg(skb, 0, msg, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
		if (flags & MSG_TRUNC)
			len = skb->len;
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

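/*
 * Usage sketch (illustrative userspace code): DCCP is packet-oriented, so
 * each recvmsg() returns at most one packet, and -- as implemented above --
 * a packet larger than the supplied buffer is truncated with MSG_TRUNC set
 * in msg_flags.  A return value of 0 indicates a received Close, CloseReq
 * or Reset rather than "no data".
 *
 *	char buf[1500];
 *	ssize_t n = recv(dccp_fd, buf, sizeof(buf), 0);
 */
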
int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);

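/*
 * Usage sketch (illustrative userspace code): a minimal passive open whose
 * listen(2) lands in inet_dccp_listen() above (error handling omitted; the
 * service code 42 and port 1234 are made-up examples).
 *
 *	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	uint32_t service = htonl(42);
 *	struct sockaddr_in sin = { .sin_family = AF_INET,
 *				   .sin_port = htons(1234) };
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, &service, sizeof(service));
 *	bind(fd, (struct sockaddr *)&sin, sizeof(sin));
 *	listen(fd, 5);
 *	int conn = accept(fd, NULL, NULL);
 */
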
static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		dccp_send_close(sk, 1);

		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}

void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	/* If the socket has already been reset, kill it. */
	if (sk->sk_state == DCCP_CLOSED)
		goto adjudge_to_death;

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		/*
		 * Normal connection termination. May need to wait if there are
		 * still packets in the TX queue that are delayed by the CCID.
		 */
		dccp_flush_write_queue(sk, &timeout);
		dccp_terminate_connection(sk);
	}

	/*
	 * Flush write queue. This may be necessary in several cases:
	 * - we have been closed by the peer but still have application data;
	 * - abortive termination (unread data or zero linger time);
	 * - normal termination but queue could not be flushed within time limit.
	 */
	__skb_queue_purge(&sk->sk_write_queue);

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * This is the last release_sock() in this socket's life; it will
	 * remove the backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	percpu_counter_inc(sk->sk_prot->orphan_count);

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static inline int __init dccp_mib_init(void)
{
	dccp_statistics = alloc_percpu(struct dccp_mib);
	if (!dccp_statistics)
		return -ENOMEM;
	return 0;
}

static inline void dccp_mib_exit(void)
{
	free_percpu(dccp_statistics);
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
bool dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc;

	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
		     FIELD_SIZEOF(struct sk_buff, cb));
	rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
	if (rc)
		goto out_fail;
	rc = -ENOBUFS;
	inet_hashinfo_init(&dccp_hashinfo);
	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out_free_percpu;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (totalram_pages >= (128 * 1024))
		goal = totalram_pages >> (21 - PAGE_SHIFT);
	else
		goal = totalram_pages >> (23 - PAGE_SHIFT);

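	/*
	 * Worked example (illustrative; assumes 4 KiB pages, PAGE_SHIFT = 12):
	 * with 1 GiB of RAM, totalram_pages = 262144 >= 128 * 1024, so
	 * goal = 262144 >> (21 - 12) = 512 pages (2 MiB) for the ehash table,
	 * and the search below ends with ehash_order = 9.
	 */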
	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	do {
		unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);

		while (hash_size & (hash_size - 1))
			hash_size--;
		dccp_hashinfo.ehash_mask = hash_size - 1;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
		goto out_free_dccp_ehash;

	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	rc = ccid_initialize_builtins();
	if (rc)
		goto out_sysctl_exit;

	dccp_timestamping_init();

	return 0;

out_sysctl_exit:
	dccp_sysctl_exit();
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_percpu:
	percpu_counter_destroy(&dccp_orphan_count);
out_fail:
	dccp_hashinfo.bhash = NULL;
	dccp_hashinfo.ehash = NULL;
	dccp_hashinfo.bind_bucket_cachep = NULL;
	return rc;
}

static void __exit dccp_fini(void)
{
	ccid_cleanup_builtins();
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order((dccp_hashinfo.ehash_mask + 1) *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
	percpu_counter_destroy(&dccp_orphan_count);
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");