// SPDX-License-Identifier: GPL-2.0-only
/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/inet_common.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

#define CREATE_TRACE_POINTS
#include "trace.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;

#ifdef CONFIG_IP_DCCP_DEBUG
static const char *dccp_state_name(const int state)
{
	static const char *const dccp_state_names[] = {
	[DCCP_OPEN]		= "OPEN",
	[DCCP_REQUESTING]	= "REQUESTING",
	[DCCP_PARTOPEN]		= "PARTOPEN",
	[DCCP_LISTEN]		= "LISTEN",
	[DCCP_RESPOND]		= "RESPOND",
	[DCCP_CLOSING]		= "CLOSING",
	[DCCP_ACTIVE_CLOSEREQ]	= "CLOSEREQ",
	[DCCP_PASSIVE_CLOSE]	= "PASSIVE_CLOSE",
	[DCCP_PASSIVE_CLOSEREQ]	= "PASSIVE_CLOSEREQ",
	[DCCP_TIME_WAIT]	= "TIME_WAIT",
	[DCCP_CLOSED]		= "CLOSED",
	};

	if (state >= DCCP_MAX_STATES)
		return "INVALID STATE!";
	else
		return dccp_state_names[state];
}
#endif

void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		/* Client retransmits all Confirm options until entering OPEN */
		if (oldstate == DCCP_PARTOPEN)
			dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	inet_sk_set_state(sk, state);
}

EXPORT_SYMBOL_GPL(dccp_set_state);

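/*
 * For orientation, the usual progressions through these states (cf. the
 * state diagram in RFC 4340, sec. 8) are:
 *
 *   client:  CLOSED -> REQUESTING -> PARTOPEN -> OPEN
 *   server:  CLOSED -> LISTEN -> RESPOND -> OPEN
 *
 * Teardown goes through CLOSING (and, on the host that holds TIME_WAIT
 * state, TIME_WAIT) back to CLOSED, with PASSIVE_CLOSE and
 * PASSIVE_CLOSEREQ covering reception of Close and CloseReq packets.
 */
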
static void dccp_finish_passive_close(struct sock *sk)
{
	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
		/* Node (client or server) has received Close packet. */
		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
		dccp_set_state(sk, DCCP_CLOSED);
		break;
	case DCCP_PASSIVE_CLOSEREQ:
		/*
		 * Client received CloseReq. We set the `active' flag so that
		 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
		 */
		dccp_send_close(sk, 1);
		dccp_set_state(sk, DCCP_CLOSING);
	}
}

void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
	static const char *const dccp_packet_names[] = {
		[DCCP_PKT_REQUEST]  = "REQUEST",
		[DCCP_PKT_RESPONSE] = "RESPONSE",
		[DCCP_PKT_DATA]	    = "DATA",
		[DCCP_PKT_ACK]	    = "ACK",
		[DCCP_PKT_DATAACK]  = "DATAACK",
		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
		[DCCP_PKT_CLOSE]    = "CLOSE",
		[DCCP_PKT_RESET]    = "RESET",
		[DCCP_PKT_SYNC]	    = "SYNC",
		[DCCP_PKT_SYNCACK]  = "SYNCACK",
	};

	if (type >= DCCP_NR_PKT_TYPES)
		return "INVALID";
	else
		return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

static void dccp_sk_destruct(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_tx_ccid = NULL;
	inet_sock_destruct(sk);
}

int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	sk->sk_destruct		= dccp_sk_destruct;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_tx_qlen	= sysctl_dccp_tx_qlen;

	dccp_init_xmit_timers(sk);

	INIT_LIST_HEAD(&dp->dccps_featneg);
	/* control socket doesn't need feat nego */
	if (likely(ctl_sock_initialized))
		return dccp_feat_init(sk);
	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dp->dccps_hc_rx_ackvec != NULL) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	dp->dccps_hc_rx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk, int backlog)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	/* do not start to listen if feature negotiation setup fails */
	if (dccp_feat_finalise_settings(dp))
		return -EPROTO;
	return inet_csk_listen_start(sk, backlog);
}

static inline int dccp_need_reset(int state)
{
	return state != DCCP_CLOSED && state != DCCP_LISTEN &&
	       state != DCCP_REQUESTING;
}

int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct dccp_sock *dp = dccp_sk(sk);
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC793, sec. 3.8
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	dp->dccps_hc_rx_ccid = NULL;

	__skb_queue_purge(&sk->sk_receive_queue);
	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->inet_dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return 0;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
__poll_t dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	__poll_t mask;
	struct sock *sk = sock->sk;

	sock_poll_wait(file, sock, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	 * by poll logic, and correct handling of state changes
	 * made by other threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = EPOLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= EPOLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= EPOLLIN | EPOLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_is_writeable(sk)) {
				mask |= EPOLLOUT | EPOLLWRNORM;
			} else {  /* send SIGIO later */
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_is_writeable(sk))
					mask |= EPOLLOUT | EPOLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

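/*
 * Illustrative userspace sketch (not part of the kernel build): waiting
 * for a DCCP socket to become readable via the poll hooks above.  "fd"
 * is assumed to be a connected SOCK_DCCP descriptor.
 */
#if 0
#include <poll.h>

static int wait_readable(int fd, int timeout_ms)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN };
	int rc = poll(&pfd, 1, timeout_ms);

	if (rc <= 0)
		return rc;	/* 0 on timeout, -1 on error */
	if (pfd.revents & (POLLERR | POLLHUP))
		return -1;	/* socket error or connection gone */
	return 1;		/* a packet is queued for reading */
}
#endif
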
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCOUTQ: {
		int amount = sk_wmem_alloc_get(sk);
		/* Using sk_wmem_alloc here because sk_wmem_queued is not used
		 * by DCCP and is always 0, as is the case for UDP.
		 */

		rc = put_user(amount, (int __user *)arg);
	}
		break;
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		rc = put_user(amount, (int __user *)arg);
	}
		break;
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);
	return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

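/*
 * Illustrative userspace sketch (not part of the kernel build): querying
 * the two queue sizes implemented above.  SIOCOUTQ reports the socket
 * buffer memory still held on the transmit side (sk_wmem_alloc), SIOCINQ
 * the length of the packet at the head of the receive queue (0 if none).
 */
#if 0
#include <sys/ioctl.h>
#include <linux/sockios.h>

static int tx_queue_bytes(int fd)
{
	int outq = 0;

	return ioctl(fd, SIOCOUTQ, &outq) ? -1 : outq;
}

static int next_packet_len(int fd)
{
	int inq = 0;

	return ioctl(fd, SIOCINQ, &inq) ? -1 : inq;
}
#endif
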
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   sockptr_t optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		sockptr_advance(optval, sizeof(service));
		if (copy_from_sockptr(sl->dccpsl_list, optval,
				      optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}

static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
	u8 *list, len;
	int i, rc;

	if (cscov < 0 || cscov > 15)
		return -EINVAL;
	/*
	 * Populate a list of permissible values, in the range cscov...15. This
	 * is necessary since feature negotiation of single values only works
	 * if both sides happen to choose the same value. Since the list starts
	 * lowest-value first, negotiation will pick the smallest shared value.
	 */
	if (cscov == 0)
		return 0;
	len = 16 - cscov;

	list = kmalloc(len, GFP_KERNEL);
	if (list == NULL)
		return -ENOBUFS;

	for (i = 0; i < len; i++)
		list[i] = cscov++;

	rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);

	if (rc == 0) {
		if (rx)
			dccp_sk(sk)->dccps_pcrlen = cscov;
		else
			dccp_sk(sk)->dccps_pcslen = cscov;
	}
	kfree(list);
	return rc;
}

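/*
 * Worked example for the negotiation set-up above: a request for
 * cscov = 12 gives len = 4 and list = { 12, 13, 14, 15 }.  If the peer
 * announces { 10, 11, 12, 13, 14, 15 }, the smallest value present in
 * both lists, 12, becomes the negotiated minimum checksum coverage.
 */
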
static int dccp_setsockopt_ccid(struct sock *sk, int type,
				sockptr_t optval, unsigned int optlen)
{
	u8 *val;
	int rc = 0;

	if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
		return -EINVAL;

	val = memdup_sockptr(optval, optlen);
	if (IS_ERR(val))
		return PTR_ERR(val);

	lock_sock(sk);
	if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);

	if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
	release_sock(sk);

	kfree(val);
	return rc;
}

static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		sockptr_t optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CHANGE_L:
	case DCCP_SOCKOPT_CHANGE_R:
		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CCID:
	case DCCP_SOCKOPT_RX_CCID:
	case DCCP_SOCKOPT_TX_CCID:
		return dccp_setsockopt_ccid(sk, optname, optval, optlen);
	}

	if (optlen < (int)sizeof(int))
		return -EINVAL;

	if (copy_from_sockptr(&val, optval, sizeof(int)))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, false);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, true);
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		if (sk->sk_state != DCCP_CLOSED)
			err = -EISCONN;
		else if (val < 0 || val >= DCCPQ_POLICY_MAX)
			err = -EINVAL;
		else
			dp->dccps_qpolicy = val;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		if (val < 0)
			err = -EINVAL;
		else
			dp->dccps_tx_qlen = val;
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);

	return err;
}

int dccp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
		    unsigned int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

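/*
 * Illustrative userspace sketch (not part of the kernel build): choosing
 * the congestion-control module through the setsockopt() path above.
 * Per Documentation/networking/dccp.txt, the value is an array of CCID
 * numbers in order of preference; { 3, 2 } prefers CCID-3 (TFRC) and
 * falls back to CCID-2 (TCP-like).
 */
#if 0
#include <stdint.h>
#include <sys/socket.h>
#include <linux/dccp.h>

static int prefer_ccid3(int fd)
{
	uint8_t ccids[] = { 3, 2 };	/* most preferred first */

	/* must be done before connect()/listen(), while negotiation is open */
	return setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_TX_CCID,
			  ccids, sizeof(ccids));
}
#endif
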
static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
	case DCCP_SOCKOPT_TX_CCID:
		val = ccid_get_current_tx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_RX_CCID:
		val = ccid_get_current_rx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		val = dp->dccps_qpolicy;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		val = dp->dccps_tx_qlen;
		break;
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
{
	struct cmsghdr *cmsg;

	/*
	 * Assign an (opaque) qpolicy priority value to skb->priority.
	 *
	 * We are overloading this skb field for use with the qpolicy subsystem.
	 * The skb->priority is normally used for the SO_PRIORITY option, which
	 * is initialised from sk_priority. Since the assignment of sk_priority
	 * to skb->priority happens later (on layer 3), we overload this field
	 * for use with queueing priorities as long as the skb is on layer 4.
	 * The default priority value (if nothing is set) is 0.
	 */
	skb->priority = 0;

	for_each_cmsghdr(cmsg, msg) {
		if (!CMSG_OK(msg, cmsg))
			return -EINVAL;

		if (cmsg->cmsg_level != SOL_DCCP)
			continue;

		if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
		    !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
			return -EINVAL;

		switch (cmsg->cmsg_type) {
		case DCCP_SCM_PRIORITY:
			if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
				return -EINVAL;
			skb->priority = *(__u32 *)CMSG_DATA(cmsg);
			break;
		default:
			return -EINVAL;
		}
	}
	return 0;
}

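/*
 * Illustrative userspace sketch (not part of the kernel build): sending
 * a packet with a qpolicy priority attached as the DCCP_SCM_PRIORITY
 * ancillary message parsed above.  Only useful once the socket has
 * selected the priority-based policy via DCCP_SOCKOPT_QPOLICY_ID.
 */
#if 0
#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/dccp.h>

static ssize_t send_with_prio(int fd, const void *buf, size_t len,
			      uint32_t prio)
{
	char cbuf[CMSG_SPACE(sizeof(prio))];
	struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
	struct msghdr msg = {
		.msg_iov	= &iov,
		.msg_iovlen	= 1,
		.msg_control	= cbuf,
		.msg_controllen	= sizeof(cbuf),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_DCCP;
	cmsg->cmsg_type  = DCCP_SCM_PRIORITY;
	cmsg->cmsg_len   = CMSG_LEN(sizeof(prio));
	memcpy(CMSG_DATA(cmsg), &prio, sizeof(prio));

	return sendmsg(fd, &msg, 0);
}
#endif
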
int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	trace_dccp_probe(sk, len);

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	if (dccp_qpolicy_full(sk)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * Wait for a connection to finish. We have to use
	 * sk_stream_wait_connect here to set sk_write_pending, so that the
	 * trick in dccp_rcv_request_sent_state_process works.
	 */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	if (sk->sk_state == DCCP_CLOSED) {
		rc = -ENOTCONN;
		goto out_discard;
	}

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_from_msg(skb_put(skb, len), msg, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_msghdr_parse(msg, skb);
	if (rc != 0)
		goto out_discard;

	dccp_qpolicy_push(sk, skb);
	/*
	 * The xmit_timer is set if the TX CCID is rate-based and will expire
	 * when congestion control permits releasing further packets into the
	 * network. Window-based CCIDs do not use this timer.
	 */
	if (!timer_pending(&dp->dccps_xmit_timer))
		dccp_write_xmit(sk);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
		 int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb);
		}
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo, NULL);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_msg(skb, 0, msg, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
		if (flags & MSG_TRUNC)
			len = skb->len;
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

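/*
 * Illustrative userspace sketch (not part of the kernel build): DCCP
 * recvmsg() preserves packet boundaries, so each call returns at most
 * one packet.  Passing MSG_TRUNC makes the call return the full packet
 * length even when the buffer was too small, mirroring the
 * "flags & MSG_TRUNC" handling above.
 */
#if 0
#include <sys/socket.h>

static ssize_t read_one_packet(int fd, void *buf, size_t len)
{
	ssize_t n = recv(fd, buf, len, MSG_TRUNC);

	if (n > (ssize_t)len)
		n = len;	/* packet was truncated to the buffer size */
	return n;		/* 0 on Close/Reset, -1 on error */
}
#endif
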
int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	err = 0;

out:
	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);

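/*
 * Illustrative userspace sketch (not part of the kernel build): the
 * passive-open sequence that ends up in inet_dccp_listen() above.  The
 * service code 42 is a made-up example value; error handling is elided.
 */
#if 0
#include <stdint.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <linux/dccp.h>

static int dccp_server_socket(uint16_t port)
{
	struct sockaddr_in sin = {
		.sin_family = AF_INET,
		.sin_port   = htons(port),
	};
	uint32_t service = htonl(42);	/* hypothetical service code */
	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);

	if (fd < 0)
		return -1;
	/* the service code must be set before listen() */
	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
		   &service, sizeof(service));
	bind(fd, (struct sockaddr *)&sin, sizeof(sin));
	return listen(fd, 5) ? -1 : fd;
}
#endif
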
static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		dccp_send_close(sk, 1);

		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}

void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	/* If socket has been already reset kill it. */
	if (sk->sk_state == DCCP_CLOSED)
		goto adjudge_to_death;

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		/*
		 * Normal connection termination. May need to wait if there are
		 * still packets in the TX queue that are delayed by the CCID.
		 */
		dccp_flush_write_queue(sk, &timeout);
		dccp_terminate_connection(sk);
	}

	/*
	 * Flush write queue. This may be necessary in several cases:
	 * - we have been closed by the peer but still have application data;
	 * - abortive termination (unread data or zero linger time);
	 * - normal termination but queue could not be flushed within time limit.
	 */
	__skb_queue_purge(&sk->sk_write_queue);

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	percpu_counter_inc(sk->sk_prot->orphan_count);

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static inline int __init dccp_mib_init(void)
{
	dccp_statistics = alloc_percpu(struct dccp_mib);
	if (!dccp_statistics)
		return -ENOMEM;
	return 0;
}

static inline void dccp_mib_exit(void)
{
	free_percpu(dccp_statistics);
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
bool dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

static int __init dccp_init(void)
{
	unsigned long goal;
	unsigned long nr_pages = totalram_pages();
	int ehash_order, bhash_order, i;
	int rc;

	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
		     sizeof_field(struct sk_buff, cb));
	rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
	if (rc)
		goto out_fail;
	inet_hashinfo_init(&dccp_hashinfo);
	rc = inet_hashinfo2_init_mod(&dccp_hashinfo);
	if (rc)
		goto out_free_percpu;
	rc = -ENOBUFS;
	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out_free_hashinfo2;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (nr_pages >= (128 * 1024))
		goal = nr_pages >> (21 - PAGE_SHIFT);
	else
		goal = nr_pages >> (23 - PAGE_SHIFT);

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
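
	/*
	 * Worked sizing example (assuming 4 KiB pages, PAGE_SHIFT == 12, and
	 * no thash_entries override): with 4 GiB of memory, nr_pages = 2^20,
	 * which is >= 128 * 1024, so goal = 2^20 >> 9 = 2048 pages, and the
	 * loop below settles on ehash_order = 11.
	 */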
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	do {
		unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);

		while (hash_size & (hash_size - 1))
			hash_size--;
		dccp_hashinfo.ehash_mask = hash_size - 1;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
		goto out_free_dccp_ehash;

	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	rc = ccid_initialize_builtins();
	if (rc)
		goto out_sysctl_exit;

	dccp_timestamping_init();

	return 0;

out_sysctl_exit:
	dccp_sysctl_exit();
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_hashinfo2:
	inet_hashinfo2_free_mod(&dccp_hashinfo);
out_free_percpu:
	percpu_counter_destroy(&dccp_orphan_count);
out_fail:
	dccp_hashinfo.bhash = NULL;
	dccp_hashinfo.ehash = NULL;
	dccp_hashinfo.bind_bucket_cachep = NULL;
	return rc;
}

static void __exit dccp_fini(void)
{
	ccid_cleanup_builtins();
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order((dccp_hashinfo.ehash_mask + 1) *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
	inet_hashinfo2_free_mod(&dccp_hashinfo);
	percpu_counter_destroy(&dccp_orphan_count);
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");