xref: /openbmc/linux/net/dccp/ipv6.c (revision d2c43ff1)
1 /*
2  *	DCCP over IPv6
3  *	Linux INET6 implementation
4  *
5  *	Based on net/dccp6/ipv6.c
6  *
7  *	Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
8  *
9  *	This program is free software; you can redistribute it and/or
10  *      modify it under the terms of the GNU General Public License
11  *      as published by the Free Software Foundation; either version
12  *      2 of the License, or (at your option) any later version.
13  */
14 
#include <linux/ip.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/xfrm.h>

#include <net/addrconf.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
#include <net/inet_sock.h>
#include <net/inet6_connection_sock.h>
#include <net/inet6_hashtables.h>
#include <net/ip6_route.h>
#include <net/ipv6.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/transp_v6.h>
#include <net/ip6_checksum.h>
#include <net/xfrm.h>
#include <net/secure_seq.h>
33 
34 #include "dccp.h"
35 #include "ipv6.h"
36 #include "feat.h"
37 
/* The per-net dccp.v6_ctl_sk is used for sending RSTs and ACKs */

/*
 * Forward declarations: both operation tables are defined further down in
 * this file but are referenced earlier by the connect and accept paths.
 */
static const struct inet_connection_sock_af_ops dccp_ipv6_mapped;
static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops;
42 
43 /* add pseudo-header to DCCP checksum stored in skb->csum */
44 static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb,
45 				      const struct in6_addr *saddr,
46 				      const struct in6_addr *daddr)
47 {
48 	return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum);
49 }
50 
51 static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb)
52 {
53 	struct ipv6_pinfo *np = inet6_sk(sk);
54 	struct dccp_hdr *dh = dccp_hdr(skb);
55 
56 	dccp_csum_outgoing(skb);
57 	dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &sk->sk_v6_daddr);
58 }
59 
60 static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb)
61 {
62 	return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
63 					     ipv6_hdr(skb)->saddr.s6_addr32,
64 					     dccp_hdr(skb)->dccph_dport,
65 					     dccp_hdr(skb)->dccph_sport     );
66 
67 }
68 
69 static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
70 			u8 type, u8 code, int offset, __be32 info)
71 {
72 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
73 	const struct dccp_hdr *dh;
74 	struct dccp_sock *dp;
75 	struct ipv6_pinfo *np;
76 	struct sock *sk;
77 	int err;
78 	__u64 seq;
79 	struct net *net = dev_net(skb->dev);
80 
81 	/* Only need dccph_dport & dccph_sport which are the first
82 	 * 4 bytes in dccp header.
83 	 * Our caller (icmpv6_notify()) already pulled 8 bytes for us.
84 	 */
85 	BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
86 	BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
87 	dh = (struct dccp_hdr *)(skb->data + offset);
88 
89 	sk = __inet6_lookup_established(net, &dccp_hashinfo,
90 					&hdr->daddr, dh->dccph_dport,
91 					&hdr->saddr, ntohs(dh->dccph_sport),
92 					inet6_iif(skb));
93 
94 	if (!sk) {
95 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
96 				  ICMP6_MIB_INERRORS);
97 		return;
98 	}
99 
100 	if (sk->sk_state == DCCP_TIME_WAIT) {
101 		inet_twsk_put(inet_twsk(sk));
102 		return;
103 	}
104 	seq = dccp_hdr_seq(dh);
105 	if (sk->sk_state == DCCP_NEW_SYN_RECV)
106 		return dccp_req_err(sk, seq);
107 
108 	bh_lock_sock(sk);
109 	if (sock_owned_by_user(sk))
110 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
111 
112 	if (sk->sk_state == DCCP_CLOSED)
113 		goto out;
114 
115 	dp = dccp_sk(sk);
116 	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) &&
117 	    !between48(seq, dp->dccps_awl, dp->dccps_awh)) {
118 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
119 		goto out;
120 	}
121 
122 	np = inet6_sk(sk);
123 
124 	if (type == NDISC_REDIRECT) {
125 		if (!sock_owned_by_user(sk)) {
126 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
127 
128 			if (dst)
129 				dst->ops->redirect(dst, sk, skb);
130 		}
131 		goto out;
132 	}
133 
134 	if (type == ICMPV6_PKT_TOOBIG) {
135 		struct dst_entry *dst = NULL;
136 
137 		if (!ip6_sk_accept_pmtu(sk))
138 			goto out;
139 
140 		if (sock_owned_by_user(sk))
141 			goto out;
142 		if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED))
143 			goto out;
144 
145 		dst = inet6_csk_update_pmtu(sk, ntohl(info));
146 		if (!dst)
147 			goto out;
148 
149 		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst))
150 			dccp_sync_mss(sk, dst_mtu(dst));
151 		goto out;
152 	}
153 
154 	icmpv6_err_convert(type, code, &err);
155 
156 	/* Might be for an request_sock */
157 	switch (sk->sk_state) {
158 	case DCCP_REQUESTING:
159 	case DCCP_RESPOND:  /* Cannot happen.
160 			       It can, it SYNs are crossed. --ANK */
161 		if (!sock_owned_by_user(sk)) {
162 			__DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
163 			sk->sk_err = err;
164 			/*
165 			 * Wake people up to see the error
166 			 * (see connect in sock.c)
167 			 */
168 			sk->sk_error_report(sk);
169 			dccp_done(sk);
170 		} else
171 			sk->sk_err_soft = err;
172 		goto out;
173 	}
174 
175 	if (!sock_owned_by_user(sk) && np->recverr) {
176 		sk->sk_err = err;
177 		sk->sk_error_report(sk);
178 	} else
179 		sk->sk_err_soft = err;
180 
181 out:
182 	bh_unlock_sock(sk);
183 	sock_put(sk);
184 }
185 
186 
187 static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req)
188 {
189 	struct inet_request_sock *ireq = inet_rsk(req);
190 	struct ipv6_pinfo *np = inet6_sk(sk);
191 	struct sk_buff *skb;
192 	struct in6_addr *final_p, final;
193 	struct flowi6 fl6;
194 	int err = -1;
195 	struct dst_entry *dst;
196 
197 	memset(&fl6, 0, sizeof(fl6));
198 	fl6.flowi6_proto = IPPROTO_DCCP;
199 	fl6.daddr = ireq->ir_v6_rmt_addr;
200 	fl6.saddr = ireq->ir_v6_loc_addr;
201 	fl6.flowlabel = 0;
202 	fl6.flowi6_oif = ireq->ir_iif;
203 	fl6.fl6_dport = ireq->ir_rmt_port;
204 	fl6.fl6_sport = htons(ireq->ir_num);
205 	security_req_classify_flow(req, flowi6_to_flowi(&fl6));
206 
207 
208 	rcu_read_lock();
209 	final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
210 	rcu_read_unlock();
211 
212 	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
213 	if (IS_ERR(dst)) {
214 		err = PTR_ERR(dst);
215 		dst = NULL;
216 		goto done;
217 	}
218 
219 	skb = dccp_make_response(sk, dst, req);
220 	if (skb != NULL) {
221 		struct dccp_hdr *dh = dccp_hdr(skb);
222 		struct ipv6_txoptions *opt;
223 
224 		dh->dccph_checksum = dccp_v6_csum_finish(skb,
225 							 &ireq->ir_v6_loc_addr,
226 							 &ireq->ir_v6_rmt_addr);
227 		fl6.daddr = ireq->ir_v6_rmt_addr;
228 		rcu_read_lock();
229 		opt = ireq->ipv6_opt;
230 		if (!opt)
231 			opt = rcu_dereference(np->opt);
232 		err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass);
233 		rcu_read_unlock();
234 		err = net_xmit_eval(err);
235 	}
236 
237 done:
238 	dst_release(dst);
239 	return err;
240 }
241 
242 static void dccp_v6_reqsk_destructor(struct request_sock *req)
243 {
244 	dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
245 	kfree(inet_rsk(req)->ipv6_opt);
246 	kfree_skb(inet_rsk(req)->pktopts);
247 }
248 
249 static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
250 {
251 	const struct ipv6hdr *rxip6h;
252 	struct sk_buff *skb;
253 	struct flowi6 fl6;
254 	struct net *net = dev_net(skb_dst(rxskb)->dev);
255 	struct sock *ctl_sk = net->dccp.v6_ctl_sk;
256 	struct dst_entry *dst;
257 
258 	if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET)
259 		return;
260 
261 	if (!ipv6_unicast_destination(rxskb))
262 		return;
263 
264 	skb = dccp_ctl_make_reset(ctl_sk, rxskb);
265 	if (skb == NULL)
266 		return;
267 
268 	rxip6h = ipv6_hdr(rxskb);
269 	dccp_hdr(skb)->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr,
270 							    &rxip6h->daddr);
271 
272 	memset(&fl6, 0, sizeof(fl6));
273 	fl6.daddr = rxip6h->saddr;
274 	fl6.saddr = rxip6h->daddr;
275 
276 	fl6.flowi6_proto = IPPROTO_DCCP;
277 	fl6.flowi6_oif = inet6_iif(rxskb);
278 	fl6.fl6_dport = dccp_hdr(skb)->dccph_dport;
279 	fl6.fl6_sport = dccp_hdr(skb)->dccph_sport;
280 	security_skb_classify_flow(rxskb, flowi6_to_flowi(&fl6));
281 
282 	/* sk = NULL, but it is safe for now. RST socket required. */
283 	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
284 	if (!IS_ERR(dst)) {
285 		skb_dst_set(skb, dst);
286 		ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0);
287 		DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
288 		DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
289 		return;
290 	}
291 
292 	kfree_skb(skb);
293 }
294 
/*
 * Request-sock operations for DCCP over IPv6.  The response, destructor and
 * reset handlers are the IPv6-specific functions defined in this file; the
 * ACK and timeout handlers are named without a v6 prefix and are not
 * defined here.
 */
static struct request_sock_ops dccp6_request_sock_ops = {
	.family		= AF_INET6,
	.obj_size	= sizeof(struct dccp6_request_sock),
	.rtx_syn_ack	= dccp_v6_send_response,
	.send_ack	= dccp_reqsk_send_ack,
	.destructor	= dccp_v6_reqsk_destructor,
	.send_reset	= dccp_v6_ctl_send_reset,
	.syn_ack_timeout = dccp_syn_ack_timeout,
};
304 
305 static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
306 {
307 	struct request_sock *req;
308 	struct dccp_request_sock *dreq;
309 	struct inet_request_sock *ireq;
310 	struct ipv6_pinfo *np = inet6_sk(sk);
311 	const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
312 	struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
313 
314 	if (skb->protocol == htons(ETH_P_IP))
315 		return dccp_v4_conn_request(sk, skb);
316 
317 	if (!ipv6_unicast_destination(skb))
318 		return 0;	/* discard, don't send a reset here */
319 
320 	if (dccp_bad_service_code(sk, service)) {
321 		dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
322 		goto drop;
323 	}
324 	/*
325 	 * There are no SYN attacks on IPv6, yet...
326 	 */
327 	dcb->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
328 	if (inet_csk_reqsk_queue_is_full(sk))
329 		goto drop;
330 
331 	if (sk_acceptq_is_full(sk))
332 		goto drop;
333 
334 	req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk, true);
335 	if (req == NULL)
336 		goto drop;
337 
338 	if (dccp_reqsk_init(req, dccp_sk(sk), skb))
339 		goto drop_and_free;
340 
341 	dreq = dccp_rsk(req);
342 	if (dccp_parse_options(sk, dreq, skb))
343 		goto drop_and_free;
344 
345 	if (security_inet_conn_request(sk, skb, req))
346 		goto drop_and_free;
347 
348 	ireq = inet_rsk(req);
349 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
350 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
351 	ireq->ireq_family = AF_INET6;
352 
353 	if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
354 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
355 	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
356 		refcount_inc(&skb->users);
357 		ireq->pktopts = skb;
358 	}
359 	ireq->ir_iif = sk->sk_bound_dev_if;
360 
361 	/* So that link locals have meaning */
362 	if (!sk->sk_bound_dev_if &&
363 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
364 		ireq->ir_iif = inet6_iif(skb);
365 
366 	/*
367 	 * Step 3: Process LISTEN state
368 	 *
369 	 *   Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
370 	 *
371 	 * Setting S.SWL/S.SWH to is deferred to dccp_create_openreq_child().
372 	 */
373 	dreq->dreq_isr	   = dcb->dccpd_seq;
374 	dreq->dreq_gsr     = dreq->dreq_isr;
375 	dreq->dreq_iss	   = dccp_v6_init_sequence(skb);
376 	dreq->dreq_gss     = dreq->dreq_iss;
377 	dreq->dreq_service = service;
378 
379 	if (dccp_v6_send_response(sk, req))
380 		goto drop_and_free;
381 
382 	inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
383 	reqsk_put(req);
384 	return 0;
385 
386 drop_and_free:
387 	reqsk_free(req);
388 drop:
389 	__DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
390 	return -1;
391 }
392 
393 static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
394 					      struct sk_buff *skb,
395 					      struct request_sock *req,
396 					      struct dst_entry *dst,
397 					      struct request_sock *req_unhash,
398 					      bool *own_req)
399 {
400 	struct inet_request_sock *ireq = inet_rsk(req);
401 	struct ipv6_pinfo *newnp;
402 	const struct ipv6_pinfo *np = inet6_sk(sk);
403 	struct ipv6_txoptions *opt;
404 	struct inet_sock *newinet;
405 	struct dccp6_sock *newdp6;
406 	struct sock *newsk;
407 
408 	if (skb->protocol == htons(ETH_P_IP)) {
409 		/*
410 		 *	v6 mapped
411 		 */
412 		newsk = dccp_v4_request_recv_sock(sk, skb, req, dst,
413 						  req_unhash, own_req);
414 		if (newsk == NULL)
415 			return NULL;
416 
417 		newdp6 = (struct dccp6_sock *)newsk;
418 		newinet = inet_sk(newsk);
419 		newinet->pinet6 = &newdp6->inet6;
420 		newnp = inet6_sk(newsk);
421 
422 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
423 
424 		newnp->saddr = newsk->sk_v6_rcv_saddr;
425 
426 		inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped;
427 		newsk->sk_backlog_rcv = dccp_v4_do_rcv;
428 		newnp->pktoptions  = NULL;
429 		newnp->opt	   = NULL;
430 		newnp->ipv6_mc_list = NULL;
431 		newnp->ipv6_ac_list = NULL;
432 		newnp->ipv6_fl_list = NULL;
433 		newnp->mcast_oif   = inet6_iif(skb);
434 		newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit;
435 
436 		/*
437 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
438 		 * here, dccp_create_openreq_child now does this for us, see the comment in
439 		 * that function for the gory details. -acme
440 		 */
441 
442 		/* It is tricky place. Until this moment IPv4 tcp
443 		   worked with IPv6 icsk.icsk_af_ops.
444 		   Sync it now.
445 		 */
446 		dccp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
447 
448 		return newsk;
449 	}
450 
451 
452 	if (sk_acceptq_is_full(sk))
453 		goto out_overflow;
454 
455 	if (!dst) {
456 		struct flowi6 fl6;
457 
458 		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_DCCP);
459 		if (!dst)
460 			goto out;
461 	}
462 
463 	newsk = dccp_create_openreq_child(sk, req, skb);
464 	if (newsk == NULL)
465 		goto out_nonewsk;
466 
467 	/*
468 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
469 	 * count here, dccp_create_openreq_child now does this for us, see the
470 	 * comment in that function for the gory details. -acme
471 	 */
472 
473 	ip6_dst_store(newsk, dst, NULL, NULL);
474 	newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM |
475 						      NETIF_F_TSO);
476 	newdp6 = (struct dccp6_sock *)newsk;
477 	newinet = inet_sk(newsk);
478 	newinet->pinet6 = &newdp6->inet6;
479 	newnp = inet6_sk(newsk);
480 
481 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
482 
483 	newsk->sk_v6_daddr	= ireq->ir_v6_rmt_addr;
484 	newnp->saddr		= ireq->ir_v6_loc_addr;
485 	newsk->sk_v6_rcv_saddr	= ireq->ir_v6_loc_addr;
486 	newsk->sk_bound_dev_if	= ireq->ir_iif;
487 
488 	/* Now IPv6 options...
489 
490 	   First: no IPv4 options.
491 	 */
492 	newinet->inet_opt = NULL;
493 
494 	/* Clone RX bits */
495 	newnp->rxopt.all = np->rxopt.all;
496 
497 	newnp->ipv6_mc_list = NULL;
498 	newnp->ipv6_ac_list = NULL;
499 	newnp->ipv6_fl_list = NULL;
500 	newnp->pktoptions = NULL;
501 	newnp->opt	  = NULL;
502 	newnp->mcast_oif  = inet6_iif(skb);
503 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
504 
505 	/*
506 	 * Clone native IPv6 options from listening socket (if any)
507 	 *
508 	 * Yes, keeping reference count would be much more clever, but we make
509 	 * one more one thing there: reattach optmem to newsk.
510 	 */
511 	opt = ireq->ipv6_opt;
512 	if (!opt)
513 		opt = rcu_dereference(np->opt);
514 	if (opt) {
515 		opt = ipv6_dup_options(newsk, opt);
516 		RCU_INIT_POINTER(newnp->opt, opt);
517 	}
518 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
519 	if (opt)
520 		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
521 						    opt->opt_flen;
522 
523 	dccp_sync_mss(newsk, dst_mtu(dst));
524 
525 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
526 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
527 
528 	if (__inet_inherit_port(sk, newsk) < 0) {
529 		inet_csk_prepare_forced_close(newsk);
530 		dccp_done(newsk);
531 		goto out;
532 	}
533 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
534 	/* Clone pktoptions received with SYN, if we own the req */
535 	if (*own_req && ireq->pktopts) {
536 		newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC);
537 		consume_skb(ireq->pktopts);
538 		ireq->pktopts = NULL;
539 		if (newnp->pktoptions)
540 			skb_set_owner_r(newnp->pktoptions, newsk);
541 	}
542 
543 	return newsk;
544 
545 out_overflow:
546 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
547 out_nonewsk:
548 	dst_release(dst);
549 out:
550 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
551 	return NULL;
552 }
553 
554 /* The socket must have it's spinlock held when we get
555  * here.
556  *
557  * We have a potential double-lock case here, so even when
558  * doing backlog processing we use the BH locking scheme.
559  * This is because we cannot sleep with the original spinlock
560  * held.
561  */
562 static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
563 {
564 	struct ipv6_pinfo *np = inet6_sk(sk);
565 	struct sk_buff *opt_skb = NULL;
566 
567 	/* Imagine: socket is IPv6. IPv4 packet arrives,
568 	   goes to IPv4 receive handler and backlogged.
569 	   From backlog it always goes here. Kerboom...
570 	   Fortunately, dccp_rcv_established and rcv_established
571 	   handle them correctly, but it is not case with
572 	   dccp_v6_hnd_req and dccp_v6_ctl_send_reset().   --ANK
573 	 */
574 
575 	if (skb->protocol == htons(ETH_P_IP))
576 		return dccp_v4_do_rcv(sk, skb);
577 
578 	if (sk_filter(sk, skb))
579 		goto discard;
580 
581 	/*
582 	 * socket locking is here for SMP purposes as backlog rcv is currently
583 	 * called with bh processing disabled.
584 	 */
585 
586 	/* Do Stevens' IPV6_PKTOPTIONS.
587 
588 	   Yes, guys, it is the only place in our code, where we
589 	   may make it not affecting IPv4.
590 	   The rest of code is protocol independent,
591 	   and I do not like idea to uglify IPv4.
592 
593 	   Actually, all the idea behind IPV6_PKTOPTIONS
594 	   looks not very well thought. For now we latch
595 	   options, received in the last packet, enqueued
596 	   by tcp. Feel free to propose better solution.
597 					       --ANK (980728)
598 	 */
599 	if (np->rxopt.all)
600 	/*
601 	 * FIXME: Add handling of IPV6_PKTOPTIONS skb. See the comments below
602 	 *        (wrt ipv6_pktopions) and net/ipv6/tcp_ipv6.c for an example.
603 	 */
604 		opt_skb = skb_clone(skb, GFP_ATOMIC);
605 
606 	if (sk->sk_state == DCCP_OPEN) { /* Fast path */
607 		if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len))
608 			goto reset;
609 		if (opt_skb) {
610 			/* XXX This is where we would goto ipv6_pktoptions. */
611 			__kfree_skb(opt_skb);
612 		}
613 		return 0;
614 	}
615 
616 	/*
617 	 *  Step 3: Process LISTEN state
618 	 *     If S.state == LISTEN,
619 	 *	 If P.type == Request or P contains a valid Init Cookie option,
620 	 *	      (* Must scan the packet's options to check for Init
621 	 *		 Cookies.  Only Init Cookies are processed here,
622 	 *		 however; other options are processed in Step 8.  This
623 	 *		 scan need only be performed if the endpoint uses Init
624 	 *		 Cookies *)
625 	 *	      (* Generate a new socket and switch to that socket *)
626 	 *	      Set S := new socket for this port pair
627 	 *	      S.state = RESPOND
628 	 *	      Choose S.ISS (initial seqno) or set from Init Cookies
629 	 *	      Initialize S.GAR := S.ISS
630 	 *	      Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
631 	 *	      Continue with S.state == RESPOND
632 	 *	      (* A Response packet will be generated in Step 11 *)
633 	 *	 Otherwise,
634 	 *	      Generate Reset(No Connection) unless P.type == Reset
635 	 *	      Drop packet and return
636 	 *
637 	 * NOTE: the check for the packet types is done in
638 	 *	 dccp_rcv_state_process
639 	 */
640 
641 	if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len))
642 		goto reset;
643 	if (opt_skb) {
644 		/* XXX This is where we would goto ipv6_pktoptions. */
645 		__kfree_skb(opt_skb);
646 	}
647 	return 0;
648 
649 reset:
650 	dccp_v6_ctl_send_reset(sk, skb);
651 discard:
652 	if (opt_skb != NULL)
653 		__kfree_skb(opt_skb);
654 	kfree_skb(skb);
655 	return 0;
656 }
657 
/*
 * Protocol entry point for received DCCP-over-IPv6 packets (installed as
 * the .handler of dccp_v6_protocol).
 *
 * Validates the header and checksum, fills in the DCCP control block,
 * looks up the owning socket and passes the packet to it.  Packets without
 * a usable socket are answered with a Reset (unless they are Resets
 * themselves) and dropped.
 *
 * Returns 0 when the packet was consumed or dropped here, or -1 when
 * __sk_receive_skb() reports a non-zero result.
 */
static int dccp_v6_rcv(struct sk_buff *skb)
{
	const struct dccp_hdr *dh;
	bool refcounted;
	struct sock *sk;
	int min_cov;

	/* Step 1: Check header basics */

	if (dccp_invalid_packet(skb))
		goto discard_it;

	/* Step 1: If header checksum is incorrect, drop packet and return. */
	if (dccp_v6_csum_finish(skb, &ipv6_hdr(skb)->saddr,
				     &ipv6_hdr(skb)->daddr)) {
		DCCP_WARN("dropped packet with invalid checksum\n");
		goto discard_it;
	}

	dh = dccp_hdr(skb);

	/* Cache sequence number, type and ack number in the skb control block */
	DCCP_SKB_CB(skb)->dccpd_seq  = dccp_hdr_seq(dh);
	DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type;

	if (dccp_packet_without_ack(skb))
		DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ;
	else
		DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);

lookup:
	sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
			        dh->dccph_sport, dh->dccph_dport,
				inet6_iif(skb), &refcounted);
	if (!sk) {
		dccp_pr_debug("failed to look up flow ID in table and "
			      "get corresponding socket\n");
		goto no_dccp_socket;
	}

	/*
	 * Step 2:
	 *	... or S.state == TIMEWAIT,
	 *		Generate Reset(No Connection) unless P.type == Reset
	 *		Drop packet and return
	 */
	if (sk->sk_state == DCCP_TIME_WAIT) {
		dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n");
		inet_twsk_put(inet_twsk(sk));
		goto no_dccp_socket;
	}

	/*
	 * The lookup returned a request sock: continue on its listener.
	 */
	if (sk->sk_state == DCCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		struct sock *nsk;

		sk = req->rsk_listener;
		/* Listener is no longer listening: drop the request and retry */
		if (unlikely(sk->sk_state != DCCP_LISTEN)) {
			inet_csk_reqsk_queue_drop_and_put(sk, req);
			goto lookup;
		}
		/* Hold the listener ourselves; released on every exit below */
		sock_hold(sk);
		refcounted = true;
		nsk = dccp_check_req(sk, skb, req);
		if (!nsk) {
			reqsk_put(req);
			goto discard_and_relse;
		}
		if (nsk == sk) {
			/* No child created: fall through and deliver to the listener */
			reqsk_put(req);
		} else if (dccp_child_process(sk, nsk, skb)) {
			dccp_v6_ctl_send_reset(sk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}
	/*
	 * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage
	 *	o if MinCsCov = 0, only packets with CsCov = 0 are accepted
	 *	o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov
	 */
	min_cov = dccp_sk(sk)->dccps_pcrlen;
	if (dh->dccph_cscov  &&  (min_cov == 0 || dh->dccph_cscov < min_cov))  {
		dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n",
			      dh->dccph_cscov, min_cov);
		/* FIXME: send Data Dropped option (see also dccp_v4_rcv) */
		goto discard_and_relse;
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	/* dccph_doff counts 32-bit words; convert to a byte count */
	return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4,
				refcounted) ? -1 : 0;

no_dccp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;
	/*
	 * Step 2:
	 *	If no socket ...
	 *		Generate Reset(No Connection) unless P.type == Reset
	 *		Drop packet and return
	 */
	if (dh->dccph_type != DCCP_PKT_RESET) {
		DCCP_SKB_CB(skb)->dccpd_reset_code =
					DCCP_RESET_CODE_NO_CONNECTION;
		dccp_v6_ctl_send_reset(sk, skb);
	}

discard_it:
	kfree_skb(skb);
	return 0;

discard_and_relse:
	/* Drop the socket reference only if one is actually held */
	if (refcounted)
		sock_put(sk);
	goto discard_it;
}
778 
/*
 * connect() implementation for DCCPv6 sockets.
 *
 * @sk:       socket to connect
 * @uaddr:    destination, interpreted as a struct sockaddr_in6
 * @addr_len: length of @uaddr as passed by the caller
 *
 * v4-mapped destinations are handed to dccp_v4_connect() after switching
 * the socket to the mapped operations.  For native IPv6 destinations a
 * route is looked up, the source address is chosen, the socket is hashed
 * and the DCCP connection is started.
 *
 * Returns 0 on success or a negative errno.  On failure the destination
 * port and route capabilities of the socket are reset.
 */
static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			   int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr;
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct dccp_sock *dp = dccp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct ipv6_txoptions *opt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;

	dp->dccps_role = DCCP_ROLE_CLIENT;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	/* With sndflow set, a non-zero flow label must be known to the socket */
	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}
	/*
	 * connect() to INADDR_ANY means loopback (BSD'ism).
	 */
	if (ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		/* sin6_scope_id is only present in the full-size sockaddr_in6 */
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 * DCCP over IPv4
	 */
	if (addr_type == IPV6_ADDR_MAPPED) {
		/* Saved so it can be restored if the IPv4 connect fails */
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &dccp_ipv6_mapped;
		sk->sk_backlog_rcv = dccp_v4_do_rcv;

		err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
		if (err) {
			/* Undo the switch to the mapped operations */
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &dccp_ipv6_af_ops;
			sk->sk_backlog_rcv = dccp_v6_do_rcv;
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;
		return err;
	}

	/* A bound (non-any) receive address becomes the source address */
	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_DCCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	/* Unbound socket: adopt the source address left in the flow by the lookup */
	if (saddr == NULL) {
		saddr = &fl6.saddr;
		sk->sk_v6_rcv_saddr = *saddr;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;

	inet->inet_dport = usin->sin6_port;

	dccp_set_state(sk, DCCP_REQUESTING);
	err = inet6_hash_connect(&dccp_death_row, sk);
	if (err)
		goto late_failure;

	dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32,
						      sk->sk_v6_daddr.s6_addr32,
						      inet->inet_sport,
						      inet->inet_dport);
	err = dccp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	/* The state was already moved to REQUESTING: roll it back */
	dccp_set_state(sk, DCCP_CLOSED);
	__sk_dst_reset(sk);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
937 
/*
 * Address-family operations for native DCCP over IPv6.  Installed on every
 * socket by dccp_v6_init_sock() and restored by dccp_v6_connect() when a
 * v4-mapped connect attempt fails.
 */
static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = dccp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.conn_request	   = dccp_v6_conn_request,
	.syn_recv_sock	   = dccp_v6_request_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
};
954 
/*
 *	DCCP over IPv4 via INET6 API
 *
 *	Used for AF_INET6 sockets that talk to a v4-mapped peer: transmit,
 *	checksum and header rebuild use the IPv4 routines, while the socket
 *	option and address handling stay on the IPv6 side.
 */
static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = dccp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.conn_request	   = dccp_v6_conn_request,
	.syn_recv_sock	   = dccp_v6_request_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
};
974 
975 /* NOTE: A lot of things set to zero explicitly by call to
976  *       sk_alloc() so need not be done here.
977  */
978 static int dccp_v6_init_sock(struct sock *sk)
979 {
980 	static __u8 dccp_v6_ctl_sock_initialized;
981 	int err = dccp_init_sock(sk, dccp_v6_ctl_sock_initialized);
982 
983 	if (err == 0) {
984 		if (unlikely(!dccp_v6_ctl_sock_initialized))
985 			dccp_v6_ctl_sock_initialized = 1;
986 		inet_csk(sk)->icsk_af_ops = &dccp_ipv6_af_ops;
987 	}
988 
989 	return err;
990 }
991 
/*
 * Socket destruction hook: tear down the DCCP state first, then the
 * IPv6-specific state.
 */
static void dccp_v6_destroy_sock(struct sock *sk)
{
	dccp_destroy_sock(sk);
	inet6_destroy_sock(sk);
}
997 
/* Time-wait socket operations: only the object size is specified. */
static struct timewait_sock_ops dccp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct dccp6_timewait_sock),
};
1001 
/*
 * Protocol descriptor for DCCPv6 sockets.  Only connect, init, backlog_rcv
 * and destroy are implemented in this file; the other handlers are shared
 * DCCP or inet helpers defined elsewhere.
 */
static struct proto dccp_v6_prot = {
	.name		   = "DCCPv6",
	.owner		   = THIS_MODULE,
	.close		   = dccp_close,
	.connect	   = dccp_v6_connect,
	.disconnect	   = dccp_disconnect,
	.ioctl		   = dccp_ioctl,
	.init		   = dccp_v6_init_sock,
	.setsockopt	   = dccp_setsockopt,
	.getsockopt	   = dccp_getsockopt,
	.sendmsg	   = dccp_sendmsg,
	.recvmsg	   = dccp_recvmsg,
	.backlog_rcv	   = dccp_v6_do_rcv,
	.hash		   = inet6_hash,
	.unhash		   = inet_unhash,
	.accept		   = inet_csk_accept,
	.get_port	   = inet_csk_get_port,
	.shutdown	   = dccp_shutdown,
	.destroy	   = dccp_v6_destroy_sock,
	.orphan_count	   = &dccp_orphan_count,
	.max_header	   = MAX_DCCP_HEADER,
	.obj_size	   = sizeof(struct dccp6_sock),
	.slab_flags	   = SLAB_TYPESAFE_BY_RCU,
	.rsk_prot	   = &dccp6_request_sock_ops,
	.twsk_prot	   = &dccp6_timewait_sock_ops,
	.h.hashinfo	   = &dccp_hashinfo,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_dccp_setsockopt,
	.compat_getsockopt = compat_dccp_getsockopt,
#endif
};
1033 
/*
 * IPv6 protocol hooks: packet receive and ICMPv6 error handlers, registered
 * for IPPROTO_DCCP in dccp_v6_init().
 */
static const struct inet6_protocol dccp_v6_protocol = {
	.handler	= dccp_v6_rcv,
	.err_handler	= dccp_v6_err,
	.flags		= INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
};
1039 
/*
 * Socket-layer (struct socket) operations for PF_INET6 DCCP sockets.
 * socketpair, mmap and sendpage are wired to the sock_no_*() stubs.
 */
static const struct proto_ops inet6_dccp_ops = {
	.family		   = PF_INET6,
	.owner		   = THIS_MODULE,
	.release	   = inet6_release,
	.bind		   = inet6_bind,
	.connect	   = inet_stream_connect,
	.socketpair	   = sock_no_socketpair,
	.accept		   = inet_accept,
	.getname	   = inet6_getname,
	.poll		   = dccp_poll,
	.ioctl		   = inet6_ioctl,
	.listen		   = inet_dccp_listen,
	.shutdown	   = inet_shutdown,
	.setsockopt	   = sock_common_setsockopt,
	.getsockopt	   = sock_common_getsockopt,
	.sendmsg	   = inet_sendmsg,
	.recvmsg	   = sock_common_recvmsg,
	.mmap		   = sock_no_mmap,
	.sendpage	   = sock_no_sendpage,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_sock_common_setsockopt,
	.compat_getsockopt = compat_sock_common_getsockopt,
#endif
};
1064 
/*
 * Protocol switch entry tying (SOCK_DCCP, IPPROTO_DCCP) to the DCCPv6
 * protocol descriptor and socket operations above.
 */
static struct inet_protosw dccp_v6_protosw = {
	.type		= SOCK_DCCP,
	.protocol	= IPPROTO_DCCP,
	.prot		= &dccp_v6_prot,
	.ops		= &inet6_dccp_ops,
	.flags		= INET_PROTOSW_ICSK,
};
1072 
1073 static int __net_init dccp_v6_init_net(struct net *net)
1074 {
1075 	if (dccp_hashinfo.bhash == NULL)
1076 		return -ESOCKTNOSUPPORT;
1077 
1078 	return inet_ctl_sock_create(&net->dccp.v6_ctl_sk, PF_INET6,
1079 				    SOCK_DCCP, IPPROTO_DCCP, net);
1080 }
1081 
/* Per-namespace exit: destroy the control socket created by dccp_v6_init_net(). */
static void __net_exit dccp_v6_exit_net(struct net *net)
{
	inet_ctl_sock_destroy(net->dccp.v6_ctl_sk);
}
1086 
/*
 * Batched namespace exit: purge AF_INET6 time-wait sockets from the DCCP
 * hash tables.  @net_exit_list is not used.
 */
static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&dccp_hashinfo, AF_INET6);
}
1091 
/* Per-network-namespace lifecycle hooks for DCCPv6. */
static struct pernet_operations dccp_v6_ops = {
	.init   = dccp_v6_init_net,
	.exit   = dccp_v6_exit_net,
	.exit_batch = dccp_v6_exit_batch,
};
1097 
1098 static int __init dccp_v6_init(void)
1099 {
1100 	int err = proto_register(&dccp_v6_prot, 1);
1101 
1102 	if (err)
1103 		goto out;
1104 
1105 	inet6_register_protosw(&dccp_v6_protosw);
1106 
1107 	err = register_pernet_subsys(&dccp_v6_ops);
1108 	if (err)
1109 		goto out_destroy_ctl_sock;
1110 
1111 	err = inet6_add_protocol(&dccp_v6_protocol, IPPROTO_DCCP);
1112 	if (err)
1113 		goto out_unregister_proto;
1114 
1115 out:
1116 	return err;
1117 out_unregister_proto:
1118 	unregister_pernet_subsys(&dccp_v6_ops);
1119 out_destroy_ctl_sock:
1120 	inet6_unregister_protosw(&dccp_v6_protosw);
1121 	proto_unregister(&dccp_v6_prot);
1122 	goto out;
1123 }
1124 
/*
 * Module exit: remove the IPv6 protocol handler, the per-namespace
 * operations, the protocol switch entry and the proto, in that order.
 */
static void __exit dccp_v6_exit(void)
{
	inet6_del_protocol(&dccp_v6_protocol, IPPROTO_DCCP);
	unregister_pernet_subsys(&dccp_v6_ops);
	inet6_unregister_protosw(&dccp_v6_protosw);
	proto_unregister(&dccp_v6_prot);
}
1132 
module_init(dccp_v6_init);
module_exit(dccp_v6_exit);

/*
 * __stringify doesn't like enums, so use the SOCK_DCCP (6) and
 * IPPROTO_DCCP (33) values directly.  Also cover the case where the
 * protocol is not specified, i.e. net-pf-PF_INET6-proto-0-type-SOCK_DCCP.
 */
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 33, 6);
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 0, 6);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>");
MODULE_DESCRIPTION("DCCPv6 - Datagram Congestion Controlled Protocol");
1146