xref: /openbmc/linux/net/dccp/ipv6.c (revision e6dec923)
1 /*
2  *	DCCP over IPv6
3  *	Linux INET6 implementation
4  *
5  *	Based on net/dccp6/ipv6.c
6  *
7  *	Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
8  *
9  *	This program is free software; you can redistribute it and/or
10  *      modify it under the terms of the GNU General Public License
11  *      as published by the Free Software Foundation; either version
12  *      2 of the License, or (at your option) any later version.
13  */
14 
15 #include <linux/module.h>
16 #include <linux/random.h>
17 #include <linux/slab.h>
18 #include <linux/xfrm.h>
19 
20 #include <net/addrconf.h>
21 #include <net/inet_common.h>
22 #include <net/inet_hashtables.h>
23 #include <net/inet_sock.h>
24 #include <net/inet6_connection_sock.h>
25 #include <net/inet6_hashtables.h>
26 #include <net/ip6_route.h>
27 #include <net/ipv6.h>
28 #include <net/protocol.h>
29 #include <net/transp_v6.h>
30 #include <net/ip6_checksum.h>
31 #include <net/xfrm.h>
32 #include <net/secure_seq.h>
33 
34 #include "dccp.h"
35 #include "ipv6.h"
36 #include "feat.h"
37 
38 /* The per-net dccp.v6_ctl_sk is used for sending RSTs and ACKs */
39 
40 static const struct inet_connection_sock_af_ops dccp_ipv6_mapped;
41 static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops;
42 
43 /* add pseudo-header to DCCP checksum stored in skb->csum */
44 static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb,
45 				      const struct in6_addr *saddr,
46 				      const struct in6_addr *daddr)
47 {
48 	return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum);
49 }
50 
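/* Compute the DCCP checksum for an outgoing skb and finalise it with the
 * IPv6 pseudo-header taken from the connected socket's addresses.
 */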
51 static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb)
52 {
53 	struct ipv6_pinfo *np = inet6_sk(sk);
54 	struct dccp_hdr *dh = dccp_hdr(skb);
55 
56 	dccp_csum_outgoing(skb);
57 	dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &sk->sk_v6_daddr);
58 }
59 
60 static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb)
61 {
62 	return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
63 					     ipv6_hdr(skb)->saddr.s6_addr32,
64 					     dccp_hdr(skb)->dccph_dport,
65 					     dccp_hdr(skb)->dccph_sport);
66 
67 }
68 
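/* ICMPv6 error handler: find the socket the reported packet belongs to and
 * react to redirects, "packet too big" (PMTU) messages and hard errors.
 */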
69 static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
70 			u8 type, u8 code, int offset, __be32 info)
71 {
72 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
73 	const struct dccp_hdr *dh;
74 	struct dccp_sock *dp;
75 	struct ipv6_pinfo *np;
76 	struct sock *sk;
77 	int err;
78 	__u64 seq;
79 	struct net *net = dev_net(skb->dev);
80 
81 	/* Only need dccph_dport & dccph_sport which are the first
82 	 * 4 bytes of the DCCP header.
83 	 * Our caller (icmpv6_notify()) already pulled 8 bytes for us.
84 	 */
85 	BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
86 	BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
87 	dh = (struct dccp_hdr *)(skb->data + offset);
88 
89 	sk = __inet6_lookup_established(net, &dccp_hashinfo,
90 					&hdr->daddr, dh->dccph_dport,
91 					&hdr->saddr, ntohs(dh->dccph_sport),
92 					inet6_iif(skb));
93 
94 	if (!sk) {
95 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
96 				  ICMP6_MIB_INERRORS);
97 		return;
98 	}
99 
100 	if (sk->sk_state == DCCP_TIME_WAIT) {
101 		inet_twsk_put(inet_twsk(sk));
102 		return;
103 	}
104 	seq = dccp_hdr_seq(dh);
105 	if (sk->sk_state == DCCP_NEW_SYN_RECV)
106 		return dccp_req_err(sk, seq);
107 
108 	bh_lock_sock(sk);
109 	if (sock_owned_by_user(sk))
110 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
111 
112 	if (sk->sk_state == DCCP_CLOSED)
113 		goto out;
114 
115 	dp = dccp_sk(sk);
116 	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) &&
117 	    !between48(seq, dp->dccps_awl, dp->dccps_awh)) {
118 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
119 		goto out;
120 	}
121 
122 	np = inet6_sk(sk);
123 
124 	if (type == NDISC_REDIRECT) {
125 		if (!sock_owned_by_user(sk)) {
126 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
127 
128 			if (dst)
129 				dst->ops->redirect(dst, sk, skb);
130 		}
131 		goto out;
132 	}
133 
134 	if (type == ICMPV6_PKT_TOOBIG) {
135 		struct dst_entry *dst = NULL;
136 
137 		if (!ip6_sk_accept_pmtu(sk))
138 			goto out;
139 
140 		if (sock_owned_by_user(sk))
141 			goto out;
142 		if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED))
143 			goto out;
144 
145 		dst = inet6_csk_update_pmtu(sk, ntohl(info));
146 		if (!dst)
147 			goto out;
148 
149 		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst))
150 			dccp_sync_mss(sk, dst_mtu(dst));
151 		goto out;
152 	}
153 
154 	icmpv6_err_convert(type, code, &err);
155 
156 	/* Might be for a request_sock */
157 	switch (sk->sk_state) {
158 	case DCCP_REQUESTING:
159 	case DCCP_RESPOND:  /* Cannot happen.
160 			       It can, if SYNs are crossed. --ANK */
161 		if (!sock_owned_by_user(sk)) {
162 			__DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
163 			sk->sk_err = err;
164 			/*
165 			 * Wake people up to see the error
166 			 * (see connect in sock.c)
167 			 */
168 			sk->sk_error_report(sk);
169 			dccp_done(sk);
170 		} else
171 			sk->sk_err_soft = err;
172 		goto out;
173 	}
174 
175 	if (!sock_owned_by_user(sk) && np->recverr) {
176 		sk->sk_err = err;
177 		sk->sk_error_report(sk);
178 	} else
179 		sk->sk_err_soft = err;
180 
181 out:
182 	bh_unlock_sock(sk);
183 	sock_put(sk);
184 }
185 
186 
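/* Build and send a DCCP-Response for @req: route the reply using the
 * request's addresses and ports, finish the checksum and hand the skb to
 * ip6_xmit().
 */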
187 static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req)
188 {
189 	struct inet_request_sock *ireq = inet_rsk(req);
190 	struct ipv6_pinfo *np = inet6_sk(sk);
191 	struct sk_buff *skb;
192 	struct in6_addr *final_p, final;
193 	struct flowi6 fl6;
194 	int err = -1;
195 	struct dst_entry *dst;
196 
197 	memset(&fl6, 0, sizeof(fl6));
198 	fl6.flowi6_proto = IPPROTO_DCCP;
199 	fl6.daddr = ireq->ir_v6_rmt_addr;
200 	fl6.saddr = ireq->ir_v6_loc_addr;
201 	fl6.flowlabel = 0;
202 	fl6.flowi6_oif = ireq->ir_iif;
203 	fl6.fl6_dport = ireq->ir_rmt_port;
204 	fl6.fl6_sport = htons(ireq->ir_num);
205 	security_req_classify_flow(req, flowi6_to_flowi(&fl6));
206 
207 
208 	rcu_read_lock();
209 	final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
210 	rcu_read_unlock();
211 
212 	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
213 	if (IS_ERR(dst)) {
214 		err = PTR_ERR(dst);
215 		dst = NULL;
216 		goto done;
217 	}
218 
219 	skb = dccp_make_response(sk, dst, req);
220 	if (skb != NULL) {
221 		struct dccp_hdr *dh = dccp_hdr(skb);
222 		struct ipv6_txoptions *opt;
223 
224 		dh->dccph_checksum = dccp_v6_csum_finish(skb,
225 							 &ireq->ir_v6_loc_addr,
226 							 &ireq->ir_v6_rmt_addr);
227 		fl6.daddr = ireq->ir_v6_rmt_addr;
228 		rcu_read_lock();
229 		opt = ireq->ipv6_opt;
230 		if (!opt)
231 			opt = rcu_dereference(np->opt);
232 		err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass);
233 		rcu_read_unlock();
234 		err = net_xmit_eval(err);
235 	}
236 
237 done:
238 	dst_release(dst);
239 	return err;
240 }
241 
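/* Release per-request state: pending feature-negotiation entries, any cloned
 * IPv6 tx options and a pktoptions skb saved from the Request.
 */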
242 static void dccp_v6_reqsk_destructor(struct request_sock *req)
243 {
244 	dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
245 	kfree(inet_rsk(req)->ipv6_opt);
246 	kfree_skb(inet_rsk(req)->pktopts);
247 }
248 
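/* Send a DCCP-Reset in reply to @rxskb via the per-net control socket.
 * Never reply to a Reset, and only answer unicast destinations.
 */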
249 static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
250 {
251 	const struct ipv6hdr *rxip6h;
252 	struct sk_buff *skb;
253 	struct flowi6 fl6;
254 	struct net *net = dev_net(skb_dst(rxskb)->dev);
255 	struct sock *ctl_sk = net->dccp.v6_ctl_sk;
256 	struct dst_entry *dst;
257 
258 	if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET)
259 		return;
260 
261 	if (!ipv6_unicast_destination(rxskb))
262 		return;
263 
264 	skb = dccp_ctl_make_reset(ctl_sk, rxskb);
265 	if (skb == NULL)
266 		return;
267 
268 	rxip6h = ipv6_hdr(rxskb);
269 	dccp_hdr(skb)->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr,
270 							    &rxip6h->daddr);
271 
272 	memset(&fl6, 0, sizeof(fl6));
273 	fl6.daddr = rxip6h->saddr;
274 	fl6.saddr = rxip6h->daddr;
275 
276 	fl6.flowi6_proto = IPPROTO_DCCP;
277 	fl6.flowi6_oif = inet6_iif(rxskb);
278 	fl6.fl6_dport = dccp_hdr(skb)->dccph_dport;
279 	fl6.fl6_sport = dccp_hdr(skb)->dccph_sport;
280 	security_skb_classify_flow(rxskb, flowi6_to_flowi(&fl6));
281 
282 	/* sk = NULL, but it is safe for now. RST socket required. */
283 	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
284 	if (!IS_ERR(dst)) {
285 		skb_dst_set(skb, dst);
286 		ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0);
287 		DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
288 		DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
289 		return;
290 	}
291 
292 	kfree_skb(skb);
293 }
294 
295 static struct request_sock_ops dccp6_request_sock_ops = {
296 	.family		= AF_INET6,
297 	.obj_size	= sizeof(struct dccp6_request_sock),
298 	.rtx_syn_ack	= dccp_v6_send_response,
299 	.send_ack	= dccp_reqsk_send_ack,
300 	.destructor	= dccp_v6_reqsk_destructor,
301 	.send_reset	= dccp_v6_ctl_send_reset,
302 	.syn_ack_timeout = dccp_syn_ack_timeout,
303 };
304 
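/* Handle a DCCP-Request arriving on a listening socket: validate the service
 * code, allocate and initialise a request socket and send the Response
 * (Step 3 of the RFC 4340 pseudocode).
 */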
305 static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
306 {
307 	struct request_sock *req;
308 	struct dccp_request_sock *dreq;
309 	struct inet_request_sock *ireq;
310 	struct ipv6_pinfo *np = inet6_sk(sk);
311 	const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
312 	struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
313 
314 	if (skb->protocol == htons(ETH_P_IP))
315 		return dccp_v4_conn_request(sk, skb);
316 
317 	if (!ipv6_unicast_destination(skb))
318 		return 0;	/* discard, don't send a reset here */
319 
320 	if (dccp_bad_service_code(sk, service)) {
321 		dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
322 		goto drop;
323 	}
324 	/*
325 	 * There are no SYN attacks on IPv6, yet...
326 	 */
327 	dcb->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
328 	if (inet_csk_reqsk_queue_is_full(sk))
329 		goto drop;
330 
331 	if (sk_acceptq_is_full(sk))
332 		goto drop;
333 
334 	req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk, true);
335 	if (req == NULL)
336 		goto drop;
337 
338 	if (dccp_reqsk_init(req, dccp_sk(sk), skb))
339 		goto drop_and_free;
340 
341 	dreq = dccp_rsk(req);
342 	if (dccp_parse_options(sk, dreq, skb))
343 		goto drop_and_free;
344 
345 	if (security_inet_conn_request(sk, skb, req))
346 		goto drop_and_free;
347 
348 	ireq = inet_rsk(req);
349 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
350 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
351 	ireq->ireq_family = AF_INET6;
352 
353 	if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
354 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
355 	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
356 		refcount_inc(&skb->users);
357 		ireq->pktopts = skb;
358 	}
359 	ireq->ir_iif = sk->sk_bound_dev_if;
360 
361 	/* So that link locals have meaning */
362 	if (!sk->sk_bound_dev_if &&
363 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
364 		ireq->ir_iif = inet6_iif(skb);
365 
366 	/*
367 	 * Step 3: Process LISTEN state
368 	 *
369 	 *   Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
370 	 *
371 	 * Setting S.SWL/S.SWH is deferred to dccp_create_openreq_child().
372 	 */
373 	dreq->dreq_isr	   = dcb->dccpd_seq;
374 	dreq->dreq_gsr     = dreq->dreq_isr;
375 	dreq->dreq_iss	   = dccp_v6_init_sequence(skb);
376 	dreq->dreq_gss     = dreq->dreq_iss;
377 	dreq->dreq_service = service;
378 
379 	if (dccp_v6_send_response(sk, req))
380 		goto drop_and_free;
381 
382 	inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
383 	return 0;
384 
385 drop_and_free:
386 	reqsk_free(req);
387 drop:
388 	__DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
389 	return -1;
390 }
391 
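/* Create the child socket once the handshake completes.  The v6-mapped IPv4
 * case is delegated to dccp_v4_request_recv_sock(); the native IPv6 case
 * copies addresses, IPv6 options and routing state from the request socket.
 */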
392 static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
393 					      struct sk_buff *skb,
394 					      struct request_sock *req,
395 					      struct dst_entry *dst,
396 					      struct request_sock *req_unhash,
397 					      bool *own_req)
398 {
399 	struct inet_request_sock *ireq = inet_rsk(req);
400 	struct ipv6_pinfo *newnp;
401 	const struct ipv6_pinfo *np = inet6_sk(sk);
402 	struct ipv6_txoptions *opt;
403 	struct inet_sock *newinet;
404 	struct dccp6_sock *newdp6;
405 	struct sock *newsk;
406 
407 	if (skb->protocol == htons(ETH_P_IP)) {
408 		/*
409 		 *	v6 mapped
410 		 */
411 		newsk = dccp_v4_request_recv_sock(sk, skb, req, dst,
412 						  req_unhash, own_req);
413 		if (newsk == NULL)
414 			return NULL;
415 
416 		newdp6 = (struct dccp6_sock *)newsk;
417 		newinet = inet_sk(newsk);
418 		newinet->pinet6 = &newdp6->inet6;
419 		newnp = inet6_sk(newsk);
420 
421 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
422 
423 		newnp->saddr = newsk->sk_v6_rcv_saddr;
424 
425 		inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped;
426 		newsk->sk_backlog_rcv = dccp_v4_do_rcv;
427 		newnp->pktoptions  = NULL;
428 		newnp->opt	   = NULL;
429 		newnp->ipv6_mc_list = NULL;
430 		newnp->ipv6_ac_list = NULL;
431 		newnp->ipv6_fl_list = NULL;
432 		newnp->mcast_oif   = inet6_iif(skb);
433 		newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit;
434 
435 		/*
436 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
437 		 * here, dccp_create_openreq_child now does this for us, see the comment in
438 		 * that function for the gory details. -acme
439 		 */
440 
441 		/* This is a tricky place. Until this moment the IPv4 code
442 		   path worked with the IPv6 icsk.icsk_af_ops.
443 		   Sync it now.
444 		 */
445 		dccp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
446 
447 		return newsk;
448 	}
449 
450 
451 	if (sk_acceptq_is_full(sk))
452 		goto out_overflow;
453 
454 	if (!dst) {
455 		struct flowi6 fl6;
456 
457 		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_DCCP);
458 		if (!dst)
459 			goto out;
460 	}
461 
462 	newsk = dccp_create_openreq_child(sk, req, skb);
463 	if (newsk == NULL)
464 		goto out_nonewsk;
465 
466 	/*
467 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
468 	 * count here, dccp_create_openreq_child now does this for us, see the
469 	 * comment in that function for the gory details. -acme
470 	 */
471 
472 	ip6_dst_store(newsk, dst, NULL, NULL);
473 	newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM |
474 						      NETIF_F_TSO);
475 	newdp6 = (struct dccp6_sock *)newsk;
476 	newinet = inet_sk(newsk);
477 	newinet->pinet6 = &newdp6->inet6;
478 	newnp = inet6_sk(newsk);
479 
480 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
481 
482 	newsk->sk_v6_daddr	= ireq->ir_v6_rmt_addr;
483 	newnp->saddr		= ireq->ir_v6_loc_addr;
484 	newsk->sk_v6_rcv_saddr	= ireq->ir_v6_loc_addr;
485 	newsk->sk_bound_dev_if	= ireq->ir_iif;
486 
487 	/* Now IPv6 options...
488 
489 	   First: no IPv4 options.
490 	 */
491 	newinet->inet_opt = NULL;
492 
493 	/* Clone RX bits */
494 	newnp->rxopt.all = np->rxopt.all;
495 
496 	newnp->ipv6_mc_list = NULL;
497 	newnp->ipv6_ac_list = NULL;
498 	newnp->ipv6_fl_list = NULL;
499 	newnp->pktoptions = NULL;
500 	newnp->opt	  = NULL;
501 	newnp->mcast_oif  = inet6_iif(skb);
502 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
503 
504 	/*
505 	 * Clone native IPv6 options from listening socket (if any)
506 	 *
507 	 * Yes, keeping a reference count would be much more clever, but we do
508 	 * one more thing here: reattach optmem to newsk.
509 	 */
510 	opt = ireq->ipv6_opt;
511 	if (!opt)
512 		opt = rcu_dereference(np->opt);
513 	if (opt) {
514 		opt = ipv6_dup_options(newsk, opt);
515 		RCU_INIT_POINTER(newnp->opt, opt);
516 	}
517 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
518 	if (opt)
519 		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
520 						    opt->opt_flen;
521 
522 	dccp_sync_mss(newsk, dst_mtu(dst));
523 
524 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
525 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
526 
527 	if (__inet_inherit_port(sk, newsk) < 0) {
528 		inet_csk_prepare_forced_close(newsk);
529 		dccp_done(newsk);
530 		goto out;
531 	}
532 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
533 	/* Clone pktoptions received with SYN, if we own the req */
534 	if (*own_req && ireq->pktopts) {
535 		newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC);
536 		consume_skb(ireq->pktopts);
537 		ireq->pktopts = NULL;
538 		if (newnp->pktoptions)
539 			skb_set_owner_r(newnp->pktoptions, newsk);
540 	}
541 
542 	return newsk;
543 
544 out_overflow:
545 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
546 out_nonewsk:
547 	dst_release(dst);
548 out:
549 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
550 	return NULL;
551 }
552 
553 /* The socket must have its spinlock held when we get
554  * here.
555  *
556  * We have a potential double-lock case here, so even when
557  * doing backlog processing we use the BH locking scheme.
558  * This is because we cannot sleep with the original spinlock
559  * held.
560  */
561 static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
562 {
563 	struct ipv6_pinfo *np = inet6_sk(sk);
564 	struct sk_buff *opt_skb = NULL;
565 
566 	/* Imagine: socket is IPv6. IPv4 packet arrives,
567 	   goes to the IPv4 receive handler and is backlogged.
568 	   From backlog it always goes here. Kerboom...
569 	   Fortunately, dccp_rcv_established and rcv_established
570 	   handle them correctly, but that is not the case with
571 	   dccp_v6_hnd_req and dccp_v6_ctl_send_reset().   --ANK
572 	 */
573 
574 	if (skb->protocol == htons(ETH_P_IP))
575 		return dccp_v4_do_rcv(sk, skb);
576 
577 	if (sk_filter(sk, skb))
578 		goto discard;
579 
580 	/*
581 	 * socket locking is here for SMP purposes as backlog rcv is currently
582 	 * called with bh processing disabled.
583 	 */
584 
585 	/* Do Stevens' IPV6_PKTOPTIONS.
586 
587 	   Yes, guys, it is the only place in our code where we
588 	   may make it not affect IPv4.
589 	   The rest of the code is protocol independent,
590 	   and I do not like the idea of uglifying IPv4.
591 
592 	   Actually, the whole idea behind IPV6_PKTOPTIONS
593 	   looks not very well thought out. For now we latch the
594 	   options received in the last packet, enqueued
595 	   by tcp. Feel free to propose a better solution.
596 					       --ANK (980728)
597 	 */
598 	if (np->rxopt.all)
599 	/*
600 	 * FIXME: Add handling of IPV6_PKTOPTIONS skb. See the comments below
601 	 *        (wrt ipv6_pktoptions) and net/ipv6/tcp_ipv6.c for an example.
602 	 */
603 		opt_skb = skb_clone(skb, GFP_ATOMIC);
604 
605 	if (sk->sk_state == DCCP_OPEN) { /* Fast path */
606 		if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len))
607 			goto reset;
608 		if (opt_skb) {
609 			/* XXX This is where we would goto ipv6_pktoptions. */
610 			__kfree_skb(opt_skb);
611 		}
612 		return 0;
613 	}
614 
615 	/*
616 	 *  Step 3: Process LISTEN state
617 	 *     If S.state == LISTEN,
618 	 *	 If P.type == Request or P contains a valid Init Cookie option,
619 	 *	      (* Must scan the packet's options to check for Init
620 	 *		 Cookies.  Only Init Cookies are processed here,
621 	 *		 however; other options are processed in Step 8.  This
622 	 *		 scan need only be performed if the endpoint uses Init
623 	 *		 Cookies *)
624 	 *	      (* Generate a new socket and switch to that socket *)
625 	 *	      Set S := new socket for this port pair
626 	 *	      S.state = RESPOND
627 	 *	      Choose S.ISS (initial seqno) or set from Init Cookies
628 	 *	      Initialize S.GAR := S.ISS
629 	 *	      Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
630 	 *	      Continue with S.state == RESPOND
631 	 *	      (* A Response packet will be generated in Step 11 *)
632 	 *	 Otherwise,
633 	 *	      Generate Reset(No Connection) unless P.type == Reset
634 	 *	      Drop packet and return
635 	 *
636 	 * NOTE: the check for the packet types is done in
637 	 *	 dccp_rcv_state_process
638 	 */
639 
640 	if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len))
641 		goto reset;
642 	if (opt_skb) {
643 		/* XXX This is where we would goto ipv6_pktoptions. */
644 		__kfree_skb(opt_skb);
645 	}
646 	return 0;
647 
648 reset:
649 	dccp_v6_ctl_send_reset(sk, skb);
650 discard:
651 	if (opt_skb != NULL)
652 		__kfree_skb(opt_skb);
653 	kfree_skb(skb);
654 	return 0;
655 }
656 
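/* Protocol receive handler (registered via dccp_v6_protocol): validate header
 * and checksum, look up the owning socket and deliver the packet, or send a
 * Reset if no connection exists.
 */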
657 static int dccp_v6_rcv(struct sk_buff *skb)
658 {
659 	const struct dccp_hdr *dh;
660 	bool refcounted;
661 	struct sock *sk;
662 	int min_cov;
663 
664 	/* Step 1: Check header basics */
665 
666 	if (dccp_invalid_packet(skb))
667 		goto discard_it;
668 
669 	/* Step 1: If header checksum is incorrect, drop packet and return. */
670 	if (dccp_v6_csum_finish(skb, &ipv6_hdr(skb)->saddr,
671 				     &ipv6_hdr(skb)->daddr)) {
672 		DCCP_WARN("dropped packet with invalid checksum\n");
673 		goto discard_it;
674 	}
675 
676 	dh = dccp_hdr(skb);
677 
678 	DCCP_SKB_CB(skb)->dccpd_seq  = dccp_hdr_seq(dh);
679 	DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type;
680 
681 	if (dccp_packet_without_ack(skb))
682 		DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ;
683 	else
684 		DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
685 
686 lookup:
687 	sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
688 			        dh->dccph_sport, dh->dccph_dport,
689 				inet6_iif(skb), &refcounted);
690 	if (!sk) {
691 		dccp_pr_debug("failed to look up flow ID in table and "
692 			      "get corresponding socket\n");
693 		goto no_dccp_socket;
694 	}
695 
696 	/*
697 	 * Step 2:
698 	 *	... or S.state == TIMEWAIT,
699 	 *		Generate Reset(No Connection) unless P.type == Reset
700 	 *		Drop packet and return
701 	 */
702 	if (sk->sk_state == DCCP_TIME_WAIT) {
703 		dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n");
704 		inet_twsk_put(inet_twsk(sk));
705 		goto no_dccp_socket;
706 	}
707 
708 	if (sk->sk_state == DCCP_NEW_SYN_RECV) {
709 		struct request_sock *req = inet_reqsk(sk);
710 		struct sock *nsk;
711 
712 		sk = req->rsk_listener;
713 		if (unlikely(sk->sk_state != DCCP_LISTEN)) {
714 			inet_csk_reqsk_queue_drop_and_put(sk, req);
715 			goto lookup;
716 		}
717 		sock_hold(sk);
718 		refcounted = true;
719 		nsk = dccp_check_req(sk, skb, req);
720 		if (!nsk) {
721 			reqsk_put(req);
722 			goto discard_and_relse;
723 		}
724 		if (nsk == sk) {
725 			reqsk_put(req);
726 		} else if (dccp_child_process(sk, nsk, skb)) {
727 			dccp_v6_ctl_send_reset(sk, skb);
728 			goto discard_and_relse;
729 		} else {
730 			sock_put(sk);
731 			return 0;
732 		}
733 	}
734 	/*
735 	 * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage
736 	 *	o if MinCsCov = 0, only packets with CsCov = 0 are accepted
737 	 *	o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov
738 	 */
739 	min_cov = dccp_sk(sk)->dccps_pcrlen;
740 	if (dh->dccph_cscov  &&  (min_cov == 0 || dh->dccph_cscov < min_cov))  {
741 		dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n",
742 			      dh->dccph_cscov, min_cov);
743 		/* FIXME: send Data Dropped option (see also dccp_v4_rcv) */
744 		goto discard_and_relse;
745 	}
746 
747 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
748 		goto discard_and_relse;
749 
750 	return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4,
751 				refcounted) ? -1 : 0;
752 
753 no_dccp_socket:
754 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
755 		goto discard_it;
756 	/*
757 	 * Step 2:
758 	 *	If no socket ...
759 	 *		Generate Reset(No Connection) unless P.type == Reset
760 	 *		Drop packet and return
761 	 */
762 	if (dh->dccph_type != DCCP_PKT_RESET) {
763 		DCCP_SKB_CB(skb)->dccpd_reset_code =
764 					DCCP_RESET_CODE_NO_CONNECTION;
765 		dccp_v6_ctl_send_reset(sk, skb);
766 	}
767 
768 discard_it:
769 	kfree_skb(skb);
770 	return 0;
771 
772 discard_and_relse:
773 	if (refcounted)
774 		sock_put(sk);
775 	goto discard_it;
776 }
777 
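/* Active open: resolve the destination (including v4-mapped addresses, which
 * fall back to dccp_v4_connect()), route the flow, choose the initial
 * sequence number and start the handshake via dccp_connect().
 */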
778 static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
779 			   int addr_len)
780 {
781 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr;
782 	struct inet_connection_sock *icsk = inet_csk(sk);
783 	struct inet_sock *inet = inet_sk(sk);
784 	struct ipv6_pinfo *np = inet6_sk(sk);
785 	struct dccp_sock *dp = dccp_sk(sk);
786 	struct in6_addr *saddr = NULL, *final_p, final;
787 	struct ipv6_txoptions *opt;
788 	struct flowi6 fl6;
789 	struct dst_entry *dst;
790 	int addr_type;
791 	int err;
792 
793 	dp->dccps_role = DCCP_ROLE_CLIENT;
794 
795 	if (addr_len < SIN6_LEN_RFC2133)
796 		return -EINVAL;
797 
798 	if (usin->sin6_family != AF_INET6)
799 		return -EAFNOSUPPORT;
800 
801 	memset(&fl6, 0, sizeof(fl6));
802 
803 	if (np->sndflow) {
804 		fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
805 		IP6_ECN_flow_init(fl6.flowlabel);
806 		if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
807 			struct ip6_flowlabel *flowlabel;
808 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
809 			if (flowlabel == NULL)
810 				return -EINVAL;
811 			fl6_sock_release(flowlabel);
812 		}
813 	}
814 	/*
815 	 * connect() to INADDR_ANY means loopback (BSD'ism).
816 	 */
817 	if (ipv6_addr_any(&usin->sin6_addr))
818 		usin->sin6_addr.s6_addr[15] = 1;
819 
820 	addr_type = ipv6_addr_type(&usin->sin6_addr);
821 
822 	if (addr_type & IPV6_ADDR_MULTICAST)
823 		return -ENETUNREACH;
824 
825 	if (addr_type & IPV6_ADDR_LINKLOCAL) {
826 		if (addr_len >= sizeof(struct sockaddr_in6) &&
827 		    usin->sin6_scope_id) {
828 			/* If interface is set while binding, indices
829 			 * must coincide.
830 			 */
831 			if (sk->sk_bound_dev_if &&
832 			    sk->sk_bound_dev_if != usin->sin6_scope_id)
833 				return -EINVAL;
834 
835 			sk->sk_bound_dev_if = usin->sin6_scope_id;
836 		}
837 
838 		/* Connect to link-local address requires an interface */
839 		if (!sk->sk_bound_dev_if)
840 			return -EINVAL;
841 	}
842 
843 	sk->sk_v6_daddr = usin->sin6_addr;
844 	np->flow_label = fl6.flowlabel;
845 
846 	/*
847 	 * DCCP over IPv4
848 	 */
849 	if (addr_type == IPV6_ADDR_MAPPED) {
850 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
851 		struct sockaddr_in sin;
852 
853 		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
854 
855 		if (__ipv6_only_sock(sk))
856 			return -ENETUNREACH;
857 
858 		sin.sin_family = AF_INET;
859 		sin.sin_port = usin->sin6_port;
860 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
861 
862 		icsk->icsk_af_ops = &dccp_ipv6_mapped;
863 		sk->sk_backlog_rcv = dccp_v4_do_rcv;
864 
865 		err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
866 		if (err) {
867 			icsk->icsk_ext_hdr_len = exthdrlen;
868 			icsk->icsk_af_ops = &dccp_ipv6_af_ops;
869 			sk->sk_backlog_rcv = dccp_v6_do_rcv;
870 			goto failure;
871 		}
872 		np->saddr = sk->sk_v6_rcv_saddr;
873 		return err;
874 	}
875 
876 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
877 		saddr = &sk->sk_v6_rcv_saddr;
878 
879 	fl6.flowi6_proto = IPPROTO_DCCP;
880 	fl6.daddr = sk->sk_v6_daddr;
881 	fl6.saddr = saddr ? *saddr : np->saddr;
882 	fl6.flowi6_oif = sk->sk_bound_dev_if;
883 	fl6.fl6_dport = usin->sin6_port;
884 	fl6.fl6_sport = inet->inet_sport;
885 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
886 
887 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
888 	final_p = fl6_update_dst(&fl6, opt, &final);
889 
890 	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
891 	if (IS_ERR(dst)) {
892 		err = PTR_ERR(dst);
893 		goto failure;
894 	}
895 
896 	if (saddr == NULL) {
897 		saddr = &fl6.saddr;
898 		sk->sk_v6_rcv_saddr = *saddr;
899 	}
900 
901 	/* set the source address */
902 	np->saddr = *saddr;
903 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
904 
905 	ip6_dst_store(sk, dst, NULL, NULL);
906 
907 	icsk->icsk_ext_hdr_len = 0;
908 	if (opt)
909 		icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;
910 
911 	inet->inet_dport = usin->sin6_port;
912 
913 	dccp_set_state(sk, DCCP_REQUESTING);
914 	err = inet6_hash_connect(&dccp_death_row, sk);
915 	if (err)
916 		goto late_failure;
917 
918 	dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32,
919 						      sk->sk_v6_daddr.s6_addr32,
920 						      inet->inet_sport,
921 						      inet->inet_dport);
922 	err = dccp_connect(sk);
923 	if (err)
924 		goto late_failure;
925 
926 	return 0;
927 
928 late_failure:
929 	dccp_set_state(sk, DCCP_CLOSED);
930 	__sk_dst_reset(sk);
931 failure:
932 	inet->inet_dport = 0;
933 	sk->sk_route_caps = 0;
934 	return err;
935 }
936 
937 static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops = {
938 	.queue_xmit	   = inet6_csk_xmit,
939 	.send_check	   = dccp_v6_send_check,
940 	.rebuild_header	   = inet6_sk_rebuild_header,
941 	.conn_request	   = dccp_v6_conn_request,
942 	.syn_recv_sock	   = dccp_v6_request_recv_sock,
943 	.net_header_len	   = sizeof(struct ipv6hdr),
944 	.setsockopt	   = ipv6_setsockopt,
945 	.getsockopt	   = ipv6_getsockopt,
946 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
947 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
948 #ifdef CONFIG_COMPAT
949 	.compat_setsockopt = compat_ipv6_setsockopt,
950 	.compat_getsockopt = compat_ipv6_getsockopt,
951 #endif
952 };
953 
954 /*
955  *	DCCP over IPv4 via INET6 API
956  */
957 static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = {
958 	.queue_xmit	   = ip_queue_xmit,
959 	.send_check	   = dccp_v4_send_check,
960 	.rebuild_header	   = inet_sk_rebuild_header,
961 	.conn_request	   = dccp_v6_conn_request,
962 	.syn_recv_sock	   = dccp_v6_request_recv_sock,
963 	.net_header_len	   = sizeof(struct iphdr),
964 	.setsockopt	   = ipv6_setsockopt,
965 	.getsockopt	   = ipv6_getsockopt,
966 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
967 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
968 #ifdef CONFIG_COMPAT
969 	.compat_setsockopt = compat_ipv6_setsockopt,
970 	.compat_getsockopt = compat_ipv6_getsockopt,
971 #endif
972 };
973 
974 /* NOTE: A lot of things are set to zero explicitly by the call to
975  *       sk_alloc(), so they need not be done here.
976  */
977 static int dccp_v6_init_sock(struct sock *sk)
978 {
979 	static __u8 dccp_v6_ctl_sock_initialized;
980 	int err = dccp_init_sock(sk, dccp_v6_ctl_sock_initialized);
981 
982 	if (err == 0) {
983 		if (unlikely(!dccp_v6_ctl_sock_initialized))
984 			dccp_v6_ctl_sock_initialized = 1;
985 		inet_csk(sk)->icsk_af_ops = &dccp_ipv6_af_ops;
986 	}
987 
988 	return err;
989 }
990 
991 static void dccp_v6_destroy_sock(struct sock *sk)
992 {
993 	dccp_destroy_sock(sk);
994 	inet6_destroy_sock(sk);
995 }
996 
997 static struct timewait_sock_ops dccp6_timewait_sock_ops = {
998 	.twsk_obj_size	= sizeof(struct dccp6_timewait_sock),
999 };
1000 
1001 static struct proto dccp_v6_prot = {
1002 	.name		   = "DCCPv6",
1003 	.owner		   = THIS_MODULE,
1004 	.close		   = dccp_close,
1005 	.connect	   = dccp_v6_connect,
1006 	.disconnect	   = dccp_disconnect,
1007 	.ioctl		   = dccp_ioctl,
1008 	.init		   = dccp_v6_init_sock,
1009 	.setsockopt	   = dccp_setsockopt,
1010 	.getsockopt	   = dccp_getsockopt,
1011 	.sendmsg	   = dccp_sendmsg,
1012 	.recvmsg	   = dccp_recvmsg,
1013 	.backlog_rcv	   = dccp_v6_do_rcv,
1014 	.hash		   = inet6_hash,
1015 	.unhash		   = inet_unhash,
1016 	.accept		   = inet_csk_accept,
1017 	.get_port	   = inet_csk_get_port,
1018 	.shutdown	   = dccp_shutdown,
1019 	.destroy	   = dccp_v6_destroy_sock,
1020 	.orphan_count	   = &dccp_orphan_count,
1021 	.max_header	   = MAX_DCCP_HEADER,
1022 	.obj_size	   = sizeof(struct dccp6_sock),
1023 	.slab_flags	   = SLAB_TYPESAFE_BY_RCU,
1024 	.rsk_prot	   = &dccp6_request_sock_ops,
1025 	.twsk_prot	   = &dccp6_timewait_sock_ops,
1026 	.h.hashinfo	   = &dccp_hashinfo,
1027 #ifdef CONFIG_COMPAT
1028 	.compat_setsockopt = compat_dccp_setsockopt,
1029 	.compat_getsockopt = compat_dccp_getsockopt,
1030 #endif
1031 };
1032 
1033 static const struct inet6_protocol dccp_v6_protocol = {
1034 	.handler	= dccp_v6_rcv,
1035 	.err_handler	= dccp_v6_err,
1036 	.flags		= INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
1037 };
1038 
1039 static const struct proto_ops inet6_dccp_ops = {
1040 	.family		   = PF_INET6,
1041 	.owner		   = THIS_MODULE,
1042 	.release	   = inet6_release,
1043 	.bind		   = inet6_bind,
1044 	.connect	   = inet_stream_connect,
1045 	.socketpair	   = sock_no_socketpair,
1046 	.accept		   = inet_accept,
1047 	.getname	   = inet6_getname,
1048 	.poll		   = dccp_poll,
1049 	.ioctl		   = inet6_ioctl,
1050 	.listen		   = inet_dccp_listen,
1051 	.shutdown	   = inet_shutdown,
1052 	.setsockopt	   = sock_common_setsockopt,
1053 	.getsockopt	   = sock_common_getsockopt,
1054 	.sendmsg	   = inet_sendmsg,
1055 	.recvmsg	   = sock_common_recvmsg,
1056 	.mmap		   = sock_no_mmap,
1057 	.sendpage	   = sock_no_sendpage,
1058 #ifdef CONFIG_COMPAT
1059 	.compat_setsockopt = compat_sock_common_setsockopt,
1060 	.compat_getsockopt = compat_sock_common_getsockopt,
1061 #endif
1062 };
1063 
1064 static struct inet_protosw dccp_v6_protosw = {
1065 	.type		= SOCK_DCCP,
1066 	.protocol	= IPPROTO_DCCP,
1067 	.prot		= &dccp_v6_prot,
1068 	.ops		= &inet6_dccp_ops,
1069 	.flags		= INET_PROTOSW_ICSK,
1070 };
1071 
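/* Per-namespace init: create the control socket used for sending RSTs and
 * ACKs (see the comment at the top of this file).
 */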
1072 static int __net_init dccp_v6_init_net(struct net *net)
1073 {
1074 	if (dccp_hashinfo.bhash == NULL)
1075 		return -ESOCKTNOSUPPORT;
1076 
1077 	return inet_ctl_sock_create(&net->dccp.v6_ctl_sk, PF_INET6,
1078 				    SOCK_DCCP, IPPROTO_DCCP, net);
1079 }
1080 
1081 static void __net_exit dccp_v6_exit_net(struct net *net)
1082 {
1083 	inet_ctl_sock_destroy(net->dccp.v6_ctl_sk);
1084 }
1085 
1086 static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list)
1087 {
1088 	inet_twsk_purge(&dccp_hashinfo, AF_INET6);
1089 }
1090 
1091 static struct pernet_operations dccp_v6_ops = {
1092 	.init   = dccp_v6_init_net,
1093 	.exit   = dccp_v6_exit_net,
1094 	.exit_batch = dccp_v6_exit_batch,
1095 };
1096 
1097 static int __init dccp_v6_init(void)
1098 {
1099 	int err = proto_register(&dccp_v6_prot, 1);
1100 
1101 	if (err)
1102 		goto out;
1103 
1104 	inet6_register_protosw(&dccp_v6_protosw);
1105 
1106 	err = register_pernet_subsys(&dccp_v6_ops);
1107 	if (err)
1108 		goto out_destroy_ctl_sock;
1109 
1110 	err = inet6_add_protocol(&dccp_v6_protocol, IPPROTO_DCCP);
1111 	if (err)
1112 		goto out_unregister_proto;
1113 
1114 out:
1115 	return err;
1116 out_unregister_proto:
1117 	unregister_pernet_subsys(&dccp_v6_ops);
1118 out_destroy_ctl_sock:
1119 	inet6_unregister_protosw(&dccp_v6_protosw);
1120 	proto_unregister(&dccp_v6_prot);
1121 	goto out;
1122 }
1123 
1124 static void __exit dccp_v6_exit(void)
1125 {
1126 	inet6_del_protocol(&dccp_v6_protocol, IPPROTO_DCCP);
1127 	unregister_pernet_subsys(&dccp_v6_ops);
1128 	inet6_unregister_protosw(&dccp_v6_protosw);
1129 	proto_unregister(&dccp_v6_prot);
1130 }
1131 
1132 module_init(dccp_v6_init);
1133 module_exit(dccp_v6_exit);
1134 
1135 /*
1136  * __stringify doesn't like enums, so use the SOCK_DCCP (6) and IPPROTO_DCCP (33)
1137  * values directly. Also cover the case where the protocol is not specified,
1138  * i.e. net-pf-PF_INET6-proto-0-type-SOCK_DCCP
1139  */
1140 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 33, 6);
1141 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 0, 6);
1142 MODULE_LICENSE("GPL");
1143 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>");
1144 MODULE_DESCRIPTION("DCCPv6 - Datagram Congestion Controlled Protocol");
1145