xref: /openbmc/linux/net/dccp/ipv6.c (revision fe0a5788)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	DCCP over IPv6
4  *	Linux INET6 implementation
5  *
6  *	Based on net/dccp6/ipv6.c
7  *
8  *	Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
9  */
10 
11 #include <linux/module.h>
12 #include <linux/random.h>
13 #include <linux/slab.h>
14 #include <linux/xfrm.h>
15 #include <linux/string.h>
16 
17 #include <net/addrconf.h>
18 #include <net/inet_common.h>
19 #include <net/inet_hashtables.h>
20 #include <net/inet_sock.h>
21 #include <net/inet6_connection_sock.h>
22 #include <net/inet6_hashtables.h>
23 #include <net/ip6_route.h>
24 #include <net/ipv6.h>
25 #include <net/protocol.h>
26 #include <net/transp_v6.h>
27 #include <net/ip6_checksum.h>
28 #include <net/xfrm.h>
29 #include <net/secure_seq.h>
30 #include <net/sock.h>
31 
32 #include "dccp.h"
33 #include "ipv6.h"
34 #include "feat.h"
35 
36 /* The per-net dccp.v6_ctl_sk is used for sending RSTs and ACKs */
37 
38 static const struct inet_connection_sock_af_ops dccp_ipv6_mapped;
39 static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops;
40 
41 /* add pseudo-header to DCCP checksum stored in skb->csum */
42 static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb,
43 				      const struct in6_addr *saddr,
44 				      const struct in6_addr *daddr)
45 {
46 	return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum);
47 }
48 
49 static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb)
50 {
51 	struct ipv6_pinfo *np = inet6_sk(sk);
52 	struct dccp_hdr *dh = dccp_hdr(skb);
53 
54 	dccp_csum_outgoing(skb);
55 	dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &sk->sk_v6_daddr);
56 }
57 
58 static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb)
59 {
60 	return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
61 					     ipv6_hdr(skb)->saddr.s6_addr32,
62 					     dccp_hdr(skb)->dccph_dport,
63 					     dccp_hdr(skb)->dccph_sport     );
64 
65 }
66 
67 static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
68 			u8 type, u8 code, int offset, __be32 info)
69 {
70 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
71 	const struct dccp_hdr *dh;
72 	struct dccp_sock *dp;
73 	struct ipv6_pinfo *np;
74 	struct sock *sk;
75 	int err;
76 	__u64 seq;
77 	struct net *net = dev_net(skb->dev);
78 
79 	/* Only need dccph_dport & dccph_sport which are the first
80 	 * 4 bytes in dccp header.
81 	 * Our caller (icmpv6_notify()) already pulled 8 bytes for us.
82 	 */
83 	BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
84 	BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
85 	dh = (struct dccp_hdr *)(skb->data + offset);
86 
87 	sk = __inet6_lookup_established(net, &dccp_hashinfo,
88 					&hdr->daddr, dh->dccph_dport,
89 					&hdr->saddr, ntohs(dh->dccph_sport),
90 					inet6_iif(skb), 0);
91 
92 	if (!sk) {
93 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
94 				  ICMP6_MIB_INERRORS);
95 		return -ENOENT;
96 	}
97 
98 	if (sk->sk_state == DCCP_TIME_WAIT) {
99 		inet_twsk_put(inet_twsk(sk));
100 		return 0;
101 	}
102 	seq = dccp_hdr_seq(dh);
103 	if (sk->sk_state == DCCP_NEW_SYN_RECV) {
104 		dccp_req_err(sk, seq);
105 		return 0;
106 	}
107 
108 	bh_lock_sock(sk);
109 	if (sock_owned_by_user(sk))
110 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
111 
112 	if (sk->sk_state == DCCP_CLOSED)
113 		goto out;
114 
115 	dp = dccp_sk(sk);
116 	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) &&
117 	    !between48(seq, dp->dccps_awl, dp->dccps_awh)) {
118 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
119 		goto out;
120 	}
121 
122 	np = inet6_sk(sk);
123 
124 	if (type == NDISC_REDIRECT) {
125 		if (!sock_owned_by_user(sk)) {
126 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
127 
128 			if (dst)
129 				dst->ops->redirect(dst, sk, skb);
130 		}
131 		goto out;
132 	}
133 
134 	if (type == ICMPV6_PKT_TOOBIG) {
135 		struct dst_entry *dst = NULL;
136 
137 		if (!ip6_sk_accept_pmtu(sk))
138 			goto out;
139 
140 		if (sock_owned_by_user(sk))
141 			goto out;
142 		if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED))
143 			goto out;
144 
145 		dst = inet6_csk_update_pmtu(sk, ntohl(info));
146 		if (!dst)
147 			goto out;
148 
149 		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst))
150 			dccp_sync_mss(sk, dst_mtu(dst));
151 		goto out;
152 	}
153 
154 	icmpv6_err_convert(type, code, &err);
155 
156 	/* Might be for an request_sock */
157 	switch (sk->sk_state) {
158 	case DCCP_REQUESTING:
159 	case DCCP_RESPOND:  /* Cannot happen.
160 			       It can, it SYNs are crossed. --ANK */
161 		if (!sock_owned_by_user(sk)) {
162 			__DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
163 			sk->sk_err = err;
164 			/*
165 			 * Wake people up to see the error
166 			 * (see connect in sock.c)
167 			 */
168 			sk->sk_error_report(sk);
169 			dccp_done(sk);
170 		} else
171 			sk->sk_err_soft = err;
172 		goto out;
173 	}
174 
175 	if (!sock_owned_by_user(sk) && np->recverr) {
176 		sk->sk_err = err;
177 		sk->sk_error_report(sk);
178 	} else
179 		sk->sk_err_soft = err;
180 
181 out:
182 	bh_unlock_sock(sk);
183 	sock_put(sk);
184 	return 0;
185 }
186 
187 
188 static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req)
189 {
190 	struct inet_request_sock *ireq = inet_rsk(req);
191 	struct ipv6_pinfo *np = inet6_sk(sk);
192 	struct sk_buff *skb;
193 	struct in6_addr *final_p, final;
194 	struct flowi6 fl6;
195 	int err = -1;
196 	struct dst_entry *dst;
197 
198 	memset(&fl6, 0, sizeof(fl6));
199 	fl6.flowi6_proto = IPPROTO_DCCP;
200 	fl6.daddr = ireq->ir_v6_rmt_addr;
201 	fl6.saddr = ireq->ir_v6_loc_addr;
202 	fl6.flowlabel = 0;
203 	fl6.flowi6_oif = ireq->ir_iif;
204 	fl6.fl6_dport = ireq->ir_rmt_port;
205 	fl6.fl6_sport = htons(ireq->ir_num);
206 	security_req_classify_flow(req, flowi6_to_flowi(&fl6));
207 
208 
209 	rcu_read_lock();
210 	final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
211 	rcu_read_unlock();
212 
213 	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
214 	if (IS_ERR(dst)) {
215 		err = PTR_ERR(dst);
216 		dst = NULL;
217 		goto done;
218 	}
219 
220 	skb = dccp_make_response(sk, dst, req);
221 	if (skb != NULL) {
222 		struct dccp_hdr *dh = dccp_hdr(skb);
223 		struct ipv6_txoptions *opt;
224 
225 		dh->dccph_checksum = dccp_v6_csum_finish(skb,
226 							 &ireq->ir_v6_loc_addr,
227 							 &ireq->ir_v6_rmt_addr);
228 		fl6.daddr = ireq->ir_v6_rmt_addr;
229 		rcu_read_lock();
230 		opt = ireq->ipv6_opt;
231 		if (!opt)
232 			opt = rcu_dereference(np->opt);
233 		err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass,
234 			       sk->sk_priority);
235 		rcu_read_unlock();
236 		err = net_xmit_eval(err);
237 	}
238 
239 done:
240 	dst_release(dst);
241 	return err;
242 }
243 
244 static void dccp_v6_reqsk_destructor(struct request_sock *req)
245 {
246 	dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
247 	kfree(inet_rsk(req)->ipv6_opt);
248 	kfree_skb(inet_rsk(req)->pktopts);
249 }
250 
251 static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
252 {
253 	const struct ipv6hdr *rxip6h;
254 	struct sk_buff *skb;
255 	struct flowi6 fl6;
256 	struct net *net = dev_net(skb_dst(rxskb)->dev);
257 	struct sock *ctl_sk = net->dccp.v6_ctl_sk;
258 	struct dst_entry *dst;
259 
260 	if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET)
261 		return;
262 
263 	if (!ipv6_unicast_destination(rxskb))
264 		return;
265 
266 	skb = dccp_ctl_make_reset(ctl_sk, rxskb);
267 	if (skb == NULL)
268 		return;
269 
270 	rxip6h = ipv6_hdr(rxskb);
271 	dccp_hdr(skb)->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr,
272 							    &rxip6h->daddr);
273 
274 	memset(&fl6, 0, sizeof(fl6));
275 	fl6.daddr = rxip6h->saddr;
276 	fl6.saddr = rxip6h->daddr;
277 
278 	fl6.flowi6_proto = IPPROTO_DCCP;
279 	fl6.flowi6_oif = inet6_iif(rxskb);
280 	fl6.fl6_dport = dccp_hdr(skb)->dccph_dport;
281 	fl6.fl6_sport = dccp_hdr(skb)->dccph_sport;
282 	security_skb_classify_flow(rxskb, flowi6_to_flowi(&fl6));
283 
284 	/* sk = NULL, but it is safe for now. RST socket required. */
285 	dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
286 	if (!IS_ERR(dst)) {
287 		skb_dst_set(skb, dst);
288 		ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0, 0);
289 		DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
290 		DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
291 		return;
292 	}
293 
294 	kfree_skb(skb);
295 }
296 
297 static struct request_sock_ops dccp6_request_sock_ops = {
298 	.family		= AF_INET6,
299 	.obj_size	= sizeof(struct dccp6_request_sock),
300 	.rtx_syn_ack	= dccp_v6_send_response,
301 	.send_ack	= dccp_reqsk_send_ack,
302 	.destructor	= dccp_v6_reqsk_destructor,
303 	.send_reset	= dccp_v6_ctl_send_reset,
304 	.syn_ack_timeout = dccp_syn_ack_timeout,
305 };
306 
307 static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
308 {
309 	struct request_sock *req;
310 	struct dccp_request_sock *dreq;
311 	struct inet_request_sock *ireq;
312 	struct ipv6_pinfo *np = inet6_sk(sk);
313 	const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
314 	struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
315 
316 	if (skb->protocol == htons(ETH_P_IP))
317 		return dccp_v4_conn_request(sk, skb);
318 
319 	if (!ipv6_unicast_destination(skb))
320 		return 0;	/* discard, don't send a reset here */
321 
322 	if (dccp_bad_service_code(sk, service)) {
323 		dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
324 		goto drop;
325 	}
326 	/*
327 	 * There are no SYN attacks on IPv6, yet...
328 	 */
329 	dcb->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
330 	if (inet_csk_reqsk_queue_is_full(sk))
331 		goto drop;
332 
333 	if (sk_acceptq_is_full(sk))
334 		goto drop;
335 
336 	req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk, true);
337 	if (req == NULL)
338 		goto drop;
339 
340 	if (dccp_reqsk_init(req, dccp_sk(sk), skb))
341 		goto drop_and_free;
342 
343 	dreq = dccp_rsk(req);
344 	if (dccp_parse_options(sk, dreq, skb))
345 		goto drop_and_free;
346 
347 	if (security_inet_conn_request(sk, skb, req))
348 		goto drop_and_free;
349 
350 	ireq = inet_rsk(req);
351 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
352 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
353 	ireq->ireq_family = AF_INET6;
354 	ireq->ir_mark = inet_request_mark(sk, skb);
355 
356 	if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
357 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
358 	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
359 		refcount_inc(&skb->users);
360 		ireq->pktopts = skb;
361 	}
362 	ireq->ir_iif = sk->sk_bound_dev_if;
363 
364 	/* So that link locals have meaning */
365 	if (!sk->sk_bound_dev_if &&
366 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
367 		ireq->ir_iif = inet6_iif(skb);
368 
369 	/*
370 	 * Step 3: Process LISTEN state
371 	 *
372 	 *   Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
373 	 *
374 	 * Setting S.SWL/S.SWH to is deferred to dccp_create_openreq_child().
375 	 */
376 	dreq->dreq_isr	   = dcb->dccpd_seq;
377 	dreq->dreq_gsr     = dreq->dreq_isr;
378 	dreq->dreq_iss	   = dccp_v6_init_sequence(skb);
379 	dreq->dreq_gss     = dreq->dreq_iss;
380 	dreq->dreq_service = service;
381 
382 	if (dccp_v6_send_response(sk, req))
383 		goto drop_and_free;
384 
385 	inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
386 	reqsk_put(req);
387 	return 0;
388 
389 drop_and_free:
390 	reqsk_free(req);
391 drop:
392 	__DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
393 	return -1;
394 }
395 
396 static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
397 					      struct sk_buff *skb,
398 					      struct request_sock *req,
399 					      struct dst_entry *dst,
400 					      struct request_sock *req_unhash,
401 					      bool *own_req)
402 {
403 	struct inet_request_sock *ireq = inet_rsk(req);
404 	struct ipv6_pinfo *newnp;
405 	const struct ipv6_pinfo *np = inet6_sk(sk);
406 	struct ipv6_txoptions *opt;
407 	struct inet_sock *newinet;
408 	struct dccp6_sock *newdp6;
409 	struct sock *newsk;
410 
411 	if (skb->protocol == htons(ETH_P_IP)) {
412 		/*
413 		 *	v6 mapped
414 		 */
415 		newsk = dccp_v4_request_recv_sock(sk, skb, req, dst,
416 						  req_unhash, own_req);
417 		if (newsk == NULL)
418 			return NULL;
419 
420 		newdp6 = (struct dccp6_sock *)newsk;
421 		newinet = inet_sk(newsk);
422 		newinet->pinet6 = &newdp6->inet6;
423 		newnp = inet6_sk(newsk);
424 
425 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
426 
427 		newnp->saddr = newsk->sk_v6_rcv_saddr;
428 
429 		inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped;
430 		newsk->sk_backlog_rcv = dccp_v4_do_rcv;
431 		newnp->pktoptions  = NULL;
432 		newnp->opt	   = NULL;
433 		newnp->ipv6_mc_list = NULL;
434 		newnp->ipv6_ac_list = NULL;
435 		newnp->ipv6_fl_list = NULL;
436 		newnp->mcast_oif   = inet_iif(skb);
437 		newnp->mcast_hops  = ip_hdr(skb)->ttl;
438 
439 		/*
440 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
441 		 * here, dccp_create_openreq_child now does this for us, see the comment in
442 		 * that function for the gory details. -acme
443 		 */
444 
445 		/* It is tricky place. Until this moment IPv4 tcp
446 		   worked with IPv6 icsk.icsk_af_ops.
447 		   Sync it now.
448 		 */
449 		dccp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
450 
451 		return newsk;
452 	}
453 
454 
455 	if (sk_acceptq_is_full(sk))
456 		goto out_overflow;
457 
458 	if (!dst) {
459 		struct flowi6 fl6;
460 
461 		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_DCCP);
462 		if (!dst)
463 			goto out;
464 	}
465 
466 	newsk = dccp_create_openreq_child(sk, req, skb);
467 	if (newsk == NULL)
468 		goto out_nonewsk;
469 
470 	/*
471 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
472 	 * count here, dccp_create_openreq_child now does this for us, see the
473 	 * comment in that function for the gory details. -acme
474 	 */
475 
476 	ip6_dst_store(newsk, dst, NULL, NULL);
477 	newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM |
478 						      NETIF_F_TSO);
479 	newdp6 = (struct dccp6_sock *)newsk;
480 	newinet = inet_sk(newsk);
481 	newinet->pinet6 = &newdp6->inet6;
482 	newnp = inet6_sk(newsk);
483 
484 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
485 
486 	newsk->sk_v6_daddr	= ireq->ir_v6_rmt_addr;
487 	newnp->saddr		= ireq->ir_v6_loc_addr;
488 	newsk->sk_v6_rcv_saddr	= ireq->ir_v6_loc_addr;
489 	newsk->sk_bound_dev_if	= ireq->ir_iif;
490 
491 	/* Now IPv6 options...
492 
493 	   First: no IPv4 options.
494 	 */
495 	newinet->inet_opt = NULL;
496 
497 	/* Clone RX bits */
498 	newnp->rxopt.all = np->rxopt.all;
499 
500 	newnp->ipv6_mc_list = NULL;
501 	newnp->ipv6_ac_list = NULL;
502 	newnp->ipv6_fl_list = NULL;
503 	newnp->pktoptions = NULL;
504 	newnp->opt	  = NULL;
505 	newnp->mcast_oif  = inet6_iif(skb);
506 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
507 
508 	/*
509 	 * Clone native IPv6 options from listening socket (if any)
510 	 *
511 	 * Yes, keeping reference count would be much more clever, but we make
512 	 * one more one thing there: reattach optmem to newsk.
513 	 */
514 	opt = ireq->ipv6_opt;
515 	if (!opt)
516 		opt = rcu_dereference(np->opt);
517 	if (opt) {
518 		opt = ipv6_dup_options(newsk, opt);
519 		RCU_INIT_POINTER(newnp->opt, opt);
520 	}
521 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
522 	if (opt)
523 		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
524 						    opt->opt_flen;
525 
526 	dccp_sync_mss(newsk, dst_mtu(dst));
527 
528 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
529 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
530 
531 	if (__inet_inherit_port(sk, newsk) < 0) {
532 		inet_csk_prepare_forced_close(newsk);
533 		dccp_done(newsk);
534 		goto out;
535 	}
536 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
537 	/* Clone pktoptions received with SYN, if we own the req */
538 	if (*own_req && ireq->pktopts) {
539 		newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC);
540 		consume_skb(ireq->pktopts);
541 		ireq->pktopts = NULL;
542 		if (newnp->pktoptions)
543 			skb_set_owner_r(newnp->pktoptions, newsk);
544 	}
545 
546 	return newsk;
547 
548 out_overflow:
549 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
550 out_nonewsk:
551 	dst_release(dst);
552 out:
553 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
554 	return NULL;
555 }
556 
557 /* The socket must have it's spinlock held when we get
558  * here.
559  *
560  * We have a potential double-lock case here, so even when
561  * doing backlog processing we use the BH locking scheme.
562  * This is because we cannot sleep with the original spinlock
563  * held.
564  */
565 static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
566 {
567 	struct ipv6_pinfo *np = inet6_sk(sk);
568 	struct sk_buff *opt_skb = NULL;
569 
570 	/* Imagine: socket is IPv6. IPv4 packet arrives,
571 	   goes to IPv4 receive handler and backlogged.
572 	   From backlog it always goes here. Kerboom...
573 	   Fortunately, dccp_rcv_established and rcv_established
574 	   handle them correctly, but it is not case with
575 	   dccp_v6_hnd_req and dccp_v6_ctl_send_reset().   --ANK
576 	 */
577 
578 	if (skb->protocol == htons(ETH_P_IP))
579 		return dccp_v4_do_rcv(sk, skb);
580 
581 	if (sk_filter(sk, skb))
582 		goto discard;
583 
584 	/*
585 	 * socket locking is here for SMP purposes as backlog rcv is currently
586 	 * called with bh processing disabled.
587 	 */
588 
589 	/* Do Stevens' IPV6_PKTOPTIONS.
590 
591 	   Yes, guys, it is the only place in our code, where we
592 	   may make it not affecting IPv4.
593 	   The rest of code is protocol independent,
594 	   and I do not like idea to uglify IPv4.
595 
596 	   Actually, all the idea behind IPV6_PKTOPTIONS
597 	   looks not very well thought. For now we latch
598 	   options, received in the last packet, enqueued
599 	   by tcp. Feel free to propose better solution.
600 					       --ANK (980728)
601 	 */
602 	if (np->rxopt.all)
603 		opt_skb = skb_clone(skb, GFP_ATOMIC);
604 
605 	if (sk->sk_state == DCCP_OPEN) { /* Fast path */
606 		if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len))
607 			goto reset;
608 		if (opt_skb)
609 			goto ipv6_pktoptions;
610 		return 0;
611 	}
612 
613 	/*
614 	 *  Step 3: Process LISTEN state
615 	 *     If S.state == LISTEN,
616 	 *	 If P.type == Request or P contains a valid Init Cookie option,
617 	 *	      (* Must scan the packet's options to check for Init
618 	 *		 Cookies.  Only Init Cookies are processed here,
619 	 *		 however; other options are processed in Step 8.  This
620 	 *		 scan need only be performed if the endpoint uses Init
621 	 *		 Cookies *)
622 	 *	      (* Generate a new socket and switch to that socket *)
623 	 *	      Set S := new socket for this port pair
624 	 *	      S.state = RESPOND
625 	 *	      Choose S.ISS (initial seqno) or set from Init Cookies
626 	 *	      Initialize S.GAR := S.ISS
627 	 *	      Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
628 	 *	      Continue with S.state == RESPOND
629 	 *	      (* A Response packet will be generated in Step 11 *)
630 	 *	 Otherwise,
631 	 *	      Generate Reset(No Connection) unless P.type == Reset
632 	 *	      Drop packet and return
633 	 *
634 	 * NOTE: the check for the packet types is done in
635 	 *	 dccp_rcv_state_process
636 	 */
637 
638 	if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len))
639 		goto reset;
640 	if (opt_skb)
641 		goto ipv6_pktoptions;
642 	return 0;
643 
644 reset:
645 	dccp_v6_ctl_send_reset(sk, skb);
646 discard:
647 	if (opt_skb != NULL)
648 		__kfree_skb(opt_skb);
649 	kfree_skb(skb);
650 	return 0;
651 
652 /* Handling IPV6_PKTOPTIONS skb the similar
653  * way it's done for net/ipv6/tcp_ipv6.c
654  */
655 ipv6_pktoptions:
656 	if (!((1 << sk->sk_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) {
657 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
658 			np->mcast_oif = inet6_iif(opt_skb);
659 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
660 			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
661 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
662 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
663 		if (np->repflow)
664 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
665 		if (ipv6_opt_accepted(sk, opt_skb,
666 				      &DCCP_SKB_CB(opt_skb)->header.h6)) {
667 			skb_set_owner_r(opt_skb, sk);
668 			memmove(IP6CB(opt_skb),
669 				&DCCP_SKB_CB(opt_skb)->header.h6,
670 				sizeof(struct inet6_skb_parm));
671 			opt_skb = xchg(&np->pktoptions, opt_skb);
672 		} else {
673 			__kfree_skb(opt_skb);
674 			opt_skb = xchg(&np->pktoptions, NULL);
675 		}
676 	}
677 
678 	kfree_skb(opt_skb);
679 	return 0;
680 }
681 
682 static int dccp_v6_rcv(struct sk_buff *skb)
683 {
684 	const struct dccp_hdr *dh;
685 	bool refcounted;
686 	struct sock *sk;
687 	int min_cov;
688 
689 	/* Step 1: Check header basics */
690 
691 	if (dccp_invalid_packet(skb))
692 		goto discard_it;
693 
694 	/* Step 1: If header checksum is incorrect, drop packet and return. */
695 	if (dccp_v6_csum_finish(skb, &ipv6_hdr(skb)->saddr,
696 				     &ipv6_hdr(skb)->daddr)) {
697 		DCCP_WARN("dropped packet with invalid checksum\n");
698 		goto discard_it;
699 	}
700 
701 	dh = dccp_hdr(skb);
702 
703 	DCCP_SKB_CB(skb)->dccpd_seq  = dccp_hdr_seq(dh);
704 	DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type;
705 
706 	if (dccp_packet_without_ack(skb))
707 		DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ;
708 	else
709 		DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
710 
711 lookup:
712 	sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
713 			        dh->dccph_sport, dh->dccph_dport,
714 				inet6_iif(skb), 0, &refcounted);
715 	if (!sk) {
716 		dccp_pr_debug("failed to look up flow ID in table and "
717 			      "get corresponding socket\n");
718 		goto no_dccp_socket;
719 	}
720 
721 	/*
722 	 * Step 2:
723 	 *	... or S.state == TIMEWAIT,
724 	 *		Generate Reset(No Connection) unless P.type == Reset
725 	 *		Drop packet and return
726 	 */
727 	if (sk->sk_state == DCCP_TIME_WAIT) {
728 		dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n");
729 		inet_twsk_put(inet_twsk(sk));
730 		goto no_dccp_socket;
731 	}
732 
733 	if (sk->sk_state == DCCP_NEW_SYN_RECV) {
734 		struct request_sock *req = inet_reqsk(sk);
735 		struct sock *nsk;
736 
737 		sk = req->rsk_listener;
738 		if (unlikely(sk->sk_state != DCCP_LISTEN)) {
739 			inet_csk_reqsk_queue_drop_and_put(sk, req);
740 			goto lookup;
741 		}
742 		sock_hold(sk);
743 		refcounted = true;
744 		nsk = dccp_check_req(sk, skb, req);
745 		if (!nsk) {
746 			reqsk_put(req);
747 			goto discard_and_relse;
748 		}
749 		if (nsk == sk) {
750 			reqsk_put(req);
751 		} else if (dccp_child_process(sk, nsk, skb)) {
752 			dccp_v6_ctl_send_reset(sk, skb);
753 			goto discard_and_relse;
754 		} else {
755 			sock_put(sk);
756 			return 0;
757 		}
758 	}
759 	/*
760 	 * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage
761 	 *	o if MinCsCov = 0, only packets with CsCov = 0 are accepted
762 	 *	o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov
763 	 */
764 	min_cov = dccp_sk(sk)->dccps_pcrlen;
765 	if (dh->dccph_cscov  &&  (min_cov == 0 || dh->dccph_cscov < min_cov))  {
766 		dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n",
767 			      dh->dccph_cscov, min_cov);
768 		/* FIXME: send Data Dropped option (see also dccp_v4_rcv) */
769 		goto discard_and_relse;
770 	}
771 
772 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
773 		goto discard_and_relse;
774 
775 	return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4,
776 				refcounted) ? -1 : 0;
777 
778 no_dccp_socket:
779 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
780 		goto discard_it;
781 	/*
782 	 * Step 2:
783 	 *	If no socket ...
784 	 *		Generate Reset(No Connection) unless P.type == Reset
785 	 *		Drop packet and return
786 	 */
787 	if (dh->dccph_type != DCCP_PKT_RESET) {
788 		DCCP_SKB_CB(skb)->dccpd_reset_code =
789 					DCCP_RESET_CODE_NO_CONNECTION;
790 		dccp_v6_ctl_send_reset(sk, skb);
791 	}
792 
793 discard_it:
794 	kfree_skb(skb);
795 	return 0;
796 
797 discard_and_relse:
798 	if (refcounted)
799 		sock_put(sk);
800 	goto discard_it;
801 }
802 
803 static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
804 			   int addr_len)
805 {
806 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr;
807 	struct inet_connection_sock *icsk = inet_csk(sk);
808 	struct inet_sock *inet = inet_sk(sk);
809 	struct ipv6_pinfo *np = inet6_sk(sk);
810 	struct dccp_sock *dp = dccp_sk(sk);
811 	struct in6_addr *saddr = NULL, *final_p, final;
812 	struct ipv6_txoptions *opt;
813 	struct flowi6 fl6;
814 	struct dst_entry *dst;
815 	int addr_type;
816 	int err;
817 
818 	dp->dccps_role = DCCP_ROLE_CLIENT;
819 
820 	if (addr_len < SIN6_LEN_RFC2133)
821 		return -EINVAL;
822 
823 	if (usin->sin6_family != AF_INET6)
824 		return -EAFNOSUPPORT;
825 
826 	memset(&fl6, 0, sizeof(fl6));
827 
828 	if (np->sndflow) {
829 		fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
830 		IP6_ECN_flow_init(fl6.flowlabel);
831 		if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
832 			struct ip6_flowlabel *flowlabel;
833 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
834 			if (IS_ERR(flowlabel))
835 				return -EINVAL;
836 			fl6_sock_release(flowlabel);
837 		}
838 	}
839 	/*
840 	 * connect() to INADDR_ANY means loopback (BSD'ism).
841 	 */
842 	if (ipv6_addr_any(&usin->sin6_addr))
843 		usin->sin6_addr.s6_addr[15] = 1;
844 
845 	addr_type = ipv6_addr_type(&usin->sin6_addr);
846 
847 	if (addr_type & IPV6_ADDR_MULTICAST)
848 		return -ENETUNREACH;
849 
850 	if (addr_type & IPV6_ADDR_LINKLOCAL) {
851 		if (addr_len >= sizeof(struct sockaddr_in6) &&
852 		    usin->sin6_scope_id) {
853 			/* If interface is set while binding, indices
854 			 * must coincide.
855 			 */
856 			if (sk->sk_bound_dev_if &&
857 			    sk->sk_bound_dev_if != usin->sin6_scope_id)
858 				return -EINVAL;
859 
860 			sk->sk_bound_dev_if = usin->sin6_scope_id;
861 		}
862 
863 		/* Connect to link-local address requires an interface */
864 		if (!sk->sk_bound_dev_if)
865 			return -EINVAL;
866 	}
867 
868 	sk->sk_v6_daddr = usin->sin6_addr;
869 	np->flow_label = fl6.flowlabel;
870 
871 	/*
872 	 * DCCP over IPv4
873 	 */
874 	if (addr_type == IPV6_ADDR_MAPPED) {
875 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
876 		struct sockaddr_in sin;
877 
878 		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
879 
880 		if (__ipv6_only_sock(sk))
881 			return -ENETUNREACH;
882 
883 		sin.sin_family = AF_INET;
884 		sin.sin_port = usin->sin6_port;
885 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
886 
887 		icsk->icsk_af_ops = &dccp_ipv6_mapped;
888 		sk->sk_backlog_rcv = dccp_v4_do_rcv;
889 
890 		err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
891 		if (err) {
892 			icsk->icsk_ext_hdr_len = exthdrlen;
893 			icsk->icsk_af_ops = &dccp_ipv6_af_ops;
894 			sk->sk_backlog_rcv = dccp_v6_do_rcv;
895 			goto failure;
896 		}
897 		np->saddr = sk->sk_v6_rcv_saddr;
898 		return err;
899 	}
900 
901 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
902 		saddr = &sk->sk_v6_rcv_saddr;
903 
904 	fl6.flowi6_proto = IPPROTO_DCCP;
905 	fl6.daddr = sk->sk_v6_daddr;
906 	fl6.saddr = saddr ? *saddr : np->saddr;
907 	fl6.flowi6_oif = sk->sk_bound_dev_if;
908 	fl6.fl6_dport = usin->sin6_port;
909 	fl6.fl6_sport = inet->inet_sport;
910 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
911 
912 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
913 	final_p = fl6_update_dst(&fl6, opt, &final);
914 
915 	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
916 	if (IS_ERR(dst)) {
917 		err = PTR_ERR(dst);
918 		goto failure;
919 	}
920 
921 	if (saddr == NULL) {
922 		saddr = &fl6.saddr;
923 		sk->sk_v6_rcv_saddr = *saddr;
924 	}
925 
926 	/* set the source address */
927 	np->saddr = *saddr;
928 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
929 
930 	ip6_dst_store(sk, dst, NULL, NULL);
931 
932 	icsk->icsk_ext_hdr_len = 0;
933 	if (opt)
934 		icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;
935 
936 	inet->inet_dport = usin->sin6_port;
937 
938 	dccp_set_state(sk, DCCP_REQUESTING);
939 	err = inet6_hash_connect(&dccp_death_row, sk);
940 	if (err)
941 		goto late_failure;
942 
943 	dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32,
944 						      sk->sk_v6_daddr.s6_addr32,
945 						      inet->inet_sport,
946 						      inet->inet_dport);
947 	err = dccp_connect(sk);
948 	if (err)
949 		goto late_failure;
950 
951 	return 0;
952 
953 late_failure:
954 	dccp_set_state(sk, DCCP_CLOSED);
955 	__sk_dst_reset(sk);
956 failure:
957 	inet->inet_dport = 0;
958 	sk->sk_route_caps = 0;
959 	return err;
960 }
961 
962 static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops = {
963 	.queue_xmit	   = inet6_csk_xmit,
964 	.send_check	   = dccp_v6_send_check,
965 	.rebuild_header	   = inet6_sk_rebuild_header,
966 	.conn_request	   = dccp_v6_conn_request,
967 	.syn_recv_sock	   = dccp_v6_request_recv_sock,
968 	.net_header_len	   = sizeof(struct ipv6hdr),
969 	.setsockopt	   = ipv6_setsockopt,
970 	.getsockopt	   = ipv6_getsockopt,
971 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
972 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
973 };
974 
975 /*
976  *	DCCP over IPv4 via INET6 API
977  */
978 static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = {
979 	.queue_xmit	   = ip_queue_xmit,
980 	.send_check	   = dccp_v4_send_check,
981 	.rebuild_header	   = inet_sk_rebuild_header,
982 	.conn_request	   = dccp_v6_conn_request,
983 	.syn_recv_sock	   = dccp_v6_request_recv_sock,
984 	.net_header_len	   = sizeof(struct iphdr),
985 	.setsockopt	   = ipv6_setsockopt,
986 	.getsockopt	   = ipv6_getsockopt,
987 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
988 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
989 };
990 
991 /* NOTE: A lot of things set to zero explicitly by call to
992  *       sk_alloc() so need not be done here.
993  */
994 static int dccp_v6_init_sock(struct sock *sk)
995 {
996 	static __u8 dccp_v6_ctl_sock_initialized;
997 	int err = dccp_init_sock(sk, dccp_v6_ctl_sock_initialized);
998 
999 	if (err == 0) {
1000 		if (unlikely(!dccp_v6_ctl_sock_initialized))
1001 			dccp_v6_ctl_sock_initialized = 1;
1002 		inet_csk(sk)->icsk_af_ops = &dccp_ipv6_af_ops;
1003 	}
1004 
1005 	return err;
1006 }
1007 
/*
 * Socket destruction hook (dccp_v6_prot.destroy): run the DCCP teardown
 * first, then the IPv6 one.
 */
static void dccp_v6_destroy_sock(struct sock *sk)
{
	dccp_destroy_sock(sk);

	inet6_destroy_sock(sk);
}
1013 
1014 static struct timewait_sock_ops dccp6_timewait_sock_ops = {
1015 	.twsk_obj_size	= sizeof(struct dccp6_timewait_sock),
1016 };
1017 
1018 static struct proto dccp_v6_prot = {
1019 	.name		   = "DCCPv6",
1020 	.owner		   = THIS_MODULE,
1021 	.close		   = dccp_close,
1022 	.connect	   = dccp_v6_connect,
1023 	.disconnect	   = dccp_disconnect,
1024 	.ioctl		   = dccp_ioctl,
1025 	.init		   = dccp_v6_init_sock,
1026 	.setsockopt	   = dccp_setsockopt,
1027 	.getsockopt	   = dccp_getsockopt,
1028 	.sendmsg	   = dccp_sendmsg,
1029 	.recvmsg	   = dccp_recvmsg,
1030 	.backlog_rcv	   = dccp_v6_do_rcv,
1031 	.hash		   = inet6_hash,
1032 	.unhash		   = inet_unhash,
1033 	.accept		   = inet_csk_accept,
1034 	.get_port	   = inet_csk_get_port,
1035 	.shutdown	   = dccp_shutdown,
1036 	.destroy	   = dccp_v6_destroy_sock,
1037 	.orphan_count	   = &dccp_orphan_count,
1038 	.max_header	   = MAX_DCCP_HEADER,
1039 	.obj_size	   = sizeof(struct dccp6_sock),
1040 	.slab_flags	   = SLAB_TYPESAFE_BY_RCU,
1041 	.rsk_prot	   = &dccp6_request_sock_ops,
1042 	.twsk_prot	   = &dccp6_timewait_sock_ops,
1043 	.h.hashinfo	   = &dccp_hashinfo,
1044 };
1045 
1046 static const struct inet6_protocol dccp_v6_protocol = {
1047 	.handler	= dccp_v6_rcv,
1048 	.err_handler	= dccp_v6_err,
1049 	.flags		= INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
1050 };
1051 
1052 static const struct proto_ops inet6_dccp_ops = {
1053 	.family		   = PF_INET6,
1054 	.owner		   = THIS_MODULE,
1055 	.release	   = inet6_release,
1056 	.bind		   = inet6_bind,
1057 	.connect	   = inet_stream_connect,
1058 	.socketpair	   = sock_no_socketpair,
1059 	.accept		   = inet_accept,
1060 	.getname	   = inet6_getname,
1061 	.poll		   = dccp_poll,
1062 	.ioctl		   = inet6_ioctl,
1063 	.gettstamp	   = sock_gettstamp,
1064 	.listen		   = inet_dccp_listen,
1065 	.shutdown	   = inet_shutdown,
1066 	.setsockopt	   = sock_common_setsockopt,
1067 	.getsockopt	   = sock_common_getsockopt,
1068 	.sendmsg	   = inet_sendmsg,
1069 	.recvmsg	   = sock_common_recvmsg,
1070 	.mmap		   = sock_no_mmap,
1071 	.sendpage	   = sock_no_sendpage,
1072 #ifdef CONFIG_COMPAT
1073 	.compat_ioctl	   = inet6_compat_ioctl,
1074 #endif
1075 };
1076 
1077 static struct inet_protosw dccp_v6_protosw = {
1078 	.type		= SOCK_DCCP,
1079 	.protocol	= IPPROTO_DCCP,
1080 	.prot		= &dccp_v6_prot,
1081 	.ops		= &inet6_dccp_ops,
1082 	.flags		= INET_PROTOSW_ICSK,
1083 };
1084 
1085 static int __net_init dccp_v6_init_net(struct net *net)
1086 {
1087 	if (dccp_hashinfo.bhash == NULL)
1088 		return -ESOCKTNOSUPPORT;
1089 
1090 	return inet_ctl_sock_create(&net->dccp.v6_ctl_sk, PF_INET6,
1091 				    SOCK_DCCP, IPPROTO_DCCP, net);
1092 }
1093 
1094 static void __net_exit dccp_v6_exit_net(struct net *net)
1095 {
1096 	inet_ctl_sock_destroy(net->dccp.v6_ctl_sk);
1097 }
1098 
1099 static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list)
1100 {
1101 	inet_twsk_purge(&dccp_hashinfo, AF_INET6);
1102 }
1103 
1104 static struct pernet_operations dccp_v6_ops = {
1105 	.init   = dccp_v6_init_net,
1106 	.exit   = dccp_v6_exit_net,
1107 	.exit_batch = dccp_v6_exit_batch,
1108 };
1109 
1110 static int __init dccp_v6_init(void)
1111 {
1112 	int err = proto_register(&dccp_v6_prot, 1);
1113 
1114 	if (err)
1115 		goto out;
1116 
1117 	inet6_register_protosw(&dccp_v6_protosw);
1118 
1119 	err = register_pernet_subsys(&dccp_v6_ops);
1120 	if (err)
1121 		goto out_destroy_ctl_sock;
1122 
1123 	err = inet6_add_protocol(&dccp_v6_protocol, IPPROTO_DCCP);
1124 	if (err)
1125 		goto out_unregister_proto;
1126 
1127 out:
1128 	return err;
1129 out_unregister_proto:
1130 	unregister_pernet_subsys(&dccp_v6_ops);
1131 out_destroy_ctl_sock:
1132 	inet6_unregister_protosw(&dccp_v6_protosw);
1133 	proto_unregister(&dccp_v6_prot);
1134 	goto out;
1135 }
1136 
1137 static void __exit dccp_v6_exit(void)
1138 {
1139 	inet6_del_protocol(&dccp_v6_protocol, IPPROTO_DCCP);
1140 	unregister_pernet_subsys(&dccp_v6_ops);
1141 	inet6_unregister_protosw(&dccp_v6_protosw);
1142 	proto_unregister(&dccp_v6_prot);
1143 }
1144 
1145 module_init(dccp_v6_init);
1146 module_exit(dccp_v6_exit);
1147 
/*
 * __stringify doesn't like enums, so use the SOCK_DCCP (6) and IPPROTO_DCCP (33)
 * values directly. Also cover the case where the protocol is not specified,
 * i.e. net-pf-PF_INET6-proto-0-type-SOCK_DCCP
 */
1153 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 33, 6);
1154 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 0, 6);
1155 MODULE_LICENSE("GPL");
1156 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>");
1157 MODULE_DESCRIPTION("DCCPv6 - Datagram Congestion Controlled Protocol");
1158