xref: /openbmc/linux/net/dccp/ipv6.c (revision 1c2dd16a)
1 /*
2  *	DCCP over IPv6
3  *	Linux INET6 implementation
4  *
5  *	Based on net/dccp6/ipv6.c
6  *
7  *	Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
8  *
9  *	This program is free software; you can redistribute it and/or
10  *      modify it under the terms of the GNU General Public License
11  *      as published by the Free Software Foundation; either version
12  *      2 of the License, or (at your option) any later version.
13  */
14 
15 #include <linux/module.h>
16 #include <linux/random.h>
17 #include <linux/slab.h>
18 #include <linux/xfrm.h>
19 
20 #include <net/addrconf.h>
21 #include <net/inet_common.h>
22 #include <net/inet_hashtables.h>
23 #include <net/inet_sock.h>
24 #include <net/inet6_connection_sock.h>
25 #include <net/inet6_hashtables.h>
26 #include <net/ip6_route.h>
27 #include <net/ipv6.h>
28 #include <net/protocol.h>
29 #include <net/transp_v6.h>
30 #include <net/ip6_checksum.h>
31 #include <net/xfrm.h>
32 #include <net/secure_seq.h>
33 
34 #include "dccp.h"
35 #include "ipv6.h"
36 #include "feat.h"
37 
/* The per-net dccp.v6_ctl_sk is used for sending RSTs and ACKs */

/*
 * Forward declarations: both operation tables are defined near the end of
 * this file but are referenced earlier, by dccp_v6_request_recv_sock() and
 * dccp_v6_connect().
 */
static const struct inet_connection_sock_af_ops dccp_ipv6_mapped;
static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops;
42 
43 /* add pseudo-header to DCCP checksum stored in skb->csum */
44 static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb,
45 				      const struct in6_addr *saddr,
46 				      const struct in6_addr *daddr)
47 {
48 	return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum);
49 }
50 
51 static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb)
52 {
53 	struct ipv6_pinfo *np = inet6_sk(sk);
54 	struct dccp_hdr *dh = dccp_hdr(skb);
55 
56 	dccp_csum_outgoing(skb);
57 	dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &sk->sk_v6_daddr);
58 }
59 
60 static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb)
61 {
62 	return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
63 					     ipv6_hdr(skb)->saddr.s6_addr32,
64 					     dccp_hdr(skb)->dccph_dport,
65 					     dccp_hdr(skb)->dccph_sport     );
66 
67 }
68 
69 static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
70 			u8 type, u8 code, int offset, __be32 info)
71 {
72 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
73 	const struct dccp_hdr *dh;
74 	struct dccp_sock *dp;
75 	struct ipv6_pinfo *np;
76 	struct sock *sk;
77 	int err;
78 	__u64 seq;
79 	struct net *net = dev_net(skb->dev);
80 
81 	/* Only need dccph_dport & dccph_sport which are the first
82 	 * 4 bytes in dccp header.
83 	 * Our caller (icmpv6_notify()) already pulled 8 bytes for us.
84 	 */
85 	BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
86 	BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
87 	dh = (struct dccp_hdr *)(skb->data + offset);
88 
89 	sk = __inet6_lookup_established(net, &dccp_hashinfo,
90 					&hdr->daddr, dh->dccph_dport,
91 					&hdr->saddr, ntohs(dh->dccph_sport),
92 					inet6_iif(skb));
93 
94 	if (!sk) {
95 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
96 				  ICMP6_MIB_INERRORS);
97 		return;
98 	}
99 
100 	if (sk->sk_state == DCCP_TIME_WAIT) {
101 		inet_twsk_put(inet_twsk(sk));
102 		return;
103 	}
104 	seq = dccp_hdr_seq(dh);
105 	if (sk->sk_state == DCCP_NEW_SYN_RECV)
106 		return dccp_req_err(sk, seq);
107 
108 	bh_lock_sock(sk);
109 	if (sock_owned_by_user(sk))
110 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
111 
112 	if (sk->sk_state == DCCP_CLOSED)
113 		goto out;
114 
115 	dp = dccp_sk(sk);
116 	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) &&
117 	    !between48(seq, dp->dccps_awl, dp->dccps_awh)) {
118 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
119 		goto out;
120 	}
121 
122 	np = inet6_sk(sk);
123 
124 	if (type == NDISC_REDIRECT) {
125 		if (!sock_owned_by_user(sk)) {
126 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
127 
128 			if (dst)
129 				dst->ops->redirect(dst, sk, skb);
130 		}
131 		goto out;
132 	}
133 
134 	if (type == ICMPV6_PKT_TOOBIG) {
135 		struct dst_entry *dst = NULL;
136 
137 		if (!ip6_sk_accept_pmtu(sk))
138 			goto out;
139 
140 		if (sock_owned_by_user(sk))
141 			goto out;
142 		if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED))
143 			goto out;
144 
145 		dst = inet6_csk_update_pmtu(sk, ntohl(info));
146 		if (!dst)
147 			goto out;
148 
149 		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst))
150 			dccp_sync_mss(sk, dst_mtu(dst));
151 		goto out;
152 	}
153 
154 	icmpv6_err_convert(type, code, &err);
155 
156 	/* Might be for an request_sock */
157 	switch (sk->sk_state) {
158 	case DCCP_REQUESTING:
159 	case DCCP_RESPOND:  /* Cannot happen.
160 			       It can, it SYNs are crossed. --ANK */
161 		if (!sock_owned_by_user(sk)) {
162 			__DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
163 			sk->sk_err = err;
164 			/*
165 			 * Wake people up to see the error
166 			 * (see connect in sock.c)
167 			 */
168 			sk->sk_error_report(sk);
169 			dccp_done(sk);
170 		} else
171 			sk->sk_err_soft = err;
172 		goto out;
173 	}
174 
175 	if (!sock_owned_by_user(sk) && np->recverr) {
176 		sk->sk_err = err;
177 		sk->sk_error_report(sk);
178 	} else
179 		sk->sk_err_soft = err;
180 
181 out:
182 	bh_unlock_sock(sk);
183 	sock_put(sk);
184 }
185 
186 
187 static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req)
188 {
189 	struct inet_request_sock *ireq = inet_rsk(req);
190 	struct ipv6_pinfo *np = inet6_sk(sk);
191 	struct sk_buff *skb;
192 	struct in6_addr *final_p, final;
193 	struct flowi6 fl6;
194 	int err = -1;
195 	struct dst_entry *dst;
196 
197 	memset(&fl6, 0, sizeof(fl6));
198 	fl6.flowi6_proto = IPPROTO_DCCP;
199 	fl6.daddr = ireq->ir_v6_rmt_addr;
200 	fl6.saddr = ireq->ir_v6_loc_addr;
201 	fl6.flowlabel = 0;
202 	fl6.flowi6_oif = ireq->ir_iif;
203 	fl6.fl6_dport = ireq->ir_rmt_port;
204 	fl6.fl6_sport = htons(ireq->ir_num);
205 	security_req_classify_flow(req, flowi6_to_flowi(&fl6));
206 
207 
208 	rcu_read_lock();
209 	final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
210 	rcu_read_unlock();
211 
212 	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
213 	if (IS_ERR(dst)) {
214 		err = PTR_ERR(dst);
215 		dst = NULL;
216 		goto done;
217 	}
218 
219 	skb = dccp_make_response(sk, dst, req);
220 	if (skb != NULL) {
221 		struct dccp_hdr *dh = dccp_hdr(skb);
222 		struct ipv6_txoptions *opt;
223 
224 		dh->dccph_checksum = dccp_v6_csum_finish(skb,
225 							 &ireq->ir_v6_loc_addr,
226 							 &ireq->ir_v6_rmt_addr);
227 		fl6.daddr = ireq->ir_v6_rmt_addr;
228 		rcu_read_lock();
229 		opt = ireq->ipv6_opt;
230 		if (!opt)
231 			opt = rcu_dereference(np->opt);
232 		err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass);
233 		rcu_read_unlock();
234 		err = net_xmit_eval(err);
235 	}
236 
237 done:
238 	dst_release(dst);
239 	return err;
240 }
241 
242 static void dccp_v6_reqsk_destructor(struct request_sock *req)
243 {
244 	dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
245 	kfree(inet_rsk(req)->ipv6_opt);
246 	kfree_skb(inet_rsk(req)->pktopts);
247 }
248 
249 static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
250 {
251 	const struct ipv6hdr *rxip6h;
252 	struct sk_buff *skb;
253 	struct flowi6 fl6;
254 	struct net *net = dev_net(skb_dst(rxskb)->dev);
255 	struct sock *ctl_sk = net->dccp.v6_ctl_sk;
256 	struct dst_entry *dst;
257 
258 	if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET)
259 		return;
260 
261 	if (!ipv6_unicast_destination(rxskb))
262 		return;
263 
264 	skb = dccp_ctl_make_reset(ctl_sk, rxskb);
265 	if (skb == NULL)
266 		return;
267 
268 	rxip6h = ipv6_hdr(rxskb);
269 	dccp_hdr(skb)->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr,
270 							    &rxip6h->daddr);
271 
272 	memset(&fl6, 0, sizeof(fl6));
273 	fl6.daddr = rxip6h->saddr;
274 	fl6.saddr = rxip6h->daddr;
275 
276 	fl6.flowi6_proto = IPPROTO_DCCP;
277 	fl6.flowi6_oif = inet6_iif(rxskb);
278 	fl6.fl6_dport = dccp_hdr(skb)->dccph_dport;
279 	fl6.fl6_sport = dccp_hdr(skb)->dccph_sport;
280 	security_skb_classify_flow(rxskb, flowi6_to_flowi(&fl6));
281 
282 	/* sk = NULL, but it is safe for now. RST socket required. */
283 	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
284 	if (!IS_ERR(dst)) {
285 		skb_dst_set(skb, dst);
286 		ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0);
287 		DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
288 		DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
289 		return;
290 	}
291 
292 	kfree_skb(skb);
293 }
294 
/*
 * Request sock operations for DCCPv6.  Passed to inet_reqsk_alloc() in
 * dccp_v6_conn_request() and published through dccp_v6_prot.rsk_prot.
 */
static struct request_sock_ops dccp6_request_sock_ops = {
	.family		= AF_INET6,
	.obj_size	= sizeof(struct dccp6_request_sock),
	.rtx_syn_ack	= dccp_v6_send_response,
	.send_ack	= dccp_reqsk_send_ack,
	.destructor	= dccp_v6_reqsk_destructor,
	.send_reset	= dccp_v6_ctl_send_reset,
	.syn_ack_timeout = dccp_syn_ack_timeout,
};
304 
305 static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
306 {
307 	struct request_sock *req;
308 	struct dccp_request_sock *dreq;
309 	struct inet_request_sock *ireq;
310 	struct ipv6_pinfo *np = inet6_sk(sk);
311 	const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
312 	struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
313 
314 	if (skb->protocol == htons(ETH_P_IP))
315 		return dccp_v4_conn_request(sk, skb);
316 
317 	if (!ipv6_unicast_destination(skb))
318 		return 0;	/* discard, don't send a reset here */
319 
320 	if (dccp_bad_service_code(sk, service)) {
321 		dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
322 		goto drop;
323 	}
324 	/*
325 	 * There are no SYN attacks on IPv6, yet...
326 	 */
327 	dcb->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
328 	if (inet_csk_reqsk_queue_is_full(sk))
329 		goto drop;
330 
331 	if (sk_acceptq_is_full(sk))
332 		goto drop;
333 
334 	req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk, true);
335 	if (req == NULL)
336 		goto drop;
337 
338 	if (dccp_reqsk_init(req, dccp_sk(sk), skb))
339 		goto drop_and_free;
340 
341 	dreq = dccp_rsk(req);
342 	if (dccp_parse_options(sk, dreq, skb))
343 		goto drop_and_free;
344 
345 	if (security_inet_conn_request(sk, skb, req))
346 		goto drop_and_free;
347 
348 	ireq = inet_rsk(req);
349 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
350 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
351 	ireq->ireq_family = AF_INET6;
352 
353 	if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
354 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
355 	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
356 		atomic_inc(&skb->users);
357 		ireq->pktopts = skb;
358 	}
359 	ireq->ir_iif = sk->sk_bound_dev_if;
360 
361 	/* So that link locals have meaning */
362 	if (!sk->sk_bound_dev_if &&
363 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
364 		ireq->ir_iif = inet6_iif(skb);
365 
366 	/*
367 	 * Step 3: Process LISTEN state
368 	 *
369 	 *   Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
370 	 *
371 	 * Setting S.SWL/S.SWH to is deferred to dccp_create_openreq_child().
372 	 */
373 	dreq->dreq_isr	   = dcb->dccpd_seq;
374 	dreq->dreq_gsr     = dreq->dreq_isr;
375 	dreq->dreq_iss	   = dccp_v6_init_sequence(skb);
376 	dreq->dreq_gss     = dreq->dreq_iss;
377 	dreq->dreq_service = service;
378 
379 	if (dccp_v6_send_response(sk, req))
380 		goto drop_and_free;
381 
382 	inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
383 	return 0;
384 
385 drop_and_free:
386 	reqsk_free(req);
387 drop:
388 	__DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
389 	return -1;
390 }
391 
392 static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
393 					      struct sk_buff *skb,
394 					      struct request_sock *req,
395 					      struct dst_entry *dst,
396 					      struct request_sock *req_unhash,
397 					      bool *own_req)
398 {
399 	struct inet_request_sock *ireq = inet_rsk(req);
400 	struct ipv6_pinfo *newnp;
401 	const struct ipv6_pinfo *np = inet6_sk(sk);
402 	struct ipv6_txoptions *opt;
403 	struct inet_sock *newinet;
404 	struct dccp6_sock *newdp6;
405 	struct sock *newsk;
406 
407 	if (skb->protocol == htons(ETH_P_IP)) {
408 		/*
409 		 *	v6 mapped
410 		 */
411 		newsk = dccp_v4_request_recv_sock(sk, skb, req, dst,
412 						  req_unhash, own_req);
413 		if (newsk == NULL)
414 			return NULL;
415 
416 		newdp6 = (struct dccp6_sock *)newsk;
417 		newinet = inet_sk(newsk);
418 		newinet->pinet6 = &newdp6->inet6;
419 		newnp = inet6_sk(newsk);
420 
421 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
422 
423 		newnp->saddr = newsk->sk_v6_rcv_saddr;
424 
425 		inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped;
426 		newsk->sk_backlog_rcv = dccp_v4_do_rcv;
427 		newnp->pktoptions  = NULL;
428 		newnp->opt	   = NULL;
429 		newnp->mcast_oif   = inet6_iif(skb);
430 		newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit;
431 
432 		/*
433 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
434 		 * here, dccp_create_openreq_child now does this for us, see the comment in
435 		 * that function for the gory details. -acme
436 		 */
437 
438 		/* It is tricky place. Until this moment IPv4 tcp
439 		   worked with IPv6 icsk.icsk_af_ops.
440 		   Sync it now.
441 		 */
442 		dccp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
443 
444 		return newsk;
445 	}
446 
447 
448 	if (sk_acceptq_is_full(sk))
449 		goto out_overflow;
450 
451 	if (!dst) {
452 		struct flowi6 fl6;
453 
454 		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_DCCP);
455 		if (!dst)
456 			goto out;
457 	}
458 
459 	newsk = dccp_create_openreq_child(sk, req, skb);
460 	if (newsk == NULL)
461 		goto out_nonewsk;
462 
463 	/*
464 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
465 	 * count here, dccp_create_openreq_child now does this for us, see the
466 	 * comment in that function for the gory details. -acme
467 	 */
468 
469 	ip6_dst_store(newsk, dst, NULL, NULL);
470 	newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM |
471 						      NETIF_F_TSO);
472 	newdp6 = (struct dccp6_sock *)newsk;
473 	newinet = inet_sk(newsk);
474 	newinet->pinet6 = &newdp6->inet6;
475 	newnp = inet6_sk(newsk);
476 
477 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
478 
479 	newsk->sk_v6_daddr	= ireq->ir_v6_rmt_addr;
480 	newnp->saddr		= ireq->ir_v6_loc_addr;
481 	newsk->sk_v6_rcv_saddr	= ireq->ir_v6_loc_addr;
482 	newsk->sk_bound_dev_if	= ireq->ir_iif;
483 
484 	/* Now IPv6 options...
485 
486 	   First: no IPv4 options.
487 	 */
488 	newinet->inet_opt = NULL;
489 
490 	/* Clone RX bits */
491 	newnp->rxopt.all = np->rxopt.all;
492 
493 	newnp->pktoptions = NULL;
494 	newnp->opt	  = NULL;
495 	newnp->mcast_oif  = inet6_iif(skb);
496 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
497 
498 	/*
499 	 * Clone native IPv6 options from listening socket (if any)
500 	 *
501 	 * Yes, keeping reference count would be much more clever, but we make
502 	 * one more one thing there: reattach optmem to newsk.
503 	 */
504 	opt = ireq->ipv6_opt;
505 	if (!opt)
506 		opt = rcu_dereference(np->opt);
507 	if (opt) {
508 		opt = ipv6_dup_options(newsk, opt);
509 		RCU_INIT_POINTER(newnp->opt, opt);
510 	}
511 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
512 	if (opt)
513 		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
514 						    opt->opt_flen;
515 
516 	dccp_sync_mss(newsk, dst_mtu(dst));
517 
518 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
519 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
520 
521 	if (__inet_inherit_port(sk, newsk) < 0) {
522 		inet_csk_prepare_forced_close(newsk);
523 		dccp_done(newsk);
524 		goto out;
525 	}
526 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
527 	/* Clone pktoptions received with SYN, if we own the req */
528 	if (*own_req && ireq->pktopts) {
529 		newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC);
530 		consume_skb(ireq->pktopts);
531 		ireq->pktopts = NULL;
532 		if (newnp->pktoptions)
533 			skb_set_owner_r(newnp->pktoptions, newsk);
534 	}
535 
536 	return newsk;
537 
538 out_overflow:
539 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
540 out_nonewsk:
541 	dst_release(dst);
542 out:
543 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
544 	return NULL;
545 }
546 
/*
 * dccp_v6_do_rcv - per-socket receive processing (also used as backlog_rcv)
 * @sk:  socket the packet was matched to
 * @skb: received packet
 *
 * The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 *
 * IPv4 packets are passed to dccp_v4_do_rcv() and its return value is
 * returned; every IPv6 path returns 0.
 */
static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, dccp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   dccp_v6_hnd_req and dccp_v6_ctl_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return dccp_v4_do_rcv(sk, skb);

	if (sk_filter(sk, skb))
		goto discard;

	/*
	 * socket locking is here for SMP purposes as backlog rcv is currently
	 * called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
	/*
	 * FIXME: Add handling of IPV6_PKTOPTIONS skb. See the comments below
	 *        (wrt ipv6_pktoptions) and net/ipv6/tcp_ipv6.c for an example.
	 *        Until then the clone made here is only ever freed again.
	 */
		opt_skb = skb_clone(skb, GFP_ATOMIC);

	if (sk->sk_state == DCCP_OPEN) { /* Fast path */
		/* A non-zero return asks us to answer with a Reset */
		if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len))
			goto reset;
		if (opt_skb) {
			/* XXX This is where we would goto ipv6_pktoptions. */
			__kfree_skb(opt_skb);
		}
		return 0;
	}

	/*
	 *  Step 3: Process LISTEN state
	 *     If S.state == LISTEN,
	 *	 If P.type == Request or P contains a valid Init Cookie option,
	 *	      (* Must scan the packet's options to check for Init
	 *		 Cookies.  Only Init Cookies are processed here,
	 *		 however; other options are processed in Step 8.  This
	 *		 scan need only be performed if the endpoint uses Init
	 *		 Cookies *)
	 *	      (* Generate a new socket and switch to that socket *)
	 *	      Set S := new socket for this port pair
	 *	      S.state = RESPOND
	 *	      Choose S.ISS (initial seqno) or set from Init Cookies
	 *	      Initialize S.GAR := S.ISS
	 *	      Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
	 *	      Continue with S.state == RESPOND
	 *	      (* A Response packet will be generated in Step 11 *)
	 *	 Otherwise,
	 *	      Generate Reset(No Connection) unless P.type == Reset
	 *	      Drop packet and return
	 *
	 * NOTE: the check for the packet types is done in
	 *	 dccp_rcv_state_process
	 */

	if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len))
		goto reset;
	if (opt_skb) {
		/* XXX This is where we would goto ipv6_pktoptions. */
		__kfree_skb(opt_skb);
	}
	return 0;

reset:
	dccp_v6_ctl_send_reset(sk, skb);
discard:
	/* Error paths own both the clone (if any) and the original skb */
	if (opt_skb != NULL)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
}
650 
/*
 * dccp_v6_rcv - IPv6 protocol handler for DCCP (dccp_v6_protocol.handler)
 * @skb: received packet
 *
 * Validates the header and checksum, fills in the DCCP control block,
 * looks up the owning socket and hands the packet to it.  Packets with no
 * matching socket (or matching a TIME_WAIT socket) are answered with a
 * Reset(No Connection) unless they are Resets themselves.
 *
 * Returns -1 if __sk_receive_skb() returns non-zero, 0 in all other cases.
 */
static int dccp_v6_rcv(struct sk_buff *skb)
{
	const struct dccp_hdr *dh;
	bool refcounted;
	struct sock *sk;
	int min_cov;

	/* Step 1: Check header basics */

	if (dccp_invalid_packet(skb))
		goto discard_it;

	/* Step 1: If header checksum is incorrect, drop packet and return. */
	if (dccp_v6_csum_finish(skb, &ipv6_hdr(skb)->saddr,
				     &ipv6_hdr(skb)->daddr)) {
		DCCP_WARN("dropped packet with invalid checksum\n");
		goto discard_it;
	}

	dh = dccp_hdr(skb);

	/* Cache sequence number, type and ack number in the control block */
	DCCP_SKB_CB(skb)->dccpd_seq  = dccp_hdr_seq(dh);
	DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type;

	if (dccp_packet_without_ack(skb))
		DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ;
	else
		DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);

lookup:
	/* 'refcounted' reports whether the lookup took a reference on sk */
	sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
			        dh->dccph_sport, dh->dccph_dport,
				inet6_iif(skb), &refcounted);
	if (!sk) {
		dccp_pr_debug("failed to look up flow ID in table and "
			      "get corresponding socket\n");
		goto no_dccp_socket;
	}

	/*
	 * Step 2:
	 *	... or S.state == TIMEWAIT,
	 *		Generate Reset(No Connection) unless P.type == Reset
	 *		Drop packet and return
	 */
	if (sk->sk_state == DCCP_TIME_WAIT) {
		dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n");
		inet_twsk_put(inet_twsk(sk));
		goto no_dccp_socket;
	}

	if (sk->sk_state == DCCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		struct sock *nsk;

		/* From here on 'sk' is the listener that owns the request */
		sk = req->rsk_listener;
		if (unlikely(sk->sk_state != DCCP_LISTEN)) {
			/* Listener went away: drop the request and retry */
			inet_csk_reqsk_queue_drop_and_put(sk, req);
			goto lookup;
		}
		sock_hold(sk);
		refcounted = true;
		nsk = dccp_check_req(sk, skb, req);
		if (!nsk) {
			reqsk_put(req);
			goto discard_and_relse;
		}
		if (nsk == sk) {
			/* Keep processing on the listener itself */
			reqsk_put(req);
		} else if (dccp_child_process(sk, nsk, skb)) {
			dccp_v6_ctl_send_reset(sk, skb);
			goto discard_and_relse;
		} else {
			/* Child took the packet; drop our listener reference */
			sock_put(sk);
			return 0;
		}
	}
	/*
	 * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage
	 *	o if MinCsCov = 0, only packets with CsCov = 0 are accepted
	 *	o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov
	 */
	min_cov = dccp_sk(sk)->dccps_pcrlen;
	if (dh->dccph_cscov  &&  (min_cov == 0 || dh->dccph_cscov < min_cov))  {
		dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n",
			      dh->dccph_cscov, min_cov);
		/* FIXME: send Data Dropped option (see also dccp_v4_rcv) */
		goto discard_and_relse;
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4,
				refcounted) ? -1 : 0;

no_dccp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;
	/*
	 * Step 2:
	 *	If no socket ...
	 *		Generate Reset(No Connection) unless P.type == Reset
	 *		Drop packet and return
	 */
	if (dh->dccph_type != DCCP_PKT_RESET) {
		DCCP_SKB_CB(skb)->dccpd_reset_code =
					DCCP_RESET_CODE_NO_CONNECTION;
		dccp_v6_ctl_send_reset(sk, skb);
	}

discard_it:
	kfree_skb(skb);
	return 0;

discard_and_relse:
	/* Only drop the socket reference if one is actually held */
	if (refcounted)
		sock_put(sk);
	goto discard_it;
}
771 
/*
 * dccp_v6_connect - connect() implementation for DCCPv6 sockets
 * @sk:       socket being connected
 * @uaddr:    destination, interpreted as struct sockaddr_in6
 * @addr_len: length of @uaddr supplied by the caller
 *
 * Validates the destination, handles v4-mapped destinations by switching
 * the socket to the IPv4 operations and calling dccp_v4_connect(), and for
 * native IPv6 destinations routes the flow, fixes up the source address,
 * hashes the socket and starts the handshake via dccp_connect().
 *
 * Returns 0 on success or a negative errno (-EINVAL, -EAFNOSUPPORT,
 * -ENETUNREACH, or whatever the route lookup, hashing or the connect
 * helpers return).
 */
static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			   int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr;
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct dccp_sock *dp = dccp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct ipv6_txoptions *opt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;

	dp->dccps_role = DCCP_ROLE_CLIENT;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			/* Only checks that the label is known to this socket */
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}
	/*
	 * connect() to INADDR_ANY means loopback (BSD'ism).
	 */
	if (ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 * DCCP over IPv4
	 */
	if (addr_type == IPV6_ADDR_MAPPED) {
		/* Saved so it can be restored if the IPv4 connect fails */
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &dccp_ipv6_mapped;
		sk->sk_backlog_rcv = dccp_v4_do_rcv;

		err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
		if (err) {
			/* Undo the switch to the IPv4 operations */
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &dccp_ipv6_af_ops;
			sk->sk_backlog_rcv = dccp_v6_do_rcv;
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;
		return err;
	}

	/* Use the bound source address, if there is one */
	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_DCCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	/* No bound address: adopt the source address left in the flow */
	if (saddr == NULL) {
		saddr = &fl6.saddr;
		sk->sk_v6_rcv_saddr = *saddr;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;

	inet->inet_dport = usin->sin6_port;

	dccp_set_state(sk, DCCP_REQUESTING);
	err = inet6_hash_connect(&dccp_death_row, sk);
	if (err)
		goto late_failure;

	dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32,
						      sk->sk_v6_daddr.s6_addr32,
						      inet->inet_sport,
						      inet->inet_dport);
	err = dccp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	/* The state was already changed and a route cached: undo both */
	dccp_set_state(sk, DCCP_CLOSED);
	__sk_dst_reset(sk);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
930 
/*
 * Address-family operations for native DCCP over IPv6.  Installed by
 * dccp_v6_init_sock() and restored by dccp_v6_connect() when a v4-mapped
 * connect attempt fails.
 */
static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = dccp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.conn_request	   = dccp_v6_conn_request,
	.syn_recv_sock	   = dccp_v6_request_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
};
947 
/*
 *	DCCP over IPv4 via INET6 API
 *
 *	Used for AF_INET6 sockets talking to v4-mapped peers: transmit,
 *	checksum and header rebuild use the IPv4 helpers, while the socket
 *	option and sockaddr handling stay IPv6.  Installed by
 *	dccp_v6_connect() and dccp_v6_request_recv_sock().
 */
static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = dccp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.conn_request	   = dccp_v6_conn_request,
	.syn_recv_sock	   = dccp_v6_request_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
};
967 
968 /* NOTE: A lot of things set to zero explicitly by call to
969  *       sk_alloc() so need not be done here.
970  */
971 static int dccp_v6_init_sock(struct sock *sk)
972 {
973 	static __u8 dccp_v6_ctl_sock_initialized;
974 	int err = dccp_init_sock(sk, dccp_v6_ctl_sock_initialized);
975 
976 	if (err == 0) {
977 		if (unlikely(!dccp_v6_ctl_sock_initialized))
978 			dccp_v6_ctl_sock_initialized = 1;
979 		inet_csk(sk)->icsk_af_ops = &dccp_ipv6_af_ops;
980 	}
981 
982 	return err;
983 }
984 
/*
 * Socket destroy hook (dccp_v6_prot.destroy): run the DCCP teardown first,
 * then the generic IPv6 socket teardown.
 */
static void dccp_v6_destroy_sock(struct sock *sk)
{
	dccp_destroy_sock(sk);
	inet6_destroy_sock(sk);
}
990 
/* Time-wait sock operations; only the object size is set for DCCPv6. */
static struct timewait_sock_ops dccp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct dccp6_timewait_sock),
};
994 
/*
 * Transport protocol description for DCCPv6 sockets.  Registered with
 * proto_register() in dccp_v6_init() and bound to SOCK_DCCP/IPPROTO_DCCP
 * through dccp_v6_protosw.  Only connect, init, backlog_rcv, hash and
 * destroy are IPv6 specific; the remaining hooks are the common DCCP or
 * inet connection-socket helpers.
 */
static struct proto dccp_v6_prot = {
	.name		   = "DCCPv6",
	.owner		   = THIS_MODULE,
	.close		   = dccp_close,
	.connect	   = dccp_v6_connect,
	.disconnect	   = dccp_disconnect,
	.ioctl		   = dccp_ioctl,
	.init		   = dccp_v6_init_sock,
	.setsockopt	   = dccp_setsockopt,
	.getsockopt	   = dccp_getsockopt,
	.sendmsg	   = dccp_sendmsg,
	.recvmsg	   = dccp_recvmsg,
	.backlog_rcv	   = dccp_v6_do_rcv,
	.hash		   = inet6_hash,
	.unhash		   = inet_unhash,
	.accept		   = inet_csk_accept,
	.get_port	   = inet_csk_get_port,
	.shutdown	   = dccp_shutdown,
	.destroy	   = dccp_v6_destroy_sock,
	.orphan_count	   = &dccp_orphan_count,
	.max_header	   = MAX_DCCP_HEADER,
	.obj_size	   = sizeof(struct dccp6_sock),
	.slab_flags	   = SLAB_DESTROY_BY_RCU,
	.rsk_prot	   = &dccp6_request_sock_ops,
	.twsk_prot	   = &dccp6_timewait_sock_ops,
	.h.hashinfo	   = &dccp_hashinfo,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_dccp_setsockopt,
	.compat_getsockopt = compat_dccp_getsockopt,
#endif
};
1026 
/*
 * IPv6 layer hooks for IPPROTO_DCCP: packet receive and ICMPv6 error
 * handlers.  Added with inet6_add_protocol() in dccp_v6_init().
 */
static const struct inet6_protocol dccp_v6_protocol = {
	.handler	= dccp_v6_rcv,
	.err_handler	= dccp_v6_err,
	.flags		= INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
};
1032 
/*
 * Socket-layer (BSD API) operations for PF_INET6 DCCP sockets, referenced
 * by dccp_v6_protosw.  socketpair, mmap and sendpage are not supported
 * (sock_no_* stubs).
 */
static const struct proto_ops inet6_dccp_ops = {
	.family		   = PF_INET6,
	.owner		   = THIS_MODULE,
	.release	   = inet6_release,
	.bind		   = inet6_bind,
	.connect	   = inet_stream_connect,
	.socketpair	   = sock_no_socketpair,
	.accept		   = inet_accept,
	.getname	   = inet6_getname,
	.poll		   = dccp_poll,
	.ioctl		   = inet6_ioctl,
	.listen		   = inet_dccp_listen,
	.shutdown	   = inet_shutdown,
	.setsockopt	   = sock_common_setsockopt,
	.getsockopt	   = sock_common_getsockopt,
	.sendmsg	   = inet_sendmsg,
	.recvmsg	   = sock_common_recvmsg,
	.mmap		   = sock_no_mmap,
	.sendpage	   = sock_no_sendpage,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_sock_common_setsockopt,
	.compat_getsockopt = compat_sock_common_getsockopt,
#endif
};
1057 
/*
 * Protocol switch entry tying (SOCK_DCCP, IPPROTO_DCCP) to dccp_v6_prot and
 * inet6_dccp_ops.  Registered with inet6_register_protosw() in
 * dccp_v6_init().
 */
static struct inet_protosw dccp_v6_protosw = {
	.type		= SOCK_DCCP,
	.protocol	= IPPROTO_DCCP,
	.prot		= &dccp_v6_prot,
	.ops		= &inet6_dccp_ops,
	.flags		= INET_PROTOSW_ICSK,
};
1065 
1066 static int __net_init dccp_v6_init_net(struct net *net)
1067 {
1068 	if (dccp_hashinfo.bhash == NULL)
1069 		return -ESOCKTNOSUPPORT;
1070 
1071 	return inet_ctl_sock_create(&net->dccp.v6_ctl_sk, PF_INET6,
1072 				    SOCK_DCCP, IPPROTO_DCCP, net);
1073 }
1074 
/* Per-namespace exit: destroy the control socket made by dccp_v6_init_net(). */
static void __net_exit dccp_v6_exit_net(struct net *net)
{
	inet_ctl_sock_destroy(net->dccp.v6_ctl_sk);
}
1079 
/*
 * Batched namespace exit: purge AF_INET6 time-wait sockets from the DCCP
 * hash tables.  The list of exiting namespaces is not consulted here.
 */
static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&dccp_hashinfo, AF_INET6);
}
1084 
/* Per-network-namespace hooks, (un)registered in dccp_v6_init()/_exit(). */
static struct pernet_operations dccp_v6_ops = {
	.init   = dccp_v6_init_net,
	.exit   = dccp_v6_exit_net,
	.exit_batch = dccp_v6_exit_batch,
};
1090 
1091 static int __init dccp_v6_init(void)
1092 {
1093 	int err = proto_register(&dccp_v6_prot, 1);
1094 
1095 	if (err != 0)
1096 		goto out;
1097 
1098 	err = inet6_add_protocol(&dccp_v6_protocol, IPPROTO_DCCP);
1099 	if (err != 0)
1100 		goto out_unregister_proto;
1101 
1102 	inet6_register_protosw(&dccp_v6_protosw);
1103 
1104 	err = register_pernet_subsys(&dccp_v6_ops);
1105 	if (err != 0)
1106 		goto out_destroy_ctl_sock;
1107 out:
1108 	return err;
1109 
1110 out_destroy_ctl_sock:
1111 	inet6_del_protocol(&dccp_v6_protocol, IPPROTO_DCCP);
1112 	inet6_unregister_protosw(&dccp_v6_protosw);
1113 out_unregister_proto:
1114 	proto_unregister(&dccp_v6_prot);
1115 	goto out;
1116 }
1117 
1118 static void __exit dccp_v6_exit(void)
1119 {
1120 	unregister_pernet_subsys(&dccp_v6_ops);
1121 	inet6_del_protocol(&dccp_v6_protocol, IPPROTO_DCCP);
1122 	inet6_unregister_protosw(&dccp_v6_protosw);
1123 	proto_unregister(&dccp_v6_prot);
1124 }
1125 
module_init(dccp_v6_init);
module_exit(dccp_v6_exit);

/*
 * __stringify doesn't like enums, so use the SOCK_DCCP (6) and
 * IPPROTO_DCCP (33) values directly. Also cover the case where the protocol
 * is not specified, i.e. net-pf-PF_INET6-proto-0-type-SOCK_DCCP
 */
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 33, 6);
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 0, 6);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>");
MODULE_DESCRIPTION("DCCPv6 - Datagram Congestion Controlled Protocol");
1139