xref: /openbmc/linux/net/ipv6/tcp_ipv6.c (revision d5cb9783536a41df9f9cba5b0a1d78047ed787f7)
1 /*
2  *	TCP over IPv6
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
9  *
10  *	Based on:
11  *	linux/net/ipv4/tcp.c
12  *	linux/net/ipv4/tcp_input.c
13  *	linux/net/ipv4/tcp_output.c
14  *
15  *	Fixes:
16  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
17  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
18  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
19  *					a single port at the same time.
20  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
21  *
22  *	This program is free software; you can redistribute it and/or
23  *      modify it under the terms of the GNU General Public License
24  *      as published by the Free Software Foundation; either version
25  *      2 of the License, or (at your option) any later version.
26  */
27 
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
36 #include <linux/in.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
43 
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
47 
48 #include <net/tcp.h>
49 #include <net/ndisc.h>
50 #include <net/inet6_hashtables.h>
51 #include <net/ipv6.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
58 #include <net/xfrm.h>
59 #include <net/addrconf.h>
60 #include <net/snmp.h>
61 #include <net/dsfield.h>
62 
63 #include <asm/uaccess.h>
64 
65 #include <linux/proc_fs.h>
66 #include <linux/seq_file.h>
67 
68 static void	tcp_v6_send_reset(struct sk_buff *skb);
69 static void	tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
70 static void	tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
71 				  struct sk_buff *skb);
72 
73 static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
74 static int	tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
75 
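/*
 * Two alternative operation tables: ipv6_specific drives a socket that is
 * really talking IPv6, while ipv6_mapped is swapped in by tcp_v6_connect()
 * and tcp_v6_syn_recv_sock() when the peer is a V4-mapped address, so that
 * transmission goes through the IPv4 paths.  Both are defined near the end
 * of this file.
 */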
76 static struct tcp_func ipv6_mapped;
77 static struct tcp_func ipv6_specific;
78 
79 static inline int tcp_v6_bind_conflict(const struct sock *sk,
80 				       const struct inet_bind_bucket *tb)
81 {
82 	const struct sock *sk2;
83 	const struct hlist_node *node;
84 
85 	/* We must walk the whole port owner list in this case. -DaveM */
86 	sk_for_each_bound(sk2, node, &tb->owners) {
87 		if (sk != sk2 &&
88 		    (!sk->sk_bound_dev_if ||
89 		     !sk2->sk_bound_dev_if ||
90 		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
91 		    (!sk->sk_reuse || !sk2->sk_reuse ||
92 		     sk2->sk_state == TCP_LISTEN) &&
93 		     ipv6_rcv_saddr_equal(sk, sk2))
94 			break;
95 	}
96 
97 	return node != NULL;
98 }
99 
100 /* Grrr, addr_type already calculated by caller, but I don't want
101  * to add some silly "cookie" argument to this method just for that.
102  * But it doesn't matter, the recalculation is in the rarest path
103  * this function ever takes.
104  */
105 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
106 {
107 	struct inet_bind_hashbucket *head;
108 	struct inet_bind_bucket *tb;
109 	struct hlist_node *node;
110 	int ret;
111 
112 	local_bh_disable();
113 	if (snum == 0) {
114 		int low = sysctl_local_port_range[0];
115 		int high = sysctl_local_port_range[1];
116 		int remaining = (high - low) + 1;
117 		int rover;
118 
119 		spin_lock(&tcp_hashinfo.portalloc_lock);
120 		if (tcp_hashinfo.port_rover < low)
121 			rover = low;
122 		else
123 			rover = tcp_hashinfo.port_rover;
124 		do {	rover++;
125 			if (rover > high)
126 				rover = low;
127 			head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
128 			spin_lock(&head->lock);
129 			inet_bind_bucket_for_each(tb, node, &head->chain)
130 				if (tb->port == rover)
131 					goto next;
132 			break;
133 		next:
134 			spin_unlock(&head->lock);
135 		} while (--remaining > 0);
136 		tcp_hashinfo.port_rover = rover;
137 		spin_unlock(&tcp_hashinfo.portalloc_lock);
138 
139 		/* Exhausted local port range during search?  It is not
140 		 * possible for us to be holding one of the bind hash
141 		 * locks if this test triggers, because if 'remaining'
142 		 * drops to zero, we broke out of the do/while loop at
143 		 * the top level, not from the 'break;' statement.
144 		 */
145 		ret = 1;
146 		if (unlikely(remaining <= 0))
147 			goto fail;
148 
149 		/* OK, here is the one we will use. */
150 		snum = rover;
151 	} else {
152 		head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
153 		spin_lock(&head->lock);
154 		inet_bind_bucket_for_each(tb, node, &head->chain)
155 			if (tb->port == snum)
156 				goto tb_found;
157 	}
158 	tb = NULL;
159 	goto tb_not_found;
160 tb_found:
161 	if (tb && !hlist_empty(&tb->owners)) {
162 		if (tb->fastreuse > 0 && sk->sk_reuse &&
163 		    sk->sk_state != TCP_LISTEN) {
164 			goto success;
165 		} else {
166 			ret = 1;
167 			if (tcp_v6_bind_conflict(sk, tb))
168 				goto fail_unlock;
169 		}
170 	}
171 tb_not_found:
172 	ret = 1;
173 	if (tb == NULL) {
174 	       	tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
175 		if (tb == NULL)
176 			goto fail_unlock;
177 	}
178 	if (hlist_empty(&tb->owners)) {
179 		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
180 			tb->fastreuse = 1;
181 		else
182 			tb->fastreuse = 0;
183 	} else if (tb->fastreuse &&
184 		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
185 		tb->fastreuse = 0;
186 
187 success:
188 	if (!inet_csk(sk)->icsk_bind_hash)
189 		inet_bind_hash(sk, tb, snum);
190 	BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
191 	ret = 0;
192 
193 fail_unlock:
194 	spin_unlock(&head->lock);
195 fail:
196 	local_bh_enable();
197 	return ret;
198 }
199 
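/*
 * Insert an already-bound socket into the global TCP hash tables: listening
 * sockets go into tcp_hashinfo.listening_hash, everything else into the
 * established hash (ehash) chain selected by inet6_sk_ehashfn().
 */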
200 static __inline__ void __tcp_v6_hash(struct sock *sk)
201 {
202 	struct hlist_head *list;
203 	rwlock_t *lock;
204 
205 	BUG_TRAP(sk_unhashed(sk));
206 
207 	if (sk->sk_state == TCP_LISTEN) {
208 		list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
209 		lock = &tcp_hashinfo.lhash_lock;
210 		inet_listen_wlock(&tcp_hashinfo);
211 	} else {
212 		unsigned int hash;
213 		sk->sk_hash = hash = inet6_sk_ehashfn(sk);
214 		hash &= (tcp_hashinfo.ehash_size - 1);
215 		list = &tcp_hashinfo.ehash[hash].chain;
216 		lock = &tcp_hashinfo.ehash[hash].lock;
217 		write_lock(lock);
218 	}
219 
220 	__sk_add_node(sk, list);
221 	sock_prot_inc_use(sk->sk_prot);
222 	write_unlock(lock);
223 }
224 
225 
226 static void tcp_v6_hash(struct sock *sk)
227 {
228 	if (sk->sk_state != TCP_CLOSE) {
229 		struct tcp_sock *tp = tcp_sk(sk);
230 
231 		if (tp->af_specific == &ipv6_mapped) {
232 			tcp_prot.hash(sk);
233 			return;
234 		}
235 		local_bh_disable();
236 		__tcp_v6_hash(sk);
237 		local_bh_enable();
238 	}
239 }
240 
241 /*
242  * Open request hash tables.
243  */
244 
245 static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
246 {
247 	u32 a, b, c;
248 
249 	a = raddr->s6_addr32[0];
250 	b = raddr->s6_addr32[1];
251 	c = raddr->s6_addr32[2];
252 
253 	a += JHASH_GOLDEN_RATIO;
254 	b += JHASH_GOLDEN_RATIO;
255 	c += rnd;
256 	__jhash_mix(a, b, c);
257 
258 	a += raddr->s6_addr32[3];
259 	b += (u32) rport;
260 	__jhash_mix(a, b, c);
261 
262 	return c & (TCP_SYNQ_HSIZE - 1);
263 }
264 
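/*
 * Look up a pending connection request on a listening socket: walk the
 * SYN queue bucket chosen by tcp_v6_synq_hash() and match on remote port,
 * both addresses and (if set) the incoming interface.
 */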
265 static struct request_sock *tcp_v6_search_req(const struct sock *sk,
266 					      struct request_sock ***prevp,
267 					      __u16 rport,
268 					      struct in6_addr *raddr,
269 					      struct in6_addr *laddr,
270 					      int iif)
271 {
272 	const struct inet_connection_sock *icsk = inet_csk(sk);
273 	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
274 	struct request_sock *req, **prev;
275 
276 	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
277 	     (req = *prev) != NULL;
278 	     prev = &req->dl_next) {
279 		const struct tcp6_request_sock *treq = tcp6_rsk(req);
280 
281 		if (inet_rsk(req)->rmt_port == rport &&
282 		    req->rsk_ops->family == AF_INET6 &&
283 		    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
284 		    ipv6_addr_equal(&treq->loc_addr, laddr) &&
285 		    (!treq->iif || treq->iif == iif)) {
286 			BUG_TRAP(req->sk == NULL);
287 			*prevp = prev;
288 			return req;
289 		}
290 	}
291 
292 	return NULL;
293 }
294 
295 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
296 				   struct in6_addr *saddr,
297 				   struct in6_addr *daddr,
298 				   unsigned long base)
299 {
300 	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
301 }
302 
303 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
304 {
305 	if (skb->protocol == htons(ETH_P_IPV6)) {
306 		return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
307 						    skb->nh.ipv6h->saddr.s6_addr32,
308 						    skb->h.th->dest,
309 						    skb->h.th->source);
310 	} else {
311 		return secure_tcp_sequence_number(skb->nh.iph->daddr,
312 						  skb->nh.iph->saddr,
313 						  skb->h.th->dest,
314 						  skb->h.th->source);
315 	}
316 }
317 
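/*
 * Verify that the chosen (saddr, daddr, sport, dport) four-tuple is unique
 * before hashing a connecting socket.  TIME-WAIT entries are checked first
 * and may be recycled (returned via *twp) when timestamps allow it; a live
 * established socket with the same tuple means the attempt must fail.
 */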
318 static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
319 				      struct inet_timewait_sock **twp)
320 {
321 	struct inet_sock *inet = inet_sk(sk);
322 	const struct ipv6_pinfo *np = inet6_sk(sk);
323 	const struct in6_addr *daddr = &np->rcv_saddr;
324 	const struct in6_addr *saddr = &np->daddr;
325 	const int dif = sk->sk_bound_dev_if;
326 	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
327 	unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport);
328 	struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
329 	struct sock *sk2;
330 	const struct hlist_node *node;
331 	struct inet_timewait_sock *tw;
332 
333 	prefetch(head->chain.first);
334 	write_lock(&head->lock);
335 
336 	/* Check TIME-WAIT sockets first. */
337 	sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
338 		const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);
339 
340 		tw = inet_twsk(sk2);
341 
342 		if(*((__u32 *)&(tw->tw_dport))	== ports	&&
343 		   sk2->sk_family		== PF_INET6	&&
344 		   ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr)	&&
345 		   ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr)	&&
346 		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
347 			const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
348 			struct tcp_sock *tp = tcp_sk(sk);
349 
350 			if (tcptw->tw_ts_recent_stamp &&
351 			    (!twp ||
352 			     (sysctl_tcp_tw_reuse &&
353 			      xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
354 				/* See comment in tcp_ipv4.c */
355 				tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
356 				if (!tp->write_seq)
357 					tp->write_seq = 1;
358 				tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
359 				tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
360 				sock_hold(sk2);
361 				goto unique;
362 			} else
363 				goto not_unique;
364 		}
365 	}
366 	tw = NULL;
367 
368 	/* And established part... */
369 	sk_for_each(sk2, node, &head->chain) {
370 		if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
371 			goto not_unique;
372 	}
373 
374 unique:
375 	BUG_TRAP(sk_unhashed(sk));
376 	__sk_add_node(sk, &head->chain);
377 	sk->sk_hash = hash;
378 	sock_prot_inc_use(sk->sk_prot);
379 	write_unlock(&head->lock);
380 
381 	if (twp) {
382 		*twp = tw;
383 		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
384 	} else if (tw) {
385 		/* Silly. Should hash-dance instead... */
386 		inet_twsk_deschedule(tw, &tcp_death_row);
387 		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
388 
389 		inet_twsk_put(tw);
390 	}
391 	return 0;
392 
393 not_unique:
394 	write_unlock(&head->lock);
395 	return -EADDRNOTAVAIL;
396 }
397 
398 static inline u32 tcpv6_port_offset(const struct sock *sk)
399 {
400 	const struct inet_sock *inet = inet_sk(sk);
401 	const struct ipv6_pinfo *np = inet6_sk(sk);
402 
403 	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
404 					   np->daddr.s6_addr32,
405 					   inet->dport);
406 }
407 
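/*
 * Bind an ephemeral port (if none is set yet) and hash the socket for an
 * outgoing connect().  Port selection starts from a per-destination offset
 * and relies on __tcp_v6_check_established() rather than on rcv_saddr
 * comparisons, mirroring the IPv4 tcp_v4_hash_connect() logic.
 */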
408 static int tcp_v6_hash_connect(struct sock *sk)
409 {
410 	unsigned short snum = inet_sk(sk)->num;
411  	struct inet_bind_hashbucket *head;
412  	struct inet_bind_bucket *tb;
413 	int ret;
414 
415  	if (!snum) {
416  		int low = sysctl_local_port_range[0];
417  		int high = sysctl_local_port_range[1];
418 		int range = high - low;
419  		int i;
420 		int port;
421 		static u32 hint;
422 		u32 offset = hint + tcpv6_port_offset(sk);
423 		struct hlist_node *node;
424  		struct inet_timewait_sock *tw = NULL;
425 
426  		local_bh_disable();
427 		for (i = 1; i <= range; i++) {
428 			port = low + (i + offset) % range;
429  			head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
430  			spin_lock(&head->lock);
431 
432  			/* Does not bother with rcv_saddr checks,
433  			 * because the established check is already
434  			 * unique enough.
435  			 */
436 			inet_bind_bucket_for_each(tb, node, &head->chain) {
437  				if (tb->port == port) {
438  					BUG_TRAP(!hlist_empty(&tb->owners));
439  					if (tb->fastreuse >= 0)
440  						goto next_port;
441  					if (!__tcp_v6_check_established(sk,
442 									port,
443 									&tw))
444  						goto ok;
445  					goto next_port;
446  				}
447  			}
448 
449  			tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
450  			if (!tb) {
451  				spin_unlock(&head->lock);
452  				break;
453  			}
454  			tb->fastreuse = -1;
455  			goto ok;
456 
457  		next_port:
458  			spin_unlock(&head->lock);
459  		}
460  		local_bh_enable();
461 
462  		return -EADDRNOTAVAIL;
463 
464 ok:
465 		hint += i;
466 
467  		/* Head lock still held and bh's disabled */
468  		inet_bind_hash(sk, tb, port);
469 		if (sk_unhashed(sk)) {
470  			inet_sk(sk)->sport = htons(port);
471  			__tcp_v6_hash(sk);
472  		}
473  		spin_unlock(&head->lock);
474 
475  		if (tw) {
476  			inet_twsk_deschedule(tw, &tcp_death_row);
477  			inet_twsk_put(tw);
478  		}
479 
480 		ret = 0;
481 		goto out;
482  	}
483 
484  	head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
485  	tb   = inet_csk(sk)->icsk_bind_hash;
486 	spin_lock_bh(&head->lock);
487 
488 	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
489 		__tcp_v6_hash(sk);
490 		spin_unlock_bh(&head->lock);
491 		return 0;
492 	} else {
493 		spin_unlock(&head->lock);
494 		/* No definite answer... Walk the established hash table. */
495 		ret = __tcp_v6_check_established(sk, snum, NULL);
496 out:
497 		local_bh_enable();
498 		return ret;
499 	}
500 }
501 
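/*
 * Active open.  Illustrative sketch only (user space, not part of this
 * file): the function below is reached through connect() on an AF_INET6
 * stream socket, e.g.
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	struct sockaddr_in6 sin6 = { .sin6_family = AF_INET6,
 *				     .sin6_port   = htons(80) };
 *	inet_pton(AF_INET6, "2001:db8::1", &sin6.sin6_addr);
 *	connect(fd, (struct sockaddr *)&sin6, sizeof(sin6));
 *
 * A V4-mapped destination (::ffff:a.b.c.d) is handed off to
 * tcp_v4_connect() and the socket is switched to the ipv6_mapped ops.
 */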
502 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
503 			  int addr_len)
504 {
505 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
506 	struct inet_sock *inet = inet_sk(sk);
507 	struct ipv6_pinfo *np = inet6_sk(sk);
508 	struct tcp_sock *tp = tcp_sk(sk);
509 	struct in6_addr *saddr = NULL, *final_p = NULL, final;
510 	struct flowi fl;
511 	struct dst_entry *dst;
512 	int addr_type;
513 	int err;
514 
515 	if (addr_len < SIN6_LEN_RFC2133)
516 		return -EINVAL;
517 
518 	if (usin->sin6_family != AF_INET6)
519 		return(-EAFNOSUPPORT);
520 
521 	memset(&fl, 0, sizeof(fl));
522 
523 	if (np->sndflow) {
524 		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
525 		IP6_ECN_flow_init(fl.fl6_flowlabel);
526 		if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
527 			struct ip6_flowlabel *flowlabel;
528 			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
529 			if (flowlabel == NULL)
530 				return -EINVAL;
531 			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
532 			fl6_sock_release(flowlabel);
533 		}
534 	}
535 
536 	/*
537   	 *	connect() to INADDR_ANY means loopback (BSD'ism).
538   	 */
539 
540   	if(ipv6_addr_any(&usin->sin6_addr))
541 		usin->sin6_addr.s6_addr[15] = 0x1;
542 
543 	addr_type = ipv6_addr_type(&usin->sin6_addr);
544 
545 	if(addr_type & IPV6_ADDR_MULTICAST)
546 		return -ENETUNREACH;
547 
548 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
549 		if (addr_len >= sizeof(struct sockaddr_in6) &&
550 		    usin->sin6_scope_id) {
551 			/* If an interface was set while binding, the indices
552 			 * must coincide.
553 			 */
554 			if (sk->sk_bound_dev_if &&
555 			    sk->sk_bound_dev_if != usin->sin6_scope_id)
556 				return -EINVAL;
557 
558 			sk->sk_bound_dev_if = usin->sin6_scope_id;
559 		}
560 
561 		/* Connecting to a link-local address requires an interface */
562 		if (!sk->sk_bound_dev_if)
563 			return -EINVAL;
564 	}
565 
566 	if (tp->rx_opt.ts_recent_stamp &&
567 	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
568 		tp->rx_opt.ts_recent = 0;
569 		tp->rx_opt.ts_recent_stamp = 0;
570 		tp->write_seq = 0;
571 	}
572 
573 	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
574 	np->flow_label = fl.fl6_flowlabel;
575 
576 	/*
577 	 *	TCP over IPv4
578 	 */
579 
580 	if (addr_type == IPV6_ADDR_MAPPED) {
581 		u32 exthdrlen = tp->ext_header_len;
582 		struct sockaddr_in sin;
583 
584 		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
585 
586 		if (__ipv6_only_sock(sk))
587 			return -ENETUNREACH;
588 
589 		sin.sin_family = AF_INET;
590 		sin.sin_port = usin->sin6_port;
591 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
592 
593 		tp->af_specific = &ipv6_mapped;
594 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
595 
596 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
597 
598 		if (err) {
599 			tp->ext_header_len = exthdrlen;
600 			tp->af_specific = &ipv6_specific;
601 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
602 			goto failure;
603 		} else {
604 			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
605 				      inet->saddr);
606 			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
607 				      inet->rcv_saddr);
608 		}
609 
610 		return err;
611 	}
612 
613 	if (!ipv6_addr_any(&np->rcv_saddr))
614 		saddr = &np->rcv_saddr;
615 
616 	fl.proto = IPPROTO_TCP;
617 	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
618 	ipv6_addr_copy(&fl.fl6_src,
619 		       (saddr ? saddr : &np->saddr));
620 	fl.oif = sk->sk_bound_dev_if;
621 	fl.fl_ip_dport = usin->sin6_port;
622 	fl.fl_ip_sport = inet->sport;
623 
624 	if (np->opt && np->opt->srcrt) {
625 		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
626 		ipv6_addr_copy(&final, &fl.fl6_dst);
627 		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
628 		final_p = &final;
629 	}
630 
631 	err = ip6_dst_lookup(sk, &dst, &fl);
632 	if (err)
633 		goto failure;
634 	if (final_p)
635 		ipv6_addr_copy(&fl.fl6_dst, final_p);
636 
637 	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
638 		goto failure;
639 
640 	if (saddr == NULL) {
641 		saddr = &fl.fl6_src;
642 		ipv6_addr_copy(&np->rcv_saddr, saddr);
643 	}
644 
645 	/* set the source address */
646 	ipv6_addr_copy(&np->saddr, saddr);
647 	inet->rcv_saddr = LOOPBACK4_IPV6;
648 
649 	ip6_dst_store(sk, dst, NULL);
650 	sk->sk_route_caps = dst->dev->features &
651 		~(NETIF_F_IP_CSUM | NETIF_F_TSO);
652 
653 	tp->ext_header_len = 0;
654 	if (np->opt)
655 		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
656 
657 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
658 
659 	inet->dport = usin->sin6_port;
660 
661 	tcp_set_state(sk, TCP_SYN_SENT);
662 	err = tcp_v6_hash_connect(sk);
663 	if (err)
664 		goto late_failure;
665 
666 	if (!tp->write_seq)
667 		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
668 							     np->daddr.s6_addr32,
669 							     inet->sport,
670 							     inet->dport);
671 
672 	err = tcp_connect(sk);
673 	if (err)
674 		goto late_failure;
675 
676 	return 0;
677 
678 late_failure:
679 	tcp_set_state(sk, TCP_CLOSE);
680 	__sk_dst_reset(sk);
681 failure:
682 	inet->dport = 0;
683 	sk->sk_route_caps = 0;
684 	return err;
685 }
686 
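/*
 * ICMPv6 error handler, registered in tcpv6_protocol below.  Locates the
 * affected socket, handles ICMPV6_PKT_TOOBIG by re-syncing the MSS against
 * the (possibly refreshed) route, and converts other errors for delivery
 * to the socket or to a pending request_sock.
 */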
687 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
688 		int type, int code, int offset, __u32 info)
689 {
690 	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
691 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
692 	struct ipv6_pinfo *np;
693 	struct sock *sk;
694 	int err;
695 	struct tcp_sock *tp;
696 	__u32 seq;
697 
698 	sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
699 			  th->source, skb->dev->ifindex);
700 
701 	if (sk == NULL) {
702 		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
703 		return;
704 	}
705 
706 	if (sk->sk_state == TCP_TIME_WAIT) {
707 		inet_twsk_put((struct inet_timewait_sock *)sk);
708 		return;
709 	}
710 
711 	bh_lock_sock(sk);
712 	if (sock_owned_by_user(sk))
713 		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
714 
715 	if (sk->sk_state == TCP_CLOSE)
716 		goto out;
717 
718 	tp = tcp_sk(sk);
719 	seq = ntohl(th->seq);
720 	if (sk->sk_state != TCP_LISTEN &&
721 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
722 		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
723 		goto out;
724 	}
725 
726 	np = inet6_sk(sk);
727 
728 	if (type == ICMPV6_PKT_TOOBIG) {
729 		struct dst_entry *dst = NULL;
730 
731 		if (sock_owned_by_user(sk))
732 			goto out;
733 		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
734 			goto out;
735 
736 		/* icmp should have updated the destination cache entry */
737 		dst = __sk_dst_check(sk, np->dst_cookie);
738 
739 		if (dst == NULL) {
740 			struct inet_sock *inet = inet_sk(sk);
741 			struct flowi fl;
742 
743 			/* BUGGG_FUTURE: Again, it is not clear how
744 			   to handle rthdr case. Ignore this complexity
745 			   for now.
746 			 */
747 			memset(&fl, 0, sizeof(fl));
748 			fl.proto = IPPROTO_TCP;
749 			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
750 			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
751 			fl.oif = sk->sk_bound_dev_if;
752 			fl.fl_ip_dport = inet->dport;
753 			fl.fl_ip_sport = inet->sport;
754 
755 			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
756 				sk->sk_err_soft = -err;
757 				goto out;
758 			}
759 
760 			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
761 				sk->sk_err_soft = -err;
762 				goto out;
763 			}
764 
765 		} else
766 			dst_hold(dst);
767 
768 		if (tp->pmtu_cookie > dst_mtu(dst)) {
769 			tcp_sync_mss(sk, dst_mtu(dst));
770 			tcp_simple_retransmit(sk);
771 		} /* else let the usual retransmit timer handle it */
772 		dst_release(dst);
773 		goto out;
774 	}
775 
776 	icmpv6_err_convert(type, code, &err);
777 
778 	/* Might be for a request_sock */
779 	switch (sk->sk_state) {
780 		struct request_sock *req, **prev;
781 	case TCP_LISTEN:
782 		if (sock_owned_by_user(sk))
783 			goto out;
784 
785 		req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr,
786 					&hdr->saddr, inet6_iif(skb));
787 		if (!req)
788 			goto out;
789 
790 		/* ICMPs are not backlogged, hence we cannot get
791 		 * an established socket here.
792 		 */
793 		BUG_TRAP(req->sk == NULL);
794 
795 		if (seq != tcp_rsk(req)->snt_isn) {
796 			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
797 			goto out;
798 		}
799 
800 		inet_csk_reqsk_queue_drop(sk, req, prev);
801 		goto out;
802 
803 	case TCP_SYN_SENT:
804 	case TCP_SYN_RECV:  /* Cannot happen.
805 			       It can, if SYNs are crossed. --ANK */
806 		if (!sock_owned_by_user(sk)) {
807 			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
808 			sk->sk_err = err;
809 			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
810 
811 			tcp_done(sk);
812 		} else
813 			sk->sk_err_soft = err;
814 		goto out;
815 	}
816 
817 	if (!sock_owned_by_user(sk) && np->recverr) {
818 		sk->sk_err = err;
819 		sk->sk_error_report(sk);
820 	} else
821 		sk->sk_err_soft = err;
822 
823 out:
824 	bh_unlock_sock(sk);
825 	sock_put(sk);
826 }
827 
828 
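/*
 * Build and transmit a SYN+ACK for a queued connection request, routing it
 * with a flow that reflects any source routing header carried in the
 * request's saved packet options.
 */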
829 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
830 			      struct dst_entry *dst)
831 {
832 	struct tcp6_request_sock *treq = tcp6_rsk(req);
833 	struct ipv6_pinfo *np = inet6_sk(sk);
834 	struct sk_buff * skb;
835 	struct ipv6_txoptions *opt = NULL;
836 	struct in6_addr * final_p = NULL, final;
837 	struct flowi fl;
838 	int err = -1;
839 
840 	memset(&fl, 0, sizeof(fl));
841 	fl.proto = IPPROTO_TCP;
842 	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
843 	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
844 	fl.fl6_flowlabel = 0;
845 	fl.oif = treq->iif;
846 	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
847 	fl.fl_ip_sport = inet_sk(sk)->sport;
848 
849 	if (dst == NULL) {
850 		opt = np->opt;
851 		if (opt == NULL &&
852 		    np->rxopt.bits.osrcrt == 2 &&
853 		    treq->pktopts) {
854 			struct sk_buff *pktopts = treq->pktopts;
855 			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
856 			if (rxopt->srcrt)
857 				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
858 		}
859 
860 		if (opt && opt->srcrt) {
861 			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
862 			ipv6_addr_copy(&final, &fl.fl6_dst);
863 			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
864 			final_p = &final;
865 		}
866 
867 		err = ip6_dst_lookup(sk, &dst, &fl);
868 		if (err)
869 			goto done;
870 		if (final_p)
871 			ipv6_addr_copy(&fl.fl6_dst, final_p);
872 		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
873 			goto done;
874 	}
875 
876 	skb = tcp_make_synack(sk, dst, req);
877 	if (skb) {
878 		struct tcphdr *th = skb->h.th;
879 
880 		th->check = tcp_v6_check(th, skb->len,
881 					 &treq->loc_addr, &treq->rmt_addr,
882 					 csum_partial((char *)th, skb->len, skb->csum));
883 
884 		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
885 		err = ip6_xmit(sk, skb, &fl, opt, 0);
886 		if (err == NET_XMIT_CN)
887 			err = 0;
888 	}
889 
890 done:
891         if (opt && opt != np->opt)
892 		sock_kfree_s(sk, opt, opt->tot_len);
893 	return err;
894 }
895 
896 static void tcp_v6_reqsk_destructor(struct request_sock *req)
897 {
898 	if (tcp6_rsk(req)->pktopts)
899 		kfree_skb(tcp6_rsk(req)->pktopts);
900 }
901 
902 static struct request_sock_ops tcp6_request_sock_ops = {
903 	.family		=	AF_INET6,
904 	.obj_size	=	sizeof(struct tcp6_request_sock),
905 	.rtx_syn_ack	=	tcp_v6_send_synack,
906 	.send_ack	=	tcp_v6_reqsk_send_ack,
907 	.destructor	=	tcp_v6_reqsk_destructor,
908 	.send_reset	=	tcp_v6_send_reset
909 };
910 
911 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
912 {
913 	struct ipv6_pinfo *np = inet6_sk(sk);
914 	struct inet6_skb_parm *opt = IP6CB(skb);
915 
916 	if (np->rxopt.all) {
917 		if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
918 		    ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) ||
919 		    (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) ||
920 		    ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
921 			return 1;
922 	}
923 	return 0;
924 }
925 
926 
927 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
928 			      struct sk_buff *skb)
929 {
930 	struct ipv6_pinfo *np = inet6_sk(sk);
931 
932 	if (skb->ip_summed == CHECKSUM_HW) {
933 		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
934 		skb->csum = offsetof(struct tcphdr, check);
935 	} else {
936 		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
937 					    csum_partial((char *)th, th->doff<<2,
938 							 skb->csum));
939 	}
940 }
941 
942 
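/*
 * Send a RST in response to the given segment.  No socket is required:
 * the reply is built from the offending packet's addresses and ports and
 * routed with a throw-away dst.
 */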
943 static void tcp_v6_send_reset(struct sk_buff *skb)
944 {
945 	struct tcphdr *th = skb->h.th, *t1;
946 	struct sk_buff *buff;
947 	struct flowi fl;
948 
949 	if (th->rst)
950 		return;
951 
952 	if (!ipv6_unicast_destination(skb))
953 		return;
954 
955 	/*
956 	 * We need to grab some memory, and put together an RST,
957 	 * and then put it into the queue to be sent.
958 	 */
959 
960 	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
961 			 GFP_ATOMIC);
962 	if (buff == NULL)
963 	  	return;
964 
965 	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
966 
967 	t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
968 
969 	/* Swap the send and the receive. */
970 	memset(t1, 0, sizeof(*t1));
971 	t1->dest = th->source;
972 	t1->source = th->dest;
973 	t1->doff = sizeof(*t1)/4;
974 	t1->rst = 1;
975 
976 	if(th->ack) {
977 	  	t1->seq = th->ack_seq;
978 	} else {
979 		t1->ack = 1;
980 		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
981 				    + skb->len - (th->doff<<2));
982 	}
983 
984 	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
985 
986 	memset(&fl, 0, sizeof(fl));
987 	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
988 	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
989 
990 	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
991 				    sizeof(*t1), IPPROTO_TCP,
992 				    buff->csum);
993 
994 	fl.proto = IPPROTO_TCP;
995 	fl.oif = inet6_iif(skb);
996 	fl.fl_ip_dport = t1->dest;
997 	fl.fl_ip_sport = t1->source;
998 
999 	/* sk = NULL, but it is safe for now. RST socket required. */
1000 	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1001 
1002 		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0)
1003 			return;
1004 
1005 		ip6_xmit(NULL, buff, &fl, NULL, 0);
1006 		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1007 		TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
1008 		return;
1009 	}
1010 
1011 	kfree_skb(buff);
1012 }
1013 
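/*
 * Send a bare ACK (used for the TIME-WAIT and request-socket answers
 * below), optionally carrying a timestamp option when 'ts' is non-zero.
 */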
1014 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1015 {
1016 	struct tcphdr *th = skb->h.th, *t1;
1017 	struct sk_buff *buff;
1018 	struct flowi fl;
1019 	int tot_len = sizeof(struct tcphdr);
1020 
1021 	if (ts)
1022 		tot_len += 3*4;
1023 
1024 	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1025 			 GFP_ATOMIC);
1026 	if (buff == NULL)
1027 		return;
1028 
1029 	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1030 
1031 	t1 = (struct tcphdr *) skb_push(buff,tot_len);
1032 
1033 	/* Swap the send and the receive. */
1034 	memset(t1, 0, sizeof(*t1));
1035 	t1->dest = th->source;
1036 	t1->source = th->dest;
1037 	t1->doff = tot_len/4;
1038 	t1->seq = htonl(seq);
1039 	t1->ack_seq = htonl(ack);
1040 	t1->ack = 1;
1041 	t1->window = htons(win);
1042 
1043 	if (ts) {
1044 		u32 *ptr = (u32*)(t1 + 1);
1045 		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1046 			       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1047 		*ptr++ = htonl(tcp_time_stamp);
1048 		*ptr = htonl(ts);
1049 	}
1050 
1051 	buff->csum = csum_partial((char *)t1, tot_len, 0);
1052 
1053 	memset(&fl, 0, sizeof(fl));
1054 	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1055 	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1056 
1057 	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1058 				    tot_len, IPPROTO_TCP,
1059 				    buff->csum);
1060 
1061 	fl.proto = IPPROTO_TCP;
1062 	fl.oif = inet6_iif(skb);
1063 	fl.fl_ip_dport = t1->dest;
1064 	fl.fl_ip_sport = t1->source;
1065 
1066 	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1067 		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0)
1068 			return;
1069 		ip6_xmit(NULL, buff, &fl, NULL, 0);
1070 		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1071 		return;
1072 	}
1073 
1074 	kfree_skb(buff);
1075 }
1076 
1077 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1078 {
1079 	struct inet_timewait_sock *tw = inet_twsk(sk);
1080 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1081 
1082 	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1083 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1084 			tcptw->tw_ts_recent);
1085 
1086 	inet_twsk_put(tw);
1087 }
1088 
1089 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1090 {
1091 	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
1092 }
1093 
1094 
1095 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1096 {
1097 	struct request_sock *req, **prev;
1098 	const struct tcphdr *th = skb->h.th;
1099 	struct sock *nsk;
1100 
1101 	/* Find possible connection requests. */
1102 	req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr,
1103 				&skb->nh.ipv6h->daddr, inet6_iif(skb));
1104 	if (req)
1105 		return tcp_check_req(sk, skb, req, prev);
1106 
1107 	nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
1108 					 th->source, &skb->nh.ipv6h->daddr,
1109 					 ntohs(th->dest), inet6_iif(skb));
1110 
1111 	if (nsk) {
1112 		if (nsk->sk_state != TCP_TIME_WAIT) {
1113 			bh_lock_sock(nsk);
1114 			return nsk;
1115 		}
1116 		inet_twsk_put((struct inet_timewait_sock *)nsk);
1117 		return NULL;
1118 	}
1119 
1120 #if 0 /*def CONFIG_SYN_COOKIES*/
1121 	if (!th->rst && !th->syn && th->ack)
1122 		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
1123 #endif
1124 	return sk;
1125 }
1126 
1127 static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1128 {
1129 	struct inet_connection_sock *icsk = inet_csk(sk);
1130 	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
1131 	const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
1132 
1133 	reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
1134 	inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
1135 }
1136 
1137 
1138 /* FIXME: this is substantially similar to the ipv4 code.
1139  * Can some kind of merge be done? -- erics
1140  */
1141 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1142 {
1143 	struct tcp6_request_sock *treq;
1144 	struct ipv6_pinfo *np = inet6_sk(sk);
1145 	struct tcp_options_received tmp_opt;
1146 	struct tcp_sock *tp = tcp_sk(sk);
1147 	struct request_sock *req = NULL;
1148 	__u32 isn = TCP_SKB_CB(skb)->when;
1149 
1150 	if (skb->protocol == htons(ETH_P_IP))
1151 		return tcp_v4_conn_request(sk, skb);
1152 
1153 	if (!ipv6_unicast_destination(skb))
1154 		goto drop;
1155 
1156 	/*
1157 	 *	There are no SYN attacks on IPv6, yet...
1158 	 */
1159 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1160 		if (net_ratelimit())
1161 			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1162 		goto drop;
1163 	}
1164 
1165 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1166 		goto drop;
1167 
1168 	req = reqsk_alloc(&tcp6_request_sock_ops);
1169 	if (req == NULL)
1170 		goto drop;
1171 
1172 	tcp_clear_options(&tmp_opt);
1173 	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1174 	tmp_opt.user_mss = tp->rx_opt.user_mss;
1175 
1176 	tcp_parse_options(skb, &tmp_opt, 0);
1177 
1178 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1179 	tcp_openreq_init(req, &tmp_opt, skb);
1180 
1181 	treq = tcp6_rsk(req);
1182 	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
1183 	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
1184 	TCP_ECN_create_request(req, skb->h.th);
1185 	treq->pktopts = NULL;
1186 	if (ipv6_opt_accepted(sk, skb) ||
1187 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1188 	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
1189 		atomic_inc(&skb->users);
1190 		treq->pktopts = skb;
1191 	}
1192 	treq->iif = sk->sk_bound_dev_if;
1193 
1194 	/* So that link locals have meaning */
1195 	if (!sk->sk_bound_dev_if &&
1196 	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1197 		treq->iif = inet6_iif(skb);
1198 
1199 	if (isn == 0)
1200 		isn = tcp_v6_init_sequence(sk,skb);
1201 
1202 	tcp_rsk(req)->snt_isn = isn;
1203 
1204 	if (tcp_v6_send_synack(sk, req, NULL))
1205 		goto drop;
1206 
1207 	tcp_v6_synq_add(sk, req);
1208 
1209 	return 0;
1210 
1211 drop:
1212 	if (req)
1213 		reqsk_free(req);
1214 
1215 	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1216 	return 0; /* don't send reset */
1217 }
1218 
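/*
 * Create the child socket once the three-way handshake completes.  The
 * V4-mapped case is delegated to tcp_v4_syn_recv_sock() and then dressed
 * up with mapped addresses; the native case routes the reply, clones the
 * listener's IPv6 state and hashes the newborn socket.
 */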
1219 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1220 					  struct request_sock *req,
1221 					  struct dst_entry *dst)
1222 {
1223 	struct tcp6_request_sock *treq = tcp6_rsk(req);
1224 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1225 	struct tcp6_sock *newtcp6sk;
1226 	struct inet_sock *newinet;
1227 	struct tcp_sock *newtp;
1228 	struct sock *newsk;
1229 	struct ipv6_txoptions *opt;
1230 
1231 	if (skb->protocol == htons(ETH_P_IP)) {
1232 		/*
1233 		 *	v6 mapped
1234 		 */
1235 
1236 		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1237 
1238 		if (newsk == NULL)
1239 			return NULL;
1240 
1241 		newtcp6sk = (struct tcp6_sock *)newsk;
1242 		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1243 
1244 		newinet = inet_sk(newsk);
1245 		newnp = inet6_sk(newsk);
1246 		newtp = tcp_sk(newsk);
1247 
1248 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1249 
1250 		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1251 			      newinet->daddr);
1252 
1253 		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1254 			      newinet->saddr);
1255 
1256 		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1257 
1258 		newtp->af_specific = &ipv6_mapped;
1259 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1260 		newnp->pktoptions  = NULL;
1261 		newnp->opt	   = NULL;
1262 		newnp->mcast_oif   = inet6_iif(skb);
1263 		newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;
1264 
1265 		/*
1266 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1267 		 * here, tcp_create_openreq_child now does this for us, see the comment in
1268 		 * that function for the gory details. -acme
1269 		 */
1270 
1271 		/* This is a tricky place. Until this moment the IPv4 tcp code
1272 		   worked with the IPv6 af_tcp.af_specific.
1273 		   Sync it now.
1274 		 */
1275 		tcp_sync_mss(newsk, newtp->pmtu_cookie);
1276 
1277 		return newsk;
1278 	}
1279 
1280 	opt = np->opt;
1281 
1282 	if (sk_acceptq_is_full(sk))
1283 		goto out_overflow;
1284 
1285 	if (np->rxopt.bits.osrcrt == 2 &&
1286 	    opt == NULL && treq->pktopts) {
1287 		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
1288 		if (rxopt->srcrt)
1289 			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
1290 	}
1291 
1292 	if (dst == NULL) {
1293 		struct in6_addr *final_p = NULL, final;
1294 		struct flowi fl;
1295 
1296 		memset(&fl, 0, sizeof(fl));
1297 		fl.proto = IPPROTO_TCP;
1298 		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1299 		if (opt && opt->srcrt) {
1300 			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1301 			ipv6_addr_copy(&final, &fl.fl6_dst);
1302 			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1303 			final_p = &final;
1304 		}
1305 		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1306 		fl.oif = sk->sk_bound_dev_if;
1307 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1308 		fl.fl_ip_sport = inet_sk(sk)->sport;
1309 
1310 		if (ip6_dst_lookup(sk, &dst, &fl))
1311 			goto out;
1312 
1313 		if (final_p)
1314 			ipv6_addr_copy(&fl.fl6_dst, final_p);
1315 
1316 		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1317 			goto out;
1318 	}
1319 
1320 	newsk = tcp_create_openreq_child(sk, req, skb);
1321 	if (newsk == NULL)
1322 		goto out;
1323 
1324 	/*
1325 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1326 	 * count here, tcp_create_openreq_child now does this for us, see the
1327 	 * comment in that function for the gory details. -acme
1328 	 */
1329 
1330 	ip6_dst_store(newsk, dst, NULL);
1331 	newsk->sk_route_caps = dst->dev->features &
1332 		~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1333 
1334 	newtcp6sk = (struct tcp6_sock *)newsk;
1335 	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1336 
1337 	newtp = tcp_sk(newsk);
1338 	newinet = inet_sk(newsk);
1339 	newnp = inet6_sk(newsk);
1340 
1341 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1342 
1343 	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1344 	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1345 	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1346 	newsk->sk_bound_dev_if = treq->iif;
1347 
1348 	/* Now IPv6 options...
1349 
1350 	   First: no IPv4 options.
1351 	 */
1352 	newinet->opt = NULL;
1353 
1354 	/* Clone RX bits */
1355 	newnp->rxopt.all = np->rxopt.all;
1356 
1357 	/* Clone pktoptions received with SYN */
1358 	newnp->pktoptions = NULL;
1359 	if (treq->pktopts != NULL) {
1360 		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1361 		kfree_skb(treq->pktopts);
1362 		treq->pktopts = NULL;
1363 		if (newnp->pktoptions)
1364 			skb_set_owner_r(newnp->pktoptions, newsk);
1365 	}
1366 	newnp->opt	  = NULL;
1367 	newnp->mcast_oif  = inet6_iif(skb);
1368 	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1369 
1370 	/* Clone native IPv6 options from listening socket (if any)
1371 
1372 	   Yes, keeping a reference count would be much more clever,
1373 	   but we do one more thing here: reattach optmem
1374 	   to newsk.
1375 	 */
1376 	if (opt) {
1377 		newnp->opt = ipv6_dup_options(newsk, opt);
1378 		if (opt != np->opt)
1379 			sock_kfree_s(sk, opt, opt->tot_len);
1380 	}
1381 
1382 	newtp->ext_header_len = 0;
1383 	if (newnp->opt)
1384 		newtp->ext_header_len = newnp->opt->opt_nflen +
1385 					newnp->opt->opt_flen;
1386 
1387 	tcp_sync_mss(newsk, dst_mtu(dst));
1388 	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1389 	tcp_initialize_rcv_mss(newsk);
1390 
1391 	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1392 
1393 	__tcp_v6_hash(newsk);
1394 	inet_inherit_port(&tcp_hashinfo, sk, newsk);
1395 
1396 	return newsk;
1397 
1398 out_overflow:
1399 	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1400 out:
1401 	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1402 	if (opt && opt != np->opt)
1403 		sock_kfree_s(sk, opt, opt->tot_len);
1404 	dst_release(dst);
1405 	return NULL;
1406 }
1407 
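/*
 * Validate or prepare the TCP checksum on receive: trust (and verify) a
 * hardware-computed checksum, fully checksum short packets immediately,
 * and leave longer ones to be completed later during copy.
 */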
1408 static int tcp_v6_checksum_init(struct sk_buff *skb)
1409 {
1410 	if (skb->ip_summed == CHECKSUM_HW) {
1411 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1412 		if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1413 				  &skb->nh.ipv6h->daddr,skb->csum))
1414 			return 0;
1415 		LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n");
1416 	}
1417 	if (skb->len <= 76) {
1418 		if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1419 				 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1420 			return -1;
1421 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1422 	} else {
1423 		skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1424 					  &skb->nh.ipv6h->daddr,0);
1425 	}
1426 	return 0;
1427 }
1428 
1429 /* The socket must have its spinlock held when we get
1430  * here.
1431  *
1432  * We have a potential double-lock case here, so even when
1433  * doing backlog processing we use the BH locking scheme.
1434  * This is because we cannot sleep with the original spinlock
1435  * held.
1436  */
1437 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1438 {
1439 	struct ipv6_pinfo *np = inet6_sk(sk);
1440 	struct tcp_sock *tp;
1441 	struct sk_buff *opt_skb = NULL;
1442 
1443 	/* Imagine: socket is IPv6. IPv4 packet arrives,
1444 	   goes to the IPv4 receive handler and is backlogged.
1445 	   From the backlog it always goes here. Kerboom...
1446 	   Fortunately, tcp_rcv_established and rcv_established
1447 	   handle them correctly, but that is not the case with
1448 	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1449 	 */
1450 
1451 	if (skb->protocol == htons(ETH_P_IP))
1452 		return tcp_v4_do_rcv(sk, skb);
1453 
1454 	if (sk_filter(sk, skb, 0))
1455 		goto discard;
1456 
1457 	/*
1458 	 *	socket locking is here for SMP purposes as backlog rcv
1459 	 *	is currently called with bh processing disabled.
1460 	 */
1461 
1462 	/* Do Stevens' IPV6_PKTOPTIONS.
1463 
1464 	   Yes, guys, it is the only place in our code where we
1465 	   can do this without affecting IPv4.
1466 	   The rest of the code is protocol independent,
1467 	   and I do not like the idea of uglifying IPv4.
1468 
1469 	   Actually, the whole idea behind IPV6_PKTOPTIONS
1470 	   does not look very well thought out. For now we latch
1471 	   the options received in the last packet enqueued
1472 	   by tcp. Feel free to propose a better solution.
1473 	                                       --ANK (980728)
1474 	 */
1475 	if (np->rxopt.all)
1476 		opt_skb = skb_clone(skb, GFP_ATOMIC);
1477 
1478 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1479 		TCP_CHECK_TIMER(sk);
1480 		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1481 			goto reset;
1482 		TCP_CHECK_TIMER(sk);
1483 		if (opt_skb)
1484 			goto ipv6_pktoptions;
1485 		return 0;
1486 	}
1487 
1488 	if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1489 		goto csum_err;
1490 
1491 	if (sk->sk_state == TCP_LISTEN) {
1492 		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1493 		if (!nsk)
1494 			goto discard;
1495 
1496 		/*
1497 		 * Queue it on the new socket if the new socket is active,
1498 		 * otherwise we just short-circuit this and continue with
1499 		 * the new socket.
1500 		 */
1501  		if(nsk != sk) {
1502 			if (tcp_child_process(sk, nsk, skb))
1503 				goto reset;
1504 			if (opt_skb)
1505 				__kfree_skb(opt_skb);
1506 			return 0;
1507 		}
1508 	}
1509 
1510 	TCP_CHECK_TIMER(sk);
1511 	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1512 		goto reset;
1513 	TCP_CHECK_TIMER(sk);
1514 	if (opt_skb)
1515 		goto ipv6_pktoptions;
1516 	return 0;
1517 
1518 reset:
1519 	tcp_v6_send_reset(skb);
1520 discard:
1521 	if (opt_skb)
1522 		__kfree_skb(opt_skb);
1523 	kfree_skb(skb);
1524 	return 0;
1525 csum_err:
1526 	TCP_INC_STATS_BH(TCP_MIB_INERRS);
1527 	goto discard;
1528 
1529 
1530 ipv6_pktoptions:
1531 	/* You may ask: what is this?
1532 
1533 	   1. skb was enqueued by tcp.
1534 	   2. skb is added to tail of read queue, rather than out of order.
1535 	   3. socket is not in passive state.
1536 	   4. Finally, it really contains options, which the user wants to receive.
1537 	 */
1538 	tp = tcp_sk(sk);
1539 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1540 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1541 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1542 			np->mcast_oif = inet6_iif(opt_skb);
1543 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1544 			np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1545 		if (ipv6_opt_accepted(sk, opt_skb)) {
1546 			skb_set_owner_r(opt_skb, sk);
1547 			opt_skb = xchg(&np->pktoptions, opt_skb);
1548 		} else {
1549 			__kfree_skb(opt_skb);
1550 			opt_skb = xchg(&np->pktoptions, NULL);
1551 		}
1552 	}
1553 
1554 	if (opt_skb)
1555 		kfree_skb(opt_skb);
1556 	return 0;
1557 }
1558 
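/*
 * Main receive entry point, registered as tcpv6_protocol.handler at the
 * bottom of this file: validate the header and checksum, look the socket
 * up in the hash tables and either process the segment directly, prequeue
 * it, or park it on the backlog if the socket is owned by user context.
 */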
1559 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1560 {
1561 	struct sk_buff *skb = *pskb;
1562 	struct tcphdr *th;
1563 	struct sock *sk;
1564 	int ret;
1565 
1566 	if (skb->pkt_type != PACKET_HOST)
1567 		goto discard_it;
1568 
1569 	/*
1570 	 *	Count it even if it's bad.
1571 	 */
1572 	TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1573 
1574 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1575 		goto discard_it;
1576 
1577 	th = skb->h.th;
1578 
1579 	if (th->doff < sizeof(struct tcphdr)/4)
1580 		goto bad_packet;
1581 	if (!pskb_may_pull(skb, th->doff*4))
1582 		goto discard_it;
1583 
1584 	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1585 	     tcp_v6_checksum_init(skb) < 0))
1586 		goto bad_packet;
1587 
1588 	th = skb->h.th;
1589 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1590 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1591 				    skb->len - th->doff*4);
1592 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1593 	TCP_SKB_CB(skb)->when = 0;
1594 	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1595 	TCP_SKB_CB(skb)->sacked = 0;
1596 
1597 	sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
1598 			    &skb->nh.ipv6h->daddr, ntohs(th->dest),
1599 			    inet6_iif(skb));
1600 
1601 	if (!sk)
1602 		goto no_tcp_socket;
1603 
1604 process:
1605 	if (sk->sk_state == TCP_TIME_WAIT)
1606 		goto do_time_wait;
1607 
1608 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1609 		goto discard_and_relse;
1610 
1611 	if (sk_filter(sk, skb, 0))
1612 		goto discard_and_relse;
1613 
1614 	skb->dev = NULL;
1615 
1616 	bh_lock_sock(sk);
1617 	ret = 0;
1618 	if (!sock_owned_by_user(sk)) {
1619 		if (!tcp_prequeue(sk, skb))
1620 			ret = tcp_v6_do_rcv(sk, skb);
1621 	} else
1622 		sk_add_backlog(sk, skb);
1623 	bh_unlock_sock(sk);
1624 
1625 	sock_put(sk);
1626 	return ret ? -1 : 0;
1627 
1628 no_tcp_socket:
1629 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1630 		goto discard_it;
1631 
1632 	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1633 bad_packet:
1634 		TCP_INC_STATS_BH(TCP_MIB_INERRS);
1635 	} else {
1636 		tcp_v6_send_reset(skb);
1637 	}
1638 
1639 discard_it:
1640 
1641 	/*
1642 	 *	Discard frame
1643 	 */
1644 
1645 	kfree_skb(skb);
1646 	return 0;
1647 
1648 discard_and_relse:
1649 	sock_put(sk);
1650 	goto discard_it;
1651 
1652 do_time_wait:
1653 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1654 		inet_twsk_put((struct inet_timewait_sock *)sk);
1655 		goto discard_it;
1656 	}
1657 
1658 	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1659 		TCP_INC_STATS_BH(TCP_MIB_INERRS);
1660 		inet_twsk_put((struct inet_timewait_sock *)sk);
1661 		goto discard_it;
1662 	}
1663 
1664 	switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
1665 					   skb, th)) {
1666 	case TCP_TW_SYN:
1667 	{
1668 		struct sock *sk2;
1669 
1670 		sk2 = inet6_lookup_listener(&tcp_hashinfo,
1671 					    &skb->nh.ipv6h->daddr,
1672 					    ntohs(th->dest), inet6_iif(skb));
1673 		if (sk2 != NULL) {
1674 			struct inet_timewait_sock *tw = inet_twsk(sk);
1675 			inet_twsk_deschedule(tw, &tcp_death_row);
1676 			inet_twsk_put(tw);
1677 			sk = sk2;
1678 			goto process;
1679 		}
1680 		/* Fall through to ACK */
1681 	}
1682 	case TCP_TW_ACK:
1683 		tcp_v6_timewait_ack(sk, skb);
1684 		break;
1685 	case TCP_TW_RST:
1686 		goto no_tcp_socket;
1687 	case TCP_TW_SUCCESS:;
1688 	}
1689 	goto discard_it;
1690 }
1691 
1692 static int tcp_v6_rebuild_header(struct sock *sk)
1693 {
1694 	int err;
1695 	struct dst_entry *dst;
1696 	struct ipv6_pinfo *np = inet6_sk(sk);
1697 
1698 	dst = __sk_dst_check(sk, np->dst_cookie);
1699 
1700 	if (dst == NULL) {
1701 		struct inet_sock *inet = inet_sk(sk);
1702 		struct in6_addr *final_p = NULL, final;
1703 		struct flowi fl;
1704 
1705 		memset(&fl, 0, sizeof(fl));
1706 		fl.proto = IPPROTO_TCP;
1707 		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1708 		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1709 		fl.fl6_flowlabel = np->flow_label;
1710 		fl.oif = sk->sk_bound_dev_if;
1711 		fl.fl_ip_dport = inet->dport;
1712 		fl.fl_ip_sport = inet->sport;
1713 
1714 		if (np->opt && np->opt->srcrt) {
1715 			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1716 			ipv6_addr_copy(&final, &fl.fl6_dst);
1717 			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1718 			final_p = &final;
1719 		}
1720 
1721 		err = ip6_dst_lookup(sk, &dst, &fl);
1722 		if (err) {
1723 			sk->sk_route_caps = 0;
1724 			return err;
1725 		}
1726 		if (final_p)
1727 			ipv6_addr_copy(&fl.fl6_dst, final_p);
1728 
1729 		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1730 			sk->sk_err_soft = -err;
1731 			return err;
1732 		}
1733 
1734 		ip6_dst_store(sk, dst, NULL);
1735 		sk->sk_route_caps = dst->dev->features &
1736 			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1737 	}
1738 
1739 	return 0;
1740 }
1741 
1742 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1743 {
1744 	struct sock *sk = skb->sk;
1745 	struct inet_sock *inet = inet_sk(sk);
1746 	struct ipv6_pinfo *np = inet6_sk(sk);
1747 	struct flowi fl;
1748 	struct dst_entry *dst;
1749 	struct in6_addr *final_p = NULL, final;
1750 
1751 	memset(&fl, 0, sizeof(fl));
1752 	fl.proto = IPPROTO_TCP;
1753 	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1754 	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1755 	fl.fl6_flowlabel = np->flow_label;
1756 	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1757 	fl.oif = sk->sk_bound_dev_if;
1758 	fl.fl_ip_sport = inet->sport;
1759 	fl.fl_ip_dport = inet->dport;
1760 
1761 	if (np->opt && np->opt->srcrt) {
1762 		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1763 		ipv6_addr_copy(&final, &fl.fl6_dst);
1764 		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1765 		final_p = &final;
1766 	}
1767 
1768 	dst = __sk_dst_check(sk, np->dst_cookie);
1769 
1770 	if (dst == NULL) {
1771 		int err = ip6_dst_lookup(sk, &dst, &fl);
1772 
1773 		if (err) {
1774 			sk->sk_err_soft = -err;
1775 			return err;
1776 		}
1777 
1778 		if (final_p)
1779 			ipv6_addr_copy(&fl.fl6_dst, final_p);
1780 
1781 		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1782 			sk->sk_route_caps = 0;
1783 			return err;
1784 		}
1785 
1786 		ip6_dst_store(sk, dst, NULL);
1787 		sk->sk_route_caps = dst->dev->features &
1788 			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1789 	}
1790 
1791 	skb->dst = dst_clone(dst);
1792 
1793 	/* Restore the final destination after routing is done */
1794 	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1795 
1796 	return ip6_xmit(sk, skb, &fl, np->opt, 0);
1797 }
1798 
1799 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1800 {
1801 	struct ipv6_pinfo *np = inet6_sk(sk);
1802 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1803 
1804 	sin6->sin6_family = AF_INET6;
1805 	ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1806 	sin6->sin6_port	= inet_sk(sk)->dport;
1807 	/* We do not store received flowlabel for TCP */
1808 	sin6->sin6_flowinfo = 0;
1809 	sin6->sin6_scope_id = 0;
1810 	if (sk->sk_bound_dev_if &&
1811 	    ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1812 		sin6->sin6_scope_id = sk->sk_bound_dev_if;
1813 }
1814 
1815 static int tcp_v6_remember_stamp(struct sock *sk)
1816 {
1817 	/* Alas, not yet... */
1818 	return 0;
1819 }
1820 
1821 static struct tcp_func ipv6_specific = {
1822 	.queue_xmit	=	tcp_v6_xmit,
1823 	.send_check	=	tcp_v6_send_check,
1824 	.rebuild_header	=	tcp_v6_rebuild_header,
1825 	.conn_request	=	tcp_v6_conn_request,
1826 	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
1827 	.remember_stamp	=	tcp_v6_remember_stamp,
1828 	.net_header_len	=	sizeof(struct ipv6hdr),
1829 
1830 	.setsockopt	=	ipv6_setsockopt,
1831 	.getsockopt	=	ipv6_getsockopt,
1832 	.addr2sockaddr	=	v6_addr2sockaddr,
1833 	.sockaddr_len	=	sizeof(struct sockaddr_in6)
1834 };
1835 
1836 /*
1837  *	TCP over IPv4 via INET6 API
1838  */
1839 
1840 static struct tcp_func ipv6_mapped = {
1841 	.queue_xmit	=	ip_queue_xmit,
1842 	.send_check	=	tcp_v4_send_check,
1843 	.rebuild_header	=	inet_sk_rebuild_header,
1844 	.conn_request	=	tcp_v6_conn_request,
1845 	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
1846 	.remember_stamp	=	tcp_v4_remember_stamp,
1847 	.net_header_len	=	sizeof(struct iphdr),
1848 
1849 	.setsockopt	=	ipv6_setsockopt,
1850 	.getsockopt	=	ipv6_getsockopt,
1851 	.addr2sockaddr	=	v6_addr2sockaddr,
1852 	.sockaddr_len	=	sizeof(struct sockaddr_in6)
1853 };
1854 
1855 
1856 
1857 /* NOTE: A lot of things are set to zero explicitly by the call to
1858  *       sk_alloc(), so they need not be done here.
1859  */
1860 static int tcp_v6_init_sock(struct sock *sk)
1861 {
1862 	struct inet_connection_sock *icsk = inet_csk(sk);
1863 	struct tcp_sock *tp = tcp_sk(sk);
1864 
1865 	skb_queue_head_init(&tp->out_of_order_queue);
1866 	tcp_init_xmit_timers(sk);
1867 	tcp_prequeue_init(tp);
1868 
1869 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
1870 	tp->mdev = TCP_TIMEOUT_INIT;
1871 
1872 	/* So many TCP implementations out there (incorrectly) count the
1873 	 * initial SYN frame in their delayed-ACK and congestion control
1874 	 * algorithms that we must have the following bandaid to talk
1875 	 * efficiently to them.  -DaveM
1876 	 */
1877 	tp->snd_cwnd = 2;
1878 
1879 	/* See draft-stevens-tcpca-spec-01 for discussion of the
1880 	 * initialization of these values.
1881 	 */
1882 	tp->snd_ssthresh = 0x7fffffff;
1883 	tp->snd_cwnd_clamp = ~0;
1884 	tp->mss_cache = 536;
1885 
1886 	tp->reordering = sysctl_tcp_reordering;
1887 
1888 	sk->sk_state = TCP_CLOSE;
1889 
1890 	tp->af_specific = &ipv6_specific;
1891 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1892 	sk->sk_write_space = sk_stream_write_space;
1893 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1894 
1895 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
1896 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1897 
1898 	atomic_inc(&tcp_sockets_allocated);
1899 
1900 	return 0;
1901 }
1902 
1903 static int tcp_v6_destroy_sock(struct sock *sk)
1904 {
1905 	tcp_v4_destroy_sock(sk);
1906 	return inet6_destroy_sock(sk);
1907 }
1908 
1909 /* Proc filesystem TCPv6 sock list dumping. */
1910 static void get_openreq6(struct seq_file *seq,
1911 			 struct sock *sk, struct request_sock *req, int i, int uid)
1912 {
1913 	struct in6_addr *dest, *src;
1914 	int ttd = req->expires - jiffies;
1915 
1916 	if (ttd < 0)
1917 		ttd = 0;
1918 
1919 	src = &tcp6_rsk(req)->loc_addr;
1920 	dest = &tcp6_rsk(req)->rmt_addr;
1921 	seq_printf(seq,
1922 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1923 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1924 		   i,
1925 		   src->s6_addr32[0], src->s6_addr32[1],
1926 		   src->s6_addr32[2], src->s6_addr32[3],
1927 		   ntohs(inet_sk(sk)->sport),
1928 		   dest->s6_addr32[0], dest->s6_addr32[1],
1929 		   dest->s6_addr32[2], dest->s6_addr32[3],
1930 		   ntohs(inet_rsk(req)->rmt_port),
1931 		   TCP_SYN_RECV,
1932 		   0,0, /* could print option size, but that is af dependent. */
1933 		   1,   /* timers active (only the expire timer) */
1934 		   jiffies_to_clock_t(ttd),
1935 		   req->retrans,
1936 		   uid,
1937 		   0,  /* non standard timer */
1938 		   0, /* open_requests have no inode */
1939 		   0, req);
1940 }
1941 
1942 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1943 {
1944 	struct in6_addr *dest, *src;
1945 	__u16 destp, srcp;
1946 	int timer_active;
1947 	unsigned long timer_expires;
1948 	struct inet_sock *inet = inet_sk(sp);
1949 	struct tcp_sock *tp = tcp_sk(sp);
1950 	const struct inet_connection_sock *icsk = inet_csk(sp);
1951 	struct ipv6_pinfo *np = inet6_sk(sp);
1952 
1953 	dest  = &np->daddr;
1954 	src   = &np->rcv_saddr;
1955 	destp = ntohs(inet->dport);
1956 	srcp  = ntohs(inet->sport);
1957 
1958 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1959 		timer_active	= 1;
1960 		timer_expires	= icsk->icsk_timeout;
1961 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1962 		timer_active	= 4;
1963 		timer_expires	= icsk->icsk_timeout;
1964 	} else if (timer_pending(&sp->sk_timer)) {
1965 		timer_active	= 2;
1966 		timer_expires	= sp->sk_timer.expires;
1967 	} else {
1968 		timer_active	= 0;
1969 		timer_expires = jiffies;
1970 	}
1971 
1972 	seq_printf(seq,
1973 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1974 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
1975 		   i,
1976 		   src->s6_addr32[0], src->s6_addr32[1],
1977 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
1978 		   dest->s6_addr32[0], dest->s6_addr32[1],
1979 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
1980 		   sp->sk_state,
1981 		   tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
1982 		   timer_active,
1983 		   jiffies_to_clock_t(timer_expires - jiffies),
1984 		   icsk->icsk_retransmits,
1985 		   sock_i_uid(sp),
1986 		   icsk->icsk_probes_out,
1987 		   sock_i_ino(sp),
1988 		   atomic_read(&sp->sk_refcnt), sp,
1989 		   icsk->icsk_rto,
1990 		   icsk->icsk_ack.ato,
1991 		   (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
1992 		   tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
1993 		   );
1994 }
1995 
1996 static void get_timewait6_sock(struct seq_file *seq,
1997 			       struct inet_timewait_sock *tw, int i)
1998 {
1999 	struct in6_addr *dest, *src;
2000 	__u16 destp, srcp;
2001 	struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
2002 	int ttd = tw->tw_ttd - jiffies;
2003 
2004 	if (ttd < 0)
2005 		ttd = 0;
2006 
2007 	dest = &tcp6tw->tw_v6_daddr;
2008 	src  = &tcp6tw->tw_v6_rcv_saddr;
2009 	destp = ntohs(tw->tw_dport);
2010 	srcp  = ntohs(tw->tw_sport);
2011 
2012 	seq_printf(seq,
2013 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2014 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2015 		   i,
2016 		   src->s6_addr32[0], src->s6_addr32[1],
2017 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2018 		   dest->s6_addr32[0], dest->s6_addr32[1],
2019 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2020 		   tw->tw_substate, 0, 0,
2021 		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2022 		   atomic_read(&tw->tw_refcnt), tw);
2023 }
2024 
2025 #ifdef CONFIG_PROC_FS
2026 static int tcp6_seq_show(struct seq_file *seq, void *v)
2027 {
2028 	struct tcp_iter_state *st;
2029 
2030 	if (v == SEQ_START_TOKEN) {
2031 		seq_puts(seq,
2032 			 "  sl  "
2033 			 "local_address                         "
2034 			 "remote_address                        "
2035 			 "st tx_queue rx_queue tr tm->when retrnsmt"
2036 			 "   uid  timeout inode\n");
2037 		goto out;
2038 	}
2039 	st = seq->private;
2040 
2041 	switch (st->state) {
2042 	case TCP_SEQ_STATE_LISTENING:
2043 	case TCP_SEQ_STATE_ESTABLISHED:
2044 		get_tcp6_sock(seq, v, st->num);
2045 		break;
2046 	case TCP_SEQ_STATE_OPENREQ:
2047 		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2048 		break;
2049 	case TCP_SEQ_STATE_TIME_WAIT:
2050 		get_timewait6_sock(seq, v, st->num);
2051 		break;
2052 	}
2053 out:
2054 	return 0;
2055 }
2056 
2057 static struct file_operations tcp6_seq_fops;
2058 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2059 	.owner		= THIS_MODULE,
2060 	.name		= "tcp6",
2061 	.family		= AF_INET6,
2062 	.seq_show	= tcp6_seq_show,
2063 	.seq_fops	= &tcp6_seq_fops,
2064 };
2065 
2066 int __init tcp6_proc_init(void)
2067 {
2068 	return tcp_proc_register(&tcp6_seq_afinfo);
2069 }
2070 
2071 void tcp6_proc_exit(void)
2072 {
2073 	tcp_proc_unregister(&tcp6_seq_afinfo);
2074 }
2075 #endif
2076 
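/*
 * The proto table that plugs the functions above into the generic socket
 * layer for AF_INET6/SOCK_STREAM sockets; tcpv6_protocol and tcpv6_protosw
 * below register the receive/error handlers and the protocol switch entry.
 */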
2077 struct proto tcpv6_prot = {
2078 	.name			= "TCPv6",
2079 	.owner			= THIS_MODULE,
2080 	.close			= tcp_close,
2081 	.connect		= tcp_v6_connect,
2082 	.disconnect		= tcp_disconnect,
2083 	.accept			= inet_csk_accept,
2084 	.ioctl			= tcp_ioctl,
2085 	.init			= tcp_v6_init_sock,
2086 	.destroy		= tcp_v6_destroy_sock,
2087 	.shutdown		= tcp_shutdown,
2088 	.setsockopt		= tcp_setsockopt,
2089 	.getsockopt		= tcp_getsockopt,
2090 	.sendmsg		= tcp_sendmsg,
2091 	.recvmsg		= tcp_recvmsg,
2092 	.backlog_rcv		= tcp_v6_do_rcv,
2093 	.hash			= tcp_v6_hash,
2094 	.unhash			= tcp_unhash,
2095 	.get_port		= tcp_v6_get_port,
2096 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2097 	.sockets_allocated	= &tcp_sockets_allocated,
2098 	.memory_allocated	= &tcp_memory_allocated,
2099 	.memory_pressure	= &tcp_memory_pressure,
2100 	.orphan_count		= &tcp_orphan_count,
2101 	.sysctl_mem		= sysctl_tcp_mem,
2102 	.sysctl_wmem		= sysctl_tcp_wmem,
2103 	.sysctl_rmem		= sysctl_tcp_rmem,
2104 	.max_header		= MAX_TCP_HEADER,
2105 	.obj_size		= sizeof(struct tcp6_sock),
2106 	.twsk_obj_size		= sizeof(struct tcp6_timewait_sock),
2107 	.rsk_prot		= &tcp6_request_sock_ops,
2108 };
2109 
2110 static struct inet6_protocol tcpv6_protocol = {
2111 	.handler	=	tcp_v6_rcv,
2112 	.err_handler	=	tcp_v6_err,
2113 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2114 };
2115 
2116 static struct inet_protosw tcpv6_protosw = {
2117 	.type		=	SOCK_STREAM,
2118 	.protocol	=	IPPROTO_TCP,
2119 	.prot		=	&tcpv6_prot,
2120 	.ops		=	&inet6_stream_ops,
2121 	.capability	=	-1,
2122 	.no_check	=	0,
2123 	.flags		=	INET_PROTOSW_PERMANENT,
2124 };
2125 
2126 void __init tcpv6_init(void)
2127 {
2128 	/* register inet6 protocol */
2129 	if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2130 		printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2131 	inet6_register_protosw(&tcpv6_protosw);
2132 }
2133