1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	TCP over IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on:
10  *	linux/net/ipv4/tcp.c
11  *	linux/net/ipv4/tcp_input.c
12  *	linux/net/ipv4/tcp_output.c
13  *
14  *	Fixes:
15  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
16  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
17  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
18  *					a single port at the same time.
19  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
20  */
21 
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43 
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62 
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65 
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68 
69 #include <trace/events/tcp.h>
70 
71 static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 				      struct request_sock *req);
74 
75 static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76 
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 static const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 						   const struct in6_addr *addr)
85 {
86 	return NULL;
87 }
88 #endif
89 
90 /* Helper returning the inet6 address from a given tcp socket.
91  * It can be used in the TCP stack instead of inet6_sk(sk).
92  * This avoids a dereference and allows compiler optimizations.
93  * It is a specialized version of inet6_sk_generic().
94  */
95 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
96 {
97 	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
98 
99 	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
100 }
101 
102 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
103 {
104 	struct dst_entry *dst = skb_dst(skb);
105 
106 	if (dst && dst_hold_safe(dst)) {
107 		const struct rt6_info *rt = (const struct rt6_info *)dst;
108 
109 		sk->sk_rx_dst = dst;
110 		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
111 		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
112 	}
113 }
114 
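/* Derive the initial sequence number for a connection from the flow's
 * addresses and ports. secure_tcpv6_seq() mixes the 4-tuple with a
 * boot-time secret and a clock component, so ISNs are hard for an
 * off-path attacker to predict (in the spirit of RFC 6528).
 */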
115 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
116 {
117 	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
118 				ipv6_hdr(skb)->saddr.s6_addr32,
119 				tcp_hdr(skb)->dest,
120 				tcp_hdr(skb)->source);
121 }
122 
123 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
124 {
125 	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
126 				   ipv6_hdr(skb)->saddr.s6_addr32);
127 }
128 
129 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
130 			      int addr_len)
131 {
132 	/* This check is replicated from tcp_v6_connect() and intended to
133 	 * prevent the BPF program called below from accessing bytes that are
134 	 * outside the bounds the user specified in addr_len.
135 	 */
136 	if (addr_len < SIN6_LEN_RFC2133)
137 		return -EINVAL;
138 
139 	sock_owned_by_me(sk);
140 
141 	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
142 }
143 
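/* Active open for an IPv6 (or v4-mapped) destination: validate the
 * address, resolve a route and source address, bind a local port via
 * inet6_hash_connect(), and send the SYN through tcp_connect(). Fast
 * Open may defer the actual SYN until the first sendmsg().
 */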
144 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
145 			  int addr_len)
146 {
147 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
148 	struct inet_sock *inet = inet_sk(sk);
149 	struct inet_connection_sock *icsk = inet_csk(sk);
150 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
151 	struct tcp_sock *tp = tcp_sk(sk);
152 	struct in6_addr *saddr = NULL, *final_p, final;
153 	struct ipv6_txoptions *opt;
154 	struct flowi6 fl6;
155 	struct dst_entry *dst;
156 	int addr_type;
157 	int err;
158 	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
159 
160 	if (addr_len < SIN6_LEN_RFC2133)
161 		return -EINVAL;
162 
163 	if (usin->sin6_family != AF_INET6)
164 		return -EAFNOSUPPORT;
165 
166 	memset(&fl6, 0, sizeof(fl6));
167 
168 	if (np->sndflow) {
169 		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
170 		IP6_ECN_flow_init(fl6.flowlabel);
171 		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
172 			struct ip6_flowlabel *flowlabel;
173 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
174 			if (IS_ERR(flowlabel))
175 				return -EINVAL;
176 			fl6_sock_release(flowlabel);
177 		}
178 	}
179 
180 	/*
181 	 *	connect() to INADDR_ANY means loopback (BSD'ism).
182 	 */
183 
184 	if (ipv6_addr_any(&usin->sin6_addr)) {
185 		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
186 			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
187 					       &usin->sin6_addr);
188 		else
189 			usin->sin6_addr = in6addr_loopback;
190 	}
191 
192 	addr_type = ipv6_addr_type(&usin->sin6_addr);
193 
194 	if (addr_type & IPV6_ADDR_MULTICAST)
195 		return -ENETUNREACH;
196 
197 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
198 		if (addr_len >= sizeof(struct sockaddr_in6) &&
199 		    usin->sin6_scope_id) {
200 			/* If interface is set while binding, indices
201 			 * must coincide.
202 			 */
203 			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
204 				return -EINVAL;
205 
206 			sk->sk_bound_dev_if = usin->sin6_scope_id;
207 		}
208 
209 		/* Connect to link-local address requires an interface */
210 		if (!sk->sk_bound_dev_if)
211 			return -EINVAL;
212 	}
213 
214 	if (tp->rx_opt.ts_recent_stamp &&
215 	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
216 		tp->rx_opt.ts_recent = 0;
217 		tp->rx_opt.ts_recent_stamp = 0;
218 		tp->write_seq = 0;
219 	}
220 
221 	sk->sk_v6_daddr = usin->sin6_addr;
222 	np->flow_label = fl6.flowlabel;
223 
224 	/*
225 	 *	TCP over IPv4
226 	 */
227 
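	/* A v4-mapped destination (::ffff:a.b.c.d) is handed over to
	 * tcp_v4_connect(); the socket temporarily flips to the ipv6_mapped
	 * af_ops so further traffic uses the IPv4 paths, and flips back on
	 * failure.
	 */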
228 	if (addr_type & IPV6_ADDR_MAPPED) {
229 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
230 		struct sockaddr_in sin;
231 
232 		if (__ipv6_only_sock(sk))
233 			return -ENETUNREACH;
234 
235 		sin.sin_family = AF_INET;
236 		sin.sin_port = usin->sin6_port;
237 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
238 
239 		icsk->icsk_af_ops = &ipv6_mapped;
240 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
241 #ifdef CONFIG_TCP_MD5SIG
242 		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
243 #endif
244 
245 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
246 
247 		if (err) {
248 			icsk->icsk_ext_hdr_len = exthdrlen;
249 			icsk->icsk_af_ops = &ipv6_specific;
250 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
251 #ifdef CONFIG_TCP_MD5SIG
252 			tp->af_specific = &tcp_sock_ipv6_specific;
253 #endif
254 			goto failure;
255 		}
256 		np->saddr = sk->sk_v6_rcv_saddr;
257 
258 		return err;
259 	}
260 
261 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
262 		saddr = &sk->sk_v6_rcv_saddr;
263 
264 	fl6.flowi6_proto = IPPROTO_TCP;
265 	fl6.daddr = sk->sk_v6_daddr;
266 	fl6.saddr = saddr ? *saddr : np->saddr;
267 	fl6.flowi6_oif = sk->sk_bound_dev_if;
268 	fl6.flowi6_mark = sk->sk_mark;
269 	fl6.fl6_dport = usin->sin6_port;
270 	fl6.fl6_sport = inet->inet_sport;
271 	fl6.flowi6_uid = sk->sk_uid;
272 
273 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
274 	final_p = fl6_update_dst(&fl6, opt, &final);
275 
276 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
277 
278 	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
279 	if (IS_ERR(dst)) {
280 		err = PTR_ERR(dst);
281 		goto failure;
282 	}
283 
284 	if (!saddr) {
285 		saddr = &fl6.saddr;
286 		sk->sk_v6_rcv_saddr = *saddr;
287 	}
288 
289 	/* set the source address */
290 	np->saddr = *saddr;
291 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
292 
293 	sk->sk_gso_type = SKB_GSO_TCPV6;
294 	ip6_dst_store(sk, dst, NULL, NULL);
295 
296 	icsk->icsk_ext_hdr_len = 0;
297 	if (opt)
298 		icsk->icsk_ext_hdr_len = opt->opt_flen +
299 					 opt->opt_nflen;
300 
301 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
302 
303 	inet->inet_dport = usin->sin6_port;
304 
305 	tcp_set_state(sk, TCP_SYN_SENT);
306 	err = inet6_hash_connect(tcp_death_row, sk);
307 	if (err)
308 		goto late_failure;
309 
310 	sk_set_txhash(sk);
311 
312 	if (likely(!tp->repair)) {
313 		if (!tp->write_seq)
314 			tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32,
315 							 sk->sk_v6_daddr.s6_addr32,
316 							 inet->inet_sport,
317 							 inet->inet_dport);
318 		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
319 						   np->saddr.s6_addr32,
320 						   sk->sk_v6_daddr.s6_addr32);
321 	}
322 
323 	if (tcp_fastopen_defer_connect(sk, &err))
324 		return err;
325 	if (err)
326 		goto late_failure;
327 
328 	err = tcp_connect(sk);
329 	if (err)
330 		goto late_failure;
331 
332 	return 0;
333 
334 late_failure:
335 	tcp_set_state(sk, TCP_CLOSE);
336 failure:
337 	inet->inet_dport = 0;
338 	sk->sk_route_caps = 0;
339 	return err;
340 }
341 
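/* PMTU handler, called directly from tcp_v6_err() or deferred via
 * TCP_MTU_REDUCED_DEFERRED: re-route using the MTU cached in
 * tp->mtu_info, and if the new path MTU is below the cached pmtu cookie,
 * shrink the MSS and retransmit what no longer fits.
 */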
342 static void tcp_v6_mtu_reduced(struct sock *sk)
343 {
344 	struct dst_entry *dst;
345 
346 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
347 		return;
348 
349 	dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
350 	if (!dst)
351 		return;
352 
353 	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
354 		tcp_sync_mss(sk, dst_mtu(dst));
355 		tcp_simple_retransmit(sk);
356 	}
357 }
358 
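/* ICMPv6 error handler. Map the offending TCP segment back to its
 * socket, then act on the error: NDISC_REDIRECT updates the cached
 * route, ICMPV6_PKT_TOOBIG triggers PMTU handling (deferred if the
 * socket is owned by user context), and fatal errors tear the
 * connection down.
 */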
359 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
360 		u8 type, u8 code, int offset, __be32 info)
361 {
362 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
363 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
364 	struct net *net = dev_net(skb->dev);
365 	struct request_sock *fastopen;
366 	struct ipv6_pinfo *np;
367 	struct tcp_sock *tp;
368 	__u32 seq, snd_una;
369 	struct sock *sk;
370 	bool fatal;
371 	int err;
372 
373 	sk = __inet6_lookup_established(net, &tcp_hashinfo,
374 					&hdr->daddr, th->dest,
375 					&hdr->saddr, ntohs(th->source),
376 					skb->dev->ifindex, inet6_sdif(skb));
377 
378 	if (!sk) {
379 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
380 				  ICMP6_MIB_INERRORS);
381 		return -ENOENT;
382 	}
383 
384 	if (sk->sk_state == TCP_TIME_WAIT) {
385 		inet_twsk_put(inet_twsk(sk));
386 		return 0;
387 	}
388 	seq = ntohl(th->seq);
389 	fatal = icmpv6_err_convert(type, code, &err);
390 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
391 		tcp_req_err(sk, seq, fatal);
392 		return 0;
393 	}
394 
395 	bh_lock_sock(sk);
396 	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
397 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
398 
399 	if (sk->sk_state == TCP_CLOSE)
400 		goto out;
401 
402 	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
403 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
404 		goto out;
405 	}
406 
407 	tp = tcp_sk(sk);
408 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
409 	fastopen = tp->fastopen_rsk;
410 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
411 	if (sk->sk_state != TCP_LISTEN &&
412 	    !between(seq, snd_una, tp->snd_nxt)) {
413 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
414 		goto out;
415 	}
416 
417 	np = tcp_inet6_sk(sk);
418 
419 	if (type == NDISC_REDIRECT) {
420 		if (!sock_owned_by_user(sk)) {
421 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
422 
423 			if (dst)
424 				dst->ops->redirect(dst, sk, skb);
425 		}
426 		goto out;
427 	}
428 
429 	if (type == ICMPV6_PKT_TOOBIG) {
430 		/* We are not interested in TCP_LISTEN and open_requests
431 		 * (SYN-ACKs sent out by Linux are always <576 bytes, so
432 		 * they should go through unfragmented).
433 		 */
434 		if (sk->sk_state == TCP_LISTEN)
435 			goto out;
436 
437 		if (!ip6_sk_accept_pmtu(sk))
438 			goto out;
439 
440 		tp->mtu_info = ntohl(info);
441 		if (!sock_owned_by_user(sk))
442 			tcp_v6_mtu_reduced(sk);
443 		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
444 					   &sk->sk_tsq_flags))
445 			sock_hold(sk);
446 		goto out;
447 	}
448 
449 
450 	/* Might be for a request_sock */
451 	switch (sk->sk_state) {
452 	case TCP_SYN_SENT:
453 	case TCP_SYN_RECV:
454 		/* Only in fast or simultaneous open. If a fast open socket
455 		 * is already accepted, it is treated as a connected one below.
456 		 */
457 		if (fastopen && !fastopen->sk)
458 			break;
459 
460 		if (!sock_owned_by_user(sk)) {
461 			sk->sk_err = err;
462 			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
463 
464 			tcp_done(sk);
465 		} else
466 			sk->sk_err_soft = err;
467 		goto out;
468 	}
469 
470 	if (!sock_owned_by_user(sk) && np->recverr) {
471 		sk->sk_err = err;
472 		sk->sk_error_report(sk);
473 	} else
474 		sk->sk_err_soft = err;
475 
476 out:
477 	bh_unlock_sock(sk);
478 	sock_put(sk);
479 	return 0;
480 }
481 
482 
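/* Transmit a SYN-ACK for a (possibly Fast Open) request: grab a route if
 * the caller didn't pass one, build the segment with tcp_make_synack(),
 * fill in the checksum, and send it with the listener's IPv6 options and
 * traffic class.
 */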
483 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
484 			      struct flowi *fl,
485 			      struct request_sock *req,
486 			      struct tcp_fastopen_cookie *foc,
487 			      enum tcp_synack_type synack_type)
488 {
489 	struct inet_request_sock *ireq = inet_rsk(req);
490 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
491 	struct ipv6_txoptions *opt;
492 	struct flowi6 *fl6 = &fl->u.ip6;
493 	struct sk_buff *skb;
494 	int err = -ENOMEM;
495 
496 	/* First, grab a route. */
497 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
498 					       IPPROTO_TCP)) == NULL)
499 		goto done;
500 
501 	skb = tcp_make_synack(sk, dst, req, foc, synack_type);
502 
503 	if (skb) {
504 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
505 				    &ireq->ir_v6_rmt_addr);
506 
507 		fl6->daddr = ireq->ir_v6_rmt_addr;
508 		if (np->repflow && ireq->pktopts)
509 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
510 
511 		rcu_read_lock();
512 		opt = ireq->ipv6_opt;
513 		if (!opt)
514 			opt = rcu_dereference(np->opt);
515 		err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass);
516 		rcu_read_unlock();
517 		err = net_xmit_eval(err);
518 	}
519 
520 done:
521 	return err;
522 }
523 
524 
525 static void tcp_v6_reqsk_destructor(struct request_sock *req)
526 {
527 	kfree(inet_rsk(req)->ipv6_opt);
528 	kfree_skb(inet_rsk(req)->pktopts);
529 }
530 
531 #ifdef CONFIG_TCP_MD5SIG
532 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
533 						   const struct in6_addr *addr)
534 {
535 	return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
536 }
537 
538 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
539 						const struct sock *addr_sk)
540 {
541 	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
542 }
543 
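/* Parse a TCP_MD5SIG or TCP_MD5SIG_EXT setsockopt() request and add or
 * delete the key (tcpm_keylen == 0 deletes). v4-mapped peers are stored
 * as AF_INET keys. A minimal userspace sketch; the address and key below
 * are illustrative only:
 *
 *	struct tcp_md5sig md5 = {};
 *	struct sockaddr_in6 *a = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *	a->sin6_family = AF_INET6;
 *	inet_pton(AF_INET6, "2001:db8::1", &a->sin6_addr);
 *	md5.tcpm_keylen = 6;
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 */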
544 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
545 				 char __user *optval, int optlen)
546 {
547 	struct tcp_md5sig cmd;
548 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
549 	u8 prefixlen;
550 
551 	if (optlen < sizeof(cmd))
552 		return -EINVAL;
553 
554 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
555 		return -EFAULT;
556 
557 	if (sin6->sin6_family != AF_INET6)
558 		return -EINVAL;
559 
560 	if (optname == TCP_MD5SIG_EXT &&
561 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
562 		prefixlen = cmd.tcpm_prefixlen;
563 		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
564 					prefixlen > 32))
565 			return -EINVAL;
566 	} else {
567 		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
568 	}
569 
570 	if (!cmd.tcpm_keylen) {
571 		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
572 			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
573 					      AF_INET, prefixlen);
574 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
575 				      AF_INET6, prefixlen);
576 	}
577 
578 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
579 		return -EINVAL;
580 
581 	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
582 		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
583 				      AF_INET, prefixlen, cmd.tcpm_key,
584 				      cmd.tcpm_keylen, GFP_KERNEL);
585 
586 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
587 			      AF_INET6, prefixlen, cmd.tcpm_key,
588 			      cmd.tcpm_keylen, GFP_KERNEL);
589 }
590 
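/* Feed the fixed-size prefix of the RFC 2385 MD5 input into the hash: a
 * TCP pseudo-header (saddr, daddr, protocol, length) followed by the TCP
 * header itself with its checksum field zeroed.
 */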
591 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
592 				   const struct in6_addr *daddr,
593 				   const struct in6_addr *saddr,
594 				   const struct tcphdr *th, int nbytes)
595 {
596 	struct tcp6_pseudohdr *bp;
597 	struct scatterlist sg;
598 	struct tcphdr *_th;
599 
600 	bp = hp->scratch;
601 	/* 1. TCP pseudo-header (RFC2460) */
602 	bp->saddr = *saddr;
603 	bp->daddr = *daddr;
604 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
605 	bp->len = cpu_to_be32(nbytes);
606 
607 	_th = (struct tcphdr *)(bp + 1);
608 	memcpy(_th, th, sizeof(*th));
609 	_th->check = 0;
610 
611 	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
612 	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
613 				sizeof(*bp) + sizeof(*th));
614 	return crypto_ahash_update(hp->md5_req);
615 }
616 
617 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
618 			       const struct in6_addr *daddr, struct in6_addr *saddr,
619 			       const struct tcphdr *th)
620 {
621 	struct tcp_md5sig_pool *hp;
622 	struct ahash_request *req;
623 
624 	hp = tcp_get_md5sig_pool();
625 	if (!hp)
626 		goto clear_hash_noput;
627 	req = hp->md5_req;
628 
629 	if (crypto_ahash_init(req))
630 		goto clear_hash;
631 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
632 		goto clear_hash;
633 	if (tcp_md5_hash_key(hp, key))
634 		goto clear_hash;
635 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
636 	if (crypto_ahash_final(req))
637 		goto clear_hash;
638 
639 	tcp_put_md5sig_pool();
640 	return 0;
641 
642 clear_hash:
643 	tcp_put_md5sig_pool();
644 clear_hash_noput:
645 	memset(md5_hash, 0, 16);
646 	return 1;
647 }
648 
649 static int tcp_v6_md5_hash_skb(char *md5_hash,
650 			       const struct tcp_md5sig_key *key,
651 			       const struct sock *sk,
652 			       const struct sk_buff *skb)
653 {
654 	const struct in6_addr *saddr, *daddr;
655 	struct tcp_md5sig_pool *hp;
656 	struct ahash_request *req;
657 	const struct tcphdr *th = tcp_hdr(skb);
658 
659 	if (sk) { /* valid for establish/request sockets */
660 		saddr = &sk->sk_v6_rcv_saddr;
661 		daddr = &sk->sk_v6_daddr;
662 	} else {
663 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
664 		saddr = &ip6h->saddr;
665 		daddr = &ip6h->daddr;
666 	}
667 
668 	hp = tcp_get_md5sig_pool();
669 	if (!hp)
670 		goto clear_hash_noput;
671 	req = hp->md5_req;
672 
673 	if (crypto_ahash_init(req))
674 		goto clear_hash;
675 
676 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
677 		goto clear_hash;
678 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
679 		goto clear_hash;
680 	if (tcp_md5_hash_key(hp, key))
681 		goto clear_hash;
682 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
683 	if (crypto_ahash_final(req))
684 		goto clear_hash;
685 
686 	tcp_put_md5sig_pool();
687 	return 0;
688 
689 clear_hash:
690 	tcp_put_md5sig_pool();
691 clear_hash_noput:
692 	memset(md5_hash, 0, 16);
693 	return 1;
694 }
695 
696 #endif
697 
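/* Validate the MD5 signature of an incoming segment against the key (if
 * any) configured for its source address. Returns true when the segment
 * must be dropped: key without option, option without key, or a hash
 * mismatch; each case bumps a dedicated MIB counter.
 */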
698 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
699 				    const struct sk_buff *skb)
700 {
701 #ifdef CONFIG_TCP_MD5SIG
702 	const __u8 *hash_location = NULL;
703 	struct tcp_md5sig_key *hash_expected;
704 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
705 	const struct tcphdr *th = tcp_hdr(skb);
706 	int genhash;
707 	u8 newhash[16];
708 
709 	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
710 	hash_location = tcp_parse_md5sig_option(th);
711 
712 	/* We've parsed the options - do we have a hash? */
713 	if (!hash_expected && !hash_location)
714 		return false;
715 
716 	if (hash_expected && !hash_location) {
717 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
718 		return true;
719 	}
720 
721 	if (!hash_expected && hash_location) {
722 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
723 		return true;
724 	}
725 
726 	/* check the signature */
727 	genhash = tcp_v6_md5_hash_skb(newhash,
728 				      hash_expected,
729 				      NULL, skb);
730 
731 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
732 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
733 		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
734 				     genhash ? "failed" : "mismatch",
735 				     &ip6h->saddr, ntohs(th->source),
736 				     &ip6h->daddr, ntohs(th->dest));
737 		return true;
738 	}
739 #endif
740 	return false;
741 }
742 
743 static void tcp_v6_init_req(struct request_sock *req,
744 			    const struct sock *sk_listener,
745 			    struct sk_buff *skb)
746 {
747 	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
748 	struct inet_request_sock *ireq = inet_rsk(req);
749 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
750 
751 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
752 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
753 
754 	/* So that link locals have meaning */
755 	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
756 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
757 		ireq->ir_iif = tcp_v6_iif(skb);
758 
759 	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
760 	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
761 	     np->rxopt.bits.rxinfo ||
762 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
763 	     np->rxopt.bits.rxohlim || np->repflow)) {
764 		refcount_inc(&skb->users);
765 		ireq->pktopts = skb;
766 	}
767 }
768 
769 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
770 					  struct flowi *fl,
771 					  const struct request_sock *req)
772 {
773 	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
774 }
775 
776 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
777 	.family		=	AF_INET6,
778 	.obj_size	=	sizeof(struct tcp6_request_sock),
779 	.rtx_syn_ack	=	tcp_rtx_synack,
780 	.send_ack	=	tcp_v6_reqsk_send_ack,
781 	.destructor	=	tcp_v6_reqsk_destructor,
782 	.send_reset	=	tcp_v6_send_reset,
783 	.syn_ack_timeout =	tcp_syn_ack_timeout,
784 };
785 
786 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
787 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
788 				sizeof(struct ipv6hdr),
789 #ifdef CONFIG_TCP_MD5SIG
790 	.req_md5_lookup	=	tcp_v6_md5_lookup,
791 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
792 #endif
793 	.init_req	=	tcp_v6_init_req,
794 #ifdef CONFIG_SYN_COOKIES
795 	.cookie_init_seq =	cookie_v6_init_sequence,
796 #endif
797 	.route_req	=	tcp_v6_route_req,
798 	.init_seq	=	tcp_v6_init_seq,
799 	.init_ts_off	=	tcp_v6_init_ts_off,
800 	.send_synack	=	tcp_v6_send_synack,
801 };
802 
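/* Build and send a bare ACK or RST on the per-netns control socket,
 * without relying on full socket state: addresses, ports and (optionally)
 * timestamp and MD5 options are taken from the incoming skb with the
 * directions swapped.
 */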
803 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
804 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
805 				 int oif, struct tcp_md5sig_key *key, int rst,
806 				 u8 tclass, __be32 label)
807 {
808 	const struct tcphdr *th = tcp_hdr(skb);
809 	struct tcphdr *t1;
810 	struct sk_buff *buff;
811 	struct flowi6 fl6;
812 	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
813 	struct sock *ctl_sk = net->ipv6.tcp_sk;
814 	unsigned int tot_len = sizeof(struct tcphdr);
815 	struct dst_entry *dst;
816 	__be32 *topt;
817 	__u32 mark = 0;
818 
819 	if (tsecr)
820 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
821 #ifdef CONFIG_TCP_MD5SIG
822 	if (key)
823 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
824 #endif
825 
826 	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
827 			 GFP_ATOMIC);
828 	if (!buff)
829 		return;
830 
831 	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
832 
833 	t1 = skb_push(buff, tot_len);
834 	skb_reset_transport_header(buff);
835 
836 	/* Swap the send and the receive. */
837 	memset(t1, 0, sizeof(*t1));
838 	t1->dest = th->source;
839 	t1->source = th->dest;
840 	t1->doff = tot_len / 4;
841 	t1->seq = htonl(seq);
842 	t1->ack_seq = htonl(ack);
843 	t1->ack = !rst || !th->ack;
844 	t1->rst = rst;
845 	t1->window = htons(win);
846 
847 	topt = (__be32 *)(t1 + 1);
848 
849 	if (tsecr) {
850 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
851 				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
852 		*topt++ = htonl(tsval);
853 		*topt++ = htonl(tsecr);
854 	}
855 
856 #ifdef CONFIG_TCP_MD5SIG
857 	if (key) {
858 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
859 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
860 		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
861 				    &ipv6_hdr(skb)->saddr,
862 				    &ipv6_hdr(skb)->daddr, t1);
863 	}
864 #endif
865 
866 	memset(&fl6, 0, sizeof(fl6));
867 	fl6.daddr = ipv6_hdr(skb)->saddr;
868 	fl6.saddr = ipv6_hdr(skb)->daddr;
869 	fl6.flowlabel = label;
870 
871 	buff->ip_summed = CHECKSUM_PARTIAL;
872 	buff->csum = 0;
873 
874 	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
875 
876 	fl6.flowi6_proto = IPPROTO_TCP;
877 	if (rt6_need_strict(&fl6.daddr) && !oif)
878 		fl6.flowi6_oif = tcp_v6_iif(skb);
879 	else {
880 		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
881 			oif = skb->skb_iif;
882 
883 		fl6.flowi6_oif = oif;
884 	}
885 
886 	if (sk) {
887 		if (sk->sk_state == TCP_TIME_WAIT) {
888 			mark = inet_twsk(sk)->tw_mark;
889 			/* autoflowlabel relies on buff->hash */
890 			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
891 				     PKT_HASH_TYPE_L4);
892 		} else {
893 			mark = sk->sk_mark;
894 		}
895 		buff->tstamp = tcp_transmit_time(sk);
896 	}
897 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
898 	fl6.fl6_dport = t1->dest;
899 	fl6.fl6_sport = t1->source;
900 	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
901 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
902 
903 	/* Pass the control socket to ip6_dst_lookup_flow even when the reply
904 	 * is a RST; the underlying function uses it to retrieve the network
905 	 * namespace.
906 	 */
907 	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
908 	if (!IS_ERR(dst)) {
909 		skb_dst_set(buff, dst);
910 		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
911 		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
912 		if (rst)
913 			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
914 		return;
915 	}
916 
917 	kfree_skb(buff);
918 }
919 
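/* Send a RST in response to skb, following the RFC 793 rules: if the
 * offending segment carried an ACK, the RST reuses that ACK value as its
 * sequence number; otherwise the RST acknowledges everything the segment
 * occupied. Never reset a RST.
 */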
920 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
921 {
922 	const struct tcphdr *th = tcp_hdr(skb);
923 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
924 	u32 seq = 0, ack_seq = 0;
925 	struct tcp_md5sig_key *key = NULL;
926 #ifdef CONFIG_TCP_MD5SIG
927 	const __u8 *hash_location = NULL;
928 	unsigned char newhash[16];
929 	int genhash;
930 	struct sock *sk1 = NULL;
931 #endif
932 	__be32 label = 0;
933 	struct net *net;
934 	int oif = 0;
935 
936 	if (th->rst)
937 		return;
938 
939 	/* If sk is not NULL, we did a successful lookup and the incoming
940 	 * route had to be correct. prequeue might have dropped our dst.
941 	 */
942 	if (!sk && !ipv6_unicast_destination(skb))
943 		return;
944 
945 	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
946 #ifdef CONFIG_TCP_MD5SIG
947 	rcu_read_lock();
948 	hash_location = tcp_parse_md5sig_option(th);
949 	if (sk && sk_fullsock(sk)) {
950 		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
951 	} else if (hash_location) {
952 		/*
953 		 * The active side is lost. Try to find the listening socket
954 		 * through the source port, then find the md5 key through the
955 		 * listening socket. We do not lose security here: the incoming
956 		 * packet is checked against the key we find, and no RST is
957 		 * generated if the md5 hash doesn't match.
958 		 */
959 		sk1 = inet6_lookup_listener(net,
960 					   &tcp_hashinfo, NULL, 0,
961 					   &ipv6h->saddr,
962 					   th->source, &ipv6h->daddr,
963 					   ntohs(th->source),
964 					   tcp_v6_iif_l3_slave(skb),
965 					   tcp_v6_sdif(skb));
966 		if (!sk1)
967 			goto out;
968 
969 		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
970 		if (!key)
971 			goto out;
972 
973 		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
974 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
975 			goto out;
976 	}
977 #endif
978 
979 	if (th->ack)
980 		seq = ntohl(th->ack_seq);
981 	else
982 		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
983 			  (th->doff << 2);
984 
985 	if (sk) {
986 		oif = sk->sk_bound_dev_if;
987 		if (sk_fullsock(sk)) {
988 			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
989 
990 			trace_tcp_send_reset(sk, skb);
991 			if (np->repflow)
992 				label = ip6_flowlabel(ipv6h);
993 		}
994 		if (sk->sk_state == TCP_TIME_WAIT)
995 			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
996 	} else {
997 		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
998 			label = ip6_flowlabel(ipv6h);
999 	}
1000 
1001 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0,
1002 			     label);
1003 
1004 #ifdef CONFIG_TCP_MD5SIG
1005 out:
1006 	rcu_read_unlock();
1007 #endif
1008 }
1009 
1010 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1011 			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1012 			    struct tcp_md5sig_key *key, u8 tclass,
1013 			    __be32 label)
1014 {
1015 	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1016 			     tclass, label);
1017 }
1018 
1019 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1020 {
1021 	struct inet_timewait_sock *tw = inet_twsk(sk);
1022 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1023 
1024 	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1025 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1026 			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1027 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1028 			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
1029 
1030 	inet_twsk_put(tw);
1031 }
1032 
1033 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1034 				  struct request_sock *req)
1035 {
1036 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1037 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1038 	 */
1039 	/* RFC 7323 2.3
1040 	 * The window field (SEG.WND) of every outgoing segment, with the
1041 	 * exception of <SYN> segments, MUST be right-shifted by
1042 	 * Rcv.Wind.Shift bits:
1043 	 */
1044 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1045 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1046 			tcp_rsk(req)->rcv_nxt,
1047 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1048 			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1049 			req->ts_recent, sk->sk_bound_dev_if,
1050 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
1051 			0, 0);
1052 }
1053 
1054 
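/* A listener received a non-SYN segment: it may carry a SYN cookie in
 * its ACK. cookie_v6_check() returns a freshly reconstructed child
 * socket on success, the unchanged listener when cookies don't apply, or
 * NULL to drop the segment.
 */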
1055 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1056 {
1057 #ifdef CONFIG_SYN_COOKIES
1058 	const struct tcphdr *th = tcp_hdr(skb);
1059 
1060 	if (!th->syn)
1061 		sk = cookie_v6_check(sk, skb);
1062 #endif
1063 	return sk;
1064 }
1065 
1066 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1067 			 struct tcphdr *th, u32 *cookie)
1068 {
1069 	u16 mss = 0;
1070 #ifdef CONFIG_SYN_COOKIES
1071 	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1072 				    &tcp_request_sock_ipv6_ops, sk, th);
1073 	if (mss) {
1074 		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1075 		tcp_synq_overflow(sk);
1076 	}
1077 #endif
1078 	return mss;
1079 }
1080 
1081 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1082 {
1083 	if (skb->protocol == htons(ETH_P_IP))
1084 		return tcp_v4_conn_request(sk, skb);
1085 
1086 	if (!ipv6_unicast_destination(skb))
1087 		goto drop;
1088 
1089 	return tcp_conn_request(&tcp6_request_sock_ops,
1090 				&tcp_request_sock_ipv6_ops, sk, skb);
1091 
1092 drop:
1093 	tcp_listendrop(sk);
1094 	return 0; /* don't send reset */
1095 }
1096 
1097 static void tcp_v6_restore_cb(struct sk_buff *skb)
1098 {
1099 	/* We need to move header back to the beginning if xfrm6_policy_check()
1100 	 * and tcp_v6_fill_cb() are going to be called again.
1101 	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1102 	 */
1103 	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1104 		sizeof(struct inet6_skb_parm));
1105 }
1106 
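/* Create the child socket once the handshake completes (or a valid SYN
 * cookie arrives). The ETH_P_IP branch handles v4-mapped connections by
 * letting tcp_v4_syn_recv_sock() do the work and then re-pointing the
 * child at the mapped af_ops; the native path clones IPv6 options and
 * MD5 state from the listener.
 */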
1107 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1108 					 struct request_sock *req,
1109 					 struct dst_entry *dst,
1110 					 struct request_sock *req_unhash,
1111 					 bool *own_req)
1112 {
1113 	struct inet_request_sock *ireq;
1114 	struct ipv6_pinfo *newnp;
1115 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1116 	struct ipv6_txoptions *opt;
1117 	struct inet_sock *newinet;
1118 	struct tcp_sock *newtp;
1119 	struct sock *newsk;
1120 #ifdef CONFIG_TCP_MD5SIG
1121 	struct tcp_md5sig_key *key;
1122 #endif
1123 	struct flowi6 fl6;
1124 
1125 	if (skb->protocol == htons(ETH_P_IP)) {
1126 		/*
1127 		 *	v6 mapped
1128 		 */
1129 
1130 		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1131 					     req_unhash, own_req);
1132 
1133 		if (!newsk)
1134 			return NULL;
1135 
1136 		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1137 
1138 		newinet = inet_sk(newsk);
1139 		newnp = tcp_inet6_sk(newsk);
1140 		newtp = tcp_sk(newsk);
1141 
1142 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1143 
1144 		newnp->saddr = newsk->sk_v6_rcv_saddr;
1145 
1146 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1147 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1148 #ifdef CONFIG_TCP_MD5SIG
1149 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1150 #endif
1151 
1152 		newnp->ipv6_mc_list = NULL;
1153 		newnp->ipv6_ac_list = NULL;
1154 		newnp->ipv6_fl_list = NULL;
1155 		newnp->pktoptions  = NULL;
1156 		newnp->opt	   = NULL;
1157 		newnp->mcast_oif   = inet_iif(skb);
1158 		newnp->mcast_hops  = ip_hdr(skb)->ttl;
1159 		newnp->rcv_flowinfo = 0;
1160 		if (np->repflow)
1161 			newnp->flow_label = 0;
1162 
1163 		/*
1164 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1165 		 * here, tcp_create_openreq_child now does this for us, see the comment in
1166 		 * that function for the gory details. -acme
1167 		 */
1168 
1169 		/* This is a tricky place. Until this moment the IPv4 tcp
1170 		   code worked with the IPv6 icsk.icsk_af_ops.
1171 		   Sync it now.
1172 		 */
1173 		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1174 
1175 		return newsk;
1176 	}
1177 
1178 	ireq = inet_rsk(req);
1179 
1180 	if (sk_acceptq_is_full(sk))
1181 		goto out_overflow;
1182 
1183 	if (!dst) {
1184 		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1185 		if (!dst)
1186 			goto out;
1187 	}
1188 
1189 	newsk = tcp_create_openreq_child(sk, req, skb);
1190 	if (!newsk)
1191 		goto out_nonewsk;
1192 
1193 	/*
1194 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1195 	 * count here, tcp_create_openreq_child now does this for us, see the
1196 	 * comment in that function for the gory details. -acme
1197 	 */
1198 
1199 	newsk->sk_gso_type = SKB_GSO_TCPV6;
1200 	ip6_dst_store(newsk, dst, NULL, NULL);
1201 	inet6_sk_rx_dst_set(newsk, skb);
1202 
1203 	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1204 
1205 	newtp = tcp_sk(newsk);
1206 	newinet = inet_sk(newsk);
1207 	newnp = tcp_inet6_sk(newsk);
1208 
1209 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1210 
1211 	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1212 	newnp->saddr = ireq->ir_v6_loc_addr;
1213 	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1214 	newsk->sk_bound_dev_if = ireq->ir_iif;
1215 
1216 	/* Now IPv6 options...
1217 
1218 	   First: no IPv4 options.
1219 	 */
1220 	newinet->inet_opt = NULL;
1221 	newnp->ipv6_mc_list = NULL;
1222 	newnp->ipv6_ac_list = NULL;
1223 	newnp->ipv6_fl_list = NULL;
1224 
1225 	/* Clone RX bits */
1226 	newnp->rxopt.all = np->rxopt.all;
1227 
1228 	newnp->pktoptions = NULL;
1229 	newnp->opt	  = NULL;
1230 	newnp->mcast_oif  = tcp_v6_iif(skb);
1231 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1232 	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1233 	if (np->repflow)
1234 		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1235 
1236 	/* Clone native IPv6 options from listening socket (if any)
1237 
1238 	   Yes, keeping reference count would be much more clever,
1239 	   but we do one more thing here: reattach optmem
1240 	   to newsk.
1241 	 */
1242 	opt = ireq->ipv6_opt;
1243 	if (!opt)
1244 		opt = rcu_dereference(np->opt);
1245 	if (opt) {
1246 		opt = ipv6_dup_options(newsk, opt);
1247 		RCU_INIT_POINTER(newnp->opt, opt);
1248 	}
1249 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1250 	if (opt)
1251 		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1252 						    opt->opt_flen;
1253 
1254 	tcp_ca_openreq_child(newsk, dst);
1255 
1256 	tcp_sync_mss(newsk, dst_mtu(dst));
1257 	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1258 
1259 	tcp_initialize_rcv_mss(newsk);
1260 
1261 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1262 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1263 
1264 #ifdef CONFIG_TCP_MD5SIG
1265 	/* Copy over the MD5 key from the original socket */
1266 	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
1267 	if (key) {
1268 		/* We're using one, so create a matching key
1269 		 * on the newsk structure. If we fail to get
1270 		 * memory, then we end up not copying the key
1271 		 * across. Shucks.
1272 		 */
1273 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1274 			       AF_INET6, 128, key->key, key->keylen,
1275 			       sk_gfp_mask(sk, GFP_ATOMIC));
1276 	}
1277 #endif
1278 
1279 	if (__inet_inherit_port(sk, newsk) < 0) {
1280 		inet_csk_prepare_forced_close(newsk);
1281 		tcp_done(newsk);
1282 		goto out;
1283 	}
1284 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1285 	if (*own_req) {
1286 		tcp_move_syn(newtp, req);
1287 
1288 		/* Clone pktoptions received with SYN, if we own the req */
1289 		if (ireq->pktopts) {
1290 			newnp->pktoptions = skb_clone(ireq->pktopts,
1291 						      sk_gfp_mask(sk, GFP_ATOMIC));
1292 			consume_skb(ireq->pktopts);
1293 			ireq->pktopts = NULL;
1294 			if (newnp->pktoptions) {
1295 				tcp_v6_restore_cb(newnp->pktoptions);
1296 				skb_set_owner_r(newnp->pktoptions, newsk);
1297 			}
1298 		}
1299 	}
1300 
1301 	return newsk;
1302 
1303 out_overflow:
1304 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1305 out_nonewsk:
1306 	dst_release(dst);
1307 out:
1308 	tcp_listendrop(sk);
1309 	return NULL;
1310 }
1311 
1312 /* The socket must have its spinlock held when we get
1313  * here, unless it is a TCP_LISTEN socket.
1314  *
1315  * We have a potential double-lock case here, so even when
1316  * doing backlog processing we use the BH locking scheme.
1317  * This is because we cannot sleep with the original spinlock
1318  * held.
1319  */
1320 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1321 {
1322 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1323 	struct sk_buff *opt_skb = NULL;
1324 	struct tcp_sock *tp;
1325 
1326 	/* Imagine: socket is IPv6. An IPv4 packet arrives,
1327 	   goes to the IPv4 receive handler and is backlogged.
1328 	   From the backlog it always goes here. Kerboom...
1329 	   Fortunately, tcp_rcv_established and rcv_established
1330 	   handle them correctly, but that is not the case with
1331 	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1332 	 */
1333 
1334 	if (skb->protocol == htons(ETH_P_IP))
1335 		return tcp_v4_do_rcv(sk, skb);
1336 
1337 	/*
1338 	 *	socket locking is here for SMP purposes as backlog rcv
1339 	 *	is currently called with bh processing disabled.
1340 	 */
1341 
1342 	/* Do Stevens' IPV6_PKTOPTIONS.
1343 
1344 	   Yes, guys, it is the only place in our code where we
1345 	   can make it without affecting IPv4.
1346 	   The rest of the code is protocol independent,
1347 	   and I do not like the idea of uglifying IPv4.
1348 
1349 	   Actually, the whole idea behind IPV6_PKTOPTIONS
1350 	   looks not very well thought out. For now we latch
1351 	   the options received in the last packet enqueued
1352 	   by tcp. Feel free to propose a better solution.
1353 					       --ANK (980728)
1354 	 */
1355 	if (np->rxopt.all)
1356 		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1357 
1358 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1359 		struct dst_entry *dst = sk->sk_rx_dst;
1360 
1361 		sock_rps_save_rxhash(sk, skb);
1362 		sk_mark_napi_id(sk, skb);
1363 		if (dst) {
1364 			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1365 			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1366 				dst_release(dst);
1367 				sk->sk_rx_dst = NULL;
1368 			}
1369 		}
1370 
1371 		tcp_rcv_established(sk, skb);
1372 		if (opt_skb)
1373 			goto ipv6_pktoptions;
1374 		return 0;
1375 	}
1376 
1377 	if (tcp_checksum_complete(skb))
1378 		goto csum_err;
1379 
1380 	if (sk->sk_state == TCP_LISTEN) {
1381 		struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1382 
1383 		if (!nsk)
1384 			goto discard;
1385 
1386 		if (nsk != sk) {
1387 			if (tcp_child_process(sk, nsk, skb))
1388 				goto reset;
1389 			if (opt_skb)
1390 				__kfree_skb(opt_skb);
1391 			return 0;
1392 		}
1393 	} else
1394 		sock_rps_save_rxhash(sk, skb);
1395 
1396 	if (tcp_rcv_state_process(sk, skb))
1397 		goto reset;
1398 	if (opt_skb)
1399 		goto ipv6_pktoptions;
1400 	return 0;
1401 
1402 reset:
1403 	tcp_v6_send_reset(sk, skb);
1404 discard:
1405 	if (opt_skb)
1406 		__kfree_skb(opt_skb);
1407 	kfree_skb(skb);
1408 	return 0;
1409 csum_err:
1410 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1411 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1412 	goto discard;
1413 
1414 
1415 ipv6_pktoptions:
1416 	/* You may ask, what is this?
1417 
1418 	   1. skb was enqueued by tcp.
1419 	   2. skb is added to the tail of the read queue, not out of order.
1420 	   3. the socket is not in a passive state.
1421 	   4. Finally, it really contains options the user wants to receive.
1422 	 */
1423 	tp = tcp_sk(sk);
1424 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1425 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1426 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1427 			np->mcast_oif = tcp_v6_iif(opt_skb);
1428 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1429 			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1430 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1431 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1432 		if (np->repflow)
1433 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1434 		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1435 			skb_set_owner_r(opt_skb, sk);
1436 			tcp_v6_restore_cb(opt_skb);
1437 			opt_skb = xchg(&np->pktoptions, opt_skb);
1438 		} else {
1439 			__kfree_skb(opt_skb);
1440 			opt_skb = xchg(&np->pktoptions, NULL);
1441 		}
1442 	}
1443 
1444 	kfree_skb(opt_skb);
1445 	return 0;
1446 }
1447 
1448 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1449 			   const struct tcphdr *th)
1450 {
1451 	/* This is tricky: we move IP6CB at its correct location into
1452 	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1453 	 * _decode_session6() uses IP6CB().
1454 	 * barrier() makes sure compiler won't play aliasing games.
1455 	 */
1456 	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1457 		sizeof(struct inet6_skb_parm));
1458 	barrier();
1459 
1460 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1461 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1462 				    skb->len - th->doff*4);
1463 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1464 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1465 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1466 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1467 	TCP_SKB_CB(skb)->sacked = 0;
1468 	TCP_SKB_CB(skb)->has_rxtstamp =
1469 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1470 }
1471 
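/* Main receive path. Validate and pull the TCP header, initialize the
 * checksum, then look the segment up in the established/listening hashes.
 * TIME_WAIT and NEW_SYN_RECV sockets get their own handling below; for
 * full sockets the segment is processed directly under the socket lock
 * or, if the socket is owned by user context, queued to the backlog.
 */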
1472 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1473 {
1474 	struct sk_buff *skb_to_free;
1475 	int sdif = inet6_sdif(skb);
1476 	const struct tcphdr *th;
1477 	const struct ipv6hdr *hdr;
1478 	bool refcounted;
1479 	struct sock *sk;
1480 	int ret;
1481 	struct net *net = dev_net(skb->dev);
1482 
1483 	if (skb->pkt_type != PACKET_HOST)
1484 		goto discard_it;
1485 
1486 	/*
1487 	 *	Count it even if it's bad.
1488 	 */
1489 	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1490 
1491 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1492 		goto discard_it;
1493 
1494 	th = (const struct tcphdr *)skb->data;
1495 
1496 	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1497 		goto bad_packet;
1498 	if (!pskb_may_pull(skb, th->doff*4))
1499 		goto discard_it;
1500 
1501 	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1502 		goto csum_error;
1503 
1504 	th = (const struct tcphdr *)skb->data;
1505 	hdr = ipv6_hdr(skb);
1506 
1507 lookup:
1508 	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1509 				th->source, th->dest, inet6_iif(skb), sdif,
1510 				&refcounted);
1511 	if (!sk)
1512 		goto no_tcp_socket;
1513 
1514 process:
1515 	if (sk->sk_state == TCP_TIME_WAIT)
1516 		goto do_time_wait;
1517 
1518 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1519 		struct request_sock *req = inet_reqsk(sk);
1520 		bool req_stolen = false;
1521 		struct sock *nsk;
1522 
1523 		sk = req->rsk_listener;
1524 		if (tcp_v6_inbound_md5_hash(sk, skb)) {
1525 			sk_drops_add(sk, skb);
1526 			reqsk_put(req);
1527 			goto discard_it;
1528 		}
1529 		if (tcp_checksum_complete(skb)) {
1530 			reqsk_put(req);
1531 			goto csum_error;
1532 		}
1533 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1534 			inet_csk_reqsk_queue_drop_and_put(sk, req);
1535 			goto lookup;
1536 		}
1537 		sock_hold(sk);
1538 		refcounted = true;
1539 		nsk = NULL;
1540 		if (!tcp_filter(sk, skb)) {
1541 			th = (const struct tcphdr *)skb->data;
1542 			hdr = ipv6_hdr(skb);
1543 			tcp_v6_fill_cb(skb, hdr, th);
1544 			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1545 		}
1546 		if (!nsk) {
1547 			reqsk_put(req);
1548 			if (req_stolen) {
1549 				/* Another cpu got exclusive access to req
1550 				 * and created a full blown socket.
1551 				 * Try to feed this packet to this socket
1552 				 * instead of discarding it.
1553 				 */
1554 				tcp_v6_restore_cb(skb);
1555 				sock_put(sk);
1556 				goto lookup;
1557 			}
1558 			goto discard_and_relse;
1559 		}
1560 		if (nsk == sk) {
1561 			reqsk_put(req);
1562 			tcp_v6_restore_cb(skb);
1563 		} else if (tcp_child_process(sk, nsk, skb)) {
1564 			tcp_v6_send_reset(nsk, skb);
1565 			goto discard_and_relse;
1566 		} else {
1567 			sock_put(sk);
1568 			return 0;
1569 		}
1570 	}
1571 	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1572 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1573 		goto discard_and_relse;
1574 	}
1575 
1576 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1577 		goto discard_and_relse;
1578 
1579 	if (tcp_v6_inbound_md5_hash(sk, skb))
1580 		goto discard_and_relse;
1581 
1582 	if (tcp_filter(sk, skb))
1583 		goto discard_and_relse;
1584 	th = (const struct tcphdr *)skb->data;
1585 	hdr = ipv6_hdr(skb);
1586 	tcp_v6_fill_cb(skb, hdr, th);
1587 
1588 	skb->dev = NULL;
1589 
1590 	if (sk->sk_state == TCP_LISTEN) {
1591 		ret = tcp_v6_do_rcv(sk, skb);
1592 		goto put_and_return;
1593 	}
1594 
1595 	sk_incoming_cpu_update(sk);
1596 
1597 	bh_lock_sock_nested(sk);
1598 	tcp_segs_in(tcp_sk(sk), skb);
1599 	ret = 0;
1600 	if (!sock_owned_by_user(sk)) {
1601 		skb_to_free = sk->sk_rx_skb_cache;
1602 		sk->sk_rx_skb_cache = NULL;
1603 		ret = tcp_v6_do_rcv(sk, skb);
1604 	} else {
1605 		if (tcp_add_backlog(sk, skb))
1606 			goto discard_and_relse;
1607 		skb_to_free = NULL;
1608 	}
1609 	bh_unlock_sock(sk);
1610 	if (skb_to_free)
1611 		__kfree_skb(skb_to_free);
1612 put_and_return:
1613 	if (refcounted)
1614 		sock_put(sk);
1615 	return ret ? -1 : 0;
1616 
1617 no_tcp_socket:
1618 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1619 		goto discard_it;
1620 
1621 	tcp_v6_fill_cb(skb, hdr, th);
1622 
1623 	if (tcp_checksum_complete(skb)) {
1624 csum_error:
1625 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1626 bad_packet:
1627 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1628 	} else {
1629 		tcp_v6_send_reset(NULL, skb);
1630 	}
1631 
1632 discard_it:
1633 	kfree_skb(skb);
1634 	return 0;
1635 
1636 discard_and_relse:
1637 	sk_drops_add(sk, skb);
1638 	if (refcounted)
1639 		sock_put(sk);
1640 	goto discard_it;
1641 
1642 do_time_wait:
1643 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1644 		inet_twsk_put(inet_twsk(sk));
1645 		goto discard_it;
1646 	}
1647 
1648 	tcp_v6_fill_cb(skb, hdr, th);
1649 
1650 	if (tcp_checksum_complete(skb)) {
1651 		inet_twsk_put(inet_twsk(sk));
1652 		goto csum_error;
1653 	}
1654 
1655 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1656 	case TCP_TW_SYN:
1657 	{
1658 		struct sock *sk2;
1659 
1660 		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1661 					    skb, __tcp_hdrlen(th),
1662 					    &ipv6_hdr(skb)->saddr, th->source,
1663 					    &ipv6_hdr(skb)->daddr,
1664 					    ntohs(th->dest),
1665 					    tcp_v6_iif_l3_slave(skb),
1666 					    sdif);
1667 		if (sk2) {
1668 			struct inet_timewait_sock *tw = inet_twsk(sk);
1669 			inet_twsk_deschedule_put(tw);
1670 			sk = sk2;
1671 			tcp_v6_restore_cb(skb);
1672 			refcounted = false;
1673 			goto process;
1674 		}
1675 	}
1676 		/* to ACK */
1677 		/* fall through */
1678 	case TCP_TW_ACK:
1679 		tcp_v6_timewait_ack(sk, skb);
1680 		break;
1681 	case TCP_TW_RST:
1682 		tcp_v6_send_reset(sk, skb);
1683 		inet_twsk_deschedule_put(inet_twsk(sk));
1684 		goto discard_it;
1685 	case TCP_TW_SUCCESS:
1686 		;
1687 	}
1688 	goto discard_it;
1689 }
1690 
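/* Early demux: called before routing. If an established socket matches
 * the 4-tuple, attach it to the skb and reuse its cached rx dst (when
 * the ifindex and route cookie still match), saving a full route lookup.
 */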
1691 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1692 {
1693 	const struct ipv6hdr *hdr;
1694 	const struct tcphdr *th;
1695 	struct sock *sk;
1696 
1697 	if (skb->pkt_type != PACKET_HOST)
1698 		return;
1699 
1700 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1701 		return;
1702 
1703 	hdr = ipv6_hdr(skb);
1704 	th = tcp_hdr(skb);
1705 
1706 	if (th->doff < sizeof(struct tcphdr) / 4)
1707 		return;
1708 
1709 	/* Note: we use inet6_iif() here, not tcp_v6_iif() */
1710 	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1711 					&hdr->saddr, th->source,
1712 					&hdr->daddr, ntohs(th->dest),
1713 					inet6_iif(skb), inet6_sdif(skb));
1714 	if (sk) {
1715 		skb->sk = sk;
1716 		skb->destructor = sock_edemux;
1717 		if (sk_fullsock(sk)) {
1718 			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1719 
1720 			if (dst)
1721 				dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1722 			if (dst &&
1723 			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1724 				skb_dst_set_noref(skb, dst);
1725 		}
1726 	}
1727 }
1728 
1729 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1730 	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
1731 	.twsk_unique	= tcp_twsk_unique,
1732 	.twsk_destructor = tcp_twsk_destructor,
1733 };
1734 
1735 static const struct inet_connection_sock_af_ops ipv6_specific = {
1736 	.queue_xmit	   = inet6_csk_xmit,
1737 	.send_check	   = tcp_v6_send_check,
1738 	.rebuild_header	   = inet6_sk_rebuild_header,
1739 	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
1740 	.conn_request	   = tcp_v6_conn_request,
1741 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1742 	.net_header_len	   = sizeof(struct ipv6hdr),
1743 	.net_frag_header_len = sizeof(struct frag_hdr),
1744 	.setsockopt	   = ipv6_setsockopt,
1745 	.getsockopt	   = ipv6_getsockopt,
1746 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1747 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1748 #ifdef CONFIG_COMPAT
1749 	.compat_setsockopt = compat_ipv6_setsockopt,
1750 	.compat_getsockopt = compat_ipv6_getsockopt,
1751 #endif
1752 	.mtu_reduced	   = tcp_v6_mtu_reduced,
1753 };
1754 
1755 #ifdef CONFIG_TCP_MD5SIG
1756 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1757 	.md5_lookup	=	tcp_v6_md5_lookup,
1758 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
1759 	.md5_parse	=	tcp_v6_parse_md5_keys,
1760 };
1761 #endif
1762 
1763 /*
1764  *	TCP over IPv4 via INET6 API
1765  */
1766 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1767 	.queue_xmit	   = ip_queue_xmit,
1768 	.send_check	   = tcp_v4_send_check,
1769 	.rebuild_header	   = inet_sk_rebuild_header,
1770 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1771 	.conn_request	   = tcp_v6_conn_request,
1772 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1773 	.net_header_len	   = sizeof(struct iphdr),
1774 	.setsockopt	   = ipv6_setsockopt,
1775 	.getsockopt	   = ipv6_getsockopt,
1776 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1777 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1778 #ifdef CONFIG_COMPAT
1779 	.compat_setsockopt = compat_ipv6_setsockopt,
1780 	.compat_getsockopt = compat_ipv6_getsockopt,
1781 #endif
1782 	.mtu_reduced	   = tcp_v4_mtu_reduced,
1783 };
1784 
1785 #ifdef CONFIG_TCP_MD5SIG
1786 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1787 	.md5_lookup	=	tcp_v4_md5_lookup,
1788 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1789 	.md5_parse	=	tcp_v6_parse_md5_keys,
1790 };
1791 #endif
1792 
1793 /* NOTE: A lot of things are set to zero explicitly by the call to
1794  *       sk_alloc(), so they need not be done here.
1795  */
1796 static int tcp_v6_init_sock(struct sock *sk)
1797 {
1798 	struct inet_connection_sock *icsk = inet_csk(sk);
1799 
1800 	tcp_init_sock(sk);
1801 
1802 	icsk->icsk_af_ops = &ipv6_specific;
1803 
1804 #ifdef CONFIG_TCP_MD5SIG
1805 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1806 #endif
1807 
1808 	return 0;
1809 }
1810 
1811 static void tcp_v6_destroy_sock(struct sock *sk)
1812 {
1813 	tcp_v4_destroy_sock(sk);
1814 	inet6_destroy_sock(sk);
1815 }
1816 
1817 #ifdef CONFIG_PROC_FS
1818 /* Proc filesystem TCPv6 sock list dumping. */
1819 static void get_openreq6(struct seq_file *seq,
1820 			 const struct request_sock *req, int i)
1821 {
1822 	long ttd = req->rsk_timer.expires - jiffies;
1823 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1824 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1825 
1826 	if (ttd < 0)
1827 		ttd = 0;
1828 
1829 	seq_printf(seq,
1830 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1831 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1832 		   i,
1833 		   src->s6_addr32[0], src->s6_addr32[1],
1834 		   src->s6_addr32[2], src->s6_addr32[3],
1835 		   inet_rsk(req)->ir_num,
1836 		   dest->s6_addr32[0], dest->s6_addr32[1],
1837 		   dest->s6_addr32[2], dest->s6_addr32[3],
1838 		   ntohs(inet_rsk(req)->ir_rmt_port),
1839 		   TCP_SYN_RECV,
1840 		   0, 0, /* could print option size, but that is af dependent. */
1841 		   1,   /* timers active (only the expire timer) */
1842 		   jiffies_to_clock_t(ttd),
1843 		   req->num_timeout,
1844 		   from_kuid_munged(seq_user_ns(seq),
1845 				    sock_i_uid(req->rsk_listener)),
1846 		   0,  /* non standard timer */
1847 		   0, /* open_requests have no inode */
1848 		   0, req);
1849 }
1850 
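/* Format one established/listening socket as a /proc/net/tcp6 row:
 * addresses as four hex words plus a port, then state, queue sizes, the
 * active timer and its expiry, and assorted socket bookkeeping.
 */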
1851 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1852 {
1853 	const struct in6_addr *dest, *src;
1854 	__u16 destp, srcp;
1855 	int timer_active;
1856 	unsigned long timer_expires;
1857 	const struct inet_sock *inet = inet_sk(sp);
1858 	const struct tcp_sock *tp = tcp_sk(sp);
1859 	const struct inet_connection_sock *icsk = inet_csk(sp);
1860 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1861 	int rx_queue;
1862 	int state;
1863 
1864 	dest  = &sp->sk_v6_daddr;
1865 	src   = &sp->sk_v6_rcv_saddr;
1866 	destp = ntohs(inet->inet_dport);
1867 	srcp  = ntohs(inet->inet_sport);
1868 
1869 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1870 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1871 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1872 		timer_active	= 1;
1873 		timer_expires	= icsk->icsk_timeout;
1874 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1875 		timer_active	= 4;
1876 		timer_expires	= icsk->icsk_timeout;
1877 	} else if (timer_pending(&sp->sk_timer)) {
1878 		timer_active	= 2;
1879 		timer_expires	= sp->sk_timer.expires;
1880 	} else {
1881 		timer_active	= 0;
1882 		timer_expires = jiffies;
1883 	}
1884 
1885 	state = inet_sk_state_load(sp);
1886 	if (state == TCP_LISTEN)
1887 		rx_queue = sp->sk_ack_backlog;
1888 	else
1889 		/* Because we don't lock the socket,
1890 		 * we might find a transient negative value.
1891 		 */
1892 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
1893 
1894 	seq_printf(seq,
1895 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1896 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1897 		   i,
1898 		   src->s6_addr32[0], src->s6_addr32[1],
1899 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
1900 		   dest->s6_addr32[0], dest->s6_addr32[1],
1901 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
1902 		   state,
1903 		   tp->write_seq - tp->snd_una,
1904 		   rx_queue,
1905 		   timer_active,
1906 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
1907 		   icsk->icsk_retransmits,
1908 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1909 		   icsk->icsk_probes_out,
1910 		   sock_i_ino(sp),
1911 		   refcount_read(&sp->sk_refcnt), sp,
1912 		   jiffies_to_clock_t(icsk->icsk_rto),
1913 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
1914 		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
1915 		   tp->snd_cwnd,
1916 		   state == TCP_LISTEN ?
1917 			fastopenq->max_qlen :
1918 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
1919 		   );
1920 }
1921 
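/* TIME_WAIT sockets are pared-down stubs: no owning task, no inode and a
 * single timer, hence the fixed "3" timer code and zeroed uid/inode fields.
 */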
1922 static void get_timewait6_sock(struct seq_file *seq,
1923 			       struct inet_timewait_sock *tw, int i)
1924 {
1925 	long delta = tw->tw_timer.expires - jiffies;
1926 	const struct in6_addr *dest, *src;
1927 	__u16 destp, srcp;
1928 
1929 	dest = &tw->tw_v6_daddr;
1930 	src  = &tw->tw_v6_rcv_saddr;
1931 	destp = ntohs(tw->tw_dport);
1932 	srcp  = ntohs(tw->tw_sport);
1933 
1934 	seq_printf(seq,
1935 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1936 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1937 		   i,
1938 		   src->s6_addr32[0], src->s6_addr32[1],
1939 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
1940 		   dest->s6_addr32[0], dest->s6_addr32[1],
1941 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
1942 		   tw->tw_substate, 0, 0,
1943 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1944 		   refcount_read(&tw->tw_refcnt), tw);
1945 }
1946 
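/* One entry per socket.  v may point at a full sock, an inet_timewait_sock
 * or a request_sock; all three start with struct sock_common, so reading
 * sk->sk_state through the cast is valid for each of them and selects the
 * matching dumper.
 */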
1947 static int tcp6_seq_show(struct seq_file *seq, void *v)
1948 {
1949 	struct tcp_iter_state *st;
1950 	struct sock *sk = v;
1951 
1952 	if (v == SEQ_START_TOKEN) {
1953 		seq_puts(seq,
1954 			 "  sl  "
1955 			 "local_address                         "
1956 			 "remote_address                        "
1957 			 "st tx_queue rx_queue tr tm->when retrnsmt"
1958 			 "   uid  timeout inode\n");
1959 		goto out;
1960 	}
1961 	st = seq->private;
1962 
1963 	if (sk->sk_state == TCP_TIME_WAIT)
1964 		get_timewait6_sock(seq, v, st->num);
1965 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
1966 		get_openreq6(seq, v, st->num);
1967 	else
1968 		get_tcp6_sock(seq, v, st->num);
1969 out:
1970 	return 0;
1971 }
1972 
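/* Iteration (start/next/stop) is shared with the IPv4 code;
 * tcp6_seq_afinfo.family restricts the walk over tcp_hashinfo to
 * AF_INET6 sockets.
 */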
1973 static const struct seq_operations tcp6_seq_ops = {
1974 	.show		= tcp6_seq_show,
1975 	.start		= tcp_seq_start,
1976 	.next		= tcp_seq_next,
1977 	.stop		= tcp_seq_stop,
1978 };
1979 
1980 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
1981 	.family		= AF_INET6,
1982 };
1983 
1984 int __net_init tcp6_proc_init(struct net *net)
1985 {
1986 	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
1987 			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
1988 		return -ENOMEM;
1989 	return 0;
1990 }
1991 
1992 void tcp6_proc_exit(struct net *net)
1993 {
1994 	remove_proc_entry("tcp6", net->proc_net);
1995 }
1996 #endif
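
/*
 * Minimal userspace sketch (not part of the kernel build) of reading the
 * file registered above; assumes only the C standard library:
 *
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		char line[512];
 *		FILE *f = fopen("/proc/net/tcp6", "r");
 *
 *		if (!f)
 *			return 1;
 *		while (fgets(line, sizeof(line), f))
 *			fputs(line, stdout);
 *		fclose(f);
 *		return 0;
 *	}
 */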
1997 
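/* Glue between AF_INET6 stream sockets and the protocol-independent TCP
 * engine: most callbacks are shared with tcpv4_prot, while connect,
 * init/destroy and hashing go through the IPv6-specific helpers.
 */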
1998 struct proto tcpv6_prot = {
1999 	.name			= "TCPv6",
2000 	.owner			= THIS_MODULE,
2001 	.close			= tcp_close,
2002 	.pre_connect		= tcp_v6_pre_connect,
2003 	.connect		= tcp_v6_connect,
2004 	.disconnect		= tcp_disconnect,
2005 	.accept			= inet_csk_accept,
2006 	.ioctl			= tcp_ioctl,
2007 	.init			= tcp_v6_init_sock,
2008 	.destroy		= tcp_v6_destroy_sock,
2009 	.shutdown		= tcp_shutdown,
2010 	.setsockopt		= tcp_setsockopt,
2011 	.getsockopt		= tcp_getsockopt,
2012 	.keepalive		= tcp_set_keepalive,
2013 	.recvmsg		= tcp_recvmsg,
2014 	.sendmsg		= tcp_sendmsg,
2015 	.sendpage		= tcp_sendpage,
2016 	.backlog_rcv		= tcp_v6_do_rcv,
2017 	.release_cb		= tcp_release_cb,
2018 	.hash			= inet6_hash,
2019 	.unhash			= inet_unhash,
2020 	.get_port		= inet_csk_get_port,
2021 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2022 	.leave_memory_pressure	= tcp_leave_memory_pressure,
2023 	.stream_memory_free	= tcp_stream_memory_free,
2024 	.sockets_allocated	= &tcp_sockets_allocated,
2025 	.memory_allocated	= &tcp_memory_allocated,
2026 	.memory_pressure	= &tcp_memory_pressure,
2027 	.orphan_count		= &tcp_orphan_count,
2028 	.sysctl_mem		= sysctl_tcp_mem,
2029 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2030 	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2031 	.max_header		= MAX_TCP_HEADER,
2032 	.obj_size		= sizeof(struct tcp6_sock),
2033 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2034 	.twsk_prot		= &tcp6_timewait_sock_ops,
2035 	.rsk_prot		= &tcp6_request_sock_ops,
2036 	.h.hashinfo		= &tcp_hashinfo,
2037 	.no_autobind		= true,
2038 #ifdef CONFIG_COMPAT
2039 	.compat_setsockopt	= compat_tcp_setsockopt,
2040 	.compat_getsockopt	= compat_tcp_getsockopt,
2041 #endif
2042 	.diag_destroy		= tcp_abort,
2043 };
2044 
2045 /* Thinking of making this const? Don't: the net.ipv4.tcp_early_demux
2046  * sysctl rewrites .early_demux at run time; .early_demux_handler keeps the canonical pointer.
2047  */
2048 static struct inet6_protocol tcpv6_protocol = {
2049 	.early_demux	=	tcp_v6_early_demux,
2050 	.early_demux_handler =  tcp_v6_early_demux,
2051 	.handler	=	tcp_v6_rcv,
2052 	.err_handler	=	tcp_v6_err,
2053 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2054 };
2055 
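/* Permanent SOCK_STREAM entry in the inet6 protocol switch;
 * INET_PROTOSW_ICSK flags it as a connection-oriented (icsk) protocol.
 */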
2056 static struct inet_protosw tcpv6_protosw = {
2057 	.type		=	SOCK_STREAM,
2058 	.protocol	=	IPPROTO_TCP,
2059 	.prot		=	&tcpv6_prot,
2060 	.ops		=	&inet6_stream_ops,
2061 	.flags		=	INET_PROTOSW_PERMANENT |
2062 				INET_PROTOSW_ICSK,
2063 };
2064 
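/* Per-netns setup: each namespace gets a control socket, used to emit
 * RSTs and ACKs that are not associated with any local socket.  The
 * batched exit purges TIME_WAIT sockets that still reference dying
 * namespaces.
 */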
2065 static int __net_init tcpv6_net_init(struct net *net)
2066 {
2067 	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2068 				    SOCK_RAW, IPPROTO_TCP, net);
2069 }
2070 
2071 static void __net_exit tcpv6_net_exit(struct net *net)
2072 {
2073 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2074 }
2075 
2076 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2077 {
2078 	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2079 }
2080 
2081 static struct pernet_operations tcpv6_net_ops = {
2082 	.init	    = tcpv6_net_init,
2083 	.exit	    = tcpv6_net_exit,
2084 	.exit_batch = tcpv6_net_exit_batch,
2085 };
2086 
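/* Registration order matters: the protocol handler must be in place
 * before the protosw makes it reachable from socket(2).  Failures unwind
 * in reverse through the out_* labels below.
 */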
2087 int __init tcpv6_init(void)
2088 {
2089 	int ret;
2090 
2091 	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2092 	if (ret)
2093 		goto out;
2094 
2095 	/* register the TCPv6 protocol switch with the inet6 layer */
2096 	ret = inet6_register_protosw(&tcpv6_protosw);
2097 	if (ret)
2098 		goto out_tcpv6_protocol;
2099 
2100 	ret = register_pernet_subsys(&tcpv6_net_ops);
2101 	if (ret)
2102 		goto out_tcpv6_protosw;
2103 out:
2104 	return ret;
2105 
2106 out_tcpv6_protosw:
2107 	inet6_unregister_protosw(&tcpv6_protosw);
2108 out_tcpv6_protocol:
2109 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2110 	goto out;
2111 }
2112 
2113 void tcpv6_exit(void)
2114 {
2115 	unregister_pernet_subsys(&tcpv6_net_ops);
2116 	inet6_unregister_protosw(&tcpv6_protosw);
2117 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2118 }
2119