xref: /openbmc/linux/net/ipv6/tcp_ipv6.c (revision f20c7d91)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	TCP over IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on:
10  *	linux/net/ipv4/tcp.c
11  *	linux/net/ipv4/tcp_input.c
12  *	linux/net/ipv4/tcp_output.c
13  *
14  *	Fixes:
15  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
16  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
17  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
18  *					a single port at the same time.
19  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
20  */
21 
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43 
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62 
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65 
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68 
69 #include <trace/events/tcp.h>
70 
71 static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 				      struct request_sock *req);
74 
75 static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76 
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 						   const struct in6_addr *addr,
85 						   int l3index)
86 {
87 	return NULL;
88 }
89 #endif
90 
91 /* Helper returning the inet6 address from a given tcp socket.
92  * It can be used in TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allow compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98 	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99 
100 	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
102 
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105 	struct dst_entry *dst = skb_dst(skb);
106 
107 	if (dst && dst_hold_safe(dst)) {
108 		const struct rt6_info *rt = (const struct rt6_info *)dst;
109 
110 		sk->sk_rx_dst = dst;
111 		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
112 		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
113 	}
114 }
115 
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118 	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119 				ipv6_hdr(skb)->saddr.s6_addr32,
120 				tcp_hdr(skb)->dest,
121 				tcp_hdr(skb)->source);
122 }
123 
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126 	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127 				   ipv6_hdr(skb)->saddr.s6_addr32);
128 }
129 
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131 			      int addr_len)
132 {
133 	/* This check is replicated from tcp_v6_connect() and intended to
134 	 * prevent BPF program called below from accessing bytes that are out
135 	 * of the bound specified by user in addr_len.
136 	 */
137 	if (addr_len < SIN6_LEN_RFC2133)
138 		return -EINVAL;
139 
140 	sock_owned_by_me(sk);
141 
142 	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
144 
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146 			  int addr_len)
147 {
148 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149 	struct inet_sock *inet = inet_sk(sk);
150 	struct inet_connection_sock *icsk = inet_csk(sk);
151 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
152 	struct tcp_sock *tp = tcp_sk(sk);
153 	struct in6_addr *saddr = NULL, *final_p, final;
154 	struct ipv6_txoptions *opt;
155 	struct flowi6 fl6;
156 	struct dst_entry *dst;
157 	int addr_type;
158 	int err;
159 	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
160 
161 	if (addr_len < SIN6_LEN_RFC2133)
162 		return -EINVAL;
163 
164 	if (usin->sin6_family != AF_INET6)
165 		return -EAFNOSUPPORT;
166 
167 	memset(&fl6, 0, sizeof(fl6));
168 
169 	if (np->sndflow) {
170 		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
171 		IP6_ECN_flow_init(fl6.flowlabel);
172 		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
173 			struct ip6_flowlabel *flowlabel;
174 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175 			if (IS_ERR(flowlabel))
176 				return -EINVAL;
177 			fl6_sock_release(flowlabel);
178 		}
179 	}
180 
181 	/*
182 	 *	connect() to INADDR_ANY means loopback (BSD'ism).
183 	 */
184 
185 	if (ipv6_addr_any(&usin->sin6_addr)) {
186 		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187 			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
188 					       &usin->sin6_addr);
189 		else
190 			usin->sin6_addr = in6addr_loopback;
191 	}
192 
193 	addr_type = ipv6_addr_type(&usin->sin6_addr);
194 
195 	if (addr_type & IPV6_ADDR_MULTICAST)
196 		return -ENETUNREACH;
197 
198 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
199 		if (addr_len >= sizeof(struct sockaddr_in6) &&
200 		    usin->sin6_scope_id) {
201 			/* If interface is set while binding, indices
202 			 * must coincide.
203 			 */
204 			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
205 				return -EINVAL;
206 
207 			sk->sk_bound_dev_if = usin->sin6_scope_id;
208 		}
209 
210 		/* Connect to link-local address requires an interface */
211 		if (!sk->sk_bound_dev_if)
212 			return -EINVAL;
213 	}
214 
215 	if (tp->rx_opt.ts_recent_stamp &&
216 	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217 		tp->rx_opt.ts_recent = 0;
218 		tp->rx_opt.ts_recent_stamp = 0;
219 		WRITE_ONCE(tp->write_seq, 0);
220 	}
221 
222 	sk->sk_v6_daddr = usin->sin6_addr;
223 	np->flow_label = fl6.flowlabel;
224 
225 	/*
226 	 *	TCP over IPv4
227 	 */
228 
229 	if (addr_type & IPV6_ADDR_MAPPED) {
230 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
231 		struct sockaddr_in sin;
232 
233 		if (__ipv6_only_sock(sk))
234 			return -ENETUNREACH;
235 
236 		sin.sin_family = AF_INET;
237 		sin.sin_port = usin->sin6_port;
238 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239 
240 		icsk->icsk_af_ops = &ipv6_mapped;
241 		if (sk_is_mptcp(sk))
242 			mptcpv6_handle_mapped(sk, true);
243 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
244 #ifdef CONFIG_TCP_MD5SIG
245 		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
246 #endif
247 
248 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
249 
250 		if (err) {
251 			icsk->icsk_ext_hdr_len = exthdrlen;
252 			icsk->icsk_af_ops = &ipv6_specific;
253 			if (sk_is_mptcp(sk))
254 				mptcpv6_handle_mapped(sk, false);
255 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
256 #ifdef CONFIG_TCP_MD5SIG
257 			tp->af_specific = &tcp_sock_ipv6_specific;
258 #endif
259 			goto failure;
260 		}
261 		np->saddr = sk->sk_v6_rcv_saddr;
262 
263 		return err;
264 	}
265 
266 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
267 		saddr = &sk->sk_v6_rcv_saddr;
268 
269 	fl6.flowi6_proto = IPPROTO_TCP;
270 	fl6.daddr = sk->sk_v6_daddr;
271 	fl6.saddr = saddr ? *saddr : np->saddr;
272 	fl6.flowi6_oif = sk->sk_bound_dev_if;
273 	fl6.flowi6_mark = sk->sk_mark;
274 	fl6.fl6_dport = usin->sin6_port;
275 	fl6.fl6_sport = inet->inet_sport;
276 	fl6.flowi6_uid = sk->sk_uid;
277 
278 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
279 	final_p = fl6_update_dst(&fl6, opt, &final);
280 
281 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
282 
283 	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
284 	if (IS_ERR(dst)) {
285 		err = PTR_ERR(dst);
286 		goto failure;
287 	}
288 
289 	if (!saddr) {
290 		saddr = &fl6.saddr;
291 		sk->sk_v6_rcv_saddr = *saddr;
292 	}
293 
294 	/* set the source address */
295 	np->saddr = *saddr;
296 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
297 
298 	sk->sk_gso_type = SKB_GSO_TCPV6;
299 	ip6_dst_store(sk, dst, NULL, NULL);
300 
301 	icsk->icsk_ext_hdr_len = 0;
302 	if (opt)
303 		icsk->icsk_ext_hdr_len = opt->opt_flen +
304 					 opt->opt_nflen;
305 
306 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
307 
308 	inet->inet_dport = usin->sin6_port;
309 
310 	tcp_set_state(sk, TCP_SYN_SENT);
311 	err = inet6_hash_connect(tcp_death_row, sk);
312 	if (err)
313 		goto late_failure;
314 
315 	sk_set_txhash(sk);
316 
317 	if (likely(!tp->repair)) {
318 		if (!tp->write_seq)
319 			WRITE_ONCE(tp->write_seq,
320 				   secure_tcpv6_seq(np->saddr.s6_addr32,
321 						    sk->sk_v6_daddr.s6_addr32,
322 						    inet->inet_sport,
323 						    inet->inet_dport));
324 		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
325 						   np->saddr.s6_addr32,
326 						   sk->sk_v6_daddr.s6_addr32);
327 	}
328 
329 	if (tcp_fastopen_defer_connect(sk, &err))
330 		return err;
331 	if (err)
332 		goto late_failure;
333 
334 	err = tcp_connect(sk);
335 	if (err)
336 		goto late_failure;
337 
338 	return 0;
339 
340 late_failure:
341 	tcp_set_state(sk, TCP_CLOSE);
342 failure:
343 	inet->inet_dport = 0;
344 	sk->sk_route_caps = 0;
345 	return err;
346 }
347 
348 static void tcp_v6_mtu_reduced(struct sock *sk)
349 {
350 	struct dst_entry *dst;
351 
352 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
353 		return;
354 
355 	dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
356 	if (!dst)
357 		return;
358 
359 	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
360 		tcp_sync_mss(sk, dst_mtu(dst));
361 		tcp_simple_retransmit(sk);
362 	}
363 }
364 
365 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
366 		u8 type, u8 code, int offset, __be32 info)
367 {
368 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
369 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
370 	struct net *net = dev_net(skb->dev);
371 	struct request_sock *fastopen;
372 	struct ipv6_pinfo *np;
373 	struct tcp_sock *tp;
374 	__u32 seq, snd_una;
375 	struct sock *sk;
376 	bool fatal;
377 	int err;
378 
379 	sk = __inet6_lookup_established(net, &tcp_hashinfo,
380 					&hdr->daddr, th->dest,
381 					&hdr->saddr, ntohs(th->source),
382 					skb->dev->ifindex, inet6_sdif(skb));
383 
384 	if (!sk) {
385 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
386 				  ICMP6_MIB_INERRORS);
387 		return -ENOENT;
388 	}
389 
390 	if (sk->sk_state == TCP_TIME_WAIT) {
391 		inet_twsk_put(inet_twsk(sk));
392 		return 0;
393 	}
394 	seq = ntohl(th->seq);
395 	fatal = icmpv6_err_convert(type, code, &err);
396 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
397 		tcp_req_err(sk, seq, fatal);
398 		return 0;
399 	}
400 
401 	bh_lock_sock(sk);
402 	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
403 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
404 
405 	if (sk->sk_state == TCP_CLOSE)
406 		goto out;
407 
408 	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
409 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
410 		goto out;
411 	}
412 
413 	tp = tcp_sk(sk);
414 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
415 	fastopen = rcu_dereference(tp->fastopen_rsk);
416 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
417 	if (sk->sk_state != TCP_LISTEN &&
418 	    !between(seq, snd_una, tp->snd_nxt)) {
419 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
420 		goto out;
421 	}
422 
423 	np = tcp_inet6_sk(sk);
424 
425 	if (type == NDISC_REDIRECT) {
426 		if (!sock_owned_by_user(sk)) {
427 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
428 
429 			if (dst)
430 				dst->ops->redirect(dst, sk, skb);
431 		}
432 		goto out;
433 	}
434 
435 	if (type == ICMPV6_PKT_TOOBIG) {
436 		/* We are not interested in TCP_LISTEN and open_requests
437 		 * (SYN-ACKs send out by Linux are always <576bytes so
438 		 * they should go through unfragmented).
439 		 */
440 		if (sk->sk_state == TCP_LISTEN)
441 			goto out;
442 
443 		if (!ip6_sk_accept_pmtu(sk))
444 			goto out;
445 
446 		tp->mtu_info = ntohl(info);
447 		if (!sock_owned_by_user(sk))
448 			tcp_v6_mtu_reduced(sk);
449 		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
450 					   &sk->sk_tsq_flags))
451 			sock_hold(sk);
452 		goto out;
453 	}
454 
455 
456 	/* Might be for an request_sock */
457 	switch (sk->sk_state) {
458 	case TCP_SYN_SENT:
459 	case TCP_SYN_RECV:
460 		/* Only in fast or simultaneous open. If a fast open socket is
461 		 * is already accepted it is treated as a connected one below.
462 		 */
463 		if (fastopen && !fastopen->sk)
464 			break;
465 
466 		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
467 
468 		if (!sock_owned_by_user(sk)) {
469 			sk->sk_err = err;
470 			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
471 
472 			tcp_done(sk);
473 		} else
474 			sk->sk_err_soft = err;
475 		goto out;
476 	case TCP_LISTEN:
477 		break;
478 	default:
479 		/* check if this ICMP message allows revert of backoff.
480 		 * (see RFC 6069)
481 		 */
482 		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
483 		    code == ICMPV6_NOROUTE)
484 			tcp_ld_RTO_revert(sk, seq);
485 	}
486 
487 	if (!sock_owned_by_user(sk) && np->recverr) {
488 		sk->sk_err = err;
489 		sk->sk_error_report(sk);
490 	} else
491 		sk->sk_err_soft = err;
492 
493 out:
494 	bh_unlock_sock(sk);
495 	sock_put(sk);
496 	return 0;
497 }
498 
499 
500 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
501 			      struct flowi *fl,
502 			      struct request_sock *req,
503 			      struct tcp_fastopen_cookie *foc,
504 			      enum tcp_synack_type synack_type)
505 {
506 	struct inet_request_sock *ireq = inet_rsk(req);
507 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
508 	struct ipv6_txoptions *opt;
509 	struct flowi6 *fl6 = &fl->u.ip6;
510 	struct sk_buff *skb;
511 	int err = -ENOMEM;
512 
513 	/* First, grab a route. */
514 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
515 					       IPPROTO_TCP)) == NULL)
516 		goto done;
517 
518 	skb = tcp_make_synack(sk, dst, req, foc, synack_type);
519 
520 	if (skb) {
521 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
522 				    &ireq->ir_v6_rmt_addr);
523 
524 		fl6->daddr = ireq->ir_v6_rmt_addr;
525 		if (np->repflow && ireq->pktopts)
526 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
527 
528 		rcu_read_lock();
529 		opt = ireq->ipv6_opt;
530 		if (!opt)
531 			opt = rcu_dereference(np->opt);
532 		err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass,
533 			       sk->sk_priority);
534 		rcu_read_unlock();
535 		err = net_xmit_eval(err);
536 	}
537 
538 done:
539 	return err;
540 }
541 
542 
543 static void tcp_v6_reqsk_destructor(struct request_sock *req)
544 {
545 	kfree(inet_rsk(req)->ipv6_opt);
546 	kfree_skb(inet_rsk(req)->pktopts);
547 }
548 
549 #ifdef CONFIG_TCP_MD5SIG
550 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
551 						   const struct in6_addr *addr,
552 						   int l3index)
553 {
554 	return tcp_md5_do_lookup(sk, l3index,
555 				 (union tcp_md5_addr *)addr, AF_INET6);
556 }
557 
558 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
559 						const struct sock *addr_sk)
560 {
561 	int l3index;
562 
563 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
564 						 addr_sk->sk_bound_dev_if);
565 	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
566 				    l3index);
567 }
568 
569 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
570 				 char __user *optval, int optlen)
571 {
572 	struct tcp_md5sig cmd;
573 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
574 	int l3index = 0;
575 	u8 prefixlen;
576 
577 	if (optlen < sizeof(cmd))
578 		return -EINVAL;
579 
580 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
581 		return -EFAULT;
582 
583 	if (sin6->sin6_family != AF_INET6)
584 		return -EINVAL;
585 
586 	if (optname == TCP_MD5SIG_EXT &&
587 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
588 		prefixlen = cmd.tcpm_prefixlen;
589 		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
590 					prefixlen > 32))
591 			return -EINVAL;
592 	} else {
593 		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
594 	}
595 
596 	if (optname == TCP_MD5SIG_EXT &&
597 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
598 		struct net_device *dev;
599 
600 		rcu_read_lock();
601 		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
602 		if (dev && netif_is_l3_master(dev))
603 			l3index = dev->ifindex;
604 		rcu_read_unlock();
605 
606 		/* ok to reference set/not set outside of rcu;
607 		 * right now device MUST be an L3 master
608 		 */
609 		if (!dev || !l3index)
610 			return -EINVAL;
611 	}
612 
613 	if (!cmd.tcpm_keylen) {
614 		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
615 			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
616 					      AF_INET, prefixlen,
617 					      l3index);
618 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
619 				      AF_INET6, prefixlen, l3index);
620 	}
621 
622 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
623 		return -EINVAL;
624 
625 	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
626 		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
627 				      AF_INET, prefixlen, l3index,
628 				      cmd.tcpm_key, cmd.tcpm_keylen,
629 				      GFP_KERNEL);
630 
631 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
632 			      AF_INET6, prefixlen, l3index,
633 			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
634 }
635 
636 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
637 				   const struct in6_addr *daddr,
638 				   const struct in6_addr *saddr,
639 				   const struct tcphdr *th, int nbytes)
640 {
641 	struct tcp6_pseudohdr *bp;
642 	struct scatterlist sg;
643 	struct tcphdr *_th;
644 
645 	bp = hp->scratch;
646 	/* 1. TCP pseudo-header (RFC2460) */
647 	bp->saddr = *saddr;
648 	bp->daddr = *daddr;
649 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
650 	bp->len = cpu_to_be32(nbytes);
651 
652 	_th = (struct tcphdr *)(bp + 1);
653 	memcpy(_th, th, sizeof(*th));
654 	_th->check = 0;
655 
656 	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
657 	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
658 				sizeof(*bp) + sizeof(*th));
659 	return crypto_ahash_update(hp->md5_req);
660 }
661 
662 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
663 			       const struct in6_addr *daddr, struct in6_addr *saddr,
664 			       const struct tcphdr *th)
665 {
666 	struct tcp_md5sig_pool *hp;
667 	struct ahash_request *req;
668 
669 	hp = tcp_get_md5sig_pool();
670 	if (!hp)
671 		goto clear_hash_noput;
672 	req = hp->md5_req;
673 
674 	if (crypto_ahash_init(req))
675 		goto clear_hash;
676 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
677 		goto clear_hash;
678 	if (tcp_md5_hash_key(hp, key))
679 		goto clear_hash;
680 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
681 	if (crypto_ahash_final(req))
682 		goto clear_hash;
683 
684 	tcp_put_md5sig_pool();
685 	return 0;
686 
687 clear_hash:
688 	tcp_put_md5sig_pool();
689 clear_hash_noput:
690 	memset(md5_hash, 0, 16);
691 	return 1;
692 }
693 
694 static int tcp_v6_md5_hash_skb(char *md5_hash,
695 			       const struct tcp_md5sig_key *key,
696 			       const struct sock *sk,
697 			       const struct sk_buff *skb)
698 {
699 	const struct in6_addr *saddr, *daddr;
700 	struct tcp_md5sig_pool *hp;
701 	struct ahash_request *req;
702 	const struct tcphdr *th = tcp_hdr(skb);
703 
704 	if (sk) { /* valid for establish/request sockets */
705 		saddr = &sk->sk_v6_rcv_saddr;
706 		daddr = &sk->sk_v6_daddr;
707 	} else {
708 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
709 		saddr = &ip6h->saddr;
710 		daddr = &ip6h->daddr;
711 	}
712 
713 	hp = tcp_get_md5sig_pool();
714 	if (!hp)
715 		goto clear_hash_noput;
716 	req = hp->md5_req;
717 
718 	if (crypto_ahash_init(req))
719 		goto clear_hash;
720 
721 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
722 		goto clear_hash;
723 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
724 		goto clear_hash;
725 	if (tcp_md5_hash_key(hp, key))
726 		goto clear_hash;
727 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
728 	if (crypto_ahash_final(req))
729 		goto clear_hash;
730 
731 	tcp_put_md5sig_pool();
732 	return 0;
733 
734 clear_hash:
735 	tcp_put_md5sig_pool();
736 clear_hash_noput:
737 	memset(md5_hash, 0, 16);
738 	return 1;
739 }
740 
741 #endif
742 
743 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
744 				    const struct sk_buff *skb,
745 				    int dif, int sdif)
746 {
747 #ifdef CONFIG_TCP_MD5SIG
748 	const __u8 *hash_location = NULL;
749 	struct tcp_md5sig_key *hash_expected;
750 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
751 	const struct tcphdr *th = tcp_hdr(skb);
752 	int genhash, l3index;
753 	u8 newhash[16];
754 
755 	/* sdif set, means packet ingressed via a device
756 	 * in an L3 domain and dif is set to the l3mdev
757 	 */
758 	l3index = sdif ? dif : 0;
759 
760 	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
761 	hash_location = tcp_parse_md5sig_option(th);
762 
763 	/* We've parsed the options - do we have a hash? */
764 	if (!hash_expected && !hash_location)
765 		return false;
766 
767 	if (hash_expected && !hash_location) {
768 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
769 		return true;
770 	}
771 
772 	if (!hash_expected && hash_location) {
773 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
774 		return true;
775 	}
776 
777 	/* check the signature */
778 	genhash = tcp_v6_md5_hash_skb(newhash,
779 				      hash_expected,
780 				      NULL, skb);
781 
782 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
783 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
784 		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
785 				     genhash ? "failed" : "mismatch",
786 				     &ip6h->saddr, ntohs(th->source),
787 				     &ip6h->daddr, ntohs(th->dest), l3index);
788 		return true;
789 	}
790 #endif
791 	return false;
792 }
793 
794 static void tcp_v6_init_req(struct request_sock *req,
795 			    const struct sock *sk_listener,
796 			    struct sk_buff *skb)
797 {
798 	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
799 	struct inet_request_sock *ireq = inet_rsk(req);
800 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
801 
802 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
803 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
804 
805 	/* So that link locals have meaning */
806 	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
807 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
808 		ireq->ir_iif = tcp_v6_iif(skb);
809 
810 	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
811 	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
812 	     np->rxopt.bits.rxinfo ||
813 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
814 	     np->rxopt.bits.rxohlim || np->repflow)) {
815 		refcount_inc(&skb->users);
816 		ireq->pktopts = skb;
817 	}
818 }
819 
820 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
821 					  struct flowi *fl,
822 					  const struct request_sock *req)
823 {
824 	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
825 }
826 
827 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
828 	.family		=	AF_INET6,
829 	.obj_size	=	sizeof(struct tcp6_request_sock),
830 	.rtx_syn_ack	=	tcp_rtx_synack,
831 	.send_ack	=	tcp_v6_reqsk_send_ack,
832 	.destructor	=	tcp_v6_reqsk_destructor,
833 	.send_reset	=	tcp_v6_send_reset,
834 	.syn_ack_timeout =	tcp_syn_ack_timeout,
835 };
836 
837 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
838 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
839 				sizeof(struct ipv6hdr),
840 #ifdef CONFIG_TCP_MD5SIG
841 	.req_md5_lookup	=	tcp_v6_md5_lookup,
842 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
843 #endif
844 	.init_req	=	tcp_v6_init_req,
845 #ifdef CONFIG_SYN_COOKIES
846 	.cookie_init_seq =	cookie_v6_init_sequence,
847 #endif
848 	.route_req	=	tcp_v6_route_req,
849 	.init_seq	=	tcp_v6_init_seq,
850 	.init_ts_off	=	tcp_v6_init_ts_off,
851 	.send_synack	=	tcp_v6_send_synack,
852 };
853 
854 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
855 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
856 				 int oif, struct tcp_md5sig_key *key, int rst,
857 				 u8 tclass, __be32 label, u32 priority)
858 {
859 	const struct tcphdr *th = tcp_hdr(skb);
860 	struct tcphdr *t1;
861 	struct sk_buff *buff;
862 	struct flowi6 fl6;
863 	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
864 	struct sock *ctl_sk = net->ipv6.tcp_sk;
865 	unsigned int tot_len = sizeof(struct tcphdr);
866 	struct dst_entry *dst;
867 	__be32 *topt;
868 	__u32 mark = 0;
869 
870 	if (tsecr)
871 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
872 #ifdef CONFIG_TCP_MD5SIG
873 	if (key)
874 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
875 #endif
876 
877 	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
878 			 GFP_ATOMIC);
879 	if (!buff)
880 		return;
881 
882 	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
883 
884 	t1 = skb_push(buff, tot_len);
885 	skb_reset_transport_header(buff);
886 
887 	/* Swap the send and the receive. */
888 	memset(t1, 0, sizeof(*t1));
889 	t1->dest = th->source;
890 	t1->source = th->dest;
891 	t1->doff = tot_len / 4;
892 	t1->seq = htonl(seq);
893 	t1->ack_seq = htonl(ack);
894 	t1->ack = !rst || !th->ack;
895 	t1->rst = rst;
896 	t1->window = htons(win);
897 
898 	topt = (__be32 *)(t1 + 1);
899 
900 	if (tsecr) {
901 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
902 				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
903 		*topt++ = htonl(tsval);
904 		*topt++ = htonl(tsecr);
905 	}
906 
907 #ifdef CONFIG_TCP_MD5SIG
908 	if (key) {
909 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
910 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
911 		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
912 				    &ipv6_hdr(skb)->saddr,
913 				    &ipv6_hdr(skb)->daddr, t1);
914 	}
915 #endif
916 
917 	memset(&fl6, 0, sizeof(fl6));
918 	fl6.daddr = ipv6_hdr(skb)->saddr;
919 	fl6.saddr = ipv6_hdr(skb)->daddr;
920 	fl6.flowlabel = label;
921 
922 	buff->ip_summed = CHECKSUM_PARTIAL;
923 	buff->csum = 0;
924 
925 	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
926 
927 	fl6.flowi6_proto = IPPROTO_TCP;
928 	if (rt6_need_strict(&fl6.daddr) && !oif)
929 		fl6.flowi6_oif = tcp_v6_iif(skb);
930 	else {
931 		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
932 			oif = skb->skb_iif;
933 
934 		fl6.flowi6_oif = oif;
935 	}
936 
937 	if (sk) {
938 		if (sk->sk_state == TCP_TIME_WAIT) {
939 			mark = inet_twsk(sk)->tw_mark;
940 			/* autoflowlabel relies on buff->hash */
941 			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
942 				     PKT_HASH_TYPE_L4);
943 		} else {
944 			mark = sk->sk_mark;
945 		}
946 		buff->tstamp = tcp_transmit_time(sk);
947 	}
948 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
949 	fl6.fl6_dport = t1->dest;
950 	fl6.fl6_sport = t1->source;
951 	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
952 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
953 
954 	/* Pass a socket to ip6_dst_lookup either it is for RST
955 	 * Underlying function will use this to retrieve the network
956 	 * namespace
957 	 */
958 	dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
959 	if (!IS_ERR(dst)) {
960 		skb_dst_set(buff, dst);
961 		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass,
962 			 priority);
963 		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
964 		if (rst)
965 			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
966 		return;
967 	}
968 
969 	kfree_skb(buff);
970 }
971 
972 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
973 {
974 	const struct tcphdr *th = tcp_hdr(skb);
975 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
976 	u32 seq = 0, ack_seq = 0;
977 	struct tcp_md5sig_key *key = NULL;
978 #ifdef CONFIG_TCP_MD5SIG
979 	const __u8 *hash_location = NULL;
980 	unsigned char newhash[16];
981 	int genhash;
982 	struct sock *sk1 = NULL;
983 #endif
984 	__be32 label = 0;
985 	u32 priority = 0;
986 	struct net *net;
987 	int oif = 0;
988 
989 	if (th->rst)
990 		return;
991 
992 	/* If sk not NULL, it means we did a successful lookup and incoming
993 	 * route had to be correct. prequeue might have dropped our dst.
994 	 */
995 	if (!sk && !ipv6_unicast_destination(skb))
996 		return;
997 
998 	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
999 #ifdef CONFIG_TCP_MD5SIG
1000 	rcu_read_lock();
1001 	hash_location = tcp_parse_md5sig_option(th);
1002 	if (sk && sk_fullsock(sk)) {
1003 		int l3index;
1004 
1005 		/* sdif set, means packet ingressed via a device
1006 		 * in an L3 domain and inet_iif is set to it.
1007 		 */
1008 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1009 		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1010 	} else if (hash_location) {
1011 		int dif = tcp_v6_iif_l3_slave(skb);
1012 		int sdif = tcp_v6_sdif(skb);
1013 		int l3index;
1014 
1015 		/*
1016 		 * active side is lost. Try to find listening socket through
1017 		 * source port, and then find md5 key through listening socket.
1018 		 * we are not loose security here:
1019 		 * Incoming packet is checked with md5 hash with finding key,
1020 		 * no RST generated if md5 hash doesn't match.
1021 		 */
1022 		sk1 = inet6_lookup_listener(net,
1023 					   &tcp_hashinfo, NULL, 0,
1024 					   &ipv6h->saddr,
1025 					   th->source, &ipv6h->daddr,
1026 					   ntohs(th->source), dif, sdif);
1027 		if (!sk1)
1028 			goto out;
1029 
1030 		/* sdif set, means packet ingressed via a device
1031 		 * in an L3 domain and dif is set to it.
1032 		 */
1033 		l3index = tcp_v6_sdif(skb) ? dif : 0;
1034 
1035 		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1036 		if (!key)
1037 			goto out;
1038 
1039 		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1040 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
1041 			goto out;
1042 	}
1043 #endif
1044 
1045 	if (th->ack)
1046 		seq = ntohl(th->ack_seq);
1047 	else
1048 		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1049 			  (th->doff << 2);
1050 
1051 	if (sk) {
1052 		oif = sk->sk_bound_dev_if;
1053 		if (sk_fullsock(sk)) {
1054 			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1055 
1056 			trace_tcp_send_reset(sk, skb);
1057 			if (np->repflow)
1058 				label = ip6_flowlabel(ipv6h);
1059 			priority = sk->sk_priority;
1060 		}
1061 		if (sk->sk_state == TCP_TIME_WAIT) {
1062 			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1063 			priority = inet_twsk(sk)->tw_priority;
1064 		}
1065 	} else {
1066 		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1067 			label = ip6_flowlabel(ipv6h);
1068 	}
1069 
1070 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0,
1071 			     label, priority);
1072 
1073 #ifdef CONFIG_TCP_MD5SIG
1074 out:
1075 	rcu_read_unlock();
1076 #endif
1077 }
1078 
1079 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1080 			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1081 			    struct tcp_md5sig_key *key, u8 tclass,
1082 			    __be32 label, u32 priority)
1083 {
1084 	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1085 			     tclass, label, priority);
1086 }
1087 
1088 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1089 {
1090 	struct inet_timewait_sock *tw = inet_twsk(sk);
1091 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1092 
1093 	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1094 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1095 			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1096 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1097 			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1098 
1099 	inet_twsk_put(tw);
1100 }
1101 
1102 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1103 				  struct request_sock *req)
1104 {
1105 	int l3index;
1106 
1107 	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1108 
1109 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1110 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1111 	 */
1112 	/* RFC 7323 2.3
1113 	 * The window field (SEG.WND) of every outgoing segment, with the
1114 	 * exception of <SYN> segments, MUST be right-shifted by
1115 	 * Rcv.Wind.Shift bits:
1116 	 */
1117 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1118 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1119 			tcp_rsk(req)->rcv_nxt,
1120 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1121 			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1122 			req->ts_recent, sk->sk_bound_dev_if,
1123 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1124 			0, 0, sk->sk_priority);
1125 }
1126 
1127 
/*
 * With CONFIG_SYN_COOKIES, hand every non-SYN segment to
 * cookie_v6_check() and return whatever it returns; SYN segments, and
 * all segments when syncookies are compiled out, yield @sk unchanged.
 */
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	if (tcp_hdr(skb)->syn)
		return sk;

	return cookie_v6_check(sk, skb);
#else
	return sk;
#endif
}
1138 
1139 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1140 			 struct tcphdr *th, u32 *cookie)
1141 {
1142 	u16 mss = 0;
1143 #ifdef CONFIG_SYN_COOKIES
1144 	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1145 				    &tcp_request_sock_ipv6_ops, sk, th);
1146 	if (mss) {
1147 		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1148 		tcp_synq_overflow(sk);
1149 	}
1150 #endif
1151 	return mss;
1152 }
1153 
1154 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1155 {
1156 	if (skb->protocol == htons(ETH_P_IP))
1157 		return tcp_v4_conn_request(sk, skb);
1158 
1159 	if (!ipv6_unicast_destination(skb))
1160 		goto drop;
1161 
1162 	return tcp_conn_request(&tcp6_request_sock_ops,
1163 				&tcp_request_sock_ipv6_ops, sk, skb);
1164 
1165 drop:
1166 	tcp_listendrop(sk);
1167 	return 0; /* don't send reset */
1168 }
1169 
1170 static void tcp_v6_restore_cb(struct sk_buff *skb)
1171 {
1172 	/* We need to move header back to the beginning if xfrm6_policy_check()
1173 	 * and tcp_v6_fill_cb() are going to be called again.
1174 	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1175 	 */
1176 	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1177 		sizeof(struct inet6_skb_parm));
1178 }
1179 
/*
 * Build the child socket for connection request @req on listener @sk.
 *
 * Two paths:
 *  - skb->protocol == ETH_P_IP (v4-mapped): the child is created by
 *    tcp_v4_syn_recv_sock() (which also receives @own_req); this function
 *    then copies the listener's ipv6_pinfo into it, clears the pointers
 *    that must not be shared, and switches it to the ipv6_mapped ops and
 *    the tcp_v4_do_rcv backlog handler.
 *  - otherwise (native IPv6): a route is looked up if @dst is NULL, the
 *    child is created with tcp_create_openreq_child(), addresses and
 *    options are filled in from @req / the listener, the MD5 key is
 *    copied (CONFIG_TCP_MD5SIG), the port is inherited and the child is
 *    inserted with inet_ehash_nolisten(), whose result is stored in
 *    *@own_req.
 *
 * Returns the new socket, or NULL on failure.  The native-IPv6 failure
 * paths call tcp_listendrop(sk) before returning NULL.
 */
1180 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1181 					 struct request_sock *req,
1182 					 struct dst_entry *dst,
1183 					 struct request_sock *req_unhash,
1184 					 bool *own_req)
1185 {
1186 	struct inet_request_sock *ireq;
1187 	struct ipv6_pinfo *newnp;
1188 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1189 	struct ipv6_txoptions *opt;
1190 	struct inet_sock *newinet;
1191 	struct tcp_sock *newtp;
1192 	struct sock *newsk;
1193 #ifdef CONFIG_TCP_MD5SIG
1194 	struct tcp_md5sig_key *key;
1195 	int l3index;
1196 #endif
1197 	struct flowi6 fl6;
1198 
1199 	if (skb->protocol == htons(ETH_P_IP)) {
1200 		/*
1201 		 *	v6 mapped
1202 		 */
1203 
1204 		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1205 					     req_unhash, own_req);
1206 
1207 		if (!newsk)
1208 			return NULL;
1209 
1210 		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1211 
1212 		newinet = inet_sk(newsk);
1213 		newnp = tcp_inet6_sk(newsk);
1214 		newtp = tcp_sk(newsk);
1215 
		/* Start from a byte copy of the listener's IPv6 state ... */
1216 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1217 
1218 		newnp->saddr = newsk->sk_v6_rcv_saddr;
1219 
1220 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1221 		if (sk_is_mptcp(newsk))
1222 			mptcpv6_handle_mapped(newsk, true);
1223 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1224 #ifdef CONFIG_TCP_MD5SIG
1225 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1226 #endif
1227 
		/* ... then reset the pointers the memcpy() above duplicated. */
1228 		newnp->ipv6_mc_list = NULL;
1229 		newnp->ipv6_ac_list = NULL;
1230 		newnp->ipv6_fl_list = NULL;
1231 		newnp->pktoptions  = NULL;
1232 		newnp->opt	   = NULL;
1233 		newnp->mcast_oif   = inet_iif(skb);
1234 		newnp->mcast_hops  = ip_hdr(skb)->ttl;
1235 		newnp->rcv_flowinfo = 0;
1236 		if (np->repflow)
1237 			newnp->flow_label = 0;
1238 
1239 		/*
1240 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1241 		 * here, tcp_create_openreq_child now does this for us, see the comment in
1242 		 * that function for the gory details. -acme
1243 		 */
1244 
1245 		/* It is tricky place. Until this moment IPv4 tcp
1246 		   worked with IPv6 icsk.icsk_af_ops.
1247 		   Sync it now.
1248 		 */
1249 		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1250 
1251 		return newsk;
1252 	}
1253 
	/* Native IPv6 path from here on. */
1254 	ireq = inet_rsk(req);
1255 
1256 	if (sk_acceptq_is_full(sk))
1257 		goto out_overflow;
1258 
1259 	if (!dst) {
1260 		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1261 		if (!dst)
1262 			goto out;
1263 	}
1264 
1265 	newsk = tcp_create_openreq_child(sk, req, skb);
1266 	if (!newsk)
1267 		goto out_nonewsk;
1268 
1269 	/*
1270 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1271 	 * count here, tcp_create_openreq_child now does this for us, see the
1272 	 * comment in that function for the gory details. -acme
1273 	 */
1274 
1275 	newsk->sk_gso_type = SKB_GSO_TCPV6;
	/* dst is stored on newsk here; later failure paths skip out_nonewsk
	 * and therefore do not call dst_release() on it.
	 */
1276 	ip6_dst_store(newsk, dst, NULL, NULL);
1277 	inet6_sk_rx_dst_set(newsk, skb);
1278 
1279 	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1280 
1281 	newtp = tcp_sk(newsk);
1282 	newinet = inet_sk(newsk);
1283 	newnp = tcp_inet6_sk(newsk);
1284 
1285 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1286 
1287 	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1288 	newnp->saddr = ireq->ir_v6_loc_addr;
1289 	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1290 	newsk->sk_bound_dev_if = ireq->ir_iif;
1291 
1292 	/* Now IPv6 options...
1293 
1294 	   First: no IPv4 options.
1295 	 */
1296 	newinet->inet_opt = NULL;
1297 	newnp->ipv6_mc_list = NULL;
1298 	newnp->ipv6_ac_list = NULL;
1299 	newnp->ipv6_fl_list = NULL;
1300 
1301 	/* Clone RX bits */
1302 	newnp->rxopt.all = np->rxopt.all;
1303 
1304 	newnp->pktoptions = NULL;
1305 	newnp->opt	  = NULL;
1306 	newnp->mcast_oif  = tcp_v6_iif(skb);
1307 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1308 	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1309 	if (np->repflow)
1310 		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1311 
1312 	/* Clone native IPv6 options from listening socket (if any)
1313 
1314 	   Yes, keeping reference count would be much more clever,
1315 	   but we make one more one thing there: reattach optmem
1316 	   to newsk.
1317 	 */
	/* Options attached to the request take precedence over the listener's. */
1318 	opt = ireq->ipv6_opt;
1319 	if (!opt)
1320 		opt = rcu_dereference(np->opt);
1321 	if (opt) {
1322 		opt = ipv6_dup_options(newsk, opt);
1323 		RCU_INIT_POINTER(newnp->opt, opt);
1324 	}
	/* opt is re-tested below because it now holds the ipv6_dup_options() result. */
1325 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1326 	if (opt)
1327 		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1328 						    opt->opt_flen;
1329 
1330 	tcp_ca_openreq_child(newsk, dst);
1331 
1332 	tcp_sync_mss(newsk, dst_mtu(dst));
1333 	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1334 
1335 	tcp_initialize_rcv_mss(newsk);
1336 
1337 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1338 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1339 
1340 #ifdef CONFIG_TCP_MD5SIG
1341 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1342 
1343 	/* Copy over the MD5 key from the original socket */
1344 	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1345 	if (key) {
1346 		/* We're using one, so create a matching key
1347 		 * on the newsk structure. If we fail to get
1348 		 * memory, then we end up not copying the key
1349 		 * across. Shucks.
1350 		 */
1351 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1352 			       AF_INET6, 128, l3index, key->key, key->keylen,
1353 			       sk_gfp_mask(sk, GFP_ATOMIC));
1354 	}
1355 #endif
1356 
1357 	if (__inet_inherit_port(sk, newsk) < 0) {
1358 		inet_csk_prepare_forced_close(newsk);
1359 		tcp_done(newsk);
1360 		goto out;
1361 	}
1362 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1363 	if (*own_req) {
1364 		tcp_move_syn(newtp, req);
1365 
1366 		/* Clone pktoptions received with SYN, if we own the req */
1367 		if (ireq->pktopts) {
1368 			newnp->pktoptions = skb_clone(ireq->pktopts,
1369 						      sk_gfp_mask(sk, GFP_ATOMIC));
			/* The request's own copy is released whether or not the clone succeeded. */
1370 			consume_skb(ireq->pktopts);
1371 			ireq->pktopts = NULL;
1372 			if (newnp->pktoptions) {
1373 				tcp_v6_restore_cb(newnp->pktoptions);
1374 				skb_set_owner_r(newnp->pktoptions, newsk);
1375 			}
1376 		}
1377 	}
1378 
1379 	return newsk;
1380 
	/* Error unwinding: each label falls through into the next one. */
1381 out_overflow:
1382 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1383 out_nonewsk:
1384 	dst_release(dst);
1385 out:
1386 	tcp_listendrop(sk);
1387 	return NULL;
1388 }
1389 
1390 /* The socket must have its spinlock held when we get
1391  * here, unless it is a TCP_LISTEN socket.
1392  *
1393  * We have a potential double-lock case here, so even when
1394  * doing backlog processing we use the BH locking scheme.
1395  * This is because we cannot sleep with the original spinlock
1396  * held.
1397  */
/*
 * Per-socket receive handler (also installed as tcpv6_prot.backlog_rcv).
 *
 * IPv4 packets are delegated to tcp_v4_do_rcv().  Otherwise the segment
 * is dispatched on sk->sk_state: ESTABLISHED goes to
 * tcp_rcv_established(), LISTEN goes through tcp_v6_cookie_check(), and
 * everything else to tcp_rcv_state_process().  When np->rxopt.all is
 * non-zero a clone of @skb (opt_skb) is made up front and, on the
 * success paths, considered for latching into np->pktoptions.
 *
 * Returns 0 on every path in this function; only the IPv4 delegation
 * returns the callee's value.
 */
1398 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1399 {
1400 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1401 	struct sk_buff *opt_skb = NULL;
1402 	struct tcp_sock *tp;
1403 
1404 	/* Imagine: socket is IPv6. IPv4 packet arrives,
1405 	   goes to IPv4 receive handler and backlogged.
1406 	   From backlog it always goes here. Kerboom...
1407 	   Fortunately, tcp_rcv_established and rcv_established
1408 	   handle them correctly, but it is not case with
1409 	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1410 	 */
1411 
1412 	if (skb->protocol == htons(ETH_P_IP))
1413 		return tcp_v4_do_rcv(sk, skb);
1414 
1415 	/*
1416 	 *	socket locking is here for SMP purposes as backlog rcv
1417 	 *	is currently called with bh processing disabled.
1418 	 */
1419 
1420 	/* Do Stevens' IPV6_PKTOPTIONS.
1421 
1422 	   Yes, guys, it is the only place in our code, where we
1423 	   may make it not affecting IPv4.
1424 	   The rest of code is protocol independent,
1425 	   and I do not like idea to uglify IPv4.
1426 
1427 	   Actually, all the idea behind IPV6_PKTOPTIONS
1428 	   looks not very well thought. For now we latch
1429 	   options, received in the last packet, enqueued
1430 	   by tcp. Feel free to propose better solution.
1431 					       --ANK (980728)
1432 	 */
	/* The clone may be NULL on allocation failure; every later use checks opt_skb. */
1433 	if (np->rxopt.all)
1434 		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1435 
1436 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1437 		struct dst_entry *dst = sk->sk_rx_dst;
1438 
1439 		sock_rps_save_rxhash(sk, skb);
1440 		sk_mark_napi_id(sk, skb);
		/* Drop the cached rx dst if the ingress interface changed or
		 * the dst's check() callback returns NULL for the stored cookie.
		 */
1441 		if (dst) {
1442 			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1443 			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1444 				dst_release(dst);
1445 				sk->sk_rx_dst = NULL;
1446 			}
1447 		}
1448 
1449 		tcp_rcv_established(sk, skb);
1450 		if (opt_skb)
1451 			goto ipv6_pktoptions;
1452 		return 0;
1453 	}
1454 
1455 	if (tcp_checksum_complete(skb))
1456 		goto csum_err;
1457 
1458 	if (sk->sk_state == TCP_LISTEN) {
1459 		struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1460 
1461 		if (!nsk)
1462 			goto discard;
1463 
		/* A different socket came back: feed the segment to it as a child. */
1464 		if (nsk != sk) {
1465 			if (tcp_child_process(sk, nsk, skb))
1466 				goto reset;
1467 			if (opt_skb)
1468 				__kfree_skb(opt_skb);
1469 			return 0;
1470 		}
1471 	} else
1472 		sock_rps_save_rxhash(sk, skb);
1473 
1474 	if (tcp_rcv_state_process(sk, skb))
1475 		goto reset;
1476 	if (opt_skb)
1477 		goto ipv6_pktoptions;
1478 	return 0;
1479 
	/* reset falls through into discard, which frees both opt_skb and skb. */
1480 reset:
1481 	tcp_v6_send_reset(sk, skb);
1482 discard:
1483 	if (opt_skb)
1484 		__kfree_skb(opt_skb);
1485 	kfree_skb(skb);
1486 	return 0;
1487 csum_err:
1488 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1489 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1490 	goto discard;
1491 
1492 
1493 ipv6_pktoptions:
1494 	/* Do you ask, what is it?
1495 
1496 	   1. skb was enqueued by tcp.
1497 	   2. skb is added to tail of read queue, rather than out of order.
1498 	   3. socket is not in passive state.
1499 	   4. Finally, it really contains options, which user wants to receive.
1500 	 */
1501 	tp = tcp_sk(sk);
1502 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1503 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1504 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1505 			np->mcast_oif = tcp_v6_iif(opt_skb);
1506 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1507 			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1508 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1509 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1510 		if (np->repflow)
1511 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		/* In both branches opt_skb ends up holding the previous
		 * np->pktoptions value (possibly NULL), which is freed below.
		 */
1512 		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1513 			skb_set_owner_r(opt_skb, sk);
1514 			tcp_v6_restore_cb(opt_skb);
1515 			opt_skb = xchg(&np->pktoptions, opt_skb);
1516 		} else {
1517 			__kfree_skb(opt_skb);
1518 			opt_skb = xchg(&np->pktoptions, NULL);
1519 		}
1520 	}
1521 
1522 	kfree_skb(opt_skb);
1523 	return 0;
1524 }
1525 
1526 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1527 			   const struct tcphdr *th)
1528 {
1529 	/* This is tricky: we move IP6CB at its correct location into
1530 	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1531 	 * _decode_session6() uses IP6CB().
1532 	 * barrier() makes sure compiler won't play aliasing games.
1533 	 */
1534 	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1535 		sizeof(struct inet6_skb_parm));
1536 	barrier();
1537 
1538 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1539 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1540 				    skb->len - th->doff*4);
1541 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1542 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1543 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1544 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1545 	TCP_SKB_CB(skb)->sacked = 0;
1546 	TCP_SKB_CB(skb)->has_rxtstamp =
1547 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1548 }
1549 
/*
 * Receive entry point for TCP segments carried over IPv6 (installed as
 * tcpv6_protocol.handler).
 *
 * After header-length and checksum-init validation the socket is looked
 * up and the segment is dispatched on sk->sk_state:
 *  - TCP_TIME_WAIT:    handled under do_time_wait;
 *  - TCP_NEW_SYN_RECV: the request sock is checked against its listener
 *                      via tcp_check_req() and possibly handed to the
 *                      resulting child;
 *  - TCP_LISTEN:       tcp_v6_do_rcv() is called without taking the
 *                      socket lock;
 *  - otherwise:        tcp_v6_do_rcv() under bh_lock_sock_nested(), or
 *                      tcp_add_backlog() if a user context owns the
 *                      socket.
 * With no matching socket, a reset is sent unless the checksum is bad.
 *
 * Returns -1 when tcp_v6_do_rcv() returned non-zero, 0 otherwise.
 */
1550 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1551 {
1552 	struct sk_buff *skb_to_free;
1553 	int sdif = inet6_sdif(skb);
1554 	int dif = inet6_iif(skb);
1555 	const struct tcphdr *th;
1556 	const struct ipv6hdr *hdr;
1557 	bool refcounted;
1558 	struct sock *sk;
1559 	int ret;
1560 	struct net *net = dev_net(skb->dev);
1561 
1562 	if (skb->pkt_type != PACKET_HOST)
1563 		goto discard_it;
1564 
1565 	/*
1566 	 *	Count it even if it's bad.
1567 	 */
1568 	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1569 
1570 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1571 		goto discard_it;
1572 
1573 	th = (const struct tcphdr *)skb->data;
1574 
1575 	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1576 		goto bad_packet;
1577 	if (!pskb_may_pull(skb, th->doff*4))
1578 		goto discard_it;
1579 
1580 	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1581 		goto csum_error;
1582 
	/* th is re-read from skb->data after the second pskb_may_pull(). */
1583 	th = (const struct tcphdr *)skb->data;
1584 	hdr = ipv6_hdr(skb);
1585 
1586 lookup:
1587 	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1588 				th->source, th->dest, inet6_iif(skb), sdif,
1589 				&refcounted);
1590 	if (!sk)
1591 		goto no_tcp_socket;
1592 
1593 process:
1594 	if (sk->sk_state == TCP_TIME_WAIT)
1595 		goto do_time_wait;
1596 
1597 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1598 		struct request_sock *req = inet_reqsk(sk);
1599 		bool req_stolen = false;
1600 		struct sock *nsk;
1601 
		/* From here on sk refers to the listener that owns req. */
1602 		sk = req->rsk_listener;
1603 		if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1604 			sk_drops_add(sk, skb);
1605 			reqsk_put(req);
1606 			goto discard_it;
1607 		}
1608 		if (tcp_checksum_complete(skb)) {
1609 			reqsk_put(req);
1610 			goto csum_error;
1611 		}
		/* Listener no longer listening: drop the request and redo the lookup. */
1612 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1613 			inet_csk_reqsk_queue_drop_and_put(sk, req);
1614 			goto lookup;
1615 		}
1616 		sock_hold(sk);
1617 		refcounted = true;
1618 		nsk = NULL;
1619 		if (!tcp_filter(sk, skb)) {
1620 			th = (const struct tcphdr *)skb->data;
1621 			hdr = ipv6_hdr(skb);
1622 			tcp_v6_fill_cb(skb, hdr, th);
1623 			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1624 		}
1625 		if (!nsk) {
1626 			reqsk_put(req);
1627 			if (req_stolen) {
1628 				/* Another cpu got exclusive access to req
1629 				 * and created a full blown socket.
1630 				 * Try to feed this packet to this socket
1631 				 * instead of discarding it.
1632 				 */
1633 				tcp_v6_restore_cb(skb);
1634 				sock_put(sk);
1635 				goto lookup;
1636 			}
1637 			goto discard_and_relse;
1638 		}
		/* nsk == sk: continue below and process the segment on the listener itself. */
1639 		if (nsk == sk) {
1640 			reqsk_put(req);
1641 			tcp_v6_restore_cb(skb);
1642 		} else if (tcp_child_process(sk, nsk, skb)) {
1643 			tcp_v6_send_reset(nsk, skb);
1644 			goto discard_and_relse;
1645 		} else {
1646 			sock_put(sk);
1647 			return 0;
1648 		}
1649 	}
1650 	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1651 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1652 		goto discard_and_relse;
1653 	}
1654 
1655 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1656 		goto discard_and_relse;
1657 
1658 	if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1659 		goto discard_and_relse;
1660 
1661 	if (tcp_filter(sk, skb))
1662 		goto discard_and_relse;
	/* Header pointers are re-read after tcp_filter() before filling the cb. */
1663 	th = (const struct tcphdr *)skb->data;
1664 	hdr = ipv6_hdr(skb);
1665 	tcp_v6_fill_cb(skb, hdr, th);
1666 
1667 	skb->dev = NULL;
1668 
1669 	if (sk->sk_state == TCP_LISTEN) {
1670 		ret = tcp_v6_do_rcv(sk, skb);
1671 		goto put_and_return;
1672 	}
1673 
1674 	sk_incoming_cpu_update(sk);
1675 
1676 	bh_lock_sock_nested(sk);
1677 	tcp_segs_in(tcp_sk(sk), skb);
1678 	ret = 0;
1679 	if (!sock_owned_by_user(sk)) {
		/* Detach the cached rx skb under the lock; it is freed after unlocking. */
1680 		skb_to_free = sk->sk_rx_skb_cache;
1681 		sk->sk_rx_skb_cache = NULL;
1682 		ret = tcp_v6_do_rcv(sk, skb);
1683 	} else {
1684 		if (tcp_add_backlog(sk, skb))
1685 			goto discard_and_relse;
1686 		skb_to_free = NULL;
1687 	}
1688 	bh_unlock_sock(sk);
1689 	if (skb_to_free)
1690 		__kfree_skb(skb_to_free);
1691 put_and_return:
1692 	if (refcounted)
1693 		sock_put(sk);
1694 	return ret ? -1 : 0;
1695 
1696 no_tcp_socket:
1697 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1698 		goto discard_it;
1699 
1700 	tcp_v6_fill_cb(skb, hdr, th);
1701 
	/* csum_error and bad_packet are jump targets inside this if-body:
	 * csum_error bumps both counters, bad_packet only TCP_MIB_INERRS.
	 */
1702 	if (tcp_checksum_complete(skb)) {
1703 csum_error:
1704 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1705 bad_packet:
1706 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1707 	} else {
1708 		tcp_v6_send_reset(NULL, skb);
1709 	}
1710 
1711 discard_it:
1712 	kfree_skb(skb);
1713 	return 0;
1714 
1715 discard_and_relse:
1716 	sk_drops_add(sk, skb);
1717 	if (refcounted)
1718 		sock_put(sk);
1719 	goto discard_it;
1720 
1721 do_time_wait:
1722 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1723 		inet_twsk_put(inet_twsk(sk));
1724 		goto discard_it;
1725 	}
1726 
1727 	tcp_v6_fill_cb(skb, hdr, th);
1728 
1729 	if (tcp_checksum_complete(skb)) {
1730 		inet_twsk_put(inet_twsk(sk));
1731 		goto csum_error;
1732 	}
1733 
1734 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1735 	case TCP_TW_SYN:
1736 	{
1737 		struct sock *sk2;
1738 
		/* Look for a listener that could accept this SYN in place of
		 * the timewait socket.
		 */
1739 		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1740 					    skb, __tcp_hdrlen(th),
1741 					    &ipv6_hdr(skb)->saddr, th->source,
1742 					    &ipv6_hdr(skb)->daddr,
1743 					    ntohs(th->dest),
1744 					    tcp_v6_iif_l3_slave(skb),
1745 					    sdif);
1746 		if (sk2) {
1747 			struct inet_timewait_sock *tw = inet_twsk(sk);
1748 			inet_twsk_deschedule_put(tw);
1749 			sk = sk2;
1750 			tcp_v6_restore_cb(skb);
1751 			refcounted = false;
1752 			goto process;
1753 		}
1754 	}
1755 		/* to ACK */
1756 		fallthrough;
1757 	case TCP_TW_ACK:
1758 		tcp_v6_timewait_ack(sk, skb);
1759 		break;
1760 	case TCP_TW_RST:
1761 		tcp_v6_send_reset(sk, skb);
1762 		inet_twsk_deschedule_put(inet_twsk(sk));
1763 		goto discard_it;
1764 	case TCP_TW_SUCCESS:
1765 		;
1766 	}
1767 	goto discard_it;
1768 }
1769 
1770 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1771 {
1772 	const struct ipv6hdr *hdr;
1773 	const struct tcphdr *th;
1774 	struct sock *sk;
1775 
1776 	if (skb->pkt_type != PACKET_HOST)
1777 		return;
1778 
1779 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1780 		return;
1781 
1782 	hdr = ipv6_hdr(skb);
1783 	th = tcp_hdr(skb);
1784 
1785 	if (th->doff < sizeof(struct tcphdr) / 4)
1786 		return;
1787 
1788 	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
1789 	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1790 					&hdr->saddr, th->source,
1791 					&hdr->daddr, ntohs(th->dest),
1792 					inet6_iif(skb), inet6_sdif(skb));
1793 	if (sk) {
1794 		skb->sk = sk;
1795 		skb->destructor = sock_edemux;
1796 		if (sk_fullsock(sk)) {
1797 			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1798 
1799 			if (dst)
1800 				dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1801 			if (dst &&
1802 			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1803 				skb_dst_set_noref(skb, dst);
1804 		}
1805 	}
1806 }
1807 
1808 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1809 	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
1810 	.twsk_unique	= tcp_twsk_unique,
1811 	.twsk_destructor = tcp_twsk_destructor,
1812 };
1813 
1814 const struct inet_connection_sock_af_ops ipv6_specific = {
1815 	.queue_xmit	   = inet6_csk_xmit,
1816 	.send_check	   = tcp_v6_send_check,
1817 	.rebuild_header	   = inet6_sk_rebuild_header,
1818 	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
1819 	.conn_request	   = tcp_v6_conn_request,
1820 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1821 	.net_header_len	   = sizeof(struct ipv6hdr),
1822 	.net_frag_header_len = sizeof(struct frag_hdr),
1823 	.setsockopt	   = ipv6_setsockopt,
1824 	.getsockopt	   = ipv6_getsockopt,
1825 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1826 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1827 #ifdef CONFIG_COMPAT
1828 	.compat_setsockopt = compat_ipv6_setsockopt,
1829 	.compat_getsockopt = compat_ipv6_getsockopt,
1830 #endif
1831 	.mtu_reduced	   = tcp_v6_mtu_reduced,
1832 };
1833 
#ifdef CONFIG_TCP_MD5SIG
/* MD5-signature operations for native IPv6 TCP sockets. */
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_parse	= tcp_v6_parse_md5_keys,
	.md5_lookup	= tcp_v6_md5_lookup,
	.calc_md5_hash	= tcp_v6_md5_hash_skb,
};
#endif
1841 
1842 /*
1843  *	TCP over IPv4 via INET6 API
1844  */
1845 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1846 	.queue_xmit	   = ip_queue_xmit,
1847 	.send_check	   = tcp_v4_send_check,
1848 	.rebuild_header	   = inet_sk_rebuild_header,
1849 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1850 	.conn_request	   = tcp_v6_conn_request,
1851 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1852 	.net_header_len	   = sizeof(struct iphdr),
1853 	.setsockopt	   = ipv6_setsockopt,
1854 	.getsockopt	   = ipv6_getsockopt,
1855 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1856 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1857 #ifdef CONFIG_COMPAT
1858 	.compat_setsockopt = compat_ipv6_setsockopt,
1859 	.compat_getsockopt = compat_ipv6_getsockopt,
1860 #endif
1861 	.mtu_reduced	   = tcp_v4_mtu_reduced,
1862 };
1863 
#ifdef CONFIG_TCP_MD5SIG
/*
 * MD5-signature operations for v4-mapped sockets: lookup and hashing use
 * the IPv4 helpers, key parsing stays with the IPv6 helper.
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_parse	= tcp_v6_parse_md5_keys,
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
};
#endif
1871 
1872 /* NOTE: A lot of things set to zero explicitly by call to
1873  *       sk_alloc() so need not be done here.
1874  */
1875 static int tcp_v6_init_sock(struct sock *sk)
1876 {
1877 	struct inet_connection_sock *icsk = inet_csk(sk);
1878 
1879 	tcp_init_sock(sk);
1880 
1881 	icsk->icsk_af_ops = &ipv6_specific;
1882 
1883 #ifdef CONFIG_TCP_MD5SIG
1884 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1885 #endif
1886 
1887 	return 0;
1888 }
1889 
/*
 * Socket destructor hook for TCPv6: run the TCP teardown shared with
 * IPv4 first, then the IPv6 socket teardown.
 */
static void tcp_v6_destroy_sock(struct sock *sk)
{
	/* TCP-level teardown */
	tcp_v4_destroy_sock(sk);

	/* IPv6-level teardown */
	inet6_destroy_sock(sk);
}
1895 
1896 #ifdef CONFIG_PROC_FS
1897 /* Proc filesystem TCPv6 sock list dumping. */
1898 static void get_openreq6(struct seq_file *seq,
1899 			 const struct request_sock *req, int i)
1900 {
1901 	long ttd = req->rsk_timer.expires - jiffies;
1902 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1903 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1904 
1905 	if (ttd < 0)
1906 		ttd = 0;
1907 
1908 	seq_printf(seq,
1909 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1910 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1911 		   i,
1912 		   src->s6_addr32[0], src->s6_addr32[1],
1913 		   src->s6_addr32[2], src->s6_addr32[3],
1914 		   inet_rsk(req)->ir_num,
1915 		   dest->s6_addr32[0], dest->s6_addr32[1],
1916 		   dest->s6_addr32[2], dest->s6_addr32[3],
1917 		   ntohs(inet_rsk(req)->ir_rmt_port),
1918 		   TCP_SYN_RECV,
1919 		   0, 0, /* could print option size, but that is af dependent. */
1920 		   1,   /* timers active (only the expire timer) */
1921 		   jiffies_to_clock_t(ttd),
1922 		   req->num_timeout,
1923 		   from_kuid_munged(seq_user_ns(seq),
1924 				    sock_i_uid(req->rsk_listener)),
1925 		   0,  /* non standard timer */
1926 		   0, /* open_requests have no inode */
1927 		   0, req);
1928 }
1929 
1930 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1931 {
1932 	const struct in6_addr *dest, *src;
1933 	__u16 destp, srcp;
1934 	int timer_active;
1935 	unsigned long timer_expires;
1936 	const struct inet_sock *inet = inet_sk(sp);
1937 	const struct tcp_sock *tp = tcp_sk(sp);
1938 	const struct inet_connection_sock *icsk = inet_csk(sp);
1939 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1940 	int rx_queue;
1941 	int state;
1942 
1943 	dest  = &sp->sk_v6_daddr;
1944 	src   = &sp->sk_v6_rcv_saddr;
1945 	destp = ntohs(inet->inet_dport);
1946 	srcp  = ntohs(inet->inet_sport);
1947 
1948 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1949 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1950 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1951 		timer_active	= 1;
1952 		timer_expires	= icsk->icsk_timeout;
1953 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1954 		timer_active	= 4;
1955 		timer_expires	= icsk->icsk_timeout;
1956 	} else if (timer_pending(&sp->sk_timer)) {
1957 		timer_active	= 2;
1958 		timer_expires	= sp->sk_timer.expires;
1959 	} else {
1960 		timer_active	= 0;
1961 		timer_expires = jiffies;
1962 	}
1963 
1964 	state = inet_sk_state_load(sp);
1965 	if (state == TCP_LISTEN)
1966 		rx_queue = READ_ONCE(sp->sk_ack_backlog);
1967 	else
1968 		/* Because we don't lock the socket,
1969 		 * we might find a transient negative value.
1970 		 */
1971 		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
1972 				      READ_ONCE(tp->copied_seq), 0);
1973 
1974 	seq_printf(seq,
1975 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1976 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1977 		   i,
1978 		   src->s6_addr32[0], src->s6_addr32[1],
1979 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
1980 		   dest->s6_addr32[0], dest->s6_addr32[1],
1981 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
1982 		   state,
1983 		   READ_ONCE(tp->write_seq) - tp->snd_una,
1984 		   rx_queue,
1985 		   timer_active,
1986 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
1987 		   icsk->icsk_retransmits,
1988 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1989 		   icsk->icsk_probes_out,
1990 		   sock_i_ino(sp),
1991 		   refcount_read(&sp->sk_refcnt), sp,
1992 		   jiffies_to_clock_t(icsk->icsk_rto),
1993 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
1994 		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
1995 		   tp->snd_cwnd,
1996 		   state == TCP_LISTEN ?
1997 			fastopenq->max_qlen :
1998 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
1999 		   );
2000 }
2001 
2002 static void get_timewait6_sock(struct seq_file *seq,
2003 			       struct inet_timewait_sock *tw, int i)
2004 {
2005 	long delta = tw->tw_timer.expires - jiffies;
2006 	const struct in6_addr *dest, *src;
2007 	__u16 destp, srcp;
2008 
2009 	dest = &tw->tw_v6_daddr;
2010 	src  = &tw->tw_v6_rcv_saddr;
2011 	destp = ntohs(tw->tw_dport);
2012 	srcp  = ntohs(tw->tw_sport);
2013 
2014 	seq_printf(seq,
2015 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2016 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2017 		   i,
2018 		   src->s6_addr32[0], src->s6_addr32[1],
2019 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2020 		   dest->s6_addr32[0], dest->s6_addr32[1],
2021 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2022 		   tw->tw_substate, 0, 0,
2023 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2024 		   refcount_read(&tw->tw_refcnt), tw);
2025 }
2026 
2027 static int tcp6_seq_show(struct seq_file *seq, void *v)
2028 {
2029 	struct tcp_iter_state *st;
2030 	struct sock *sk = v;
2031 
2032 	if (v == SEQ_START_TOKEN) {
2033 		seq_puts(seq,
2034 			 "  sl  "
2035 			 "local_address                         "
2036 			 "remote_address                        "
2037 			 "st tx_queue rx_queue tr tm->when retrnsmt"
2038 			 "   uid  timeout inode\n");
2039 		goto out;
2040 	}
2041 	st = seq->private;
2042 
2043 	if (sk->sk_state == TCP_TIME_WAIT)
2044 		get_timewait6_sock(seq, v, st->num);
2045 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2046 		get_openreq6(seq, v, st->num);
2047 	else
2048 		get_tcp6_sock(seq, v, st->num);
2049 out:
2050 	return 0;
2051 }
2052 
2053 static const struct seq_operations tcp6_seq_ops = {
2054 	.show		= tcp6_seq_show,
2055 	.start		= tcp_seq_start,
2056 	.next		= tcp_seq_next,
2057 	.stop		= tcp_seq_stop,
2058 };
2059 
2060 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2061 	.family		= AF_INET6,
2062 };
2063 
2064 int __net_init tcp6_proc_init(struct net *net)
2065 {
2066 	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2067 			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2068 		return -ENOMEM;
2069 	return 0;
2070 }
2071 
2072 void tcp6_proc_exit(struct net *net)
2073 {
2074 	remove_proc_entry("tcp6", net->proc_net);
2075 }
2076 #endif
2077 
2078 struct proto tcpv6_prot = {
2079 	.name			= "TCPv6",
2080 	.owner			= THIS_MODULE,
2081 	.close			= tcp_close,
2082 	.pre_connect		= tcp_v6_pre_connect,
2083 	.connect		= tcp_v6_connect,
2084 	.disconnect		= tcp_disconnect,
2085 	.accept			= inet_csk_accept,
2086 	.ioctl			= tcp_ioctl,
2087 	.init			= tcp_v6_init_sock,
2088 	.destroy		= tcp_v6_destroy_sock,
2089 	.shutdown		= tcp_shutdown,
2090 	.setsockopt		= tcp_setsockopt,
2091 	.getsockopt		= tcp_getsockopt,
2092 	.keepalive		= tcp_set_keepalive,
2093 	.recvmsg		= tcp_recvmsg,
2094 	.sendmsg		= tcp_sendmsg,
2095 	.sendpage		= tcp_sendpage,
2096 	.backlog_rcv		= tcp_v6_do_rcv,
2097 	.release_cb		= tcp_release_cb,
2098 	.hash			= inet6_hash,
2099 	.unhash			= inet_unhash,
2100 	.get_port		= inet_csk_get_port,
2101 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2102 	.leave_memory_pressure	= tcp_leave_memory_pressure,
2103 	.stream_memory_free	= tcp_stream_memory_free,
2104 	.sockets_allocated	= &tcp_sockets_allocated,
2105 	.memory_allocated	= &tcp_memory_allocated,
2106 	.memory_pressure	= &tcp_memory_pressure,
2107 	.orphan_count		= &tcp_orphan_count,
2108 	.sysctl_mem		= sysctl_tcp_mem,
2109 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2110 	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2111 	.max_header		= MAX_TCP_HEADER,
2112 	.obj_size		= sizeof(struct tcp6_sock),
2113 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2114 	.twsk_prot		= &tcp6_timewait_sock_ops,
2115 	.rsk_prot		= &tcp6_request_sock_ops,
2116 	.h.hashinfo		= &tcp_hashinfo,
2117 	.no_autobind		= true,
2118 #ifdef CONFIG_COMPAT
2119 	.compat_setsockopt	= compat_tcp_setsockopt,
2120 	.compat_getsockopt	= compat_tcp_getsockopt,
2121 #endif
2122 	.diag_destroy		= tcp_abort,
2123 };
2124 EXPORT_SYMBOL_GPL(tcpv6_prot);
2125 
2126 /* thinking of making this const? Don't.
2127  * early_demux can change based on sysctl.
2128  */
2129 static struct inet6_protocol tcpv6_protocol = {
2130 	.early_demux	=	tcp_v6_early_demux,
2131 	.early_demux_handler =  tcp_v6_early_demux,
2132 	.handler	=	tcp_v6_rcv,
2133 	.err_handler	=	tcp_v6_err,
2134 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2135 };
2136 
2137 static struct inet_protosw tcpv6_protosw = {
2138 	.type		=	SOCK_STREAM,
2139 	.protocol	=	IPPROTO_TCP,
2140 	.prot		=	&tcpv6_prot,
2141 	.ops		=	&inet6_stream_ops,
2142 	.flags		=	INET_PROTOSW_PERMANENT |
2143 				INET_PROTOSW_ICSK,
2144 };
2145 
2146 static int __net_init tcpv6_net_init(struct net *net)
2147 {
2148 	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2149 				    SOCK_RAW, IPPROTO_TCP, net);
2150 }
2151 
2152 static void __net_exit tcpv6_net_exit(struct net *net)
2153 {
2154 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2155 }
2156 
2157 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2158 {
2159 	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2160 }
2161 
2162 static struct pernet_operations tcpv6_net_ops = {
2163 	.init	    = tcpv6_net_init,
2164 	.exit	    = tcpv6_net_exit,
2165 	.exit_batch = tcpv6_net_exit_batch,
2166 };
2167 
2168 int __init tcpv6_init(void)
2169 {
2170 	int ret;
2171 
2172 	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2173 	if (ret)
2174 		goto out;
2175 
2176 	/* register inet6 protocol */
2177 	ret = inet6_register_protosw(&tcpv6_protosw);
2178 	if (ret)
2179 		goto out_tcpv6_protocol;
2180 
2181 	ret = register_pernet_subsys(&tcpv6_net_ops);
2182 	if (ret)
2183 		goto out_tcpv6_protosw;
2184 
2185 	ret = mptcpv6_init();
2186 	if (ret)
2187 		goto out_tcpv6_pernet_subsys;
2188 
2189 out:
2190 	return ret;
2191 
2192 out_tcpv6_pernet_subsys:
2193 	unregister_pernet_subsys(&tcpv6_net_ops);
2194 out_tcpv6_protosw:
2195 	inet6_unregister_protosw(&tcpv6_protosw);
2196 out_tcpv6_protocol:
2197 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2198 	goto out;
2199 }
2200 
2201 void tcpv6_exit(void)
2202 {
2203 	unregister_pernet_subsys(&tcpv6_net_ops);
2204 	inet6_unregister_protosw(&tcpv6_protosw);
2205 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2206 }
2207