xref: /openbmc/linux/net/ipv6/tcp_ipv6.c (revision 6562c9ac)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	TCP over IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on:
10  *	linux/net/ipv4/tcp.c
11  *	linux/net/ipv4/tcp_input.c
12  *	linux/net/ipv4/tcp_output.c
13  *
14  *	Fixes:
15  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
16  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
17  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
18  *					a single port at the same time.
19  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
20  */
21 
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43 
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62 
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65 
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68 
69 #include <trace/events/tcp.h>
70 
71 static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 				      struct request_sock *req);
74 
75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76 
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 						   const struct in6_addr *addr,
85 						   int l3index)
86 {
87 	return NULL;
88 }
89 #endif
90 
91 /* Helper returning the inet6 address from a given tcp socket.
92  * It can be used in TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allow compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98 	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99 
100 	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
102 
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105 	struct dst_entry *dst = skb_dst(skb);
106 
107 	if (dst && dst_hold_safe(dst)) {
108 		const struct rt6_info *rt = (const struct rt6_info *)dst;
109 
110 		rcu_assign_pointer(sk->sk_rx_dst, dst);
111 		sk->sk_rx_dst_ifindex = skb->skb_iif;
112 		sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
113 	}
114 }
115 
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118 	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119 				ipv6_hdr(skb)->saddr.s6_addr32,
120 				tcp_hdr(skb)->dest,
121 				tcp_hdr(skb)->source);
122 }
123 
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126 	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127 				   ipv6_hdr(skb)->saddr.s6_addr32);
128 }
129 
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131 			      int addr_len)
132 {
133 	/* This check is replicated from tcp_v6_connect() and intended to
134 	 * prevent BPF program called below from accessing bytes that are out
135 	 * of the bound specified by user in addr_len.
136 	 */
137 	if (addr_len < SIN6_LEN_RFC2133)
138 		return -EINVAL;
139 
140 	sock_owned_by_me(sk);
141 
142 	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
144 
/*
 *	tcp_v6_connect - start an active open on an IPv6 TCP socket
 *	@sk:		socket being connected
 *	@uaddr:		destination, read as a struct sockaddr_in6
 *	@addr_len:	length of @uaddr as supplied by the caller
 *
 *	After validating the destination, one of two paths is taken:
 *	 - a v4-mapped destination switches the socket to the ipv6_mapped
 *	   operations and is handed to tcp_v4_connect(); the switch is undone
 *	   if that call fails;
 *	 - any other destination is routed with ip6_dst_lookup_flow(), the
 *	   source address is filled in, the socket moves to TCP_SYN_SENT, is
 *	   hashed with inet6_hash_connect() and tcp_connect() is called.
 *
 *	Returns 0 on success or a negative errno.  Errors produced here are
 *	-EINVAL (address too short, flow label lookup failed, link-local
 *	destination without a usable interface), -EAFNOSUPPORT (family is not
 *	AF_INET6) and -ENETUNREACH (multicast destination, or v4-mapped
 *	destination on an IPv6-only socket).  Errors from tcp_v4_connect(),
 *	ip6_dst_lookup_flow(), inet6_hash_connect(),
 *	tcp_fastopen_defer_connect() and tcp_connect() are passed through.
 */
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146 			  int addr_len)
147 {
148 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149 	struct inet_sock *inet = inet_sk(sk);
150 	struct inet_connection_sock *icsk = inet_csk(sk);
151 	struct inet_timewait_death_row *tcp_death_row;
152 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
153 	struct tcp_sock *tp = tcp_sk(sk);
154 	struct in6_addr *saddr = NULL, *final_p, final;
155 	struct ipv6_txoptions *opt;
156 	struct flowi6 fl6;
157 	struct dst_entry *dst;
158 	int addr_type;
159 	int err;
160 
161 	if (addr_len < SIN6_LEN_RFC2133)
162 		return -EINVAL;
163 
164 	if (usin->sin6_family != AF_INET6)
165 		return -EAFNOSUPPORT;
166 
167 	memset(&fl6, 0, sizeof(fl6));
168 
	/* Only when the socket has sndflow set: take the flow info from the
	 * caller's address.  A non-zero label must be found by
	 * fl6_sock_lookup() for this socket; the returned entry is released
	 * again right away.
	 */
169 	if (np->sndflow) {
170 		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
171 		IP6_ECN_flow_init(fl6.flowlabel);
172 		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
173 			struct ip6_flowlabel *flowlabel;
174 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175 			if (IS_ERR(flowlabel))
176 				return -EINVAL;
177 			fl6_sock_release(flowlabel);
178 		}
179 	}
180 
181 	/*
182 	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 *	The caller's address is rewritten in place: to the v4-mapped
	 *	IPv4 loopback when the socket's receive address is v4-mapped,
	 *	to the IPv6 loopback otherwise.
183 	 */
184 
185 	if (ipv6_addr_any(&usin->sin6_addr)) {
186 		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187 			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
188 					       &usin->sin6_addr);
189 		else
190 			usin->sin6_addr = in6addr_loopback;
191 	}
192 
193 	addr_type = ipv6_addr_type(&usin->sin6_addr);
194 
195 	if (addr_type & IPV6_ADDR_MULTICAST)
196 		return -ENETUNREACH;
197 
198 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		/* sin6_scope_id is only looked at when the caller passed a
		 * full struct sockaddr_in6.
		 */
199 		if (addr_len >= sizeof(struct sockaddr_in6) &&
200 		    usin->sin6_scope_id) {
201 			/* If interface is set while binding, indices
202 			 * must coincide.
203 			 */
204 			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
205 				return -EINVAL;
206 
207 			sk->sk_bound_dev_if = usin->sin6_scope_id;
208 		}
209 
210 		/* Connect to link-local address requires an interface */
211 		if (!sk->sk_bound_dev_if)
212 			return -EINVAL;
213 	}
214 
	/* The destination differs from the one stored on the socket while
	 * timestamp state is recorded: clear that state and write_seq.
	 */
215 	if (tp->rx_opt.ts_recent_stamp &&
216 	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217 		tp->rx_opt.ts_recent = 0;
218 		tp->rx_opt.ts_recent_stamp = 0;
219 		WRITE_ONCE(tp->write_seq, 0);
220 	}
221 
222 	sk->sk_v6_daddr = usin->sin6_addr;
223 	np->flow_label = fl6.flowlabel;
224 
225 	/*
226 	 *	TCP over IPv4
227 	 */
228 
229 	if (addr_type & IPV6_ADDR_MAPPED) {
		/* Saved so it can be put back if tcp_v4_connect() fails. */
230 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
231 		struct sockaddr_in sin;
232 
233 		if (ipv6_only_sock(sk))
234 			return -ENETUNREACH;
235 
		/* Build the IPv4 destination from the low 32 bits of the
		 * mapped address and the same port.
		 */
236 		sin.sin_family = AF_INET;
237 		sin.sin_port = usin->sin6_port;
238 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239 
		/* Switch the socket to the mapped (IPv4) operations before
		 * connecting.
		 */
240 		icsk->icsk_af_ops = &ipv6_mapped;
241 		if (sk_is_mptcp(sk))
242 			mptcpv6_handle_mapped(sk, true);
243 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
244 #ifdef CONFIG_TCP_MD5SIG
245 		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
246 #endif
247 
248 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
249 
250 		if (err) {
			/* Undo every switch made above. */
251 			icsk->icsk_ext_hdr_len = exthdrlen;
252 			icsk->icsk_af_ops = &ipv6_specific;
253 			if (sk_is_mptcp(sk))
254 				mptcpv6_handle_mapped(sk, false);
255 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
256 #ifdef CONFIG_TCP_MD5SIG
257 			tp->af_specific = &tcp_sock_ipv6_specific;
258 #endif
259 			goto failure;
260 		}
261 		np->saddr = sk->sk_v6_rcv_saddr;
262 
263 		return err;
264 	}
265 
	/* Native IPv6 from here on.  A non-wildcard receive address on the
	 * socket is used as the source address of the flow.
	 */
266 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
267 		saddr = &sk->sk_v6_rcv_saddr;
268 
269 	fl6.flowi6_proto = IPPROTO_TCP;
270 	fl6.daddr = sk->sk_v6_daddr;
271 	fl6.saddr = saddr ? *saddr : np->saddr;
272 	fl6.flowi6_oif = sk->sk_bound_dev_if;
273 	fl6.flowi6_mark = sk->sk_mark;
274 	fl6.fl6_dport = usin->sin6_port;
275 	fl6.fl6_sport = inet->inet_sport;
276 	fl6.flowi6_uid = sk->sk_uid;
277 
278 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
279 	final_p = fl6_update_dst(&fl6, opt, &final);
280 
281 	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
282 
283 	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
284 	if (IS_ERR(dst)) {
285 		err = PTR_ERR(dst);
286 		goto failure;
287 	}
288 
	/* No source address was taken from the socket: use fl6.saddr as it
	 * stands after the lookup (presumably filled in by the route lookup
	 * - confirm against ip6_dst_lookup_flow()) and record it as the
	 * socket's receive address.
	 */
289 	if (!saddr) {
290 		saddr = &fl6.saddr;
291 		sk->sk_v6_rcv_saddr = *saddr;
292 	}
293 
294 	/* set the source address */
295 	np->saddr = *saddr;
296 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
297 
298 	sk->sk_gso_type = SKB_GSO_TCPV6;
299 	ip6_dst_store(sk, dst, NULL, NULL);
300 
	/* Extension header length is the sum of both option lengths, or 0
	 * when the socket has no tx options.
	 */
301 	icsk->icsk_ext_hdr_len = 0;
302 	if (opt)
303 		icsk->icsk_ext_hdr_len = opt->opt_flen +
304 					 opt->opt_nflen;
305 
306 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
307 
308 	inet->inet_dport = usin->sin6_port;
309 
	/* From this point a failure must also put the state back to
	 * TCP_CLOSE, hence the separate late_failure label.
	 */
310 	tcp_set_state(sk, TCP_SYN_SENT);
311 	tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
312 	err = inet6_hash_connect(tcp_death_row, sk);
313 	if (err)
314 		goto late_failure;
315 
316 	sk_set_txhash(sk);
317 
	/* Not in repair mode: choose the initial sequence number unless
	 * write_seq is already non-zero, and always set the timestamp offset.
	 */
318 	if (likely(!tp->repair)) {
319 		if (!tp->write_seq)
320 			WRITE_ONCE(tp->write_seq,
321 				   secure_tcpv6_seq(np->saddr.s6_addr32,
322 						    sk->sk_v6_daddr.s6_addr32,
323 						    inet->inet_sport,
324 						    inet->inet_dport));
325 		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
326 						   np->saddr.s6_addr32,
327 						   sk->sk_v6_daddr.s6_addr32);
328 	}
329 
	/* A true return means the connect is deferred; err as set by the
	 * helper is returned as is.  Otherwise a non-zero err is a failure.
	 */
330 	if (tcp_fastopen_defer_connect(sk, &err))
331 		return err;
332 	if (err)
333 		goto late_failure;
334 
335 	err = tcp_connect(sk);
336 	if (err)
337 		goto late_failure;
338 
339 	return 0;
340 
341 late_failure:
342 	tcp_set_state(sk, TCP_CLOSE);
343 failure:
	/* Common unwinding: forget the destination port and route caps. */
344 	inet->inet_dport = 0;
345 	sk->sk_route_caps = 0;
346 	return err;
347 }
348 
349 static void tcp_v6_mtu_reduced(struct sock *sk)
350 {
351 	struct dst_entry *dst;
352 	u32 mtu;
353 
354 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
355 		return;
356 
357 	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
358 
359 	/* Drop requests trying to increase our current mss.
360 	 * Check done in __ip6_rt_update_pmtu() is too late.
361 	 */
362 	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
363 		return;
364 
365 	dst = inet6_csk_update_pmtu(sk, mtu);
366 	if (!dst)
367 		return;
368 
369 	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
370 		tcp_sync_mss(sk, dst_mtu(dst));
371 		tcp_simple_retransmit(sk);
372 	}
373 }
374 
/*
 *	tcp_v6_err - handle an ICMPv6 error that refers to a TCP segment
 *	@skb:		buffer whose data starts at the IPv6 header the error
 *			refers to
 *	@opt:		not used in this function
 *	@type:		ICMPv6 type
 *	@code:		ICMPv6 code
 *	@offset:	offset of the TCP header from skb->data
 *	@info:		extra data in network byte order (the MTU for
 *			ICMPV6_PKT_TOOBIG)
 *
 *	The socket is looked up in the established table from the addresses
 *	and ports found in @skb.  Returns -ENOENT when no socket matches and
 *	0 in every other case, whether or not the error was acted upon.
 */
375 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
376 		u8 type, u8 code, int offset, __be32 info)
377 {
378 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
379 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
380 	struct net *net = dev_net(skb->dev);
381 	struct request_sock *fastopen;
382 	struct ipv6_pinfo *np;
383 	struct tcp_sock *tp;
384 	__u32 seq, snd_una;
385 	struct sock *sk;
386 	bool fatal;
387 	int err;
388 
389 	sk = __inet6_lookup_established(net, &tcp_hashinfo,
390 					&hdr->daddr, th->dest,
391 					&hdr->saddr, ntohs(th->source),
392 					skb->dev->ifindex, inet6_sdif(skb));
393 
394 	if (!sk) {
395 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
396 				  ICMP6_MIB_INERRORS);
397 		return -ENOENT;
398 	}
399 
	/* Errors for TIME_WAIT sockets are ignored. */
400 	if (sk->sk_state == TCP_TIME_WAIT) {
401 		inet_twsk_put(inet_twsk(sk));
402 		return 0;
403 	}
404 	seq = ntohl(th->seq);
	/* err receives the converted error; fatal is only used for request
	 * sockets just below.
	 */
405 	fatal = icmpv6_err_convert(type, code, &err);
406 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
407 		tcp_req_err(sk, seq, fatal);
408 		return 0;
409 	}
410 
411 	bh_lock_sock(sk);
	/* Only a statistic is bumped here; processing continues and each
	 * branch below checks sock_owned_by_user() again where it matters.
	 */
412 	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
413 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
414 
415 	if (sk->sk_state == TCP_CLOSE)
416 		goto out;
417 
418 	if (static_branch_unlikely(&ip6_min_hopcount)) {
419 		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
420 		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
421 			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
422 			goto out;
423 		}
424 	}
425 
426 	tp = tcp_sk(sk);
427 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
428 	fastopen = rcu_dereference(tp->fastopen_rsk);
429 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	/* Except for listeners, the quoted sequence number must lie between
	 * snd_una and snd_nxt, otherwise the error is counted and dropped.
	 */
430 	if (sk->sk_state != TCP_LISTEN &&
431 	    !between(seq, snd_una, tp->snd_nxt)) {
432 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
433 		goto out;
434 	}
435 
436 	np = tcp_inet6_sk(sk);
437 
	/* Redirects are applied to the socket's current dst, and only when
	 * the socket is not owned by user context.
	 */
438 	if (type == NDISC_REDIRECT) {
439 		if (!sock_owned_by_user(sk)) {
440 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
441 
442 			if (dst)
443 				dst->ops->redirect(dst, sk, skb);
444 		}
445 		goto out;
446 	}
447 
448 	if (type == ICMPV6_PKT_TOOBIG) {
449 		u32 mtu = ntohl(info);
450 
451 		/* We are not interested in TCP_LISTEN and open_requests
452 		 * (SYN-ACKs send out by Linux are always <576bytes so
453 		 * they should go through unfragmented).
454 		 */
455 		if (sk->sk_state == TCP_LISTEN)
456 			goto out;
457 
458 		if (!ip6_sk_accept_pmtu(sk))
459 			goto out;
460 
461 		if (mtu < IPV6_MIN_MTU)
462 			goto out;
463 
464 		WRITE_ONCE(tp->mtu_info, mtu);
465 
		/* Handle the new MTU now if the socket is free.  Otherwise
		 * set TCP_MTU_REDUCED_DEFERRED; a socket reference is taken
		 * only when this call is the one that set the bit.
		 */
466 		if (!sock_owned_by_user(sk))
467 			tcp_v6_mtu_reduced(sk);
468 		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
469 					   &sk->sk_tsq_flags))
470 			sock_hold(sk);
471 		goto out;
472 	}
473 
474 
475 	/* Might be for a request_sock */
476 	switch (sk->sk_state) {
477 	case TCP_SYN_SENT:
478 	case TCP_SYN_RECV:
479 		/* Only in fast or simultaneous open. If a fast open socket is
480 		 * already accepted it is treated as a connected one below.
481 		 */
482 		if (fastopen && !fastopen->sk)
483 			break;
484 
485 		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
486 
		/* Socket not owned by user: report the error and finish the
		 * connection.  Otherwise only record it as a soft error.
		 */
487 		if (!sock_owned_by_user(sk)) {
488 			sk->sk_err = err;
489 			sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
490 
491 			tcp_done(sk);
492 		} else
493 			sk->sk_err_soft = err;
494 		goto out;
495 	case TCP_LISTEN:
496 		break;
497 	default:
498 		/* check if this ICMP message allows revert of backoff.
499 		 * (see RFC 6069)
500 		 */
501 		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
502 		    code == ICMPV6_NOROUTE)
503 			tcp_ld_RTO_revert(sk, seq);
504 	}
505 
	/* The error becomes a hard error only if the socket is not owned by
	 * user context and has recverr set; otherwise it is a soft error.
	 */
506 	if (!sock_owned_by_user(sk) && np->recverr) {
507 		sk->sk_err = err;
508 		sk_error_report(sk);
509 	} else
510 		sk->sk_err_soft = err;
511 
512 out:
513 	bh_unlock_sock(sk);
514 	sock_put(sk);
515 	return 0;
516 }
517 
518 
519 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
520 			      struct flowi *fl,
521 			      struct request_sock *req,
522 			      struct tcp_fastopen_cookie *foc,
523 			      enum tcp_synack_type synack_type,
524 			      struct sk_buff *syn_skb)
525 {
526 	struct inet_request_sock *ireq = inet_rsk(req);
527 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
528 	struct ipv6_txoptions *opt;
529 	struct flowi6 *fl6 = &fl->u.ip6;
530 	struct sk_buff *skb;
531 	int err = -ENOMEM;
532 	u8 tclass;
533 
534 	/* First, grab a route. */
535 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
536 					       IPPROTO_TCP)) == NULL)
537 		goto done;
538 
539 	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
540 
541 	if (skb) {
542 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
543 				    &ireq->ir_v6_rmt_addr);
544 
545 		fl6->daddr = ireq->ir_v6_rmt_addr;
546 		if (np->repflow && ireq->pktopts)
547 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
548 
549 		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
550 				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
551 				(np->tclass & INET_ECN_MASK) :
552 				np->tclass;
553 
554 		if (!INET_ECN_is_capable(tclass) &&
555 		    tcp_bpf_ca_needs_ecn((struct sock *)req))
556 			tclass |= INET_ECN_ECT_0;
557 
558 		rcu_read_lock();
559 		opt = ireq->ipv6_opt;
560 		if (!opt)
561 			opt = rcu_dereference(np->opt);
562 		err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
563 			       tclass, sk->sk_priority);
564 		rcu_read_unlock();
565 		err = net_xmit_eval(err);
566 	}
567 
568 done:
569 	return err;
570 }
571 
572 
573 static void tcp_v6_reqsk_destructor(struct request_sock *req)
574 {
575 	kfree(inet_rsk(req)->ipv6_opt);
576 	consume_skb(inet_rsk(req)->pktopts);
577 }
578 
579 #ifdef CONFIG_TCP_MD5SIG
580 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
581 						   const struct in6_addr *addr,
582 						   int l3index)
583 {
584 	return tcp_md5_do_lookup(sk, l3index,
585 				 (union tcp_md5_addr *)addr, AF_INET6);
586 }
587 
588 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
589 						const struct sock *addr_sk)
590 {
591 	int l3index;
592 
593 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
594 						 addr_sk->sk_bound_dev_if);
595 	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
596 				    l3index);
597 }
598 
599 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
600 				 sockptr_t optval, int optlen)
601 {
602 	struct tcp_md5sig cmd;
603 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
604 	int l3index = 0;
605 	u8 prefixlen;
606 	u8 flags;
607 
608 	if (optlen < sizeof(cmd))
609 		return -EINVAL;
610 
611 	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
612 		return -EFAULT;
613 
614 	if (sin6->sin6_family != AF_INET6)
615 		return -EINVAL;
616 
617 	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
618 
619 	if (optname == TCP_MD5SIG_EXT &&
620 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
621 		prefixlen = cmd.tcpm_prefixlen;
622 		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
623 					prefixlen > 32))
624 			return -EINVAL;
625 	} else {
626 		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
627 	}
628 
629 	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
630 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
631 		struct net_device *dev;
632 
633 		rcu_read_lock();
634 		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
635 		if (dev && netif_is_l3_master(dev))
636 			l3index = dev->ifindex;
637 		rcu_read_unlock();
638 
639 		/* ok to reference set/not set outside of rcu;
640 		 * right now device MUST be an L3 master
641 		 */
642 		if (!dev || !l3index)
643 			return -EINVAL;
644 	}
645 
646 	if (!cmd.tcpm_keylen) {
647 		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
648 			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
649 					      AF_INET, prefixlen,
650 					      l3index, flags);
651 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
652 				      AF_INET6, prefixlen, l3index, flags);
653 	}
654 
655 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
656 		return -EINVAL;
657 
658 	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
659 		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
660 				      AF_INET, prefixlen, l3index, flags,
661 				      cmd.tcpm_key, cmd.tcpm_keylen,
662 				      GFP_KERNEL);
663 
664 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
665 			      AF_INET6, prefixlen, l3index, flags,
666 			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
667 }
668 
669 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
670 				   const struct in6_addr *daddr,
671 				   const struct in6_addr *saddr,
672 				   const struct tcphdr *th, int nbytes)
673 {
674 	struct tcp6_pseudohdr *bp;
675 	struct scatterlist sg;
676 	struct tcphdr *_th;
677 
678 	bp = hp->scratch;
679 	/* 1. TCP pseudo-header (RFC2460) */
680 	bp->saddr = *saddr;
681 	bp->daddr = *daddr;
682 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
683 	bp->len = cpu_to_be32(nbytes);
684 
685 	_th = (struct tcphdr *)(bp + 1);
686 	memcpy(_th, th, sizeof(*th));
687 	_th->check = 0;
688 
689 	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
690 	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
691 				sizeof(*bp) + sizeof(*th));
692 	return crypto_ahash_update(hp->md5_req);
693 }
694 
695 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
696 			       const struct in6_addr *daddr, struct in6_addr *saddr,
697 			       const struct tcphdr *th)
698 {
699 	struct tcp_md5sig_pool *hp;
700 	struct ahash_request *req;
701 
702 	hp = tcp_get_md5sig_pool();
703 	if (!hp)
704 		goto clear_hash_noput;
705 	req = hp->md5_req;
706 
707 	if (crypto_ahash_init(req))
708 		goto clear_hash;
709 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
710 		goto clear_hash;
711 	if (tcp_md5_hash_key(hp, key))
712 		goto clear_hash;
713 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
714 	if (crypto_ahash_final(req))
715 		goto clear_hash;
716 
717 	tcp_put_md5sig_pool();
718 	return 0;
719 
720 clear_hash:
721 	tcp_put_md5sig_pool();
722 clear_hash_noput:
723 	memset(md5_hash, 0, 16);
724 	return 1;
725 }
726 
727 static int tcp_v6_md5_hash_skb(char *md5_hash,
728 			       const struct tcp_md5sig_key *key,
729 			       const struct sock *sk,
730 			       const struct sk_buff *skb)
731 {
732 	const struct in6_addr *saddr, *daddr;
733 	struct tcp_md5sig_pool *hp;
734 	struct ahash_request *req;
735 	const struct tcphdr *th = tcp_hdr(skb);
736 
737 	if (sk) { /* valid for establish/request sockets */
738 		saddr = &sk->sk_v6_rcv_saddr;
739 		daddr = &sk->sk_v6_daddr;
740 	} else {
741 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
742 		saddr = &ip6h->saddr;
743 		daddr = &ip6h->daddr;
744 	}
745 
746 	hp = tcp_get_md5sig_pool();
747 	if (!hp)
748 		goto clear_hash_noput;
749 	req = hp->md5_req;
750 
751 	if (crypto_ahash_init(req))
752 		goto clear_hash;
753 
754 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
755 		goto clear_hash;
756 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
757 		goto clear_hash;
758 	if (tcp_md5_hash_key(hp, key))
759 		goto clear_hash;
760 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
761 	if (crypto_ahash_final(req))
762 		goto clear_hash;
763 
764 	tcp_put_md5sig_pool();
765 	return 0;
766 
767 clear_hash:
768 	tcp_put_md5sig_pool();
769 clear_hash_noput:
770 	memset(md5_hash, 0, 16);
771 	return 1;
772 }
773 
774 #endif
775 
776 static void tcp_v6_init_req(struct request_sock *req,
777 			    const struct sock *sk_listener,
778 			    struct sk_buff *skb)
779 {
780 	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
781 	struct inet_request_sock *ireq = inet_rsk(req);
782 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
783 
784 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
785 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
786 
787 	/* So that link locals have meaning */
788 	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
789 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
790 		ireq->ir_iif = tcp_v6_iif(skb);
791 
792 	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
793 	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
794 	     np->rxopt.bits.rxinfo ||
795 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
796 	     np->rxopt.bits.rxohlim || np->repflow)) {
797 		refcount_inc(&skb->users);
798 		ireq->pktopts = skb;
799 	}
800 }
801 
802 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
803 					  struct sk_buff *skb,
804 					  struct flowi *fl,
805 					  struct request_sock *req)
806 {
807 	tcp_v6_init_req(req, sk, skb);
808 
809 	if (security_inet_conn_request(sk, skb, req))
810 		return NULL;
811 
812 	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
813 }
814 
815 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
816 	.family		=	AF_INET6,
817 	.obj_size	=	sizeof(struct tcp6_request_sock),
818 	.rtx_syn_ack	=	tcp_rtx_synack,
819 	.send_ack	=	tcp_v6_reqsk_send_ack,
820 	.destructor	=	tcp_v6_reqsk_destructor,
821 	.send_reset	=	tcp_v6_send_reset,
822 	.syn_ack_timeout =	tcp_syn_ack_timeout,
823 };
824 
825 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
826 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
827 				sizeof(struct ipv6hdr),
828 #ifdef CONFIG_TCP_MD5SIG
829 	.req_md5_lookup	=	tcp_v6_md5_lookup,
830 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
831 #endif
832 #ifdef CONFIG_SYN_COOKIES
833 	.cookie_init_seq =	cookie_v6_init_sequence,
834 #endif
835 	.route_req	=	tcp_v6_route_req,
836 	.init_seq	=	tcp_v6_init_seq,
837 	.init_ts_off	=	tcp_v6_init_ts_off,
838 	.send_synack	=	tcp_v6_send_synack,
839 };
840 
/*
 *	tcp_v6_send_response - build and send a data-less TCP reply to @skb
 *	@sk:		socket the reply is sent for, may be NULL
 *	@skb:		segment being answered; addresses and ports of the
 *			reply are those of @skb swapped
 *	@seq:		sequence number of the reply
 *	@ack:		acknowledgment number of the reply
 *	@win:		window field of the reply
 *	@tsval:		timestamp value, used only if @tsecr is non-zero
 *	@tsecr:		timestamp echo; non-zero adds a timestamp option
 *	@oif:		output interface, 0 if none
 *	@key:		MD5 key; non-NULL adds a signature option when
 *			CONFIG_TCP_MD5SIG is enabled
 *	@rst:		non-zero to send a RST, zero to send an ACK
 *	@tclass:	traffic class; the ECN bits are masked off on transmit
 *	@label:		flow label of the reply
 *	@priority:	priority passed to ip6_xmit()
 *
 *	The reply is transmitted through the per-namespace socket
 *	net->ipv6.tcp_sk.  Allocation or route lookup failures are silent:
 *	the function simply returns (freeing the buffer if one was built).
 */
841 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
842 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
843 				 int oif, struct tcp_md5sig_key *key, int rst,
844 				 u8 tclass, __be32 label, u32 priority)
845 {
846 	const struct tcphdr *th = tcp_hdr(skb);
847 	struct tcphdr *t1;
848 	struct sk_buff *buff;
849 	struct flowi6 fl6;
850 	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
851 	struct sock *ctl_sk = net->ipv6.tcp_sk;
852 	unsigned int tot_len = sizeof(struct tcphdr);
853 	__be32 mrst = 0, *topt;
854 	struct dst_entry *dst;
855 	__u32 mark = 0;
856 
	/* Work out the total header length: fixed header plus each option
	 * that will be written further down.
	 */
857 	if (tsecr)
858 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
859 #ifdef CONFIG_TCP_MD5SIG
860 	if (key)
861 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
862 #endif
863 
864 #ifdef CONFIG_MPTCP
	/* The MPTCP reset option is only considered for RSTs that carry no
	 * MD5 signature.
	 */
865 	if (rst && !key) {
866 		mrst = mptcp_reset_option(skb);
867 
868 		if (mrst)
869 			tot_len += sizeof(__be32);
870 	}
871 #endif
872 
873 	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
874 	if (!buff)
875 		return;
876 
877 	skb_reserve(buff, MAX_TCP_HEADER);
878 
879 	t1 = skb_push(buff, tot_len);
880 	skb_reset_transport_header(buff);
881 
882 	/* Swap the send and the receive. */
883 	memset(t1, 0, sizeof(*t1));
884 	t1->dest = th->source;
885 	t1->source = th->dest;
886 	t1->doff = tot_len / 4;
887 	t1->seq = htonl(seq);
888 	t1->ack_seq = htonl(ack);
	/* ACK is always set on non-RST replies; on a RST it is set only if
	 * the segment being answered did not have ACK set.
	 */
889 	t1->ack = !rst || !th->ack;
890 	t1->rst = rst;
891 	t1->window = htons(win);
892 
	/* Options start right after the fixed header and are written in the
	 * same order as they were accounted for above.
	 */
893 	topt = (__be32 *)(t1 + 1);
894 
895 	if (tsecr) {
896 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
897 				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
898 		*topt++ = htonl(tsval);
899 		*topt++ = htonl(tsecr);
900 	}
901 
902 	if (mrst)
903 		*topt++ = mrst;
904 
905 #ifdef CONFIG_TCP_MD5SIG
906 	if (key) {
907 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
908 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		/* The signature is computed over the reply header t1, with
		 * the addresses of the incoming packet passed in swapped
		 * roles.
		 */
909 		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
910 				    &ipv6_hdr(skb)->saddr,
911 				    &ipv6_hdr(skb)->daddr, t1);
912 	}
913 #endif
914 
	/* Flow of the reply: addresses of the incoming packet, swapped. */
915 	memset(&fl6, 0, sizeof(fl6));
916 	fl6.daddr = ipv6_hdr(skb)->saddr;
917 	fl6.saddr = ipv6_hdr(skb)->daddr;
918 	fl6.flowlabel = label;
919 
920 	buff->ip_summed = CHECKSUM_PARTIAL;
921 
922 	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
923 
924 	fl6.flowi6_proto = IPPROTO_TCP;
	/* Without an explicit oif, use the incoming interface when the
	 * destination needs a strict route, or when the packet arrived on
	 * an L3 master device.
	 */
925 	if (rt6_need_strict(&fl6.daddr) && !oif)
926 		fl6.flowi6_oif = tcp_v6_iif(skb);
927 	else {
928 		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
929 			oif = skb->skb_iif;
930 
931 		fl6.flowi6_oif = oif;
932 	}
933 
934 	if (sk) {
		/* Time-wait sockets keep mark and tx hash in the timewait
		 * structure; other sockets provide sk_mark.
		 */
935 		if (sk->sk_state == TCP_TIME_WAIT) {
936 			mark = inet_twsk(sk)->tw_mark;
937 			/* autoflowlabel relies on buff->hash */
938 			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
939 				     PKT_HASH_TYPE_L4);
940 		} else {
941 			mark = sk->sk_mark;
942 		}
943 		skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
944 	}
	/* A non-zero reply mark takes precedence over the socket mark. */
945 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
946 	fl6.fl6_dport = t1->dest;
947 	fl6.fl6_sport = t1->source;
948 	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
949 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
950 
951 	/* Pass a socket to ip6_dst_lookup_flow() even when this is a RST:
952 	 * the underlying function uses it to retrieve the network
953 	 * namespace.  Time-wait sockets and the no-socket case use ctl_sk.
954 	 */
955 	if (sk && sk->sk_state != TCP_TIME_WAIT)
956 		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
957 	else
958 		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
959 	if (!IS_ERR(dst)) {
960 		skb_dst_set(buff, dst);
961 		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
962 			 tclass & ~INET_ECN_MASK, priority);
963 		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
964 		if (rst)
965 			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
966 		return;
967 	}
968 
	/* No route: the reply is dropped. */
969 	kfree_skb(buff);
970 }
971 
/*
 *	tcp_v6_send_reset - answer the segment @skb with a RST
 *	@sk:	socket the segment was matched to, or NULL
 *	@skb:	offending segment
 *
 *	No RST is sent when @skb itself has RST set, or when there is no
 *	socket and ipv6_unicast_destination() is false for @skb.
 *
 *	With CONFIG_TCP_MD5SIG the reset is signed with the key found for the
 *	peer.  If there is no full socket but the segment carries an MD5
 *	option, a listener is looked up to obtain the key, and the RST is
 *	sent only if the signature of the segment verifies against it.
 */
972 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
973 {
974 	const struct tcphdr *th = tcp_hdr(skb);
975 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
976 	u32 seq = 0, ack_seq = 0;
977 	struct tcp_md5sig_key *key = NULL;
978 #ifdef CONFIG_TCP_MD5SIG
979 	const __u8 *hash_location = NULL;
980 	unsigned char newhash[16];
981 	int genhash;
982 	struct sock *sk1 = NULL;
983 #endif
984 	__be32 label = 0;
985 	u32 priority = 0;
986 	struct net *net;
987 	int oif = 0;
988 
	/* Never answer a RST with a RST. */
989 	if (th->rst)
990 		return;
991 
992 	/* If sk not NULL, it means we did a successful lookup and incoming
993 	 * route had to be correct. prequeue might have dropped our dst.
994 	 */
995 	if (!sk && !ipv6_unicast_destination(skb))
996 		return;
997 
998 	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
999 #ifdef CONFIG_TCP_MD5SIG
	/* The RCU read section is held until the "out" label at the end of
	 * the function.
	 */
1000 	rcu_read_lock();
1001 	hash_location = tcp_parse_md5sig_option(th);
1002 	if (sk && sk_fullsock(sk)) {
1003 		int l3index;
1004 
1005 		/* sdif set, means packet ingressed via a device
1006 		 * in an L3 domain and inet_iif is set to it.
1007 		 */
1008 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1009 		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1010 	} else if (hash_location) {
1011 		int dif = tcp_v6_iif_l3_slave(skb);
1012 		int sdif = tcp_v6_sdif(skb);
1013 		int l3index;
1014 
1015 		/*
1016 		 * The active side is lost. Try to find a listening socket
1017 		 * through the source port, and then find the md5 key through
1018 		 * that listening socket. We do not lose security here:
1019 		 * the incoming packet is checked against the md5 hash computed
1020 		 * with the key found; no RST is generated if it doesn't match.
1021 		 */
1022 		sk1 = inet6_lookup_listener(net,
1023 					   &tcp_hashinfo, NULL, 0,
1024 					   &ipv6h->saddr,
1025 					   th->source, &ipv6h->daddr,
1026 					   ntohs(th->source), dif, sdif);
1027 		if (!sk1)
1028 			goto out;
1029 
1030 		/* sdif set, means packet ingressed via a device
1031 		 * in an L3 domain and dif is set to it.
1032 		 */
1033 		l3index = tcp_v6_sdif(skb) ? dif : 0;
1034 
1035 		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1036 		if (!key)
1037 			goto out;
1038 
		/* Recompute the signature of the incoming segment and send
		 * nothing unless it matches the one it carried.
		 */
1039 		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1040 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
1041 			goto out;
1042 	}
1043 #endif
1044 
	/* If the segment had ACK set, the RST takes its sequence number from
	 * that ACK field.  Otherwise seq stays 0 and the RST acknowledges
	 * everything the segment occupied: payload length plus one each for
	 * SYN and FIN.
	 */
1045 	if (th->ack)
1046 		seq = ntohl(th->ack_seq);
1047 	else
1048 		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1049 			  (th->doff << 2);
1050 
1051 	if (sk) {
1052 		oif = sk->sk_bound_dev_if;
1053 		if (sk_fullsock(sk)) {
1054 			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1055 
1056 			trace_tcp_send_reset(sk, skb);
1057 			if (np->repflow)
1058 				label = ip6_flowlabel(ipv6h);
1059 			priority = sk->sk_priority;
1060 		}
		/* Time-wait sockets carry their own flow label and priority. */
1061 		if (sk->sk_state == TCP_TIME_WAIT) {
1062 			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1063 			priority = inet_twsk(sk)->tw_priority;
1064 		}
1065 	} else {
		/* No socket: reflect the flow label only if the sysctl
		 * enables that for TCP resets.
		 */
1066 		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1067 			label = ip6_flowlabel(ipv6h);
1068 	}
1069 
	/* win, tsval and tsecr are 0; rst is 1. */
1070 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1071 			     ipv6_get_dsfield(ipv6h), label, priority);
1072 
1073 #ifdef CONFIG_TCP_MD5SIG
1074 out:
1075 	rcu_read_unlock();
1076 #endif
1077 }
1078 
1079 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1080 			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1081 			    struct tcp_md5sig_key *key, u8 tclass,
1082 			    __be32 label, u32 priority)
1083 {
1084 	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1085 			     tclass, label, priority);
1086 }
1087 
1088 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1089 {
1090 	struct inet_timewait_sock *tw = inet_twsk(sk);
1091 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1092 
1093 	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1094 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1095 			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1096 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1097 			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1098 
1099 	inet_twsk_put(tw);
1100 }
1101 
1102 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1103 				  struct request_sock *req)
1104 {
1105 	int l3index;
1106 
1107 	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1108 
1109 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1110 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1111 	 */
1112 	/* RFC 7323 2.3
1113 	 * The window field (SEG.WND) of every outgoing segment, with the
1114 	 * exception of <SYN> segments, MUST be right-shifted by
1115 	 * Rcv.Wind.Shift bits:
1116 	 */
1117 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1118 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1119 			tcp_rsk(req)->rcv_nxt,
1120 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1121 			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1122 			req->ts_recent, sk->sk_bound_dev_if,
1123 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1124 			ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
1125 }
1126 
1127 
/* With CONFIG_SYN_COOKIES, a segment without SYN is passed through
 * cookie_v6_check() and its result is returned; in every other case
 * @sk is returned untouched.
 */
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	if (tcp_hdr(skb)->syn)
		return sk;

	return cookie_v6_check(sk, skb);
#else
	return sk;
#endif
}
1138 
1139 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1140 			 struct tcphdr *th, u32 *cookie)
1141 {
1142 	u16 mss = 0;
1143 #ifdef CONFIG_SYN_COOKIES
1144 	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1145 				    &tcp_request_sock_ipv6_ops, sk, th);
1146 	if (mss) {
1147 		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1148 		tcp_synq_overflow(sk);
1149 	}
1150 #endif
1151 	return mss;
1152 }
1153 
1154 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1155 {
1156 	if (skb->protocol == htons(ETH_P_IP))
1157 		return tcp_v4_conn_request(sk, skb);
1158 
1159 	if (!ipv6_unicast_destination(skb))
1160 		goto drop;
1161 
1162 	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1163 		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1164 		return 0;
1165 	}
1166 
1167 	return tcp_conn_request(&tcp6_request_sock_ops,
1168 				&tcp_request_sock_ipv6_ops, sk, skb);
1169 
1170 drop:
1171 	tcp_listendrop(sk);
1172 	return 0; /* don't send reset */
1173 }
1174 
1175 static void tcp_v6_restore_cb(struct sk_buff *skb)
1176 {
1177 	/* We need to move header back to the beginning if xfrm6_policy_check()
1178 	 * and tcp_v6_fill_cb() are going to be called again.
1179 	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1180 	 */
1181 	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1182 		sizeof(struct inet6_skb_parm));
1183 }
1184 
/*
 * tcp_v6_syn_recv_sock() - create the child socket for connection request @req.
 *
 * @sk:         socket the request belongs to; its ipv6_pinfo is copied into
 *              the child as a starting point.
 * @skb:        packet being processed (an IPv4 packet when skb->protocol is
 *              ETH_P_IP, i.e. the v4-mapped case).
 * @req:        request sock describing the new connection.
 * @dst:        route for the child, or NULL to have one looked up here with
 *              inet6_csk_route_req().
 * @req_unhash: handed to inet_ehash_nolisten() (through req_to_sk()) when the
 *              child is inserted into the hash.
 * @own_req:    on the native IPv6 path, set from the return value of
 *              inet_ehash_nolisten(); on the v4-mapped path it is simply
 *              forwarded to tcp_v4_syn_recv_sock().
 *
 * Returns the new socket or NULL.  All NULL returns go through
 * tcp_listendrop(sk) except a failed tcp_v4_syn_recv_sock() and the
 * duplicate-socket case at the very end.
 */
1185 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1186 					 struct request_sock *req,
1187 					 struct dst_entry *dst,
1188 					 struct request_sock *req_unhash,
1189 					 bool *own_req)
1190 {
1191 	struct inet_request_sock *ireq;
1192 	struct ipv6_pinfo *newnp;
1193 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1194 	struct ipv6_txoptions *opt;
1195 	struct inet_sock *newinet;
1196 	bool found_dup_sk = false;
1197 	struct tcp_sock *newtp;
1198 	struct sock *newsk;
1199 #ifdef CONFIG_TCP_MD5SIG
1200 	struct tcp_md5sig_key *key;
1201 	int l3index;
1202 #endif
1203 	struct flowi6 fl6;
1204 
1205 	if (skb->protocol == htons(ETH_P_IP)) {
1206 		/*
1207 		 *	v6 mapped
1208 		 */
1209 
1210 		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1211 					     req_unhash, own_req);
1212 
1213 		if (!newsk)
1214 			return NULL;
1215 
1216 		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1217 
1218 		newnp = tcp_inet6_sk(newsk);
1219 		newtp = tcp_sk(newsk);
1220 
		/* Start from a byte copy of the parent's ipv6_pinfo; several
		 * members are overwritten below.
		 */
1221 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1222 
1223 		newnp->saddr = newsk->sk_v6_rcv_saddr;
1224 
		/* The child talks IPv4: switch it to the mapped ops and the
		 * IPv4 backlog handler.
		 */
1225 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1226 		if (sk_is_mptcp(newsk))
1227 			mptcpv6_handle_mapped(newsk, true);
1228 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1229 #ifdef CONFIG_TCP_MD5SIG
1230 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1231 #endif
1232 
1233 		newnp->ipv6_mc_list = NULL;
1234 		newnp->ipv6_ac_list = NULL;
1235 		newnp->ipv6_fl_list = NULL;
1236 		newnp->pktoptions  = NULL;
1237 		newnp->opt	   = NULL;
1238 		newnp->mcast_oif   = inet_iif(skb);
1239 		newnp->mcast_hops  = ip_hdr(skb)->ttl;
1240 		newnp->rcv_flowinfo = 0;
1241 		if (np->repflow)
1242 			newnp->flow_label = 0;
1243 
1244 		/*
1245 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1246 		 * here, tcp_create_openreq_child now does this for us, see the comment in
1247 		 * that function for the gory details. -acme
1248 		 */
1249 
1250 		/* This is a tricky place. Until this moment IPv4 tcp
1251 		   worked with IPv6 icsk.icsk_af_ops.
1252 		   Sync it now.
1253 		 */
1254 		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1255 
1256 		return newsk;
1257 	}
1258 
	/* Native IPv6 from here on. */
1259 	ireq = inet_rsk(req);
1260 
1261 	if (sk_acceptq_is_full(sk))
1262 		goto out_overflow;
1263 
1264 	if (!dst) {
1265 		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1266 		if (!dst)
1267 			goto out;
1268 	}
1269 
1270 	newsk = tcp_create_openreq_child(sk, req, skb);
1271 	if (!newsk)
1272 		goto out_nonewsk;
1273 
1274 	/*
1275 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1276 	 * count here, tcp_create_openreq_child now does this for us, see the
1277 	 * comment in that function for the gory details. -acme
1278 	 */
1279 
1280 	newsk->sk_gso_type = SKB_GSO_TCPV6;
1281 	ip6_dst_store(newsk, dst, NULL, NULL);
1282 	inet6_sk_rx_dst_set(newsk, skb);
1283 
1284 	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1285 
1286 	newtp = tcp_sk(newsk);
1287 	newinet = inet_sk(newsk);
1288 	newnp = tcp_inet6_sk(newsk);
1289 
	/* Byte copy of the parent's ipv6_pinfo; the assignments that follow
	 * replace the members that differ for the child.
	 */
1290 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1291 
	/* Addresses and bound device come from the request. */
1292 	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1293 	newnp->saddr = ireq->ir_v6_loc_addr;
1294 	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1295 	newsk->sk_bound_dev_if = ireq->ir_iif;
1296 
1297 	/* Now IPv6 options...
1298 
1299 	   First: no IPv4 options.
1300 	 */
1301 	newinet->inet_opt = NULL;
1302 	newnp->ipv6_mc_list = NULL;
1303 	newnp->ipv6_ac_list = NULL;
1304 	newnp->ipv6_fl_list = NULL;
1305 
1306 	/* Clone RX bits */
1307 	newnp->rxopt.all = np->rxopt.all;
1308 
1309 	newnp->pktoptions = NULL;
1310 	newnp->opt	  = NULL;
1311 	newnp->mcast_oif  = tcp_v6_iif(skb);
1312 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1313 	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1314 	if (np->repflow)
1315 		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1316 
1317 	/* Set ToS of the new socket based upon the value of incoming SYN.
1318 	 * ECT bits are set later in tcp_init_transfer().
1319 	 */
1320 	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1321 		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1322 
1323 	/* Clone native IPv6 options from listening socket (if any)
1324 
1325 	   Yes, keeping a reference count would be much more clever,
1326 	   but we do one more thing here: reattach optmem
1327 	   to newsk.
1328 	 */
	/* Options recorded on the request take precedence over the parent's. */
1329 	opt = ireq->ipv6_opt;
1330 	if (!opt)
1331 		opt = rcu_dereference(np->opt);
1332 	if (opt) {
1333 		opt = ipv6_dup_options(newsk, opt);
1334 		RCU_INIT_POINTER(newnp->opt, opt);
1335 	}
	/* opt is re-tested because ipv6_dup_options() replaced it above. */
1336 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1337 	if (opt)
1338 		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1339 						    opt->opt_flen;
1340 
1341 	tcp_ca_openreq_child(newsk, dst);
1342 
1343 	tcp_sync_mss(newsk, dst_mtu(dst));
1344 	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1345 
1346 	tcp_initialize_rcv_mss(newsk);
1347 
	/* The IPv4 address fields of the child are filled with a placeholder. */
1348 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1349 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1350 
1351 #ifdef CONFIG_TCP_MD5SIG
1352 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1353 
1354 	/* Copy over the MD5 key from the original socket */
1355 	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1356 	if (key) {
1357 		/* We're using one, so create a matching key
1358 		 * on the newsk structure. If we fail to get
1359 		 * memory, then we end up not copying the key
1360 		 * across. Shucks.
1361 		 */
1362 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1363 			       AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
1364 			       sk_gfp_mask(sk, GFP_ATOMIC));
1365 	}
1366 #endif
1367 
1368 	if (__inet_inherit_port(sk, newsk) < 0) {
1369 		inet_csk_prepare_forced_close(newsk);
1370 		tcp_done(newsk);
		/* Jumps past dst_release(): presumably because dst was
		 * already stored into newsk by ip6_dst_store() above.
		 */
1371 		goto out;
1372 	}
1373 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1374 				       &found_dup_sk);
1375 	if (*own_req) {
1376 		tcp_move_syn(newtp, req);
1377 
1378 		/* Clone pktoptions received with SYN, if we own the req */
1379 		if (ireq->pktopts) {
1380 			newnp->pktoptions = skb_clone(ireq->pktopts,
1381 						      sk_gfp_mask(sk, GFP_ATOMIC));
			/* The request's copy is released whether or not the
			 * clone succeeded.
			 */
1382 			consume_skb(ireq->pktopts);
1383 			ireq->pktopts = NULL;
1384 			if (newnp->pktoptions) {
1385 				tcp_v6_restore_cb(newnp->pktoptions);
1386 				skb_set_owner_r(newnp->pktoptions, newsk);
1387 			}
1388 		}
1389 	} else {
1390 		if (!req_unhash && found_dup_sk) {
1391 			/* This code path should only be executed in the
1392 			 * syncookie case only
1393 			 */
1394 			bh_unlock_sock(newsk);
1395 			sock_put(newsk);
1396 			newsk = NULL;
1397 		}
1398 	}
1399 
1400 	return newsk;
1401 
	/* Error unwinding: each label falls through into the next. */
1402 out_overflow:
1403 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1404 out_nonewsk:
1405 	dst_release(dst);
1406 out:
1407 	tcp_listendrop(sk);
1408 	return NULL;
1409 }
1410 
1411 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1412 							   u32));
1413 /* The socket must have its spinlock held when we get
1414  * here, unless it is a TCP_LISTEN socket.
1415  *
1416  * We have a potential double-lock case here, so even when
1417  * doing backlog processing we use the BH locking scheme.
1418  * This is because we cannot sleep with the original spinlock
1419  * held.
1420  */
/*
 * tcp_v6_do_rcv() - process one segment @skb for socket @sk.
 *
 * IPv4 packets are handed to tcp_v4_do_rcv() and its result is returned.
 * For IPv6 the segment goes to tcp_rcv_established() when the socket is
 * ESTABLISHED; otherwise the checksum is completed and the segment goes
 * through the cookie check (TCP_LISTEN only) and tcp_rcv_state_process().
 * When np->rxopt.all is set, a clone of the segment is taken up front and
 * may be latched into np->pktoptions at the end.
 *
 * Every IPv6 path in this function returns 0.
 */
1421 INDIRECT_CALLABLE_SCOPE
1422 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1423 {
1424 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1425 	struct sk_buff *opt_skb = NULL;
1426 	enum skb_drop_reason reason;
1427 	struct tcp_sock *tp;
1428 
1429 	/* Imagine: socket is IPv6. IPv4 packet arrives,
1430 	   goes to IPv4 receive handler and backlogged.
1431 	   From backlog it always goes here. Kerboom...
1432 	   Fortunately, tcp_rcv_established and rcv_established
1433 	   handle them correctly, but it is not case with
1434 	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1435 	 */
1436 
1437 	if (skb->protocol == htons(ETH_P_IP))
1438 		return tcp_v4_do_rcv(sk, skb);
1439 
1440 	/*
1441 	 *	socket locking is here for SMP purposes as backlog rcv
1442 	 *	is currently called with bh processing disabled.
1443 	 */
1444 
1445 	/* Do Stevens' IPV6_PKTOPTIONS.
1446 
1447 	   Yes, guys, it is the only place in our code, where we
1448 	   may make it not affecting IPv4.
1449 	   The rest of code is protocol independent,
1450 	   and I do not like idea to uglify IPv4.
1451 
1452 	   Actually, all the idea behind IPV6_PKTOPTIONS
1453 	   looks not very well thought. For now we latch
1454 	   options, received in the last packet, enqueued
1455 	   by tcp. Feel free to propose better solution.
1456 					       --ANK (980728)
1457 	 */
1458 	if (np->rxopt.all)
1459 		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1460 
1461 	reason = SKB_DROP_REASON_NOT_SPECIFIED;
1462 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1463 		struct dst_entry *dst;
1464 
1465 		dst = rcu_dereference_protected(sk->sk_rx_dst,
1466 						lockdep_sock_is_held(sk));
1467 
1468 		sock_rps_save_rxhash(sk, skb);
1469 		sk_mark_napi_id(sk, skb);
		/* Forget the cached rx dst when the packet arrived on a
		 * different interface or the dst's check hook returns NULL.
		 */
1470 		if (dst) {
1471 			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1472 			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1473 					    dst, sk->sk_rx_dst_cookie) == NULL) {
1474 				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1475 				dst_release(dst);
1476 			}
1477 		}
1478 
1479 		tcp_rcv_established(sk, skb);
1480 		if (opt_skb)
1481 			goto ipv6_pktoptions;
1482 		return 0;
1483 	}
1484 
1485 	if (tcp_checksum_complete(skb))
1486 		goto csum_err;
1487 
1488 	if (sk->sk_state == TCP_LISTEN) {
1489 		struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1490 
1491 		if (!nsk)
1492 			goto discard;
1493 
		/* The cookie check produced a different socket: feed the
		 * segment to it; the options clone is not latched here.
		 */
1494 		if (nsk != sk) {
1495 			if (tcp_child_process(sk, nsk, skb))
1496 				goto reset;
1497 			if (opt_skb)
1498 				__kfree_skb(opt_skb);
1499 			return 0;
1500 		}
1501 	} else
1502 		sock_rps_save_rxhash(sk, skb);
1503 
1504 	if (tcp_rcv_state_process(sk, skb))
1505 		goto reset;
1506 	if (opt_skb)
1507 		goto ipv6_pktoptions;
1508 	return 0;
1509 
	/* reset falls through into discard, which frees both the clone (if
	 * any) and the segment itself.
	 */
1510 reset:
1511 	tcp_v6_send_reset(sk, skb);
1512 discard:
1513 	if (opt_skb)
1514 		__kfree_skb(opt_skb);
1515 	kfree_skb_reason(skb, reason);
1516 	return 0;
1517 csum_err:
1518 	reason = SKB_DROP_REASON_TCP_CSUM;
1519 	trace_tcp_bad_csum(skb);
1520 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1521 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1522 	goto discard;
1523 
1524 
1525 ipv6_pktoptions:
1526 	/* Do you ask, what is it?
1527 
1528 	   1. skb was enqueued by tcp.
1529 	   2. skb is added to tail of read queue, rather than out of order.
1530 	   3. socket is not in passive state.
1531 	   4. Finally, it really contains options, which user wants to receive.
1532 	 */
1533 	tp = tcp_sk(sk);
1534 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1535 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1536 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1537 			np->mcast_oif = tcp_v6_iif(opt_skb);
1538 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1539 			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1540 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1541 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1542 		if (np->repflow)
1543 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		/* Either latch the clone into np->pktoptions or clear the
		 * latch; in both cases opt_skb ends up holding the value
		 * that was previously latched.
		 */
1544 		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1545 			skb_set_owner_r(opt_skb, sk);
1546 			tcp_v6_restore_cb(opt_skb);
1547 			opt_skb = xchg(&np->pktoptions, opt_skb);
1548 		} else {
1549 			__kfree_skb(opt_skb);
1550 			opt_skb = xchg(&np->pktoptions, NULL);
1551 		}
1552 	}
1553 
	/* Releases the previously latched skb, or the unused clone when the
	 * condition above did not hold.
	 */
1554 	consume_skb(opt_skb);
1555 	return 0;
1556 }
1557 
1558 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1559 			   const struct tcphdr *th)
1560 {
1561 	/* This is tricky: we move IP6CB at its correct location into
1562 	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1563 	 * _decode_session6() uses IP6CB().
1564 	 * barrier() makes sure compiler won't play aliasing games.
1565 	 */
1566 	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1567 		sizeof(struct inet6_skb_parm));
1568 	barrier();
1569 
1570 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1571 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1572 				    skb->len - th->doff*4);
1573 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1574 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1575 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1576 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1577 	TCP_SKB_CB(skb)->sacked = 0;
1578 	TCP_SKB_CB(skb)->has_rxtstamp =
1579 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1580 }
1581 
/*
 * tcp_v6_rcv() - receive entry point for a TCP segment carried over IPv6.
 *
 * Validates the TCP header and checksum state, looks up the socket for the
 * segment and dispatches on its state:
 *   - TCP_TIME_WAIT     -> do_time_wait
 *   - TCP_NEW_SYN_RECV  -> tcp_check_req() against the request's listener
 *   - TCP_LISTEN        -> tcp_v6_do_rcv() directly
 *   - anything else     -> tcp_v6_do_rcv() under bh_lock_sock_nested(), or
 *                          tcp_add_backlog() when the socket is owned by user
 *
 * Returns -1 when tcp_v6_do_rcv() returned non-zero, 0 in every other case
 * (including all drop paths, where the skb is freed with a drop reason).
 */
1582 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1583 {
1584 	enum skb_drop_reason drop_reason;
1585 	int sdif = inet6_sdif(skb);
1586 	int dif = inet6_iif(skb);
1587 	const struct tcphdr *th;
1588 	const struct ipv6hdr *hdr;
1589 	bool refcounted;
1590 	struct sock *sk;
1591 	int ret;
1592 	struct net *net = dev_net(skb->dev);
1593 
1594 	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1595 	if (skb->pkt_type != PACKET_HOST)
1596 		goto discard_it;
1597 
1598 	/*
1599 	 *	Count it even if it's bad.
1600 	 */
1601 	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1602 
1603 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1604 		goto discard_it;
1605 
1606 	th = (const struct tcphdr *)skb->data;
1607 
	/* doff counts 32-bit words; it must cover at least the fixed header. */
1608 	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1609 		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1610 		goto bad_packet;
1611 	}
1612 	if (!pskb_may_pull(skb, th->doff*4))
1613 		goto discard_it;
1614 
1615 	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1616 		goto csum_error;
1617 
	/* Header pointers are re-read after the pulls above. */
1618 	th = (const struct tcphdr *)skb->data;
1619 	hdr = ipv6_hdr(skb);
1620 
1621 lookup:
1622 	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1623 				th->source, th->dest, inet6_iif(skb), sdif,
1624 				&refcounted);
1625 	if (!sk)
1626 		goto no_tcp_socket;
1627 
1628 process:
1629 	if (sk->sk_state == TCP_TIME_WAIT)
1630 		goto do_time_wait;
1631 
1632 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1633 		struct request_sock *req = inet_reqsk(sk);
1634 		bool req_stolen = false;
1635 		struct sock *nsk;
1636 
		/* The lookup returned a request sock; continue with the
		 * listener it belongs to.
		 */
1637 		sk = req->rsk_listener;
1638 		drop_reason = tcp_inbound_md5_hash(sk, skb,
1639 						   &hdr->saddr, &hdr->daddr,
1640 						   AF_INET6, dif, sdif);
1641 		if (drop_reason) {
1642 			sk_drops_add(sk, skb);
1643 			reqsk_put(req);
1644 			goto discard_it;
1645 		}
1646 		if (tcp_checksum_complete(skb)) {
1647 			reqsk_put(req);
1648 			goto csum_error;
1649 		}
1650 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1651 			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1652 			if (!nsk) {
1653 				inet_csk_reqsk_queue_drop_and_put(sk, req);
1654 				goto lookup;
1655 			}
1656 			sk = nsk;
1657 			/* reuseport_migrate_sock() has already held one sk_refcnt
1658 			 * before returning.
1659 			 */
1660 		} else {
1661 			sock_hold(sk);
1662 		}
		/* On both branches above a reference on sk is now held. */
1663 		refcounted = true;
1664 		nsk = NULL;
1665 		if (!tcp_filter(sk, skb)) {
1666 			th = (const struct tcphdr *)skb->data;
1667 			hdr = ipv6_hdr(skb);
1668 			tcp_v6_fill_cb(skb, hdr, th);
1669 			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1670 		} else {
1671 			drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1672 		}
1673 		if (!nsk) {
1674 			reqsk_put(req);
1675 			if (req_stolen) {
1676 				/* Another cpu got exclusive access to req
1677 				 * and created a full blown socket.
1678 				 * Try to feed this packet to this socket
1679 				 * instead of discarding it.
1680 				 */
1681 				tcp_v6_restore_cb(skb);
1682 				sock_put(sk);
1683 				goto lookup;
1684 			}
1685 			goto discard_and_relse;
1686 		}
		/* nsk == sk: keep processing on the listener below.
		 * Otherwise nsk is a different socket and the segment is
		 * passed to it through tcp_child_process().
		 */
1687 		if (nsk == sk) {
1688 			reqsk_put(req);
1689 			tcp_v6_restore_cb(skb);
1690 		} else if (tcp_child_process(sk, nsk, skb)) {
1691 			tcp_v6_send_reset(nsk, skb);
1692 			goto discard_and_relse;
1693 		} else {
1694 			sock_put(sk);
1695 			return 0;
1696 		}
1697 	}
1698 
1699 	if (static_branch_unlikely(&ip6_min_hopcount)) {
1700 		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1701 		if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
1702 			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1703 			goto discard_and_relse;
1704 		}
1705 	}
1706 
1707 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1708 		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1709 		goto discard_and_relse;
1710 	}
1711 
1712 	drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1713 					   AF_INET6, dif, sdif);
1714 	if (drop_reason)
1715 		goto discard_and_relse;
1716 
1717 	if (tcp_filter(sk, skb)) {
1718 		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1719 		goto discard_and_relse;
1720 	}
	/* Header pointers are re-read after the filter ran. */
1721 	th = (const struct tcphdr *)skb->data;
1722 	hdr = ipv6_hdr(skb);
1723 	tcp_v6_fill_cb(skb, hdr, th);
1724 
1725 	skb->dev = NULL;
1726 
	/* Listeners are processed without taking the socket lock. */
1727 	if (sk->sk_state == TCP_LISTEN) {
1728 		ret = tcp_v6_do_rcv(sk, skb);
1729 		goto put_and_return;
1730 	}
1731 
1732 	sk_incoming_cpu_update(sk);
1733 
1734 	bh_lock_sock_nested(sk);
1735 	tcp_segs_in(tcp_sk(sk), skb);
1736 	ret = 0;
1737 	if (!sock_owned_by_user(sk)) {
1738 		ret = tcp_v6_do_rcv(sk, skb);
1739 	} else {
		/* NOTE(review): this jump skips bh_unlock_sock() below;
		 * presumably tcp_add_backlog() unlocks on failure - confirm.
		 */
1740 		if (tcp_add_backlog(sk, skb, &drop_reason))
1741 			goto discard_and_relse;
1742 	}
1743 	bh_unlock_sock(sk);
1744 put_and_return:
1745 	if (refcounted)
1746 		sock_put(sk);
1747 	return ret ? -1 : 0;
1748 
1749 no_tcp_socket:
1750 	drop_reason = SKB_DROP_REASON_NO_SOCKET;
1751 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1752 		goto discard_it;
1753 
1754 	tcp_v6_fill_cb(skb, hdr, th);
1755 
	/* csum_error and bad_packet are jump targets inside this if-body:
	 * csum_error falls through into bad_packet, and both end up at
	 * discard_it without sending a reset.
	 */
1756 	if (tcp_checksum_complete(skb)) {
1757 csum_error:
1758 		drop_reason = SKB_DROP_REASON_TCP_CSUM;
1759 		trace_tcp_bad_csum(skb);
1760 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1761 bad_packet:
1762 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1763 	} else {
1764 		tcp_v6_send_reset(NULL, skb);
1765 	}
1766 
1767 discard_it:
1768 	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1769 	kfree_skb_reason(skb, drop_reason);
1770 	return 0;
1771 
1772 discard_and_relse:
1773 	sk_drops_add(sk, skb);
1774 	if (refcounted)
1775 		sock_put(sk);
1776 	goto discard_it;
1777 
1778 do_time_wait:
1779 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1780 		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1781 		inet_twsk_put(inet_twsk(sk));
1782 		goto discard_it;
1783 	}
1784 
1785 	tcp_v6_fill_cb(skb, hdr, th);
1786 
1787 	if (tcp_checksum_complete(skb)) {
1788 		inet_twsk_put(inet_twsk(sk));
1789 		goto csum_error;
1790 	}
1791 
1792 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1793 	case TCP_TW_SYN:
1794 	{
1795 		struct sock *sk2;
1796 
1797 		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1798 					    skb, __tcp_hdrlen(th),
1799 					    &ipv6_hdr(skb)->saddr, th->source,
1800 					    &ipv6_hdr(skb)->daddr,
1801 					    ntohs(th->dest),
1802 					    tcp_v6_iif_l3_slave(skb),
1803 					    sdif);
		/* A listener exists: retire the timewait socket and restart
		 * processing on the listener, marked as not refcounted.
		 */
1804 		if (sk2) {
1805 			struct inet_timewait_sock *tw = inet_twsk(sk);
1806 			inet_twsk_deschedule_put(tw);
1807 			sk = sk2;
1808 			tcp_v6_restore_cb(skb);
1809 			refcounted = false;
1810 			goto process;
1811 		}
1812 	}
1813 		/* to ACK */
1814 		fallthrough;
1815 	case TCP_TW_ACK:
1816 		tcp_v6_timewait_ack(sk, skb);
1817 		break;
1818 	case TCP_TW_RST:
1819 		tcp_v6_send_reset(sk, skb);
1820 		inet_twsk_deschedule_put(inet_twsk(sk));
1821 		goto discard_it;
1822 	case TCP_TW_SUCCESS:
1823 		;
1824 	}
1825 	goto discard_it;
1826 }
1827 
1828 void tcp_v6_early_demux(struct sk_buff *skb)
1829 {
1830 	const struct ipv6hdr *hdr;
1831 	const struct tcphdr *th;
1832 	struct sock *sk;
1833 
1834 	if (skb->pkt_type != PACKET_HOST)
1835 		return;
1836 
1837 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1838 		return;
1839 
1840 	hdr = ipv6_hdr(skb);
1841 	th = tcp_hdr(skb);
1842 
1843 	if (th->doff < sizeof(struct tcphdr) / 4)
1844 		return;
1845 
1846 	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
1847 	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1848 					&hdr->saddr, th->source,
1849 					&hdr->daddr, ntohs(th->dest),
1850 					inet6_iif(skb), inet6_sdif(skb));
1851 	if (sk) {
1852 		skb->sk = sk;
1853 		skb->destructor = sock_edemux;
1854 		if (sk_fullsock(sk)) {
1855 			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1856 
1857 			if (dst)
1858 				dst = dst_check(dst, sk->sk_rx_dst_cookie);
1859 			if (dst &&
1860 			    sk->sk_rx_dst_ifindex == skb->skb_iif)
1861 				skb_dst_set_noref(skb, dst);
1862 		}
1863 	}
1864 }
1865 
1866 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1867 	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
1868 	.twsk_unique	= tcp_twsk_unique,
1869 	.twsk_destructor = tcp_twsk_destructor,
1870 };
1871 
1872 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1873 {
1874 	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1875 }
1876 
1877 const struct inet_connection_sock_af_ops ipv6_specific = {
1878 	.queue_xmit	   = inet6_csk_xmit,
1879 	.send_check	   = tcp_v6_send_check,
1880 	.rebuild_header	   = inet6_sk_rebuild_header,
1881 	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
1882 	.conn_request	   = tcp_v6_conn_request,
1883 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1884 	.net_header_len	   = sizeof(struct ipv6hdr),
1885 	.net_frag_header_len = sizeof(struct frag_hdr),
1886 	.setsockopt	   = ipv6_setsockopt,
1887 	.getsockopt	   = ipv6_getsockopt,
1888 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1889 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1890 	.mtu_reduced	   = tcp_v6_mtu_reduced,
1891 };
1892 
#ifdef CONFIG_TCP_MD5SIG
/* MD5 signature hooks installed by tcp_v6_init_sock(). */
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_parse	= tcp_v6_parse_md5_keys,
	.md5_lookup	= tcp_v6_md5_lookup,
	.calc_md5_hash	= tcp_v6_md5_hash_skb,
};
#endif
1900 
1901 /*
1902  *	TCP over IPv4 via INET6 API
1903  */
1904 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1905 	.queue_xmit	   = ip_queue_xmit,
1906 	.send_check	   = tcp_v4_send_check,
1907 	.rebuild_header	   = inet_sk_rebuild_header,
1908 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1909 	.conn_request	   = tcp_v6_conn_request,
1910 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1911 	.net_header_len	   = sizeof(struct iphdr),
1912 	.setsockopt	   = ipv6_setsockopt,
1913 	.getsockopt	   = ipv6_getsockopt,
1914 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1915 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1916 	.mtu_reduced	   = tcp_v4_mtu_reduced,
1917 };
1918 
#ifdef CONFIG_TCP_MD5SIG
/* MD5 signature hooks for v4-mapped sockets: lookup and hashing use the
 * IPv4 helpers, key parsing stays with the IPv6 setsockopt parser.
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_parse	= tcp_v6_parse_md5_keys,
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
};
#endif
1926 
1927 /* NOTE: A lot of things set to zero explicitly by call to
1928  *       sk_alloc() so need not be done here.
1929  */
1930 static int tcp_v6_init_sock(struct sock *sk)
1931 {
1932 	struct inet_connection_sock *icsk = inet_csk(sk);
1933 
1934 	tcp_init_sock(sk);
1935 
1936 	icsk->icsk_af_ops = &ipv6_specific;
1937 
1938 #ifdef CONFIG_TCP_MD5SIG
1939 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1940 #endif
1941 
1942 	return 0;
1943 }
1944 
/* Tear down a TCPv6 socket: the common TCP teardown runs first, followed
 * by the IPv6-level teardown.
 */
static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);

	inet6_destroy_sock(sk);
}
1950 
1951 #ifdef CONFIG_PROC_FS
1952 /* Proc filesystem TCPv6 sock list dumping. */
1953 static void get_openreq6(struct seq_file *seq,
1954 			 const struct request_sock *req, int i)
1955 {
1956 	long ttd = req->rsk_timer.expires - jiffies;
1957 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1958 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1959 
1960 	if (ttd < 0)
1961 		ttd = 0;
1962 
1963 	seq_printf(seq,
1964 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1965 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1966 		   i,
1967 		   src->s6_addr32[0], src->s6_addr32[1],
1968 		   src->s6_addr32[2], src->s6_addr32[3],
1969 		   inet_rsk(req)->ir_num,
1970 		   dest->s6_addr32[0], dest->s6_addr32[1],
1971 		   dest->s6_addr32[2], dest->s6_addr32[3],
1972 		   ntohs(inet_rsk(req)->ir_rmt_port),
1973 		   TCP_SYN_RECV,
1974 		   0, 0, /* could print option size, but that is af dependent. */
1975 		   1,   /* timers active (only the expire timer) */
1976 		   jiffies_to_clock_t(ttd),
1977 		   req->num_timeout,
1978 		   from_kuid_munged(seq_user_ns(seq),
1979 				    sock_i_uid(req->rsk_listener)),
1980 		   0,  /* non standard timer */
1981 		   0, /* open_requests have no inode */
1982 		   0, req);
1983 }
1984 
1985 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1986 {
1987 	const struct in6_addr *dest, *src;
1988 	__u16 destp, srcp;
1989 	int timer_active;
1990 	unsigned long timer_expires;
1991 	const struct inet_sock *inet = inet_sk(sp);
1992 	const struct tcp_sock *tp = tcp_sk(sp);
1993 	const struct inet_connection_sock *icsk = inet_csk(sp);
1994 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1995 	int rx_queue;
1996 	int state;
1997 
1998 	dest  = &sp->sk_v6_daddr;
1999 	src   = &sp->sk_v6_rcv_saddr;
2000 	destp = ntohs(inet->inet_dport);
2001 	srcp  = ntohs(inet->inet_sport);
2002 
2003 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2004 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2005 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2006 		timer_active	= 1;
2007 		timer_expires	= icsk->icsk_timeout;
2008 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2009 		timer_active	= 4;
2010 		timer_expires	= icsk->icsk_timeout;
2011 	} else if (timer_pending(&sp->sk_timer)) {
2012 		timer_active	= 2;
2013 		timer_expires	= sp->sk_timer.expires;
2014 	} else {
2015 		timer_active	= 0;
2016 		timer_expires = jiffies;
2017 	}
2018 
2019 	state = inet_sk_state_load(sp);
2020 	if (state == TCP_LISTEN)
2021 		rx_queue = READ_ONCE(sp->sk_ack_backlog);
2022 	else
2023 		/* Because we don't lock the socket,
2024 		 * we might find a transient negative value.
2025 		 */
2026 		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2027 				      READ_ONCE(tp->copied_seq), 0);
2028 
2029 	seq_printf(seq,
2030 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2031 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2032 		   i,
2033 		   src->s6_addr32[0], src->s6_addr32[1],
2034 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2035 		   dest->s6_addr32[0], dest->s6_addr32[1],
2036 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2037 		   state,
2038 		   READ_ONCE(tp->write_seq) - tp->snd_una,
2039 		   rx_queue,
2040 		   timer_active,
2041 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
2042 		   icsk->icsk_retransmits,
2043 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2044 		   icsk->icsk_probes_out,
2045 		   sock_i_ino(sp),
2046 		   refcount_read(&sp->sk_refcnt), sp,
2047 		   jiffies_to_clock_t(icsk->icsk_rto),
2048 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
2049 		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2050 		   tcp_snd_cwnd(tp),
2051 		   state == TCP_LISTEN ?
2052 			fastopenq->max_qlen :
2053 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2054 		   );
2055 }
2056 
2057 static void get_timewait6_sock(struct seq_file *seq,
2058 			       struct inet_timewait_sock *tw, int i)
2059 {
2060 	long delta = tw->tw_timer.expires - jiffies;
2061 	const struct in6_addr *dest, *src;
2062 	__u16 destp, srcp;
2063 
2064 	dest = &tw->tw_v6_daddr;
2065 	src  = &tw->tw_v6_rcv_saddr;
2066 	destp = ntohs(tw->tw_dport);
2067 	srcp  = ntohs(tw->tw_sport);
2068 
2069 	seq_printf(seq,
2070 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2071 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2072 		   i,
2073 		   src->s6_addr32[0], src->s6_addr32[1],
2074 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2075 		   dest->s6_addr32[0], dest->s6_addr32[1],
2076 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2077 		   tw->tw_substate, 0, 0,
2078 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2079 		   refcount_read(&tw->tw_refcnt), tw);
2080 }
2081 
2082 static int tcp6_seq_show(struct seq_file *seq, void *v)
2083 {
2084 	struct tcp_iter_state *st;
2085 	struct sock *sk = v;
2086 
2087 	if (v == SEQ_START_TOKEN) {
2088 		seq_puts(seq,
2089 			 "  sl  "
2090 			 "local_address                         "
2091 			 "remote_address                        "
2092 			 "st tx_queue rx_queue tr tm->when retrnsmt"
2093 			 "   uid  timeout inode\n");
2094 		goto out;
2095 	}
2096 	st = seq->private;
2097 
2098 	if (sk->sk_state == TCP_TIME_WAIT)
2099 		get_timewait6_sock(seq, v, st->num);
2100 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2101 		get_openreq6(seq, v, st->num);
2102 	else
2103 		get_tcp6_sock(seq, v, st->num);
2104 out:
2105 	return 0;
2106 }
2107 
2108 static const struct seq_operations tcp6_seq_ops = {
2109 	.show		= tcp6_seq_show,
2110 	.start		= tcp_seq_start,
2111 	.next		= tcp_seq_next,
2112 	.stop		= tcp_seq_stop,
2113 };
2114 
2115 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2116 	.family		= AF_INET6,
2117 };
2118 
2119 int __net_init tcp6_proc_init(struct net *net)
2120 {
2121 	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2122 			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2123 		return -ENOMEM;
2124 	return 0;
2125 }
2126 
2127 void tcp6_proc_exit(struct net *net)
2128 {
2129 	remove_proc_entry("tcp6", net->proc_net);
2130 }
2131 #endif
2132 
2133 struct proto tcpv6_prot = {
2134 	.name			= "TCPv6",
2135 	.owner			= THIS_MODULE,
2136 	.close			= tcp_close,
2137 	.pre_connect		= tcp_v6_pre_connect,
2138 	.connect		= tcp_v6_connect,
2139 	.disconnect		= tcp_disconnect,
2140 	.accept			= inet_csk_accept,
2141 	.ioctl			= tcp_ioctl,
2142 	.init			= tcp_v6_init_sock,
2143 	.destroy		= tcp_v6_destroy_sock,
2144 	.shutdown		= tcp_shutdown,
2145 	.setsockopt		= tcp_setsockopt,
2146 	.getsockopt		= tcp_getsockopt,
2147 	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
2148 	.keepalive		= tcp_set_keepalive,
2149 	.recvmsg		= tcp_recvmsg,
2150 	.sendmsg		= tcp_sendmsg,
2151 	.sendpage		= tcp_sendpage,
2152 	.backlog_rcv		= tcp_v6_do_rcv,
2153 	.release_cb		= tcp_release_cb,
2154 	.hash			= inet6_hash,
2155 	.unhash			= inet_unhash,
2156 	.get_port		= inet_csk_get_port,
2157 	.put_port		= inet_put_port,
2158 #ifdef CONFIG_BPF_SYSCALL
2159 	.psock_update_sk_prot	= tcp_bpf_update_proto,
2160 #endif
2161 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2162 	.leave_memory_pressure	= tcp_leave_memory_pressure,
2163 	.stream_memory_free	= tcp_stream_memory_free,
2164 	.sockets_allocated	= &tcp_sockets_allocated,
2165 
2166 	.memory_allocated	= &tcp_memory_allocated,
2167 	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,
2168 
2169 	.memory_pressure	= &tcp_memory_pressure,
2170 	.orphan_count		= &tcp_orphan_count,
2171 	.sysctl_mem		= sysctl_tcp_mem,
2172 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2173 	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2174 	.max_header		= MAX_TCP_HEADER,
2175 	.obj_size		= sizeof(struct tcp6_sock),
2176 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2177 	.twsk_prot		= &tcp6_timewait_sock_ops,
2178 	.rsk_prot		= &tcp6_request_sock_ops,
2179 	.h.hashinfo		= &tcp_hashinfo,
2180 	.no_autobind		= true,
2181 	.diag_destroy		= tcp_abort,
2182 };
2183 EXPORT_SYMBOL_GPL(tcpv6_prot);
2184 
2185 static const struct inet6_protocol tcpv6_protocol = {
2186 	.handler	=	tcp_v6_rcv,
2187 	.err_handler	=	tcp_v6_err,
2188 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2189 };
2190 
2191 static struct inet_protosw tcpv6_protosw = {
2192 	.type		=	SOCK_STREAM,
2193 	.protocol	=	IPPROTO_TCP,
2194 	.prot		=	&tcpv6_prot,
2195 	.ops		=	&inet6_stream_ops,
2196 	.flags		=	INET_PROTOSW_PERMANENT |
2197 				INET_PROTOSW_ICSK,
2198 };
2199 
2200 static int __net_init tcpv6_net_init(struct net *net)
2201 {
2202 	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2203 				    SOCK_RAW, IPPROTO_TCP, net);
2204 }
2205 
2206 static void __net_exit tcpv6_net_exit(struct net *net)
2207 {
2208 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2209 }
2210 
2211 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2212 {
2213 	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2214 }
2215 
2216 static struct pernet_operations tcpv6_net_ops = {
2217 	.init	    = tcpv6_net_init,
2218 	.exit	    = tcpv6_net_exit,
2219 	.exit_batch = tcpv6_net_exit_batch,
2220 };
2221 
2222 int __init tcpv6_init(void)
2223 {
2224 	int ret;
2225 
2226 	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2227 	if (ret)
2228 		goto out;
2229 
2230 	/* register inet6 protocol */
2231 	ret = inet6_register_protosw(&tcpv6_protosw);
2232 	if (ret)
2233 		goto out_tcpv6_protocol;
2234 
2235 	ret = register_pernet_subsys(&tcpv6_net_ops);
2236 	if (ret)
2237 		goto out_tcpv6_protosw;
2238 
2239 	ret = mptcpv6_init();
2240 	if (ret)
2241 		goto out_tcpv6_pernet_subsys;
2242 
2243 out:
2244 	return ret;
2245 
2246 out_tcpv6_pernet_subsys:
2247 	unregister_pernet_subsys(&tcpv6_net_ops);
2248 out_tcpv6_protosw:
2249 	inet6_unregister_protosw(&tcpv6_protosw);
2250 out_tcpv6_protocol:
2251 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2252 	goto out;
2253 }
2254 
2255 void tcpv6_exit(void)
2256 {
2257 	unregister_pernet_subsys(&tcpv6_net_ops);
2258 	inet6_unregister_protosw(&tcpv6_protosw);
2259 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2260 }
2261