xref: /openbmc/linux/net/ipv6/tcp_ipv6.c (revision cd6d421e)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	TCP over IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on:
10  *	linux/net/ipv4/tcp.c
11  *	linux/net/ipv4/tcp_input.c
12  *	linux/net/ipv4/tcp_output.c
13  *
14  *	Fixes:
15  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
16  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
17  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
18  *					a single port at the same time.
19  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
20  */
21 
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43 
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62 
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65 
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68 
69 #include <trace/events/tcp.h>
70 
71 static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 				      struct request_sock *req);
74 
75 static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76 
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 						   const struct in6_addr *addr,
85 						   int l3index)
86 {
87 	return NULL;
88 }
89 #endif
90 
91 /* Helper returning the ipv6_pinfo of a given tcp socket.
92  * It can be used in the TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allows compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98 	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99 
100 	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
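
/* Layout sketch (illustrative, not part of the build): the offset computed
 * above relies on struct tcp6_sock keeping its ipv6_pinfo as the last member,
 * roughly
 *
 *	struct tcp6_sock {
 *		struct tcp_sock		tcp;
 *		struct ipv6_pinfo	inet6;	// must stay last
 *	};
 *
 * so subtracting sizeof(struct ipv6_pinfo) from sizeof(struct tcp6_sock)
 * lands on the pinfo without dereferencing inet_sk(sk)->pinet6.
 */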
102 
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105 	struct dst_entry *dst = skb_dst(skb);
106 
107 	if (dst && dst_hold_safe(dst)) {
108 		const struct rt6_info *rt = (const struct rt6_info *)dst;
109 
110 		sk->sk_rx_dst = dst;
111 		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
112 		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
113 	}
114 }
115 
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118 	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119 				ipv6_hdr(skb)->saddr.s6_addr32,
120 				tcp_hdr(skb)->dest,
121 				tcp_hdr(skb)->source);
122 }
123 
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126 	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127 				   ipv6_hdr(skb)->saddr.s6_addr32);
128 }
129 
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131 			      int addr_len)
132 {
133 	/* This check is replicated from tcp_v6_connect() and intended to
134 	 * prevent the BPF program called below from accessing bytes that are
135 	 * outside the bounds specified by the user in addr_len.
136 	 */
137 	if (addr_len < SIN6_LEN_RFC2133)
138 		return -EINVAL;
139 
140 	sock_owned_by_me(sk);
141 
142 	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
144 
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146 			  int addr_len)
147 {
148 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149 	struct inet_sock *inet = inet_sk(sk);
150 	struct inet_connection_sock *icsk = inet_csk(sk);
151 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
152 	struct tcp_sock *tp = tcp_sk(sk);
153 	struct in6_addr *saddr = NULL, *final_p, final;
154 	struct ipv6_txoptions *opt;
155 	struct flowi6 fl6;
156 	struct dst_entry *dst;
157 	int addr_type;
158 	int err;
159 	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
160 
161 	if (addr_len < SIN6_LEN_RFC2133)
162 		return -EINVAL;
163 
164 	if (usin->sin6_family != AF_INET6)
165 		return -EAFNOSUPPORT;
166 
167 	memset(&fl6, 0, sizeof(fl6));
168 
169 	if (np->sndflow) {
170 		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
171 		IP6_ECN_flow_init(fl6.flowlabel);
172 		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
173 			struct ip6_flowlabel *flowlabel;
174 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175 			if (IS_ERR(flowlabel))
176 				return -EINVAL;
177 			fl6_sock_release(flowlabel);
178 		}
179 	}
180 
181 	/*
182 	 *	connect() to INADDR_ANY means loopback (BSD'ism).
183 	 */
184 
185 	if (ipv6_addr_any(&usin->sin6_addr)) {
186 		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187 			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
188 					       &usin->sin6_addr);
189 		else
190 			usin->sin6_addr = in6addr_loopback;
191 	}
192 
193 	addr_type = ipv6_addr_type(&usin->sin6_addr);
194 
195 	if (addr_type & IPV6_ADDR_MULTICAST)
196 		return -ENETUNREACH;
197 
198 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
199 		if (addr_len >= sizeof(struct sockaddr_in6) &&
200 		    usin->sin6_scope_id) {
201 			/* If an interface was set at bind time, the
202 			 * indices must match.
203 			 */
204 			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
205 				return -EINVAL;
206 
207 			sk->sk_bound_dev_if = usin->sin6_scope_id;
208 		}
209 
210 		/* Connecting to a link-local address requires an interface */
211 		if (!sk->sk_bound_dev_if)
212 			return -EINVAL;
213 	}
214 
215 	if (tp->rx_opt.ts_recent_stamp &&
216 	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217 		tp->rx_opt.ts_recent = 0;
218 		tp->rx_opt.ts_recent_stamp = 0;
219 		WRITE_ONCE(tp->write_seq, 0);
220 	}
221 
222 	sk->sk_v6_daddr = usin->sin6_addr;
223 	np->flow_label = fl6.flowlabel;
224 
225 	/*
226 	 *	TCP over IPv4
227 	 */
228 
229 	if (addr_type & IPV6_ADDR_MAPPED) {
230 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
231 		struct sockaddr_in sin;
232 
233 		if (__ipv6_only_sock(sk))
234 			return -ENETUNREACH;
235 
236 		sin.sin_family = AF_INET;
237 		sin.sin_port = usin->sin6_port;
238 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239 
240 		icsk->icsk_af_ops = &ipv6_mapped;
241 		if (sk_is_mptcp(sk))
242 			mptcpv6_handle_mapped(sk, true);
243 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
244 #ifdef CONFIG_TCP_MD5SIG
245 		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
246 #endif
247 
248 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
249 
250 		if (err) {
251 			icsk->icsk_ext_hdr_len = exthdrlen;
252 			icsk->icsk_af_ops = &ipv6_specific;
253 			if (sk_is_mptcp(sk))
254 				mptcpv6_handle_mapped(sk, false);
255 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
256 #ifdef CONFIG_TCP_MD5SIG
257 			tp->af_specific = &tcp_sock_ipv6_specific;
258 #endif
259 			goto failure;
260 		}
261 		np->saddr = sk->sk_v6_rcv_saddr;
262 
263 		return err;
264 	}
265 
266 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
267 		saddr = &sk->sk_v6_rcv_saddr;
268 
269 	fl6.flowi6_proto = IPPROTO_TCP;
270 	fl6.daddr = sk->sk_v6_daddr;
271 	fl6.saddr = saddr ? *saddr : np->saddr;
272 	fl6.flowi6_oif = sk->sk_bound_dev_if;
273 	fl6.flowi6_mark = sk->sk_mark;
274 	fl6.fl6_dport = usin->sin6_port;
275 	fl6.fl6_sport = inet->inet_sport;
276 	fl6.flowi6_uid = sk->sk_uid;
277 
278 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
279 	final_p = fl6_update_dst(&fl6, opt, &final);
280 
281 	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
282 
283 	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
284 	if (IS_ERR(dst)) {
285 		err = PTR_ERR(dst);
286 		goto failure;
287 	}
288 
289 	if (!saddr) {
290 		saddr = &fl6.saddr;
291 		sk->sk_v6_rcv_saddr = *saddr;
292 	}
293 
294 	/* set the source address */
295 	np->saddr = *saddr;
296 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
297 
298 	sk->sk_gso_type = SKB_GSO_TCPV6;
299 	ip6_dst_store(sk, dst, NULL, NULL);
300 
301 	icsk->icsk_ext_hdr_len = 0;
302 	if (opt)
303 		icsk->icsk_ext_hdr_len = opt->opt_flen +
304 					 opt->opt_nflen;
305 
306 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
307 
308 	inet->inet_dport = usin->sin6_port;
309 
310 	tcp_set_state(sk, TCP_SYN_SENT);
311 	err = inet6_hash_connect(tcp_death_row, sk);
312 	if (err)
313 		goto late_failure;
314 
315 	sk_set_txhash(sk);
316 
317 	if (likely(!tp->repair)) {
318 		if (!tp->write_seq)
319 			WRITE_ONCE(tp->write_seq,
320 				   secure_tcpv6_seq(np->saddr.s6_addr32,
321 						    sk->sk_v6_daddr.s6_addr32,
322 						    inet->inet_sport,
323 						    inet->inet_dport));
324 		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
325 						   np->saddr.s6_addr32,
326 						   sk->sk_v6_daddr.s6_addr32);
327 	}
328 
329 	if (tcp_fastopen_defer_connect(sk, &err))
330 		return err;
331 	if (err)
332 		goto late_failure;
333 
334 	err = tcp_connect(sk);
335 	if (err)
336 		goto late_failure;
337 
338 	return 0;
339 
340 late_failure:
341 	tcp_set_state(sk, TCP_CLOSE);
342 failure:
343 	inet->inet_dport = 0;
344 	sk->sk_route_caps = 0;
345 	return err;
346 }
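
/* Usage sketch (hypothetical userspace side, not part of this file): an
 * AF_INET6 socket connecting to a v4-mapped destination takes the
 * IPV6_ADDR_MAPPED branch above and is handed over to tcp_v4_connect(), e.g.
 *
 *	struct sockaddr_in6 a = { .sin6_family = AF_INET6,
 *				  .sin6_port = htons(80) };
 *	inet_pton(AF_INET6, "::ffff:192.0.2.1", &a.sin6_addr);
 *	connect(fd, (struct sockaddr *)&a, sizeof(a));
 *
 * unless the socket has IPV6_V6ONLY set, in which case -ENETUNREACH is
 * returned.
 */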
347 
348 static void tcp_v6_mtu_reduced(struct sock *sk)
349 {
350 	struct dst_entry *dst;
351 
352 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
353 		return;
354 
355 	dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
356 	if (!dst)
357 		return;
358 
359 	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
360 		tcp_sync_mss(sk, dst_mtu(dst));
361 		tcp_simple_retransmit(sk);
362 	}
363 }
364 
365 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
366 		u8 type, u8 code, int offset, __be32 info)
367 {
368 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
369 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
370 	struct net *net = dev_net(skb->dev);
371 	struct request_sock *fastopen;
372 	struct ipv6_pinfo *np;
373 	struct tcp_sock *tp;
374 	__u32 seq, snd_una;
375 	struct sock *sk;
376 	bool fatal;
377 	int err;
378 
379 	sk = __inet6_lookup_established(net, &tcp_hashinfo,
380 					&hdr->daddr, th->dest,
381 					&hdr->saddr, ntohs(th->source),
382 					skb->dev->ifindex, inet6_sdif(skb));
383 
384 	if (!sk) {
385 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
386 				  ICMP6_MIB_INERRORS);
387 		return -ENOENT;
388 	}
389 
390 	if (sk->sk_state == TCP_TIME_WAIT) {
391 		inet_twsk_put(inet_twsk(sk));
392 		return 0;
393 	}
394 	seq = ntohl(th->seq);
395 	fatal = icmpv6_err_convert(type, code, &err);
396 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
397 		tcp_req_err(sk, seq, fatal);
398 		return 0;
399 	}
400 
401 	bh_lock_sock(sk);
402 	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
403 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
404 
405 	if (sk->sk_state == TCP_CLOSE)
406 		goto out;
407 
408 	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
409 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
410 		goto out;
411 	}
412 
413 	tp = tcp_sk(sk);
414 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
415 	fastopen = rcu_dereference(tp->fastopen_rsk);
416 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
417 	if (sk->sk_state != TCP_LISTEN &&
418 	    !between(seq, snd_una, tp->snd_nxt)) {
419 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
420 		goto out;
421 	}
422 
423 	np = tcp_inet6_sk(sk);
424 
425 	if (type == NDISC_REDIRECT) {
426 		if (!sock_owned_by_user(sk)) {
427 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
428 
429 			if (dst)
430 				dst->ops->redirect(dst, sk, skb);
431 		}
432 		goto out;
433 	}
434 
435 	if (type == ICMPV6_PKT_TOOBIG) {
436 		/* We are not interested in TCP_LISTEN and open_requests
437 		 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
438 		 * they should go through unfragmented).
439 		 */
440 		if (sk->sk_state == TCP_LISTEN)
441 			goto out;
442 
443 		if (!ip6_sk_accept_pmtu(sk))
444 			goto out;
445 
446 		tp->mtu_info = ntohl(info);
447 		if (!sock_owned_by_user(sk))
448 			tcp_v6_mtu_reduced(sk);
449 		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
450 					   &sk->sk_tsq_flags))
451 			sock_hold(sk);
452 		goto out;
453 	}
454 
455 
456 	/* Might be for a request_sock */
457 	switch (sk->sk_state) {
458 	case TCP_SYN_SENT:
459 	case TCP_SYN_RECV:
460 		/* Only in fast or simultaneous open. If a fast open socket is
461 		 * already accepted it is treated as a connected one below.
462 		 */
463 		if (fastopen && !fastopen->sk)
464 			break;
465 
466 		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
467 
468 		if (!sock_owned_by_user(sk)) {
469 			sk->sk_err = err;
470 			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
471 
472 			tcp_done(sk);
473 		} else
474 			sk->sk_err_soft = err;
475 		goto out;
476 	case TCP_LISTEN:
477 		break;
478 	default:
479 		/* Check if this ICMP message allows reverting the backoff
480 		 * (see RFC 6069).
481 		 */
482 		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
483 		    code == ICMPV6_NOROUTE)
484 			tcp_ld_RTO_revert(sk, seq);
485 	}
486 
487 	if (!sock_owned_by_user(sk) && np->recverr) {
488 		sk->sk_err = err;
489 		sk->sk_error_report(sk);
490 	} else
491 		sk->sk_err_soft = err;
492 
493 out:
494 	bh_unlock_sock(sk);
495 	sock_put(sk);
496 	return 0;
497 }
498 
499 
500 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
501 			      struct flowi *fl,
502 			      struct request_sock *req,
503 			      struct tcp_fastopen_cookie *foc,
504 			      enum tcp_synack_type synack_type,
505 			      struct sk_buff *syn_skb)
506 {
507 	struct inet_request_sock *ireq = inet_rsk(req);
508 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
509 	struct ipv6_txoptions *opt;
510 	struct flowi6 *fl6 = &fl->u.ip6;
511 	struct sk_buff *skb;
512 	int err = -ENOMEM;
513 	u8 tclass;
514 
515 	/* First, grab a route. */
516 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
517 					       IPPROTO_TCP)) == NULL)
518 		goto done;
519 
520 	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
521 
522 	if (skb) {
523 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
524 				    &ireq->ir_v6_rmt_addr);
525 
526 		fl6->daddr = ireq->ir_v6_rmt_addr;
527 		if (np->repflow && ireq->pktopts)
528 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
529 
530 		tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
531 				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
532 				(np->tclass & INET_ECN_MASK) :
533 				np->tclass;
534 
535 		if (!INET_ECN_is_capable(tclass) &&
536 		    tcp_bpf_ca_needs_ecn((struct sock *)req))
537 			tclass |= INET_ECN_ECT_0;
538 
539 		rcu_read_lock();
540 		opt = ireq->ipv6_opt;
541 		if (!opt)
542 			opt = rcu_dereference(np->opt);
543 		err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt,
544 			       tclass, sk->sk_priority);
545 		rcu_read_unlock();
546 		err = net_xmit_eval(err);
547 	}
548 
549 done:
550 	return err;
551 }
552 
553 
554 static void tcp_v6_reqsk_destructor(struct request_sock *req)
555 {
556 	kfree(inet_rsk(req)->ipv6_opt);
557 	kfree_skb(inet_rsk(req)->pktopts);
558 }
559 
560 #ifdef CONFIG_TCP_MD5SIG
561 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
562 						   const struct in6_addr *addr,
563 						   int l3index)
564 {
565 	return tcp_md5_do_lookup(sk, l3index,
566 				 (union tcp_md5_addr *)addr, AF_INET6);
567 }
568 
569 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
570 						const struct sock *addr_sk)
571 {
572 	int l3index;
573 
574 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
575 						 addr_sk->sk_bound_dev_if);
576 	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
577 				    l3index);
578 }
579 
580 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
581 				 sockptr_t optval, int optlen)
582 {
583 	struct tcp_md5sig cmd;
584 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
585 	int l3index = 0;
586 	u8 prefixlen;
587 
588 	if (optlen < sizeof(cmd))
589 		return -EINVAL;
590 
591 	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
592 		return -EFAULT;
593 
594 	if (sin6->sin6_family != AF_INET6)
595 		return -EINVAL;
596 
597 	if (optname == TCP_MD5SIG_EXT &&
598 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
599 		prefixlen = cmd.tcpm_prefixlen;
600 		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
601 					prefixlen > 32))
602 			return -EINVAL;
603 	} else {
604 		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
605 	}
606 
607 	if (optname == TCP_MD5SIG_EXT &&
608 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
609 		struct net_device *dev;
610 
611 		rcu_read_lock();
612 		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
613 		if (dev && netif_is_l3_master(dev))
614 			l3index = dev->ifindex;
615 		rcu_read_unlock();
616 
617 		/* It is ok to reference the set/not-set result outside of rcu;
618 		 * right now the device MUST be an L3 master.
619 		 */
620 		if (!dev || !l3index)
621 			return -EINVAL;
622 	}
623 
624 	if (!cmd.tcpm_keylen) {
625 		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
626 			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
627 					      AF_INET, prefixlen,
628 					      l3index);
629 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
630 				      AF_INET6, prefixlen, l3index);
631 	}
632 
633 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
634 		return -EINVAL;
635 
636 	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
637 		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
638 				      AF_INET, prefixlen, l3index,
639 				      cmd.tcpm_key, cmd.tcpm_keylen,
640 				      GFP_KERNEL);
641 
642 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
643 			      AF_INET6, prefixlen, l3index,
644 			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
645 }
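
/* Usage sketch (hypothetical userspace side, not part of this file): keys are
 * installed with the TCP_MD5SIG/TCP_MD5SIG_EXT socket options parsed above,
 * e.g.
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in6 *a = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *	a->sin6_family = AF_INET6;
 *	inet_pton(AF_INET6, "2001:db8::1", &a->sin6_addr);
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * A zero tcpm_keylen deletes the key for that address instead.
 */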
646 
647 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
648 				   const struct in6_addr *daddr,
649 				   const struct in6_addr *saddr,
650 				   const struct tcphdr *th, int nbytes)
651 {
652 	struct tcp6_pseudohdr *bp;
653 	struct scatterlist sg;
654 	struct tcphdr *_th;
655 
656 	bp = hp->scratch;
657 	/* 1. TCP pseudo-header (RFC2460) */
658 	bp->saddr = *saddr;
659 	bp->daddr = *daddr;
660 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
661 	bp->len = cpu_to_be32(nbytes);
662 
663 	_th = (struct tcphdr *)(bp + 1);
664 	memcpy(_th, th, sizeof(*th));
665 	_th->check = 0;
666 
667 	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
668 	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
669 				sizeof(*bp) + sizeof(*th));
670 	return crypto_ahash_update(hp->md5_req);
671 }
672 
673 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
674 			       const struct in6_addr *daddr, struct in6_addr *saddr,
675 			       const struct tcphdr *th)
676 {
677 	struct tcp_md5sig_pool *hp;
678 	struct ahash_request *req;
679 
680 	hp = tcp_get_md5sig_pool();
681 	if (!hp)
682 		goto clear_hash_noput;
683 	req = hp->md5_req;
684 
685 	if (crypto_ahash_init(req))
686 		goto clear_hash;
687 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
688 		goto clear_hash;
689 	if (tcp_md5_hash_key(hp, key))
690 		goto clear_hash;
691 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
692 	if (crypto_ahash_final(req))
693 		goto clear_hash;
694 
695 	tcp_put_md5sig_pool();
696 	return 0;
697 
698 clear_hash:
699 	tcp_put_md5sig_pool();
700 clear_hash_noput:
701 	memset(md5_hash, 0, 16);
702 	return 1;
703 }
704 
705 static int tcp_v6_md5_hash_skb(char *md5_hash,
706 			       const struct tcp_md5sig_key *key,
707 			       const struct sock *sk,
708 			       const struct sk_buff *skb)
709 {
710 	const struct in6_addr *saddr, *daddr;
711 	struct tcp_md5sig_pool *hp;
712 	struct ahash_request *req;
713 	const struct tcphdr *th = tcp_hdr(skb);
714 
715 	if (sk) { /* valid for established/request sockets */
716 		saddr = &sk->sk_v6_rcv_saddr;
717 		daddr = &sk->sk_v6_daddr;
718 	} else {
719 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
720 		saddr = &ip6h->saddr;
721 		daddr = &ip6h->daddr;
722 	}
723 
724 	hp = tcp_get_md5sig_pool();
725 	if (!hp)
726 		goto clear_hash_noput;
727 	req = hp->md5_req;
728 
729 	if (crypto_ahash_init(req))
730 		goto clear_hash;
731 
732 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
733 		goto clear_hash;
734 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
735 		goto clear_hash;
736 	if (tcp_md5_hash_key(hp, key))
737 		goto clear_hash;
738 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
739 	if (crypto_ahash_final(req))
740 		goto clear_hash;
741 
742 	tcp_put_md5sig_pool();
743 	return 0;
744 
745 clear_hash:
746 	tcp_put_md5sig_pool();
747 clear_hash_noput:
748 	memset(md5_hash, 0, 16);
749 	return 1;
750 }
751 
752 #endif
753 
754 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
755 				    const struct sk_buff *skb,
756 				    int dif, int sdif)
757 {
758 #ifdef CONFIG_TCP_MD5SIG
759 	const __u8 *hash_location = NULL;
760 	struct tcp_md5sig_key *hash_expected;
761 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
762 	const struct tcphdr *th = tcp_hdr(skb);
763 	int genhash, l3index;
764 	u8 newhash[16];
765 
766 	/* If sdif is set, the packet ingressed via a device
767 	 * in an L3 domain and dif is set to the l3mdev index.
768 	 */
769 	l3index = sdif ? dif : 0;
770 
771 	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
772 	hash_location = tcp_parse_md5sig_option(th);
773 
774 	/* We've parsed the options - do we have a hash? */
775 	if (!hash_expected && !hash_location)
776 		return false;
777 
778 	if (hash_expected && !hash_location) {
779 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
780 		return true;
781 	}
782 
783 	if (!hash_expected && hash_location) {
784 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
785 		return true;
786 	}
787 
788 	/* check the signature */
789 	genhash = tcp_v6_md5_hash_skb(newhash,
790 				      hash_expected,
791 				      NULL, skb);
792 
793 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
794 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
795 		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
796 				     genhash ? "failed" : "mismatch",
797 				     &ip6h->saddr, ntohs(th->source),
798 				     &ip6h->daddr, ntohs(th->dest), l3index);
799 		return true;
800 	}
801 #endif
802 	return false;
803 }
804 
805 static void tcp_v6_init_req(struct request_sock *req,
806 			    const struct sock *sk_listener,
807 			    struct sk_buff *skb)
808 {
809 	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
810 	struct inet_request_sock *ireq = inet_rsk(req);
811 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
812 
813 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
814 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
815 
816 	/* So that link-locals have meaning */
817 	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
818 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
819 		ireq->ir_iif = tcp_v6_iif(skb);
820 
821 	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
822 	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
823 	     np->rxopt.bits.rxinfo ||
824 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
825 	     np->rxopt.bits.rxohlim || np->repflow)) {
826 		refcount_inc(&skb->users);
827 		ireq->pktopts = skb;
828 	}
829 }
830 
831 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
832 					  struct sk_buff *skb,
833 					  struct flowi *fl,
834 					  struct request_sock *req)
835 {
836 	tcp_v6_init_req(req, sk, skb);
837 
838 	if (security_inet_conn_request(sk, skb, req))
839 		return NULL;
840 
841 	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
842 }
843 
844 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
845 	.family		=	AF_INET6,
846 	.obj_size	=	sizeof(struct tcp6_request_sock),
847 	.rtx_syn_ack	=	tcp_rtx_synack,
848 	.send_ack	=	tcp_v6_reqsk_send_ack,
849 	.destructor	=	tcp_v6_reqsk_destructor,
850 	.send_reset	=	tcp_v6_send_reset,
851 	.syn_ack_timeout =	tcp_syn_ack_timeout,
852 };
853 
854 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
855 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
856 				sizeof(struct ipv6hdr),
857 #ifdef CONFIG_TCP_MD5SIG
858 	.req_md5_lookup	=	tcp_v6_md5_lookup,
859 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
860 #endif
861 #ifdef CONFIG_SYN_COOKIES
862 	.cookie_init_seq =	cookie_v6_init_sequence,
863 #endif
864 	.route_req	=	tcp_v6_route_req,
865 	.init_seq	=	tcp_v6_init_seq,
866 	.init_ts_off	=	tcp_v6_init_ts_off,
867 	.send_synack	=	tcp_v6_send_synack,
868 };
869 
870 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
871 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
872 				 int oif, struct tcp_md5sig_key *key, int rst,
873 				 u8 tclass, __be32 label, u32 priority)
874 {
875 	const struct tcphdr *th = tcp_hdr(skb);
876 	struct tcphdr *t1;
877 	struct sk_buff *buff;
878 	struct flowi6 fl6;
879 	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
880 	struct sock *ctl_sk = net->ipv6.tcp_sk;
881 	unsigned int tot_len = sizeof(struct tcphdr);
882 	struct dst_entry *dst;
883 	__be32 *topt;
884 	__u32 mark = 0;
885 
886 	if (tsecr)
887 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
888 #ifdef CONFIG_TCP_MD5SIG
889 	if (key)
890 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
891 #endif
892 
893 	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
894 			 GFP_ATOMIC);
895 	if (!buff)
896 		return;
897 
898 	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
899 
900 	t1 = skb_push(buff, tot_len);
901 	skb_reset_transport_header(buff);
902 
903 	/* Swap the send and the receive. */
904 	memset(t1, 0, sizeof(*t1));
905 	t1->dest = th->source;
906 	t1->source = th->dest;
907 	t1->doff = tot_len / 4;
908 	t1->seq = htonl(seq);
909 	t1->ack_seq = htonl(ack);
910 	t1->ack = !rst || !th->ack;
911 	t1->rst = rst;
912 	t1->window = htons(win);
913 
914 	topt = (__be32 *)(t1 + 1);
915 
916 	if (tsecr) {
917 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
918 				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
919 		*topt++ = htonl(tsval);
920 		*topt++ = htonl(tsecr);
921 	}
922 
923 #ifdef CONFIG_TCP_MD5SIG
924 	if (key) {
925 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
926 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
927 		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
928 				    &ipv6_hdr(skb)->saddr,
929 				    &ipv6_hdr(skb)->daddr, t1);
930 	}
931 #endif
932 
933 	memset(&fl6, 0, sizeof(fl6));
934 	fl6.daddr = ipv6_hdr(skb)->saddr;
935 	fl6.saddr = ipv6_hdr(skb)->daddr;
936 	fl6.flowlabel = label;
937 
938 	buff->ip_summed = CHECKSUM_PARTIAL;
939 	buff->csum = 0;
940 
941 	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
942 
943 	fl6.flowi6_proto = IPPROTO_TCP;
944 	if (rt6_need_strict(&fl6.daddr) && !oif)
945 		fl6.flowi6_oif = tcp_v6_iif(skb);
946 	else {
947 		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
948 			oif = skb->skb_iif;
949 
950 		fl6.flowi6_oif = oif;
951 	}
952 
953 	if (sk) {
954 		if (sk->sk_state == TCP_TIME_WAIT) {
955 			mark = inet_twsk(sk)->tw_mark;
956 			/* autoflowlabel relies on buff->hash */
957 			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
958 				     PKT_HASH_TYPE_L4);
959 		} else {
960 			mark = sk->sk_mark;
961 		}
962 		buff->tstamp = tcp_transmit_time(sk);
963 	}
964 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
965 	fl6.fl6_dport = t1->dest;
966 	fl6.fl6_sport = t1->source;
967 	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
968 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
969 
970 	/* Pass a socket to ip6_dst_lookup_flow even when it is for a RST;
971 	 * the underlying function will use it to retrieve the network
972 	 * namespace.
973 	 */
974 	dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
975 	if (!IS_ERR(dst)) {
976 		skb_dst_set(buff, dst);
977 		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
978 			 tclass & ~INET_ECN_MASK, priority);
979 		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
980 		if (rst)
981 			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
982 		return;
983 	}
984 
985 	kfree_skb(buff);
986 }
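
/* Size sketch for the reply built above: a bare TCP header is 20 bytes, the
 * echoed timestamp adds 12 bytes (NOP, NOP, TIMESTAMP) and an MD5 signature
 * adds another 20 (NOP, NOP, MD5SIG plus the 16-byte digest), so tot_len is
 * at most 52 and t1->doff at most 13.
 */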
987 
988 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
989 {
990 	const struct tcphdr *th = tcp_hdr(skb);
991 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
992 	u32 seq = 0, ack_seq = 0;
993 	struct tcp_md5sig_key *key = NULL;
994 #ifdef CONFIG_TCP_MD5SIG
995 	const __u8 *hash_location = NULL;
996 	unsigned char newhash[16];
997 	int genhash;
998 	struct sock *sk1 = NULL;
999 #endif
1000 	__be32 label = 0;
1001 	u32 priority = 0;
1002 	struct net *net;
1003 	int oif = 0;
1004 
1005 	if (th->rst)
1006 		return;
1007 
1008 	/* If sk is not NULL, it means we did a successful lookup and the incoming
1009 	 * route had to be correct. prequeue might have dropped our dst.
1010 	 */
1011 	if (!sk && !ipv6_unicast_destination(skb))
1012 		return;
1013 
1014 	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1015 #ifdef CONFIG_TCP_MD5SIG
1016 	rcu_read_lock();
1017 	hash_location = tcp_parse_md5sig_option(th);
1018 	if (sk && sk_fullsock(sk)) {
1019 		int l3index;
1020 
1021 		/* If sdif is set, the packet ingressed via a device
1022 		 * in an L3 domain and inet_iif is set to it.
1023 		 */
1024 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1025 		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1026 	} else if (hash_location) {
1027 		int dif = tcp_v6_iif_l3_slave(skb);
1028 		int sdif = tcp_v6_sdif(skb);
1029 		int l3index;
1030 
1031 		/*
1032 		 * The active side is gone. Try to find the listening socket
1033 		 * through the source port, then find the md5 key through that
1034 		 * listening socket. We do not lose security here: the incoming
1035 		 * packet is checked against the md5 hash of the key we find,
1036 		 * and no RST is generated if the hash doesn't match.
1037 		 */
1038 		sk1 = inet6_lookup_listener(net,
1039 					   &tcp_hashinfo, NULL, 0,
1040 					   &ipv6h->saddr,
1041 					   th->source, &ipv6h->daddr,
1042 					   ntohs(th->source), dif, sdif);
1043 		if (!sk1)
1044 			goto out;
1045 
1046 		/* If sdif is set, the packet ingressed via a device
1047 		 * in an L3 domain and dif is set to it.
1048 		 */
1049 		l3index = tcp_v6_sdif(skb) ? dif : 0;
1050 
1051 		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1052 		if (!key)
1053 			goto out;
1054 
1055 		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1056 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
1057 			goto out;
1058 	}
1059 #endif
1060 
1061 	if (th->ack)
1062 		seq = ntohl(th->ack_seq);
1063 	else
1064 		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1065 			  (th->doff << 2);
1066 
1067 	if (sk) {
1068 		oif = sk->sk_bound_dev_if;
1069 		if (sk_fullsock(sk)) {
1070 			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1071 
1072 			trace_tcp_send_reset(sk, skb);
1073 			if (np->repflow)
1074 				label = ip6_flowlabel(ipv6h);
1075 			priority = sk->sk_priority;
1076 		}
1077 		if (sk->sk_state == TCP_TIME_WAIT) {
1078 			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1079 			priority = inet_twsk(sk)->tw_priority;
1080 		}
1081 	} else {
1082 		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1083 			label = ip6_flowlabel(ipv6h);
1084 	}
1085 
1086 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1087 			     ipv6_get_dsfield(ipv6h), label, priority);
1088 
1089 #ifdef CONFIG_TCP_MD5SIG
1090 out:
1091 	rcu_read_unlock();
1092 #endif
1093 }
1094 
1095 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1096 			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1097 			    struct tcp_md5sig_key *key, u8 tclass,
1098 			    __be32 label, u32 priority)
1099 {
1100 	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1101 			     tclass, label, priority);
1102 }
1103 
1104 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1105 {
1106 	struct inet_timewait_sock *tw = inet_twsk(sk);
1107 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1108 
1109 	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1110 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1111 			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1112 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1113 			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1114 
1115 	inet_twsk_put(tw);
1116 }
1117 
1118 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1119 				  struct request_sock *req)
1120 {
1121 	int l3index;
1122 
1123 	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1124 
1125 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1126 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1127 	 */
1128 	/* RFC 7323 2.3
1129 	 * The window field (SEG.WND) of every outgoing segment, with the
1130 	 * exception of <SYN> segments, MUST be right-shifted by
1131 	 * Rcv.Wind.Shift bits:
1132 	 */
1133 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1134 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1135 			tcp_rsk(req)->rcv_nxt,
1136 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1137 			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1138 			req->ts_recent, sk->sk_bound_dev_if,
1139 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1140 			ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
1141 }
1142 
1143 
1144 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1145 {
1146 #ifdef CONFIG_SYN_COOKIES
1147 	const struct tcphdr *th = tcp_hdr(skb);
1148 
1149 	if (!th->syn)
1150 		sk = cookie_v6_check(sk, skb);
1151 #endif
1152 	return sk;
1153 }
1154 
1155 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1156 			 struct tcphdr *th, u32 *cookie)
1157 {
1158 	u16 mss = 0;
1159 #ifdef CONFIG_SYN_COOKIES
1160 	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1161 				    &tcp_request_sock_ipv6_ops, sk, th);
1162 	if (mss) {
1163 		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1164 		tcp_synq_overflow(sk);
1165 	}
1166 #endif
1167 	return mss;
1168 }
1169 
1170 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1171 {
1172 	if (skb->protocol == htons(ETH_P_IP))
1173 		return tcp_v4_conn_request(sk, skb);
1174 
1175 	if (!ipv6_unicast_destination(skb))
1176 		goto drop;
1177 
1178 	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1179 		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1180 		return 0;
1181 	}
1182 
1183 	return tcp_conn_request(&tcp6_request_sock_ops,
1184 				&tcp_request_sock_ipv6_ops, sk, skb);
1185 
1186 drop:
1187 	tcp_listendrop(sk);
1188 	return 0; /* don't send reset */
1189 }
1190 
1191 static void tcp_v6_restore_cb(struct sk_buff *skb)
1192 {
1193 	/* We need to move header back to the beginning if xfrm6_policy_check()
1194 	 * and tcp_v6_fill_cb() are going to be called again.
1195 	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1196 	 */
1197 	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1198 		sizeof(struct inet6_skb_parm));
1199 }
1200 
1201 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1202 					 struct request_sock *req,
1203 					 struct dst_entry *dst,
1204 					 struct request_sock *req_unhash,
1205 					 bool *own_req)
1206 {
1207 	struct inet_request_sock *ireq;
1208 	struct ipv6_pinfo *newnp;
1209 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1210 	struct ipv6_txoptions *opt;
1211 	struct inet_sock *newinet;
1212 	bool found_dup_sk = false;
1213 	struct tcp_sock *newtp;
1214 	struct sock *newsk;
1215 #ifdef CONFIG_TCP_MD5SIG
1216 	struct tcp_md5sig_key *key;
1217 	int l3index;
1218 #endif
1219 	struct flowi6 fl6;
1220 
1221 	if (skb->protocol == htons(ETH_P_IP)) {
1222 		/*
1223 		 *	v6 mapped
1224 		 */
1225 
1226 		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1227 					     req_unhash, own_req);
1228 
1229 		if (!newsk)
1230 			return NULL;
1231 
1232 		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1233 
1234 		newinet = inet_sk(newsk);
1235 		newnp = tcp_inet6_sk(newsk);
1236 		newtp = tcp_sk(newsk);
1237 
1238 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1239 
1240 		newnp->saddr = newsk->sk_v6_rcv_saddr;
1241 
1242 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1243 		if (sk_is_mptcp(newsk))
1244 			mptcpv6_handle_mapped(newsk, true);
1245 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1246 #ifdef CONFIG_TCP_MD5SIG
1247 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1248 #endif
1249 
1250 		newnp->ipv6_mc_list = NULL;
1251 		newnp->ipv6_ac_list = NULL;
1252 		newnp->ipv6_fl_list = NULL;
1253 		newnp->pktoptions  = NULL;
1254 		newnp->opt	   = NULL;
1255 		newnp->mcast_oif   = inet_iif(skb);
1256 		newnp->mcast_hops  = ip_hdr(skb)->ttl;
1257 		newnp->rcv_flowinfo = 0;
1258 		if (np->repflow)
1259 			newnp->flow_label = 0;
1260 
1261 		/*
1262 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1263 		 * here, tcp_create_openreq_child now does this for us, see the comment in
1264 		 * that function for the gory details. -acme
1265 		 */
1266 
1267 		/* This is a tricky place. Until this moment the IPv4 tcp code
1268 		   worked with the IPv6 icsk.icsk_af_ops.
1269 		   Sync it now.
1270 		 */
1271 		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1272 
1273 		return newsk;
1274 	}
1275 
1276 	ireq = inet_rsk(req);
1277 
1278 	if (sk_acceptq_is_full(sk))
1279 		goto out_overflow;
1280 
1281 	if (!dst) {
1282 		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1283 		if (!dst)
1284 			goto out;
1285 	}
1286 
1287 	newsk = tcp_create_openreq_child(sk, req, skb);
1288 	if (!newsk)
1289 		goto out_nonewsk;
1290 
1291 	/*
1292 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1293 	 * count here, tcp_create_openreq_child now does this for us, see the
1294 	 * comment in that function for the gory details. -acme
1295 	 */
1296 
1297 	newsk->sk_gso_type = SKB_GSO_TCPV6;
1298 	ip6_dst_store(newsk, dst, NULL, NULL);
1299 	inet6_sk_rx_dst_set(newsk, skb);
1300 
1301 	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1302 
1303 	newtp = tcp_sk(newsk);
1304 	newinet = inet_sk(newsk);
1305 	newnp = tcp_inet6_sk(newsk);
1306 
1307 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1308 
1309 	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1310 	newnp->saddr = ireq->ir_v6_loc_addr;
1311 	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1312 	newsk->sk_bound_dev_if = ireq->ir_iif;
1313 
1314 	/* Now IPv6 options...
1315 
1316 	   First: no IPv4 options.
1317 	 */
1318 	newinet->inet_opt = NULL;
1319 	newnp->ipv6_mc_list = NULL;
1320 	newnp->ipv6_ac_list = NULL;
1321 	newnp->ipv6_fl_list = NULL;
1322 
1323 	/* Clone RX bits */
1324 	newnp->rxopt.all = np->rxopt.all;
1325 
1326 	newnp->pktoptions = NULL;
1327 	newnp->opt	  = NULL;
1328 	newnp->mcast_oif  = tcp_v6_iif(skb);
1329 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1330 	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1331 	if (np->repflow)
1332 		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1333 
1334 	/* Set ToS of the new socket based upon the value of incoming SYN.
1335 	 * ECT bits are set later in tcp_init_transfer().
1336 	 */
1337 	if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
1338 		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1339 
1340 	/* Clone native IPv6 options from the listening socket (if any).
1341 
1342 	   Yes, keeping a reference count would be much more clever,
1343 	   but we do one more thing here: reattach the optmem
1344 	   to newsk.
1345 	 */
1346 	opt = ireq->ipv6_opt;
1347 	if (!opt)
1348 		opt = rcu_dereference(np->opt);
1349 	if (opt) {
1350 		opt = ipv6_dup_options(newsk, opt);
1351 		RCU_INIT_POINTER(newnp->opt, opt);
1352 	}
1353 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1354 	if (opt)
1355 		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1356 						    opt->opt_flen;
1357 
1358 	tcp_ca_openreq_child(newsk, dst);
1359 
1360 	tcp_sync_mss(newsk, dst_mtu(dst));
1361 	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1362 
1363 	tcp_initialize_rcv_mss(newsk);
1364 
1365 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1366 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1367 
1368 #ifdef CONFIG_TCP_MD5SIG
1369 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1370 
1371 	/* Copy over the MD5 key from the original socket */
1372 	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1373 	if (key) {
1374 		/* We're using one, so create a matching key
1375 		 * on the newsk structure. If we fail to get
1376 		 * memory, then we end up not copying the key
1377 		 * across. Shucks.
1378 		 */
1379 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1380 			       AF_INET6, 128, l3index, key->key, key->keylen,
1381 			       sk_gfp_mask(sk, GFP_ATOMIC));
1382 	}
1383 #endif
1384 
1385 	if (__inet_inherit_port(sk, newsk) < 0) {
1386 		inet_csk_prepare_forced_close(newsk);
1387 		tcp_done(newsk);
1388 		goto out;
1389 	}
1390 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1391 				       &found_dup_sk);
1392 	if (*own_req) {
1393 		tcp_move_syn(newtp, req);
1394 
1395 		/* Clone pktoptions received with SYN, if we own the req */
1396 		if (ireq->pktopts) {
1397 			newnp->pktoptions = skb_clone(ireq->pktopts,
1398 						      sk_gfp_mask(sk, GFP_ATOMIC));
1399 			consume_skb(ireq->pktopts);
1400 			ireq->pktopts = NULL;
1401 			if (newnp->pktoptions) {
1402 				tcp_v6_restore_cb(newnp->pktoptions);
1403 				skb_set_owner_r(newnp->pktoptions, newsk);
1404 			}
1405 		}
1406 	} else {
1407 		if (!req_unhash && found_dup_sk) {
1408 			/* This code path should only be executed in the
1409 			 * syncookie case
1410 			 */
1411 			bh_unlock_sock(newsk);
1412 			sock_put(newsk);
1413 			newsk = NULL;
1414 		}
1415 	}
1416 
1417 	return newsk;
1418 
1419 out_overflow:
1420 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1421 out_nonewsk:
1422 	dst_release(dst);
1423 out:
1424 	tcp_listendrop(sk);
1425 	return NULL;
1426 }
1427 
1428 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1429 							   u32));
1430 /* The socket must have its spinlock held when we get
1431  * here, unless it is a TCP_LISTEN socket.
1432  *
1433  * We have a potential double-lock case here, so even when
1434  * doing backlog processing we use the BH locking scheme.
1435  * This is because we cannot sleep with the original spinlock
1436  * held.
1437  */
1438 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1439 {
1440 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1441 	struct sk_buff *opt_skb = NULL;
1442 	struct tcp_sock *tp;
1443 
1444 	/* Imagine: the socket is IPv6. An IPv4 packet arrives,
1445 	   goes to the IPv4 receive handler and is backlogged.
1446 	   From the backlog it always goes here. Kerboom...
1447 	   Fortunately, tcp_rcv_established and rcv_established
1448 	   handle them correctly, but it is not the case with
1449 	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1450 	 */
1451 
1452 	if (skb->protocol == htons(ETH_P_IP))
1453 		return tcp_v4_do_rcv(sk, skb);
1454 
1455 	/*
1456 	 *	socket locking is here for SMP purposes as backlog rcv
1457 	 *	is currently called with bh processing disabled.
1458 	 */
1459 
1460 	/* Do Stevens' IPV6_PKTOPTIONS.
1461 
1462 	   Yes, guys, this is the only place in our code where we
1463 	   can do this without affecting IPv4.
1464 	   The rest of the code is protocol independent,
1465 	   and I do not like the idea of uglifying IPv4.
1466 
1467 	   Actually, the whole idea behind IPV6_PKTOPTIONS
1468 	   does not look very well thought out. For now we latch
1469 	   the options received in the last packet enqueued
1470 	   by tcp. Feel free to propose a better solution.
1471 					       --ANK (980728)
1472 	 */
1473 	if (np->rxopt.all)
1474 		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1475 
1476 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1477 		struct dst_entry *dst = sk->sk_rx_dst;
1478 
1479 		sock_rps_save_rxhash(sk, skb);
1480 		sk_mark_napi_id(sk, skb);
1481 		if (dst) {
1482 			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1483 			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1484 					    dst, np->rx_dst_cookie) == NULL) {
1485 				dst_release(dst);
1486 				sk->sk_rx_dst = NULL;
1487 			}
1488 		}
1489 
1490 		tcp_rcv_established(sk, skb);
1491 		if (opt_skb)
1492 			goto ipv6_pktoptions;
1493 		return 0;
1494 	}
1495 
1496 	if (tcp_checksum_complete(skb))
1497 		goto csum_err;
1498 
1499 	if (sk->sk_state == TCP_LISTEN) {
1500 		struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1501 
1502 		if (!nsk)
1503 			goto discard;
1504 
1505 		if (nsk != sk) {
1506 			if (tcp_child_process(sk, nsk, skb))
1507 				goto reset;
1508 			if (opt_skb)
1509 				__kfree_skb(opt_skb);
1510 			return 0;
1511 		}
1512 	} else
1513 		sock_rps_save_rxhash(sk, skb);
1514 
1515 	if (tcp_rcv_state_process(sk, skb))
1516 		goto reset;
1517 	if (opt_skb)
1518 		goto ipv6_pktoptions;
1519 	return 0;
1520 
1521 reset:
1522 	tcp_v6_send_reset(sk, skb);
1523 discard:
1524 	if (opt_skb)
1525 		__kfree_skb(opt_skb);
1526 	kfree_skb(skb);
1527 	return 0;
1528 csum_err:
1529 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1530 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1531 	goto discard;
1532 
1533 
1534 ipv6_pktoptions:
1535 	/* What is this about? We get here when:
1536 
1537 	   1. the skb was enqueued by tcp.
1538 	   2. the skb was added to the tail of the read queue, rather than out of order.
1539 	   3. the socket is not in a passive state.
1540 	   4. finally, it really contains options which the user wants to receive.
1541 	 */
1542 	tp = tcp_sk(sk);
1543 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1544 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1545 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1546 			np->mcast_oif = tcp_v6_iif(opt_skb);
1547 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1548 			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1549 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1550 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1551 		if (np->repflow)
1552 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1553 		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1554 			skb_set_owner_r(opt_skb, sk);
1555 			tcp_v6_restore_cb(opt_skb);
1556 			opt_skb = xchg(&np->pktoptions, opt_skb);
1557 		} else {
1558 			__kfree_skb(opt_skb);
1559 			opt_skb = xchg(&np->pktoptions, NULL);
1560 		}
1561 	}
1562 
1563 	kfree_skb(opt_skb);
1564 	return 0;
1565 }
1566 
1567 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1568 			   const struct tcphdr *th)
1569 {
1570 	/* This is tricky: we move IP6CB at its correct location into
1571 	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1572 	 * _decode_session6() uses IP6CB().
1573 	 * barrier() makes sure compiler won't play aliasing games.
1574 	 */
1575 	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1576 		sizeof(struct inet6_skb_parm));
1577 	barrier();
1578 
1579 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1580 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1581 				    skb->len - th->doff*4);
1582 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1583 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1584 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1585 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1586 	TCP_SKB_CB(skb)->sacked = 0;
1587 	TCP_SKB_CB(skb)->has_rxtstamp =
1588 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1589 }
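
/* Worked example (illustrative): for a segment with seq 1000, skb->len 120
 * and a 20-byte header, end_seq above is 1000 + 0 + 0 + 120 - 20 = 1100;
 * SYN and FIN each consume one extra sequence number when set.
 */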
1590 
1591 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1592 {
1593 	struct sk_buff *skb_to_free;
1594 	int sdif = inet6_sdif(skb);
1595 	int dif = inet6_iif(skb);
1596 	const struct tcphdr *th;
1597 	const struct ipv6hdr *hdr;
1598 	bool refcounted;
1599 	struct sock *sk;
1600 	int ret;
1601 	struct net *net = dev_net(skb->dev);
1602 
1603 	if (skb->pkt_type != PACKET_HOST)
1604 		goto discard_it;
1605 
1606 	/*
1607 	 *	Count it even if it's bad.
1608 	 */
1609 	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1610 
1611 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1612 		goto discard_it;
1613 
1614 	th = (const struct tcphdr *)skb->data;
1615 
1616 	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1617 		goto bad_packet;
1618 	if (!pskb_may_pull(skb, th->doff*4))
1619 		goto discard_it;
1620 
1621 	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1622 		goto csum_error;
1623 
1624 	th = (const struct tcphdr *)skb->data;
1625 	hdr = ipv6_hdr(skb);
1626 
1627 lookup:
1628 	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1629 				th->source, th->dest, inet6_iif(skb), sdif,
1630 				&refcounted);
1631 	if (!sk)
1632 		goto no_tcp_socket;
1633 
1634 process:
1635 	if (sk->sk_state == TCP_TIME_WAIT)
1636 		goto do_time_wait;
1637 
1638 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1639 		struct request_sock *req = inet_reqsk(sk);
1640 		bool req_stolen = false;
1641 		struct sock *nsk;
1642 
1643 		sk = req->rsk_listener;
1644 		if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1645 			sk_drops_add(sk, skb);
1646 			reqsk_put(req);
1647 			goto discard_it;
1648 		}
1649 		if (tcp_checksum_complete(skb)) {
1650 			reqsk_put(req);
1651 			goto csum_error;
1652 		}
1653 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1654 			inet_csk_reqsk_queue_drop_and_put(sk, req);
1655 			goto lookup;
1656 		}
1657 		sock_hold(sk);
1658 		refcounted = true;
1659 		nsk = NULL;
1660 		if (!tcp_filter(sk, skb)) {
1661 			th = (const struct tcphdr *)skb->data;
1662 			hdr = ipv6_hdr(skb);
1663 			tcp_v6_fill_cb(skb, hdr, th);
1664 			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1665 		}
1666 		if (!nsk) {
1667 			reqsk_put(req);
1668 			if (req_stolen) {
1669 				/* Another cpu got exclusive access to req
1670 				 * and created a full blown socket.
1671 				 * Try to feed this packet to this socket
1672 				 * instead of discarding it.
1673 				 */
1674 				tcp_v6_restore_cb(skb);
1675 				sock_put(sk);
1676 				goto lookup;
1677 			}
1678 			goto discard_and_relse;
1679 		}
1680 		if (nsk == sk) {
1681 			reqsk_put(req);
1682 			tcp_v6_restore_cb(skb);
1683 		} else if (tcp_child_process(sk, nsk, skb)) {
1684 			tcp_v6_send_reset(nsk, skb);
1685 			goto discard_and_relse;
1686 		} else {
1687 			sock_put(sk);
1688 			return 0;
1689 		}
1690 	}
1691 	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1692 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1693 		goto discard_and_relse;
1694 	}
1695 
1696 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1697 		goto discard_and_relse;
1698 
1699 	if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1700 		goto discard_and_relse;
1701 
1702 	if (tcp_filter(sk, skb))
1703 		goto discard_and_relse;
1704 	th = (const struct tcphdr *)skb->data;
1705 	hdr = ipv6_hdr(skb);
1706 	tcp_v6_fill_cb(skb, hdr, th);
1707 
1708 	skb->dev = NULL;
1709 
1710 	if (sk->sk_state == TCP_LISTEN) {
1711 		ret = tcp_v6_do_rcv(sk, skb);
1712 		goto put_and_return;
1713 	}
1714 
1715 	sk_incoming_cpu_update(sk);
1716 
1717 	bh_lock_sock_nested(sk);
1718 	tcp_segs_in(tcp_sk(sk), skb);
1719 	ret = 0;
1720 	if (!sock_owned_by_user(sk)) {
1721 		skb_to_free = sk->sk_rx_skb_cache;
1722 		sk->sk_rx_skb_cache = NULL;
1723 		ret = tcp_v6_do_rcv(sk, skb);
1724 	} else {
1725 		if (tcp_add_backlog(sk, skb))
1726 			goto discard_and_relse;
1727 		skb_to_free = NULL;
1728 	}
1729 	bh_unlock_sock(sk);
1730 	if (skb_to_free)
1731 		__kfree_skb(skb_to_free);
1732 put_and_return:
1733 	if (refcounted)
1734 		sock_put(sk);
1735 	return ret ? -1 : 0;
1736 
1737 no_tcp_socket:
1738 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1739 		goto discard_it;
1740 
1741 	tcp_v6_fill_cb(skb, hdr, th);
1742 
1743 	if (tcp_checksum_complete(skb)) {
1744 csum_error:
1745 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1746 bad_packet:
1747 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1748 	} else {
1749 		tcp_v6_send_reset(NULL, skb);
1750 	}
1751 
1752 discard_it:
1753 	kfree_skb(skb);
1754 	return 0;
1755 
1756 discard_and_relse:
1757 	sk_drops_add(sk, skb);
1758 	if (refcounted)
1759 		sock_put(sk);
1760 	goto discard_it;
1761 
1762 do_time_wait:
1763 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1764 		inet_twsk_put(inet_twsk(sk));
1765 		goto discard_it;
1766 	}
1767 
1768 	tcp_v6_fill_cb(skb, hdr, th);
1769 
1770 	if (tcp_checksum_complete(skb)) {
1771 		inet_twsk_put(inet_twsk(sk));
1772 		goto csum_error;
1773 	}
1774 
1775 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1776 	case TCP_TW_SYN:
1777 	{
1778 		struct sock *sk2;
1779 
1780 		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1781 					    skb, __tcp_hdrlen(th),
1782 					    &ipv6_hdr(skb)->saddr, th->source,
1783 					    &ipv6_hdr(skb)->daddr,
1784 					    ntohs(th->dest),
1785 					    tcp_v6_iif_l3_slave(skb),
1786 					    sdif);
1787 		if (sk2) {
1788 			struct inet_timewait_sock *tw = inet_twsk(sk);
1789 			inet_twsk_deschedule_put(tw);
1790 			sk = sk2;
1791 			tcp_v6_restore_cb(skb);
1792 			refcounted = false;
1793 			goto process;
1794 		}
1795 	}
1796 		/* to ACK */
1797 		fallthrough;
1798 	case TCP_TW_ACK:
1799 		tcp_v6_timewait_ack(sk, skb);
1800 		break;
1801 	case TCP_TW_RST:
1802 		tcp_v6_send_reset(sk, skb);
1803 		inet_twsk_deschedule_put(inet_twsk(sk));
1804 		goto discard_it;
1805 	case TCP_TW_SUCCESS:
1806 		;
1807 	}
1808 	goto discard_it;
1809 }
1810 
1811 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1812 {
1813 	const struct ipv6hdr *hdr;
1814 	const struct tcphdr *th;
1815 	struct sock *sk;
1816 
1817 	if (skb->pkt_type != PACKET_HOST)
1818 		return;
1819 
1820 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1821 		return;
1822 
1823 	hdr = ipv6_hdr(skb);
1824 	th = tcp_hdr(skb);
1825 
1826 	if (th->doff < sizeof(struct tcphdr) / 4)
1827 		return;
1828 
1829 	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
1830 	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1831 					&hdr->saddr, th->source,
1832 					&hdr->daddr, ntohs(th->dest),
1833 					inet6_iif(skb), inet6_sdif(skb));
1834 	if (sk) {
1835 		skb->sk = sk;
1836 		skb->destructor = sock_edemux;
1837 		if (sk_fullsock(sk)) {
1838 			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1839 
1840 			if (dst)
1841 				dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1842 			if (dst &&
1843 			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1844 				skb_dst_set_noref(skb, dst);
1845 		}
1846 	}
1847 }
1848 
1849 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1850 	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
1851 	.twsk_unique	= tcp_twsk_unique,
1852 	.twsk_destructor = tcp_twsk_destructor,
1853 };
1854 
1855 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1856 {
1857 	struct ipv6_pinfo *np = inet6_sk(sk);
1858 
1859 	__tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1860 }
1861 
1862 const struct inet_connection_sock_af_ops ipv6_specific = {
1863 	.queue_xmit	   = inet6_csk_xmit,
1864 	.send_check	   = tcp_v6_send_check,
1865 	.rebuild_header	   = inet6_sk_rebuild_header,
1866 	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
1867 	.conn_request	   = tcp_v6_conn_request,
1868 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1869 	.net_header_len	   = sizeof(struct ipv6hdr),
1870 	.net_frag_header_len = sizeof(struct frag_hdr),
1871 	.setsockopt	   = ipv6_setsockopt,
1872 	.getsockopt	   = ipv6_getsockopt,
1873 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1874 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1875 	.mtu_reduced	   = tcp_v6_mtu_reduced,
1876 };
1877 
1878 #ifdef CONFIG_TCP_MD5SIG
1879 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1880 	.md5_lookup	=	tcp_v6_md5_lookup,
1881 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
1882 	.md5_parse	=	tcp_v6_parse_md5_keys,
1883 };
1884 #endif
1885 
1886 /*
1887  *	TCP over IPv4 via INET6 API
1888  */
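/* Installed in place of ipv6_specific when an AF_INET6 socket ends up
 * talking to an IPv4-mapped peer, so transmit and header handling follow
 * the IPv4 code paths.
 */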
1889 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1890 	.queue_xmit	   = ip_queue_xmit,
1891 	.send_check	   = tcp_v4_send_check,
1892 	.rebuild_header	   = inet_sk_rebuild_header,
1893 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1894 	.conn_request	   = tcp_v6_conn_request,
1895 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1896 	.net_header_len	   = sizeof(struct iphdr),
1897 	.setsockopt	   = ipv6_setsockopt,
1898 	.getsockopt	   = ipv6_getsockopt,
1899 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1900 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1901 	.mtu_reduced	   = tcp_v4_mtu_reduced,
1902 };
1903 
1904 #ifdef CONFIG_TCP_MD5SIG
1905 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1906 	.md5_lookup	=	tcp_v4_md5_lookup,
1907 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1908 	.md5_parse	=	tcp_v6_parse_md5_keys,
1909 };
1910 #endif
1911 
1912 /* NOTE: A lot of things are set to zero explicitly by the call to
1913  *       sk_alloc(), so they need not be done here.
1914  */
1915 static int tcp_v6_init_sock(struct sock *sk)
1916 {
1917 	struct inet_connection_sock *icsk = inet_csk(sk);
1918 
1919 	tcp_init_sock(sk);
1920 
1921 	icsk->icsk_af_ops = &ipv6_specific;
1922 
1923 #ifdef CONFIG_TCP_MD5SIG
1924 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1925 #endif
1926 
1927 	return 0;
1928 }
1929 
1930 static void tcp_v6_destroy_sock(struct sock *sk)
1931 {
1932 	tcp_v4_destroy_sock(sk);
1933 	inet6_destroy_sock(sk);
1934 }
1935 
1936 #ifdef CONFIG_PROC_FS
1937 /* Proc filesystem TCPv6 sock list dumping. */
1938 static void get_openreq6(struct seq_file *seq,
1939 			 const struct request_sock *req, int i)
1940 {
1941 	long ttd = req->rsk_timer.expires - jiffies;
1942 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1943 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1944 
1945 	if (ttd < 0)
1946 		ttd = 0;
1947 
1948 	seq_printf(seq,
1949 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1950 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1951 		   i,
1952 		   src->s6_addr32[0], src->s6_addr32[1],
1953 		   src->s6_addr32[2], src->s6_addr32[3],
1954 		   inet_rsk(req)->ir_num,
1955 		   dest->s6_addr32[0], dest->s6_addr32[1],
1956 		   dest->s6_addr32[2], dest->s6_addr32[3],
1957 		   ntohs(inet_rsk(req)->ir_rmt_port),
1958 		   TCP_SYN_RECV,
1959 		   0, 0, /* could print option size, but that is af dependent. */
1960 		   1,   /* timers active (only the expire timer) */
1961 		   jiffies_to_clock_t(ttd),
1962 		   req->num_timeout,
1963 		   from_kuid_munged(seq_user_ns(seq),
1964 				    sock_i_uid(req->rsk_listener)),
1965 		   0,  /* non standard timer */
1966 		   0, /* open_requests have no inode */
1967 		   0, req);
1968 }
1969 
1970 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1971 {
1972 	const struct in6_addr *dest, *src;
1973 	__u16 destp, srcp;
1974 	int timer_active;
1975 	unsigned long timer_expires;
1976 	const struct inet_sock *inet = inet_sk(sp);
1977 	const struct tcp_sock *tp = tcp_sk(sp);
1978 	const struct inet_connection_sock *icsk = inet_csk(sp);
1979 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1980 	int rx_queue;
1981 	int state;
1982 
1983 	dest  = &sp->sk_v6_daddr;
1984 	src   = &sp->sk_v6_rcv_saddr;
1985 	destp = ntohs(inet->inet_dport);
1986 	srcp  = ntohs(inet->inet_sport);
1987 
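	/* timer_active follows the /proc/net/tcp convention:
	 * 1 retransmit (or RACK/loss-probe), 4 zero-window probe,
	 * 2 keepalive (sk_timer), 0 no timer pending.
	 */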
1988 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1989 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1990 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1991 		timer_active	= 1;
1992 		timer_expires	= icsk->icsk_timeout;
1993 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1994 		timer_active	= 4;
1995 		timer_expires	= icsk->icsk_timeout;
1996 	} else if (timer_pending(&sp->sk_timer)) {
1997 		timer_active	= 2;
1998 		timer_expires	= sp->sk_timer.expires;
1999 	} else {
2000 		timer_active	= 0;
2001 		timer_expires	= jiffies;
2002 	}
2003 
2004 	state = inet_sk_state_load(sp);
2005 	if (state == TCP_LISTEN)
2006 		rx_queue = READ_ONCE(sp->sk_ack_backlog);
2007 	else
2008 		/* Because we don't lock the socket,
2009 		 * we might find a transient negative value.
2010 		 */
2011 		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2012 				      READ_ONCE(tp->copied_seq), 0);
2013 
2014 	seq_printf(seq,
2015 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2016 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2017 		   i,
2018 		   src->s6_addr32[0], src->s6_addr32[1],
2019 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2020 		   dest->s6_addr32[0], dest->s6_addr32[1],
2021 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2022 		   state,
2023 		   READ_ONCE(tp->write_seq) - tp->snd_una,
2024 		   rx_queue,
2025 		   timer_active,
2026 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
2027 		   icsk->icsk_retransmits,
2028 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2029 		   icsk->icsk_probes_out,
2030 		   sock_i_ino(sp),
2031 		   refcount_read(&sp->sk_refcnt), sp,
2032 		   jiffies_to_clock_t(icsk->icsk_rto),
2033 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
2034 		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2035 		   tp->snd_cwnd,
2036 		   state == TCP_LISTEN ?
2037 			fastopenq->max_qlen :
2038 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2039 		   );
2040 }
2041 
2042 static void get_timewait6_sock(struct seq_file *seq,
2043 			       struct inet_timewait_sock *tw, int i)
2044 {
2045 	long delta = tw->tw_timer.expires - jiffies;
2046 	const struct in6_addr *dest, *src;
2047 	__u16 destp, srcp;
2048 
2049 	dest = &tw->tw_v6_daddr;
2050 	src  = &tw->tw_v6_rcv_saddr;
2051 	destp = ntohs(tw->tw_dport);
2052 	srcp  = ntohs(tw->tw_sport);
2053 
2054 	seq_printf(seq,
2055 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2056 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2057 		   i,
2058 		   src->s6_addr32[0], src->s6_addr32[1],
2059 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2060 		   dest->s6_addr32[0], dest->s6_addr32[1],
2061 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2062 		   tw->tw_substate, 0, 0,
2063 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2064 		   refcount_read(&tw->tw_refcnt), tw);
2065 }
2066 
2067 static int tcp6_seq_show(struct seq_file *seq, void *v)
2068 {
2069 	struct tcp_iter_state *st;
2070 	struct sock *sk = v;
2071 
2072 	if (v == SEQ_START_TOKEN) {
2073 		seq_puts(seq,
2074 			 "  sl  "
2075 			 "local_address                         "
2076 			 "remote_address                        "
2077 			 "st tx_queue rx_queue tr tm->when retrnsmt"
2078 			 "   uid  timeout inode\n");
2079 		goto out;
2080 	}
2081 	st = seq->private;
2082 
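	/* The iterator can hand back three kinds of objects; sk_state tells
	 * them apart: timewait sockets, request sockets still in
	 * TCP_NEW_SYN_RECV, and ordinary full sockets.
	 */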
2083 	if (sk->sk_state == TCP_TIME_WAIT)
2084 		get_timewait6_sock(seq, v, st->num);
2085 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2086 		get_openreq6(seq, v, st->num);
2087 	else
2088 		get_tcp6_sock(seq, v, st->num);
2089 out:
2090 	return 0;
2091 }
2092 
2093 static const struct seq_operations tcp6_seq_ops = {
2094 	.show		= tcp6_seq_show,
2095 	.start		= tcp_seq_start,
2096 	.next		= tcp_seq_next,
2097 	.stop		= tcp_seq_stop,
2098 };
2099 
2100 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2101 	.family		= AF_INET6,
2102 };
2103 
2104 int __net_init tcp6_proc_init(struct net *net)
2105 {
2106 	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2107 			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2108 		return -ENOMEM;
2109 	return 0;
2110 }
2111 
2112 void tcp6_proc_exit(struct net *net)
2113 {
2114 	remove_proc_entry("tcp6", net->proc_net);
2115 }
2116 #endif
2117 
2118 struct proto tcpv6_prot = {
2119 	.name			= "TCPv6",
2120 	.owner			= THIS_MODULE,
2121 	.close			= tcp_close,
2122 	.pre_connect		= tcp_v6_pre_connect,
2123 	.connect		= tcp_v6_connect,
2124 	.disconnect		= tcp_disconnect,
2125 	.accept			= inet_csk_accept,
2126 	.ioctl			= tcp_ioctl,
2127 	.init			= tcp_v6_init_sock,
2128 	.destroy		= tcp_v6_destroy_sock,
2129 	.shutdown		= tcp_shutdown,
2130 	.setsockopt		= tcp_setsockopt,
2131 	.getsockopt		= tcp_getsockopt,
2132 	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
2133 	.keepalive		= tcp_set_keepalive,
2134 	.recvmsg		= tcp_recvmsg,
2135 	.sendmsg		= tcp_sendmsg,
2136 	.sendpage		= tcp_sendpage,
2137 	.backlog_rcv		= tcp_v6_do_rcv,
2138 	.release_cb		= tcp_release_cb,
2139 	.hash			= inet6_hash,
2140 	.unhash			= inet_unhash,
2141 	.get_port		= inet_csk_get_port,
2142 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2143 	.leave_memory_pressure	= tcp_leave_memory_pressure,
2144 	.stream_memory_free	= tcp_stream_memory_free,
2145 	.sockets_allocated	= &tcp_sockets_allocated,
2146 	.memory_allocated	= &tcp_memory_allocated,
2147 	.memory_pressure	= &tcp_memory_pressure,
2148 	.orphan_count		= &tcp_orphan_count,
2149 	.sysctl_mem		= sysctl_tcp_mem,
2150 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2151 	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2152 	.max_header		= MAX_TCP_HEADER,
2153 	.obj_size		= sizeof(struct tcp6_sock),
2154 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2155 	.twsk_prot		= &tcp6_timewait_sock_ops,
2156 	.rsk_prot		= &tcp6_request_sock_ops,
2157 	.h.hashinfo		= &tcp_hashinfo,
2158 	.no_autobind		= true,
2159 	.diag_destroy		= tcp_abort,
2160 };
2161 EXPORT_SYMBOL_GPL(tcpv6_prot);
2162 
2163 /* thinking of making this const? Don't.
2164  * early_demux can change based on sysctl.
2165  */
2166 static struct inet6_protocol tcpv6_protocol = {
2167 	.early_demux	=	tcp_v6_early_demux,
2168 	.early_demux_handler =  tcp_v6_early_demux,
2169 	.handler	=	tcp_v6_rcv,
2170 	.err_handler	=	tcp_v6_err,
2171 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2172 };
2173 
2174 static struct inet_protosw tcpv6_protosw = {
2175 	.type		=	SOCK_STREAM,
2176 	.protocol	=	IPPROTO_TCP,
2177 	.prot		=	&tcpv6_prot,
2178 	.ops		=	&inet6_stream_ops,
2179 	.flags		=	INET_PROTOSW_PERMANENT |
2180 				INET_PROTOSW_ICSK,
2181 };
2182 
2183 static int __net_init tcpv6_net_init(struct net *net)
2184 {
2185 	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2186 				    SOCK_RAW, IPPROTO_TCP, net);
2187 }
2188 
2189 static void __net_exit tcpv6_net_exit(struct net *net)
2190 {
2191 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2192 }
2193 
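/* Batched exit: purging here rather than in ->exit is meant to let a single
 * walk of the hash table cover every netns on the exit list.
 */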
2194 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2195 {
2196 	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2197 }
2198 
2199 static struct pernet_operations tcpv6_net_ops = {
2200 	.init	    = tcpv6_net_init,
2201 	.exit	    = tcpv6_net_exit,
2202 	.exit_batch = tcpv6_net_exit_batch,
2203 };
2204 
2205 int __init tcpv6_init(void)
2206 {
2207 	int ret;
2208 
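	/* Register the protocol handler, then the protosw entry, per-netns
	 * state and finally MPTCP-over-IPv6; failures unwind in reverse
	 * order below.
	 */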
2209 	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2210 	if (ret)
2211 		goto out;
2212 
2213 	/* register inet6 protocol */
2214 	ret = inet6_register_protosw(&tcpv6_protosw);
2215 	if (ret)
2216 		goto out_tcpv6_protocol;
2217 
2218 	ret = register_pernet_subsys(&tcpv6_net_ops);
2219 	if (ret)
2220 		goto out_tcpv6_protosw;
2221 
2222 	ret = mptcpv6_init();
2223 	if (ret)
2224 		goto out_tcpv6_pernet_subsys;
2225 
2226 out:
2227 	return ret;
2228 
2229 out_tcpv6_pernet_subsys:
2230 	unregister_pernet_subsys(&tcpv6_net_ops);
2231 out_tcpv6_protosw:
2232 	inet6_unregister_protosw(&tcpv6_protosw);
2233 out_tcpv6_protocol:
2234 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2235 	goto out;
2236 }
2237 
2238 void tcpv6_exit(void)
2239 {
2240 	unregister_pernet_subsys(&tcpv6_net_ops);
2241 	inet6_unregister_protosw(&tcpv6_protosw);
2242 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2243 }
2244