xref: /openbmc/linux/net/ipv6/tcp_ipv6.c (revision aad29a73199b7fbccfbabea3f1ee627ad1924f52)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	TCP over IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on:
10  *	linux/net/ipv4/tcp.c
11  *	linux/net/ipv4/tcp_input.c
12  *	linux/net/ipv4/tcp_output.c
13  *
14  *	Fixes:
15  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
16  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
17  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
18  *					a single port at the same time.
19  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
20  */
21 
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43 
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62 
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65 
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68 
69 #include <trace/events/tcp.h>
70 
71 static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 				      struct request_sock *req);
74 
75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76 
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 						   const struct in6_addr *addr,
85 						   int l3index)
86 {
87 	return NULL;
88 }
89 #endif
90 
91 /* Helper returning the inet6 address from a given tcp socket.
92  * It can be used in TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allows compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 #define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
97 					      struct tcp6_sock, tcp)->inet6)
98 
99 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
100 {
101 	struct dst_entry *dst = skb_dst(skb);
102 
103 	if (dst && dst_hold_safe(dst)) {
104 		rcu_assign_pointer(sk->sk_rx_dst, dst);
105 		sk->sk_rx_dst_ifindex = skb->skb_iif;
106 		sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
107 	}
108 }
109 
110 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
111 {
112 	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
113 				ipv6_hdr(skb)->saddr.s6_addr32,
114 				tcp_hdr(skb)->dest,
115 				tcp_hdr(skb)->source);
116 }
117 
118 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
119 {
120 	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
121 				   ipv6_hdr(skb)->saddr.s6_addr32);
122 }
123 
124 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
125 			      int addr_len)
126 {
127 	/* This check is replicated from tcp_v6_connect() and intended to
128 	 * prevent BPF program called below from accessing bytes that are out
129 	 * of the bound specified by user in addr_len.
130 	 */
131 	if (addr_len < SIN6_LEN_RFC2133)
132 		return -EINVAL;
133 
134 	sock_owned_by_me(sk);
135 
136 	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
137 }
138 
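/*
 * Editor's note (summary added for readability): tcp_v6_connect() validates
 * the sockaddr_in6, falls back to the IPv4 connect path for v4-mapped
 * destinations, performs the flow/route lookup, binds a source address if
 * needed, hashes the socket and finally calls tcp_connect(). From user
 * space this is reached through an ordinary connect(); a rough, illustrative
 * sketch (addresses and port are made up):
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	struct sockaddr_in6 sa = { .sin6_family = AF_INET6,
 *				   .sin6_port = htons(443) };
 *	inet_pton(AF_INET6, "2001:db8::1", &sa.sin6_addr);
 *	connect(fd, (struct sockaddr *)&sa, sizeof(sa));
 */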
139 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
140 			  int addr_len)
141 {
142 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
143 	struct inet_connection_sock *icsk = inet_csk(sk);
144 	struct in6_addr *saddr = NULL, *final_p, final;
145 	struct inet_timewait_death_row *tcp_death_row;
146 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
147 	struct inet_sock *inet = inet_sk(sk);
148 	struct tcp_sock *tp = tcp_sk(sk);
149 	struct net *net = sock_net(sk);
150 	struct ipv6_txoptions *opt;
151 	struct dst_entry *dst;
152 	struct flowi6 fl6;
153 	int addr_type;
154 	int err;
155 
156 	if (addr_len < SIN6_LEN_RFC2133)
157 		return -EINVAL;
158 
159 	if (usin->sin6_family != AF_INET6)
160 		return -EAFNOSUPPORT;
161 
162 	memset(&fl6, 0, sizeof(fl6));
163 
164 	if (np->sndflow) {
165 		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
166 		IP6_ECN_flow_init(fl6.flowlabel);
167 		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
168 			struct ip6_flowlabel *flowlabel;
169 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
170 			if (IS_ERR(flowlabel))
171 				return -EINVAL;
172 			fl6_sock_release(flowlabel);
173 		}
174 	}
175 
176 	/*
177 	 *	connect() to INADDR_ANY means loopback (BSD'ism).
178 	 */
179 
180 	if (ipv6_addr_any(&usin->sin6_addr)) {
181 		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
182 			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
183 					       &usin->sin6_addr);
184 		else
185 			usin->sin6_addr = in6addr_loopback;
186 	}
187 
188 	addr_type = ipv6_addr_type(&usin->sin6_addr);
189 
190 	if (addr_type & IPV6_ADDR_MULTICAST)
191 		return -ENETUNREACH;
192 
193 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
194 		if (addr_len >= sizeof(struct sockaddr_in6) &&
195 		    usin->sin6_scope_id) {
196 			/* If interface is set while binding, indices
197 			 * must coincide.
198 			 */
199 			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
200 				return -EINVAL;
201 
202 			sk->sk_bound_dev_if = usin->sin6_scope_id;
203 		}
204 
205 		/* Connect to link-local address requires an interface */
206 		if (!sk->sk_bound_dev_if)
207 			return -EINVAL;
208 	}
209 
210 	if (tp->rx_opt.ts_recent_stamp &&
211 	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
212 		tp->rx_opt.ts_recent = 0;
213 		tp->rx_opt.ts_recent_stamp = 0;
214 		WRITE_ONCE(tp->write_seq, 0);
215 	}
216 
217 	sk->sk_v6_daddr = usin->sin6_addr;
218 	np->flow_label = fl6.flowlabel;
219 
220 	/*
221 	 *	TCP over IPv4
222 	 */
223 
224 	if (addr_type & IPV6_ADDR_MAPPED) {
225 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
226 		struct sockaddr_in sin;
227 
228 		if (ipv6_only_sock(sk))
229 			return -ENETUNREACH;
230 
231 		sin.sin_family = AF_INET;
232 		sin.sin_port = usin->sin6_port;
233 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
234 
235 		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
236 		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
237 		if (sk_is_mptcp(sk))
238 			mptcpv6_handle_mapped(sk, true);
239 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
240 #ifdef CONFIG_TCP_MD5SIG
241 		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
242 #endif
243 
244 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
245 
246 		if (err) {
247 			icsk->icsk_ext_hdr_len = exthdrlen;
248 			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
249 			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
250 			if (sk_is_mptcp(sk))
251 				mptcpv6_handle_mapped(sk, false);
252 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
253 #ifdef CONFIG_TCP_MD5SIG
254 			tp->af_specific = &tcp_sock_ipv6_specific;
255 #endif
256 			goto failure;
257 		}
258 		np->saddr = sk->sk_v6_rcv_saddr;
259 
260 		return err;
261 	}
262 
263 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
264 		saddr = &sk->sk_v6_rcv_saddr;
265 
266 	fl6.flowi6_proto = IPPROTO_TCP;
267 	fl6.daddr = sk->sk_v6_daddr;
268 	fl6.saddr = saddr ? *saddr : np->saddr;
269 	fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
270 	fl6.flowi6_oif = sk->sk_bound_dev_if;
271 	fl6.flowi6_mark = sk->sk_mark;
272 	fl6.fl6_dport = usin->sin6_port;
273 	fl6.fl6_sport = inet->inet_sport;
274 	fl6.flowi6_uid = sk->sk_uid;
275 
276 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
277 	final_p = fl6_update_dst(&fl6, opt, &final);
278 
279 	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
280 
281 	dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
282 	if (IS_ERR(dst)) {
283 		err = PTR_ERR(dst);
284 		goto failure;
285 	}
286 
287 	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
288 
289 	if (!saddr) {
290 		saddr = &fl6.saddr;
291 
292 		err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
293 		if (err)
294 			goto failure;
295 	}
296 
297 	/* set the source address */
298 	np->saddr = *saddr;
299 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
300 
301 	sk->sk_gso_type = SKB_GSO_TCPV6;
302 	ip6_dst_store(sk, dst, NULL, NULL);
303 
304 	icsk->icsk_ext_hdr_len = 0;
305 	if (opt)
306 		icsk->icsk_ext_hdr_len = opt->opt_flen +
307 					 opt->opt_nflen;
308 
309 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
310 
311 	inet->inet_dport = usin->sin6_port;
312 
313 	tcp_set_state(sk, TCP_SYN_SENT);
314 	err = inet6_hash_connect(tcp_death_row, sk);
315 	if (err)
316 		goto late_failure;
317 
318 	sk_set_txhash(sk);
319 
320 	if (likely(!tp->repair)) {
321 		if (!tp->write_seq)
322 			WRITE_ONCE(tp->write_seq,
323 				   secure_tcpv6_seq(np->saddr.s6_addr32,
324 						    sk->sk_v6_daddr.s6_addr32,
325 						    inet->inet_sport,
326 						    inet->inet_dport));
327 		tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
328 						   sk->sk_v6_daddr.s6_addr32);
329 	}
330 
331 	if (tcp_fastopen_defer_connect(sk, &err))
332 		return err;
333 	if (err)
334 		goto late_failure;
335 
336 	err = tcp_connect(sk);
337 	if (err)
338 		goto late_failure;
339 
340 	return 0;
341 
342 late_failure:
343 	tcp_set_state(sk, TCP_CLOSE);
344 	inet_bhash2_reset_saddr(sk);
345 failure:
346 	inet->inet_dport = 0;
347 	sk->sk_route_caps = 0;
348 	return err;
349 }
350 
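/*
 * Editor's note: called once an ICMPv6 Packet Too Big has been recorded in
 * tp->mtu_info, either directly from tcp_v6_err() or deferred through the
 * TCP_MTU_REDUCED_DEFERRED flag when the socket was owned by user space.
 */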
351 static void tcp_v6_mtu_reduced(struct sock *sk)
352 {
353 	struct dst_entry *dst;
354 	u32 mtu;
355 
356 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
357 		return;
358 
359 	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
360 
361 	/* Drop requests trying to increase our current mss.
362 	 * Check done in __ip6_rt_update_pmtu() is too late.
363 	 */
364 	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
365 		return;
366 
367 	dst = inet6_csk_update_pmtu(sk, mtu);
368 	if (!dst)
369 		return;
370 
371 	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
372 		tcp_sync_mss(sk, dst_mtu(dst));
373 		tcp_simple_retransmit(sk);
374 	}
375 }
376 
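/*
 * Editor's note (added summary): ICMPv6 error handler for TCP. It looks up
 * the socket for the offending segment, handles redirects and Packet Too
 * Big messages, and converts the remaining errors via icmpv6_err_convert(),
 * either reporting them immediately or stashing them in sk_err_soft when
 * the socket is owned by user space.
 */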
377 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
378 		u8 type, u8 code, int offset, __be32 info)
379 {
380 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
381 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
382 	struct net *net = dev_net(skb->dev);
383 	struct request_sock *fastopen;
384 	struct ipv6_pinfo *np;
385 	struct tcp_sock *tp;
386 	__u32 seq, snd_una;
387 	struct sock *sk;
388 	bool fatal;
389 	int err;
390 
391 	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
392 					&hdr->daddr, th->dest,
393 					&hdr->saddr, ntohs(th->source),
394 					skb->dev->ifindex, inet6_sdif(skb));
395 
396 	if (!sk) {
397 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
398 				  ICMP6_MIB_INERRORS);
399 		return -ENOENT;
400 	}
401 
402 	if (sk->sk_state == TCP_TIME_WAIT) {
403 		inet_twsk_put(inet_twsk(sk));
404 		return 0;
405 	}
406 	seq = ntohl(th->seq);
407 	fatal = icmpv6_err_convert(type, code, &err);
408 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
409 		tcp_req_err(sk, seq, fatal);
410 		return 0;
411 	}
412 
413 	bh_lock_sock(sk);
414 	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
415 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
416 
417 	if (sk->sk_state == TCP_CLOSE)
418 		goto out;
419 
420 	if (static_branch_unlikely(&ip6_min_hopcount)) {
421 		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
422 		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
423 			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
424 			goto out;
425 		}
426 	}
427 
428 	tp = tcp_sk(sk);
429 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
430 	fastopen = rcu_dereference(tp->fastopen_rsk);
431 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
432 	if (sk->sk_state != TCP_LISTEN &&
433 	    !between(seq, snd_una, tp->snd_nxt)) {
434 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
435 		goto out;
436 	}
437 
438 	np = tcp_inet6_sk(sk);
439 
440 	if (type == NDISC_REDIRECT) {
441 		if (!sock_owned_by_user(sk)) {
442 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
443 
444 			if (dst)
445 				dst->ops->redirect(dst, sk, skb);
446 		}
447 		goto out;
448 	}
449 
450 	if (type == ICMPV6_PKT_TOOBIG) {
451 		u32 mtu = ntohl(info);
452 
453 		/* We are not interested in TCP_LISTEN and open_requests
454 		 * (SYN-ACKs sent out by Linux are always < 576 bytes so
455 		 * they should go through unfragmented).
456 		 */
457 		if (sk->sk_state == TCP_LISTEN)
458 			goto out;
459 
460 		if (!ip6_sk_accept_pmtu(sk))
461 			goto out;
462 
463 		if (mtu < IPV6_MIN_MTU)
464 			goto out;
465 
466 		WRITE_ONCE(tp->mtu_info, mtu);
467 
468 		if (!sock_owned_by_user(sk))
469 			tcp_v6_mtu_reduced(sk);
470 		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
471 					   &sk->sk_tsq_flags))
472 			sock_hold(sk);
473 		goto out;
474 	}
475 
476 
477 	/* Might be for a request_sock */
478 	switch (sk->sk_state) {
479 	case TCP_SYN_SENT:
480 	case TCP_SYN_RECV:
481 		/* Only in fast or simultaneous open. If a fast open socket is
482 		 * already accepted it is treated as a connected one below.
483 		 */
484 		if (fastopen && !fastopen->sk)
485 			break;
486 
487 		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
488 
489 		if (!sock_owned_by_user(sk))
490 			tcp_done_with_error(sk, err);
491 		else
492 			WRITE_ONCE(sk->sk_err_soft, err);
493 		goto out;
494 	case TCP_LISTEN:
495 		break;
496 	default:
497 		/* check if this ICMP message allows revert of backoff.
498 		 * (see RFC 6069)
499 		 */
500 		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
501 		    code == ICMPV6_NOROUTE)
502 			tcp_ld_RTO_revert(sk, seq);
503 	}
504 
505 	if (!sock_owned_by_user(sk) && np->recverr) {
506 		WRITE_ONCE(sk->sk_err, err);
507 		sk_error_report(sk);
508 	} else {
509 		WRITE_ONCE(sk->sk_err_soft, err);
510 	}
511 out:
512 	bh_unlock_sock(sk);
513 	sock_put(sk);
514 	return 0;
515 }
516 
517 
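/*
 * Editor's note: builds and transmits a SYN-ACK for the given request
 * socket; the route is taken from @dst when the caller already holds one,
 * otherwise inet6_csk_route_req() is consulted.
 */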
518 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
519 			      struct flowi *fl,
520 			      struct request_sock *req,
521 			      struct tcp_fastopen_cookie *foc,
522 			      enum tcp_synack_type synack_type,
523 			      struct sk_buff *syn_skb)
524 {
525 	struct inet_request_sock *ireq = inet_rsk(req);
526 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
527 	struct ipv6_txoptions *opt;
528 	struct flowi6 *fl6 = &fl->u.ip6;
529 	struct sk_buff *skb;
530 	int err = -ENOMEM;
531 	u8 tclass;
532 
533 	/* First, grab a route. */
534 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
535 					       IPPROTO_TCP)) == NULL)
536 		goto done;
537 
538 	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
539 
540 	if (skb) {
541 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
542 				    &ireq->ir_v6_rmt_addr);
543 
544 		fl6->daddr = ireq->ir_v6_rmt_addr;
545 		if (np->repflow && ireq->pktopts)
546 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
547 
548 		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
549 				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
550 				(np->tclass & INET_ECN_MASK) :
551 				np->tclass;
552 
553 		if (!INET_ECN_is_capable(tclass) &&
554 		    tcp_bpf_ca_needs_ecn((struct sock *)req))
555 			tclass |= INET_ECN_ECT_0;
556 
557 		rcu_read_lock();
558 		opt = ireq->ipv6_opt;
559 		if (!opt)
560 			opt = rcu_dereference(np->opt);
561 		err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
562 			       opt, tclass, sk->sk_priority);
563 		rcu_read_unlock();
564 		err = net_xmit_eval(err);
565 	}
566 
567 done:
568 	return err;
569 }
570 
571 
572 static void tcp_v6_reqsk_destructor(struct request_sock *req)
573 {
574 	kfree(inet_rsk(req)->ipv6_opt);
575 	consume_skb(inet_rsk(req)->pktopts);
576 }
577 
578 #ifdef CONFIG_TCP_MD5SIG
579 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
580 						   const struct in6_addr *addr,
581 						   int l3index)
582 {
583 	return tcp_md5_do_lookup(sk, l3index,
584 				 (union tcp_md5_addr *)addr, AF_INET6);
585 }
586 
587 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
588 						const struct sock *addr_sk)
589 {
590 	int l3index;
591 
592 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
593 						 addr_sk->sk_bound_dev_if);
594 	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
595 				    l3index);
596 }
597 
598 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
599 				 sockptr_t optval, int optlen)
600 {
601 	struct tcp_md5sig cmd;
602 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
603 	int l3index = 0;
604 	u8 prefixlen;
605 	u8 flags;
606 
607 	if (optlen < sizeof(cmd))
608 		return -EINVAL;
609 
610 	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
611 		return -EFAULT;
612 
613 	if (sin6->sin6_family != AF_INET6)
614 		return -EINVAL;
615 
616 	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
617 
618 	if (optname == TCP_MD5SIG_EXT &&
619 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
620 		prefixlen = cmd.tcpm_prefixlen;
621 		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
622 					prefixlen > 32))
623 			return -EINVAL;
624 	} else {
625 		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
626 	}
627 
628 	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
629 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
630 		struct net_device *dev;
631 
632 		rcu_read_lock();
633 		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
634 		if (dev && netif_is_l3_master(dev))
635 			l3index = dev->ifindex;
636 		rcu_read_unlock();
637 
638 		/* ok to reference set/not set outside of rcu;
639 		 * right now device MUST be an L3 master
640 		 */
641 		if (!dev || !l3index)
642 			return -EINVAL;
643 	}
644 
645 	if (!cmd.tcpm_keylen) {
646 		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
647 			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
648 					      AF_INET, prefixlen,
649 					      l3index, flags);
650 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
651 				      AF_INET6, prefixlen, l3index, flags);
652 	}
653 
654 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
655 		return -EINVAL;
656 
657 	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
658 		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
659 				      AF_INET, prefixlen, l3index, flags,
660 				      cmd.tcpm_key, cmd.tcpm_keylen);
661 
662 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
663 			      AF_INET6, prefixlen, l3index, flags,
664 			      cmd.tcpm_key, cmd.tcpm_keylen);
665 }
666 
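/*
 * Editor's note (illustrative sketch, not part of the original source):
 * the option handled above is normally installed from user space along
 * these lines; the peer address and key below are made up for the example:
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in6 *a = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *	a->sin6_family = AF_INET6;
 *	inet_pton(AF_INET6, "2001:db8::1", &a->sin6_addr);
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 */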
667 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
668 				   const struct in6_addr *daddr,
669 				   const struct in6_addr *saddr,
670 				   const struct tcphdr *th, int nbytes)
671 {
672 	struct tcp6_pseudohdr *bp;
673 	struct scatterlist sg;
674 	struct tcphdr *_th;
675 
676 	bp = hp->scratch;
677 	/* 1. TCP pseudo-header (RFC2460) */
678 	bp->saddr = *saddr;
679 	bp->daddr = *daddr;
680 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
681 	bp->len = cpu_to_be32(nbytes);
682 
683 	_th = (struct tcphdr *)(bp + 1);
684 	memcpy(_th, th, sizeof(*th));
685 	_th->check = 0;
686 
687 	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
688 	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
689 				sizeof(*bp) + sizeof(*th));
690 	return crypto_ahash_update(hp->md5_req);
691 }
692 
693 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
694 			       const struct in6_addr *daddr, struct in6_addr *saddr,
695 			       const struct tcphdr *th)
696 {
697 	struct tcp_md5sig_pool *hp;
698 	struct ahash_request *req;
699 
700 	hp = tcp_get_md5sig_pool();
701 	if (!hp)
702 		goto clear_hash_noput;
703 	req = hp->md5_req;
704 
705 	if (crypto_ahash_init(req))
706 		goto clear_hash;
707 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
708 		goto clear_hash;
709 	if (tcp_md5_hash_key(hp, key))
710 		goto clear_hash;
711 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
712 	if (crypto_ahash_final(req))
713 		goto clear_hash;
714 
715 	tcp_put_md5sig_pool();
716 	return 0;
717 
718 clear_hash:
719 	tcp_put_md5sig_pool();
720 clear_hash_noput:
721 	memset(md5_hash, 0, 16);
722 	return 1;
723 }
724 
725 static int tcp_v6_md5_hash_skb(char *md5_hash,
726 			       const struct tcp_md5sig_key *key,
727 			       const struct sock *sk,
728 			       const struct sk_buff *skb)
729 {
730 	const struct in6_addr *saddr, *daddr;
731 	struct tcp_md5sig_pool *hp;
732 	struct ahash_request *req;
733 	const struct tcphdr *th = tcp_hdr(skb);
734 
735 	if (sk) { /* valid for establish/request sockets */
736 		saddr = &sk->sk_v6_rcv_saddr;
737 		daddr = &sk->sk_v6_daddr;
738 	} else {
739 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
740 		saddr = &ip6h->saddr;
741 		daddr = &ip6h->daddr;
742 	}
743 
744 	hp = tcp_get_md5sig_pool();
745 	if (!hp)
746 		goto clear_hash_noput;
747 	req = hp->md5_req;
748 
749 	if (crypto_ahash_init(req))
750 		goto clear_hash;
751 
752 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
753 		goto clear_hash;
754 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
755 		goto clear_hash;
756 	if (tcp_md5_hash_key(hp, key))
757 		goto clear_hash;
758 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
759 	if (crypto_ahash_final(req))
760 		goto clear_hash;
761 
762 	tcp_put_md5sig_pool();
763 	return 0;
764 
765 clear_hash:
766 	tcp_put_md5sig_pool();
767 clear_hash_noput:
768 	memset(md5_hash, 0, 16);
769 	return 1;
770 }
771 
772 #endif
773 
774 static void tcp_v6_init_req(struct request_sock *req,
775 			    const struct sock *sk_listener,
776 			    struct sk_buff *skb)
777 {
778 	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
779 	struct inet_request_sock *ireq = inet_rsk(req);
780 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
781 
782 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
783 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
784 
785 	/* So that link locals have meaning */
786 	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
787 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
788 		ireq->ir_iif = tcp_v6_iif(skb);
789 
790 	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
791 	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
792 	     np->rxopt.bits.rxinfo ||
793 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
794 	     np->rxopt.bits.rxohlim || np->repflow)) {
795 		refcount_inc(&skb->users);
796 		ireq->pktopts = skb;
797 	}
798 }
799 
800 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
801 					  struct sk_buff *skb,
802 					  struct flowi *fl,
803 					  struct request_sock *req)
804 {
805 	tcp_v6_init_req(req, sk, skb);
806 
807 	if (security_inet_conn_request(sk, skb, req))
808 		return NULL;
809 
810 	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
811 }
812 
813 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
814 	.family		=	AF_INET6,
815 	.obj_size	=	sizeof(struct tcp6_request_sock),
816 	.rtx_syn_ack	=	tcp_rtx_synack,
817 	.send_ack	=	tcp_v6_reqsk_send_ack,
818 	.destructor	=	tcp_v6_reqsk_destructor,
819 	.send_reset	=	tcp_v6_send_reset,
820 	.syn_ack_timeout =	tcp_syn_ack_timeout,
821 };
822 
823 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
824 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
825 				sizeof(struct ipv6hdr),
826 #ifdef CONFIG_TCP_MD5SIG
827 	.req_md5_lookup	=	tcp_v6_md5_lookup,
828 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
829 #endif
830 #ifdef CONFIG_SYN_COOKIES
831 	.cookie_init_seq =	cookie_v6_init_sequence,
832 #endif
833 	.route_req	=	tcp_v6_route_req,
834 	.init_seq	=	tcp_v6_init_seq,
835 	.init_ts_off	=	tcp_v6_init_ts_off,
836 	.send_synack	=	tcp_v6_send_synack,
837 };
838 
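/*
 * Editor's note (added summary): common helper behind tcp_v6_send_reset()
 * and tcp_v6_send_ack(). It constructs a bare TCP segment (optionally
 * carrying timestamp, MD5 and MPTCP reset options), swaps the addresses and
 * ports taken from the incoming skb, and transmits it through the per-netns
 * control socket, so it also works when no full socket is available.
 */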
839 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
840 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
841 				 int oif, struct tcp_md5sig_key *key, int rst,
842 				 u8 tclass, __be32 label, u32 priority, u32 txhash)
843 {
844 	const struct tcphdr *th = tcp_hdr(skb);
845 	struct tcphdr *t1;
846 	struct sk_buff *buff;
847 	struct flowi6 fl6;
848 	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
849 	struct sock *ctl_sk = net->ipv6.tcp_sk;
850 	unsigned int tot_len = sizeof(struct tcphdr);
851 	__be32 mrst = 0, *topt;
852 	struct dst_entry *dst;
853 	__u32 mark = 0;
854 
855 	if (tsecr)
856 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
857 #ifdef CONFIG_TCP_MD5SIG
858 	if (key)
859 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
860 #endif
861 
862 #ifdef CONFIG_MPTCP
863 	if (rst && !key) {
864 		mrst = mptcp_reset_option(skb);
865 
866 		if (mrst)
867 			tot_len += sizeof(__be32);
868 	}
869 #endif
870 
871 	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
872 	if (!buff)
873 		return;
874 
875 	skb_reserve(buff, MAX_TCP_HEADER);
876 
877 	t1 = skb_push(buff, tot_len);
878 	skb_reset_transport_header(buff);
879 
880 	/* Swap the send and the receive. */
881 	memset(t1, 0, sizeof(*t1));
882 	t1->dest = th->source;
883 	t1->source = th->dest;
884 	t1->doff = tot_len / 4;
885 	t1->seq = htonl(seq);
886 	t1->ack_seq = htonl(ack);
887 	t1->ack = !rst || !th->ack;
888 	t1->rst = rst;
889 	t1->window = htons(win);
890 
891 	topt = (__be32 *)(t1 + 1);
892 
893 	if (tsecr) {
894 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
895 				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
896 		*topt++ = htonl(tsval);
897 		*topt++ = htonl(tsecr);
898 	}
899 
900 	if (mrst)
901 		*topt++ = mrst;
902 
903 #ifdef CONFIG_TCP_MD5SIG
904 	if (key) {
905 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
906 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
907 		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
908 				    &ipv6_hdr(skb)->saddr,
909 				    &ipv6_hdr(skb)->daddr, t1);
910 	}
911 #endif
912 
913 	memset(&fl6, 0, sizeof(fl6));
914 	fl6.daddr = ipv6_hdr(skb)->saddr;
915 	fl6.saddr = ipv6_hdr(skb)->daddr;
916 	fl6.flowlabel = label;
917 
918 	buff->ip_summed = CHECKSUM_PARTIAL;
919 
920 	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
921 
922 	fl6.flowi6_proto = IPPROTO_TCP;
923 	if (rt6_need_strict(&fl6.daddr) && !oif)
924 		fl6.flowi6_oif = tcp_v6_iif(skb);
925 	else {
926 		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
927 			oif = skb->skb_iif;
928 
929 		fl6.flowi6_oif = oif;
930 	}
931 
932 	if (sk) {
933 		if (sk->sk_state == TCP_TIME_WAIT)
934 			mark = inet_twsk(sk)->tw_mark;
935 		else
936 			mark = READ_ONCE(sk->sk_mark);
937 		skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
938 	}
939 	if (txhash) {
940 		/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
941 		skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
942 	}
943 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
944 	fl6.fl6_dport = t1->dest;
945 	fl6.fl6_sport = t1->source;
946 	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
947 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
948 
949 	/* Pass a socket to ip6_dst_lookup_flow whether or not this is for a RST;
950 	 * the underlying function will use it to retrieve the network
951 	 * namespace.
952 	 */
953 	if (sk && sk->sk_state != TCP_TIME_WAIT)
954 		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
955 	else
956 		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
957 	if (!IS_ERR(dst)) {
958 		skb_dst_set(buff, dst);
959 		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
960 			 tclass & ~INET_ECN_MASK, priority);
961 		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
962 		if (rst)
963 			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
964 		return;
965 	}
966 
967 	kfree_skb(buff);
968 }
969 
970 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
971 {
972 	const struct tcphdr *th = tcp_hdr(skb);
973 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
974 	u32 seq = 0, ack_seq = 0;
975 	struct tcp_md5sig_key *key = NULL;
976 #ifdef CONFIG_TCP_MD5SIG
977 	const __u8 *hash_location = NULL;
978 	unsigned char newhash[16];
979 	int genhash;
980 	struct sock *sk1 = NULL;
981 #endif
982 	__be32 label = 0;
983 	u32 priority = 0;
984 	struct net *net;
985 	u32 txhash = 0;
986 	int oif = 0;
987 
988 	if (th->rst)
989 		return;
990 
991 	/* If sk not NULL, it means we did a successful lookup and incoming
992 	 * route had to be correct. prequeue might have dropped our dst.
993 	 */
994 	if (!sk && !ipv6_unicast_destination(skb))
995 		return;
996 
997 	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
998 #ifdef CONFIG_TCP_MD5SIG
999 	rcu_read_lock();
1000 	hash_location = tcp_parse_md5sig_option(th);
1001 	if (sk && sk_fullsock(sk)) {
1002 		int l3index;
1003 
1004 		/* sdif set, means packet ingressed via a device
1005 		 * in an L3 domain and inet_iif is set to it.
1006 		 */
1007 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1008 		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1009 	} else if (hash_location) {
1010 		int dif = tcp_v6_iif_l3_slave(skb);
1011 		int sdif = tcp_v6_sdif(skb);
1012 		int l3index;
1013 
1014 		/*
1015 		 * The active side is lost. Try to find the listening socket
1016 		 * through the source port, and then find the md5 key through
1017 		 * the listening socket. We do not lose security here:
1018 		 * the incoming packet is checked with the md5 hash using the
1019 		 * found key; no RST is generated if the md5 hash doesn't match.
1020 		 */
1021 		sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1022 					    NULL, 0, &ipv6h->saddr, th->source,
1023 					    &ipv6h->daddr, ntohs(th->source),
1024 					    dif, sdif);
1025 		if (!sk1)
1026 			goto out;
1027 
1028 		/* sdif set, means packet ingressed via a device
1029 		 * in an L3 domain and dif is set to it.
1030 		 */
1031 		l3index = tcp_v6_sdif(skb) ? dif : 0;
1032 
1033 		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1034 		if (!key)
1035 			goto out;
1036 
1037 		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1038 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
1039 			goto out;
1040 	}
1041 #endif
1042 
1043 	if (th->ack)
1044 		seq = ntohl(th->ack_seq);
1045 	else
1046 		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1047 			  (th->doff << 2);
1048 
1049 	if (sk) {
1050 		oif = sk->sk_bound_dev_if;
1051 		if (sk_fullsock(sk)) {
1052 			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1053 
1054 			trace_tcp_send_reset(sk, skb);
1055 			if (np->repflow)
1056 				label = ip6_flowlabel(ipv6h);
1057 			priority = sk->sk_priority;
1058 			txhash = sk->sk_txhash;
1059 		}
1060 		if (sk->sk_state == TCP_TIME_WAIT) {
1061 			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1062 			priority = inet_twsk(sk)->tw_priority;
1063 			txhash = inet_twsk(sk)->tw_txhash;
1064 		}
1065 	} else {
1066 		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1067 			label = ip6_flowlabel(ipv6h);
1068 	}
1069 
1070 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1071 			     ipv6_get_dsfield(ipv6h), label, priority, txhash);
1072 
1073 #ifdef CONFIG_TCP_MD5SIG
1074 out:
1075 	rcu_read_unlock();
1076 #endif
1077 }
1078 
1079 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1080 			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1081 			    struct tcp_md5sig_key *key, u8 tclass,
1082 			    __be32 label, u32 priority, u32 txhash)
1083 {
1084 	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1085 			     tclass, label, priority, txhash);
1086 }
1087 
1088 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1089 {
1090 	struct inet_timewait_sock *tw = inet_twsk(sk);
1091 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1092 
1093 	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1094 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1095 			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1096 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1097 			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
1098 			tw->tw_txhash);
1099 
1100 	inet_twsk_put(tw);
1101 }
1102 
1103 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1104 				  struct request_sock *req)
1105 {
1106 	int l3index;
1107 
1108 	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1109 
1110 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1111 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1112 	 */
1113 	/* RFC 7323 2.3
1114 	 * The window field (SEG.WND) of every outgoing segment, with the
1115 	 * exception of <SYN> segments, MUST be right-shifted by
1116 	 * Rcv.Wind.Shift bits:
1117 	 */
1118 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1119 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1120 			tcp_rsk(req)->rcv_nxt,
1121 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1122 			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1123 			READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
1124 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1125 			ipv6_get_dsfield(ipv6_hdr(skb)), 0,
1126 			READ_ONCE(sk->sk_priority),
1127 			READ_ONCE(tcp_rsk(req)->txhash));
1128 }
1129 
1130 
1131 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1132 {
1133 #ifdef CONFIG_SYN_COOKIES
1134 	const struct tcphdr *th = tcp_hdr(skb);
1135 
1136 	if (!th->syn)
1137 		sk = cookie_v6_check(sk, skb);
1138 #endif
1139 	return sk;
1140 }
1141 
1142 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1143 			 struct tcphdr *th, u32 *cookie)
1144 {
1145 	u16 mss = 0;
1146 #ifdef CONFIG_SYN_COOKIES
1147 	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1148 				    &tcp_request_sock_ipv6_ops, sk, th);
1149 	if (mss) {
1150 		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1151 		tcp_synq_overflow(sk);
1152 	}
1153 #endif
1154 	return mss;
1155 }
1156 
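/*
 * Editor's note: listener-side entry point for incoming SYNs. IPv4 packets
 * received on a mapped socket are diverted to tcp_v4_conn_request(), SYNs
 * with a v4-mapped IPv6 source are dropped, and everything else goes
 * through the generic tcp_conn_request() with the IPv6 request_sock ops
 * declared above.
 */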
1157 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1158 {
1159 	if (skb->protocol == htons(ETH_P_IP))
1160 		return tcp_v4_conn_request(sk, skb);
1161 
1162 	if (!ipv6_unicast_destination(skb))
1163 		goto drop;
1164 
1165 	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1166 		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1167 		return 0;
1168 	}
1169 
1170 	return tcp_conn_request(&tcp6_request_sock_ops,
1171 				&tcp_request_sock_ipv6_ops, sk, skb);
1172 
1173 drop:
1174 	tcp_listendrop(sk);
1175 	return 0; /* don't send reset */
1176 }
1177 
1178 static void tcp_v6_restore_cb(struct sk_buff *skb)
1179 {
1180 	/* We need to move header back to the beginning if xfrm6_policy_check()
1181 	 * and tcp_v6_fill_cb() are going to be called again.
1182 	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1183 	 */
1184 	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1185 		sizeof(struct inet6_skb_parm));
1186 }
1187 
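/*
 * Editor's note (added summary): creates the child socket once the
 * three-way handshake completes. The ETH_P_IP branch handles v4-mapped
 * listeners by delegating to tcp_v4_syn_recv_sock() and then repointing the
 * af_ops/backlog hooks; the native branch clones the IPv6 options, copies
 * any MD5 key and inherits the bound port before inserting the new socket
 * into the established hash.
 */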
1188 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1189 					 struct request_sock *req,
1190 					 struct dst_entry *dst,
1191 					 struct request_sock *req_unhash,
1192 					 bool *own_req)
1193 {
1194 	struct inet_request_sock *ireq;
1195 	struct ipv6_pinfo *newnp;
1196 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1197 	struct ipv6_txoptions *opt;
1198 	struct inet_sock *newinet;
1199 	bool found_dup_sk = false;
1200 	struct tcp_sock *newtp;
1201 	struct sock *newsk;
1202 #ifdef CONFIG_TCP_MD5SIG
1203 	struct tcp_md5sig_key *key;
1204 	int l3index;
1205 #endif
1206 	struct flowi6 fl6;
1207 
1208 	if (skb->protocol == htons(ETH_P_IP)) {
1209 		/*
1210 		 *	v6 mapped
1211 		 */
1212 
1213 		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1214 					     req_unhash, own_req);
1215 
1216 		if (!newsk)
1217 			return NULL;
1218 
1219 		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1220 
1221 		newnp = tcp_inet6_sk(newsk);
1222 		newtp = tcp_sk(newsk);
1223 
1224 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1225 
1226 		newnp->saddr = newsk->sk_v6_rcv_saddr;
1227 
1228 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1229 		if (sk_is_mptcp(newsk))
1230 			mptcpv6_handle_mapped(newsk, true);
1231 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1232 #ifdef CONFIG_TCP_MD5SIG
1233 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1234 #endif
1235 
1236 		newnp->ipv6_mc_list = NULL;
1237 		newnp->ipv6_ac_list = NULL;
1238 		newnp->ipv6_fl_list = NULL;
1239 		newnp->pktoptions  = NULL;
1240 		newnp->opt	   = NULL;
1241 		newnp->mcast_oif   = inet_iif(skb);
1242 		newnp->mcast_hops  = ip_hdr(skb)->ttl;
1243 		newnp->rcv_flowinfo = 0;
1244 		if (np->repflow)
1245 			newnp->flow_label = 0;
1246 
1247 		/*
1248 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1249 		 * here, tcp_create_openreq_child now does this for us, see the comment in
1250 		 * that function for the gory details. -acme
1251 		 */
1252 
1253 		/* It is a tricky place. Until this moment IPv4 tcp
1254 		   worked with IPv6 icsk.icsk_af_ops.
1255 		   Sync it now.
1256 		 */
1257 		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1258 
1259 		return newsk;
1260 	}
1261 
1262 	ireq = inet_rsk(req);
1263 
1264 	if (sk_acceptq_is_full(sk))
1265 		goto out_overflow;
1266 
1267 	if (!dst) {
1268 		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1269 		if (!dst)
1270 			goto out;
1271 	}
1272 
1273 	newsk = tcp_create_openreq_child(sk, req, skb);
1274 	if (!newsk)
1275 		goto out_nonewsk;
1276 
1277 	/*
1278 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1279 	 * count here, tcp_create_openreq_child now does this for us, see the
1280 	 * comment in that function for the gory details. -acme
1281 	 */
1282 
1283 	newsk->sk_gso_type = SKB_GSO_TCPV6;
1284 	inet6_sk_rx_dst_set(newsk, skb);
1285 
1286 	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1287 
1288 	newtp = tcp_sk(newsk);
1289 	newinet = inet_sk(newsk);
1290 	newnp = tcp_inet6_sk(newsk);
1291 
1292 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1293 
1294 	ip6_dst_store(newsk, dst, NULL, NULL);
1295 
1296 	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1297 	newnp->saddr = ireq->ir_v6_loc_addr;
1298 	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1299 	newsk->sk_bound_dev_if = ireq->ir_iif;
1300 
1301 	/* Now IPv6 options...
1302 
1303 	   First: no IPv4 options.
1304 	 */
1305 	newinet->inet_opt = NULL;
1306 	newnp->ipv6_mc_list = NULL;
1307 	newnp->ipv6_ac_list = NULL;
1308 	newnp->ipv6_fl_list = NULL;
1309 
1310 	/* Clone RX bits */
1311 	newnp->rxopt.all = np->rxopt.all;
1312 
1313 	newnp->pktoptions = NULL;
1314 	newnp->opt	  = NULL;
1315 	newnp->mcast_oif  = tcp_v6_iif(skb);
1316 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1317 	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1318 	if (np->repflow)
1319 		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1320 
1321 	/* Set ToS of the new socket based upon the value of incoming SYN.
1322 	 * ECT bits are set later in tcp_init_transfer().
1323 	 */
1324 	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1325 		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1326 
1327 	/* Clone native IPv6 options from listening socket (if any)
1328 
1329 	   Yes, keeping reference count would be much more clever,
1330 	   but we do one more thing here: reattach optmem
1331 	   to newsk.
1332 	 */
1333 	opt = ireq->ipv6_opt;
1334 	if (!opt)
1335 		opt = rcu_dereference(np->opt);
1336 	if (opt) {
1337 		opt = ipv6_dup_options(newsk, opt);
1338 		RCU_INIT_POINTER(newnp->opt, opt);
1339 	}
1340 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1341 	if (opt)
1342 		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1343 						    opt->opt_flen;
1344 
1345 	tcp_ca_openreq_child(newsk, dst);
1346 
1347 	tcp_sync_mss(newsk, dst_mtu(dst));
1348 	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1349 
1350 	tcp_initialize_rcv_mss(newsk);
1351 
1352 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1353 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1354 
1355 #ifdef CONFIG_TCP_MD5SIG
1356 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1357 
1358 	/* Copy over the MD5 key from the original socket */
1359 	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1360 	if (key) {
1361 		const union tcp_md5_addr *addr;
1362 
1363 		addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
1364 		if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
1365 			inet_csk_prepare_forced_close(newsk);
1366 			tcp_done(newsk);
1367 			goto out;
1368 		}
1369 	}
1370 #endif
1371 
1372 	if (__inet_inherit_port(sk, newsk) < 0) {
1373 		inet_csk_prepare_forced_close(newsk);
1374 		tcp_done(newsk);
1375 		goto out;
1376 	}
1377 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1378 				       &found_dup_sk);
1379 	if (*own_req) {
1380 		tcp_move_syn(newtp, req);
1381 
1382 		/* Clone pktoptions received with SYN, if we own the req */
1383 		if (ireq->pktopts) {
1384 			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
1385 			consume_skb(ireq->pktopts);
1386 			ireq->pktopts = NULL;
1387 			if (newnp->pktoptions)
1388 				tcp_v6_restore_cb(newnp->pktoptions);
1389 		}
1390 	} else {
1391 		if (!req_unhash && found_dup_sk) {
1392 			/* This code path should only be executed in the
1393 			 * syncookie case
1394 			 */
1395 			bh_unlock_sock(newsk);
1396 			sock_put(newsk);
1397 			newsk = NULL;
1398 		}
1399 	}
1400 
1401 	return newsk;
1402 
1403 out_overflow:
1404 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1405 out_nonewsk:
1406 	dst_release(dst);
1407 out:
1408 	tcp_listendrop(sk);
1409 	return NULL;
1410 }
1411 
1412 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1413 							   u32));
1414 /* The socket must have its spinlock held when we get
1415  * here, unless it is a TCP_LISTEN socket.
1416  *
1417  * We have a potential double-lock case here, so even when
1418  * doing backlog processing we use the BH locking scheme.
1419  * This is because we cannot sleep with the original spinlock
1420  * held.
1421  */
1422 INDIRECT_CALLABLE_SCOPE
1423 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1424 {
1425 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1426 	struct sk_buff *opt_skb = NULL;
1427 	enum skb_drop_reason reason;
1428 	struct tcp_sock *tp;
1429 
1430 	/* Imagine: socket is IPv6. IPv4 packet arrives,
1431 	   goes to IPv4 receive handler and backlogged.
1432 	   From backlog it always goes here. Kerboom...
1433 	   Fortunately, tcp_rcv_established and rcv_established
1434 	   handle them correctly, but it is not the case with
1435 	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1436 	 */
1437 
1438 	if (skb->protocol == htons(ETH_P_IP))
1439 		return tcp_v4_do_rcv(sk, skb);
1440 
1441 	/*
1442 	 *	socket locking is here for SMP purposes as backlog rcv
1443 	 *	is currently called with bh processing disabled.
1444 	 */
1445 
1446 	/* Do Stevens' IPV6_PKTOPTIONS.
1447 
1448 	   Yes, guys, it is the only place in our code, where we
1449 	   may make it not affecting IPv4.
1450 	   The rest of code is protocol independent,
1451 	   and I do not like idea to uglify IPv4.
1452 
1453 	   Actually, the whole idea behind IPV6_PKTOPTIONS
1454 	   does not look very well thought out. For now we latch
1455 	   options, received in the last packet, enqueued
1456 	   by tcp. Feel free to propose better solution.
1457 					       --ANK (980728)
1458 	 */
1459 	if (np->rxopt.all && sk->sk_state != TCP_LISTEN)
1460 		opt_skb = skb_clone_and_charge_r(skb, sk);
1461 
1462 	reason = SKB_DROP_REASON_NOT_SPECIFIED;
1463 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1464 		struct dst_entry *dst;
1465 
1466 		dst = rcu_dereference_protected(sk->sk_rx_dst,
1467 						lockdep_sock_is_held(sk));
1468 
1469 		sock_rps_save_rxhash(sk, skb);
1470 		sk_mark_napi_id(sk, skb);
1471 		if (dst) {
1472 			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1473 			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1474 					    dst, sk->sk_rx_dst_cookie) == NULL) {
1475 				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1476 				dst_release(dst);
1477 			}
1478 		}
1479 
1480 		tcp_rcv_established(sk, skb);
1481 		if (opt_skb)
1482 			goto ipv6_pktoptions;
1483 		return 0;
1484 	}
1485 
1486 	if (tcp_checksum_complete(skb))
1487 		goto csum_err;
1488 
1489 	if (sk->sk_state == TCP_LISTEN) {
1490 		struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1491 
1492 		if (!nsk)
1493 			goto discard;
1494 
1495 		if (nsk != sk) {
1496 			if (tcp_child_process(sk, nsk, skb))
1497 				goto reset;
1498 			return 0;
1499 		}
1500 	} else
1501 		sock_rps_save_rxhash(sk, skb);
1502 
1503 	if (tcp_rcv_state_process(sk, skb))
1504 		goto reset;
1505 	if (opt_skb)
1506 		goto ipv6_pktoptions;
1507 	return 0;
1508 
1509 reset:
1510 	tcp_v6_send_reset(sk, skb);
1511 discard:
1512 	if (opt_skb)
1513 		__kfree_skb(opt_skb);
1514 	kfree_skb_reason(skb, reason);
1515 	return 0;
1516 csum_err:
1517 	reason = SKB_DROP_REASON_TCP_CSUM;
1518 	trace_tcp_bad_csum(skb);
1519 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1520 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1521 	goto discard;
1522 
1523 
1524 ipv6_pktoptions:
1525 	/* Do you ask, what is it?
1526 
1527 	   1. skb was enqueued by tcp.
1528 	   2. skb is added to tail of read queue, rather than out of order.
1529 	   3. socket is not in passive state.
1530 	   4. Finally, it really contains options, which user wants to receive.
1531 	 */
1532 	tp = tcp_sk(sk);
1533 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1534 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1535 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1536 			np->mcast_oif = tcp_v6_iif(opt_skb);
1537 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1538 			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1539 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1540 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1541 		if (np->repflow)
1542 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1543 		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1544 			tcp_v6_restore_cb(opt_skb);
1545 			opt_skb = xchg(&np->pktoptions, opt_skb);
1546 		} else {
1547 			__kfree_skb(opt_skb);
1548 			opt_skb = xchg(&np->pktoptions, NULL);
1549 		}
1550 	}
1551 
1552 	consume_skb(opt_skb);
1553 	return 0;
1554 }
1555 
1556 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1557 			   const struct tcphdr *th)
1558 {
1559 	/* This is tricky: we move IP6CB at its correct location into
1560 	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1561 	 * _decode_session6() uses IP6CB().
1562 	 * barrier() makes sure compiler won't play aliasing games.
1563 	 */
1564 	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1565 		sizeof(struct inet6_skb_parm));
1566 	barrier();
1567 
1568 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1569 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1570 				    skb->len - th->doff*4);
1571 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1572 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1573 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1574 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1575 	TCP_SKB_CB(skb)->sacked = 0;
1576 	TCP_SKB_CB(skb)->has_rxtstamp =
1577 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1578 }
1579 
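/*
 * Editor's note (added summary): main IPv6 receive entry for TCP. It
 * validates the header and checksum, looks the socket up in the established
 * hash, handles TCP_NEW_SYN_RECV and TCP_TIME_WAIT states specially, and
 * otherwise delivers the segment via tcp_v6_do_rcv() or the socket backlog.
 */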
1580 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1581 {
1582 	enum skb_drop_reason drop_reason;
1583 	int sdif = inet6_sdif(skb);
1584 	int dif = inet6_iif(skb);
1585 	const struct tcphdr *th;
1586 	const struct ipv6hdr *hdr;
1587 	bool refcounted;
1588 	struct sock *sk;
1589 	int ret;
1590 	struct net *net = dev_net(skb->dev);
1591 
1592 	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1593 	if (skb->pkt_type != PACKET_HOST)
1594 		goto discard_it;
1595 
1596 	/*
1597 	 *	Count it even if it's bad.
1598 	 */
1599 	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1600 
1601 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1602 		goto discard_it;
1603 
1604 	th = (const struct tcphdr *)skb->data;
1605 
1606 	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1607 		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1608 		goto bad_packet;
1609 	}
1610 	if (!pskb_may_pull(skb, th->doff*4))
1611 		goto discard_it;
1612 
1613 	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1614 		goto csum_error;
1615 
1616 	th = (const struct tcphdr *)skb->data;
1617 	hdr = ipv6_hdr(skb);
1618 
1619 lookup:
1620 	sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1621 				th->source, th->dest, inet6_iif(skb), sdif,
1622 				&refcounted);
1623 	if (!sk)
1624 		goto no_tcp_socket;
1625 
1626 process:
1627 	if (sk->sk_state == TCP_TIME_WAIT)
1628 		goto do_time_wait;
1629 
1630 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1631 		struct request_sock *req = inet_reqsk(sk);
1632 		bool req_stolen = false;
1633 		struct sock *nsk;
1634 
1635 		sk = req->rsk_listener;
1636 		if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1637 			drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1638 		else
1639 			drop_reason = tcp_inbound_md5_hash(sk, skb,
1640 							   &hdr->saddr, &hdr->daddr,
1641 							   AF_INET6, dif, sdif);
1642 		if (drop_reason) {
1643 			sk_drops_add(sk, skb);
1644 			reqsk_put(req);
1645 			goto discard_it;
1646 		}
1647 		if (tcp_checksum_complete(skb)) {
1648 			reqsk_put(req);
1649 			goto csum_error;
1650 		}
1651 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1652 			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1653 			if (!nsk) {
1654 				inet_csk_reqsk_queue_drop_and_put(sk, req);
1655 				goto lookup;
1656 			}
1657 			sk = nsk;
1658 			/* reuseport_migrate_sock() has already held one sk_refcnt
1659 			 * before returning.
1660 			 */
1661 		} else {
1662 			sock_hold(sk);
1663 		}
1664 		refcounted = true;
1665 		nsk = NULL;
1666 		if (!tcp_filter(sk, skb)) {
1667 			th = (const struct tcphdr *)skb->data;
1668 			hdr = ipv6_hdr(skb);
1669 			tcp_v6_fill_cb(skb, hdr, th);
1670 			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1671 		} else {
1672 			drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1673 		}
1674 		if (!nsk) {
1675 			reqsk_put(req);
1676 			if (req_stolen) {
1677 				/* Another cpu got exclusive access to req
1678 				 * and created a full blown socket.
1679 				 * Try to feed this packet to this socket
1680 				 * instead of discarding it.
1681 				 */
1682 				tcp_v6_restore_cb(skb);
1683 				sock_put(sk);
1684 				goto lookup;
1685 			}
1686 			goto discard_and_relse;
1687 		}
1688 		nf_reset_ct(skb);
1689 		if (nsk == sk) {
1690 			reqsk_put(req);
1691 			tcp_v6_restore_cb(skb);
1692 		} else if (tcp_child_process(sk, nsk, skb)) {
1693 			tcp_v6_send_reset(nsk, skb);
1694 			goto discard_and_relse;
1695 		} else {
1696 			sock_put(sk);
1697 			return 0;
1698 		}
1699 	}
1700 
1701 	if (static_branch_unlikely(&ip6_min_hopcount)) {
1702 		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1703 		if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
1704 			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1705 			drop_reason = SKB_DROP_REASON_TCP_MINTTL;
1706 			goto discard_and_relse;
1707 		}
1708 	}
1709 
1710 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1711 		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1712 		goto discard_and_relse;
1713 	}
1714 
1715 	drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1716 					   AF_INET6, dif, sdif);
1717 	if (drop_reason)
1718 		goto discard_and_relse;
1719 
1720 	nf_reset_ct(skb);
1721 
1722 	if (tcp_filter(sk, skb)) {
1723 		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1724 		goto discard_and_relse;
1725 	}
1726 	th = (const struct tcphdr *)skb->data;
1727 	hdr = ipv6_hdr(skb);
1728 	tcp_v6_fill_cb(skb, hdr, th);
1729 
1730 	skb->dev = NULL;
1731 
1732 	if (sk->sk_state == TCP_LISTEN) {
1733 		ret = tcp_v6_do_rcv(sk, skb);
1734 		goto put_and_return;
1735 	}
1736 
1737 	sk_incoming_cpu_update(sk);
1738 
1739 	bh_lock_sock_nested(sk);
1740 	tcp_segs_in(tcp_sk(sk), skb);
1741 	ret = 0;
1742 	if (!sock_owned_by_user(sk)) {
1743 		ret = tcp_v6_do_rcv(sk, skb);
1744 	} else {
1745 		if (tcp_add_backlog(sk, skb, &drop_reason))
1746 			goto discard_and_relse;
1747 	}
1748 	bh_unlock_sock(sk);
1749 put_and_return:
1750 	if (refcounted)
1751 		sock_put(sk);
1752 	return ret ? -1 : 0;
1753 
1754 no_tcp_socket:
1755 	drop_reason = SKB_DROP_REASON_NO_SOCKET;
1756 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1757 		goto discard_it;
1758 
1759 	tcp_v6_fill_cb(skb, hdr, th);
1760 
1761 	if (tcp_checksum_complete(skb)) {
1762 csum_error:
1763 		drop_reason = SKB_DROP_REASON_TCP_CSUM;
1764 		trace_tcp_bad_csum(skb);
1765 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1766 bad_packet:
1767 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1768 	} else {
1769 		tcp_v6_send_reset(NULL, skb);
1770 	}
1771 
1772 discard_it:
1773 	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1774 	kfree_skb_reason(skb, drop_reason);
1775 	return 0;
1776 
1777 discard_and_relse:
1778 	sk_drops_add(sk, skb);
1779 	if (refcounted)
1780 		sock_put(sk);
1781 	goto discard_it;
1782 
1783 do_time_wait:
1784 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1785 		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1786 		inet_twsk_put(inet_twsk(sk));
1787 		goto discard_it;
1788 	}
1789 
1790 	tcp_v6_fill_cb(skb, hdr, th);
1791 
1792 	if (tcp_checksum_complete(skb)) {
1793 		inet_twsk_put(inet_twsk(sk));
1794 		goto csum_error;
1795 	}
1796 
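	/* Decide what to do with a segment hitting a TIME_WAIT socket:
	 * TCP_TW_SYN	  - a new SYN may be handed to a matching listener,
	 *		    recycling this timewait entry;
	 * TCP_TW_ACK	  - answer with an ACK;
	 * TCP_TW_RST	  - answer with a reset;
	 * TCP_TW_SUCCESS - nothing further to do.
	 */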
1797 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1798 	case TCP_TW_SYN:
1799 	{
1800 		struct sock *sk2;
1801 
1802 		sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1803 					    skb, __tcp_hdrlen(th),
1804 					    &ipv6_hdr(skb)->saddr, th->source,
1805 					    &ipv6_hdr(skb)->daddr,
1806 					    ntohs(th->dest),
1807 					    tcp_v6_iif_l3_slave(skb),
1808 					    sdif);
1809 		if (sk2) {
1810 			struct inet_timewait_sock *tw = inet_twsk(sk);
1811 			inet_twsk_deschedule_put(tw);
1812 			sk = sk2;
1813 			tcp_v6_restore_cb(skb);
1814 			refcounted = false;
1815 			goto process;
1816 		}
1817 	}
1818 		/* to ACK */
1819 		fallthrough;
1820 	case TCP_TW_ACK:
1821 		tcp_v6_timewait_ack(sk, skb);
1822 		break;
1823 	case TCP_TW_RST:
1824 		tcp_v6_send_reset(sk, skb);
1825 		inet_twsk_deschedule_put(inet_twsk(sk));
1826 		goto discard_it;
1827 	case TCP_TW_SUCCESS:
1828 		;
1829 	}
1830 	goto discard_it;
1831 }
1832 
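/* Early demux: called from the IPv6 receive path, before routing, to look
 * up an established socket for this segment.  On a hit the socket is
 * attached to the skb and, if its cached rx dst is still valid for the
 * incoming interface, that dst is reused so the routing lookup can be
 * skipped.
 */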
1833 void tcp_v6_early_demux(struct sk_buff *skb)
1834 {
1835 	struct net *net = dev_net(skb->dev);
1836 	const struct ipv6hdr *hdr;
1837 	const struct tcphdr *th;
1838 	struct sock *sk;
1839 
1840 	if (skb->pkt_type != PACKET_HOST)
1841 		return;
1842 
1843 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1844 		return;
1845 
1846 	hdr = ipv6_hdr(skb);
1847 	th = tcp_hdr(skb);
1848 
1849 	if (th->doff < sizeof(struct tcphdr) / 4)
1850 		return;
1851 
1852 	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
1853 	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
1854 					&hdr->saddr, th->source,
1855 					&hdr->daddr, ntohs(th->dest),
1856 					inet6_iif(skb), inet6_sdif(skb));
1857 	if (sk) {
1858 		skb->sk = sk;
1859 		skb->destructor = sock_edemux;
1860 		if (sk_fullsock(sk)) {
1861 			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1862 
1863 			if (dst)
1864 				dst = dst_check(dst, sk->sk_rx_dst_cookie);
1865 			if (dst &&
1866 			    sk->sk_rx_dst_ifindex == skb->skb_iif)
1867 				skb_dst_set_noref(skb, dst);
1868 		}
1869 	}
1870 }
1871 
1872 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1873 	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
1874 	.twsk_unique	= tcp_twsk_unique,
1875 	.twsk_destructor = tcp_twsk_destructor,
1876 };
1877 
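/* Fill in the TCP checksum for an outgoing segment using the IPv6
 * pseudo-header built from the socket's addresses; with CHECKSUM_PARTIAL
 * the final sum is typically completed by the NIC.
 */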
1878 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1879 {
1880 	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1881 }
1882 
1883 const struct inet_connection_sock_af_ops ipv6_specific = {
1884 	.queue_xmit	   = inet6_csk_xmit,
1885 	.send_check	   = tcp_v6_send_check,
1886 	.rebuild_header	   = inet6_sk_rebuild_header,
1887 	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
1888 	.conn_request	   = tcp_v6_conn_request,
1889 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1890 	.net_header_len	   = sizeof(struct ipv6hdr),
1891 	.net_frag_header_len = sizeof(struct frag_hdr),
1892 	.setsockopt	   = ipv6_setsockopt,
1893 	.getsockopt	   = ipv6_getsockopt,
1894 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1895 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1896 	.mtu_reduced	   = tcp_v6_mtu_reduced,
1897 };
1898 
1899 #ifdef CONFIG_TCP_MD5SIG
1900 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1901 	.md5_lookup	=	tcp_v6_md5_lookup,
1902 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
1903 	.md5_parse	=	tcp_v6_parse_md5_keys,
1904 };
1905 #endif
1906 
1907 /*
1908  *	TCP over IPv4 via INET6 API
1909  */
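/* These ops are switched in (see tcp_v6_connect()) when an AF_INET6 socket
 * connects to an IPv4-mapped address (::ffff:a.b.c.d), so packets actually
 * travel over IPv4 while the socket keeps its IPv6 API.
 */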
1910 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1911 	.queue_xmit	   = ip_queue_xmit,
1912 	.send_check	   = tcp_v4_send_check,
1913 	.rebuild_header	   = inet_sk_rebuild_header,
1914 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1915 	.conn_request	   = tcp_v6_conn_request,
1916 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1917 	.net_header_len	   = sizeof(struct iphdr),
1918 	.setsockopt	   = ipv6_setsockopt,
1919 	.getsockopt	   = ipv6_getsockopt,
1920 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1921 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1922 	.mtu_reduced	   = tcp_v4_mtu_reduced,
1923 };
1924 
1925 #ifdef CONFIG_TCP_MD5SIG
1926 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1927 	.md5_lookup	=	tcp_v4_md5_lookup,
1928 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1929 	.md5_parse	=	tcp_v6_parse_md5_keys,
1930 };
1931 #endif
1932 
1933 /* NOTE: A lot of fields are set to zero explicitly by the call to
1934  *       sk_alloc(), so they need not be initialized here.
1935  */
1936 static int tcp_v6_init_sock(struct sock *sk)
1937 {
1938 	struct inet_connection_sock *icsk = inet_csk(sk);
1939 
1940 	tcp_init_sock(sk);
1941 
1942 	icsk->icsk_af_ops = &ipv6_specific;
1943 
1944 #ifdef CONFIG_TCP_MD5SIG
1945 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1946 #endif
1947 
1948 	return 0;
1949 }
1950 
1951 #ifdef CONFIG_PROC_FS
1952 /* Proc filesystem TCPv6 sock list dumping. */
1953 static void get_openreq6(struct seq_file *seq,
1954 			 const struct request_sock *req, int i)
1955 {
1956 	long ttd = req->rsk_timer.expires - jiffies;
1957 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1958 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1959 
1960 	if (ttd < 0)
1961 		ttd = 0;
1962 
1963 	seq_printf(seq,
1964 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1965 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1966 		   i,
1967 		   src->s6_addr32[0], src->s6_addr32[1],
1968 		   src->s6_addr32[2], src->s6_addr32[3],
1969 		   inet_rsk(req)->ir_num,
1970 		   dest->s6_addr32[0], dest->s6_addr32[1],
1971 		   dest->s6_addr32[2], dest->s6_addr32[3],
1972 		   ntohs(inet_rsk(req)->ir_rmt_port),
1973 		   TCP_SYN_RECV,
1974 		   0, 0, /* could print option size, but that is af dependent. */
1975 		   1,   /* timers active (only the expire timer) */
1976 		   jiffies_to_clock_t(ttd),
1977 		   req->num_timeout,
1978 		   from_kuid_munged(seq_user_ns(seq),
1979 				    sock_i_uid(req->rsk_listener)),
1980 		   0,  /* non standard timer */
1981 		   0, /* open_requests have no inode */
1982 		   0, req);
1983 }
1984 
1985 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1986 {
1987 	const struct in6_addr *dest, *src;
1988 	__u16 destp, srcp;
1989 	int timer_active;
1990 	unsigned long timer_expires;
1991 	const struct inet_sock *inet = inet_sk(sp);
1992 	const struct tcp_sock *tp = tcp_sk(sp);
1993 	const struct inet_connection_sock *icsk = inet_csk(sp);
1994 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1995 	int rx_queue;
1996 	int state;
1997 
1998 	dest  = &sp->sk_v6_daddr;
1999 	src   = &sp->sk_v6_rcv_saddr;
2000 	destp = ntohs(inet->inet_dport);
2001 	srcp  = ntohs(inet->inet_sport);
2002 
2003 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2004 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2005 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2006 		timer_active	= 1;
2007 		timer_expires	= icsk->icsk_timeout;
2008 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2009 		timer_active	= 4;
2010 		timer_expires	= icsk->icsk_timeout;
2011 	} else if (timer_pending(&sp->sk_timer)) {
2012 		timer_active	= 2;
2013 		timer_expires	= sp->sk_timer.expires;
2014 	} else {
2015 		timer_active	= 0;
2016 		timer_expires = jiffies;
2017 	}
2018 
2019 	state = inet_sk_state_load(sp);
2020 	if (state == TCP_LISTEN)
2021 		rx_queue = READ_ONCE(sp->sk_ack_backlog);
2022 	else
2023 		/* Because we don't lock the socket,
2024 		 * we might find a transient negative value.
2025 		 */
2026 		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2027 				      READ_ONCE(tp->copied_seq), 0);
2028 
2029 	seq_printf(seq,
2030 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2031 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2032 		   i,
2033 		   src->s6_addr32[0], src->s6_addr32[1],
2034 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2035 		   dest->s6_addr32[0], dest->s6_addr32[1],
2036 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2037 		   state,
2038 		   READ_ONCE(tp->write_seq) - tp->snd_una,
2039 		   rx_queue,
2040 		   timer_active,
2041 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
2042 		   icsk->icsk_retransmits,
2043 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2044 		   icsk->icsk_probes_out,
2045 		   sock_i_ino(sp),
2046 		   refcount_read(&sp->sk_refcnt), sp,
2047 		   jiffies_to_clock_t(icsk->icsk_rto),
2048 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
2049 		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2050 		   tcp_snd_cwnd(tp),
2051 		   state == TCP_LISTEN ?
2052 			fastopenq->max_qlen :
2053 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2054 		   );
2055 }
2056 
2057 static void get_timewait6_sock(struct seq_file *seq,
2058 			       struct inet_timewait_sock *tw, int i)
2059 {
2060 	long delta = tw->tw_timer.expires - jiffies;
2061 	const struct in6_addr *dest, *src;
2062 	__u16 destp, srcp;
2063 
2064 	dest = &tw->tw_v6_daddr;
2065 	src  = &tw->tw_v6_rcv_saddr;
2066 	destp = ntohs(tw->tw_dport);
2067 	srcp  = ntohs(tw->tw_sport);
2068 
2069 	seq_printf(seq,
2070 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2071 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2072 		   i,
2073 		   src->s6_addr32[0], src->s6_addr32[1],
2074 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2075 		   dest->s6_addr32[0], dest->s6_addr32[1],
2076 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2077 		   tw->tw_substate, 0, 0,
2078 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2079 		   refcount_read(&tw->tw_refcnt), tw);
2080 }
2081 
2082 static int tcp6_seq_show(struct seq_file *seq, void *v)
2083 {
2084 	struct tcp_iter_state *st;
2085 	struct sock *sk = v;
2086 
2087 	if (v == SEQ_START_TOKEN) {
2088 		seq_puts(seq,
2089 			 "  sl  "
2090 			 "local_address                         "
2091 			 "remote_address                        "
2092 			 "st tx_queue rx_queue tr tm->when retrnsmt"
2093 			 "   uid  timeout inode\n");
2094 		goto out;
2095 	}
2096 	st = seq->private;
2097 
2098 	if (sk->sk_state == TCP_TIME_WAIT)
2099 		get_timewait6_sock(seq, v, st->num);
2100 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2101 		get_openreq6(seq, v, st->num);
2102 	else
2103 		get_tcp6_sock(seq, v, st->num);
2104 out:
2105 	return 0;
2106 }
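/* The resulting /proc/net/tcp6 lines follow the format strings above; a
 * socket listening on [::1]:8080 would show up roughly as follows
 * (illustrative values, little-endian word dump of the addresses, single
 * line wrapped here for readability):
 *
 *    0: 00000000000000000000000001000000:1F90 00000000000000000000000000000000:0000
 *       0A 00000000:00000000 00:00000000 00000000  1000        0 12345 1 0000000000000000 100 0 0 10 0
 */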
2107 
2108 static const struct seq_operations tcp6_seq_ops = {
2109 	.show		= tcp6_seq_show,
2110 	.start		= tcp_seq_start,
2111 	.next		= tcp_seq_next,
2112 	.stop		= tcp_seq_stop,
2113 };
2114 
2115 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2116 	.family		= AF_INET6,
2117 };
2118 
2119 int __net_init tcp6_proc_init(struct net *net)
2120 {
2121 	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2122 			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2123 		return -ENOMEM;
2124 	return 0;
2125 }
2126 
2127 void tcp6_proc_exit(struct net *net)
2128 {
2129 	remove_proc_entry("tcp6", net->proc_net);
2130 }
2131 #endif
2132 
2133 struct proto tcpv6_prot = {
2134 	.name			= "TCPv6",
2135 	.owner			= THIS_MODULE,
2136 	.close			= tcp_close,
2137 	.pre_connect		= tcp_v6_pre_connect,
2138 	.connect		= tcp_v6_connect,
2139 	.disconnect		= tcp_disconnect,
2140 	.accept			= inet_csk_accept,
2141 	.ioctl			= tcp_ioctl,
2142 	.init			= tcp_v6_init_sock,
2143 	.destroy		= tcp_v4_destroy_sock,
2144 	.shutdown		= tcp_shutdown,
2145 	.setsockopt		= tcp_setsockopt,
2146 	.getsockopt		= tcp_getsockopt,
2147 	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
2148 	.keepalive		= tcp_set_keepalive,
2149 	.recvmsg		= tcp_recvmsg,
2150 	.sendmsg		= tcp_sendmsg,
2151 	.splice_eof		= tcp_splice_eof,
2152 	.backlog_rcv		= tcp_v6_do_rcv,
2153 	.release_cb		= tcp_release_cb,
2154 	.hash			= inet6_hash,
2155 	.unhash			= inet_unhash,
2156 	.get_port		= inet_csk_get_port,
2157 	.put_port		= inet_put_port,
2158 #ifdef CONFIG_BPF_SYSCALL
2159 	.psock_update_sk_prot	= tcp_bpf_update_proto,
2160 #endif
2161 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2162 	.leave_memory_pressure	= tcp_leave_memory_pressure,
2163 	.stream_memory_free	= tcp_stream_memory_free,
2164 	.sockets_allocated	= &tcp_sockets_allocated,
2165 
2166 	.memory_allocated	= &tcp_memory_allocated,
2167 	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,
2168 
2169 	.memory_pressure	= &tcp_memory_pressure,
2170 	.orphan_count		= &tcp_orphan_count,
2171 	.sysctl_mem		= sysctl_tcp_mem,
2172 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2173 	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2174 	.max_header		= MAX_TCP_HEADER,
2175 	.obj_size		= sizeof(struct tcp6_sock),
2176 	.ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6),
2177 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2178 	.twsk_prot		= &tcp6_timewait_sock_ops,
2179 	.rsk_prot		= &tcp6_request_sock_ops,
2180 	.h.hashinfo		= NULL,
2181 	.no_autobind		= true,
2182 	.diag_destroy		= tcp_abort,
2183 };
2184 EXPORT_SYMBOL_GPL(tcpv6_prot);
2185 
2186 static const struct inet6_protocol tcpv6_protocol = {
2187 	.handler	=	tcp_v6_rcv,
2188 	.err_handler	=	tcp_v6_err,
2189 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2190 };
2191 
2192 static struct inet_protosw tcpv6_protosw = {
2193 	.type		=	SOCK_STREAM,
2194 	.protocol	=	IPPROTO_TCP,
2195 	.prot		=	&tcpv6_prot,
2196 	.ops		=	&inet6_stream_ops,
2197 	.flags		=	INET_PROTOSW_PERMANENT |
2198 				INET_PROTOSW_ICSK,
2199 };
2200 
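/* Per-netns init: create the kernel control socket (net->ipv6.tcp_sk) used
 * to transmit stack-generated segments such as RSTs and timewait ACKs that
 * are not tied to a full socket.
 */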
2201 static int __net_init tcpv6_net_init(struct net *net)
2202 {
2203 	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2204 				    SOCK_RAW, IPPROTO_TCP, net);
2205 }
2206 
2207 static void __net_exit tcpv6_net_exit(struct net *net)
2208 {
2209 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2210 }
2211 
2212 static struct pernet_operations tcpv6_net_ops = {
2213 	.init	    = tcpv6_net_init,
2214 	.exit	    = tcpv6_net_exit,
2215 };
2216 
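/* Module init: register the IPv6 TCP protocol handler, the SOCK_STREAM
 * protosw entry, the per-netns ops and MPTCP-over-IPv6 support, unwinding
 * the registrations already made in reverse order via the error labels
 * below if any step fails.
 */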
2217 int __init tcpv6_init(void)
2218 {
2219 	int ret;
2220 
2221 	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2222 	if (ret)
2223 		goto out;
2224 
2225 	/* register inet6 protocol */
2226 	ret = inet6_register_protosw(&tcpv6_protosw);
2227 	if (ret)
2228 		goto out_tcpv6_protocol;
2229 
2230 	ret = register_pernet_subsys(&tcpv6_net_ops);
2231 	if (ret)
2232 		goto out_tcpv6_protosw;
2233 
2234 	ret = mptcpv6_init();
2235 	if (ret)
2236 		goto out_tcpv6_pernet_subsys;
2237 
2238 out:
2239 	return ret;
2240 
2241 out_tcpv6_pernet_subsys:
2242 	unregister_pernet_subsys(&tcpv6_net_ops);
2243 out_tcpv6_protosw:
2244 	inet6_unregister_protosw(&tcpv6_protosw);
2245 out_tcpv6_protocol:
2246 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2247 	goto out;
2248 }
2249 
2250 void tcpv6_exit(void)
2251 {
2252 	unregister_pernet_subsys(&tcpv6_net_ops);
2253 	inet6_unregister_protosw(&tcpv6_protosw);
2254 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2255 }
2256