xref: /openbmc/linux/net/ipv6/tcp_ipv6.c (revision 278002edb19bce2c628fafb0af936e77000f3a5b)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	TCP over IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on:
10  *	linux/net/ipv4/tcp.c
11  *	linux/net/ipv4/tcp_input.c
12  *	linux/net/ipv4/tcp_output.c
13  *
14  *	Fixes:
15  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
16  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
17  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
18  *					a single port at the same time.
19  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
20  */
21 
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43 
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62 
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65 
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68 
69 #include <trace/events/tcp.h>
70 
71 static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 				      struct request_sock *req);
74 
75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76 
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 						   const struct in6_addr *addr,
85 						   int l3index)
86 {
87 	return NULL;
88 }
89 #endif
90 
91 /* Helper returning the inet6 address from a given tcp socket.
92  * It can be used in the TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allows compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 #define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
97 					      struct tcp6_sock, tcp)->inet6)
98 
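/* Cache the input route on the socket for the receive fast path: keep a
 * reference to the skb's dst together with the incoming interface index
 * and a cookie used later to revalidate the cached entry.
 */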
99 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
100 {
101 	struct dst_entry *dst = skb_dst(skb);
102 
103 	if (dst && dst_hold_safe(dst)) {
104 		rcu_assign_pointer(sk->sk_rx_dst, dst);
105 		sk->sk_rx_dst_ifindex = skb->skb_iif;
106 		sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
107 	}
108 }
109 
110 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
111 {
112 	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
113 				ipv6_hdr(skb)->saddr.s6_addr32,
114 				tcp_hdr(skb)->dest,
115 				tcp_hdr(skb)->source);
116 }
117 
118 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
119 {
120 	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
121 				   ipv6_hdr(skb)->saddr.s6_addr32);
122 }
123 
124 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
125 			      int addr_len)
126 {
127 	/* This check is replicated from tcp_v6_connect() and intended to
128 	 * prevent the BPF program called below from accessing bytes that are
129 	 * out of the bound specified by the user in addr_len.
130 	 */
131 	if (addr_len < SIN6_LEN_RFC2133)
132 		return -EINVAL;
133 
134 	sock_owned_by_me(sk);
135 
136 	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
137 }
138 
139 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
140 			  int addr_len)
141 {
142 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
143 	struct inet_connection_sock *icsk = inet_csk(sk);
144 	struct in6_addr *saddr = NULL, *final_p, final;
145 	struct inet_timewait_death_row *tcp_death_row;
146 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
147 	struct inet_sock *inet = inet_sk(sk);
148 	struct tcp_sock *tp = tcp_sk(sk);
149 	struct net *net = sock_net(sk);
150 	struct ipv6_txoptions *opt;
151 	struct dst_entry *dst;
152 	struct flowi6 fl6;
153 	int addr_type;
154 	int err;
155 
156 	if (addr_len < SIN6_LEN_RFC2133)
157 		return -EINVAL;
158 
159 	if (usin->sin6_family != AF_INET6)
160 		return -EAFNOSUPPORT;
161 
162 	memset(&fl6, 0, sizeof(fl6));
163 
164 	if (np->sndflow) {
165 		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
166 		IP6_ECN_flow_init(fl6.flowlabel);
167 		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
168 			struct ip6_flowlabel *flowlabel;
169 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
170 			if (IS_ERR(flowlabel))
171 				return -EINVAL;
172 			fl6_sock_release(flowlabel);
173 		}
174 	}
175 
176 	/*
177 	 *	connect() to INADDR_ANY means loopback (BSD'ism).
178 	 */
179 
180 	if (ipv6_addr_any(&usin->sin6_addr)) {
181 		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
182 			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
183 					       &usin->sin6_addr);
184 		else
185 			usin->sin6_addr = in6addr_loopback;
186 	}
187 
188 	addr_type = ipv6_addr_type(&usin->sin6_addr);
189 
190 	if (addr_type & IPV6_ADDR_MULTICAST)
191 		return -ENETUNREACH;
192 
193 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
194 		if (addr_len >= sizeof(struct sockaddr_in6) &&
195 		    usin->sin6_scope_id) {
196 			/* If interface is set while binding, indices
197 			 * must coincide.
198 			 */
199 			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
200 				return -EINVAL;
201 
202 			sk->sk_bound_dev_if = usin->sin6_scope_id;
203 		}
204 
205 		/* Connect to link-local address requires an interface */
206 		if (!sk->sk_bound_dev_if)
207 			return -EINVAL;
208 	}
209 
210 	if (tp->rx_opt.ts_recent_stamp &&
211 	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
212 		tp->rx_opt.ts_recent = 0;
213 		tp->rx_opt.ts_recent_stamp = 0;
214 		WRITE_ONCE(tp->write_seq, 0);
215 	}
216 
217 	sk->sk_v6_daddr = usin->sin6_addr;
218 	np->flow_label = fl6.flowlabel;
219 
220 	/*
221 	 *	TCP over IPv4
222 	 */
223 
224 	if (addr_type & IPV6_ADDR_MAPPED) {
225 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
226 		struct sockaddr_in sin;
227 
228 		if (ipv6_only_sock(sk))
229 			return -ENETUNREACH;
230 
231 		sin.sin_family = AF_INET;
232 		sin.sin_port = usin->sin6_port;
233 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
234 
235 		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
236 		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
237 		if (sk_is_mptcp(sk))
238 			mptcpv6_handle_mapped(sk, true);
239 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
240 #ifdef CONFIG_TCP_MD5SIG
241 		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
242 #endif
243 
244 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
245 
246 		if (err) {
247 			icsk->icsk_ext_hdr_len = exthdrlen;
248 			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
249 			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
250 			if (sk_is_mptcp(sk))
251 				mptcpv6_handle_mapped(sk, false);
252 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
253 #ifdef CONFIG_TCP_MD5SIG
254 			tp->af_specific = &tcp_sock_ipv6_specific;
255 #endif
256 			goto failure;
257 		}
258 		np->saddr = sk->sk_v6_rcv_saddr;
259 
260 		return err;
261 	}
262 
263 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
264 		saddr = &sk->sk_v6_rcv_saddr;
265 
266 	fl6.flowi6_proto = IPPROTO_TCP;
267 	fl6.daddr = sk->sk_v6_daddr;
268 	fl6.saddr = saddr ? *saddr : np->saddr;
269 	fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
270 	fl6.flowi6_oif = sk->sk_bound_dev_if;
271 	fl6.flowi6_mark = sk->sk_mark;
272 	fl6.fl6_dport = usin->sin6_port;
273 	fl6.fl6_sport = inet->inet_sport;
274 	fl6.flowi6_uid = sk->sk_uid;
275 
276 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
277 	final_p = fl6_update_dst(&fl6, opt, &final);
278 
279 	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
280 
281 	dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
282 	if (IS_ERR(dst)) {
283 		err = PTR_ERR(dst);
284 		goto failure;
285 	}
286 
287 	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
288 
289 	if (!saddr) {
290 		saddr = &fl6.saddr;
291 
292 		err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
293 		if (err)
294 			goto failure;
295 	}
296 
297 	/* set the source address */
298 	np->saddr = *saddr;
299 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
300 
301 	sk->sk_gso_type = SKB_GSO_TCPV6;
302 	ip6_dst_store(sk, dst, NULL, NULL);
303 
304 	icsk->icsk_ext_hdr_len = 0;
305 	if (opt)
306 		icsk->icsk_ext_hdr_len = opt->opt_flen +
307 					 opt->opt_nflen;
308 
309 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
310 
311 	inet->inet_dport = usin->sin6_port;
312 
313 	tcp_set_state(sk, TCP_SYN_SENT);
314 	err = inet6_hash_connect(tcp_death_row, sk);
315 	if (err)
316 		goto late_failure;
317 
318 	sk_set_txhash(sk);
319 
320 	if (likely(!tp->repair)) {
321 		if (!tp->write_seq)
322 			WRITE_ONCE(tp->write_seq,
323 				   secure_tcpv6_seq(np->saddr.s6_addr32,
324 						    sk->sk_v6_daddr.s6_addr32,
325 						    inet->inet_sport,
326 						    inet->inet_dport));
327 		tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
328 						   sk->sk_v6_daddr.s6_addr32);
329 	}
330 
331 	if (tcp_fastopen_defer_connect(sk, &err))
332 		return err;
333 	if (err)
334 		goto late_failure;
335 
336 	err = tcp_connect(sk);
337 	if (err)
338 		goto late_failure;
339 
340 	return 0;
341 
342 late_failure:
343 	tcp_set_state(sk, TCP_CLOSE);
344 	inet_bhash2_reset_saddr(sk);
345 failure:
346 	inet->inet_dport = 0;
347 	sk->sk_route_caps = 0;
348 	return err;
349 }
350 
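/* Handle an ICMPV6_PKT_TOOBIG notification recorded in tp->mtu_info,
 * either directly from tcp_v6_err() or deferred via the
 * TCP_MTU_REDUCED_DEFERRED flag: update the path MTU and retransmit
 * outstanding data if our MSS has to shrink.
 */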
351 static void tcp_v6_mtu_reduced(struct sock *sk)
352 {
353 	struct dst_entry *dst;
354 	u32 mtu;
355 
356 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
357 		return;
358 
359 	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
360 
361 	/* Drop requests trying to increase our current mss.
362 	 * The check done in __ip6_rt_update_pmtu() is too late.
363 	 */
364 	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
365 		return;
366 
367 	dst = inet6_csk_update_pmtu(sk, mtu);
368 	if (!dst)
369 		return;
370 
371 	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
372 		tcp_sync_mss(sk, dst_mtu(dst));
373 		tcp_simple_retransmit(sk);
374 	}
375 }
376 
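/* ICMPv6 error handler for TCP sockets: locate the socket the error
 * refers to and react according to its state, e.g. update the path MTU
 * on ICMPV6_PKT_TOOBIG, follow NDISC redirects, revert the RTO backoff
 * (RFC 6069) or report the error to the application.
 */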
377 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
378 		u8 type, u8 code, int offset, __be32 info)
379 {
380 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
381 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
382 	struct net *net = dev_net(skb->dev);
383 	struct request_sock *fastopen;
384 	struct ipv6_pinfo *np;
385 	struct tcp_sock *tp;
386 	__u32 seq, snd_una;
387 	struct sock *sk;
388 	bool fatal;
389 	int err;
390 
391 	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
392 					&hdr->daddr, th->dest,
393 					&hdr->saddr, ntohs(th->source),
394 					skb->dev->ifindex, inet6_sdif(skb));
395 
396 	if (!sk) {
397 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
398 				  ICMP6_MIB_INERRORS);
399 		return -ENOENT;
400 	}
401 
402 	if (sk->sk_state == TCP_TIME_WAIT) {
403 		inet_twsk_put(inet_twsk(sk));
404 		return 0;
405 	}
406 	seq = ntohl(th->seq);
407 	fatal = icmpv6_err_convert(type, code, &err);
408 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
409 		tcp_req_err(sk, seq, fatal);
410 		return 0;
411 	}
412 
413 	bh_lock_sock(sk);
414 	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
415 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
416 
417 	if (sk->sk_state == TCP_CLOSE)
418 		goto out;
419 
420 	if (static_branch_unlikely(&ip6_min_hopcount)) {
421 		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
422 		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
423 			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
424 			goto out;
425 		}
426 	}
427 
428 	tp = tcp_sk(sk);
429 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
430 	fastopen = rcu_dereference(tp->fastopen_rsk);
431 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
432 	if (sk->sk_state != TCP_LISTEN &&
433 	    !between(seq, snd_una, tp->snd_nxt)) {
434 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
435 		goto out;
436 	}
437 
438 	np = tcp_inet6_sk(sk);
439 
440 	if (type == NDISC_REDIRECT) {
441 		if (!sock_owned_by_user(sk)) {
442 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
443 
444 			if (dst)
445 				dst->ops->redirect(dst, sk, skb);
446 		}
447 		goto out;
448 	}
449 
450 	if (type == ICMPV6_PKT_TOOBIG) {
451 		u32 mtu = ntohl(info);
452 
453 		/* We are not interested in TCP_LISTEN and open_requests
454 		 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
455 		 * they should go through unfragmented).
456 		 */
457 		if (sk->sk_state == TCP_LISTEN)
458 			goto out;
459 
460 		if (!ip6_sk_accept_pmtu(sk))
461 			goto out;
462 
463 		if (mtu < IPV6_MIN_MTU)
464 			goto out;
465 
466 		WRITE_ONCE(tp->mtu_info, mtu);
467 
468 		if (!sock_owned_by_user(sk))
469 			tcp_v6_mtu_reduced(sk);
470 		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
471 					   &sk->sk_tsq_flags))
472 			sock_hold(sk);
473 		goto out;
474 	}
475 
476 
477 	/* Might be for a request_sock */
478 	switch (sk->sk_state) {
479 	case TCP_SYN_SENT:
480 	case TCP_SYN_RECV:
481 		/* Only in fast or simultaneous open. If a fast open socket is
482 		 * already accepted it is treated as a connected one below.
483 		 */
484 		if (fastopen && !fastopen->sk)
485 			break;
486 
487 		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
488 
489 		if (!sock_owned_by_user(sk))
490 			tcp_done_with_error(sk, err);
491 		else
492 			WRITE_ONCE(sk->sk_err_soft, err);
493 		goto out;
494 	case TCP_LISTEN:
495 		break;
496 	default:
497 		/* Check if this ICMP message allows reverting the RTO backoff
498 		 * (see RFC 6069).
499 		 */
500 		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
501 		    code == ICMPV6_NOROUTE)
502 			tcp_ld_RTO_revert(sk, seq);
503 	}
504 
505 	if (!sock_owned_by_user(sk) && np->recverr) {
506 		WRITE_ONCE(sk->sk_err, err);
507 		sk_error_report(sk);
508 	} else {
509 		WRITE_ONCE(sk->sk_err_soft, err);
510 	}
511 out:
512 	bh_unlock_sock(sk);
513 	sock_put(sk);
514 	return 0;
515 }
516 
517 
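/* Build and send a SYN-ACK for the given request sock, looking up a
 * route first if the caller did not provide one, and reflecting the
 * incoming flow label / tclass when configured to do so.
 */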
518 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
519 			      struct flowi *fl,
520 			      struct request_sock *req,
521 			      struct tcp_fastopen_cookie *foc,
522 			      enum tcp_synack_type synack_type,
523 			      struct sk_buff *syn_skb)
524 {
525 	struct inet_request_sock *ireq = inet_rsk(req);
526 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
527 	struct ipv6_txoptions *opt;
528 	struct flowi6 *fl6 = &fl->u.ip6;
529 	struct sk_buff *skb;
530 	int err = -ENOMEM;
531 	u8 tclass;
532 
533 	/* First, grab a route. */
534 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
535 					       IPPROTO_TCP)) == NULL)
536 		goto done;
537 
538 	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
539 
540 	if (skb) {
541 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
542 				    &ireq->ir_v6_rmt_addr);
543 
544 		fl6->daddr = ireq->ir_v6_rmt_addr;
545 		if (np->repflow && ireq->pktopts)
546 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
547 
548 		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
549 				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
550 				(np->tclass & INET_ECN_MASK) :
551 				np->tclass;
552 
553 		if (!INET_ECN_is_capable(tclass) &&
554 		    tcp_bpf_ca_needs_ecn((struct sock *)req))
555 			tclass |= INET_ECN_ECT_0;
556 
557 		rcu_read_lock();
558 		opt = ireq->ipv6_opt;
559 		if (!opt)
560 			opt = rcu_dereference(np->opt);
561 		err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
562 			       opt, tclass, sk->sk_priority);
563 		rcu_read_unlock();
564 		err = net_xmit_eval(err);
565 	}
566 
567 done:
568 	return err;
569 }
570 
571 
572 static void tcp_v6_reqsk_destructor(struct request_sock *req)
573 {
574 	kfree(inet_rsk(req)->ipv6_opt);
575 	consume_skb(inet_rsk(req)->pktopts);
576 }
577 
578 #ifdef CONFIG_TCP_MD5SIG
579 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
580 						   const struct in6_addr *addr,
581 						   int l3index)
582 {
583 	return tcp_md5_do_lookup(sk, l3index,
584 				 (union tcp_md5_addr *)addr, AF_INET6);
585 }
586 
587 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
588 						const struct sock *addr_sk)
589 {
590 	int l3index;
591 
592 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
593 						 addr_sk->sk_bound_dev_if);
594 	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
595 				    l3index);
596 }
597 
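/* setsockopt(TCP_MD5SIG / TCP_MD5SIG_EXT) handler: validate the request
 * and add or delete a TCP-MD5 key for the given peer address, prefix
 * length and optional L3 master device.
 *
 * A minimal userspace sketch (illustrative only; "fd" is assumed to be
 * a TCP socket, error handling omitted):
 *
 *	struct tcp_md5sig md5 = { };
 *	struct sockaddr_in6 *a = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *	a->sin6_family = AF_INET6;
 *	inet_pton(AF_INET6, "2001:db8::1", &a->sin6_addr);
 *	md5.tcpm_keylen = 6;
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 */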
598 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
599 				 sockptr_t optval, int optlen)
600 {
601 	struct tcp_md5sig cmd;
602 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
603 	int l3index = 0;
604 	u8 prefixlen;
605 	u8 flags;
606 
607 	if (optlen < sizeof(cmd))
608 		return -EINVAL;
609 
610 	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
611 		return -EFAULT;
612 
613 	if (sin6->sin6_family != AF_INET6)
614 		return -EINVAL;
615 
616 	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
617 
618 	if (optname == TCP_MD5SIG_EXT &&
619 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
620 		prefixlen = cmd.tcpm_prefixlen;
621 		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
622 					prefixlen > 32))
623 			return -EINVAL;
624 	} else {
625 		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
626 	}
627 
628 	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
629 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
630 		struct net_device *dev;
631 
632 		rcu_read_lock();
633 		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
634 		if (dev && netif_is_l3_master(dev))
635 			l3index = dev->ifindex;
636 		rcu_read_unlock();
637 
638 		/* It is ok to check set/not set outside of the RCU section;
639 		 * right now the device MUST be an L3 master.
640 		 */
641 		if (!dev || !l3index)
642 			return -EINVAL;
643 	}
644 
645 	if (!cmd.tcpm_keylen) {
646 		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
647 			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
648 					      AF_INET, prefixlen,
649 					      l3index, flags);
650 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
651 				      AF_INET6, prefixlen, l3index, flags);
652 	}
653 
654 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
655 		return -EINVAL;
656 
657 	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
658 		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
659 				      AF_INET, prefixlen, l3index, flags,
660 				      cmd.tcpm_key, cmd.tcpm_keylen);
661 
662 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
663 			      AF_INET6, prefixlen, l3index, flags,
664 			      cmd.tcpm_key, cmd.tcpm_keylen);
665 }
666 
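/* Hash the IPv6 pseudo-header (RFC 2460) followed by the TCP header,
 * with the checksum field zeroed, into the current MD5 hash request.
 */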
667 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
668 				   const struct in6_addr *daddr,
669 				   const struct in6_addr *saddr,
670 				   const struct tcphdr *th, int nbytes)
671 {
672 	struct tcp6_pseudohdr *bp;
673 	struct scatterlist sg;
674 	struct tcphdr *_th;
675 
676 	bp = hp->scratch;
677 	/* 1. TCP pseudo-header (RFC2460) */
678 	bp->saddr = *saddr;
679 	bp->daddr = *daddr;
680 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
681 	bp->len = cpu_to_be32(nbytes);
682 
683 	_th = (struct tcphdr *)(bp + 1);
684 	memcpy(_th, th, sizeof(*th));
685 	_th->check = 0;
686 
687 	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
688 	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
689 				sizeof(*bp) + sizeof(*th));
690 	return crypto_ahash_update(hp->md5_req);
691 }
692 
693 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
694 			       const struct in6_addr *daddr, struct in6_addr *saddr,
695 			       const struct tcphdr *th)
696 {
697 	struct tcp_md5sig_pool *hp;
698 	struct ahash_request *req;
699 
700 	hp = tcp_get_md5sig_pool();
701 	if (!hp)
702 		goto clear_hash_noput;
703 	req = hp->md5_req;
704 
705 	if (crypto_ahash_init(req))
706 		goto clear_hash;
707 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
708 		goto clear_hash;
709 	if (tcp_md5_hash_key(hp, key))
710 		goto clear_hash;
711 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
712 	if (crypto_ahash_final(req))
713 		goto clear_hash;
714 
715 	tcp_put_md5sig_pool();
716 	return 0;
717 
718 clear_hash:
719 	tcp_put_md5sig_pool();
720 clear_hash_noput:
721 	memset(md5_hash, 0, 16);
722 	return 1;
723 }
724 
725 static int tcp_v6_md5_hash_skb(char *md5_hash,
726 			       const struct tcp_md5sig_key *key,
727 			       const struct sock *sk,
728 			       const struct sk_buff *skb)
729 {
730 	const struct in6_addr *saddr, *daddr;
731 	struct tcp_md5sig_pool *hp;
732 	struct ahash_request *req;
733 	const struct tcphdr *th = tcp_hdr(skb);
734 
735 	if (sk) { /* valid for establish/request sockets */
736 		saddr = &sk->sk_v6_rcv_saddr;
737 		daddr = &sk->sk_v6_daddr;
738 	} else {
739 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
740 		saddr = &ip6h->saddr;
741 		daddr = &ip6h->daddr;
742 	}
743 
744 	hp = tcp_get_md5sig_pool();
745 	if (!hp)
746 		goto clear_hash_noput;
747 	req = hp->md5_req;
748 
749 	if (crypto_ahash_init(req))
750 		goto clear_hash;
751 
752 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
753 		goto clear_hash;
754 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
755 		goto clear_hash;
756 	if (tcp_md5_hash_key(hp, key))
757 		goto clear_hash;
758 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
759 	if (crypto_ahash_final(req))
760 		goto clear_hash;
761 
762 	tcp_put_md5sig_pool();
763 	return 0;
764 
765 clear_hash:
766 	tcp_put_md5sig_pool();
767 clear_hash_noput:
768 	memset(md5_hash, 0, 16);
769 	return 1;
770 }
771 
772 #endif
773 
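/* Initialize the IPv6 specific part of a new request sock from the
 * incoming SYN: remote/local addresses, the incoming interface for
 * link-local peers, and a reference to the SYN skb when IPv6 packet
 * options have to be preserved for the child socket.
 */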
774 static void tcp_v6_init_req(struct request_sock *req,
775 			    const struct sock *sk_listener,
776 			    struct sk_buff *skb)
777 {
778 	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
779 	struct inet_request_sock *ireq = inet_rsk(req);
780 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
781 
782 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
783 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
784 
785 	/* So that link locals have meaning */
786 	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
787 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
788 		ireq->ir_iif = tcp_v6_iif(skb);
789 
790 	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
791 	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
792 	     np->rxopt.bits.rxinfo ||
793 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
794 	     np->rxopt.bits.rxohlim || np->repflow)) {
795 		refcount_inc(&skb->users);
796 		ireq->pktopts = skb;
797 	}
798 }
799 
800 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
801 					  struct sk_buff *skb,
802 					  struct flowi *fl,
803 					  struct request_sock *req)
804 {
805 	tcp_v6_init_req(req, sk, skb);
806 
807 	if (security_inet_conn_request(sk, skb, req))
808 		return NULL;
809 
810 	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
811 }
812 
813 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
814 	.family		=	AF_INET6,
815 	.obj_size	=	sizeof(struct tcp6_request_sock),
816 	.rtx_syn_ack	=	tcp_rtx_synack,
817 	.send_ack	=	tcp_v6_reqsk_send_ack,
818 	.destructor	=	tcp_v6_reqsk_destructor,
819 	.send_reset	=	tcp_v6_send_reset,
820 	.syn_ack_timeout =	tcp_syn_ack_timeout,
821 };
822 
823 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
824 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
825 				sizeof(struct ipv6hdr),
826 #ifdef CONFIG_TCP_MD5SIG
827 	.req_md5_lookup	=	tcp_v6_md5_lookup,
828 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
829 #endif
830 #ifdef CONFIG_SYN_COOKIES
831 	.cookie_init_seq =	cookie_v6_init_sequence,
832 #endif
833 	.route_req	=	tcp_v6_route_req,
834 	.init_seq	=	tcp_v6_init_seq,
835 	.init_ts_off	=	tcp_v6_init_ts_off,
836 	.send_synack	=	tcp_v6_send_synack,
837 };
838 
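/* Send a bare TCP segment (RST or ACK) that is not attached to any full
 * socket, mirroring the addressing of the received skb; used by
 * tcp_v6_send_reset() and tcp_v6_send_ack() below.
 */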
839 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
840 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
841 				 int oif, struct tcp_md5sig_key *key, int rst,
842 				 u8 tclass, __be32 label, u32 priority, u32 txhash)
843 {
844 	const struct tcphdr *th = tcp_hdr(skb);
845 	struct tcphdr *t1;
846 	struct sk_buff *buff;
847 	struct flowi6 fl6;
848 	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
849 	struct sock *ctl_sk = net->ipv6.tcp_sk;
850 	unsigned int tot_len = sizeof(struct tcphdr);
851 	__be32 mrst = 0, *topt;
852 	struct dst_entry *dst;
853 	__u32 mark = 0;
854 
855 	if (tsecr)
856 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
857 #ifdef CONFIG_TCP_MD5SIG
858 	if (key)
859 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
860 #endif
861 
862 #ifdef CONFIG_MPTCP
863 	if (rst && !key) {
864 		mrst = mptcp_reset_option(skb);
865 
866 		if (mrst)
867 			tot_len += sizeof(__be32);
868 	}
869 #endif
870 
871 	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
872 	if (!buff)
873 		return;
874 
875 	skb_reserve(buff, MAX_TCP_HEADER);
876 
877 	t1 = skb_push(buff, tot_len);
878 	skb_reset_transport_header(buff);
879 
880 	/* Swap the send and the receive. */
881 	memset(t1, 0, sizeof(*t1));
882 	t1->dest = th->source;
883 	t1->source = th->dest;
884 	t1->doff = tot_len / 4;
885 	t1->seq = htonl(seq);
886 	t1->ack_seq = htonl(ack);
887 	t1->ack = !rst || !th->ack;
888 	t1->rst = rst;
889 	t1->window = htons(win);
890 
891 	topt = (__be32 *)(t1 + 1);
892 
893 	if (tsecr) {
894 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
895 				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
896 		*topt++ = htonl(tsval);
897 		*topt++ = htonl(tsecr);
898 	}
899 
900 	if (mrst)
901 		*topt++ = mrst;
902 
903 #ifdef CONFIG_TCP_MD5SIG
904 	if (key) {
905 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
906 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
907 		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
908 				    &ipv6_hdr(skb)->saddr,
909 				    &ipv6_hdr(skb)->daddr, t1);
910 	}
911 #endif
912 
913 	memset(&fl6, 0, sizeof(fl6));
914 	fl6.daddr = ipv6_hdr(skb)->saddr;
915 	fl6.saddr = ipv6_hdr(skb)->daddr;
916 	fl6.flowlabel = label;
917 
918 	buff->ip_summed = CHECKSUM_PARTIAL;
919 
920 	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
921 
922 	fl6.flowi6_proto = IPPROTO_TCP;
923 	if (rt6_need_strict(&fl6.daddr) && !oif)
924 		fl6.flowi6_oif = tcp_v6_iif(skb);
925 	else {
926 		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
927 			oif = skb->skb_iif;
928 
929 		fl6.flowi6_oif = oif;
930 	}
931 
932 	if (sk) {
933 		if (sk->sk_state == TCP_TIME_WAIT)
934 			mark = inet_twsk(sk)->tw_mark;
935 		else
936 			mark = READ_ONCE(sk->sk_mark);
937 		skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
938 	}
939 	if (txhash) {
940 		/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
941 		skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
942 	}
943 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
944 	fl6.fl6_dport = t1->dest;
945 	fl6.fl6_sport = t1->source;
946 	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
947 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
948 
949 	/* Always pass a socket to ip6_dst_lookup_flow, whether it is the
950 	 * flow owner or the RST control socket; the underlying function
951 	 * will use it to retrieve the network namespace.
952 	 */
953 	if (sk && sk->sk_state != TCP_TIME_WAIT)
954 		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
955 	else
956 		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
957 	if (!IS_ERR(dst)) {
958 		skb_dst_set(buff, dst);
959 		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
960 			 tclass & ~INET_ECN_MASK, priority);
961 		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
962 		if (rst)
963 			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
964 		return;
965 	}
966 
967 	kfree_skb(buff);
968 }
969 
970 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
971 {
972 	const struct tcphdr *th = tcp_hdr(skb);
973 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
974 	u32 seq = 0, ack_seq = 0;
975 	struct tcp_md5sig_key *key = NULL;
976 #ifdef CONFIG_TCP_MD5SIG
977 	const __u8 *hash_location = NULL;
978 	unsigned char newhash[16];
979 	int genhash;
980 	struct sock *sk1 = NULL;
981 #endif
982 	__be32 label = 0;
983 	u32 priority = 0;
984 	struct net *net;
985 	u32 txhash = 0;
986 	int oif = 0;
987 
988 	if (th->rst)
989 		return;
990 
991 	/* If sk not NULL, it means we did a successful lookup and incoming
992 	 * route had to be correct. prequeue might have dropped our dst.
993 	 */
994 	if (!sk && !ipv6_unicast_destination(skb))
995 		return;
996 
997 	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
998 #ifdef CONFIG_TCP_MD5SIG
999 	rcu_read_lock();
1000 	hash_location = tcp_parse_md5sig_option(th);
1001 	if (sk && sk_fullsock(sk)) {
1002 		int l3index;
1003 
1004 		/* sdif set, means packet ingressed via a device
1005 		 * in an L3 domain and inet_iif is set to it.
1006 		 */
1007 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1008 		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1009 	} else if (hash_location) {
1010 		int dif = tcp_v6_iif_l3_slave(skb);
1011 		int sdif = tcp_v6_sdif(skb);
1012 		int l3index;
1013 
1014 		/*
1015 		 * The active side is lost. Try to find the listening socket
1016 		 * through the source port, and then find the md5 key through
1017 		 * the listening socket. We do not lose security here:
1018 		 * the incoming packet is checked against the md5 hash of the
1019 		 * key we find, and no RST is generated if the hash doesn't match.
1020 		 */
1021 		sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1022 					    NULL, 0, &ipv6h->saddr, th->source,
1023 					    &ipv6h->daddr, ntohs(th->source),
1024 					    dif, sdif);
1025 		if (!sk1)
1026 			goto out;
1027 
1028 		/* sdif set, means packet ingressed via a device
1029 		 * in an L3 domain and dif is set to it.
1030 		 */
1031 		l3index = tcp_v6_sdif(skb) ? dif : 0;
1032 
1033 		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1034 		if (!key)
1035 			goto out;
1036 
1037 		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1038 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
1039 			goto out;
1040 	}
1041 #endif
1042 
1043 	if (th->ack)
1044 		seq = ntohl(th->ack_seq);
1045 	else
1046 		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1047 			  (th->doff << 2);
1048 
1049 	if (sk) {
1050 		oif = sk->sk_bound_dev_if;
1051 		if (sk_fullsock(sk)) {
1052 			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1053 
1054 			trace_tcp_send_reset(sk, skb);
1055 			if (np->repflow)
1056 				label = ip6_flowlabel(ipv6h);
1057 			priority = sk->sk_priority;
1058 			txhash = sk->sk_txhash;
1059 		}
1060 		if (sk->sk_state == TCP_TIME_WAIT) {
1061 			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1062 			priority = inet_twsk(sk)->tw_priority;
1063 			txhash = inet_twsk(sk)->tw_txhash;
1064 		}
1065 	} else {
1066 		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1067 			label = ip6_flowlabel(ipv6h);
1068 	}
1069 
1070 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1071 			     ipv6_get_dsfield(ipv6h), label, priority, txhash);
1072 
1073 #ifdef CONFIG_TCP_MD5SIG
1074 out:
1075 	rcu_read_unlock();
1076 #endif
1077 }
1078 
1079 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1080 			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1081 			    struct tcp_md5sig_key *key, u8 tclass,
1082 			    __be32 label, u32 priority, u32 txhash)
1083 {
1084 	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1085 			     tclass, label, priority, txhash);
1086 }
1087 
1088 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1089 {
1090 	struct inet_timewait_sock *tw = inet_twsk(sk);
1091 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1092 
1093 	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1094 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1095 			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1096 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1097 			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
1098 			tw->tw_txhash);
1099 
1100 	inet_twsk_put(tw);
1101 }
1102 
1103 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1104 				  struct request_sock *req)
1105 {
1106 	int l3index;
1107 
1108 	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1109 
1110 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1111 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1112 	 */
1113 	/* RFC 7323 2.3
1114 	 * The window field (SEG.WND) of every outgoing segment, with the
1115 	 * exception of <SYN> segments, MUST be right-shifted by
1116 	 * Rcv.Wind.Shift bits:
1117 	 */
1118 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1119 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1120 			tcp_rsk(req)->rcv_nxt,
1121 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1122 			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1123 			READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
1124 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1125 			ipv6_get_dsfield(ipv6_hdr(skb)), 0,
1126 			READ_ONCE(sk->sk_priority),
1127 			READ_ONCE(tcp_rsk(req)->txhash));
1128 }
1129 
1130 
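/* On a listener, a non-SYN segment may carry a syncookie ACK: validate
 * the cookie and create the child socket if it checks out (no-op
 * without CONFIG_SYN_COOKIES).
 */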
1131 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1132 {
1133 #ifdef CONFIG_SYN_COOKIES
1134 	const struct tcphdr *th = tcp_hdr(skb);
1135 
1136 	if (!th->syn)
1137 		sk = cookie_v6_check(sk, skb);
1138 #endif
1139 	return sk;
1140 }
1141 
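/* Compute the syncookie MSS and initial sequence number that would be
 * issued for this SYN, without creating a request sock; returns 0 when
 * a cookie should not be generated.
 */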
1142 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1143 			 struct tcphdr *th, u32 *cookie)
1144 {
1145 	u16 mss = 0;
1146 #ifdef CONFIG_SYN_COOKIES
1147 	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1148 				    &tcp_request_sock_ipv6_ops, sk, th);
1149 	if (mss) {
1150 		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1151 		tcp_synq_overflow(sk);
1152 	}
1153 #endif
1154 	return mss;
1155 }
1156 
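/* Handle an incoming SYN on a listening socket: v4-mapped traffic is
 * passed to the IPv4 handler, non-unicast destinations and v4-mapped
 * source addresses are dropped, the rest goes to tcp_conn_request().
 */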
1157 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1158 {
1159 	if (skb->protocol == htons(ETH_P_IP))
1160 		return tcp_v4_conn_request(sk, skb);
1161 
1162 	if (!ipv6_unicast_destination(skb))
1163 		goto drop;
1164 
1165 	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1166 		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1167 		return 0;
1168 	}
1169 
1170 	return tcp_conn_request(&tcp6_request_sock_ops,
1171 				&tcp_request_sock_ipv6_ops, sk, skb);
1172 
1173 drop:
1174 	tcp_listendrop(sk);
1175 	return 0; /* don't send reset */
1176 }
1177 
1178 static void tcp_v6_restore_cb(struct sk_buff *skb)
1179 {
1180 	/* We need to move header back to the beginning if xfrm6_policy_check()
1181 	 * and tcp_v6_fill_cb() are going to be called again.
1182 	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1183 	 */
1184 	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1185 		sizeof(struct inet6_skb_parm));
1186 }
1187 
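/* Create the child socket once the handshake completes. The v6-mapped
 * (IPv4 on an IPv6 socket) case is handled via tcp_v4_syn_recv_sock();
 * otherwise route the child, clone the listener's IPv6 options and MD5
 * keys, and insert the new socket into the hash tables.
 */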
1188 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1189 					 struct request_sock *req,
1190 					 struct dst_entry *dst,
1191 					 struct request_sock *req_unhash,
1192 					 bool *own_req)
1193 {
1194 	struct inet_request_sock *ireq;
1195 	struct ipv6_pinfo *newnp;
1196 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1197 	struct ipv6_txoptions *opt;
1198 	struct inet_sock *newinet;
1199 	bool found_dup_sk = false;
1200 	struct tcp_sock *newtp;
1201 	struct sock *newsk;
1202 #ifdef CONFIG_TCP_MD5SIG
1203 	struct tcp_md5sig_key *key;
1204 	int l3index;
1205 #endif
1206 	struct flowi6 fl6;
1207 
1208 	if (skb->protocol == htons(ETH_P_IP)) {
1209 		/*
1210 		 *	v6 mapped
1211 		 */
1212 
1213 		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1214 					     req_unhash, own_req);
1215 
1216 		if (!newsk)
1217 			return NULL;
1218 
1219 		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1220 
1221 		newnp = tcp_inet6_sk(newsk);
1222 		newtp = tcp_sk(newsk);
1223 
1224 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1225 
1226 		newnp->saddr = newsk->sk_v6_rcv_saddr;
1227 
1228 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1229 		if (sk_is_mptcp(newsk))
1230 			mptcpv6_handle_mapped(newsk, true);
1231 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1232 #ifdef CONFIG_TCP_MD5SIG
1233 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1234 #endif
1235 
1236 		newnp->ipv6_mc_list = NULL;
1237 		newnp->ipv6_ac_list = NULL;
1238 		newnp->ipv6_fl_list = NULL;
1239 		newnp->pktoptions  = NULL;
1240 		newnp->opt	   = NULL;
1241 		newnp->mcast_oif   = inet_iif(skb);
1242 		newnp->mcast_hops  = ip_hdr(skb)->ttl;
1243 		newnp->rcv_flowinfo = 0;
1244 		if (np->repflow)
1245 			newnp->flow_label = 0;
1246 
1247 		/*
1248 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1249 		 * here, tcp_create_openreq_child now does this for us, see the comment in
1250 		 * that function for the gory details. -acme
1251 		 */
1252 
1253 		/* This is a tricky place. Until this moment the IPv4 tcp code
1254 		   worked with the IPv6 icsk.icsk_af_ops.
1255 		   Sync it now.
1256 		 */
1257 		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1258 
1259 		return newsk;
1260 	}
1261 
1262 	ireq = inet_rsk(req);
1263 
1264 	if (sk_acceptq_is_full(sk))
1265 		goto out_overflow;
1266 
1267 	if (!dst) {
1268 		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1269 		if (!dst)
1270 			goto out;
1271 	}
1272 
1273 	newsk = tcp_create_openreq_child(sk, req, skb);
1274 	if (!newsk)
1275 		goto out_nonewsk;
1276 
1277 	/*
1278 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1279 	 * count here, tcp_create_openreq_child now does this for us, see the
1280 	 * comment in that function for the gory details. -acme
1281 	 */
1282 
1283 	newsk->sk_gso_type = SKB_GSO_TCPV6;
1284 	inet6_sk_rx_dst_set(newsk, skb);
1285 
1286 	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1287 
1288 	newtp = tcp_sk(newsk);
1289 	newinet = inet_sk(newsk);
1290 	newnp = tcp_inet6_sk(newsk);
1291 
1292 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1293 
1294 	ip6_dst_store(newsk, dst, NULL, NULL);
1295 
1296 	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1297 	newnp->saddr = ireq->ir_v6_loc_addr;
1298 	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1299 	newsk->sk_bound_dev_if = ireq->ir_iif;
1300 
1301 	/* Now IPv6 options...
1302 
1303 	   First: no IPv4 options.
1304 	 */
1305 	newinet->inet_opt = NULL;
1306 	newnp->ipv6_mc_list = NULL;
1307 	newnp->ipv6_ac_list = NULL;
1308 	newnp->ipv6_fl_list = NULL;
1309 
1310 	/* Clone RX bits */
1311 	newnp->rxopt.all = np->rxopt.all;
1312 
1313 	newnp->pktoptions = NULL;
1314 	newnp->opt	  = NULL;
1315 	newnp->mcast_oif  = tcp_v6_iif(skb);
1316 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1317 	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1318 	if (np->repflow)
1319 		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1320 
1321 	/* Set ToS of the new socket based upon the value of incoming SYN.
1322 	 * ECT bits are set later in tcp_init_transfer().
1323 	 */
1324 	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1325 		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1326 
1327 	/* Clone native IPv6 options from the listening socket (if any).
1328 
1329 	   Yes, keeping a reference count would be much more clever,
1330 	   but we do one more thing here: reattach optmem
1331 	   to newsk.
1332 	 */
1333 	opt = ireq->ipv6_opt;
1334 	if (!opt)
1335 		opt = rcu_dereference(np->opt);
1336 	if (opt) {
1337 		opt = ipv6_dup_options(newsk, opt);
1338 		RCU_INIT_POINTER(newnp->opt, opt);
1339 	}
1340 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1341 	if (opt)
1342 		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1343 						    opt->opt_flen;
1344 
1345 	tcp_ca_openreq_child(newsk, dst);
1346 
1347 	tcp_sync_mss(newsk, dst_mtu(dst));
1348 	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1349 
1350 	tcp_initialize_rcv_mss(newsk);
1351 
1352 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1353 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1354 
1355 #ifdef CONFIG_TCP_MD5SIG
1356 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1357 
1358 	/* Copy over the MD5 key from the original socket */
1359 	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1360 	if (key) {
1361 		const union tcp_md5_addr *addr;
1362 
1363 		addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
1364 		if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
1365 			inet_csk_prepare_forced_close(newsk);
1366 			tcp_done(newsk);
1367 			goto out;
1368 		}
1369 	}
1370 #endif
1371 
1372 	if (__inet_inherit_port(sk, newsk) < 0) {
1373 		inet_csk_prepare_forced_close(newsk);
1374 		tcp_done(newsk);
1375 		goto out;
1376 	}
1377 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1378 				       &found_dup_sk);
1379 	if (*own_req) {
1380 		tcp_move_syn(newtp, req);
1381 
1382 		/* Clone pktoptions received with SYN, if we own the req */
1383 		if (ireq->pktopts) {
1384 			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
1385 			consume_skb(ireq->pktopts);
1386 			ireq->pktopts = NULL;
1387 			if (newnp->pktoptions)
1388 				tcp_v6_restore_cb(newnp->pktoptions);
1389 		}
1390 	} else {
1391 		if (!req_unhash && found_dup_sk) {
1392 			/* This code path should only be executed in the
1393 			 * syncookie case.
1394 			 */
1395 			bh_unlock_sock(newsk);
1396 			sock_put(newsk);
1397 			newsk = NULL;
1398 		}
1399 	}
1400 
1401 	return newsk;
1402 
1403 out_overflow:
1404 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1405 out_nonewsk:
1406 	dst_release(dst);
1407 out:
1408 	tcp_listendrop(sk);
1409 	return NULL;
1410 }
1411 
1412 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1413 							   u32));
1414 /* The socket must have its spinlock held when we get
1415  * here, unless it is a TCP_LISTEN socket.
1416  *
1417  * We have a potential double-lock case here, so even when
1418  * doing backlog processing we use the BH locking scheme.
1419  * This is because we cannot sleep with the original spinlock
1420  * held.
1421  */
1422 INDIRECT_CALLABLE_SCOPE
1423 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1424 {
1425 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1426 	struct sk_buff *opt_skb = NULL;
1427 	enum skb_drop_reason reason;
1428 	struct tcp_sock *tp;
1429 
1430 	/* Imagine: the socket is IPv6. An IPv4 packet arrives,
1431 	   goes to the IPv4 receive handler and is backlogged.
1432 	   From the backlog it always goes here. Kerboom...
1433 	   Fortunately, tcp_rcv_established and rcv_established
1434 	   handle them correctly, but it is not the case with
1435 	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1436 	 */
1437 
1438 	if (skb->protocol == htons(ETH_P_IP))
1439 		return tcp_v4_do_rcv(sk, skb);
1440 
1441 	/*
1442 	 *	socket locking is here for SMP purposes as backlog rcv
1443 	 *	is currently called with bh processing disabled.
1444 	 */
1445 
1446 	/* Do Stevens' IPV6_PKTOPTIONS.
1447 
1448 	   Yes, guys, it is the only place in our code where we
1449 	   can do this without affecting IPv4.
1450 	   The rest of the code is protocol independent,
1451 	   and I do not like the idea of uglifying IPv4.
1452 
1453 	   Actually, the whole idea behind IPV6_PKTOPTIONS
1454 	   does not look very well thought out. For now we latch
1455 	   the options received in the last packet enqueued
1456 	   by tcp. Feel free to propose a better solution.
1457 					       --ANK (980728)
1458 	 */
1459 	if (np->rxopt.all)
1460 		opt_skb = skb_clone_and_charge_r(skb, sk);
1461 
1462 	reason = SKB_DROP_REASON_NOT_SPECIFIED;
1463 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1464 		struct dst_entry *dst;
1465 
1466 		dst = rcu_dereference_protected(sk->sk_rx_dst,
1467 						lockdep_sock_is_held(sk));
1468 
1469 		sock_rps_save_rxhash(sk, skb);
1470 		sk_mark_napi_id(sk, skb);
1471 		if (dst) {
1472 			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1473 			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1474 					    dst, sk->sk_rx_dst_cookie) == NULL) {
1475 				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1476 				dst_release(dst);
1477 			}
1478 		}
1479 
1480 		tcp_rcv_established(sk, skb);
1481 		if (opt_skb)
1482 			goto ipv6_pktoptions;
1483 		return 0;
1484 	}
1485 
1486 	if (tcp_checksum_complete(skb))
1487 		goto csum_err;
1488 
1489 	if (sk->sk_state == TCP_LISTEN) {
1490 		struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1491 
1492 		if (!nsk)
1493 			goto discard;
1494 
1495 		if (nsk != sk) {
1496 			if (tcp_child_process(sk, nsk, skb))
1497 				goto reset;
1498 			if (opt_skb)
1499 				__kfree_skb(opt_skb);
1500 			return 0;
1501 		}
1502 	} else
1503 		sock_rps_save_rxhash(sk, skb);
1504 
1505 	if (tcp_rcv_state_process(sk, skb))
1506 		goto reset;
1507 	if (opt_skb)
1508 		goto ipv6_pktoptions;
1509 	return 0;
1510 
1511 reset:
1512 	tcp_v6_send_reset(sk, skb);
1513 discard:
1514 	if (opt_skb)
1515 		__kfree_skb(opt_skb);
1516 	kfree_skb_reason(skb, reason);
1517 	return 0;
1518 csum_err:
1519 	reason = SKB_DROP_REASON_TCP_CSUM;
1520 	trace_tcp_bad_csum(skb);
1521 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1522 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1523 	goto discard;
1524 
1525 
1526 ipv6_pktoptions:
1527 	/* What is going on here? We latch the options only if:
1528 
1529 	   1. the skb was enqueued by tcp.
1530 	   2. the skb was added to the tail of the read queue, not out of order.
1531 	   3. the socket is not in a passive state.
1532 	   4. finally, it really contains options, which the user wants to receive.
1533 	 */
1534 	tp = tcp_sk(sk);
1535 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1536 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1537 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1538 			np->mcast_oif = tcp_v6_iif(opt_skb);
1539 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1540 			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1541 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1542 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1543 		if (np->repflow)
1544 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1545 		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1546 			tcp_v6_restore_cb(opt_skb);
1547 			opt_skb = xchg(&np->pktoptions, opt_skb);
1548 		} else {
1549 			__kfree_skb(opt_skb);
1550 			opt_skb = xchg(&np->pktoptions, NULL);
1551 		}
1552 	}
1553 
1554 	consume_skb(opt_skb);
1555 	return 0;
1556 }
1557 
1558 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1559 			   const struct tcphdr *th)
1560 {
1561 	/* This is tricky: we move IP6CB at its correct location into
1562 	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1563 	 * _decode_session6() uses IP6CB().
1564 	 * barrier() makes sure compiler won't play aliasing games.
1565 	 */
1566 	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1567 		sizeof(struct inet6_skb_parm));
1568 	barrier();
1569 
1570 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1571 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1572 				    skb->len - th->doff*4);
1573 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1574 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1575 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1576 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1577 	TCP_SKB_CB(skb)->sacked = 0;
1578 	TCP_SKB_CB(skb)->has_rxtstamp =
1579 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1580 }
1581 
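/* Main receive entry point for TCP over IPv6: validate the header and
 * checksum, look up the owning socket and dispatch according to its
 * state (new SYN-RECV request, time-wait, listening or established).
 */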
1582 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1583 {
1584 	enum skb_drop_reason drop_reason;
1585 	int sdif = inet6_sdif(skb);
1586 	int dif = inet6_iif(skb);
1587 	const struct tcphdr *th;
1588 	const struct ipv6hdr *hdr;
1589 	bool refcounted;
1590 	struct sock *sk;
1591 	int ret;
1592 	struct net *net = dev_net(skb->dev);
1593 
1594 	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1595 	if (skb->pkt_type != PACKET_HOST)
1596 		goto discard_it;
1597 
1598 	/*
1599 	 *	Count it even if it's bad.
1600 	 */
1601 	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1602 
1603 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1604 		goto discard_it;
1605 
1606 	th = (const struct tcphdr *)skb->data;
1607 
1608 	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1609 		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1610 		goto bad_packet;
1611 	}
1612 	if (!pskb_may_pull(skb, th->doff*4))
1613 		goto discard_it;
1614 
1615 	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1616 		goto csum_error;
1617 
1618 	th = (const struct tcphdr *)skb->data;
1619 	hdr = ipv6_hdr(skb);
1620 
1621 lookup:
1622 	sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1623 				th->source, th->dest, inet6_iif(skb), sdif,
1624 				&refcounted);
1625 	if (!sk)
1626 		goto no_tcp_socket;
1627 
1628 process:
1629 	if (sk->sk_state == TCP_TIME_WAIT)
1630 		goto do_time_wait;
1631 
1632 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1633 		struct request_sock *req = inet_reqsk(sk);
1634 		bool req_stolen = false;
1635 		struct sock *nsk;
1636 
1637 		sk = req->rsk_listener;
1638 		if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1639 			drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1640 		else
1641 			drop_reason = tcp_inbound_md5_hash(sk, skb,
1642 							   &hdr->saddr, &hdr->daddr,
1643 							   AF_INET6, dif, sdif);
1644 		if (drop_reason) {
1645 			sk_drops_add(sk, skb);
1646 			reqsk_put(req);
1647 			goto discard_it;
1648 		}
1649 		if (tcp_checksum_complete(skb)) {
1650 			reqsk_put(req);
1651 			goto csum_error;
1652 		}
1653 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1654 			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1655 			if (!nsk) {
1656 				inet_csk_reqsk_queue_drop_and_put(sk, req);
1657 				goto lookup;
1658 			}
1659 			sk = nsk;
1660 			/* reuseport_migrate_sock() has already held one sk_refcnt
1661 			 * before returning.
1662 			 */
1663 		} else {
1664 			sock_hold(sk);
1665 		}
1666 		refcounted = true;
1667 		nsk = NULL;
1668 		if (!tcp_filter(sk, skb)) {
1669 			th = (const struct tcphdr *)skb->data;
1670 			hdr = ipv6_hdr(skb);
1671 			tcp_v6_fill_cb(skb, hdr, th);
1672 			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1673 		} else {
1674 			drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1675 		}
1676 		if (!nsk) {
1677 			reqsk_put(req);
1678 			if (req_stolen) {
1679 				/* Another cpu got exclusive access to req
1680 				 * and created a full blown socket.
1681 				 * Try to feed this packet to this socket
1682 				 * instead of discarding it.
1683 				 */
1684 				tcp_v6_restore_cb(skb);
1685 				sock_put(sk);
1686 				goto lookup;
1687 			}
1688 			goto discard_and_relse;
1689 		}
1690 		nf_reset_ct(skb);
1691 		if (nsk == sk) {
1692 			reqsk_put(req);
1693 			tcp_v6_restore_cb(skb);
1694 		} else if (tcp_child_process(sk, nsk, skb)) {
1695 			tcp_v6_send_reset(nsk, skb);
1696 			goto discard_and_relse;
1697 		} else {
1698 			sock_put(sk);
1699 			return 0;
1700 		}
1701 	}
1702 
1703 	if (static_branch_unlikely(&ip6_min_hopcount)) {
1704 		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1705 		if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
1706 			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1707 			drop_reason = SKB_DROP_REASON_TCP_MINTTL;
1708 			goto discard_and_relse;
1709 		}
1710 	}
1711 
1712 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1713 		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1714 		goto discard_and_relse;
1715 	}
1716 
1717 	drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1718 					   AF_INET6, dif, sdif);
1719 	if (drop_reason)
1720 		goto discard_and_relse;
1721 
1722 	nf_reset_ct(skb);
1723 
1724 	if (tcp_filter(sk, skb)) {
1725 		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1726 		goto discard_and_relse;
1727 	}
1728 	th = (const struct tcphdr *)skb->data;
1729 	hdr = ipv6_hdr(skb);
1730 	tcp_v6_fill_cb(skb, hdr, th);
1731 
1732 	skb->dev = NULL;
1733 
1734 	if (sk->sk_state == TCP_LISTEN) {
1735 		ret = tcp_v6_do_rcv(sk, skb);
1736 		goto put_and_return;
1737 	}
1738 
1739 	sk_incoming_cpu_update(sk);
1740 
1741 	bh_lock_sock_nested(sk);
1742 	tcp_segs_in(tcp_sk(sk), skb);
1743 	ret = 0;
1744 	if (!sock_owned_by_user(sk)) {
1745 		ret = tcp_v6_do_rcv(sk, skb);
1746 	} else {
1747 		if (tcp_add_backlog(sk, skb, &drop_reason))
1748 			goto discard_and_relse;
1749 	}
1750 	bh_unlock_sock(sk);
1751 put_and_return:
1752 	if (refcounted)
1753 		sock_put(sk);
1754 	return ret ? -1 : 0;
1755 
1756 no_tcp_socket:
1757 	drop_reason = SKB_DROP_REASON_NO_SOCKET;
1758 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1759 		goto discard_it;
1760 
1761 	tcp_v6_fill_cb(skb, hdr, th);
1762 
1763 	if (tcp_checksum_complete(skb)) {
1764 csum_error:
1765 		drop_reason = SKB_DROP_REASON_TCP_CSUM;
1766 		trace_tcp_bad_csum(skb);
1767 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1768 bad_packet:
1769 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1770 	} else {
1771 		tcp_v6_send_reset(NULL, skb);
1772 	}
1773 
1774 discard_it:
1775 	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1776 	kfree_skb_reason(skb, drop_reason);
1777 	return 0;
1778 
1779 discard_and_relse:
1780 	sk_drops_add(sk, skb);
1781 	if (refcounted)
1782 		sock_put(sk);
1783 	goto discard_it;
1784 
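/* TIME_WAIT handling: validate the segment, then let
 * tcp_timewait_state_process() decide whether to accept a new connection,
 * send an ACK or RST, or silently discard the packet.
 */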
1785 do_time_wait:
1786 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1787 		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1788 		inet_twsk_put(inet_twsk(sk));
1789 		goto discard_it;
1790 	}
1791 
1792 	tcp_v6_fill_cb(skb, hdr, th);
1793 
1794 	if (tcp_checksum_complete(skb)) {
1795 		inet_twsk_put(inet_twsk(sk));
1796 		goto csum_error;
1797 	}
1798 
1799 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1800 	case TCP_TW_SYN:
1801 	{
1802 		struct sock *sk2;
1803 
1804 		sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1805 					    skb, __tcp_hdrlen(th),
1806 					    &ipv6_hdr(skb)->saddr, th->source,
1807 					    &ipv6_hdr(skb)->daddr,
1808 					    ntohs(th->dest),
1809 					    tcp_v6_iif_l3_slave(skb),
1810 					    sdif);
1811 		if (sk2) {
1812 			struct inet_timewait_sock *tw = inet_twsk(sk);
1813 			inet_twsk_deschedule_put(tw);
1814 			sk = sk2;
1815 			tcp_v6_restore_cb(skb);
1816 			refcounted = false;
1817 			goto process;
1818 		}
1819 	}
1820 		/* no listener found: fall through to ACK */
1821 		fallthrough;
1822 	case TCP_TW_ACK:
1823 		tcp_v6_timewait_ack(sk, skb);
1824 		break;
1825 	case TCP_TW_RST:
1826 		tcp_v6_send_reset(sk, skb);
1827 		inet_twsk_deschedule_put(inet_twsk(sk));
1828 		goto discard_it;
1829 	case TCP_TW_SUCCESS:
1830 		;
1831 	}
1832 	goto discard_it;
1833 }
1834 
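/* Early demux: look up an established socket before the routing decision so
 * that its cached input route (sk->sk_rx_dst) can be reused for this packet.
 */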
1835 void tcp_v6_early_demux(struct sk_buff *skb)
1836 {
1837 	struct net *net = dev_net(skb->dev);
1838 	const struct ipv6hdr *hdr;
1839 	const struct tcphdr *th;
1840 	struct sock *sk;
1841 
1842 	if (skb->pkt_type != PACKET_HOST)
1843 		return;
1844 
1845 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1846 		return;
1847 
1848 	hdr = ipv6_hdr(skb);
1849 	th = tcp_hdr(skb);
1850 
1851 	if (th->doff < sizeof(struct tcphdr) / 4)
1852 		return;
1853 
1854 	/* Note: we use inet6_iif() here, not tcp_v6_iif() */
1855 	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
1856 					&hdr->saddr, th->source,
1857 					&hdr->daddr, ntohs(th->dest),
1858 					inet6_iif(skb), inet6_sdif(skb));
1859 	if (sk) {
1860 		skb->sk = sk;
1861 		skb->destructor = sock_edemux;
1862 		if (sk_fullsock(sk)) {
1863 			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1864 
1865 			if (dst)
1866 				dst = dst_check(dst, sk->sk_rx_dst_cookie);
1867 			if (dst &&
1868 			    sk->sk_rx_dst_ifindex == skb->skb_iif)
1869 				skb_dst_set_noref(skb, dst);
1870 		}
1871 	}
1872 }
1873 
1874 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1875 	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
1876 	.twsk_unique	= tcp_twsk_unique,
1877 	.twsk_destructor = tcp_twsk_destructor,
1878 };
1879 
1880 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1881 {
1882 	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1883 }
1884 
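/*
 *	TCP over IPv6: af_ops used by native (non-mapped) IPv6 sockets
 */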
1885 const struct inet_connection_sock_af_ops ipv6_specific = {
1886 	.queue_xmit	   = inet6_csk_xmit,
1887 	.send_check	   = tcp_v6_send_check,
1888 	.rebuild_header	   = inet6_sk_rebuild_header,
1889 	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
1890 	.conn_request	   = tcp_v6_conn_request,
1891 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1892 	.net_header_len	   = sizeof(struct ipv6hdr),
1893 	.net_frag_header_len = sizeof(struct frag_hdr),
1894 	.setsockopt	   = ipv6_setsockopt,
1895 	.getsockopt	   = ipv6_getsockopt,
1896 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1897 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1898 	.mtu_reduced	   = tcp_v6_mtu_reduced,
1899 };
1900 
1901 #ifdef CONFIG_TCP_MD5SIG
1902 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1903 	.md5_lookup	=	tcp_v6_md5_lookup,
1904 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
1905 	.md5_parse	=	tcp_v6_parse_md5_keys,
1906 };
1907 #endif
1908 
1909 /*
1910  *	TCP over IPv4 via INET6 API
1911  */
1912 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1913 	.queue_xmit	   = ip_queue_xmit,
1914 	.send_check	   = tcp_v4_send_check,
1915 	.rebuild_header	   = inet_sk_rebuild_header,
1916 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1917 	.conn_request	   = tcp_v6_conn_request,
1918 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1919 	.net_header_len	   = sizeof(struct iphdr),
1920 	.setsockopt	   = ipv6_setsockopt,
1921 	.getsockopt	   = ipv6_getsockopt,
1922 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1923 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1924 	.mtu_reduced	   = tcp_v4_mtu_reduced,
1925 };
1926 
1927 #ifdef CONFIG_TCP_MD5SIG
1928 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1929 	.md5_lookup	=	tcp_v4_md5_lookup,
1930 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1931 	.md5_parse	=	tcp_v6_parse_md5_keys,
1932 };
1933 #endif
1934 
1935 /* NOTE: many fields are already zeroed by sk_alloc(), so they
1936  *       need not be initialized explicitly here.
1937  */
1938 static int tcp_v6_init_sock(struct sock *sk)
1939 {
1940 	struct inet_connection_sock *icsk = inet_csk(sk);
1941 
1942 	tcp_init_sock(sk);
1943 
1944 	icsk->icsk_af_ops = &ipv6_specific;
1945 
1946 #ifdef CONFIG_TCP_MD5SIG
1947 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1948 #endif
1949 
1950 	return 0;
1951 }
1952 
1953 #ifdef CONFIG_PROC_FS
1954 /* Proc filesystem TCPv6 sock list dumping. */
1955 static void get_openreq6(struct seq_file *seq,
1956 			 const struct request_sock *req, int i)
1957 {
1958 	long ttd = req->rsk_timer.expires - jiffies;
1959 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1960 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1961 
1962 	if (ttd < 0)
1963 		ttd = 0;
1964 
1965 	seq_printf(seq,
1966 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1967 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1968 		   i,
1969 		   src->s6_addr32[0], src->s6_addr32[1],
1970 		   src->s6_addr32[2], src->s6_addr32[3],
1971 		   inet_rsk(req)->ir_num,
1972 		   dest->s6_addr32[0], dest->s6_addr32[1],
1973 		   dest->s6_addr32[2], dest->s6_addr32[3],
1974 		   ntohs(inet_rsk(req)->ir_rmt_port),
1975 		   TCP_SYN_RECV,
1976 		   0, 0, /* could print option size, but that is af dependent. */
1977 		   1,   /* timers active (only the expire timer) */
1978 		   jiffies_to_clock_t(ttd),
1979 		   req->num_timeout,
1980 		   from_kuid_munged(seq_user_ns(seq),
1981 				    sock_i_uid(req->rsk_listener)),
1982 		   0,  /* non standard timer */
1983 		   0, /* open_requests have no inode */
1984 		   0, req);
1985 }
1986 
1987 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1988 {
1989 	const struct in6_addr *dest, *src;
1990 	__u16 destp, srcp;
1991 	int timer_active;
1992 	unsigned long timer_expires;
1993 	const struct inet_sock *inet = inet_sk(sp);
1994 	const struct tcp_sock *tp = tcp_sk(sp);
1995 	const struct inet_connection_sock *icsk = inet_csk(sp);
1996 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1997 	int rx_queue;
1998 	int state;
1999 
2000 	dest  = &sp->sk_v6_daddr;
2001 	src   = &sp->sk_v6_rcv_saddr;
2002 	destp = ntohs(inet->inet_dport);
2003 	srcp  = ntohs(inet->inet_sport);
2004 
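	/* timer_active encoding matches /proc/net/tcp: 1 = retransmit or
	 * loss-probe timer, 4 = zero-window probe timer, 2 = sk_timer
	 * (e.g. keepalive), 3 = TIME_WAIT (see get_timewait6_sock()),
	 * 0 = no timer pending.
	 */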
2005 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2006 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2007 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2008 		timer_active	= 1;
2009 		timer_expires	= icsk->icsk_timeout;
2010 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2011 		timer_active	= 4;
2012 		timer_expires	= icsk->icsk_timeout;
2013 	} else if (timer_pending(&sp->sk_timer)) {
2014 		timer_active	= 2;
2015 		timer_expires	= sp->sk_timer.expires;
2016 	} else {
2017 		timer_active	= 0;
2018 		timer_expires = jiffies;
2019 	}
2020 
2021 	state = inet_sk_state_load(sp);
2022 	if (state == TCP_LISTEN)
2023 		rx_queue = READ_ONCE(sp->sk_ack_backlog);
2024 	else
2025 		/* Because we don't lock the socket,
2026 		 * we might find a transient negative value.
2027 		 */
2028 		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2029 				      READ_ONCE(tp->copied_seq), 0);
2030 
2031 	seq_printf(seq,
2032 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2033 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2034 		   i,
2035 		   src->s6_addr32[0], src->s6_addr32[1],
2036 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2037 		   dest->s6_addr32[0], dest->s6_addr32[1],
2038 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2039 		   state,
2040 		   READ_ONCE(tp->write_seq) - tp->snd_una,
2041 		   rx_queue,
2042 		   timer_active,
2043 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
2044 		   icsk->icsk_retransmits,
2045 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2046 		   icsk->icsk_probes_out,
2047 		   sock_i_ino(sp),
2048 		   refcount_read(&sp->sk_refcnt), sp,
2049 		   jiffies_to_clock_t(icsk->icsk_rto),
2050 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
2051 		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2052 		   tcp_snd_cwnd(tp),
2053 		   state == TCP_LISTEN ?
2054 			fastopenq->max_qlen :
2055 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2056 		   );
2057 }
2058 
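/* Dump one TIME_WAIT socket in /proc/net/tcp6 format (timer code 3). */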
2059 static void get_timewait6_sock(struct seq_file *seq,
2060 			       struct inet_timewait_sock *tw, int i)
2061 {
2062 	long delta = tw->tw_timer.expires - jiffies;
2063 	const struct in6_addr *dest, *src;
2064 	__u16 destp, srcp;
2065 
2066 	dest = &tw->tw_v6_daddr;
2067 	src  = &tw->tw_v6_rcv_saddr;
2068 	destp = ntohs(tw->tw_dport);
2069 	srcp  = ntohs(tw->tw_sport);
2070 
2071 	seq_printf(seq,
2072 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2073 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2074 		   i,
2075 		   src->s6_addr32[0], src->s6_addr32[1],
2076 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2077 		   dest->s6_addr32[0], dest->s6_addr32[1],
2078 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2079 		   tw->tw_substate, 0, 0,
2080 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2081 		   refcount_read(&tw->tw_refcnt), tw);
2082 }
2083 
2084 static int tcp6_seq_show(struct seq_file *seq, void *v)
2085 {
2086 	struct tcp_iter_state *st;
2087 	struct sock *sk = v;
2088 
2089 	if (v == SEQ_START_TOKEN) {
2090 		seq_puts(seq,
2091 			 "  sl  "
2092 			 "local_address                         "
2093 			 "remote_address                        "
2094 			 "st tx_queue rx_queue tr tm->when retrnsmt"
2095 			 "   uid  timeout inode\n");
2096 		goto out;
2097 	}
2098 	st = seq->private;
2099 
2100 	if (sk->sk_state == TCP_TIME_WAIT)
2101 		get_timewait6_sock(seq, v, st->num);
2102 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2103 		get_openreq6(seq, v, st->num);
2104 	else
2105 		get_tcp6_sock(seq, v, st->num);
2106 out:
2107 	return 0;
2108 }
2109 
2110 static const struct seq_operations tcp6_seq_ops = {
2111 	.show		= tcp6_seq_show,
2112 	.start		= tcp_seq_start,
2113 	.next		= tcp_seq_next,
2114 	.stop		= tcp_seq_stop,
2115 };
2116 
2117 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2118 	.family		= AF_INET6,
2119 };
2120 
2121 int __net_init tcp6_proc_init(struct net *net)
2122 {
2123 	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2124 			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2125 		return -ENOMEM;
2126 	return 0;
2127 }
2128 
2129 void tcp6_proc_exit(struct net *net)
2130 {
2131 	remove_proc_entry("tcp6", net->proc_net);
2132 }
2133 #endif
2134 
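/* struct proto registered for AF_INET6 stream sockets; most operations are
 * the shared tcp_* helpers, with IPv6-specific connect, receive and hashing
 * hooks.
 */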
2135 struct proto tcpv6_prot = {
2136 	.name			= "TCPv6",
2137 	.owner			= THIS_MODULE,
2138 	.close			= tcp_close,
2139 	.pre_connect		= tcp_v6_pre_connect,
2140 	.connect		= tcp_v6_connect,
2141 	.disconnect		= tcp_disconnect,
2142 	.accept			= inet_csk_accept,
2143 	.ioctl			= tcp_ioctl,
2144 	.init			= tcp_v6_init_sock,
2145 	.destroy		= tcp_v4_destroy_sock,
2146 	.shutdown		= tcp_shutdown,
2147 	.setsockopt		= tcp_setsockopt,
2148 	.getsockopt		= tcp_getsockopt,
2149 	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
2150 	.keepalive		= tcp_set_keepalive,
2151 	.recvmsg		= tcp_recvmsg,
2152 	.sendmsg		= tcp_sendmsg,
2153 	.splice_eof		= tcp_splice_eof,
2154 	.backlog_rcv		= tcp_v6_do_rcv,
2155 	.release_cb		= tcp_release_cb,
2156 	.hash			= inet6_hash,
2157 	.unhash			= inet_unhash,
2158 	.get_port		= inet_csk_get_port,
2159 	.put_port		= inet_put_port,
2160 #ifdef CONFIG_BPF_SYSCALL
2161 	.psock_update_sk_prot	= tcp_bpf_update_proto,
2162 #endif
2163 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2164 	.leave_memory_pressure	= tcp_leave_memory_pressure,
2165 	.stream_memory_free	= tcp_stream_memory_free,
2166 	.sockets_allocated	= &tcp_sockets_allocated,
2167 
2168 	.memory_allocated	= &tcp_memory_allocated,
2169 	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,
2170 
2171 	.memory_pressure	= &tcp_memory_pressure,
2172 	.orphan_count		= &tcp_orphan_count,
2173 	.sysctl_mem		= sysctl_tcp_mem,
2174 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2175 	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2176 	.max_header		= MAX_TCP_HEADER,
2177 	.obj_size		= sizeof(struct tcp6_sock),
2178 	.ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6),
2179 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2180 	.twsk_prot		= &tcp6_timewait_sock_ops,
2181 	.rsk_prot		= &tcp6_request_sock_ops,
2182 	.h.hashinfo		= NULL,
2183 	.no_autobind		= true,
2184 	.diag_destroy		= tcp_abort,
2185 };
2186 EXPORT_SYMBOL_GPL(tcpv6_prot);
2187 
2188 static const struct inet6_protocol tcpv6_protocol = {
2189 	.handler	=	tcp_v6_rcv,
2190 	.err_handler	=	tcp_v6_err,
2191 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2192 };
2193 
2194 static struct inet_protosw tcpv6_protosw = {
2195 	.type		=	SOCK_STREAM,
2196 	.protocol	=	IPPROTO_TCP,
2197 	.prot		=	&tcpv6_prot,
2198 	.ops		=	&inet6_stream_ops,
2199 	.flags		=	INET_PROTOSW_PERMANENT |
2200 				INET_PROTOSW_ICSK,
2201 };
2202 
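/* Create the per-network-namespace control socket used to transmit
 * stack-generated segments such as RSTs and ACKs.
 */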
2203 static int __net_init tcpv6_net_init(struct net *net)
2204 {
2205 	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2206 				    SOCK_RAW, IPPROTO_TCP, net);
2207 }
2208 
2209 static void __net_exit tcpv6_net_exit(struct net *net)
2210 {
2211 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2212 }
2213 
2214 static struct pernet_operations tcpv6_net_ops = {
2215 	.init	    = tcpv6_net_init,
2216 	.exit	    = tcpv6_net_exit,
2217 };
2218 
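/* Register the IPv6 TCP protocol handler, the SOCK_STREAM protosw entry,
 * the per-netns ops and MPTCPv6; on failure, unwind in reverse order.
 */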
2219 int __init tcpv6_init(void)
2220 {
2221 	int ret;
2222 
2223 	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2224 	if (ret)
2225 		goto out;
2226 
2227 	/* register inet6 protocol */
2228 	ret = inet6_register_protosw(&tcpv6_protosw);
2229 	if (ret)
2230 		goto out_tcpv6_protocol;
2231 
2232 	ret = register_pernet_subsys(&tcpv6_net_ops);
2233 	if (ret)
2234 		goto out_tcpv6_protosw;
2235 
2236 	ret = mptcpv6_init();
2237 	if (ret)
2238 		goto out_tcpv6_pernet_subsys;
2239 
2240 out:
2241 	return ret;
2242 
2243 out_tcpv6_pernet_subsys:
2244 	unregister_pernet_subsys(&tcpv6_net_ops);
2245 out_tcpv6_protosw:
2246 	inet6_unregister_protosw(&tcpv6_protosw);
2247 out_tcpv6_protocol:
2248 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2249 	goto out;
2250 }
2251 
2252 void tcpv6_exit(void)
2253 {
2254 	unregister_pernet_subsys(&tcpv6_net_ops);
2255 	inet6_unregister_protosw(&tcpv6_protosw);
2256 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2257 }
2258