1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * TCP over IPv6
4 * Linux INET6 implementation
5 *
6 * Authors:
7 * Pedro Roque <roque@di.fc.ul.pt>
8 *
9 * Based on:
10 * linux/net/ipv4/tcp.c
11 * linux/net/ipv4/tcp_input.c
12 * linux/net/ipv4/tcp_output.c
13 *
14 * Fixes:
15 * Hideaki YOSHIFUJI : sin6_scope_id support
16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
18 * a single port at the same time.
19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68
69 #include <trace/events/tcp.h>
70
71 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 struct request_sock *req);
74
75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 const struct in6_addr *addr,
85 int l3index)
86 {
87 return NULL;
88 }
89 #endif
90
91 /* Helper returning the inet6 address from a given tcp socket.
92 * It can be used in TCP stack instead of inet6_sk(sk).
93 * This avoids a dereference and allows compiler optimizations.
94 * It is a specialized version of inet6_sk_generic().
95 */
96 #define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
97 struct tcp6_sock, tcp)->inet6)
98
99 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
100 {
101 struct dst_entry *dst = skb_dst(skb);
102
103 if (dst && dst_hold_safe(dst)) {
104 rcu_assign_pointer(sk->sk_rx_dst, dst);
105 sk->sk_rx_dst_ifindex = skb->skb_iif;
106 sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
107 }
108 }
109
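/* Pick the initial sequence number for an incoming connection from the
 * segment's address/port 4-tuple, using the keyed secure_tcpv6_seq() hash.
 */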
110 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
111 {
112 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
113 ipv6_hdr(skb)->saddr.s6_addr32,
114 tcp_hdr(skb)->dest,
115 tcp_hdr(skb)->source);
116 }
117
118 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
119 {
120 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
121 ipv6_hdr(skb)->saddr.s6_addr32);
122 }
123
124 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
125 int addr_len)
126 {
127 /* This check is replicated from tcp_v6_connect() and intended to
128 * prevent BPF program called below from accessing bytes that are out
129 * of the bounds specified by the user in addr_len.
130 */
131 if (addr_len < SIN6_LEN_RFC2133)
132 return -EINVAL;
133
134 sock_owned_by_me(sk);
135
136 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
137 }
138
139 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
140 int addr_len)
141 {
142 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
143 struct inet_connection_sock *icsk = inet_csk(sk);
144 struct in6_addr *saddr = NULL, *final_p, final;
145 struct inet_timewait_death_row *tcp_death_row;
146 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
147 struct inet_sock *inet = inet_sk(sk);
148 struct tcp_sock *tp = tcp_sk(sk);
149 struct net *net = sock_net(sk);
150 struct ipv6_txoptions *opt;
151 struct dst_entry *dst;
152 struct flowi6 fl6;
153 int addr_type;
154 int err;
155
156 if (addr_len < SIN6_LEN_RFC2133)
157 return -EINVAL;
158
159 if (usin->sin6_family != AF_INET6)
160 return -EAFNOSUPPORT;
161
162 memset(&fl6, 0, sizeof(fl6));
163
164 if (np->sndflow) {
165 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
166 IP6_ECN_flow_init(fl6.flowlabel);
167 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
168 struct ip6_flowlabel *flowlabel;
169 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
170 if (IS_ERR(flowlabel))
171 return -EINVAL;
172 fl6_sock_release(flowlabel);
173 }
174 }
175
176 /*
177 * connect() to INADDR_ANY means loopback (BSD'ism).
178 */
179
180 if (ipv6_addr_any(&usin->sin6_addr)) {
181 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
182 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
183 &usin->sin6_addr);
184 else
185 usin->sin6_addr = in6addr_loopback;
186 }
187
188 addr_type = ipv6_addr_type(&usin->sin6_addr);
189
190 if (addr_type & IPV6_ADDR_MULTICAST)
191 return -ENETUNREACH;
192
193 if (addr_type&IPV6_ADDR_LINKLOCAL) {
194 if (addr_len >= sizeof(struct sockaddr_in6) &&
195 usin->sin6_scope_id) {
196 /* If interface is set while binding, indices
197 * must coincide.
198 */
199 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
200 return -EINVAL;
201
202 sk->sk_bound_dev_if = usin->sin6_scope_id;
203 }
204
205 /* Connect to link-local address requires an interface */
206 if (!sk->sk_bound_dev_if)
207 return -EINVAL;
208 }
209
210 if (tp->rx_opt.ts_recent_stamp &&
211 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
212 tp->rx_opt.ts_recent = 0;
213 tp->rx_opt.ts_recent_stamp = 0;
214 WRITE_ONCE(tp->write_seq, 0);
215 }
216
217 sk->sk_v6_daddr = usin->sin6_addr;
218 np->flow_label = fl6.flowlabel;
219
220 /*
221 * TCP over IPv4
222 */
223
224 if (addr_type & IPV6_ADDR_MAPPED) {
225 u32 exthdrlen = icsk->icsk_ext_hdr_len;
226 struct sockaddr_in sin;
227
228 if (ipv6_only_sock(sk))
229 return -ENETUNREACH;
230
231 sin.sin_family = AF_INET;
232 sin.sin_port = usin->sin6_port;
233 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
234
235 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
236 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
237 if (sk_is_mptcp(sk))
238 mptcpv6_handle_mapped(sk, true);
239 sk->sk_backlog_rcv = tcp_v4_do_rcv;
240 #ifdef CONFIG_TCP_MD5SIG
241 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
242 #endif
243
244 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
245
246 if (err) {
247 icsk->icsk_ext_hdr_len = exthdrlen;
248 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
249 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
250 if (sk_is_mptcp(sk))
251 mptcpv6_handle_mapped(sk, false);
252 sk->sk_backlog_rcv = tcp_v6_do_rcv;
253 #ifdef CONFIG_TCP_MD5SIG
254 tp->af_specific = &tcp_sock_ipv6_specific;
255 #endif
256 goto failure;
257 }
258 np->saddr = sk->sk_v6_rcv_saddr;
259
260 return err;
261 }
262
263 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
264 saddr = &sk->sk_v6_rcv_saddr;
265
266 fl6.flowi6_proto = IPPROTO_TCP;
267 fl6.daddr = sk->sk_v6_daddr;
268 fl6.saddr = saddr ? *saddr : np->saddr;
269 fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
270 fl6.flowi6_oif = sk->sk_bound_dev_if;
271 fl6.flowi6_mark = sk->sk_mark;
272 fl6.fl6_dport = usin->sin6_port;
273 fl6.fl6_sport = inet->inet_sport;
274 fl6.flowi6_uid = sk->sk_uid;
275
276 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
277 final_p = fl6_update_dst(&fl6, opt, &final);
278
279 security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
280
281 dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
282 if (IS_ERR(dst)) {
283 err = PTR_ERR(dst);
284 goto failure;
285 }
286
287 tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
288
289 if (!saddr) {
290 saddr = &fl6.saddr;
291
292 err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
293 if (err)
294 goto failure;
295 }
296
297 /* set the source address */
298 np->saddr = *saddr;
299 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
300
301 sk->sk_gso_type = SKB_GSO_TCPV6;
302 ip6_dst_store(sk, dst, NULL, NULL);
303
304 icsk->icsk_ext_hdr_len = 0;
305 if (opt)
306 icsk->icsk_ext_hdr_len = opt->opt_flen +
307 opt->opt_nflen;
308
309 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
310
311 inet->inet_dport = usin->sin6_port;
312
313 tcp_set_state(sk, TCP_SYN_SENT);
314 err = inet6_hash_connect(tcp_death_row, sk);
315 if (err)
316 goto late_failure;
317
318 sk_set_txhash(sk);
319
320 if (likely(!tp->repair)) {
321 if (!tp->write_seq)
322 WRITE_ONCE(tp->write_seq,
323 secure_tcpv6_seq(np->saddr.s6_addr32,
324 sk->sk_v6_daddr.s6_addr32,
325 inet->inet_sport,
326 inet->inet_dport));
327 tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
328 sk->sk_v6_daddr.s6_addr32);
329 }
330
331 if (tcp_fastopen_defer_connect(sk, &err))
332 return err;
333 if (err)
334 goto late_failure;
335
336 err = tcp_connect(sk);
337 if (err)
338 goto late_failure;
339
340 return 0;
341
342 late_failure:
343 tcp_set_state(sk, TCP_CLOSE);
344 inet_bhash2_reset_saddr(sk);
345 failure:
346 inet->inet_dport = 0;
347 sk->sk_route_caps = 0;
348 return err;
349 }
350
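/* Apply an ICMPv6 Packet Too Big notification: ignore bogus MSS increases,
 * update the path MTU and, if our cached MSS is now too large, shrink it
 * and retransmit.
 */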
351 static void tcp_v6_mtu_reduced(struct sock *sk)
352 {
353 struct dst_entry *dst;
354 u32 mtu;
355
356 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
357 return;
358
359 mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
360
361 /* Drop requests trying to increase our current mss.
362 * Check done in __ip6_rt_update_pmtu() is too late.
363 */
364 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
365 return;
366
367 dst = inet6_csk_update_pmtu(sk, mtu);
368 if (!dst)
369 return;
370
371 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
372 tcp_sync_mss(sk, dst_mtu(dst));
373 tcp_simple_retransmit(sk);
374 }
375 }
376
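/* ICMPv6 error handler for TCP: look up the socket the error refers to and
 * react to it (PMTU update, redirect, or reporting the error to the user).
 */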
377 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
378 u8 type, u8 code, int offset, __be32 info)
379 {
380 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
381 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
382 struct net *net = dev_net(skb->dev);
383 struct request_sock *fastopen;
384 struct ipv6_pinfo *np;
385 struct tcp_sock *tp;
386 __u32 seq, snd_una;
387 struct sock *sk;
388 bool fatal;
389 int err;
390
391 sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
392 &hdr->daddr, th->dest,
393 &hdr->saddr, ntohs(th->source),
394 skb->dev->ifindex, inet6_sdif(skb));
395
396 if (!sk) {
397 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
398 ICMP6_MIB_INERRORS);
399 return -ENOENT;
400 }
401
402 if (sk->sk_state == TCP_TIME_WAIT) {
403 inet_twsk_put(inet_twsk(sk));
404 return 0;
405 }
406 seq = ntohl(th->seq);
407 fatal = icmpv6_err_convert(type, code, &err);
408 if (sk->sk_state == TCP_NEW_SYN_RECV) {
409 tcp_req_err(sk, seq, fatal);
410 return 0;
411 }
412
413 bh_lock_sock(sk);
414 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
415 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
416
417 if (sk->sk_state == TCP_CLOSE)
418 goto out;
419
420 if (static_branch_unlikely(&ip6_min_hopcount)) {
421 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
422 if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
423 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
424 goto out;
425 }
426 }
427
428 tp = tcp_sk(sk);
429 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
430 fastopen = rcu_dereference(tp->fastopen_rsk);
431 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
432 if (sk->sk_state != TCP_LISTEN &&
433 !between(seq, snd_una, tp->snd_nxt)) {
434 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
435 goto out;
436 }
437
438 np = tcp_inet6_sk(sk);
439
440 if (type == NDISC_REDIRECT) {
441 if (!sock_owned_by_user(sk)) {
442 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
443
444 if (dst)
445 dst->ops->redirect(dst, sk, skb);
446 }
447 goto out;
448 }
449
450 if (type == ICMPV6_PKT_TOOBIG) {
451 u32 mtu = ntohl(info);
452
453 /* We are not interested in TCP_LISTEN and open_requests
454 * (SYN-ACKs sent out by Linux are always <576 bytes so
455 * they should go through unfragmented).
456 */
457 if (sk->sk_state == TCP_LISTEN)
458 goto out;
459
460 if (!ip6_sk_accept_pmtu(sk))
461 goto out;
462
463 if (mtu < IPV6_MIN_MTU)
464 goto out;
465
466 WRITE_ONCE(tp->mtu_info, mtu);
467
468 if (!sock_owned_by_user(sk))
469 tcp_v6_mtu_reduced(sk);
470 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
471 &sk->sk_tsq_flags))
472 sock_hold(sk);
473 goto out;
474 }
475
476
477 /* Might be for a request_sock */
478 switch (sk->sk_state) {
479 case TCP_SYN_SENT:
480 case TCP_SYN_RECV:
481 /* Only in fast or simultaneous open. If a fast open socket is
482 * already accepted it is treated as a connected one below.
483 */
484 if (fastopen && !fastopen->sk)
485 break;
486
487 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
488
489 if (!sock_owned_by_user(sk))
490 tcp_done_with_error(sk, err);
491 else
492 WRITE_ONCE(sk->sk_err_soft, err);
493 goto out;
494 case TCP_LISTEN:
495 break;
496 default:
497 /* check if this ICMP message allows revert of backoff.
498 * (see RFC 6069)
499 */
500 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
501 code == ICMPV6_NOROUTE)
502 tcp_ld_RTO_revert(sk, seq);
503 }
504
505 if (!sock_owned_by_user(sk) && np->recverr) {
506 WRITE_ONCE(sk->sk_err, err);
507 sk_error_report(sk);
508 } else {
509 WRITE_ONCE(sk->sk_err_soft, err);
510 }
511 out:
512 bh_unlock_sock(sk);
513 sock_put(sk);
514 return 0;
515 }
516
517
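/* Build and transmit a SYN-ACK for the given request socket, grabbing a
 * route first if the caller did not supply a destination entry.
 */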
518 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
519 struct flowi *fl,
520 struct request_sock *req,
521 struct tcp_fastopen_cookie *foc,
522 enum tcp_synack_type synack_type,
523 struct sk_buff *syn_skb)
524 {
525 struct inet_request_sock *ireq = inet_rsk(req);
526 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
527 struct ipv6_txoptions *opt;
528 struct flowi6 *fl6 = &fl->u.ip6;
529 struct sk_buff *skb;
530 int err = -ENOMEM;
531 u8 tclass;
532
533 /* First, grab a route. */
534 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
535 IPPROTO_TCP)) == NULL)
536 goto done;
537
538 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
539
540 if (skb) {
541 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
542 &ireq->ir_v6_rmt_addr);
543
544 fl6->daddr = ireq->ir_v6_rmt_addr;
545 if (np->repflow && ireq->pktopts)
546 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
547
548 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
549 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
550 (np->tclass & INET_ECN_MASK) :
551 np->tclass;
552
553 if (!INET_ECN_is_capable(tclass) &&
554 tcp_bpf_ca_needs_ecn((struct sock *)req))
555 tclass |= INET_ECN_ECT_0;
556
557 rcu_read_lock();
558 opt = ireq->ipv6_opt;
559 if (!opt)
560 opt = rcu_dereference(np->opt);
561 err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
562 opt, tclass, sk->sk_priority);
563 rcu_read_unlock();
564 err = net_xmit_eval(err);
565 }
566
567 done:
568 return err;
569 }
570
571
572 static void tcp_v6_reqsk_destructor(struct request_sock *req)
573 {
574 kfree(inet_rsk(req)->ipv6_opt);
575 consume_skb(inet_rsk(req)->pktopts);
576 }
577
578 #ifdef CONFIG_TCP_MD5SIG
579 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
580 const struct in6_addr *addr,
581 int l3index)
582 {
583 return tcp_md5_do_lookup(sk, l3index,
584 (union tcp_md5_addr *)addr, AF_INET6);
585 }
586
587 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
588 const struct sock *addr_sk)
589 {
590 int l3index;
591
592 l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
593 addr_sk->sk_bound_dev_if);
594 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
595 l3index);
596 }
597
598 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
599 sockptr_t optval, int optlen)
600 {
601 struct tcp_md5sig cmd;
602 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
603 int l3index = 0;
604 u8 prefixlen;
605 u8 flags;
606
607 if (optlen < sizeof(cmd))
608 return -EINVAL;
609
610 if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
611 return -EFAULT;
612
613 if (sin6->sin6_family != AF_INET6)
614 return -EINVAL;
615
616 flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
617
618 if (optname == TCP_MD5SIG_EXT &&
619 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
620 prefixlen = cmd.tcpm_prefixlen;
621 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
622 prefixlen > 32))
623 return -EINVAL;
624 } else {
625 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
626 }
627
628 if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
629 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
630 struct net_device *dev;
631
632 rcu_read_lock();
633 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
634 if (dev && netif_is_l3_master(dev))
635 l3index = dev->ifindex;
636 rcu_read_unlock();
637
638 /* ok to reference set/not set outside of rcu;
639 * right now device MUST be an L3 master
640 */
641 if (!dev || !l3index)
642 return -EINVAL;
643 }
644
645 if (!cmd.tcpm_keylen) {
646 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
647 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
648 AF_INET, prefixlen,
649 l3index, flags);
650 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
651 AF_INET6, prefixlen, l3index, flags);
652 }
653
654 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
655 return -EINVAL;
656
657 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
658 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
659 AF_INET, prefixlen, l3index, flags,
660 cmd.tcpm_key, cmd.tcpm_keylen);
661
662 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
663 AF_INET6, prefixlen, l3index, flags,
664 cmd.tcpm_key, cmd.tcpm_keylen);
665 }
666
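/* Feed the IPv6 pseudo-header and the TCP header (with its checksum field
 * zeroed) into the MD5 hash request.
 */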
667 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
668 const struct in6_addr *daddr,
669 const struct in6_addr *saddr,
670 const struct tcphdr *th, int nbytes)
671 {
672 struct tcp6_pseudohdr *bp;
673 struct scatterlist sg;
674 struct tcphdr *_th;
675
676 bp = hp->scratch;
677 /* 1. TCP pseudo-header (RFC2460) */
678 bp->saddr = *saddr;
679 bp->daddr = *daddr;
680 bp->protocol = cpu_to_be32(IPPROTO_TCP);
681 bp->len = cpu_to_be32(nbytes);
682
683 _th = (struct tcphdr *)(bp + 1);
684 memcpy(_th, th, sizeof(*th));
685 _th->check = 0;
686
687 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
688 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
689 sizeof(*bp) + sizeof(*th));
690 return crypto_ahash_update(hp->md5_req);
691 }
692
693 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
694 const struct in6_addr *daddr, struct in6_addr *saddr,
695 const struct tcphdr *th)
696 {
697 struct tcp_md5sig_pool *hp;
698 struct ahash_request *req;
699
700 hp = tcp_get_md5sig_pool();
701 if (!hp)
702 goto clear_hash_noput;
703 req = hp->md5_req;
704
705 if (crypto_ahash_init(req))
706 goto clear_hash;
707 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
708 goto clear_hash;
709 if (tcp_md5_hash_key(hp, key))
710 goto clear_hash;
711 ahash_request_set_crypt(req, NULL, md5_hash, 0);
712 if (crypto_ahash_final(req))
713 goto clear_hash;
714
715 tcp_put_md5sig_pool();
716 return 0;
717
718 clear_hash:
719 tcp_put_md5sig_pool();
720 clear_hash_noput:
721 memset(md5_hash, 0, 16);
722 return 1;
723 }
724
725 static int tcp_v6_md5_hash_skb(char *md5_hash,
726 const struct tcp_md5sig_key *key,
727 const struct sock *sk,
728 const struct sk_buff *skb)
729 {
730 const struct in6_addr *saddr, *daddr;
731 struct tcp_md5sig_pool *hp;
732 struct ahash_request *req;
733 const struct tcphdr *th = tcp_hdr(skb);
734
735 if (sk) { /* valid for establish/request sockets */
736 saddr = &sk->sk_v6_rcv_saddr;
737 daddr = &sk->sk_v6_daddr;
738 } else {
739 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
740 saddr = &ip6h->saddr;
741 daddr = &ip6h->daddr;
742 }
743
744 hp = tcp_get_md5sig_pool();
745 if (!hp)
746 goto clear_hash_noput;
747 req = hp->md5_req;
748
749 if (crypto_ahash_init(req))
750 goto clear_hash;
751
752 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
753 goto clear_hash;
754 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
755 goto clear_hash;
756 if (tcp_md5_hash_key(hp, key))
757 goto clear_hash;
758 ahash_request_set_crypt(req, NULL, md5_hash, 0);
759 if (crypto_ahash_final(req))
760 goto clear_hash;
761
762 tcp_put_md5sig_pool();
763 return 0;
764
765 clear_hash:
766 tcp_put_md5sig_pool();
767 clear_hash_noput:
768 memset(md5_hash, 0, 16);
769 return 1;
770 }
771
772 #endif
773
774 static void tcp_v6_init_req(struct request_sock *req,
775 const struct sock *sk_listener,
776 struct sk_buff *skb)
777 {
778 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
779 struct inet_request_sock *ireq = inet_rsk(req);
780 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
781
782 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
783 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
784
785 /* So that link locals have meaning */
786 if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
787 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
788 ireq->ir_iif = tcp_v6_iif(skb);
789
790 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
791 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
792 np->rxopt.bits.rxinfo ||
793 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
794 np->rxopt.bits.rxohlim || np->repflow)) {
795 refcount_inc(&skb->users);
796 ireq->pktopts = skb;
797 }
798 }
799
800 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
801 struct sk_buff *skb,
802 struct flowi *fl,
803 struct request_sock *req)
804 {
805 tcp_v6_init_req(req, sk, skb);
806
807 if (security_inet_conn_request(sk, skb, req))
808 return NULL;
809
810 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
811 }
812
813 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
814 .family = AF_INET6,
815 .obj_size = sizeof(struct tcp6_request_sock),
816 .rtx_syn_ack = tcp_rtx_synack,
817 .send_ack = tcp_v6_reqsk_send_ack,
818 .destructor = tcp_v6_reqsk_destructor,
819 .send_reset = tcp_v6_send_reset,
820 .syn_ack_timeout = tcp_syn_ack_timeout,
821 };
822
823 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
824 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
825 sizeof(struct ipv6hdr),
826 #ifdef CONFIG_TCP_MD5SIG
827 .req_md5_lookup = tcp_v6_md5_lookup,
828 .calc_md5_hash = tcp_v6_md5_hash_skb,
829 #endif
830 #ifdef CONFIG_SYN_COOKIES
831 .cookie_init_seq = cookie_v6_init_sequence,
832 #endif
833 .route_req = tcp_v6_route_req,
834 .init_seq = tcp_v6_init_seq,
835 .init_ts_off = tcp_v6_init_ts_off,
836 .send_synack = tcp_v6_send_synack,
837 };
838
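/* Construct and send a bare ACK or RST (no payload) on the per-netns control
 * socket, with addresses and ports echoed back from the received segment.
 */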
839 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
840 u32 ack, u32 win, u32 tsval, u32 tsecr,
841 int oif, struct tcp_md5sig_key *key, int rst,
842 u8 tclass, __be32 label, u32 priority, u32 txhash)
843 {
844 const struct tcphdr *th = tcp_hdr(skb);
845 struct tcphdr *t1;
846 struct sk_buff *buff;
847 struct flowi6 fl6;
848 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
849 struct sock *ctl_sk = net->ipv6.tcp_sk;
850 unsigned int tot_len = sizeof(struct tcphdr);
851 __be32 mrst = 0, *topt;
852 struct dst_entry *dst;
853 __u32 mark = 0;
854
855 if (tsecr)
856 tot_len += TCPOLEN_TSTAMP_ALIGNED;
857 #ifdef CONFIG_TCP_MD5SIG
858 if (key)
859 tot_len += TCPOLEN_MD5SIG_ALIGNED;
860 #endif
861
862 #ifdef CONFIG_MPTCP
863 if (rst && !key) {
864 mrst = mptcp_reset_option(skb);
865
866 if (mrst)
867 tot_len += sizeof(__be32);
868 }
869 #endif
870
871 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
872 if (!buff)
873 return;
874
875 skb_reserve(buff, MAX_TCP_HEADER);
876
877 t1 = skb_push(buff, tot_len);
878 skb_reset_transport_header(buff);
879
880 /* Swap the send and the receive. */
881 memset(t1, 0, sizeof(*t1));
882 t1->dest = th->source;
883 t1->source = th->dest;
884 t1->doff = tot_len / 4;
885 t1->seq = htonl(seq);
886 t1->ack_seq = htonl(ack);
887 t1->ack = !rst || !th->ack;
888 t1->rst = rst;
889 t1->window = htons(win);
890
891 topt = (__be32 *)(t1 + 1);
892
893 if (tsecr) {
894 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
895 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
896 *topt++ = htonl(tsval);
897 *topt++ = htonl(tsecr);
898 }
899
900 if (mrst)
901 *topt++ = mrst;
902
903 #ifdef CONFIG_TCP_MD5SIG
904 if (key) {
905 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
906 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
907 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
908 &ipv6_hdr(skb)->saddr,
909 &ipv6_hdr(skb)->daddr, t1);
910 }
911 #endif
912
913 memset(&fl6, 0, sizeof(fl6));
914 fl6.daddr = ipv6_hdr(skb)->saddr;
915 fl6.saddr = ipv6_hdr(skb)->daddr;
916 fl6.flowlabel = label;
917
918 buff->ip_summed = CHECKSUM_PARTIAL;
919
920 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
921
922 fl6.flowi6_proto = IPPROTO_TCP;
923 if (rt6_need_strict(&fl6.daddr) && !oif)
924 fl6.flowi6_oif = tcp_v6_iif(skb);
925 else {
926 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
927 oif = skb->skb_iif;
928
929 fl6.flowi6_oif = oif;
930 }
931
932 if (sk) {
933 if (sk->sk_state == TCP_TIME_WAIT)
934 mark = inet_twsk(sk)->tw_mark;
935 else
936 mark = READ_ONCE(sk->sk_mark);
937 skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
938 }
939 if (txhash) {
940 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
941 skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
942 }
943 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
944 fl6.fl6_dport = t1->dest;
945 fl6.fl6_sport = t1->source;
946 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
947 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
948
949 /* Pass a socket to ip6_dst_lookup_flow whether it is for a RST or not;
950 * the underlying function will use it to retrieve the network
951 * namespace.
952 */
953 if (sk && sk->sk_state != TCP_TIME_WAIT)
954 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
955 else
956 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
957 if (!IS_ERR(dst)) {
958 skb_dst_set(buff, dst);
959 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
960 tclass & ~INET_ECN_MASK, priority);
961 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
962 if (rst)
963 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
964 return;
965 }
966
967 kfree_skb(buff);
968 }
969
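/* Send a RST in reply to @skb. When no full socket was found, the listener
 * is looked up so a present MD5 signature can still be validated first.
 */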
970 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
971 {
972 const struct tcphdr *th = tcp_hdr(skb);
973 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
974 u32 seq = 0, ack_seq = 0;
975 struct tcp_md5sig_key *key = NULL;
976 #ifdef CONFIG_TCP_MD5SIG
977 const __u8 *hash_location = NULL;
978 unsigned char newhash[16];
979 int genhash;
980 struct sock *sk1 = NULL;
981 #endif
982 __be32 label = 0;
983 u32 priority = 0;
984 struct net *net;
985 u32 txhash = 0;
986 int oif = 0;
987
988 if (th->rst)
989 return;
990
991 /* If sk is not NULL, it means we did a successful lookup and the incoming
992 * route had to be correct. prequeue might have dropped our dst.
993 */
994 if (!sk && !ipv6_unicast_destination(skb))
995 return;
996
997 net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
998 #ifdef CONFIG_TCP_MD5SIG
999 rcu_read_lock();
1000 hash_location = tcp_parse_md5sig_option(th);
1001 if (sk && sk_fullsock(sk)) {
1002 int l3index;
1003
1004 /* sdif set, means packet ingressed via a device
1005 * in an L3 domain and inet_iif is set to it.
1006 */
1007 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1008 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1009 } else if (hash_location) {
1010 int dif = tcp_v6_iif_l3_slave(skb);
1011 int sdif = tcp_v6_sdif(skb);
1012 int l3index;
1013
1014 /*
1015 * The active side is lost. Try to find the listening socket through
1016 * the source port, and then find the md5 key through that listening socket.
1017 * We do not lose security here:
1018 * the incoming packet is checked against the md5 hash of the key we find,
1019 * and no RST is generated if the md5 hash doesn't match.
1020 */
1021 sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1022 NULL, 0, &ipv6h->saddr, th->source,
1023 &ipv6h->daddr, ntohs(th->source),
1024 dif, sdif);
1025 if (!sk1)
1026 goto out;
1027
1028 /* sdif set, means packet ingressed via a device
1029 * in an L3 domain and dif is set to it.
1030 */
1031 l3index = tcp_v6_sdif(skb) ? dif : 0;
1032
1033 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1034 if (!key)
1035 goto out;
1036
1037 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1038 if (genhash || memcmp(hash_location, newhash, 16) != 0)
1039 goto out;
1040 }
1041 #endif
1042
1043 if (th->ack)
1044 seq = ntohl(th->ack_seq);
1045 else
1046 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1047 (th->doff << 2);
1048
1049 if (sk) {
1050 oif = sk->sk_bound_dev_if;
1051 if (sk_fullsock(sk)) {
1052 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1053
1054 trace_tcp_send_reset(sk, skb);
1055 if (np->repflow)
1056 label = ip6_flowlabel(ipv6h);
1057 priority = sk->sk_priority;
1058 txhash = sk->sk_txhash;
1059 }
1060 if (sk->sk_state == TCP_TIME_WAIT) {
1061 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1062 priority = inet_twsk(sk)->tw_priority;
1063 txhash = inet_twsk(sk)->tw_txhash;
1064 }
1065 } else {
1066 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1067 label = ip6_flowlabel(ipv6h);
1068 }
1069
1070 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1071 ipv6_get_dsfield(ipv6h), label, priority, txhash);
1072
1073 #ifdef CONFIG_TCP_MD5SIG
1074 out:
1075 rcu_read_unlock();
1076 #endif
1077 }
1078
1079 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1080 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1081 struct tcp_md5sig_key *key, u8 tclass,
1082 __be32 label, u32 priority, u32 txhash)
1083 {
1084 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1085 tclass, label, priority, txhash);
1086 }
1087
1088 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1089 {
1090 struct inet_timewait_sock *tw = inet_twsk(sk);
1091 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1092
1093 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1094 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1095 tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1096 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1097 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
1098 tw->tw_txhash);
1099
1100 inet_twsk_put(tw);
1101 }
1102
1103 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1104 struct request_sock *req)
1105 {
1106 int l3index;
1107
1108 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1109
1110 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1111 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1112 */
1113 /* RFC 7323 2.3
1114 * The window field (SEG.WND) of every outgoing segment, with the
1115 * exception of <SYN> segments, MUST be right-shifted by
1116 * Rcv.Wind.Shift bits:
1117 */
1118 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1119 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1120 tcp_rsk(req)->rcv_nxt,
1121 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1122 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1123 READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
1124 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1125 ipv6_get_dsfield(ipv6_hdr(skb)), 0,
1126 READ_ONCE(sk->sk_priority),
1127 READ_ONCE(tcp_rsk(req)->txhash));
1128 }
1129
1130
1131 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1132 {
1133 #ifdef CONFIG_SYN_COOKIES
1134 const struct tcphdr *th = tcp_hdr(skb);
1135
1136 if (!th->syn)
1137 sk = cookie_v6_check(sk, skb);
1138 #endif
1139 return sk;
1140 }
1141
1142 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1143 struct tcphdr *th, u32 *cookie)
1144 {
1145 u16 mss = 0;
1146 #ifdef CONFIG_SYN_COOKIES
1147 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1148 &tcp_request_sock_ipv6_ops, sk, th);
1149 if (mss) {
1150 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1151 tcp_synq_overflow(sk);
1152 }
1153 #endif
1154 return mss;
1155 }
1156
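/* Handle an incoming SYN on a listening socket; v4-mapped traffic is handed
 * off to the IPv4 handler.
 */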
1157 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1158 {
1159 if (skb->protocol == htons(ETH_P_IP))
1160 return tcp_v4_conn_request(sk, skb);
1161
1162 if (!ipv6_unicast_destination(skb))
1163 goto drop;
1164
1165 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1166 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1167 return 0;
1168 }
1169
1170 return tcp_conn_request(&tcp6_request_sock_ops,
1171 &tcp_request_sock_ipv6_ops, sk, skb);
1172
1173 drop:
1174 tcp_listendrop(sk);
1175 return 0; /* don't send reset */
1176 }
1177
1178 static void tcp_v6_restore_cb(struct sk_buff *skb)
1179 {
1180 /* We need to move header back to the beginning if xfrm6_policy_check()
1181 * and tcp_v6_fill_cb() are going to be called again.
1182 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1183 */
1184 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1185 sizeof(struct inet6_skb_parm));
1186 }
1187
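/* Create the child socket for a completed handshake. The v6-mapped (IPv4)
 * case is delegated to tcp_v4_syn_recv_sock() and then patched up for IPv6.
 */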
1188 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1189 struct request_sock *req,
1190 struct dst_entry *dst,
1191 struct request_sock *req_unhash,
1192 bool *own_req)
1193 {
1194 struct inet_request_sock *ireq;
1195 struct ipv6_pinfo *newnp;
1196 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1197 struct ipv6_txoptions *opt;
1198 struct inet_sock *newinet;
1199 bool found_dup_sk = false;
1200 struct tcp_sock *newtp;
1201 struct sock *newsk;
1202 #ifdef CONFIG_TCP_MD5SIG
1203 struct tcp_md5sig_key *key;
1204 int l3index;
1205 #endif
1206 struct flowi6 fl6;
1207
1208 if (skb->protocol == htons(ETH_P_IP)) {
1209 /*
1210 * v6 mapped
1211 */
1212
1213 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1214 req_unhash, own_req);
1215
1216 if (!newsk)
1217 return NULL;
1218
1219 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1220
1221 newnp = tcp_inet6_sk(newsk);
1222 newtp = tcp_sk(newsk);
1223
1224 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1225
1226 newnp->saddr = newsk->sk_v6_rcv_saddr;
1227
1228 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1229 if (sk_is_mptcp(newsk))
1230 mptcpv6_handle_mapped(newsk, true);
1231 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1232 #ifdef CONFIG_TCP_MD5SIG
1233 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1234 #endif
1235
1236 newnp->ipv6_mc_list = NULL;
1237 newnp->ipv6_ac_list = NULL;
1238 newnp->ipv6_fl_list = NULL;
1239 newnp->pktoptions = NULL;
1240 newnp->opt = NULL;
1241 newnp->mcast_oif = inet_iif(skb);
1242 newnp->mcast_hops = ip_hdr(skb)->ttl;
1243 newnp->rcv_flowinfo = 0;
1244 if (np->repflow)
1245 newnp->flow_label = 0;
1246
1247 /*
1248 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1249 * here, tcp_create_openreq_child now does this for us, see the comment in
1250 * that function for the gory details. -acme
1251 */
1252
1253 /* This is a tricky place. Until this moment IPv4 tcp
1254 worked with IPv6 icsk.icsk_af_ops.
1255 Sync it now.
1256 */
1257 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1258
1259 return newsk;
1260 }
1261
1262 ireq = inet_rsk(req);
1263
1264 if (sk_acceptq_is_full(sk))
1265 goto out_overflow;
1266
1267 if (!dst) {
1268 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1269 if (!dst)
1270 goto out;
1271 }
1272
1273 newsk = tcp_create_openreq_child(sk, req, skb);
1274 if (!newsk)
1275 goto out_nonewsk;
1276
1277 /*
1278 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1279 * count here, tcp_create_openreq_child now does this for us, see the
1280 * comment in that function for the gory details. -acme
1281 */
1282
1283 newsk->sk_gso_type = SKB_GSO_TCPV6;
1284 inet6_sk_rx_dst_set(newsk, skb);
1285
1286 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1287
1288 newtp = tcp_sk(newsk);
1289 newinet = inet_sk(newsk);
1290 newnp = tcp_inet6_sk(newsk);
1291
1292 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1293
1294 ip6_dst_store(newsk, dst, NULL, NULL);
1295
1296 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1297 newnp->saddr = ireq->ir_v6_loc_addr;
1298 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1299 newsk->sk_bound_dev_if = ireq->ir_iif;
1300
1301 /* Now IPv6 options...
1302
1303 First: no IPv4 options.
1304 */
1305 newinet->inet_opt = NULL;
1306 newnp->ipv6_mc_list = NULL;
1307 newnp->ipv6_ac_list = NULL;
1308 newnp->ipv6_fl_list = NULL;
1309
1310 /* Clone RX bits */
1311 newnp->rxopt.all = np->rxopt.all;
1312
1313 newnp->pktoptions = NULL;
1314 newnp->opt = NULL;
1315 newnp->mcast_oif = tcp_v6_iif(skb);
1316 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1317 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1318 if (np->repflow)
1319 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1320
1321 /* Set ToS of the new socket based upon the value of incoming SYN.
1322 * ECT bits are set later in tcp_init_transfer().
1323 */
1324 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1325 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1326
1327 /* Clone native IPv6 options from listening socket (if any)
1328
1329 Yes, keeping a reference count would be much more clever,
1330 but we do one more thing here: reattach optmem
1331 to newsk.
1332 */
1333 opt = ireq->ipv6_opt;
1334 if (!opt)
1335 opt = rcu_dereference(np->opt);
1336 if (opt) {
1337 opt = ipv6_dup_options(newsk, opt);
1338 RCU_INIT_POINTER(newnp->opt, opt);
1339 }
1340 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1341 if (opt)
1342 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1343 opt->opt_flen;
1344
1345 tcp_ca_openreq_child(newsk, dst);
1346
1347 tcp_sync_mss(newsk, dst_mtu(dst));
1348 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1349
1350 tcp_initialize_rcv_mss(newsk);
1351
1352 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1353 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1354
1355 #ifdef CONFIG_TCP_MD5SIG
1356 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1357
1358 /* Copy over the MD5 key from the original socket */
1359 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1360 if (key) {
1361 const union tcp_md5_addr *addr;
1362
1363 addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
1364 if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
1365 inet_csk_prepare_forced_close(newsk);
1366 tcp_done(newsk);
1367 goto out;
1368 }
1369 }
1370 #endif
1371
1372 if (__inet_inherit_port(sk, newsk) < 0) {
1373 inet_csk_prepare_forced_close(newsk);
1374 tcp_done(newsk);
1375 goto out;
1376 }
1377 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1378 &found_dup_sk);
1379 if (*own_req) {
1380 tcp_move_syn(newtp, req);
1381
1382 /* Clone pktoptions received with SYN, if we own the req */
1383 if (ireq->pktopts) {
1384 newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
1385 consume_skb(ireq->pktopts);
1386 ireq->pktopts = NULL;
1387 if (newnp->pktoptions)
1388 tcp_v6_restore_cb(newnp->pktoptions);
1389 }
1390 } else {
1391 if (!req_unhash && found_dup_sk) {
1392 /* This code path should only be executed in the
1393 * syncookie case
1394 */
1395 bh_unlock_sock(newsk);
1396 sock_put(newsk);
1397 newsk = NULL;
1398 }
1399 }
1400
1401 return newsk;
1402
1403 out_overflow:
1404 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1405 out_nonewsk:
1406 dst_release(dst);
1407 out:
1408 tcp_listendrop(sk);
1409 return NULL;
1410 }
1411
1412 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1413 u32));
1414 /* The socket must have its spinlock held when we get
1415 * here, unless it is a TCP_LISTEN socket.
1416 *
1417 * We have a potential double-lock case here, so even when
1418 * doing backlog processing we use the BH locking scheme.
1419 * This is because we cannot sleep with the original spinlock
1420 * held.
1421 */
1422 INDIRECT_CALLABLE_SCOPE
1423 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1424 {
1425 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1426 struct sk_buff *opt_skb = NULL;
1427 enum skb_drop_reason reason;
1428 struct tcp_sock *tp;
1429
1430 /* Imagine: socket is IPv6. IPv4 packet arrives,
1431 goes to the IPv4 receive handler and is backlogged.
1432 From backlog it always goes here. Kerboom...
1433 Fortunately, tcp_rcv_established and rcv_established
1434 handle them correctly, but that is not the case with
1435 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1436 */
1437
1438 if (skb->protocol == htons(ETH_P_IP))
1439 return tcp_v4_do_rcv(sk, skb);
1440
1441 /*
1442 * socket locking is here for SMP purposes as backlog rcv
1443 * is currently called with bh processing disabled.
1444 */
1445
1446 /* Do Stevens' IPV6_PKTOPTIONS.
1447
1448 Yes, guys, it is the only place in our code where we
1449 can do this without affecting IPv4.
1450 The rest of the code is protocol independent,
1451 and I do not like the idea of uglifying IPv4.
1452 
1453 Actually, the whole idea behind IPV6_PKTOPTIONS
1454 looks not very well thought out. For now we latch
1455 the options received in the last packet enqueued
1456 by tcp. Feel free to propose a better solution.
1457 --ANK (980728)
1458 */
1459 if (np->rxopt.all && sk->sk_state != TCP_LISTEN)
1460 opt_skb = skb_clone_and_charge_r(skb, sk);
1461
1462 reason = SKB_DROP_REASON_NOT_SPECIFIED;
1463 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1464 struct dst_entry *dst;
1465
1466 dst = rcu_dereference_protected(sk->sk_rx_dst,
1467 lockdep_sock_is_held(sk));
1468
1469 sock_rps_save_rxhash(sk, skb);
1470 sk_mark_napi_id(sk, skb);
1471 if (dst) {
1472 if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1473 INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1474 dst, sk->sk_rx_dst_cookie) == NULL) {
1475 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1476 dst_release(dst);
1477 }
1478 }
1479
1480 tcp_rcv_established(sk, skb);
1481 if (opt_skb)
1482 goto ipv6_pktoptions;
1483 return 0;
1484 }
1485
1486 if (tcp_checksum_complete(skb))
1487 goto csum_err;
1488
1489 if (sk->sk_state == TCP_LISTEN) {
1490 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1491
1492 if (!nsk)
1493 goto discard;
1494
1495 if (nsk != sk) {
1496 if (tcp_child_process(sk, nsk, skb))
1497 goto reset;
1498 return 0;
1499 }
1500 } else
1501 sock_rps_save_rxhash(sk, skb);
1502
1503 if (tcp_rcv_state_process(sk, skb))
1504 goto reset;
1505 if (opt_skb)
1506 goto ipv6_pktoptions;
1507 return 0;
1508
1509 reset:
1510 tcp_v6_send_reset(sk, skb);
1511 discard:
1512 if (opt_skb)
1513 __kfree_skb(opt_skb);
1514 kfree_skb_reason(skb, reason);
1515 return 0;
1516 csum_err:
1517 reason = SKB_DROP_REASON_TCP_CSUM;
1518 trace_tcp_bad_csum(skb);
1519 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1520 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1521 goto discard;
1522
1523
1524 ipv6_pktoptions:
1525 /* Do you ask, what is it?
1526
1527 1. skb was enqueued by tcp.
1528 2. skb is added to tail of read queue, rather than out of order.
1529 3. socket is not in passive state.
1530 4. Finally, it really contains options, which user wants to receive.
1531 */
1532 tp = tcp_sk(sk);
1533 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1534 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1535 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1536 np->mcast_oif = tcp_v6_iif(opt_skb);
1537 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1538 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1539 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1540 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1541 if (np->repflow)
1542 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1543 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1544 tcp_v6_restore_cb(opt_skb);
1545 opt_skb = xchg(&np->pktoptions, opt_skb);
1546 } else {
1547 __kfree_skb(opt_skb);
1548 opt_skb = xchg(&np->pktoptions, NULL);
1549 }
1550 }
1551
1552 consume_skb(opt_skb);
1553 return 0;
1554 }
1555
1556 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1557 const struct tcphdr *th)
1558 {
1559 /* This is tricky: we move IP6CB at its correct location into
1560 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1561 * _decode_session6() uses IP6CB().
1562 * barrier() makes sure compiler won't play aliasing games.
1563 */
1564 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1565 sizeof(struct inet6_skb_parm));
1566 barrier();
1567
1568 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1569 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1570 skb->len - th->doff*4);
1571 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1572 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1573 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1574 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1575 TCP_SKB_CB(skb)->sacked = 0;
1576 TCP_SKB_CB(skb)->has_rxtstamp =
1577 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1578 }
1579
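/* Main IPv6 TCP receive entry point: validate the segment, look up the
 * owning socket and hand the skb to the appropriate state handler.
 */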
1580 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1581 {
1582 enum skb_drop_reason drop_reason;
1583 int sdif = inet6_sdif(skb);
1584 int dif = inet6_iif(skb);
1585 const struct tcphdr *th;
1586 const struct ipv6hdr *hdr;
1587 bool refcounted;
1588 struct sock *sk;
1589 int ret;
1590 struct net *net = dev_net(skb->dev);
1591
1592 drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1593 if (skb->pkt_type != PACKET_HOST)
1594 goto discard_it;
1595
1596 /*
1597 * Count it even if it's bad.
1598 */
1599 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1600
1601 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1602 goto discard_it;
1603
1604 th = (const struct tcphdr *)skb->data;
1605
1606 if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1607 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1608 goto bad_packet;
1609 }
1610 if (!pskb_may_pull(skb, th->doff*4))
1611 goto discard_it;
1612
1613 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1614 goto csum_error;
1615
1616 th = (const struct tcphdr *)skb->data;
1617 hdr = ipv6_hdr(skb);
1618
1619 lookup:
1620 sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1621 th->source, th->dest, inet6_iif(skb), sdif,
1622 &refcounted);
1623 if (!sk)
1624 goto no_tcp_socket;
1625
1626 process:
1627 if (sk->sk_state == TCP_TIME_WAIT)
1628 goto do_time_wait;
1629
1630 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1631 struct request_sock *req = inet_reqsk(sk);
1632 bool req_stolen = false;
1633 struct sock *nsk;
1634
1635 sk = req->rsk_listener;
1636 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1637 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1638 else
1639 drop_reason = tcp_inbound_md5_hash(sk, skb,
1640 &hdr->saddr, &hdr->daddr,
1641 AF_INET6, dif, sdif);
1642 if (drop_reason) {
1643 sk_drops_add(sk, skb);
1644 reqsk_put(req);
1645 goto discard_it;
1646 }
1647 if (tcp_checksum_complete(skb)) {
1648 reqsk_put(req);
1649 goto csum_error;
1650 }
1651 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1652 nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1653 if (!nsk) {
1654 inet_csk_reqsk_queue_drop_and_put(sk, req);
1655 goto lookup;
1656 }
1657 sk = nsk;
1658 /* reuseport_migrate_sock() has already held one sk_refcnt
1659 * before returning.
1660 */
1661 } else {
1662 sock_hold(sk);
1663 }
1664 refcounted = true;
1665 nsk = NULL;
1666 if (!tcp_filter(sk, skb)) {
1667 th = (const struct tcphdr *)skb->data;
1668 hdr = ipv6_hdr(skb);
1669 tcp_v6_fill_cb(skb, hdr, th);
1670 nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1671 } else {
1672 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1673 }
1674 if (!nsk) {
1675 reqsk_put(req);
1676 if (req_stolen) {
1677 /* Another cpu got exclusive access to req
1678 * and created a full blown socket.
1679 * Try to feed this packet to this socket
1680 * instead of discarding it.
1681 */
1682 tcp_v6_restore_cb(skb);
1683 sock_put(sk);
1684 goto lookup;
1685 }
1686 goto discard_and_relse;
1687 }
1688 nf_reset_ct(skb);
1689 if (nsk == sk) {
1690 reqsk_put(req);
1691 tcp_v6_restore_cb(skb);
1692 } else if (tcp_child_process(sk, nsk, skb)) {
1693 tcp_v6_send_reset(nsk, skb);
1694 goto discard_and_relse;
1695 } else {
1696 sock_put(sk);
1697 return 0;
1698 }
1699 }
1700
1701 if (static_branch_unlikely(&ip6_min_hopcount)) {
1702 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1703 if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
1704 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1705 drop_reason = SKB_DROP_REASON_TCP_MINTTL;
1706 goto discard_and_relse;
1707 }
1708 }
1709
1710 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1711 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1712 goto discard_and_relse;
1713 }
1714
1715 drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1716 AF_INET6, dif, sdif);
1717 if (drop_reason)
1718 goto discard_and_relse;
1719
1720 nf_reset_ct(skb);
1721
1722 if (tcp_filter(sk, skb)) {
1723 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1724 goto discard_and_relse;
1725 }
1726 th = (const struct tcphdr *)skb->data;
1727 hdr = ipv6_hdr(skb);
1728 tcp_v6_fill_cb(skb, hdr, th);
1729
1730 skb->dev = NULL;
1731
1732 if (sk->sk_state == TCP_LISTEN) {
1733 ret = tcp_v6_do_rcv(sk, skb);
1734 goto put_and_return;
1735 }
1736
1737 sk_incoming_cpu_update(sk);
1738
1739 bh_lock_sock_nested(sk);
1740 tcp_segs_in(tcp_sk(sk), skb);
1741 ret = 0;
1742 if (!sock_owned_by_user(sk)) {
1743 ret = tcp_v6_do_rcv(sk, skb);
1744 } else {
1745 if (tcp_add_backlog(sk, skb, &drop_reason))
1746 goto discard_and_relse;
1747 }
1748 bh_unlock_sock(sk);
1749 put_and_return:
1750 if (refcounted)
1751 sock_put(sk);
1752 return ret ? -1 : 0;
1753
1754 no_tcp_socket:
1755 drop_reason = SKB_DROP_REASON_NO_SOCKET;
1756 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1757 goto discard_it;
1758
1759 tcp_v6_fill_cb(skb, hdr, th);
1760
1761 if (tcp_checksum_complete(skb)) {
1762 csum_error:
1763 drop_reason = SKB_DROP_REASON_TCP_CSUM;
1764 trace_tcp_bad_csum(skb);
1765 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1766 bad_packet:
1767 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1768 } else {
1769 tcp_v6_send_reset(NULL, skb);
1770 }
1771
1772 discard_it:
1773 SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1774 kfree_skb_reason(skb, drop_reason);
1775 return 0;
1776
1777 discard_and_relse:
1778 sk_drops_add(sk, skb);
1779 if (refcounted)
1780 sock_put(sk);
1781 goto discard_it;
1782
1783 do_time_wait:
1784 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1785 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1786 inet_twsk_put(inet_twsk(sk));
1787 goto discard_it;
1788 }
1789
1790 tcp_v6_fill_cb(skb, hdr, th);
1791
1792 if (tcp_checksum_complete(skb)) {
1793 inet_twsk_put(inet_twsk(sk));
1794 goto csum_error;
1795 }
1796
1797 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1798 case TCP_TW_SYN:
1799 {
1800 struct sock *sk2;
1801
1802 sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1803 skb, __tcp_hdrlen(th),
1804 &ipv6_hdr(skb)->saddr, th->source,
1805 &ipv6_hdr(skb)->daddr,
1806 ntohs(th->dest),
1807 tcp_v6_iif_l3_slave(skb),
1808 sdif);
1809 if (sk2) {
1810 struct inet_timewait_sock *tw = inet_twsk(sk);
1811 inet_twsk_deschedule_put(tw);
1812 sk = sk2;
1813 tcp_v6_restore_cb(skb);
1814 refcounted = false;
1815 goto process;
1816 }
1817 }
1818 /* to ACK */
1819 fallthrough;
1820 case TCP_TW_ACK:
1821 tcp_v6_timewait_ack(sk, skb);
1822 break;
1823 case TCP_TW_RST:
1824 tcp_v6_send_reset(sk, skb);
1825 inet_twsk_deschedule_put(inet_twsk(sk));
1826 goto discard_it;
1827 case TCP_TW_SUCCESS:
1828 ;
1829 }
1830 goto discard_it;
1831 }
1832
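/* Early demux: find an established socket for this segment before routing,
 * so its cached receive dst can be reused.
 */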
1833 void tcp_v6_early_demux(struct sk_buff *skb)
1834 {
1835 struct net *net = dev_net(skb->dev);
1836 const struct ipv6hdr *hdr;
1837 const struct tcphdr *th;
1838 struct sock *sk;
1839
1840 if (skb->pkt_type != PACKET_HOST)
1841 return;
1842
1843 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1844 return;
1845
1846 hdr = ipv6_hdr(skb);
1847 th = tcp_hdr(skb);
1848
1849 if (th->doff < sizeof(struct tcphdr) / 4)
1850 return;
1851
1852 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1853 sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
1854 &hdr->saddr, th->source,
1855 &hdr->daddr, ntohs(th->dest),
1856 inet6_iif(skb), inet6_sdif(skb));
1857 if (sk) {
1858 skb->sk = sk;
1859 skb->destructor = sock_edemux;
1860 if (sk_fullsock(sk)) {
1861 struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1862
1863 if (dst)
1864 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1865 if (dst &&
1866 sk->sk_rx_dst_ifindex == skb->skb_iif)
1867 skb_dst_set_noref(skb, dst);
1868 }
1869 }
1870 }
1871
1872 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1873 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1874 .twsk_unique = tcp_twsk_unique,
1875 .twsk_destructor = tcp_twsk_destructor,
1876 };
1877
1878 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1879 {
1880 __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1881 }
1882
1883 const struct inet_connection_sock_af_ops ipv6_specific = {
1884 .queue_xmit = inet6_csk_xmit,
1885 .send_check = tcp_v6_send_check,
1886 .rebuild_header = inet6_sk_rebuild_header,
1887 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1888 .conn_request = tcp_v6_conn_request,
1889 .syn_recv_sock = tcp_v6_syn_recv_sock,
1890 .net_header_len = sizeof(struct ipv6hdr),
1891 .net_frag_header_len = sizeof(struct frag_hdr),
1892 .setsockopt = ipv6_setsockopt,
1893 .getsockopt = ipv6_getsockopt,
1894 .addr2sockaddr = inet6_csk_addr2sockaddr,
1895 .sockaddr_len = sizeof(struct sockaddr_in6),
1896 .mtu_reduced = tcp_v6_mtu_reduced,
1897 };
1898
1899 #ifdef CONFIG_TCP_MD5SIG
1900 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1901 .md5_lookup = tcp_v6_md5_lookup,
1902 .calc_md5_hash = tcp_v6_md5_hash_skb,
1903 .md5_parse = tcp_v6_parse_md5_keys,
1904 };
1905 #endif
1906
1907 /*
1908 * TCP over IPv4 via INET6 API
1909 */
1910 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1911 .queue_xmit = ip_queue_xmit,
1912 .send_check = tcp_v4_send_check,
1913 .rebuild_header = inet_sk_rebuild_header,
1914 .sk_rx_dst_set = inet_sk_rx_dst_set,
1915 .conn_request = tcp_v6_conn_request,
1916 .syn_recv_sock = tcp_v6_syn_recv_sock,
1917 .net_header_len = sizeof(struct iphdr),
1918 .setsockopt = ipv6_setsockopt,
1919 .getsockopt = ipv6_getsockopt,
1920 .addr2sockaddr = inet6_csk_addr2sockaddr,
1921 .sockaddr_len = sizeof(struct sockaddr_in6),
1922 .mtu_reduced = tcp_v4_mtu_reduced,
1923 };
1924
1925 #ifdef CONFIG_TCP_MD5SIG
1926 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1927 .md5_lookup = tcp_v4_md5_lookup,
1928 .calc_md5_hash = tcp_v4_md5_hash_skb,
1929 .md5_parse = tcp_v6_parse_md5_keys,
1930 };
1931 #endif
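/* When an AF_INET6 TCP socket connects to a v4-mapped address
 * (::ffff:a.b.c.d), tcp_v6_connect() switches it over to the "mapped"
 * operations above so that further packets are built and parsed as plain
 * IPv4.  Roughly (sketch, see tcp_v6_connect() earlier in this file):
 *
 *	if (ipv6_addr_v4mapped(&sk->sk_v6_daddr)) {
 *		icsk->icsk_af_ops = &ipv6_mapped;
 * #ifdef CONFIG_TCP_MD5SIG
 *		tcp_sk(sk)->af_specific = &tcp_sock_ipv6_mapped_specific;
 * #endif
 *		... then falls through to tcp_v4_connect() ...
 *	}
 */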
1932
1933 /* NOTE: A lot of things are set to zero explicitly by the call to
1934  * sk_alloc(), so they need not be done here.
1935  */
1936 static int tcp_v6_init_sock(struct sock *sk)
1937 {
1938 struct inet_connection_sock *icsk = inet_csk(sk);
1939
1940 tcp_init_sock(sk);
1941
1942 icsk->icsk_af_ops = &ipv6_specific;
1943
1944 #ifdef CONFIG_TCP_MD5SIG
1945 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1946 #endif
1947
1948 return 0;
1949 }
1950
1951 #ifdef CONFIG_PROC_FS
1952 /* Proc filesystem TCPv6 sock list dumping. */
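/* Each socket becomes one row of /proc/net/tcp6: the four 32-bit words of
 * each in6_addr and the port are printed in hex, e.g. a listener bound to
 * [::]:8080 shows up roughly as (illustrative):
 *
 *   0: 00000000000000000000000000000000:1F90 00000000000000000000000000000000:0000 0A ...
 *
 * where 0A is the TCP_LISTEN state.
 */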
1953 static void get_openreq6(struct seq_file *seq,
1954 const struct request_sock *req, int i)
1955 {
1956 long ttd = req->rsk_timer.expires - jiffies;
1957 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1958 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1959
1960 if (ttd < 0)
1961 ttd = 0;
1962
1963 seq_printf(seq,
1964 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1965 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1966 i,
1967 src->s6_addr32[0], src->s6_addr32[1],
1968 src->s6_addr32[2], src->s6_addr32[3],
1969 inet_rsk(req)->ir_num,
1970 dest->s6_addr32[0], dest->s6_addr32[1],
1971 dest->s6_addr32[2], dest->s6_addr32[3],
1972 ntohs(inet_rsk(req)->ir_rmt_port),
1973 TCP_SYN_RECV,
1974 0, 0, /* could print option size, but that is af dependent. */
1975 1, /* timers active (only the expire timer) */
1976 jiffies_to_clock_t(ttd),
1977 req->num_timeout,
1978 from_kuid_munged(seq_user_ns(seq),
1979 sock_i_uid(req->rsk_listener)),
1980 0, /* non standard timer */
1981 0, /* open_requests have no inode */
1982 0, req);
1983 }
1984
1985 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1986 {
1987 const struct in6_addr *dest, *src;
1988 __u16 destp, srcp;
1989 int timer_active;
1990 unsigned long timer_expires;
1991 const struct inet_sock *inet = inet_sk(sp);
1992 const struct tcp_sock *tp = tcp_sk(sp);
1993 const struct inet_connection_sock *icsk = inet_csk(sp);
1994 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1995 int rx_queue;
1996 int state;
1997
1998 dest = &sp->sk_v6_daddr;
1999 src = &sp->sk_v6_rcv_saddr;
2000 destp = ntohs(inet->inet_dport);
2001 srcp = ntohs(inet->inet_sport);
2002
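/* Value reported in the "tr" column of /proc/net/tcp6:
 * 1 = retransmit/loss-probe timer, 2 = keepalive (sk_timer),
 * 4 = zero-window probe, 0 = no timer pending.
 * Time-wait sockets report 3, see get_timewait6_sock().
 */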
2003 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2004 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2005 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2006 timer_active = 1;
2007 timer_expires = icsk->icsk_timeout;
2008 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2009 timer_active = 4;
2010 timer_expires = icsk->icsk_timeout;
2011 } else if (timer_pending(&sp->sk_timer)) {
2012 timer_active = 2;
2013 timer_expires = sp->sk_timer.expires;
2014 } else {
2015 timer_active = 0;
2016 timer_expires = jiffies;
2017 }
2018
2019 state = inet_sk_state_load(sp);
2020 if (state == TCP_LISTEN)
2021 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2022 else
2023 /* Because we don't lock the socket,
2024 * we might find a transient negative value.
2025 */
2026 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2027 READ_ONCE(tp->copied_seq), 0);
2028
2029 seq_printf(seq,
2030 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2031 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2032 i,
2033 src->s6_addr32[0], src->s6_addr32[1],
2034 src->s6_addr32[2], src->s6_addr32[3], srcp,
2035 dest->s6_addr32[0], dest->s6_addr32[1],
2036 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2037 state,
2038 READ_ONCE(tp->write_seq) - tp->snd_una,
2039 rx_queue,
2040 timer_active,
2041 jiffies_delta_to_clock_t(timer_expires - jiffies),
2042 icsk->icsk_retransmits,
2043 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2044 icsk->icsk_probes_out,
2045 sock_i_ino(sp),
2046 refcount_read(&sp->sk_refcnt), sp,
2047 jiffies_to_clock_t(icsk->icsk_rto),
2048 jiffies_to_clock_t(icsk->icsk_ack.ato),
2049 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2050 tcp_snd_cwnd(tp),
2051 state == TCP_LISTEN ?
2052 fastopenq->max_qlen :
2053 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2054 );
2055 }
2056
2057 static void get_timewait6_sock(struct seq_file *seq,
2058 struct inet_timewait_sock *tw, int i)
2059 {
2060 long delta = tw->tw_timer.expires - jiffies;
2061 const struct in6_addr *dest, *src;
2062 __u16 destp, srcp;
2063
2064 dest = &tw->tw_v6_daddr;
2065 src = &tw->tw_v6_rcv_saddr;
2066 destp = ntohs(tw->tw_dport);
2067 srcp = ntohs(tw->tw_sport);
2068
2069 seq_printf(seq,
2070 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2071 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2072 i,
2073 src->s6_addr32[0], src->s6_addr32[1],
2074 src->s6_addr32[2], src->s6_addr32[3], srcp,
2075 dest->s6_addr32[0], dest->s6_addr32[1],
2076 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2077 tw->tw_substate, 0, 0,
2078 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2079 refcount_read(&tw->tw_refcnt), tw);
2080 }
2081
2082 static int tcp6_seq_show(struct seq_file *seq, void *v)
2083 {
2084 struct tcp_iter_state *st;
2085 struct sock *sk = v;
2086
2087 if (v == SEQ_START_TOKEN) {
2088 seq_puts(seq,
2089 " sl "
2090 "local_address "
2091 "remote_address "
2092 "st tx_queue rx_queue tr tm->when retrnsmt"
2093 " uid timeout inode\n");
2094 goto out;
2095 }
2096 st = seq->private;
2097
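/* v may be a full socket, a request socket (TCP_NEW_SYN_RECV) or a
 * time-wait socket; all three begin with struct sock_common, so peeking
 * at sk_state here is safe before dispatching to the right dumper.
 */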
2098 if (sk->sk_state == TCP_TIME_WAIT)
2099 get_timewait6_sock(seq, v, st->num);
2100 else if (sk->sk_state == TCP_NEW_SYN_RECV)
2101 get_openreq6(seq, v, st->num);
2102 else
2103 get_tcp6_sock(seq, v, st->num);
2104 out:
2105 return 0;
2106 }
2107
2108 static const struct seq_operations tcp6_seq_ops = {
2109 .show = tcp6_seq_show,
2110 .start = tcp_seq_start,
2111 .next = tcp_seq_next,
2112 .stop = tcp_seq_stop,
2113 };
2114
2115 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2116 .family = AF_INET6,
2117 };
2118
2119 int __net_init tcp6_proc_init(struct net *net)
2120 {
2121 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2122 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2123 return -ENOMEM;
2124 return 0;
2125 }
2126
2127 void tcp6_proc_exit(struct net *net)
2128 {
2129 remove_proc_entry("tcp6", net->proc_net);
2130 }
2131 #endif
2132
2133 struct proto tcpv6_prot = {
2134 .name = "TCPv6",
2135 .owner = THIS_MODULE,
2136 .close = tcp_close,
2137 .pre_connect = tcp_v6_pre_connect,
2138 .connect = tcp_v6_connect,
2139 .disconnect = tcp_disconnect,
2140 .accept = inet_csk_accept,
2141 .ioctl = tcp_ioctl,
2142 .init = tcp_v6_init_sock,
2143 .destroy = tcp_v4_destroy_sock,
2144 .shutdown = tcp_shutdown,
2145 .setsockopt = tcp_setsockopt,
2146 .getsockopt = tcp_getsockopt,
2147 .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt,
2148 .keepalive = tcp_set_keepalive,
2149 .recvmsg = tcp_recvmsg,
2150 .sendmsg = tcp_sendmsg,
2151 .splice_eof = tcp_splice_eof,
2152 .backlog_rcv = tcp_v6_do_rcv,
2153 .release_cb = tcp_release_cb,
2154 .hash = inet6_hash,
2155 .unhash = inet_unhash,
2156 .get_port = inet_csk_get_port,
2157 .put_port = inet_put_port,
2158 #ifdef CONFIG_BPF_SYSCALL
2159 .psock_update_sk_prot = tcp_bpf_update_proto,
2160 #endif
2161 .enter_memory_pressure = tcp_enter_memory_pressure,
2162 .leave_memory_pressure = tcp_leave_memory_pressure,
2163 .stream_memory_free = tcp_stream_memory_free,
2164 .sockets_allocated = &tcp_sockets_allocated,
2165
2166 .memory_allocated = &tcp_memory_allocated,
2167 .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc,
2168
2169 .memory_pressure = &tcp_memory_pressure,
2170 .orphan_count = &tcp_orphan_count,
2171 .sysctl_mem = sysctl_tcp_mem,
2172 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2173 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2174 .max_header = MAX_TCP_HEADER,
2175 .obj_size = sizeof(struct tcp6_sock),
2176 .ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6),
2177 .slab_flags = SLAB_TYPESAFE_BY_RCU,
2178 .twsk_prot = &tcp6_timewait_sock_ops,
2179 .rsk_prot = &tcp6_request_sock_ops,
2180 .h.hashinfo = NULL,
2181 .no_autobind = true,
2182 .diag_destroy = tcp_abort,
2183 };
2184 EXPORT_SYMBOL_GPL(tcpv6_prot);
2185
2186 static const struct inet6_protocol tcpv6_protocol = {
2187 .handler = tcp_v6_rcv,
2188 .err_handler = tcp_v6_err,
2189 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2190 };
2191
2192 static struct inet_protosw tcpv6_protosw = {
2193 .type = SOCK_STREAM,
2194 .protocol = IPPROTO_TCP,
2195 .prot = &tcpv6_prot,
2196 .ops = &inet6_stream_ops,
2197 .flags = INET_PROTOSW_PERMANENT |
2198 INET_PROTOSW_ICSK,
2199 };
2200
2201 static int __net_init tcpv6_net_init(struct net *net)
2202 {
2203 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2204 SOCK_RAW, IPPROTO_TCP, net);
2205 }
2206
2207 static void __net_exit tcpv6_net_exit(struct net *net)
2208 {
2209 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2210 }
2211
2212 static struct pernet_operations tcpv6_net_ops = {
2213 .init = tcpv6_net_init,
2214 .exit = tcpv6_net_exit,
2215 };
2216
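/* Module init: register the protocol handler, the socket-layer protosw,
 * the per-netns control sockets and finally MPTCPv6, in that order; on
 * failure each completed step is unwound in reverse via the error labels.
 */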
2217 int __init tcpv6_init(void)
2218 {
2219 int ret;
2220
2221 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2222 if (ret)
2223 goto out;
2224
2225 /* register the TCP protosw with the inet6 socket layer */
2226 ret = inet6_register_protosw(&tcpv6_protosw);
2227 if (ret)
2228 goto out_tcpv6_protocol;
2229
2230 ret = register_pernet_subsys(&tcpv6_net_ops);
2231 if (ret)
2232 goto out_tcpv6_protosw;
2233
2234 ret = mptcpv6_init();
2235 if (ret)
2236 goto out_tcpv6_pernet_subsys;
2237
2238 out:
2239 return ret;
2240
2241 out_tcpv6_pernet_subsys:
2242 unregister_pernet_subsys(&tcpv6_net_ops);
2243 out_tcpv6_protosw:
2244 inet6_unregister_protosw(&tcpv6_protosw);
2245 out_tcpv6_protocol:
2246 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2247 goto out;
2248 }
2249
2250 void tcpv6_exit(void)
2251 {
2252 unregister_pernet_subsys(&tcpv6_net_ops);
2253 inet6_unregister_protosw(&tcpv6_protosw);
2254 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2255 }
2256