1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * TCP over IPv6
4 * Linux INET6 implementation
5 *
6 * Authors:
7 * Pedro Roque <roque@di.fc.ul.pt>
8 *
9 * Based on:
10 * linux/net/ipv4/tcp.c
11 * linux/net/ipv4/tcp_input.c
12 * linux/net/ipv4/tcp_output.c
13 *
14 * Fixes:
15 * Hideaki YOSHIFUJI : sin6_scope_id support
16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
18 * a single port at the same time.
19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68
69 #include <trace/events/tcp.h>
70
71 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 struct request_sock *req);
74
75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 const struct in6_addr *addr,
85 int l3index)
86 {
87 return NULL;
88 }
89 #endif
90
91 /* Helper returning the inet6 address from a given tcp socket.
92 * It can be used in TCP stack instead of inet6_sk(sk).
93 * This avoids a dereference and allows compiler optimizations.
94 * It is a specialized version of inet6_sk_generic().
95 */
96 #define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
97 struct tcp6_sock, tcp)->inet6)
98
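/* Cache the incoming route in sk->sk_rx_dst for the established fast path,
 * together with the ingress ifindex and routing cookie used to revalidate it.
 */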
99 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
100 {
101 struct dst_entry *dst = skb_dst(skb);
102
103 if (dst && dst_hold_safe(dst)) {
104 rcu_assign_pointer(sk->sk_rx_dst, dst);
105 sk->sk_rx_dst_ifindex = skb->skb_iif;
106 sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
107 }
108 }
109
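/* Pick the initial sequence number from the address/port 4-tuple of the
 * incoming segment.
 */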
110 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
111 {
112 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
113 ipv6_hdr(skb)->saddr.s6_addr32,
114 tcp_hdr(skb)->dest,
115 tcp_hdr(skb)->source);
116 }
117
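/* Derive the per-connection timestamp offset from the address pair. */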
118 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
119 {
120 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
121 ipv6_hdr(skb)->saddr.s6_addr32);
122 }
123
124 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
125 int addr_len)
126 {
127 /* This check is replicated from tcp_v6_connect() and intended to
128 * prevent the BPF program called below from accessing bytes that are out
129 * of the bounds specified by the user in addr_len.
130 */
131 if (addr_len < SIN6_LEN_RFC2133)
132 return -EINVAL;
133
134 sock_owned_by_me(sk);
135
136 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
137 }
138
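/* Active open: validate the destination, resolve a route and source address,
 * then hash the socket into the connection tables and send the SYN.
 * IPv4-mapped destinations are handed off to tcp_v4_connect().
 */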
139 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
140 int addr_len)
141 {
142 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
143 struct inet_connection_sock *icsk = inet_csk(sk);
144 struct in6_addr *saddr = NULL, *final_p, final;
145 struct inet_timewait_death_row *tcp_death_row;
146 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
147 struct inet_sock *inet = inet_sk(sk);
148 struct tcp_sock *tp = tcp_sk(sk);
149 struct net *net = sock_net(sk);
150 struct ipv6_txoptions *opt;
151 struct dst_entry *dst;
152 struct flowi6 fl6;
153 int addr_type;
154 int err;
155
156 if (addr_len < SIN6_LEN_RFC2133)
157 return -EINVAL;
158
159 if (usin->sin6_family != AF_INET6)
160 return -EAFNOSUPPORT;
161
162 memset(&fl6, 0, sizeof(fl6));
163
164 if (np->sndflow) {
165 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
166 IP6_ECN_flow_init(fl6.flowlabel);
167 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
168 struct ip6_flowlabel *flowlabel;
169 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
170 if (IS_ERR(flowlabel))
171 return -EINVAL;
172 fl6_sock_release(flowlabel);
173 }
174 }
175
176 /*
177 * connect() to INADDR_ANY means loopback (BSD'ism).
178 */
179
180 if (ipv6_addr_any(&usin->sin6_addr)) {
181 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
182 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
183 &usin->sin6_addr);
184 else
185 usin->sin6_addr = in6addr_loopback;
186 }
187
188 addr_type = ipv6_addr_type(&usin->sin6_addr);
189
190 if (addr_type & IPV6_ADDR_MULTICAST)
191 return -ENETUNREACH;
192
193 if (addr_type&IPV6_ADDR_LINKLOCAL) {
194 if (addr_len >= sizeof(struct sockaddr_in6) &&
195 usin->sin6_scope_id) {
196 /* If interface is set while binding, indices
197 * must coincide.
198 */
199 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
200 return -EINVAL;
201
202 sk->sk_bound_dev_if = usin->sin6_scope_id;
203 }
204
205 /* Connect to link-local address requires an interface */
206 if (!sk->sk_bound_dev_if)
207 return -EINVAL;
208 }
209
210 if (tp->rx_opt.ts_recent_stamp &&
211 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
212 tp->rx_opt.ts_recent = 0;
213 tp->rx_opt.ts_recent_stamp = 0;
214 WRITE_ONCE(tp->write_seq, 0);
215 }
216
217 sk->sk_v6_daddr = usin->sin6_addr;
218 np->flow_label = fl6.flowlabel;
219
220 /*
221 * TCP over IPv4
222 */
223
224 if (addr_type & IPV6_ADDR_MAPPED) {
225 u32 exthdrlen = icsk->icsk_ext_hdr_len;
226 struct sockaddr_in sin;
227
228 if (ipv6_only_sock(sk))
229 return -ENETUNREACH;
230
231 sin.sin_family = AF_INET;
232 sin.sin_port = usin->sin6_port;
233 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
234
235 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
236 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
237 if (sk_is_mptcp(sk))
238 mptcpv6_handle_mapped(sk, true);
239 sk->sk_backlog_rcv = tcp_v4_do_rcv;
240 #ifdef CONFIG_TCP_MD5SIG
241 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
242 #endif
243
244 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
245
246 if (err) {
247 icsk->icsk_ext_hdr_len = exthdrlen;
248 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
249 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
250 if (sk_is_mptcp(sk))
251 mptcpv6_handle_mapped(sk, false);
252 sk->sk_backlog_rcv = tcp_v6_do_rcv;
253 #ifdef CONFIG_TCP_MD5SIG
254 tp->af_specific = &tcp_sock_ipv6_specific;
255 #endif
256 goto failure;
257 }
258 np->saddr = sk->sk_v6_rcv_saddr;
259
260 return err;
261 }
262
263 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
264 saddr = &sk->sk_v6_rcv_saddr;
265
266 fl6.flowi6_proto = IPPROTO_TCP;
267 fl6.daddr = sk->sk_v6_daddr;
268 fl6.saddr = saddr ? *saddr : np->saddr;
269 fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
270 fl6.flowi6_oif = sk->sk_bound_dev_if;
271 fl6.flowi6_mark = sk->sk_mark;
272 fl6.fl6_dport = usin->sin6_port;
273 fl6.fl6_sport = inet->inet_sport;
274 fl6.flowi6_uid = sk->sk_uid;
275
276 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
277 final_p = fl6_update_dst(&fl6, opt, &final);
278
279 security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
280
281 dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
282 if (IS_ERR(dst)) {
283 err = PTR_ERR(dst);
284 goto failure;
285 }
286
287 tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
288
289 if (!saddr) {
290 saddr = &fl6.saddr;
291
292 err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
293 if (err)
294 goto failure;
295 }
296
297 /* set the source address */
298 np->saddr = *saddr;
299 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
300
301 sk->sk_gso_type = SKB_GSO_TCPV6;
302 ip6_dst_store(sk, dst, NULL, NULL);
303
304 icsk->icsk_ext_hdr_len = 0;
305 if (opt)
306 icsk->icsk_ext_hdr_len = opt->opt_flen +
307 opt->opt_nflen;
308
309 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
310
311 inet->inet_dport = usin->sin6_port;
312
313 tcp_set_state(sk, TCP_SYN_SENT);
314 err = inet6_hash_connect(tcp_death_row, sk);
315 if (err)
316 goto late_failure;
317
318 sk_set_txhash(sk);
319
320 if (likely(!tp->repair)) {
321 if (!tp->write_seq)
322 WRITE_ONCE(tp->write_seq,
323 secure_tcpv6_seq(np->saddr.s6_addr32,
324 sk->sk_v6_daddr.s6_addr32,
325 inet->inet_sport,
326 inet->inet_dport));
327 tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
328 sk->sk_v6_daddr.s6_addr32);
329 }
330
331 if (tcp_fastopen_defer_connect(sk, &err))
332 return err;
333 if (err)
334 goto late_failure;
335
336 err = tcp_connect(sk);
337 if (err)
338 goto late_failure;
339
340 return 0;
341
342 late_failure:
343 tcp_set_state(sk, TCP_CLOSE);
344 inet_bhash2_reset_saddr(sk);
345 failure:
346 inet->inet_dport = 0;
347 sk->sk_route_caps = 0;
348 return err;
349 }
350
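/* React to a smaller path MTU: shrink the cached MSS and retransmit the
 * segments that no longer fit.
 */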
351 static void tcp_v6_mtu_reduced(struct sock *sk)
352 {
353 struct dst_entry *dst;
354 u32 mtu;
355
356 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
357 return;
358
359 mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
360
361 /* Drop requests trying to increase our current mss.
362 * Check done in __ip6_rt_update_pmtu() is too late.
363 */
364 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
365 return;
366
367 dst = inet6_csk_update_pmtu(sk, mtu);
368 if (!dst)
369 return;
370
371 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
372 tcp_sync_mss(sk, dst_mtu(dst));
373 tcp_simple_retransmit(sk);
374 }
375 }
376
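/* ICMPv6 error handler: locate the socket the error refers to and handle
 * redirects, Packet Too Big notifications and hard errors accordingly.
 */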
377 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
378 u8 type, u8 code, int offset, __be32 info)
379 {
380 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
381 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
382 struct net *net = dev_net(skb->dev);
383 struct request_sock *fastopen;
384 struct ipv6_pinfo *np;
385 struct tcp_sock *tp;
386 __u32 seq, snd_una;
387 struct sock *sk;
388 bool fatal;
389 int err;
390
391 sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
392 &hdr->daddr, th->dest,
393 &hdr->saddr, ntohs(th->source),
394 skb->dev->ifindex, inet6_sdif(skb));
395
396 if (!sk) {
397 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
398 ICMP6_MIB_INERRORS);
399 return -ENOENT;
400 }
401
402 if (sk->sk_state == TCP_TIME_WAIT) {
403 inet_twsk_put(inet_twsk(sk));
404 return 0;
405 }
406 seq = ntohl(th->seq);
407 fatal = icmpv6_err_convert(type, code, &err);
408 if (sk->sk_state == TCP_NEW_SYN_RECV) {
409 tcp_req_err(sk, seq, fatal);
410 return 0;
411 }
412
413 bh_lock_sock(sk);
414 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
415 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
416
417 if (sk->sk_state == TCP_CLOSE)
418 goto out;
419
420 if (static_branch_unlikely(&ip6_min_hopcount)) {
421 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
422 if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
423 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
424 goto out;
425 }
426 }
427
428 tp = tcp_sk(sk);
429 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
430 fastopen = rcu_dereference(tp->fastopen_rsk);
431 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
432 if (sk->sk_state != TCP_LISTEN &&
433 !between(seq, snd_una, tp->snd_nxt)) {
434 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
435 goto out;
436 }
437
438 np = tcp_inet6_sk(sk);
439
440 if (type == NDISC_REDIRECT) {
441 if (!sock_owned_by_user(sk)) {
442 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
443
444 if (dst)
445 dst->ops->redirect(dst, sk, skb);
446 }
447 goto out;
448 }
449
450 if (type == ICMPV6_PKT_TOOBIG) {
451 u32 mtu = ntohl(info);
452
453 /* We are not interested in TCP_LISTEN and open_requests
454 * (SYN-ACKs sent out by Linux are always <576 bytes, so
455 * they should go through unfragmented).
456 */
457 if (sk->sk_state == TCP_LISTEN)
458 goto out;
459
460 if (!ip6_sk_accept_pmtu(sk))
461 goto out;
462
463 if (mtu < IPV6_MIN_MTU)
464 goto out;
465
466 WRITE_ONCE(tp->mtu_info, mtu);
467
468 if (!sock_owned_by_user(sk))
469 tcp_v6_mtu_reduced(sk);
470 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
471 &sk->sk_tsq_flags))
472 sock_hold(sk);
473 goto out;
474 }
475
476
477 /* Might be for a request_sock */
478 switch (sk->sk_state) {
479 case TCP_SYN_SENT:
480 case TCP_SYN_RECV:
481 /* Only in fast or simultaneous open. If a fast open socket is
482 * already accepted it is treated as a connected one below.
483 */
484 if (fastopen && !fastopen->sk)
485 break;
486
487 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
488
489 if (!sock_owned_by_user(sk))
490 tcp_done_with_error(sk, err);
491 else
492 WRITE_ONCE(sk->sk_err_soft, err);
493 goto out;
494 case TCP_LISTEN:
495 break;
496 default:
497 /* check if this ICMP message allows revert of backoff.
498 * (see RFC 6069)
499 */
500 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
501 code == ICMPV6_NOROUTE)
502 tcp_ld_RTO_revert(sk, seq);
503 }
504
505 if (!sock_owned_by_user(sk) && np->recverr) {
506 WRITE_ONCE(sk->sk_err, err);
507 sk_error_report(sk);
508 } else {
509 WRITE_ONCE(sk->sk_err_soft, err);
510 }
511 out:
512 bh_unlock_sock(sk);
513 sock_put(sk);
514 return 0;
515 }
516
517
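/* Build and transmit a SYN-ACK for the given request socket, grabbing a
 * route first if the caller did not supply one.
 */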
518 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
519 struct flowi *fl,
520 struct request_sock *req,
521 struct tcp_fastopen_cookie *foc,
522 enum tcp_synack_type synack_type,
523 struct sk_buff *syn_skb)
524 {
525 struct inet_request_sock *ireq = inet_rsk(req);
526 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
527 struct ipv6_txoptions *opt;
528 struct flowi6 *fl6 = &fl->u.ip6;
529 struct sk_buff *skb;
530 int err = -ENOMEM;
531 u8 tclass;
532
533 /* First, grab a route. */
534 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
535 IPPROTO_TCP)) == NULL)
536 goto done;
537
538 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
539
540 if (skb) {
541 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
542 &ireq->ir_v6_rmt_addr);
543
544 fl6->daddr = ireq->ir_v6_rmt_addr;
545 if (np->repflow && ireq->pktopts)
546 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
547
548 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
549 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
550 (np->tclass & INET_ECN_MASK) :
551 np->tclass;
552
553 if (!INET_ECN_is_capable(tclass) &&
554 tcp_bpf_ca_needs_ecn((struct sock *)req))
555 tclass |= INET_ECN_ECT_0;
556
557 rcu_read_lock();
558 opt = ireq->ipv6_opt;
559 if (!opt)
560 opt = rcu_dereference(np->opt);
561 err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
562 opt, tclass, sk->sk_priority);
563 rcu_read_unlock();
564 err = net_xmit_eval(err);
565 }
566
567 done:
568 return err;
569 }
570
571
572 static void tcp_v6_reqsk_destructor(struct request_sock *req)
573 {
574 kfree(inet_rsk(req)->ipv6_opt);
575 consume_skb(inet_rsk(req)->pktopts);
576 }
577
578 #ifdef CONFIG_TCP_MD5SIG
579 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
580 const struct in6_addr *addr,
581 int l3index)
582 {
583 return tcp_md5_do_lookup(sk, l3index,
584 (union tcp_md5_addr *)addr, AF_INET6);
585 }
586
587 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
588 const struct sock *addr_sk)
589 {
590 int l3index;
591
592 l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
593 addr_sk->sk_bound_dev_if);
594 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
595 l3index);
596 }
597
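/* TCP_MD5SIG/TCP_MD5SIG_EXT setsockopt(): add or delete the MD5 key
 * associated with a peer address (IPv4-mapped addresses are stored as IPv4).
 */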
598 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
599 sockptr_t optval, int optlen)
600 {
601 struct tcp_md5sig cmd;
602 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
603 int l3index = 0;
604 u8 prefixlen;
605 u8 flags;
606
607 if (optlen < sizeof(cmd))
608 return -EINVAL;
609
610 if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
611 return -EFAULT;
612
613 if (sin6->sin6_family != AF_INET6)
614 return -EINVAL;
615
616 flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
617
618 if (optname == TCP_MD5SIG_EXT &&
619 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
620 prefixlen = cmd.tcpm_prefixlen;
621 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
622 prefixlen > 32))
623 return -EINVAL;
624 } else {
625 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
626 }
627
628 if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
629 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
630 struct net_device *dev;
631
632 rcu_read_lock();
633 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
634 if (dev && netif_is_l3_master(dev))
635 l3index = dev->ifindex;
636 rcu_read_unlock();
637
638 /* ok to reference set/not set outside of rcu;
639 * right now device MUST be an L3 master
640 */
641 if (!dev || !l3index)
642 return -EINVAL;
643 }
644
645 if (!cmd.tcpm_keylen) {
646 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
647 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
648 AF_INET, prefixlen,
649 l3index, flags);
650 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
651 AF_INET6, prefixlen, l3index, flags);
652 }
653
654 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
655 return -EINVAL;
656
657 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
658 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
659 AF_INET, prefixlen, l3index, flags,
660 cmd.tcpm_key, cmd.tcpm_keylen);
661
662 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
663 AF_INET6, prefixlen, l3index, flags,
664 cmd.tcpm_key, cmd.tcpm_keylen);
665 }
666
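/* Feed the IPv6 pseudo-header and the TCP header (with its checksum zeroed)
 * into the MD5 hash request.
 */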
667 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
668 const struct in6_addr *daddr,
669 const struct in6_addr *saddr,
670 const struct tcphdr *th, int nbytes)
671 {
672 struct tcp6_pseudohdr *bp;
673 struct scatterlist sg;
674 struct tcphdr *_th;
675
676 bp = hp->scratch;
677 /* 1. TCP pseudo-header (RFC2460) */
678 bp->saddr = *saddr;
679 bp->daddr = *daddr;
680 bp->protocol = cpu_to_be32(IPPROTO_TCP);
681 bp->len = cpu_to_be32(nbytes);
682
683 _th = (struct tcphdr *)(bp + 1);
684 memcpy(_th, th, sizeof(*th));
685 _th->check = 0;
686
687 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
688 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
689 sizeof(*bp) + sizeof(*th));
690 return crypto_ahash_update(hp->md5_req);
691 }
692
693 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
694 const struct in6_addr *daddr, struct in6_addr *saddr,
695 const struct tcphdr *th)
696 {
697 struct tcp_md5sig_pool *hp;
698 struct ahash_request *req;
699
700 hp = tcp_get_md5sig_pool();
701 if (!hp)
702 goto clear_hash_noput;
703 req = hp->md5_req;
704
705 if (crypto_ahash_init(req))
706 goto clear_hash;
707 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
708 goto clear_hash;
709 if (tcp_md5_hash_key(hp, key))
710 goto clear_hash;
711 ahash_request_set_crypt(req, NULL, md5_hash, 0);
712 if (crypto_ahash_final(req))
713 goto clear_hash;
714
715 tcp_put_md5sig_pool();
716 return 0;
717
718 clear_hash:
719 tcp_put_md5sig_pool();
720 clear_hash_noput:
721 memset(md5_hash, 0, 16);
722 return 1;
723 }
724
725 static int tcp_v6_md5_hash_skb(char *md5_hash,
726 const struct tcp_md5sig_key *key,
727 const struct sock *sk,
728 const struct sk_buff *skb)
729 {
730 const struct in6_addr *saddr, *daddr;
731 struct tcp_md5sig_pool *hp;
732 struct ahash_request *req;
733 const struct tcphdr *th = tcp_hdr(skb);
734
735 if (sk) { /* valid for establish/request sockets */
736 saddr = &sk->sk_v6_rcv_saddr;
737 daddr = &sk->sk_v6_daddr;
738 } else {
739 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
740 saddr = &ip6h->saddr;
741 daddr = &ip6h->daddr;
742 }
743
744 hp = tcp_get_md5sig_pool();
745 if (!hp)
746 goto clear_hash_noput;
747 req = hp->md5_req;
748
749 if (crypto_ahash_init(req))
750 goto clear_hash;
751
752 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
753 goto clear_hash;
754 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
755 goto clear_hash;
756 if (tcp_md5_hash_key(hp, key))
757 goto clear_hash;
758 ahash_request_set_crypt(req, NULL, md5_hash, 0);
759 if (crypto_ahash_final(req))
760 goto clear_hash;
761
762 tcp_put_md5sig_pool();
763 return 0;
764
765 clear_hash:
766 tcp_put_md5sig_pool();
767 clear_hash_noput:
768 memset(md5_hash, 0, 16);
769 return 1;
770 }
771
772 #endif
773
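/* Fill in the IPv6-specific fields of a new request sock from the incoming SYN. */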
774 static void tcp_v6_init_req(struct request_sock *req,
775 const struct sock *sk_listener,
776 struct sk_buff *skb)
777 {
778 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
779 struct inet_request_sock *ireq = inet_rsk(req);
780 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
781
782 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
783 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
784
785 /* So that link locals have meaning */
786 if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
787 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
788 ireq->ir_iif = tcp_v6_iif(skb);
789
790 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
791 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
792 np->rxopt.bits.rxinfo ||
793 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
794 np->rxopt.bits.rxohlim || np->repflow)) {
795 refcount_inc(&skb->users);
796 ireq->pktopts = skb;
797 }
798 }
799
800 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
801 struct sk_buff *skb,
802 struct flowi *fl,
803 struct request_sock *req)
804 {
805 tcp_v6_init_req(req, sk, skb);
806
807 if (security_inet_conn_request(sk, skb, req))
808 return NULL;
809
810 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
811 }
812
813 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
814 .family = AF_INET6,
815 .obj_size = sizeof(struct tcp6_request_sock),
816 .rtx_syn_ack = tcp_rtx_synack,
817 .send_ack = tcp_v6_reqsk_send_ack,
818 .destructor = tcp_v6_reqsk_destructor,
819 .send_reset = tcp_v6_send_reset,
820 .syn_ack_timeout = tcp_syn_ack_timeout,
821 };
822
823 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
824 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
825 sizeof(struct ipv6hdr),
826 #ifdef CONFIG_TCP_MD5SIG
827 .req_md5_lookup = tcp_v6_md5_lookup,
828 .calc_md5_hash = tcp_v6_md5_hash_skb,
829 #endif
830 #ifdef CONFIG_SYN_COOKIES
831 .cookie_init_seq = cookie_v6_init_sequence,
832 #endif
833 .route_req = tcp_v6_route_req,
834 .init_seq = tcp_v6_init_seq,
835 .init_ts_off = tcp_v6_init_ts_off,
836 .send_synack = tcp_v6_send_synack,
837 };
838
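/* Build and send a bare ACK or RST in reply to skb, transmitted through the
 * per-namespace TCP control socket.
 */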
839 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
840 u32 ack, u32 win, u32 tsval, u32 tsecr,
841 int oif, struct tcp_md5sig_key *key, int rst,
842 u8 tclass, __be32 label, u32 priority, u32 txhash)
843 {
844 const struct tcphdr *th = tcp_hdr(skb);
845 struct tcphdr *t1;
846 struct sk_buff *buff;
847 struct flowi6 fl6;
848 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
849 struct sock *ctl_sk = net->ipv6.tcp_sk;
850 unsigned int tot_len = sizeof(struct tcphdr);
851 __be32 mrst = 0, *topt;
852 struct dst_entry *dst;
853 __u32 mark = 0;
854
855 if (tsecr)
856 tot_len += TCPOLEN_TSTAMP_ALIGNED;
857 #ifdef CONFIG_TCP_MD5SIG
858 if (key)
859 tot_len += TCPOLEN_MD5SIG_ALIGNED;
860 #endif
861
862 #ifdef CONFIG_MPTCP
863 if (rst && !key) {
864 mrst = mptcp_reset_option(skb);
865
866 if (mrst)
867 tot_len += sizeof(__be32);
868 }
869 #endif
870
871 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
872 if (!buff)
873 return;
874
875 skb_reserve(buff, MAX_TCP_HEADER);
876
877 t1 = skb_push(buff, tot_len);
878 skb_reset_transport_header(buff);
879
880 /* Swap the send and the receive. */
881 memset(t1, 0, sizeof(*t1));
882 t1->dest = th->source;
883 t1->source = th->dest;
884 t1->doff = tot_len / 4;
885 t1->seq = htonl(seq);
886 t1->ack_seq = htonl(ack);
887 t1->ack = !rst || !th->ack;
888 t1->rst = rst;
889 t1->window = htons(win);
890
891 topt = (__be32 *)(t1 + 1);
892
893 if (tsecr) {
894 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
895 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
896 *topt++ = htonl(tsval);
897 *topt++ = htonl(tsecr);
898 }
899
900 if (mrst)
901 *topt++ = mrst;
902
903 #ifdef CONFIG_TCP_MD5SIG
904 if (key) {
905 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
906 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
907 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
908 &ipv6_hdr(skb)->saddr,
909 &ipv6_hdr(skb)->daddr, t1);
910 }
911 #endif
912
913 memset(&fl6, 0, sizeof(fl6));
914 fl6.daddr = ipv6_hdr(skb)->saddr;
915 fl6.saddr = ipv6_hdr(skb)->daddr;
916 fl6.flowlabel = label;
917
918 buff->ip_summed = CHECKSUM_PARTIAL;
919
920 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
921
922 fl6.flowi6_proto = IPPROTO_TCP;
923 if (rt6_need_strict(&fl6.daddr) && !oif)
924 fl6.flowi6_oif = tcp_v6_iif(skb);
925 else {
926 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
927 oif = skb->skb_iif;
928
929 fl6.flowi6_oif = oif;
930 }
931
932 if (sk) {
933 if (sk->sk_state == TCP_TIME_WAIT)
934 mark = inet_twsk(sk)->tw_mark;
935 else
936 mark = READ_ONCE(sk->sk_mark);
937 skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
938 }
939 if (txhash) {
940 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
941 skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
942 }
943 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
944 fl6.fl6_dport = t1->dest;
945 fl6.fl6_sport = t1->source;
946 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
947 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
948
949 /* Pass a socket to ip6_dst_lookup_flow() even when it is for an RST.
950 * The underlying function will use it to retrieve the network
951 * namespace.
952 */
953 if (sk && sk->sk_state != TCP_TIME_WAIT)
954 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
955 else
956 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
957 if (!IS_ERR(dst)) {
958 skb_dst_set(buff, dst);
959 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
960 tclass & ~INET_ECN_MASK, priority);
961 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
962 if (rst)
963 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
964 return;
965 }
966
967 kfree_skb(buff);
968 }
969
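/* Send a RST for skb, deriving sequence numbers, flow label, priority and
 * the MD5 key (if any) from the offending segment and the socket state.
 */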
970 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
971 {
972 const struct tcphdr *th = tcp_hdr(skb);
973 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
974 u32 seq = 0, ack_seq = 0;
975 struct tcp_md5sig_key *key = NULL;
976 #ifdef CONFIG_TCP_MD5SIG
977 const __u8 *hash_location = NULL;
978 unsigned char newhash[16];
979 int genhash;
980 struct sock *sk1 = NULL;
981 #endif
982 __be32 label = 0;
983 u32 priority = 0;
984 struct net *net;
985 u32 txhash = 0;
986 int oif = 0;
987
988 if (th->rst)
989 return;
990
991 /* If sk is not NULL, it means we did a successful lookup and the incoming
992 * route had to be correct. prequeue might have dropped our dst.
993 */
994 if (!sk && !ipv6_unicast_destination(skb))
995 return;
996
997 net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
998 #ifdef CONFIG_TCP_MD5SIG
999 rcu_read_lock();
1000 hash_location = tcp_parse_md5sig_option(th);
1001 if (sk && sk_fullsock(sk)) {
1002 int l3index;
1003
1004 /* sdif set, means packet ingressed via a device
1005 * in an L3 domain and inet_iif is set to it.
1006 */
1007 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1008 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1009 } else if (hash_location) {
1010 int dif = tcp_v6_iif_l3_slave(skb);
1011 int sdif = tcp_v6_sdif(skb);
1012 int l3index;
1013
1014 /*
1015 * The active side is lost. Try to find the listening socket through
1016 * the source port, and then find the MD5 key through that listening socket.
1017 * We do not lose security here:
1018 * the incoming packet is checked against the MD5 hash of the found key,
1019 * and no RST is generated if the hashes don't match.
1020 */
1021 sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1022 NULL, 0, &ipv6h->saddr, th->source,
1023 &ipv6h->daddr, ntohs(th->source),
1024 dif, sdif);
1025 if (!sk1)
1026 goto out;
1027
1028 /* sdif set, means packet ingressed via a device
1029 * in an L3 domain and dif is set to it.
1030 */
1031 l3index = tcp_v6_sdif(skb) ? dif : 0;
1032
1033 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1034 if (!key)
1035 goto out;
1036
1037 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1038 if (genhash || memcmp(hash_location, newhash, 16) != 0)
1039 goto out;
1040 }
1041 #endif
1042
1043 if (th->ack)
1044 seq = ntohl(th->ack_seq);
1045 else
1046 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1047 (th->doff << 2);
1048
1049 if (sk) {
1050 oif = sk->sk_bound_dev_if;
1051 if (sk_fullsock(sk)) {
1052 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1053
1054 trace_tcp_send_reset(sk, skb);
1055 if (np->repflow)
1056 label = ip6_flowlabel(ipv6h);
1057 priority = sk->sk_priority;
1058 txhash = sk->sk_txhash;
1059 }
1060 if (sk->sk_state == TCP_TIME_WAIT) {
1061 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1062 priority = inet_twsk(sk)->tw_priority;
1063 txhash = inet_twsk(sk)->tw_txhash;
1064 }
1065 } else {
1066 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1067 label = ip6_flowlabel(ipv6h);
1068 }
1069
1070 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1071 ipv6_get_dsfield(ipv6h), label, priority, txhash);
1072
1073 #ifdef CONFIG_TCP_MD5SIG
1074 out:
1075 rcu_read_unlock();
1076 #endif
1077 }
1078
1079 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1080 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1081 struct tcp_md5sig_key *key, u8 tclass,
1082 __be32 label, u32 priority, u32 txhash)
1083 {
1084 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1085 tclass, label, priority, txhash);
1086 }
1087
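/* ACK a segment received on a TIME-WAIT socket, echoing its recorded window,
 * timestamps and flow label.
 */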
1088 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1089 {
1090 struct inet_timewait_sock *tw = inet_twsk(sk);
1091 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1092
1093 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1094 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1095 tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1096 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1097 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
1098 tw->tw_txhash);
1099
1100 inet_twsk_put(tw);
1101 }
1102
1103 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1104 struct request_sock *req)
1105 {
1106 int l3index;
1107
1108 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1109
1110 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1111 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1112 */
1113 /* RFC 7323 2.3
1114 * The window field (SEG.WND) of every outgoing segment, with the
1115 * exception of <SYN> segments, MUST be right-shifted by
1116 * Rcv.Wind.Shift bits:
1117 */
1118 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1119 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1120 tcp_rsk(req)->rcv_nxt,
1121 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1122 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1123 READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
1124 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1125 ipv6_get_dsfield(ipv6_hdr(skb)), 0,
1126 READ_ONCE(sk->sk_priority),
1127 READ_ONCE(tcp_rsk(req)->txhash));
1128 }
1129
1130
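/* For a non-SYN segment hitting a listener, try to validate it as a
 * syncookie ACK and create the corresponding child socket.
 */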
1131 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1132 {
1133 #ifdef CONFIG_SYN_COOKIES
1134 const struct tcphdr *th = tcp_hdr(skb);
1135
1136 if (!th->syn)
1137 sk = cookie_v6_check(sk, skb);
1138 #endif
1139 return sk;
1140 }
1141
1142 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1143 struct tcphdr *th, u32 *cookie)
1144 {
1145 u16 mss = 0;
1146 #ifdef CONFIG_SYN_COOKIES
1147 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1148 &tcp_request_sock_ipv6_ops, sk, th);
1149 if (mss) {
1150 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1151 tcp_synq_overflow(sk);
1152 }
1153 #endif
1154 return mss;
1155 }
1156
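/* Handle an incoming SYN on a listening socket: IPv4 packets received on a
 * dual-stack socket are passed to tcp_v4_conn_request(), and SYNs with a
 * v4-mapped IPv6 source address are dropped.
 */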
1157 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1158 {
1159 if (skb->protocol == htons(ETH_P_IP))
1160 return tcp_v4_conn_request(sk, skb);
1161
1162 if (!ipv6_unicast_destination(skb))
1163 goto drop;
1164
1165 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1166 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1167 return 0;
1168 }
1169
1170 return tcp_conn_request(&tcp6_request_sock_ops,
1171 &tcp_request_sock_ipv6_ops, sk, skb);
1172
1173 drop:
1174 tcp_listendrop(sk);
1175 return 0; /* don't send reset */
1176 }
1177
1178 static void tcp_v6_restore_cb(struct sk_buff *skb)
1179 {
1180 /* We need to move header back to the beginning if xfrm6_policy_check()
1181 * and tcp_v6_fill_cb() are going to be called again.
1182 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1183 */
1184 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1185 sizeof(struct inet6_skb_parm));
1186 }
1187
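/* Create the child socket once the handshake completes, copying IPv6 options
 * and any MD5 key from the listener; the v4-mapped case is delegated to
 * tcp_v4_syn_recv_sock().
 */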
1188 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1189 struct request_sock *req,
1190 struct dst_entry *dst,
1191 struct request_sock *req_unhash,
1192 bool *own_req)
1193 {
1194 struct inet_request_sock *ireq;
1195 struct ipv6_pinfo *newnp;
1196 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1197 struct ipv6_txoptions *opt;
1198 struct inet_sock *newinet;
1199 bool found_dup_sk = false;
1200 struct tcp_sock *newtp;
1201 struct sock *newsk;
1202 #ifdef CONFIG_TCP_MD5SIG
1203 struct tcp_md5sig_key *key;
1204 int l3index;
1205 #endif
1206 struct flowi6 fl6;
1207
1208 if (skb->protocol == htons(ETH_P_IP)) {
1209 /*
1210 * v6 mapped
1211 */
1212
1213 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1214 req_unhash, own_req);
1215
1216 if (!newsk)
1217 return NULL;
1218
1219 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1220
1221 newnp = tcp_inet6_sk(newsk);
1222 newtp = tcp_sk(newsk);
1223
1224 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1225
1226 newnp->saddr = newsk->sk_v6_rcv_saddr;
1227
1228 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1229 if (sk_is_mptcp(newsk))
1230 mptcpv6_handle_mapped(newsk, true);
1231 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1232 #ifdef CONFIG_TCP_MD5SIG
1233 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1234 #endif
1235
1236 newnp->ipv6_mc_list = NULL;
1237 newnp->ipv6_ac_list = NULL;
1238 newnp->ipv6_fl_list = NULL;
1239 newnp->pktoptions = NULL;
1240 newnp->opt = NULL;
1241 newnp->mcast_oif = inet_iif(skb);
1242 newnp->mcast_hops = ip_hdr(skb)->ttl;
1243 newnp->rcv_flowinfo = 0;
1244 if (np->repflow)
1245 newnp->flow_label = 0;
1246
1247 /*
1248 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1249 * here, tcp_create_openreq_child now does this for us, see the comment in
1250 * that function for the gory details. -acme
1251 */
1252
1253 /* It is a tricky place. Until this moment IPv4 tcp
1254 worked with IPv6 icsk.icsk_af_ops.
1255 Sync it now.
1256 */
1257 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1258
1259 return newsk;
1260 }
1261
1262 ireq = inet_rsk(req);
1263
1264 if (sk_acceptq_is_full(sk))
1265 goto out_overflow;
1266
1267 if (!dst) {
1268 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1269 if (!dst)
1270 goto out;
1271 }
1272
1273 newsk = tcp_create_openreq_child(sk, req, skb);
1274 if (!newsk)
1275 goto out_nonewsk;
1276
1277 /*
1278 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1279 * count here, tcp_create_openreq_child now does this for us, see the
1280 * comment in that function for the gory details. -acme
1281 */
1282
1283 newsk->sk_gso_type = SKB_GSO_TCPV6;
1284 inet6_sk_rx_dst_set(newsk, skb);
1285
1286 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1287
1288 newtp = tcp_sk(newsk);
1289 newinet = inet_sk(newsk);
1290 newnp = tcp_inet6_sk(newsk);
1291
1292 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1293
1294 ip6_dst_store(newsk, dst, NULL, NULL);
1295
1296 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1297 newnp->saddr = ireq->ir_v6_loc_addr;
1298 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1299 newsk->sk_bound_dev_if = ireq->ir_iif;
1300
1301 /* Now IPv6 options...
1302
1303 First: no IPv4 options.
1304 */
1305 newinet->inet_opt = NULL;
1306 newnp->ipv6_mc_list = NULL;
1307 newnp->ipv6_ac_list = NULL;
1308 newnp->ipv6_fl_list = NULL;
1309
1310 /* Clone RX bits */
1311 newnp->rxopt.all = np->rxopt.all;
1312
1313 newnp->pktoptions = NULL;
1314 newnp->opt = NULL;
1315 newnp->mcast_oif = tcp_v6_iif(skb);
1316 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1317 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1318 if (np->repflow)
1319 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1320
1321 /* Set ToS of the new socket based upon the value of incoming SYN.
1322 * ECT bits are set later in tcp_init_transfer().
1323 */
1324 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1325 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1326
1327 /* Clone native IPv6 options from listening socket (if any)
1328
1329 Yes, keeping a reference count would be much more clever,
1330 but we do one more thing here: reattach optmem
1331 to newsk.
1332 */
1333 opt = ireq->ipv6_opt;
1334 if (!opt)
1335 opt = rcu_dereference(np->opt);
1336 if (opt) {
1337 opt = ipv6_dup_options(newsk, opt);
1338 RCU_INIT_POINTER(newnp->opt, opt);
1339 }
1340 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1341 if (opt)
1342 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1343 opt->opt_flen;
1344
1345 tcp_ca_openreq_child(newsk, dst);
1346
1347 tcp_sync_mss(newsk, dst_mtu(dst));
1348 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1349
1350 tcp_initialize_rcv_mss(newsk);
1351
1352 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1353 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1354
1355 #ifdef CONFIG_TCP_MD5SIG
1356 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1357
1358 /* Copy over the MD5 key from the original socket */
1359 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1360 if (key) {
1361 const union tcp_md5_addr *addr;
1362
1363 addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
1364 if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
1365 inet_csk_prepare_forced_close(newsk);
1366 tcp_done(newsk);
1367 goto out;
1368 }
1369 }
1370 #endif
1371
1372 if (__inet_inherit_port(sk, newsk) < 0) {
1373 inet_csk_prepare_forced_close(newsk);
1374 tcp_done(newsk);
1375 goto out;
1376 }
1377 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1378 &found_dup_sk);
1379 if (*own_req) {
1380 tcp_move_syn(newtp, req);
1381
1382 /* Clone pktoptions received with SYN, if we own the req */
1383 if (ireq->pktopts) {
1384 newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
1385 consume_skb(ireq->pktopts);
1386 ireq->pktopts = NULL;
1387 if (newnp->pktoptions)
1388 tcp_v6_restore_cb(newnp->pktoptions);
1389 }
1390 } else {
1391 if (!req_unhash && found_dup_sk) {
1392 /* This code path should only be executed in the
1393 * syncookie case
1394 */
1395 bh_unlock_sock(newsk);
1396 sock_put(newsk);
1397 newsk = NULL;
1398 }
1399 }
1400
1401 return newsk;
1402
1403 out_overflow:
1404 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1405 out_nonewsk:
1406 dst_release(dst);
1407 out:
1408 tcp_listendrop(sk);
1409 return NULL;
1410 }
1411
1412 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1413 u32));
1414 /* The socket must have its spinlock held when we get
1415 * here, unless it is a TCP_LISTEN socket.
1416 *
1417 * We have a potential double-lock case here, so even when
1418 * doing backlog processing we use the BH locking scheme.
1419 * This is because we cannot sleep with the original spinlock
1420 * held.
1421 */
1422 INDIRECT_CALLABLE_SCOPE
1423 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1424 {
1425 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1426 struct sk_buff *opt_skb = NULL;
1427 enum skb_drop_reason reason;
1428 struct tcp_sock *tp;
1429
1430 /* Imagine: socket is IPv6. IPv4 packet arrives,
1431 goes to IPv4 receive handler and is backlogged.
1432 From backlog it always goes here. Kerboom...
1433 Fortunately, tcp_rcv_established and rcv_established
1434 handle them correctly, but it is not the case with
1435 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1436 */
1437
1438 if (skb->protocol == htons(ETH_P_IP))
1439 return tcp_v4_do_rcv(sk, skb);
1440
1441 /*
1442 * socket locking is here for SMP purposes as backlog rcv
1443 * is currently called with bh processing disabled.
1444 */
1445
1446 /* Do Stevens' IPV6_PKTOPTIONS.
1447
1448 Yes, guys, it is the only place in our code, where we
1449 may make it not affecting IPv4.
1450 The rest of code is protocol independent,
1451 and I do not like idea to uglify IPv4.
1452
1453 Actually, the whole idea behind IPV6_PKTOPTIONS
1454 does not look very well thought out. For now we latch
1455 the options received in the last packet enqueued
1456 by tcp. Feel free to propose a better solution.
1457 --ANK (980728)
1458 */
1459 if (np->rxopt.all)
1460 opt_skb = skb_clone_and_charge_r(skb, sk);
1461
1462 reason = SKB_DROP_REASON_NOT_SPECIFIED;
1463 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1464 struct dst_entry *dst;
1465
1466 dst = rcu_dereference_protected(sk->sk_rx_dst,
1467 lockdep_sock_is_held(sk));
1468
1469 sock_rps_save_rxhash(sk, skb);
1470 sk_mark_napi_id(sk, skb);
1471 if (dst) {
1472 if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1473 INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1474 dst, sk->sk_rx_dst_cookie) == NULL) {
1475 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1476 dst_release(dst);
1477 }
1478 }
1479
1480 tcp_rcv_established(sk, skb);
1481 if (opt_skb)
1482 goto ipv6_pktoptions;
1483 return 0;
1484 }
1485
1486 if (tcp_checksum_complete(skb))
1487 goto csum_err;
1488
1489 if (sk->sk_state == TCP_LISTEN) {
1490 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1491
1492 if (!nsk)
1493 goto discard;
1494
1495 if (nsk != sk) {
1496 if (tcp_child_process(sk, nsk, skb))
1497 goto reset;
1498 if (opt_skb)
1499 __kfree_skb(opt_skb);
1500 return 0;
1501 }
1502 } else
1503 sock_rps_save_rxhash(sk, skb);
1504
1505 if (tcp_rcv_state_process(sk, skb))
1506 goto reset;
1507 if (opt_skb)
1508 goto ipv6_pktoptions;
1509 return 0;
1510
1511 reset:
1512 tcp_v6_send_reset(sk, skb);
1513 discard:
1514 if (opt_skb)
1515 __kfree_skb(opt_skb);
1516 kfree_skb_reason(skb, reason);
1517 return 0;
1518 csum_err:
1519 reason = SKB_DROP_REASON_TCP_CSUM;
1520 trace_tcp_bad_csum(skb);
1521 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1522 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1523 goto discard;
1524
1525
1526 ipv6_pktoptions:
1527 /* Do you ask, what is it?
1528
1529 1. skb was enqueued by tcp.
1530 2. skb is added to tail of read queue, rather than out of order.
1531 3. socket is not in passive state.
1532 4. Finally, it really contains options, which user wants to receive.
1533 */
1534 tp = tcp_sk(sk);
1535 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1536 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1537 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1538 np->mcast_oif = tcp_v6_iif(opt_skb);
1539 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1540 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1541 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1542 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1543 if (np->repflow)
1544 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1545 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1546 tcp_v6_restore_cb(opt_skb);
1547 opt_skb = xchg(&np->pktoptions, opt_skb);
1548 } else {
1549 __kfree_skb(opt_skb);
1550 opt_skb = xchg(&np->pktoptions, NULL);
1551 }
1552 }
1553
1554 consume_skb(opt_skb);
1555 return 0;
1556 }
1557
1558 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1559 const struct tcphdr *th)
1560 {
1561 /* This is tricky: we move IP6CB at its correct location into
1562 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1563 * _decode_session6() uses IP6CB().
1564 * barrier() makes sure compiler won't play aliasing games.
1565 */
1566 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1567 sizeof(struct inet6_skb_parm));
1568 barrier();
1569
1570 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1571 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1572 skb->len - th->doff*4);
1573 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1574 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1575 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1576 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1577 TCP_SKB_CB(skb)->sacked = 0;
1578 TCP_SKB_CB(skb)->has_rxtstamp =
1579 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1580 }
1581
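/* Main IPv6 TCP receive path: validate header and checksum, look up the
 * owning socket and dispatch to the state-specific handlers.
 */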
1582 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1583 {
1584 enum skb_drop_reason drop_reason;
1585 int sdif = inet6_sdif(skb);
1586 int dif = inet6_iif(skb);
1587 const struct tcphdr *th;
1588 const struct ipv6hdr *hdr;
1589 bool refcounted;
1590 struct sock *sk;
1591 int ret;
1592 struct net *net = dev_net(skb->dev);
1593
1594 drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1595 if (skb->pkt_type != PACKET_HOST)
1596 goto discard_it;
1597
1598 /*
1599 * Count it even if it's bad.
1600 */
1601 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1602
1603 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1604 goto discard_it;
1605
1606 th = (const struct tcphdr *)skb->data;
1607
1608 if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1609 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1610 goto bad_packet;
1611 }
1612 if (!pskb_may_pull(skb, th->doff*4))
1613 goto discard_it;
1614
1615 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1616 goto csum_error;
1617
1618 th = (const struct tcphdr *)skb->data;
1619 hdr = ipv6_hdr(skb);
1620
1621 lookup:
1622 sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1623 th->source, th->dest, inet6_iif(skb), sdif,
1624 &refcounted);
1625 if (!sk)
1626 goto no_tcp_socket;
1627
1628 process:
1629 if (sk->sk_state == TCP_TIME_WAIT)
1630 goto do_time_wait;
1631
1632 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1633 struct request_sock *req = inet_reqsk(sk);
1634 bool req_stolen = false;
1635 struct sock *nsk;
1636
1637 sk = req->rsk_listener;
1638 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1639 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1640 else
1641 drop_reason = tcp_inbound_md5_hash(sk, skb,
1642 &hdr->saddr, &hdr->daddr,
1643 AF_INET6, dif, sdif);
1644 if (drop_reason) {
1645 sk_drops_add(sk, skb);
1646 reqsk_put(req);
1647 goto discard_it;
1648 }
1649 if (tcp_checksum_complete(skb)) {
1650 reqsk_put(req);
1651 goto csum_error;
1652 }
1653 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1654 nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1655 if (!nsk) {
1656 inet_csk_reqsk_queue_drop_and_put(sk, req);
1657 goto lookup;
1658 }
1659 sk = nsk;
1660 /* reuseport_migrate_sock() has already held one sk_refcnt
1661 * before returning.
1662 */
1663 } else {
1664 sock_hold(sk);
1665 }
1666 refcounted = true;
1667 nsk = NULL;
1668 if (!tcp_filter(sk, skb)) {
1669 th = (const struct tcphdr *)skb->data;
1670 hdr = ipv6_hdr(skb);
1671 tcp_v6_fill_cb(skb, hdr, th);
1672 nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1673 } else {
1674 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1675 }
1676 if (!nsk) {
1677 reqsk_put(req);
1678 if (req_stolen) {
1679 /* Another cpu got exclusive access to req
1680 * and created a full blown socket.
1681 * Try to feed this packet to this socket
1682 * instead of discarding it.
1683 */
1684 tcp_v6_restore_cb(skb);
1685 sock_put(sk);
1686 goto lookup;
1687 }
1688 goto discard_and_relse;
1689 }
1690 nf_reset_ct(skb);
1691 if (nsk == sk) {
1692 reqsk_put(req);
1693 tcp_v6_restore_cb(skb);
1694 } else if (tcp_child_process(sk, nsk, skb)) {
1695 tcp_v6_send_reset(nsk, skb);
1696 goto discard_and_relse;
1697 } else {
1698 sock_put(sk);
1699 return 0;
1700 }
1701 }
1702
1703 if (static_branch_unlikely(&ip6_min_hopcount)) {
1704 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1705 if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
1706 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1707 drop_reason = SKB_DROP_REASON_TCP_MINTTL;
1708 goto discard_and_relse;
1709 }
1710 }
1711
1712 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1713 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1714 goto discard_and_relse;
1715 }
1716
1717 drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1718 AF_INET6, dif, sdif);
1719 if (drop_reason)
1720 goto discard_and_relse;
1721
1722 nf_reset_ct(skb);
1723
1724 if (tcp_filter(sk, skb)) {
1725 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1726 goto discard_and_relse;
1727 }
1728 th = (const struct tcphdr *)skb->data;
1729 hdr = ipv6_hdr(skb);
1730 tcp_v6_fill_cb(skb, hdr, th);
1731
1732 skb->dev = NULL;
1733
1734 if (sk->sk_state == TCP_LISTEN) {
1735 ret = tcp_v6_do_rcv(sk, skb);
1736 goto put_and_return;
1737 }
1738
1739 sk_incoming_cpu_update(sk);
1740
1741 bh_lock_sock_nested(sk);
1742 tcp_segs_in(tcp_sk(sk), skb);
1743 ret = 0;
1744 if (!sock_owned_by_user(sk)) {
1745 ret = tcp_v6_do_rcv(sk, skb);
1746 } else {
1747 if (tcp_add_backlog(sk, skb, &drop_reason))
1748 goto discard_and_relse;
1749 }
1750 bh_unlock_sock(sk);
1751 put_and_return:
1752 if (refcounted)
1753 sock_put(sk);
1754 return ret ? -1 : 0;
1755
1756 no_tcp_socket:
1757 drop_reason = SKB_DROP_REASON_NO_SOCKET;
1758 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1759 goto discard_it;
1760
1761 tcp_v6_fill_cb(skb, hdr, th);
1762
1763 if (tcp_checksum_complete(skb)) {
1764 csum_error:
1765 drop_reason = SKB_DROP_REASON_TCP_CSUM;
1766 trace_tcp_bad_csum(skb);
1767 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1768 bad_packet:
1769 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1770 } else {
1771 tcp_v6_send_reset(NULL, skb);
1772 }
1773
1774 discard_it:
1775 SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1776 kfree_skb_reason(skb, drop_reason);
1777 return 0;
1778
1779 discard_and_relse:
1780 sk_drops_add(sk, skb);
1781 if (refcounted)
1782 sock_put(sk);
1783 goto discard_it;
1784
1785 do_time_wait:
1786 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1787 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1788 inet_twsk_put(inet_twsk(sk));
1789 goto discard_it;
1790 }
1791
1792 tcp_v6_fill_cb(skb, hdr, th);
1793
1794 if (tcp_checksum_complete(skb)) {
1795 inet_twsk_put(inet_twsk(sk));
1796 goto csum_error;
1797 }
1798
1799 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1800 case TCP_TW_SYN:
1801 {
1802 struct sock *sk2;
1803
1804 sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1805 skb, __tcp_hdrlen(th),
1806 &ipv6_hdr(skb)->saddr, th->source,
1807 &ipv6_hdr(skb)->daddr,
1808 ntohs(th->dest),
1809 tcp_v6_iif_l3_slave(skb),
1810 sdif);
1811 if (sk2) {
1812 struct inet_timewait_sock *tw = inet_twsk(sk);
1813 inet_twsk_deschedule_put(tw);
1814 sk = sk2;
1815 tcp_v6_restore_cb(skb);
1816 refcounted = false;
1817 goto process;
1818 }
1819 }
1820 /* to ACK */
1821 fallthrough;
1822 case TCP_TW_ACK:
1823 tcp_v6_timewait_ack(sk, skb);
1824 break;
1825 case TCP_TW_RST:
1826 tcp_v6_send_reset(sk, skb);
1827 inet_twsk_deschedule_put(inet_twsk(sk));
1828 goto discard_it;
1829 case TCP_TW_SUCCESS:
1830 ;
1831 }
1832 goto discard_it;
1833 }
1834
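/* Early demux: look up an established socket before routing so that its
 * cached dst can be attached to the skb.
 */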
1835 void tcp_v6_early_demux(struct sk_buff *skb)
1836 {
1837 struct net *net = dev_net(skb->dev);
1838 const struct ipv6hdr *hdr;
1839 const struct tcphdr *th;
1840 struct sock *sk;
1841
1842 if (skb->pkt_type != PACKET_HOST)
1843 return;
1844
1845 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1846 return;
1847
1848 hdr = ipv6_hdr(skb);
1849 th = tcp_hdr(skb);
1850
1851 if (th->doff < sizeof(struct tcphdr) / 4)
1852 return;
1853
1854 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1855 sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
1856 &hdr->saddr, th->source,
1857 &hdr->daddr, ntohs(th->dest),
1858 inet6_iif(skb), inet6_sdif(skb));
1859 if (sk) {
1860 skb->sk = sk;
1861 skb->destructor = sock_edemux;
1862 if (sk_fullsock(sk)) {
1863 struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1864
1865 if (dst)
1866 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1867 if (dst &&
1868 sk->sk_rx_dst_ifindex == skb->skb_iif)
1869 skb_dst_set_noref(skb, dst);
1870 }
1871 }
1872 }
1873
1874 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1875 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1876 .twsk_unique = tcp_twsk_unique,
1877 .twsk_destructor = tcp_twsk_destructor,
1878 };
1879
1880 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1881 {
1882 __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1883 }
1884
const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit = inet6_csk_xmit,
	.send_check = tcp_v6_send_check,
	.rebuild_header = inet6_sk_rebuild_header,
	.sk_rx_dst_set = inet6_sk_rx_dst_set,
	.conn_request = tcp_v6_conn_request,
	.syn_recv_sock = tcp_v6_syn_recv_sock,
	.net_header_len = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt = ipv6_setsockopt,
	.getsockopt = ipv6_getsockopt,
	.addr2sockaddr = inet6_csk_addr2sockaddr,
	.sockaddr_len = sizeof(struct sockaddr_in6),
	.mtu_reduced = tcp_v6_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup = tcp_v6_md5_lookup,
	.calc_md5_hash = tcp_v6_md5_hash_skb,
	.md5_parse = tcp_v6_parse_md5_keys,
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
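/* These ops are installed on an AF_INET6 socket that ends up talking to
 * an IPv4-mapped peer (::ffff:a.b.c.d): packets are built and sent as
 * plain IPv4 while the socket keeps presenting sockaddr_in6 addresses
 * to userspace.
 */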
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit = ip_queue_xmit,
	.send_check = tcp_v4_send_check,
	.rebuild_header = inet_sk_rebuild_header,
	.sk_rx_dst_set = inet_sk_rx_dst_set,
	.conn_request = tcp_v6_conn_request,
	.syn_recv_sock = tcp_v6_syn_recv_sock,
	.net_header_len = sizeof(struct iphdr),
	.setsockopt = ipv6_setsockopt,
	.getsockopt = ipv6_getsockopt,
	.addr2sockaddr = inet6_csk_addr2sockaddr,
	.sockaddr_len = sizeof(struct sockaddr_in6),
	.mtu_reduced = tcp_v4_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup = tcp_v4_md5_lookup,
	.calc_md5_hash = tcp_v4_md5_hash_skb,
	.md5_parse = tcp_v6_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 * sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif

	return 0;
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1, /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sock_i_uid(req->rsk_listener)),
		   0, /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest = &sp->sk_v6_daddr;
	src = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp = ntohs(inet->inet_sport);

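	/* The "tr" column encodes which timer is pending: 1 for
	 * retransmit/loss probe, 4 for zero-window probe, 2 for keepalive,
	 * 0 for none.  Time-wait entries report 3 in get_timewait6_sock().
	 */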
	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active = 1;
		timer_expires = icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active = 4;
		timer_expires = icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active = 2;
		timer_expires = sp->sk_timer.expires;
	} else {
		timer_active = 0;
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

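	/* Beyond the columns named in the header printed by tcp6_seq_show(),
	 * this line also carries the socket refcount and kernel pointer,
	 * rto and ato in clock_t units, the quick-ack/pingpong state, the
	 * send congestion window, and either ssthresh (-1 while still in
	 * initial slow start) or, for listeners, the fastopen queue limit.
	 */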
	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tcp_snd_cwnd(tp),
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 " sl "
			 "local_address "
			 "remote_address "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 " uid timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show = tcp6_seq_show,
	.start = tcp_seq_start,
	.next = tcp_seq_next,
	.stop = tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family = AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif

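/* The struct proto registered for SOCK_STREAM/IPPROTO_TCP on AF_INET6
 * sockets (see tcpv6_protosw below).  Most handlers are the shared TCP
 * ones; .destroy reuses tcp_v4_destroy_sock, which is effectively
 * address-family independent despite its name.
 */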
struct proto tcpv6_prot = {
	.name = "TCPv6",
	.owner = THIS_MODULE,
	.close = tcp_close,
	.pre_connect = tcp_v6_pre_connect,
	.connect = tcp_v6_connect,
	.disconnect = tcp_disconnect,
	.accept = inet_csk_accept,
	.ioctl = tcp_ioctl,
	.init = tcp_v6_init_sock,
	.destroy = tcp_v4_destroy_sock,
	.shutdown = tcp_shutdown,
	.setsockopt = tcp_setsockopt,
	.getsockopt = tcp_getsockopt,
	.bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt,
	.keepalive = tcp_set_keepalive,
	.recvmsg = tcp_recvmsg,
	.sendmsg = tcp_sendmsg,
	.splice_eof = tcp_splice_eof,
	.backlog_rcv = tcp_v6_do_rcv,
	.release_cb = tcp_release_cb,
	.hash = inet6_hash,
	.unhash = inet_unhash,
	.get_port = inet_csk_get_port,
	.put_port = inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot = tcp_bpf_update_proto,
#endif
	.enter_memory_pressure = tcp_enter_memory_pressure,
	.leave_memory_pressure = tcp_leave_memory_pressure,
	.stream_memory_free = tcp_stream_memory_free,
	.sockets_allocated = &tcp_sockets_allocated,

	.memory_allocated = &tcp_memory_allocated,
	.per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc,

	.memory_pressure = &tcp_memory_pressure,
	.orphan_count = &tcp_orphan_count,
	.sysctl_mem = sysctl_tcp_mem,
	.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header = MAX_TCP_HEADER,
	.obj_size = sizeof(struct tcp6_sock),
	.ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6),
	.slab_flags = SLAB_TYPESAFE_BY_RCU,
	.twsk_prot = &tcp6_timewait_sock_ops,
	.rsk_prot = &tcp6_request_sock_ops,
	.h.hashinfo = NULL,
	.no_autobind = true,
	.diag_destroy = tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);

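/* Hook into the IPv6 protocol demultiplexer for IPPROTO_TCP.
 * INET6_PROTO_NOPOLICY leaves the XFRM policy check to TCP itself (see
 * the xfrm6_policy_check() calls above); INET6_PROTO_FINAL marks TCP as
 * a terminal protocol rather than an extension header.
 */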
static const struct inet6_protocol tcpv6_protocol = {
	.handler = tcp_v6_rcv,
	.err_handler = tcp_v6_err,
	.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type = SOCK_STREAM,
	.protocol = IPPROTO_TCP,
	.prot = &tcpv6_prot,
	.ops = &inet6_stream_ops,
	.flags = INET_PROTOSW_PERMANENT |
		 INET_PROTOSW_ICSK,
};

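/* Each network namespace gets a kernel-internal control socket,
 * net->ipv6.tcp_sk, used to transmit packets such as RSTs and
 * time-wait ACKs that are not associated with a full socket.
 */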
static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static struct pernet_operations tcpv6_net_ops = {
	.init = tcpv6_net_init,
	.exit = tcpv6_net_exit,
};

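/* Module init: register the protocol handler, the protosw entry, the
 * per-netns state and finally MPTCP's IPv6 support, in that order;
 * failures unwind in reverse through the error labels below.
 */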
int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}