tcp_ipv4.c (7bced397510ab569d31de4c70b39e13355046387) vs. tcp_ipv4.c (24a2d43d8886f5a29c3cf108927f630c545a9a38)
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Implementation of the Transmission Control Protocol(TCP).
7 *
8 * IPv4 specific functions

--- 58 unchanged lines hidden (view full) ---

67#include <net/icmp.h>
68#include <net/inet_hashtables.h>
69#include <net/tcp.h>
70#include <net/transp_v6.h>
71#include <net/ipv6.h>
72#include <net/inet_common.h>
73#include <net/timewait_sock.h>
74#include <net/xfrm.h>
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Implementation of the Transmission Control Protocol(TCP).
7 *
8 * IPv4 specific functions

--- 58 unchanged lines hidden (view full) ---

67#include <net/icmp.h>
68#include <net/inet_hashtables.h>
69#include <net/tcp.h>
70#include <net/transp_v6.h>
71#include <net/ipv6.h>
72#include <net/inet_common.h>
73#include <net/timewait_sock.h>
74#include <net/xfrm.h>
75#include <net/netdma.h>
75#include <net/secure_seq.h>
76#include <net/tcp_memcontrol.h>
77#include <net/busy_poll.h>
78
79#include <linux/inet.h>
80#include <linux/ipv6.h>
81#include <linux/stddef.h>
82#include <linux/proc_fs.h>
83#include <linux/seq_file.h>
84
85#include <linux/crypto.h>
86#include <linux/scatterlist.h>
87
88int sysctl_tcp_tw_reuse __read_mostly;
89int sysctl_tcp_low_latency __read_mostly;
90EXPORT_SYMBOL(sysctl_tcp_low_latency);
91
76#include <net/secure_seq.h>
77#include <net/tcp_memcontrol.h>
78#include <net/busy_poll.h>
79
80#include <linux/inet.h>
81#include <linux/ipv6.h>
82#include <linux/stddef.h>
83#include <linux/proc_fs.h>
84#include <linux/seq_file.h>
85
86#include <linux/crypto.h>
87#include <linux/scatterlist.h>
88
89int sysctl_tcp_tw_reuse __read_mostly;
90int sysctl_tcp_low_latency __read_mostly;
91EXPORT_SYMBOL(sysctl_tcp_low_latency);
92
92
93#ifdef CONFIG_TCP_MD5SIG
94static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
95 __be32 daddr, __be32 saddr, const struct tcphdr *th);
96#endif
97
98struct inet_hashinfo tcp_hashinfo;
99EXPORT_SYMBOL(tcp_hashinfo);
100
93#ifdef CONFIG_TCP_MD5SIG
94static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
95 __be32 daddr, __be32 saddr, const struct tcphdr *th);
96#endif
97
98struct inet_hashinfo tcp_hashinfo;
99EXPORT_SYMBOL(tcp_hashinfo);
100
101static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
101static __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
102{
103 return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
104 ip_hdr(skb)->saddr,
105 tcp_hdr(skb)->dest,
106 tcp_hdr(skb)->source);
107}
108
109int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)

--- 92 unchanged lines hidden (view full) ---

202
203 if (tcp_death_row.sysctl_tw_recycle &&
204 !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
205 tcp_fetch_timewait_stamp(sk, &rt->dst);
206
207 inet->inet_dport = usin->sin_port;
208 inet->inet_daddr = daddr;
209
102{
103 return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
104 ip_hdr(skb)->saddr,
105 tcp_hdr(skb)->dest,
106 tcp_hdr(skb)->source);
107}
108
109int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)

--- 92 unchanged lines hidden (view full) ---

202
203 if (tcp_death_row.sysctl_tw_recycle &&
204 !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
205 tcp_fetch_timewait_stamp(sk, &rt->dst);
206
207 inet->inet_dport = usin->sin_port;
208 inet->inet_daddr = daddr;
209
210 inet_set_txhash(sk);
211
210 inet_csk(sk)->icsk_ext_hdr_len = 0;
211 if (inet_opt)
212 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
213
214 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
215
216 /* Socket identity is still unknown (sport may be zero).
217 * However we set state to SYN-SENT and not releasing socket

--- 45 unchanged lines hidden (view full) ---

263}
264EXPORT_SYMBOL(tcp_v4_connect);
265
266/*
267 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
268 * It can be called through tcp_release_cb() if socket was owned by user
269 * at the time tcp_v4_err() was called to handle ICMP message.
270 */
212 inet_csk(sk)->icsk_ext_hdr_len = 0;
213 if (inet_opt)
214 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
215
216 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
217
218 /* Socket identity is still unknown (sport may be zero).
219 * However we set state to SYN-SENT and not releasing socket

--- 45 unchanged lines hidden (view full) ---

265}
266EXPORT_SYMBOL(tcp_v4_connect);
267
268/*
269 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
270 * It can be called through tcp_release_cb() if socket was owned by user
271 * at the time tcp_v4_err() was called to handle ICMP message.
272 */
271static void tcp_v4_mtu_reduced(struct sock *sk)
273void tcp_v4_mtu_reduced(struct sock *sk)
272{
273 struct dst_entry *dst;
274 struct inet_sock *inet = inet_sk(sk);
275 u32 mtu = tcp_sk(sk)->mtu_info;
276
277 dst = inet_csk_update_pmtu(sk, mtu);
278 if (!dst)
279 return;

--- 14 unchanged lines hidden (view full) ---

294 /* Resend the TCP packet because it's
295 * clear that the old packet has been
296 * dropped. This is the new "fast" path mtu
297 * discovery.
298 */
299 tcp_simple_retransmit(sk);
300 } /* else let the usual retransmit timer handle it */
301}
274{
275 struct dst_entry *dst;
276 struct inet_sock *inet = inet_sk(sk);
277 u32 mtu = tcp_sk(sk)->mtu_info;
278
279 dst = inet_csk_update_pmtu(sk, mtu);
280 if (!dst)
281 return;

--- 14 unchanged lines hidden (view full) ---

296 /* Resend the TCP packet because it's
297 * clear that the old packet has been
298 * dropped. This is the new "fast" path mtu
299 * discovery.
300 */
301 tcp_simple_retransmit(sk);
302 } /* else let the usual retransmit timer handle it */
303}
304EXPORT_SYMBOL(tcp_v4_mtu_reduced);
302
303static void do_redirect(struct sk_buff *skb, struct sock *sk)
304{
305 struct dst_entry *dst = __sk_dst_check(sk, 0);
306
307 if (dst)
308 dst->ops->redirect(dst, sk, skb);
309}

--- 20 unchanged lines hidden (view full) ---

330 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
331 struct inet_connection_sock *icsk;
332 struct tcp_sock *tp;
333 struct inet_sock *inet;
334 const int type = icmp_hdr(icmp_skb)->type;
335 const int code = icmp_hdr(icmp_skb)->code;
336 struct sock *sk;
337 struct sk_buff *skb;
305
306static void do_redirect(struct sk_buff *skb, struct sock *sk)
307{
308 struct dst_entry *dst = __sk_dst_check(sk, 0);
309
310 if (dst)
311 dst->ops->redirect(dst, sk, skb);
312}

--- 20 unchanged lines hidden (view full) ---

333 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
334 struct inet_connection_sock *icsk;
335 struct tcp_sock *tp;
336 struct inet_sock *inet;
337 const int type = icmp_hdr(icmp_skb)->type;
338 const int code = icmp_hdr(icmp_skb)->code;
339 struct sock *sk;
340 struct sk_buff *skb;
338 struct request_sock *req;
339 __u32 seq;
341 struct request_sock *fastopen;
342 __u32 seq, snd_una;
340 __u32 remaining;
341 int err;
342 struct net *net = dev_net(icmp_skb->dev);
343
343 __u32 remaining;
344 int err;
345 struct net *net = dev_net(icmp_skb->dev);
346
344 if (icmp_skb->len < (iph->ihl << 2) + 8) {
345 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
346 return;
347 }
348
349 sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
350 iph->saddr, th->source, inet_iif(icmp_skb));
351 if (!sk) {
352 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
353 return;
354 }
355 if (sk->sk_state == TCP_TIME_WAIT) {
356 inet_twsk_put(inet_twsk(sk));

--- 15 unchanged lines hidden (view full) ---

372
373 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
374 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
375 goto out;
376 }
377
378 icsk = inet_csk(sk);
379 tp = tcp_sk(sk);
347 sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
348 iph->saddr, th->source, inet_iif(icmp_skb));
349 if (!sk) {
350 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
351 return;
352 }
353 if (sk->sk_state == TCP_TIME_WAIT) {
354 inet_twsk_put(inet_twsk(sk));

--- 15 unchanged lines hidden (view full) ---

370
371 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
372 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
373 goto out;
374 }
375
376 icsk = inet_csk(sk);
377 tp = tcp_sk(sk);
380 req = tp->fastopen_rsk;
381 seq = ntohl(th->seq);
378 seq = ntohl(th->seq);
379 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
380 fastopen = tp->fastopen_rsk;
381 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
382 if (sk->sk_state != TCP_LISTEN &&
382 if (sk->sk_state != TCP_LISTEN &&
383 !between(seq, tp->snd_una, tp->snd_nxt) &&
384 (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
385 /* For a Fast Open socket, allow seq to be snt_isn. */
383 !between(seq, snd_una, tp->snd_nxt)) {
386 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
387 goto out;
388 }
389
390 switch (type) {
391 case ICMP_REDIRECT:
392 do_redirect(icmp_skb, sk);
393 goto out;

--- 26 unchanged lines hidden (view full) ---

420 }
421
422 err = icmp_err_convert[code].errno;
423 /* check if icmp_skb allows revert of backoff
424 * (see draft-zimmermann-tcp-lcd) */
425 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
426 break;
427 if (seq != tp->snd_una || !icsk->icsk_retransmits ||
384 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
385 goto out;
386 }
387
388 switch (type) {
389 case ICMP_REDIRECT:
390 do_redirect(icmp_skb, sk);
391 goto out;

--- 26 unchanged lines hidden (view full) ---

418 }
419
420 err = icmp_err_convert[code].errno;
421 /* check if icmp_skb allows revert of backoff
422 * (see draft-zimmermann-tcp-lcd) */
423 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
424 break;
425 if (seq != tp->snd_una || !icsk->icsk_retransmits ||
428 !icsk->icsk_backoff)
426 !icsk->icsk_backoff || fastopen)
429 break;
430
427 break;
428
431 /* XXX (TFO) - revisit the following logic for TFO */
432
433 if (sock_owned_by_user(sk))
434 break;
435
436 icsk->icsk_backoff--;
429 if (sock_owned_by_user(sk))
430 break;
431
432 icsk->icsk_backoff--;
437 inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
438 TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
439 tcp_bound_rto(sk);
433 icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
434 TCP_TIMEOUT_INIT;
435 icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
440
441 skb = tcp_write_queue_head(sk);
442 BUG_ON(!skb);
443
436
437 skb = tcp_write_queue_head(sk);
438 BUG_ON(!skb);
439
444 remaining = icsk->icsk_rto - min(icsk->icsk_rto,
445 tcp_time_stamp - TCP_SKB_CB(skb)->when);
440 remaining = icsk->icsk_rto -
441 min(icsk->icsk_rto,
442 tcp_time_stamp - tcp_skb_timestamp(skb));
446
447 if (remaining) {
448 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
449 remaining, TCP_RTO_MAX);
450 } else {
451 /* RTO revert clocked out retransmission.
452 * Will retransmit now */
453 tcp_retransmit_timer(sk);
454 }
455
456 break;
457 case ICMP_TIME_EXCEEDED:
458 err = EHOSTUNREACH;
459 break;
460 default:
461 goto out;
462 }
463
443
444 if (remaining) {
445 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
446 remaining, TCP_RTO_MAX);
447 } else {
448 /* RTO revert clocked out retransmission.
449 * Will retransmit now */
450 tcp_retransmit_timer(sk);
451 }
452
453 break;
454 case ICMP_TIME_EXCEEDED:
455 err = EHOSTUNREACH;
456 break;
457 default:
458 goto out;
459 }
460
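The error handler above reverts one step of exponential backoff when a net/host-unreachable ICMP suggests the RTO grew only because of a dead intermediate path (draft-zimmermann-tcp-lcd): the base RTO is recomputed from srtt (or TCP_TIMEOUT_INIT), re-scaled by the already-decremented icsk_backoff, and the retransmit timer is re-armed for whatever part of that interval has not yet elapsed. A minimal standalone sketch of the arithmetic, with invented millisecond values and plain C standing in for the kernel helpers:

/* Illustration only: the values are invented and the helpers below stand in
 * for __tcp_set_rto(), inet_csk_rto_backoff() and the tcp_time_stamp delta.
 */
#include <stdio.h>

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned int rto = 200;        /* base RTO in ms, e.g. derived from srtt */
	unsigned int backoff = 2;      /* icsk_backoff after the decrement above */
	unsigned int elapsed = 300;    /* ms since the head-of-queue skb was sent */
	unsigned int rto_max = 120000; /* stands in for TCP_RTO_MAX */

	/* like inet_csk_rto_backoff(): shift by the backoff count, cap at max */
	unsigned int backed_off = min_u(rto << backoff, rto_max);   /* 800 ms */
	unsigned int remaining = backed_off - min_u(backed_off, elapsed);

	if (remaining)
		printf("re-arm retransmit timer for %u ms\n", remaining); /* 500 ms */
	else
		printf("reverted RTO already expired: retransmit immediately\n");
	return 0;
}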
464 /* XXX (TFO) - if it's a TFO socket and has been accepted, rather
465 * than following the TCP_SYN_RECV case and closing the socket,
466 * we ignore the ICMP error and keep trying like a fully established
467 * socket. Is this the right thing to do?
468 */
469 if (req && req->sk == NULL)
470 goto out;
471
472 switch (sk->sk_state) {
473 struct request_sock *req, **prev;
474 case TCP_LISTEN:
475 if (sock_owned_by_user(sk))
476 goto out;
477
478 req = inet_csk_search_req(sk, &prev, th->dest,
479 iph->daddr, iph->saddr);

--- 16 unchanged lines hidden (view full) ---

496 * created socket, and POSIX does not want network
497 * errors returned from accept().
498 */
499 inet_csk_reqsk_queue_drop(sk, req, prev);
500 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
501 goto out;
502
503 case TCP_SYN_SENT:
461 switch (sk->sk_state) {
462 struct request_sock *req, **prev;
463 case TCP_LISTEN:
464 if (sock_owned_by_user(sk))
465 goto out;
466
467 req = inet_csk_search_req(sk, &prev, th->dest,
468 iph->daddr, iph->saddr);

--- 16 unchanged lines hidden (view full) ---

485 * created socket, and POSIX does not want network
486 * errors returned from accept().
487 */
488 inet_csk_reqsk_queue_drop(sk, req, prev);
489 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
490 goto out;
491
492 case TCP_SYN_SENT:
504 case TCP_SYN_RECV: /* Cannot happen.
505 It can happen, for example, if SYNs
506 crossed, or with Fast Open.
507 */
493 case TCP_SYN_RECV:
494 /* Only in fast or simultaneous open. If a fast open socket is
495 * already accepted it is treated as a connected one below.
496 */
497 if (fastopen && fastopen->sk == NULL)
498 break;
499
508 if (!sock_owned_by_user(sk)) {
509 sk->sk_err = err;
510
511 sk->sk_error_report(sk);
512
513 tcp_done(sk);
514 } else {
515 sk->sk_err_soft = err;

--- 168 unchanged lines hidden (view full) ---

684 * routing might fail in this case. No choice here, if we choose to force
685 * input interface, we will misroute in case of asymmetric route.
686 */
687 if (sk)
688 arg.bound_dev_if = sk->sk_bound_dev_if;
689
690 net = dev_net(skb_dst(skb)->dev);
691 arg.tos = ip_hdr(skb)->tos;
500 if (!sock_owned_by_user(sk)) {
501 sk->sk_err = err;
502
503 sk->sk_error_report(sk);
504
505 tcp_done(sk);
506 } else {
507 sk->sk_err_soft = err;

--- 168 unchanged lines hidden (view full) ---

676 * routing might fail in this case. No choice here, if we choose to force
677 * input interface, we will misroute in case of asymmetric route.
678 */
679 if (sk)
680 arg.bound_dev_if = sk->sk_bound_dev_if;
681
682 net = dev_net(skb_dst(skb)->dev);
683 arg.tos = ip_hdr(skb)->tos;
692 ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
693 ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
684 ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt,
685 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
686 &arg, arg.iov[0].iov_len);
694
695 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
696 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
697
698#ifdef CONFIG_TCP_MD5SIG
699release_sk1:
700 if (sk1) {
701 rcu_read_unlock();

--- 65 unchanged lines hidden (view full) ---

767 arg.flags = reply_flags;
768 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
769 ip_hdr(skb)->saddr, /* XXX */
770 arg.iov[0].iov_len, IPPROTO_TCP, 0);
771 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
772 if (oif)
773 arg.bound_dev_if = oif;
774 arg.tos = tos;
687
688 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
689 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
690
691#ifdef CONFIG_TCP_MD5SIG
692release_sk1:
693 if (sk1) {
694 rcu_read_unlock();

--- 65 unchanged lines hidden (view full) ---

760 arg.flags = reply_flags;
761 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
762 ip_hdr(skb)->saddr, /* XXX */
763 arg.iov[0].iov_len, IPPROTO_TCP, 0);
764 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
765 if (oif)
766 arg.bound_dev_if = oif;
767 arg.tos = tos;
775 ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
776 ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
768 ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt,
769 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
770 &arg, arg.iov[0].iov_len);
777
778 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
779}
780
781static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
782{
783 struct inet_timewait_sock *tw = inet_twsk(sk);
784 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

--- 30 unchanged lines hidden (view full) ---

815}
816
817/*
818 * Send a SYN-ACK after having received a SYN.
819 * This still operates on a request_sock only, not on a big
820 * socket.
821 */
822static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
771
772 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
773}
774
775static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
776{
777 struct inet_timewait_sock *tw = inet_twsk(sk);
778 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

--- 30 unchanged lines hidden (view full) ---

809}
810
811/*
812 * Send a SYN-ACK after having received a SYN.
813 * This still operates on a request_sock only, not on a big
814 * socket.
815 */
816static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
817 struct flowi *fl,
823 struct request_sock *req,
818 struct request_sock *req,
824 u16 queue_mapping)
819 u16 queue_mapping,
820 struct tcp_fastopen_cookie *foc)
825{
826 const struct inet_request_sock *ireq = inet_rsk(req);
827 struct flowi4 fl4;
828 int err = -1;
829 struct sk_buff *skb;
830
831 /* First, grab a route. */
832 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
833 return -1;
834
821{
822 const struct inet_request_sock *ireq = inet_rsk(req);
823 struct flowi4 fl4;
824 int err = -1;
825 struct sk_buff *skb;
826
827 /* First, grab a route. */
828 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
829 return -1;
830
835 skb = tcp_make_synack(sk, dst, req, NULL);
831 skb = tcp_make_synack(sk, dst, req, foc);
836
837 if (skb) {
838 __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
839
840 skb_set_queue_mapping(skb, queue_mapping);
841 err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
842 ireq->ir_rmt_addr,
843 ireq->opt);
844 err = net_xmit_eval(err);
832
833 if (skb) {
834 __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
835
836 skb_set_queue_mapping(skb, queue_mapping);
837 err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
838 ireq->ir_rmt_addr,
839 ireq->opt);
840 err = net_xmit_eval(err);
845 if (!tcp_rsk(req)->snt_synack && !err)
846 tcp_rsk(req)->snt_synack = tcp_time_stamp;
847 }
848
849 return err;
850}
851
841 }
842
843 return err;
844}
845
852static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
853{
854 int res = tcp_v4_send_synack(sk, NULL, req, 0);
855
856 if (!res)
857 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
858 return res;
859}
860
861/*
862 * IPv4 request_sock destructor.
863 */
864static void tcp_v4_reqsk_destructor(struct request_sock *req)
865{
866 kfree(inet_rsk(req)->opt);
867}
868
869/*
870 * Return true if a syncookie should be sent
871 */
872bool tcp_syn_flood_action(struct sock *sk,
873 const struct sk_buff *skb,
874 const char *proto)
875{
876 const char *msg = "Dropping request";
877 bool want_cookie = false;
878 struct listen_sock *lopt;
879
846/*
847 * IPv4 request_sock destructor.
848 */
849static void tcp_v4_reqsk_destructor(struct request_sock *req)
850{
851 kfree(inet_rsk(req)->opt);
852}
853
854/*
855 * Return true if a syncookie should be sent
856 */
857bool tcp_syn_flood_action(struct sock *sk,
858 const struct sk_buff *skb,
859 const char *proto)
860{
861 const char *msg = "Dropping request";
862 bool want_cookie = false;
863 struct listen_sock *lopt;
864
880
881
882#ifdef CONFIG_SYN_COOKIES
883 if (sysctl_tcp_syncookies) {
884 msg = "Sending cookies";
885 want_cookie = true;
886 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
887 } else
888#endif
889 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);

--- 174 unchanged lines hidden (view full) ---

1064 return -EINVAL;
1065
1066 if (copy_from_user(&cmd, optval, sizeof(cmd)))
1067 return -EFAULT;
1068
1069 if (sin->sin_family != AF_INET)
1070 return -EINVAL;
1071
865#ifdef CONFIG_SYN_COOKIES
866 if (sysctl_tcp_syncookies) {
867 msg = "Sending cookies";
868 want_cookie = true;
869 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
870 } else
871#endif
872 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);

--- 174 unchanged lines hidden (view full) ---

1047 return -EINVAL;
1048
1049 if (copy_from_user(&cmd, optval, sizeof(cmd)))
1050 return -EFAULT;
1051
1052 if (sin->sin_family != AF_INET)
1053 return -EINVAL;
1054
1072 if (!cmd.tcpm_key || !cmd.tcpm_keylen)
1055 if (!cmd.tcpm_keylen)
1073 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1074 AF_INET);
1075
1076 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1077 return -EINVAL;
1078
1079 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1080 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,

--- 101 unchanged lines hidden (view full) ---

1182clear_hash:
1183 tcp_put_md5sig_pool();
1184clear_hash_noput:
1185 memset(md5_hash, 0, 16);
1186 return 1;
1187}
1188EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1189
1056 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1057 AF_INET);
1058
1059 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1060 return -EINVAL;
1061
1062 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1063 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,

--- 101 unchanged lines hidden (view full) ---

1165clear_hash:
1166 tcp_put_md5sig_pool();
1167clear_hash_noput:
1168 memset(md5_hash, 0, 16);
1169 return 1;
1170}
1171EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1172
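tcp_v4_parse_md5_keys() above consumes a struct tcp_md5sig passed via the TCP_MD5SIG socket option: a zero tcpm_keylen deletes the peer's key, anything up to TCP_MD5SIG_MAXKEYLEN installs one, and non-AF_INET addresses are rejected. A minimal userspace sketch of driving that path, assuming a libc whose <netinet/tcp.h> exposes TCP_MD5SIG and struct tcp_md5sig; the peer address and key are placeholders:

#include <stddef.h>
#include <string.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

/* Install (or, with key_len == 0, delete) an RFC 2385 TCP-MD5 key for a peer. */
static int set_tcp_md5_key(int fd, const char *peer_ip,
			   const void *key, size_t key_len)
{
	struct tcp_md5sig md5;
	struct sockaddr_in *peer = (struct sockaddr_in *)&md5.tcpm_addr;

	memset(&md5, 0, sizeof(md5));
	peer->sin_family = AF_INET;            /* the kernel path rejects other families */
	if (inet_pton(AF_INET, peer_ip, &peer->sin_addr) != 1)
		return -1;

	if (key_len > TCP_MD5SIG_MAXKEYLEN)    /* mirrors the -EINVAL check above */
		return -1;
	md5.tcpm_keylen = key_len;
	memcpy(md5.tcpm_key, key, key_len);

	return setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
}

Both endpoints must install the same key before the connection is set up; segments failing the check are dropped by the inbound MD5 hash verification below.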
1190static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1173static bool __tcp_v4_inbound_md5_hash(struct sock *sk,
1174 const struct sk_buff *skb)
1191{
1192 /*
1193 * This gets called for each TCP segment that arrives
1194 * so we want to be efficient.
1195 * We have 3 drop cases:
1196 * o No MD5 hash and one expected.
1197 * o MD5 hash and we're not expecting one.
1198 * o MD5 hash and it's wrong.

--- 36 unchanged lines hidden (view full) ---

1235 &iph->daddr, ntohs(th->dest),
1236 genhash ? " tcp_v4_calc_md5_hash failed"
1237 : "");
1238 return true;
1239 }
1240 return false;
1241}
1242
1175{
1176 /*
1177 * This gets called for each TCP segment that arrives
1178 * so we want to be efficient.
1179 * We have 3 drop cases:
1180 * o No MD5 hash and one expected.
1181 * o MD5 hash and we're not expecting one.
1182 * o MD5 hash and it's wrong.

--- 36 unchanged lines hidden (view full) ---

1219 &iph->daddr, ntohs(th->dest),
1220 genhash ? " tcp_v4_calc_md5_hash failed"
1221 : "");
1222 return true;
1223 }
1224 return false;
1225}
1226
1227static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1228{
1229 bool ret;
1230
1231 rcu_read_lock();
1232 ret = __tcp_v4_inbound_md5_hash(sk, skb);
1233 rcu_read_unlock();
1234
1235 return ret;
1236}
1237
1243#endif
1244
1238#endif
1239
1240static void tcp_v4_init_req(struct request_sock *req, struct sock *sk,
1241 struct sk_buff *skb)
1242{
1243 struct inet_request_sock *ireq = inet_rsk(req);
1244
1245 ireq->ir_loc_addr = ip_hdr(skb)->daddr;
1246 ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
1247 ireq->no_srccheck = inet_sk(sk)->transparent;
1248 ireq->opt = tcp_v4_save_options(skb);
1249}
1250
1251static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl,
1252 const struct request_sock *req,
1253 bool *strict)
1254{
1255 struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
1256
1257 if (strict) {
1258 if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
1259 *strict = true;
1260 else
1261 *strict = false;
1262 }
1263
1264 return dst;
1265}
1266
1245struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1246 .family = PF_INET,
1247 .obj_size = sizeof(struct tcp_request_sock),
1267struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1268 .family = PF_INET,
1269 .obj_size = sizeof(struct tcp_request_sock),
1248 .rtx_syn_ack = tcp_v4_rtx_synack,
1270 .rtx_syn_ack = tcp_rtx_synack,
1249 .send_ack = tcp_v4_reqsk_send_ack,
1250 .destructor = tcp_v4_reqsk_destructor,
1251 .send_reset = tcp_v4_send_reset,
1271 .send_ack = tcp_v4_reqsk_send_ack,
1272 .destructor = tcp_v4_reqsk_destructor,
1273 .send_reset = tcp_v4_send_reset,
1252 .syn_ack_timeout = tcp_syn_ack_timeout,
1274 .syn_ack_timeout = tcp_syn_ack_timeout,
1253};
1254
1275};
1276
1255#ifdef CONFIG_TCP_MD5SIG
1256static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1277static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1278 .mss_clamp = TCP_MSS_DEFAULT,
1279#ifdef CONFIG_TCP_MD5SIG
1257 .md5_lookup = tcp_v4_reqsk_md5_lookup,
1258 .calc_md5_hash = tcp_v4_md5_hash_skb,
1280 .md5_lookup = tcp_v4_reqsk_md5_lookup,
1281 .calc_md5_hash = tcp_v4_md5_hash_skb,
1259};
1260#endif
1282#endif
1283 .init_req = tcp_v4_init_req,
1284#ifdef CONFIG_SYN_COOKIES
1285 .cookie_init_seq = cookie_v4_init_sequence,
1286#endif
1287 .route_req = tcp_v4_route_req,
1288 .init_seq = tcp_v4_init_sequence,
1289 .send_synack = tcp_v4_send_synack,
1290 .queue_hash_add = inet_csk_reqsk_queue_hash_add,
1291};
1261
1292
1262static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
1263 struct request_sock *req,
1264 struct tcp_fastopen_cookie *foc,
1265 struct tcp_fastopen_cookie *valid_foc)
1266{
1267 bool skip_cookie = false;
1268 struct fastopen_queue *fastopenq;
1269
1270 if (likely(!fastopen_cookie_present(foc))) {
1271 /* See include/net/tcp.h for the meaning of these knobs */
1272 if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
1273 ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
1274 (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
1275 skip_cookie = true; /* no cookie to validate */
1276 else
1277 return false;
1278 }
1279 fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
1280 /* A FO option is present; bump the counter. */
1281 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
1282
1283 /* Make sure the listener has enabled fastopen, and we don't
1284 * exceed the max # of pending TFO requests allowed before trying
1285 * to validate the cookie in order to avoid burning CPU cycles
1286 * unnecessarily.
1287 *
1288 * XXX (TFO) - The implication of checking the max_qlen before
1289 * processing a cookie request is that clients can't differentiate
1290 * between qlen overflow causing Fast Open to be disabled
1291 * temporarily vs a server not supporting Fast Open at all.
1292 */
1293 if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
1294 fastopenq == NULL || fastopenq->max_qlen == 0)
1295 return false;
1296
1297 if (fastopenq->qlen >= fastopenq->max_qlen) {
1298 struct request_sock *req1;
1299 spin_lock(&fastopenq->lock);
1300 req1 = fastopenq->rskq_rst_head;
1301 if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
1302 spin_unlock(&fastopenq->lock);
1303 NET_INC_STATS_BH(sock_net(sk),
1304 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
1305 /* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/
1306 foc->len = -1;
1307 return false;
1308 }
1309 fastopenq->rskq_rst_head = req1->dl_next;
1310 fastopenq->qlen--;
1311 spin_unlock(&fastopenq->lock);
1312 reqsk_free(req1);
1313 }
1314 if (skip_cookie) {
1315 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1316 return true;
1317 }
1318
1319 if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
1320 if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
1321 tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
1322 ip_hdr(skb)->daddr, valid_foc);
1323 if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
1324 memcmp(&foc->val[0], &valid_foc->val[0],
1325 TCP_FASTOPEN_COOKIE_SIZE) != 0)
1326 return false;
1327 valid_foc->len = -1;
1328 }
1329 /* Acknowledge the data received from the peer. */
1330 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1331 return true;
1332 } else if (foc->len == 0) { /* Client requesting a cookie */
1333 tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
1334 ip_hdr(skb)->daddr, valid_foc);
1335 NET_INC_STATS_BH(sock_net(sk),
1336 LINUX_MIB_TCPFASTOPENCOOKIEREQD);
1337 } else {
1338 /* Client sent a cookie with wrong size. Treat it
1339 * the same as invalid and return a valid one.
1340 */
1341 tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
1342 ip_hdr(skb)->daddr, valid_foc);
1343 }
1344 return false;
1345}
1346
1347static int tcp_v4_conn_req_fastopen(struct sock *sk,
1348 struct sk_buff *skb,
1349 struct sk_buff *skb_synack,
1350 struct request_sock *req)
1351{
1352 struct tcp_sock *tp = tcp_sk(sk);
1353 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
1354 const struct inet_request_sock *ireq = inet_rsk(req);
1355 struct sock *child;
1356 int err;
1357
1358 req->num_retrans = 0;
1359 req->num_timeout = 0;
1360 req->sk = NULL;
1361
1362 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
1363 if (child == NULL) {
1364 NET_INC_STATS_BH(sock_net(sk),
1365 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
1366 kfree_skb(skb_synack);
1367 return -1;
1368 }
1369 err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
1370 ireq->ir_rmt_addr, ireq->opt);
1371 err = net_xmit_eval(err);
1372 if (!err)
1373 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1374 /* XXX (TFO) - is it ok to ignore error and continue? */
1375
1376 spin_lock(&queue->fastopenq->lock);
1377 queue->fastopenq->qlen++;
1378 spin_unlock(&queue->fastopenq->lock);
1379
1380 /* Initialize the child socket. Have to fix some values to take
1381 * into account the child is a Fast Open socket and is created
1382 * only out of the bits carried in the SYN packet.
1383 */
1384 tp = tcp_sk(child);
1385
1386 tp->fastopen_rsk = req;
1387 /* Do a hold on the listener sk so that if the listener is being
1388 * closed, the child that has been accepted can live on and still
1389 * access listen_lock.
1390 */
1391 sock_hold(sk);
1392 tcp_rsk(req)->listener = sk;
1393
1394 /* RFC1323: The window in SYN & SYN/ACK segments is never
1395 * scaled. So correct it appropriately.
1396 */
1397 tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
1398
1399 /* Activate the retrans timer so that SYNACK can be retransmitted.
1400 * The request socket is not added to the SYN table of the parent
1401 * because it's been added to the accept queue directly.
1402 */
1403 inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
1404 TCP_TIMEOUT_INIT, TCP_RTO_MAX);
1405
1406 /* Add the child socket directly into the accept queue */
1407 inet_csk_reqsk_queue_add(sk, req, child);
1408
1409 /* Now finish processing the fastopen child socket. */
1410 inet_csk(child)->icsk_af_ops->rebuild_header(child);
1411 tcp_init_congestion_control(child);
1412 tcp_mtup_init(child);
1413 tcp_init_metrics(child);
1414 tcp_init_buffer_space(child);
1415
1416 /* Queue the data carried in the SYN packet. We need to first
1417 * bump skb's refcnt because the caller will attempt to free it.
1418 *
1419 * XXX (TFO) - we honor a zero-payload TFO request for now.
1420 * (Any reason not to?)
1421 */
1422 if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
1423 /* Don't queue the skb if there is no payload in SYN.
1424 * XXX (TFO) - How about SYN+FIN?
1425 */
1426 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1427 } else {
1428 skb = skb_get(skb);
1429 skb_dst_drop(skb);
1430 __skb_pull(skb, tcp_hdr(skb)->doff * 4);
1431 skb_set_owner_r(skb, child);
1432 __skb_queue_tail(&child->sk_receive_queue, skb);
1433 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1434 tp->syn_data_acked = 1;
1435 }
1436 sk->sk_data_ready(sk, 0);
1437 bh_unlock_sock(child);
1438 sock_put(child);
1439 WARN_ON(req->sk == NULL);
1440 return 0;
1441}
1442
1443int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1444{
1293int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1294{
1445 struct tcp_options_received tmp_opt;
1446 struct request_sock *req;
1447 struct inet_request_sock *ireq;
1448 struct tcp_sock *tp = tcp_sk(sk);
1449 struct dst_entry *dst = NULL;
1450 __be32 saddr = ip_hdr(skb)->saddr;
1451 __be32 daddr = ip_hdr(skb)->daddr;
1452 __u32 isn = TCP_SKB_CB(skb)->when;
1453 bool want_cookie = false;
1454 struct flowi4 fl4;
1455 struct tcp_fastopen_cookie foc = { .len = -1 };
1456 struct tcp_fastopen_cookie valid_foc = { .len = -1 };
1457 struct sk_buff *skb_synack;
1458 int do_fastopen;
1459
1460 /* Never answer to SYNs sent to broadcast or multicast */
1461 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1462 goto drop;
1463
1295 /* Never answer to SYNs sent to broadcast or multicast */
1296 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1297 goto drop;
1298
1464 /* TW buckets are converted to open requests without
1465 * limitations, they conserve resources and peer is
1466 * evidently real one.
1467 */
1468 if ((sysctl_tcp_syncookies == 2 ||
1469 inet_csk_reqsk_queue_is_full(sk)) && !isn) {
1470 want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1471 if (!want_cookie)
1472 goto drop;
1473 }
1299 return tcp_conn_request(&tcp_request_sock_ops,
1300 &tcp_request_sock_ipv4_ops, sk, skb);
1474
1301
1475 /* Accept backlog is full. If we have already queued enough
1476 * of warm entries in syn queue, drop request. It is better than
1477 * clogging syn queue with openreqs with exponentially increasing
1478 * timeout.
1479 */
1480 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
1481 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1482 goto drop;
1483 }
1484
1485 req = inet_reqsk_alloc(&tcp_request_sock_ops);
1486 if (!req)
1487 goto drop;
1488
1489#ifdef CONFIG_TCP_MD5SIG
1490 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1491#endif
1492
1493 tcp_clear_options(&tmp_opt);
1494 tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1495 tmp_opt.user_mss = tp->rx_opt.user_mss;
1496 tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
1497
1498 if (want_cookie && !tmp_opt.saw_tstamp)
1499 tcp_clear_options(&tmp_opt);
1500
1501 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1502 tcp_openreq_init(req, &tmp_opt, skb);
1503
1504 ireq = inet_rsk(req);
1505 ireq->ir_loc_addr = daddr;
1506 ireq->ir_rmt_addr = saddr;
1507 ireq->no_srccheck = inet_sk(sk)->transparent;
1508 ireq->opt = tcp_v4_save_options(skb);
1509
1510 if (security_inet_conn_request(sk, skb, req))
1511 goto drop_and_free;
1512
1513 if (!want_cookie || tmp_opt.tstamp_ok)
1514 TCP_ECN_create_request(req, skb, sock_net(sk));
1515
1516 if (want_cookie) {
1517 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1518 req->cookie_ts = tmp_opt.tstamp_ok;
1519 } else if (!isn) {
1520 /* VJ's idea. We save last timestamp seen
1521 * from the destination in peer table, when entering
1522 * state TIME-WAIT, and check against it before
1523 * accepting new connection request.
1524 *
1525 * If "isn" is not zero, this request hit alive
1526 * timewait bucket, so that all the necessary checks
1527 * are made in the function processing timewait state.
1528 */
1529 if (tmp_opt.saw_tstamp &&
1530 tcp_death_row.sysctl_tw_recycle &&
1531 (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
1532 fl4.daddr == saddr) {
1533 if (!tcp_peer_is_proven(req, dst, true)) {
1534 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1535 goto drop_and_release;
1536 }
1537 }
1538 /* Kill the following clause, if you dislike this way. */
1539 else if (!sysctl_tcp_syncookies &&
1540 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1541 (sysctl_max_syn_backlog >> 2)) &&
1542 !tcp_peer_is_proven(req, dst, false)) {
1543 /* Without syncookies last quarter of
1544 * backlog is filled with destinations,
1545 * proven to be alive.
1546 * It means that we continue to communicate
1547 * to destinations, already remembered
1548 * to the moment of synflood.
1549 */
1550 LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
1551 &saddr, ntohs(tcp_hdr(skb)->source));
1552 goto drop_and_release;
1553 }
1554
1555 isn = tcp_v4_init_sequence(skb);
1556 }
1557 tcp_rsk(req)->snt_isn = isn;
1558
1559 if (dst == NULL) {
1560 dst = inet_csk_route_req(sk, &fl4, req);
1561 if (dst == NULL)
1562 goto drop_and_free;
1563 }
1564 do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);
1565
1566 /* We don't call tcp_v4_send_synack() directly because we need
1567 * to make sure a child socket can be created successfully before
1568 * sending back synack!
1569 *
1570 * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
1571 * (or better yet, call tcp_send_synack() in the child context
1572 * directly, but will have to fix a bunch of other code first)
1573 * after syn_recv_sock() except one will need to first fix the
1574 * latter to remove its dependency on the current implementation
1575 * of tcp_v4_send_synack()->tcp_select_initial_window().
1576 */
1577 skb_synack = tcp_make_synack(sk, dst, req,
1578 fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
1579
1580 if (skb_synack) {
1581 __tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr);
1582 skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
1583 } else
1584 goto drop_and_free;
1585
1586 if (likely(!do_fastopen)) {
1587 int err;
1588 err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
1589 ireq->ir_rmt_addr, ireq->opt);
1590 err = net_xmit_eval(err);
1591 if (err || want_cookie)
1592 goto drop_and_free;
1593
1594 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1595 tcp_rsk(req)->listener = NULL;
1596 /* Add the request_sock to the SYN table */
1597 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1598 if (fastopen_cookie_present(&foc) && foc.len != 0)
1599 NET_INC_STATS_BH(sock_net(sk),
1600 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
1601 } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req))
1602 goto drop_and_free;
1603
1604 return 0;
1605
1606drop_and_release:
1607 dst_release(dst);
1608drop_and_free:
1609 reqsk_free(req);
1610drop:
1611 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1612 return 0;
1613}
1614EXPORT_SYMBOL(tcp_v4_conn_request);
1615
1616
1617/*

--- 31 unchanged lines hidden (view full) ---

1649 newinet->inet_saddr = ireq->ir_loc_addr;
1650 inet_opt = ireq->opt;
1651 rcu_assign_pointer(newinet->inet_opt, inet_opt);
1652 ireq->opt = NULL;
1653 newinet->mc_index = inet_iif(skb);
1654 newinet->mc_ttl = ip_hdr(skb)->ttl;
1655 newinet->rcv_tos = ip_hdr(skb)->tos;
1656 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1302drop:
1303 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1304 return 0;
1305}
1306EXPORT_SYMBOL(tcp_v4_conn_request);
1307
1308
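One side of this diff folds the Fast Open handling that used to live inline in tcp_v4_conn_request() into the generic tcp_conn_request()/tcp_request_sock_ops path. From userspace the feature is driven by two small knobs; the sketch below is a hedged illustration only, assuming a libc that exposes TCP_FASTOPEN and MSG_FASTOPEN (queue length and address handling are placeholders):

#include <sys/types.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

/* Server: allow up to qlen pending Fast Open requests on the listener.
 * This corresponds to the fastopenq->max_qlen bound enforced in the code above.
 */
static int enable_tfo_listener(int listen_fd, int qlen)
{
	return setsockopt(listen_fd, IPPROTO_TCP, TCP_FASTOPEN,
			  &qlen, sizeof(qlen));
}

/* Client: carry data in the SYN of a not-yet-connected socket; the kernel
 * requests or presents the Fast Open cookie transparently.
 */
static ssize_t tfo_connect_and_send(int fd, const struct sockaddr_in *srv,
				    const void *buf, size_t len)
{
	return sendto(fd, buf, len, MSG_FASTOPEN,
		      (const struct sockaddr *)srv, sizeof(*srv));
}

The server side additionally has to be enabled via the net.ipv4.tcp_fastopen sysctl, which is the TFO_SERVER_ENABLE check visible in the code above.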
1309/*

--- 31 unchanged lines hidden (view full) ---

1341 newinet->inet_saddr = ireq->ir_loc_addr;
1342 inet_opt = ireq->opt;
1343 rcu_assign_pointer(newinet->inet_opt, inet_opt);
1344 ireq->opt = NULL;
1345 newinet->mc_index = inet_iif(skb);
1346 newinet->mc_ttl = ip_hdr(skb)->ttl;
1347 newinet->rcv_tos = ip_hdr(skb)->tos;
1348 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1349 inet_set_txhash(newsk);
1657 if (inet_opt)
1658 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1659 newinet->inet_id = newtp->write_seq ^ jiffies;
1660
1661 if (!dst) {
1662 dst = inet_csk_route_child_sock(sk, newsk, req);
1663 if (!dst)
1664 goto put_and_exit;

--- 73 unchanged lines hidden (view full) ---

1738
1739#ifdef CONFIG_SYN_COOKIES
1740 if (!th->syn)
1741 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1742#endif
1743 return sk;
1744}
1745
1350 if (inet_opt)
1351 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1352 newinet->inet_id = newtp->write_seq ^ jiffies;
1353
1354 if (!dst) {
1355 dst = inet_csk_route_child_sock(sk, newsk, req);
1356 if (!dst)
1357 goto put_and_exit;

--- 73 unchanged lines hidden (view full) ---

1431
1432#ifdef CONFIG_SYN_COOKIES
1433 if (!th->syn)
1434 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1435#endif
1436 return sk;
1437}
1438
1746static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1747{
1748 const struct iphdr *iph = ip_hdr(skb);
1749
1750 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1751 if (!tcp_v4_check(skb->len, iph->saddr,
1752 iph->daddr, skb->csum)) {
1753 skb->ip_summed = CHECKSUM_UNNECESSARY;
1754 return 0;
1755 }
1756 }
1757
1758 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1759 skb->len, IPPROTO_TCP, 0);
1760
1761 if (skb->len <= 76) {
1762 return __skb_checksum_complete(skb);
1763 }
1764 return 0;
1765}
1766
1767
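tcp_v4_checksum_init() above, and the skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo) call that replaces it on the other side of this diff, both fold the IPv4 pseudo-header (addresses, protocol, TCP length) into the checksum before the segment is accepted. A standalone sketch of that verification in plain ones'-complement arithmetic, independent of the kernel's csum helpers:

#include <stddef.h>
#include <stdint.h>
#include <arpa/inet.h>
#include <netinet/in.h>

static uint16_t csum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

/* saddr/daddr in network byte order; data covers TCP header + payload.
 * Returns 0 when a received segment (checksum field included) is valid.
 */
static uint16_t tcp_v4_csum(uint32_t saddr, uint32_t daddr,
			    const uint8_t *data, size_t len)
{
	uint32_t sum = 0;
	size_t i;

	/* pseudo header: addresses, zero, protocol, TCP length */
	sum += (ntohl(saddr) >> 16) + (ntohl(saddr) & 0xffff);
	sum += (ntohl(daddr) >> 16) + (ntohl(daddr) & 0xffff);
	sum += IPPROTO_TCP;
	sum += (uint32_t)len;

	/* sum the segment itself as 16-bit big-endian words */
	for (i = 0; i + 1 < len; i += 2)
		sum += ((uint32_t)data[i] << 8) | data[i + 1];
	if (len & 1)
		sum += (uint32_t)data[len - 1] << 8;

	return csum_fold(sum);
}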
1768/* The socket must have its spinlock held when we get
1769 * here.
1770 *
1771 * We have a potential double-lock case here, so even when
1772 * doing backlog processing we use the BH locking scheme.
1773 * This is because we cannot sleep with the original spinlock
1774 * held.
1775 */
1776int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1777{
1778 struct sock *rsk;
1439/* The socket must have its spinlock held when we get
1440 * here.
1441 *
1442 * We have a potential double-lock case here, so even when
1443 * doing backlog processing we use the BH locking scheme.
1444 * This is because we cannot sleep with the original spinlock
1445 * held.
1446 */
1447int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1448{
1449 struct sock *rsk;
1779#ifdef CONFIG_TCP_MD5SIG
1780 /*
1781 * We really want to reject the packet as early as possible
1782 * if:
1783 * o We're expecting an MD5'd packet and there is no MD5 tcp option
1784 * o There is an MD5 option and we're not expecting one
1785 */
1786 if (tcp_v4_inbound_md5_hash(sk, skb))
1787 goto discard;
1788#endif
1789
1790 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1791 struct dst_entry *dst = sk->sk_rx_dst;
1792
1793 sock_rps_save_rxhash(sk, skb);
1794 if (dst) {
1795 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1796 dst->ops->check(dst, 0) == NULL) {

--- 98 unchanged lines hidden (view full) ---

1895
1896 if (sysctl_tcp_low_latency || !tp->ucopy.task)
1897 return false;
1898
1899 if (skb->len <= tcp_hdrlen(skb) &&
1900 skb_queue_len(&tp->ucopy.prequeue) == 0)
1901 return false;
1902
1450
1451 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1452 struct dst_entry *dst = sk->sk_rx_dst;
1453
1454 sock_rps_save_rxhash(sk, skb);
1455 if (dst) {
1456 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1457 dst->ops->check(dst, 0) == NULL) {

--- 98 unchanged lines hidden (view full) ---

1556
1557 if (sysctl_tcp_low_latency || !tp->ucopy.task)
1558 return false;
1559
1560 if (skb->len <= tcp_hdrlen(skb) &&
1561 skb_queue_len(&tp->ucopy.prequeue) == 0)
1562 return false;
1563
1903 skb_dst_force(skb);
1564 /* Before escaping RCU protected region, we need to take care of skb
1565 * dst. Prequeue is only enabled for established sockets.
1566 * For such sockets, we might need the skb dst only to set sk->sk_rx_dst
1567 * Instead of doing full sk_rx_dst validity here, let's perform
1568 * an optimistic check.
1569 */
1570 if (likely(sk->sk_rx_dst))
1571 skb_dst_drop(skb);
1572 else
1573 skb_dst_force(skb);
1574
1904 __skb_queue_tail(&tp->ucopy.prequeue, skb);
1905 tp->ucopy.memory += skb->truesize;
1906 if (tp->ucopy.memory > sk->sk_rcvbuf) {
1907 struct sk_buff *skb1;
1908
1909 BUG_ON(sock_owned_by_user(sk));
1910
1911 while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {

--- 42 unchanged lines hidden (view full) ---

1954 goto bad_packet;
1955 if (!pskb_may_pull(skb, th->doff * 4))
1956 goto discard_it;
1957
1958 /* An explanation is required here, I think.
1959 * Packet length and doff are validated by header prediction,
1960 * provided case of th->doff==0 is eliminated.
1961 * So, we defer the checks. */
1575 __skb_queue_tail(&tp->ucopy.prequeue, skb);
1576 tp->ucopy.memory += skb->truesize;
1577 if (tp->ucopy.memory > sk->sk_rcvbuf) {
1578 struct sk_buff *skb1;
1579
1580 BUG_ON(sock_owned_by_user(sk));
1581
1582 while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {

--- 42 unchanged lines hidden (view full) ---

1625 goto bad_packet;
1626 if (!pskb_may_pull(skb, th->doff * 4))
1627 goto discard_it;
1628
1629 /* An explanation is required here, I think.
1630 * Packet length and doff are validated by header prediction,
1631 * provided case of th->doff==0 is eliminated.
1632 * So, we defer the checks. */
1962 if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1633
1634 if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
1963 goto csum_error;
1964
1965 th = tcp_hdr(skb);
1966 iph = ip_hdr(skb);
1967 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1968 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1969 skb->len - th->doff * 4);
1970 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1635 goto csum_error;
1636
1637 th = tcp_hdr(skb);
1638 iph = ip_hdr(skb);
1639 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1640 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1641 skb->len - th->doff * 4);
1642 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1971 TCP_SKB_CB(skb)->when = 0;
1643 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1644 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1972 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1973 TCP_SKB_CB(skb)->sacked = 0;
1974
1975 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1976 if (!sk)
1977 goto no_tcp_socket;
1978
1979process:
1980 if (sk->sk_state == TCP_TIME_WAIT)
1981 goto do_time_wait;
1982
1983 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1984 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1985 goto discard_and_relse;
1986 }
1987
1988 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1989 goto discard_and_relse;
1645 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1646 TCP_SKB_CB(skb)->sacked = 0;
1647
1648 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1649 if (!sk)
1650 goto no_tcp_socket;
1651
1652process:
1653 if (sk->sk_state == TCP_TIME_WAIT)
1654 goto do_time_wait;
1655
1656 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1657 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1658 goto discard_and_relse;
1659 }
1660
1661 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1662 goto discard_and_relse;
1663
1664#ifdef CONFIG_TCP_MD5SIG
1665 /*
1666 * We really want to reject the packet as early as possible
1667 * if:
1668 * o We're expecting an MD5'd packet and this is no MD5 tcp option
1669 * o There is an MD5 option and we're not expecting one
1670 */
1671 if (tcp_v4_inbound_md5_hash(sk, skb))
1672 goto discard_and_relse;
1673#endif
1674
1990 nf_reset(skb);
1991
1992 if (sk_filter(sk, skb))
1993 goto discard_and_relse;
1994
1995 sk_mark_napi_id(sk, skb);
1996 skb->dev = NULL;
1997
1998 bh_lock_sock_nested(sk);
1999 ret = 0;
2000 if (!sock_owned_by_user(sk)) {
1675 nf_reset(skb);
1676
1677 if (sk_filter(sk, skb))
1678 goto discard_and_relse;
1679
1680 sk_mark_napi_id(sk, skb);
1681 skb->dev = NULL;
1682
1683 bh_lock_sock_nested(sk);
1684 ret = 0;
1685 if (!sock_owned_by_user(sk)) {
2001 if (!tcp_prequeue(sk, skb))
1686#ifdef CONFIG_NET_DMA
1687 struct tcp_sock *tp = tcp_sk(sk);
1688 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1689 tp->ucopy.dma_chan = net_dma_find_channel();
1690 if (tp->ucopy.dma_chan)
2002 ret = tcp_v4_do_rcv(sk, skb);
1691 ret = tcp_v4_do_rcv(sk, skb);
1692 else
1693#endif
1694 {
1695 if (!tcp_prequeue(sk, skb))
1696 ret = tcp_v4_do_rcv(sk, skb);
1697 }
2003 } else if (unlikely(sk_add_backlog(sk, skb,
2004 sk->sk_rcvbuf + sk->sk_sndbuf))) {
2005 bh_unlock_sock(sk);
2006 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
2007 goto discard_and_relse;
2008 }
2009 bh_unlock_sock(sk);
2010

--- 67 unchanged lines hidden (view full) ---

2078 .twsk_unique = tcp_twsk_unique,
2079 .twsk_destructor= tcp_twsk_destructor,
2080};
2081
2082void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
2083{
2084 struct dst_entry *dst = skb_dst(skb);
2085
1698 } else if (unlikely(sk_add_backlog(sk, skb,
1699 sk->sk_rcvbuf + sk->sk_sndbuf))) {
1700 bh_unlock_sock(sk);
1701 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1702 goto discard_and_relse;
1703 }
1704 bh_unlock_sock(sk);
1705

--- 67 unchanged lines hidden (view full) ---

1773 .twsk_unique = tcp_twsk_unique,
1774 .twsk_destructor= tcp_twsk_destructor,
1775};
1776
1777void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
1778{
1779 struct dst_entry *dst = skb_dst(skb);
1780
2086 dst_hold(dst);
2087 sk->sk_rx_dst = dst;
2088 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1781 if (dst) {
1782 dst_hold(dst);
1783 sk->sk_rx_dst = dst;
1784 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1785 }
2089}
2090EXPORT_SYMBOL(inet_sk_rx_dst_set);
2091
2092const struct inet_connection_sock_af_ops ipv4_specific = {
2093 .queue_xmit = ip_queue_xmit,
2094 .send_check = tcp_v4_send_check,
2095 .rebuild_header = inet_sk_rebuild_header,
2096 .sk_rx_dst_set = inet_sk_rx_dst_set,

--- 4 unchanged lines hidden (view full) ---

2101 .getsockopt = ip_getsockopt,
2102 .addr2sockaddr = inet_csk_addr2sockaddr,
2103 .sockaddr_len = sizeof(struct sockaddr_in),
2104 .bind_conflict = inet_csk_bind_conflict,
2105#ifdef CONFIG_COMPAT
2106 .compat_setsockopt = compat_ip_setsockopt,
2107 .compat_getsockopt = compat_ip_getsockopt,
2108#endif
1786}
1787EXPORT_SYMBOL(inet_sk_rx_dst_set);
1788
1789const struct inet_connection_sock_af_ops ipv4_specific = {
1790 .queue_xmit = ip_queue_xmit,
1791 .send_check = tcp_v4_send_check,
1792 .rebuild_header = inet_sk_rebuild_header,
1793 .sk_rx_dst_set = inet_sk_rx_dst_set,

--- 4 unchanged lines hidden (view full) ---

1798 .getsockopt = ip_getsockopt,
1799 .addr2sockaddr = inet_csk_addr2sockaddr,
1800 .sockaddr_len = sizeof(struct sockaddr_in),
1801 .bind_conflict = inet_csk_bind_conflict,
1802#ifdef CONFIG_COMPAT
1803 .compat_setsockopt = compat_ip_setsockopt,
1804 .compat_getsockopt = compat_ip_getsockopt,
1805#endif
1806 .mtu_reduced = tcp_v4_mtu_reduced,
2109};
2110EXPORT_SYMBOL(ipv4_specific);
2111
2112#ifdef CONFIG_TCP_MD5SIG
2113static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
2114 .md5_lookup = tcp_v4_md5_lookup,
2115 .calc_md5_hash = tcp_v4_md5_hash_skb,
2116 .md5_parse = tcp_v4_parse_md5_keys,

--- 36 unchanged lines hidden (view full) ---

2153 /* Clean up the MD5 key list, if any */
2154 if (tp->md5sig_info) {
2155 tcp_clear_md5_list(sk);
2156 kfree_rcu(tp->md5sig_info, rcu);
2157 tp->md5sig_info = NULL;
2158 }
2159#endif
2160
1807};
1808EXPORT_SYMBOL(ipv4_specific);
1809
1810#ifdef CONFIG_TCP_MD5SIG
1811static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1812 .md5_lookup = tcp_v4_md5_lookup,
1813 .calc_md5_hash = tcp_v4_md5_hash_skb,
1814 .md5_parse = tcp_v4_parse_md5_keys,

--- 36 unchanged lines hidden (view full) ---

1851 /* Clean up the MD5 key list, if any */
1852 if (tp->md5sig_info) {
1853 tcp_clear_md5_list(sk);
1854 kfree_rcu(tp->md5sig_info, rcu);
1855 tp->md5sig_info = NULL;
1856 }
1857#endif
1858
1859#ifdef CONFIG_NET_DMA
1860 /* Cleans up our sk_async_wait_queue */
1861 __skb_queue_purge(&sk->sk_async_wait_queue);
1862#endif
1863
2161 /* Clean prequeue, it must be empty really */
2162 __skb_queue_purge(&tp->ucopy.prequeue);
2163
2164 /* Clean up a referenced TCP bind bucket. */
2165 if (inet_csk(sk)->icsk_bind_hash)
2166 inet_put_port(sk);
2167
2168 BUG_ON(tp->fastopen_rsk != NULL);

--- 321 unchanged lines hidden (view full) ---

2490
2491 err = seq_open_net(inode, file, &afinfo->seq_ops,
2492 sizeof(struct tcp_iter_state));
2493 if (err < 0)
2494 return err;
2495
2496 s = ((struct seq_file *)file->private_data)->private;
2497 s->family = afinfo->family;
1864 /* Clean prequeue, it must be empty really */
1865 __skb_queue_purge(&tp->ucopy.prequeue);
1866
1867 /* Clean up a referenced TCP bind bucket. */
1868 if (inet_csk(sk)->icsk_bind_hash)
1869 inet_put_port(sk);
1870
1871 BUG_ON(tp->fastopen_rsk != NULL);

--- 321 unchanged lines hidden (view full) ---

2193
2194 err = seq_open_net(inode, file, &afinfo->seq_ops,
2195 sizeof(struct tcp_iter_state));
2196 if (err < 0)
2197 return err;
2198
2199 s = ((struct seq_file *)file->private_data)->private;
2200 s->family = afinfo->family;
2498 s->last_pos = 0;
2201 s->last_pos = 0;
2499 return 0;
2500}
2501EXPORT_SYMBOL(tcp_seq_open);
2502
2503int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2504{
2505 int rc = 0;
2506 struct proc_dir_entry *p;

--- 100 unchanged lines hidden (view full) ---

2607 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
2608}
2609
2610static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2611 struct seq_file *f, int i)
2612{
2613 __be32 dest, src;
2614 __u16 destp, srcp;
2202 return 0;
2203}
2204EXPORT_SYMBOL(tcp_seq_open);
2205
2206int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2207{
2208 int rc = 0;
2209 struct proc_dir_entry *p;

--- 100 unchanged lines hidden (view full) ---

2310 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
2311}
2312
2313static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2314 struct seq_file *f, int i)
2315{
2316 __be32 dest, src;
2317 __u16 destp, srcp;
2615 long delta = tw->tw_ttd - jiffies;
2318 s32 delta = tw->tw_ttd - inet_tw_time_stamp();
2616
2617 dest = tw->tw_daddr;
2618 src = tw->tw_rcv_saddr;
2619 destp = ntohs(tw->tw_dport);
2620 srcp = ntohs(tw->tw_sport);
2621
2622 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2623 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",

--- 91 unchanged lines hidden (view full) ---

2715 .shutdown = tcp_shutdown,
2716 .setsockopt = tcp_setsockopt,
2717 .getsockopt = tcp_getsockopt,
2718 .recvmsg = tcp_recvmsg,
2719 .sendmsg = tcp_sendmsg,
2720 .sendpage = tcp_sendpage,
2721 .backlog_rcv = tcp_v4_do_rcv,
2722 .release_cb = tcp_release_cb,
2319
2320 dest = tw->tw_daddr;
2321 src = tw->tw_rcv_saddr;
2322 destp = ntohs(tw->tw_dport);
2323 srcp = ntohs(tw->tw_sport);
2324
2325 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2326 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",

--- 91 unchanged lines hidden (view full) ---

2418 .shutdown = tcp_shutdown,
2419 .setsockopt = tcp_setsockopt,
2420 .getsockopt = tcp_getsockopt,
2421 .recvmsg = tcp_recvmsg,
2422 .sendmsg = tcp_sendmsg,
2423 .sendpage = tcp_sendpage,
2424 .backlog_rcv = tcp_v4_do_rcv,
2425 .release_cb = tcp_release_cb,
2723 .mtu_reduced = tcp_v4_mtu_reduced,
2724 .hash = inet_hash,
2725 .unhash = inet_unhash,
2726 .get_port = inet_csk_get_port,
2727 .enter_memory_pressure = tcp_enter_memory_pressure,
2728 .stream_memory_free = tcp_stream_memory_free,
2729 .sockets_allocated = &tcp_sockets_allocated,
2730 .orphan_count = &tcp_orphan_count,
2731 .memory_allocated = &tcp_memory_allocated,

--- 50 unchanged lines hidden ---
2426 .hash = inet_hash,
2427 .unhash = inet_unhash,
2428 .get_port = inet_csk_get_port,
2429 .enter_memory_pressure = tcp_enter_memory_pressure,
2430 .stream_memory_free = tcp_stream_memory_free,
2431 .sockets_allocated = &tcp_sockets_allocated,
2432 .orphan_count = &tcp_orphan_count,
2433 .memory_allocated = &tcp_memory_allocated,

--- 50 unchanged lines hidden ---