tcp_ipv4.c — side-by-side diff of two revisions: 9e9fd65d1fa51d919d54d731be0e66492b5b6c5a (old) vs 46d3ceabd8d98ed0ad10f20c595ca784e34786c5 (new)
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Implementation of the Transmission Control Protocol(TCP).
7 *
8 * IPv4 specific functions

--- 195 unchanged lines hidden (view full) ---

204 /* Reset inherited state */
205 tp->rx_opt.ts_recent = 0;
206 tp->rx_opt.ts_recent_stamp = 0;
207 if (likely(!tp->repair))
208 tp->write_seq = 0;
209 }
210
211 if (tcp_death_row.sysctl_tw_recycle &&
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Implementation of the Transmission Control Protocol(TCP).
7 *
8 * IPv4 specific functions

--- 195 unchanged lines hidden (view full) ---

204 /* Reset inherited state */
205 tp->rx_opt.ts_recent = 0;
206 tp->rx_opt.ts_recent_stamp = 0;
207 if (likely(!tp->repair))
208 tp->write_seq = 0;
209 }
210
211 if (tcp_death_row.sysctl_tw_recycle &&
212 !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
213 struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
214 /*
215 * VJ's idea. We save last timestamp seen from
216 * the destination in peer table, when entering state
217 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
218 * when trying new connection.
219 */
220 if (peer) {
221 inet_peer_refcheck(peer);
222 if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
223 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
224 tp->rx_opt.ts_recent = peer->tcp_ts;
225 }
226 }
227 }
212 !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
213 tcp_fetch_timewait_stamp(sk, &rt->dst);
228
229 inet->inet_dport = usin->sin_port;
230 inet->inet_daddr = daddr;
231
232 inet_csk(sk)->icsk_ext_hdr_len = 0;
233 if (inet_opt)
234 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
235

--- 457 unchanged lines hidden (view full) ---

693 /* When socket is gone, all binding information is lost.
694 * routing might fail in this case. using iif for oif to
695 * make sure we can deliver it
696 */
697 arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);
698
699 net = dev_net(skb_dst(skb)->dev);
700 arg.tos = ip_hdr(skb)->tos;
214
215 inet->inet_dport = usin->sin_port;
216 inet->inet_daddr = daddr;
217
218 inet_csk(sk)->icsk_ext_hdr_len = 0;
219 if (inet_opt)
220 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
221

--- 457 unchanged lines hidden (view full) ---

679 /* When socket is gone, all binding information is lost.
680 * routing might fail in this case. using iif for oif to
681 * make sure we can deliver it
682 */
683 arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);
684
685 net = dev_net(skb_dst(skb)->dev);
686 arg.tos = ip_hdr(skb)->tos;
701 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
702 &arg, arg.iov[0].iov_len);
687 ip_send_unicast_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
688 ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
703
704 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
705 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
706
707#ifdef CONFIG_TCP_MD5SIG
708release_sk1:
709 if (sk1) {
710 rcu_read_unlock();

--- 65 unchanged lines hidden (view full) ---

776 arg.flags = reply_flags;
777 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
778 ip_hdr(skb)->saddr, /* XXX */
779 arg.iov[0].iov_len, IPPROTO_TCP, 0);
780 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
781 if (oif)
782 arg.bound_dev_if = oif;
783 arg.tos = tos;
689
690 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
691 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
692
693#ifdef CONFIG_TCP_MD5SIG
694release_sk1:
695 if (sk1) {
696 rcu_read_unlock();

--- 65 unchanged lines hidden (view full) ---

762 arg.flags = reply_flags;
763 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
764 ip_hdr(skb)->saddr, /* XXX */
765 arg.iov[0].iov_len, IPPROTO_TCP, 0);
766 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
767 if (oif)
768 arg.bound_dev_if = oif;
769 arg.tos = tos;
784 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
785 &arg, arg.iov[0].iov_len);
770 ip_send_unicast_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
771 ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
786
787 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
788}
789
790static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
791{
792 struct inet_timewait_sock *tw = inet_twsk(sk);
793 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

--- 26 unchanged lines hidden (view full) ---

820/*
821 * Send a SYN-ACK after having received a SYN.
822 * This still operates on a request_sock only, not on a big
823 * socket.
824 */
825static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
826 struct request_sock *req,
827 struct request_values *rvp,
772
773 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
774}
775
776static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
777{
778 struct inet_timewait_sock *tw = inet_twsk(sk);
779 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

--- 26 unchanged lines hidden (view full) ---

806/*
807 * Send a SYN-ACK after having received a SYN.
808 * This still operates on a request_sock only, not on a big
809 * socket.
810 */
811static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
812 struct request_sock *req,
813 struct request_values *rvp,
828 u16 queue_mapping)
814 u16 queue_mapping,
815 bool nocache)
829{
830 const struct inet_request_sock *ireq = inet_rsk(req);
831 struct flowi4 fl4;
832 int err = -1;
833 struct sk_buff * skb;
834
835 /* First, grab a route. */
816{
817 const struct inet_request_sock *ireq = inet_rsk(req);
818 struct flowi4 fl4;
819 int err = -1;
820 struct sk_buff * skb;
821
822 /* First, grab a route. */
836 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
823 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req, nocache)) == NULL)
837 return -1;
838
839 skb = tcp_make_synack(sk, dst, req, rvp);
840
841 if (skb) {
842 __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
843
844 skb_set_queue_mapping(skb, queue_mapping);
845 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
846 ireq->rmt_addr,
847 ireq->opt);
848 err = net_xmit_eval(err);
849 }
850
824 return -1;
825
826 skb = tcp_make_synack(sk, dst, req, rvp);
827
828 if (skb) {
829 __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
830
831 skb_set_queue_mapping(skb, queue_mapping);
832 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
833 ireq->rmt_addr,
834 ireq->opt);
835 err = net_xmit_eval(err);
836 }
837
851 dst_release(dst);
852 return err;
853}
854
855static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
856 struct request_values *rvp)
857{
858 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
838 return err;
839}
840
841static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
842 struct request_values *rvp)
843{
844 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
859 return tcp_v4_send_synack(sk, NULL, req, rvp, 0);
845 return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
860}
861
862/*
863 * IPv4 request_sock destructor.
864 */
865static void tcp_v4_reqsk_destructor(struct request_sock *req)
866{
867 kfree(inet_rsk(req)->opt);

--- 502 unchanged lines hidden (view full) ---

1370
1371 if (!want_cookie || tmp_opt.tstamp_ok)
1372 TCP_ECN_create_request(req, skb);
1373
1374 if (want_cookie) {
1375 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1376 req->cookie_ts = tmp_opt.tstamp_ok;
1377 } else if (!isn) {
846}
847
848/*
849 * IPv4 request_sock destructor.
850 */
851static void tcp_v4_reqsk_destructor(struct request_sock *req)
852{
853 kfree(inet_rsk(req)->opt);

--- 502 unchanged lines hidden (view full) ---

1356
1357 if (!want_cookie || tmp_opt.tstamp_ok)
1358 TCP_ECN_create_request(req, skb);
1359
1360 if (want_cookie) {
1361 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1362 req->cookie_ts = tmp_opt.tstamp_ok;
1363 } else if (!isn) {
1378 struct inet_peer *peer = NULL;
1379 struct flowi4 fl4;
1380
1381 /* VJ's idea. We save last timestamp seen
1382 * from the destination in peer table, when entering
1383 * state TIME-WAIT, and check against it before
1384 * accepting new connection request.
1385 *
1386 * If "isn" is not zero, this request hit alive
1387 * timewait bucket, so that all the necessary checks
1388 * are made in the function processing timewait state.
1389 */
1390 if (tmp_opt.saw_tstamp &&
1391 tcp_death_row.sysctl_tw_recycle &&
1364 struct flowi4 fl4;
1365
1366 /* VJ's idea. We save last timestamp seen
1367 * from the destination in peer table, when entering
1368 * state TIME-WAIT, and check against it before
1369 * accepting new connection request.
1370 *
1371 * If "isn" is not zero, this request hit alive
1372 * timewait bucket, so that all the necessary checks
1373 * are made in the function processing timewait state.
1374 */
1375 if (tmp_opt.saw_tstamp &&
1376 tcp_death_row.sysctl_tw_recycle &&
1392 (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
1393 fl4.daddr == saddr &&
1394 (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
1395 inet_peer_refcheck(peer);
1396 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1397 (s32)(peer->tcp_ts - req->ts_recent) >
1398 TCP_PAWS_WINDOW) {
1377 (dst = inet_csk_route_req(sk, &fl4, req, want_cookie)) != NULL &&
1378 fl4.daddr == saddr) {
1379 if (!tcp_peer_is_proven(req, dst, true)) {
1399 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1400 goto drop_and_release;
1401 }
1402 }
1403 /* Kill the following clause, if you dislike this way. */
1404 else if (!sysctl_tcp_syncookies &&
1405 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1406 (sysctl_max_syn_backlog >> 2)) &&
1380 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1381 goto drop_and_release;
1382 }
1383 }
1384 /* Kill the following clause, if you dislike this way. */
1385 else if (!sysctl_tcp_syncookies &&
1386 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1387 (sysctl_max_syn_backlog >> 2)) &&
1407 (!peer || !peer->tcp_ts_stamp) &&
1408 (!dst || !dst_metric(dst, RTAX_RTT))) {
1388 !tcp_peer_is_proven(req, dst, false)) {
1409 /* Without syncookies last quarter of
1410 * backlog is filled with destinations,
1411 * proven to be alive.
1412 * It means that we continue to communicate
1413 * to destinations, already remembered
1414 * to the moment of synflood.
1415 */
1416 LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
1417 &saddr, ntohs(tcp_hdr(skb)->source));
1418 goto drop_and_release;
1419 }
1420
1421 isn = tcp_v4_init_sequence(skb);
1422 }
1423 tcp_rsk(req)->snt_isn = isn;
1424 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1425
1426 if (tcp_v4_send_synack(sk, dst, req,
1427 (struct request_values *)&tmp_ext,
1389 /* Without syncookies last quarter of
1390 * backlog is filled with destinations,
1391 * proven to be alive.
1392 * It means that we continue to communicate
1393 * to destinations, already remembered
1394 * to the moment of synflood.
1395 */
1396 LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
1397 &saddr, ntohs(tcp_hdr(skb)->source));
1398 goto drop_and_release;
1399 }
1400
1401 isn = tcp_v4_init_sequence(skb);
1402 }
1403 tcp_rsk(req)->snt_isn = isn;
1404 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1405
1406 if (tcp_v4_send_synack(sk, dst, req,
1407 (struct request_values *)&tmp_ext,
1428 skb_get_queue_mapping(skb)) ||
1408 skb_get_queue_mapping(skb),
1409 want_cookie) ||
1429 want_cookie)
1430 goto drop_and_free;
1431
1432 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1433 return 0;
1434
1435drop_and_release:
1436 dst_release(dst);

--- 230 unchanged lines hidden (view full) ---

1667 return 0;
1668
1669csum_err:
1670 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1671 goto discard;
1672}
1673EXPORT_SYMBOL(tcp_v4_do_rcv);
1674
1410 want_cookie)
1411 goto drop_and_free;
1412
1413 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1414 return 0;
1415
1416drop_and_release:
1417 dst_release(dst);

--- 230 unchanged lines hidden (view full) ---

1648 return 0;
1649
1650csum_err:
1651 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1652 goto discard;
1653}
1654EXPORT_SYMBOL(tcp_v4_do_rcv);
1655
/*
 * Early demultiplex of an incoming TCP/IPv4 packet: look up an established
 * socket before full IP/TCP processing and, when found, attach the socket
 * (and, if safe, its cached input route) to the skb so the later receive
 * path can skip the lookup.  Only acts on PACKET_HOST packets with a fully
 * pullable TCP header; otherwise it returns without touching the skb.
 */
1656void tcp_v4_early_demux(struct sk_buff *skb)
1657{
1658 struct net *net = dev_net(skb->dev);
1659 const struct iphdr *iph;
1660 const struct tcphdr *th;
1661 struct net_device *dev;
1662 struct sock *sk;
1663
/* Only packets addressed to this host can match a local established socket. */
1664 if (skb->pkt_type != PACKET_HOST)
1665 return;
1666
/* Make sure at least the fixed-size TCP header is in the linear area. */
1667 if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr)))
1668 return;
1669
1670 iph = ip_hdr(skb);
1671 th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb));
1672
/* Reject a malformed header whose data-offset is smaller than the minimum. */
1673 if (th->doff < sizeof(struct tcphdr) / 4)
1674 return;
1675
/* Pull the full TCP header including options before dereferencing further. */
1676 if (!pskb_may_pull(skb, ip_hdrlen(skb) + th->doff * 4))
1677 return;
1678
1679 dev = skb->dev;
1680 sk = __inet_lookup_established(net, &tcp_hashinfo,
1681 iph->saddr, th->source,
1682 iph->daddr, ntohs(th->dest),
1683 dev->ifindex);
1684 if (sk) {
/* Hand the reference to the skb; sock_edemux drops it on skb free. */
1685 skb->sk = sk;
1686 skb->destructor = sock_edemux;
1687 if (sk->sk_state != TCP_TIME_WAIT) {
1688 struct dst_entry *dst = sk->sk_rx_dst;
/* Revalidate the cached input route before reuse. */
1689 if (dst)
1690 dst = dst_check(dst, 0);
1691 if (dst) {
1692 struct rtable *rt = (struct rtable *) dst;
1693
/* Only reuse the cached dst if it was built for this ingress device. */
1694 if (rt->rt_iif == dev->ifindex)
1695 skb_dst_set_noref(skb, dst);
1696 }
1697 }
1698 }
1699}
1700
1675/*
1676 * From tcp_input.c
1677 */
1678
1679int tcp_v4_rcv(struct sk_buff *skb)
1680{
1681 const struct iphdr *iph;
1682 const struct tcphdr *th;

--- 133 unchanged lines hidden (view full) ---

1816 break;
1817 case TCP_TW_RST:
1818 goto no_tcp_socket;
1819 case TCP_TW_SUCCESS:;
1820 }
1821 goto discard_it;
1822}
1823
1701/*
1702 * From tcp_input.c
1703 */
1704
1705int tcp_v4_rcv(struct sk_buff *skb)
1706{
1707 const struct iphdr *iph;
1708 const struct tcphdr *th;

--- 133 unchanged lines hidden (view full) ---

1842 break;
1843 case TCP_TW_RST:
1844 goto no_tcp_socket;
1845 case TCP_TW_SUCCESS:;
1846 }
1847 goto discard_it;
1848}
1849
/*
 * Return the inet_peer entry for this connected socket's destination.
 * If the socket has no cached route, or the cached route's flow daddr no
 * longer matches the socket's destination, a fresh peer reference is taken
 * (caller must release it: *release_it = true).  Otherwise the peer bound
 * to the route is returned without an extra reference (*release_it = false).
 * NOTE(review): removed in the newer revision of this file in favour of the
 * tcp_metrics/tcp_peer_is_proven infrastructure.
 */
1824struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
1825{
1826 struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
1827 struct inet_sock *inet = inet_sk(sk);
1828 struct inet_peer *peer;
1829
/* No usable cached route: look the peer up directly (refcounted). */
1830 if (!rt ||
1831 inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
1832 peer = inet_getpeer_v4(inet->inet_daddr, 1);
1833 *release_it = true;
1834 } else {
/* Lazily bind a peer to the route, then borrow its reference. */
1835 if (!rt->peer)
1836 rt_bind_peer(rt, inet->inet_daddr, 1);
1837 peer = rt->peer;
1838 *release_it = false;
1839 }
1840
1841 return peer;
1842}
1843EXPORT_SYMBOL(tcp_v4_get_peer);
1844
/*
 * timewait_sock_ops->twsk_getpeer hook: fetch a refcounted inet_peer for a
 * TIME-WAIT socket's destination address.  Caller owns the returned
 * reference.  NOTE(review): dropped in the newer revision along with the
 * .twsk_getpeer member itself.
 */
1845void *tcp_v4_tw_get_peer(struct sock *sk)
1846{
1847 const struct inet_timewait_sock *tw = inet_twsk(sk);
1848
1849 return inet_getpeer_v4(tw->tw_daddr, 1);
1850}
1851EXPORT_SYMBOL(tcp_v4_tw_get_peer);
1852
1853static struct timewait_sock_ops tcp_timewait_sock_ops = {
1854 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1855 .twsk_unique = tcp_twsk_unique,
1856 .twsk_destructor= tcp_twsk_destructor,
1850static struct timewait_sock_ops tcp_timewait_sock_ops = {
1851 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1852 .twsk_unique = tcp_twsk_unique,
1853 .twsk_destructor= tcp_twsk_destructor,
1857 .twsk_getpeer = tcp_v4_tw_get_peer,
1858};
1859
1860const struct inet_connection_sock_af_ops ipv4_specific = {
1861 .queue_xmit = ip_queue_xmit,
1862 .send_check = tcp_v4_send_check,
1863 .rebuild_header = inet_sk_rebuild_header,
1864 .conn_request = tcp_v4_conn_request,
1865 .syn_recv_sock = tcp_v4_syn_recv_sock,
1854};
1855
1856const struct inet_connection_sock_af_ops ipv4_specific = {
1857 .queue_xmit = ip_queue_xmit,
1858 .send_check = tcp_v4_send_check,
1859 .rebuild_header = inet_sk_rebuild_header,
1860 .conn_request = tcp_v4_conn_request,
1861 .syn_recv_sock = tcp_v4_syn_recv_sock,
1866 .get_peer = tcp_v4_get_peer,
1867 .net_header_len = sizeof(struct iphdr),
1868 .setsockopt = ip_setsockopt,
1869 .getsockopt = ip_getsockopt,
1870 .addr2sockaddr = inet_csk_addr2sockaddr,
1871 .sockaddr_len = sizeof(struct sockaddr_in),
1872 .bind_conflict = inet_csk_bind_conflict,
1873#ifdef CONFIG_COMPAT
1874 .compat_setsockopt = compat_ip_setsockopt,

--- 713 unchanged lines hidden (view full) ---

2588 .destroy = tcp_v4_destroy_sock,
2589 .shutdown = tcp_shutdown,
2590 .setsockopt = tcp_setsockopt,
2591 .getsockopt = tcp_getsockopt,
2592 .recvmsg = tcp_recvmsg,
2593 .sendmsg = tcp_sendmsg,
2594 .sendpage = tcp_sendpage,
2595 .backlog_rcv = tcp_v4_do_rcv,
1862 .net_header_len = sizeof(struct iphdr),
1863 .setsockopt = ip_setsockopt,
1864 .getsockopt = ip_getsockopt,
1865 .addr2sockaddr = inet_csk_addr2sockaddr,
1866 .sockaddr_len = sizeof(struct sockaddr_in),
1867 .bind_conflict = inet_csk_bind_conflict,
1868#ifdef CONFIG_COMPAT
1869 .compat_setsockopt = compat_ip_setsockopt,

--- 713 unchanged lines hidden (view full) ---

2583 .destroy = tcp_v4_destroy_sock,
2584 .shutdown = tcp_shutdown,
2585 .setsockopt = tcp_setsockopt,
2586 .getsockopt = tcp_getsockopt,
2587 .recvmsg = tcp_recvmsg,
2588 .sendmsg = tcp_sendmsg,
2589 .sendpage = tcp_sendpage,
2590 .backlog_rcv = tcp_v4_do_rcv,
2591 .release_cb = tcp_release_cb,
2596 .hash = inet_hash,
2597 .unhash = inet_unhash,
2598 .get_port = inet_csk_get_port,
2599 .enter_memory_pressure = tcp_enter_memory_pressure,
2600 .sockets_allocated = &tcp_sockets_allocated,
2601 .orphan_count = &tcp_orphan_count,
2602 .memory_allocated = &tcp_memory_allocated,
2603 .memory_pressure = &tcp_memory_pressure,

--- 49 unchanged lines hidden ---
2592 .hash = inet_hash,
2593 .unhash = inet_unhash,
2594 .get_port = inet_csk_get_port,
2595 .enter_memory_pressure = tcp_enter_memory_pressure,
2596 .sockets_allocated = &tcp_sockets_allocated,
2597 .orphan_count = &tcp_orphan_count,
2598 .memory_allocated = &tcp_memory_allocated,
2599 .memory_pressure = &tcp_memory_pressure,

--- 49 unchanged lines hidden ---