--- ip_output.c (888dc273ea4e7ca332a6f73d10dfc8f2b212c803)
+++ ip_output.c (b7034146756b9e91cc059b19df7fe4defd4d7de7)
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system. INET is implemented using the BSD Socket
  *		interface as the means of communication with the user level.
  *
  *		The Internet Protocol (IP) output module.
  *
--- 273 unchanged lines hidden ---
 		if (err && ret == 0)
 			ret = err;
 		segs = nskb;
 	} while (segs);
 
 	return ret;
 }
 
-static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+static int __ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	unsigned int mtu;
-	int ret;
 
-	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
-	if (ret) {
-		kfree_skb(skb);
-		return ret;
-	}
-
 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
 	/* Policy lookup after SNAT yielded a new policy */
 	if (skb_dst(skb)->xfrm) {
 		IPCB(skb)->flags |= IPSKB_REROUTED;
 		return dst_output(net, sk, skb);
 	}
 #endif
 	mtu = ip_skb_dst_mtu(sk, skb);
 	if (skb_is_gso(skb))
 		return ip_finish_output_gso(net, sk, skb, mtu);
 
 	if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU))
 		return ip_fragment(net, sk, skb, mtu, ip_finish_output2);
 
 	return ip_finish_output2(net, sk, skb);
 }
 
+static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+	int ret;
+
+	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
+	switch (ret) {
+	case NET_XMIT_SUCCESS:
+		return __ip_finish_output(net, sk, skb);
+	case NET_XMIT_CN:
+		return __ip_finish_output(net, sk, skb) ? : ret;
+	default:
+		kfree_skb(skb);
+		return ret;
+	}
+}
+
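Review note (not part of the diff): ip_finish_output() and ip_mc_finish_output() now distinguish the BPF cgroup egress verdicts NET_XMIT_SUCCESS (0x00) and NET_XMIT_CN (0x02, congestion notification) instead of treating every nonzero return as a drop. A program signalling congestion no longer causes the packet to be freed: it is still transmitted, and NET_XMIT_CN is propagated unless the transmit itself returns an error. The "x ? : y" form is the GNU C conditional with omitted middle operand, yielding x when x is nonzero and y otherwise. Below is a minimal userspace sketch of the pattern; the constant values match include/linux/netdevice.h, while finish() and xmit() are hypothetical stand-ins for ip_finish_output() and __ip_finish_output():

	#include <stdio.h>

	#define NET_XMIT_SUCCESS 0x00	/* values as in include/linux/netdevice.h */
	#define NET_XMIT_CN      0x02

	static int xmit(int rc) { return rc; }	/* pretend transmit result */

	static int finish(int bpf_ret, int xmit_rc)
	{
		switch (bpf_ret) {
		case NET_XMIT_SUCCESS:
			return xmit(xmit_rc);
		case NET_XMIT_CN:
			/* GNU "?:": a nonzero transmit error wins, otherwise
			 * the congestion verdict is propagated to the caller.
			 */
			return xmit(xmit_rc) ? : bpf_ret;
		default:
			return bpf_ret;	/* the kernel path also frees the skb here */
		}
	}

	int main(void)
	{
		printf("%d\n", finish(NET_XMIT_CN, 0));		/* 2: CN propagated */
		printf("%d\n", finish(NET_XMIT_CN, -1));	/* -1: error wins */
		return 0;
	}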
 static int ip_mc_finish_output(struct net *net, struct sock *sk,
 			       struct sk_buff *skb)
 {
 	int ret;
 
 	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
-	if (ret) {
+	switch (ret) {
+	case NET_XMIT_SUCCESS:
+		return dev_loopback_xmit(net, sk, skb);
+	case NET_XMIT_CN:
+		return dev_loopback_xmit(net, sk, skb) ? : ret;
+	default:
 		kfree_skb(skb);
 		return ret;
 	}
-
-	return dev_loopback_xmit(net, sk, skb);
 }
 
 int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct rtable *rt = skb_rtable(skb);
 	struct net_device *dev = rt->dst.dev;
 
 	/*
--- 182 unchanged lines hidden ---
 	to->skb_iif = from->skb_iif;
 	skb_dst_drop(to);
 	skb_dst_copy(to, from);
 	to->dev = from->dev;
 	to->mark = from->mark;
 
 	skb_copy_hash(to, from);
 
-	/* Copy the flags to each fragment. */
-	IPCB(to)->flags = IPCB(from)->flags;
-
 #ifdef CONFIG_NET_SCHED
 	to->tc_index = from->tc_index;
 #endif
 	nf_copy(to, from);
 	skb_ext_copy(to, from);
 #if IS_ENABLED(CONFIG_IP_VS)
 	to->ipvs_property = from->ipvs_property;
 #endif
--- 17 unchanged lines hidden ---
 			  htonl(mtu));
 		kfree_skb(skb);
 		return -EMSGSIZE;
 	}
 
 	return ip_do_fragment(net, sk, skb, output);
 }
 
+void ip_fraglist_init(struct sk_buff *skb, struct iphdr *iph,
+		      unsigned int hlen, struct ip_fraglist_iter *iter)
+{
+	unsigned int first_len = skb_pagelen(skb);
+
+	iter->frag = skb_shinfo(skb)->frag_list;
+	skb_frag_list_init(skb);
+
+	iter->offset = 0;
+	iter->iph = iph;
+	iter->hlen = hlen;
+
+	skb->data_len = first_len - skb_headlen(skb);
+	skb->len = first_len;
+	iph->tot_len = htons(first_len);
+	iph->frag_off = htons(IP_MF);
+	ip_send_check(iph);
+}
+EXPORT_SYMBOL(ip_fraglist_init);
+
+static void ip_fraglist_ipcb_prepare(struct sk_buff *skb,
+				     struct ip_fraglist_iter *iter)
+{
+	struct sk_buff *to = iter->frag;
+
+	/* Copy the flags to each fragment. */
+	IPCB(to)->flags = IPCB(skb)->flags;
+
+	if (iter->offset == 0)
+		ip_options_fragment(to);
+}
+
+void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter)
+{
+	unsigned int hlen = iter->hlen;
+	struct iphdr *iph = iter->iph;
+	struct sk_buff *frag;
+
+	frag = iter->frag;
+	frag->ip_summed = CHECKSUM_NONE;
+	skb_reset_transport_header(frag);
+	__skb_push(frag, hlen);
+	skb_reset_network_header(frag);
+	memcpy(skb_network_header(frag), iph, hlen);
+	iter->iph = ip_hdr(frag);
+	iph = iter->iph;
+	iph->tot_len = htons(frag->len);
+	ip_copy_metadata(frag, skb);
+	iter->offset += skb->len - hlen;
+	iph->frag_off = htons(iter->offset >> 3);
+	if (frag->next)
+		iph->frag_off |= htons(IP_MF);
+	/* Ready, complete checksum */
+	ip_send_check(iph);
+}
+EXPORT_SYMBOL(ip_fraglist_prepare);
+
+void ip_frag_init(struct sk_buff *skb, unsigned int hlen,
+		  unsigned int ll_rs, unsigned int mtu,
+		  struct ip_frag_state *state)
+{
+	struct iphdr *iph = ip_hdr(skb);
+
+	state->hlen = hlen;
+	state->ll_rs = ll_rs;
+	state->mtu = mtu;
+
+	state->left = skb->len - hlen;	/* Space per frame */
+	state->ptr = hlen;		/* Where to start from */
+
+	state->offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
+	state->not_last_frag = iph->frag_off & htons(IP_MF);
+}
+EXPORT_SYMBOL(ip_frag_init);
+
+static void ip_frag_ipcb(struct sk_buff *from, struct sk_buff *to,
+			 bool first_frag, struct ip_frag_state *state)
+{
+	/* Copy the flags to each fragment. */
+	IPCB(to)->flags = IPCB(from)->flags;
+
+	if (IPCB(from)->flags & IPSKB_FRAG_PMTU)
+		state->iph->frag_off |= htons(IP_DF);
+
+	/* ANK: dirty, but effective trick. Upgrade options only if
+	 * the segment to be fragmented was THE FIRST (otherwise,
+	 * options are already fixed) and make it ONCE
+	 * on the initial skb, so that all the following fragments
+	 * will inherit fixed options.
+	 */
+	if (first_frag)
+		ip_options_fragment(from);
+}
+
+struct sk_buff *ip_frag_next(struct sk_buff *skb, struct ip_frag_state *state)
+{
+	unsigned int len = state->left;
+	struct sk_buff *skb2;
+	struct iphdr *iph;
+
+	len = state->left;
+	/* IF: it doesn't fit, use 'mtu' - the data space left */
+	if (len > state->mtu)
+		len = state->mtu;
+	/* IF: we are not sending up to and including the packet end
+	   then align the next start on an eight byte boundary */
+	if (len < state->left) {
+		len &= ~7;
+	}
+
+	/* Allocate buffer */
+	skb2 = alloc_skb(len + state->hlen + state->ll_rs, GFP_ATOMIC);
+	if (!skb2)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 *	Set up data on packet
+	 */
+
+	ip_copy_metadata(skb2, skb);
+	skb_reserve(skb2, state->ll_rs);
+	skb_put(skb2, len + state->hlen);
+	skb_reset_network_header(skb2);
+	skb2->transport_header = skb2->network_header + state->hlen;
+
+	/*
+	 *	Charge the memory for the fragment to any owner
+	 *	it might possess
+	 */
+
+	if (skb->sk)
+		skb_set_owner_w(skb2, skb->sk);
+
+	/*
+	 *	Copy the packet header into the new buffer.
+	 */
+
+	skb_copy_from_linear_data(skb, skb_network_header(skb2), state->hlen);
+
+	/*
+	 *	Copy a block of the IP datagram.
+	 */
+	if (skb_copy_bits(skb, state->ptr, skb_transport_header(skb2), len))
+		BUG();
+	state->left -= len;
+
+	/*
+	 *	Fill in the new header fields.
+	 */
+	iph = ip_hdr(skb2);
+	iph->frag_off = htons((state->offset >> 3));
+
+	/*
+	 *	Added AC : If we are fragmenting a fragment that's not the
+	 *	last fragment then keep MF on each bit
+	 */
+	if (state->left > 0 || state->not_last_frag)
+		iph->frag_off |= htons(IP_MF);
+	state->ptr += len;
+	state->offset += len;
+
+	iph->tot_len = htons(len + state->hlen);
+
+	ip_send_check(iph);
+
+	return skb2;
+}
+EXPORT_SYMBOL(ip_frag_next);
+
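Review note (not part of the diff): the block above lifts the two fragmentation strategies out of ip_do_fragment() into reusable primitives: ip_fraglist_init()/ip_fraglist_prepare() walk an skb's already-built frag_list, while ip_frag_init()/ip_frag_next() carve fragments off the datagram one allocation at a time. ip_fraglist_next(), used further down, is not part of this hunk and presumably lives alongside struct ip_fraglist_iter in include/net/ip.h. Condensed from the new ip_do_fragment() body below (kernel code, not standalone-compilable; error unwinding and MIB counters trimmed), the frag_list fast path becomes:

	ip_fraglist_init(skb, iph, hlen, &iter);	/* fix up the first fragment */

	for (;;) {
		if (iter.frag) {	/* prepare the next header before sending */
			ip_fraglist_ipcb_prepare(skb, &iter);
			ip_fraglist_prepare(skb, &iter);
		}
		err = output(net, sk, skb);
		if (err || !iter.frag)
			break;
		skb = ip_fraglist_next(&iter);
	}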
 /*
  *	This IP datagram is too large to be sent in one piece. Break it up into
  *	smaller pieces (each of size equal to IP header plus
  *	a block of the data of the original IP data part) that will yet fit in a
  *	single device frame, and queue such a frame for sending.
  */
 
 int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 		   int (*output)(struct net *, struct sock *, struct sk_buff *))
 {
 	struct iphdr *iph;
-	int ptr;
 	struct sk_buff *skb2;
-	unsigned int mtu, hlen, left, len, ll_rs;
-	int offset;
-	__be16 not_last_frag;
 	struct rtable *rt = skb_rtable(skb);
+	unsigned int mtu, hlen, ll_rs;
+	struct ip_fraglist_iter iter;
+	struct ip_frag_state state;
 	int err = 0;
 
 	/* for offloaded checksums cleanup checksum before fragmentation */
 	if (skb->ip_summed == CHECKSUM_PARTIAL &&
 	    (err = skb_checksum_help(skb)))
 		goto fail;
 
 	/*
--- 48 unchanged lines hidden ---
 			if (skb->sk) {
 				frag->sk = skb->sk;
 				frag->destructor = sock_wfree;
 			}
 			skb->truesize -= frag->truesize;
 		}
 
 		/* Everything is OK. Generate! */
+		ip_fraglist_init(skb, iph, hlen, &iter);
 
-		err = 0;
-		offset = 0;
-		frag = skb_shinfo(skb)->frag_list;
-		skb_frag_list_init(skb);
-		skb->data_len = first_len - skb_headlen(skb);
-		skb->len = first_len;
-		iph->tot_len = htons(first_len);
-		iph->frag_off = htons(IP_MF);
-		ip_send_check(iph);
-
 		for (;;) {
 			/* Prepare header of the next frame,
 			 * before previous one went down. */
-			if (frag) {
-				frag->ip_summed = CHECKSUM_NONE;
-				skb_reset_transport_header(frag);
-				__skb_push(frag, hlen);
-				skb_reset_network_header(frag);
-				memcpy(skb_network_header(frag), iph, hlen);
-				iph = ip_hdr(frag);
-				iph->tot_len = htons(frag->len);
-				ip_copy_metadata(frag, skb);
-				if (offset == 0)
-					ip_options_fragment(frag);
-				offset += skb->len - hlen;
-				iph->frag_off = htons(offset>>3);
-				if (frag->next)
-					iph->frag_off |= htons(IP_MF);
-				/* Ready, complete checksum */
-				ip_send_check(iph);
+			if (iter.frag) {
+				ip_fraglist_ipcb_prepare(skb, &iter);
+				ip_fraglist_prepare(skb, &iter);
 			}
 
 			err = output(net, sk, skb);
 
 			if (!err)
 				IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
-			if (err || !frag)
+			if (err || !iter.frag)
 				break;
 
-			skb = frag;
-			frag = skb->next;
-			skb_mark_not_on_list(skb);
+			skb = ip_fraglist_next(&iter);
 		}
 
 		if (err == 0) {
 			IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS);
 			return 0;
 		}
 
-		kfree_skb_list(frag);
+		kfree_skb_list(iter.frag);
 
 		IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
 		return err;
 
 slow_path_clean:
 		skb_walk_frags(skb, frag2) {
 			if (frag2 == frag)
 				break;
 			frag2->sk = NULL;
 			frag2->destructor = NULL;
 			skb->truesize += frag2->truesize;
 		}
 	}
 
 slow_path:
-	iph = ip_hdr(skb);
-
-	left = skb->len - hlen;		/* Space per frame */
-	ptr = hlen;			/* Where to start from */
-
 	/*
 	 *	Fragment the datagram.
 	 */
 
-	offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
-	not_last_frag = iph->frag_off & htons(IP_MF);
+	ip_frag_init(skb, hlen, ll_rs, mtu, &state);
 
 	/*
 	 *	Keep copying data until we run out.
 	 */
 
-	while (left > 0) {
-		len = left;
-		/* IF: it doesn't fit, use 'mtu' - the data space left */
-		if (len > mtu)
-			len = mtu;
-		/* IF: we are not sending up to and including the packet end
-		   then align the next start on an eight byte boundary */
-		if (len < left) {
-			len &= ~7;
-		}
+	while (state.left > 0) {
+		bool first_frag = (state.offset == 0);
 
-		/* Allocate buffer */
-		skb2 = alloc_skb(len + hlen + ll_rs, GFP_ATOMIC);
-		if (!skb2) {
-			err = -ENOMEM;
+		skb2 = ip_frag_next(skb, &state);
+		if (IS_ERR(skb2)) {
+			err = PTR_ERR(skb2);
 			goto fail;
 		}
+		ip_frag_ipcb(skb, skb2, first_frag, &state);
 
 		/*
-		 *	Set up data on packet
-		 */
-
-		ip_copy_metadata(skb2, skb);
-		skb_reserve(skb2, ll_rs);
-		skb_put(skb2, len + hlen);
-		skb_reset_network_header(skb2);
-		skb2->transport_header = skb2->network_header + hlen;
-
-		/*
-		 *	Charge the memory for the fragment to any owner
-		 *	it might possess
-		 */
-
-		if (skb->sk)
-			skb_set_owner_w(skb2, skb->sk);
-
-		/*
-		 *	Copy the packet header into the new buffer.
-		 */
-
-		skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
-
-		/*
-		 *	Copy a block of the IP datagram.
-		 */
-		if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
-			BUG();
-		left -= len;
-
-		/*
-		 *	Fill in the new header fields.
-		 */
-		iph = ip_hdr(skb2);
-		iph->frag_off = htons((offset >> 3));
-
-		if (IPCB(skb)->flags & IPSKB_FRAG_PMTU)
-			iph->frag_off |= htons(IP_DF);
-
-		/* ANK: dirty, but effective trick. Upgrade options only if
-		 * the segment to be fragmented was THE FIRST (otherwise,
-		 * options are already fixed) and make it ONCE
-		 * on the initial skb, so that all the following fragments
-		 * will inherit fixed options.
-		 */
-		if (offset == 0)
-			ip_options_fragment(skb);
-
-		/*
-		 *	Added AC : If we are fragmenting a fragment that's not the
-		 *	last fragment then keep MF on each bit
-		 */
-		if (left > 0 || not_last_frag)
-			iph->frag_off |= htons(IP_MF);
-		ptr += len;
-		offset += len;
-
-		/*
 		 *	Put this fragment into the sending queue.
 		 */
-		iph->tot_len = htons(len + hlen);
-
-		ip_send_check(iph);
-
 		err = output(net, sk, skb2);
 		if (err)
 			goto fail;
 
 		IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
 	}
 	consume_skb(skb);
 	IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS);
--- 818 unchanged lines hidden ---
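Review note (not part of the diff): the slow path reduces to the same init-then-iterate shape as the fast path. A condensed view of the new loop above follows (kernel code, not standalone-compilable; the fail: unwinding and MIB counters are trimmed and replaced by plain returns). Behaviour is meant to match the removed open-coded version, the one structural change being that per-fragment IPCB flag copying moved out of ip_copy_metadata() and into ip_frag_ipcb()/ip_fraglist_ipcb_prepare():

	ip_frag_init(skb, hlen, ll_rs, mtu, &state);

	while (state.left > 0) {
		bool first_frag = (state.offset == 0);

		skb2 = ip_frag_next(skb, &state);	/* allocate + fill one fragment */
		if (IS_ERR(skb2))
			return PTR_ERR(skb2);		/* ERR_PTR(-ENOMEM) on allocation failure */
		ip_frag_ipcb(skb, skb2, first_frag, &state);

		err = output(net, sk, skb2);		/* queue the fragment for sending */
		if (err)
			return err;
	}
	consume_skb(skb);				/* original skb is fully consumed */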