ip_output.c (888dc273ea4e7ca332a6f73d10dfc8f2b212c803) ip_output.c (b7034146756b9e91cc059b19df7fe4defd4d7de7)
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * INET An implementation of the TCP/IP protocol suite for the LINUX
4 * operating system. INET is implemented using the BSD Socket
5 * interface as the means of communication with the user level.
6 *
7 * The Internet Protocol (IP) output module.
8 *

--- 273 unchanged lines hidden (view full) ---

282 if (err && ret == 0)
283 ret = err;
284 segs = nskb;
285 } while (segs);
286
287 return ret;
288}
289
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * INET An implementation of the TCP/IP protocol suite for the LINUX
4 * operating system. INET is implemented using the BSD Socket
5 * interface as the means of communication with the user level.
6 *
7 * The Internet Protocol (IP) output module.
8 *

--- 273 unchanged lines hidden (view full) ---

282 if (err && ret == 0)
283 ret = err;
284 segs = nskb;
285 } while (segs);
286
287 return ret;
288}
289
290static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
290static int __ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
291{
292 unsigned int mtu;
291{
292 unsigned int mtu;
293 int ret;
294
293
295 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
296 if (ret) {
297 kfree_skb(skb);
298 return ret;
299 }
300
301#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
302 /* Policy lookup after SNAT yielded a new policy */
303 if (skb_dst(skb)->xfrm) {
304 IPCB(skb)->flags |= IPSKB_REROUTED;
305 return dst_output(net, sk, skb);
306 }
307#endif
308 mtu = ip_skb_dst_mtu(sk, skb);
309 if (skb_is_gso(skb))
310 return ip_finish_output_gso(net, sk, skb, mtu);
311
312 if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU))
313 return ip_fragment(net, sk, skb, mtu, ip_finish_output2);
314
315 return ip_finish_output2(net, sk, skb);
316}
317
294#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
295 /* Policy lookup after SNAT yielded a new policy */
296 if (skb_dst(skb)->xfrm) {
297 IPCB(skb)->flags |= IPSKB_REROUTED;
298 return dst_output(net, sk, skb);
299 }
300#endif
301 mtu = ip_skb_dst_mtu(sk, skb);
302 if (skb_is_gso(skb))
303 return ip_finish_output_gso(net, sk, skb, mtu);
304
305 if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU))
306 return ip_fragment(net, sk, skb, mtu, ip_finish_output2);
307
308 return ip_finish_output2(net, sk, skb);
309}
310
311static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
312{
313 int ret;
314
315 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
316 switch (ret) {
317 case NET_XMIT_SUCCESS:
318 return __ip_finish_output(net, sk, skb);
319 case NET_XMIT_CN:
320 return __ip_finish_output(net, sk, skb) ? : ret;
321 default:
322 kfree_skb(skb);
323 return ret;
324 }
325}
326
318static int ip_mc_finish_output(struct net *net, struct sock *sk,
319 struct sk_buff *skb)
320{
321 int ret;
322
323 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
327static int ip_mc_finish_output(struct net *net, struct sock *sk,
328 struct sk_buff *skb)
329{
330 int ret;
331
332 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
324 if (ret) {
333 switch (ret) {
334 case NET_XMIT_SUCCESS:
335 return dev_loopback_xmit(net, sk, skb);
336 case NET_XMIT_CN:
337 return dev_loopback_xmit(net, sk, skb) ? : ret;
338 default:
325 kfree_skb(skb);
326 return ret;
327 }
339 kfree_skb(skb);
340 return ret;
341 }
328
329 return dev_loopback_xmit(net, sk, skb);
330}
331
332int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
333{
334 struct rtable *rt = skb_rtable(skb);
335 struct net_device *dev = rt->dst.dev;
336
337 /*

--- 182 unchanged lines hidden (view full) ---

520 to->skb_iif = from->skb_iif;
521 skb_dst_drop(to);
522 skb_dst_copy(to, from);
523 to->dev = from->dev;
524 to->mark = from->mark;
525
526 skb_copy_hash(to, from);
527
342}
343
344int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
345{
346 struct rtable *rt = skb_rtable(skb);
347 struct net_device *dev = rt->dst.dev;
348
349 /*

--- 182 unchanged lines hidden (view full) ---

532 to->skb_iif = from->skb_iif;
533 skb_dst_drop(to);
534 skb_dst_copy(to, from);
535 to->dev = from->dev;
536 to->mark = from->mark;
537
538 skb_copy_hash(to, from);
539
528 /* Copy the flags to each fragment. */
529 IPCB(to)->flags = IPCB(from)->flags;
530
531#ifdef CONFIG_NET_SCHED
532 to->tc_index = from->tc_index;
533#endif
534 nf_copy(to, from);
535 skb_ext_copy(to, from);
536#if IS_ENABLED(CONFIG_IP_VS)
537 to->ipvs_property = from->ipvs_property;
538#endif

--- 17 unchanged lines hidden (view full) ---

556 htonl(mtu));
557 kfree_skb(skb);
558 return -EMSGSIZE;
559 }
560
561 return ip_do_fragment(net, sk, skb, output);
562}
563
540#ifdef CONFIG_NET_SCHED
541 to->tc_index = from->tc_index;
542#endif
543 nf_copy(to, from);
544 skb_ext_copy(to, from);
545#if IS_ENABLED(CONFIG_IP_VS)
546 to->ipvs_property = from->ipvs_property;
547#endif

--- 17 unchanged lines hidden (view full) ---

565 htonl(mtu));
566 kfree_skb(skb);
567 return -EMSGSIZE;
568 }
569
570 return ip_do_fragment(net, sk, skb, output);
571}
572
573void ip_fraglist_init(struct sk_buff *skb, struct iphdr *iph,
574 unsigned int hlen, struct ip_fraglist_iter *iter)
575{
576 unsigned int first_len = skb_pagelen(skb);
577
578 iter->frag = skb_shinfo(skb)->frag_list;
579 skb_frag_list_init(skb);
580
581 iter->offset = 0;
582 iter->iph = iph;
583 iter->hlen = hlen;
584
585 skb->data_len = first_len - skb_headlen(skb);
586 skb->len = first_len;
587 iph->tot_len = htons(first_len);
588 iph->frag_off = htons(IP_MF);
589 ip_send_check(iph);
590}
591EXPORT_SYMBOL(ip_fraglist_init);
592
593static void ip_fraglist_ipcb_prepare(struct sk_buff *skb,
594 struct ip_fraglist_iter *iter)
595{
596 struct sk_buff *to = iter->frag;
597
598 /* Copy the flags to each fragment. */
599 IPCB(to)->flags = IPCB(skb)->flags;
600
601 if (iter->offset == 0)
602 ip_options_fragment(to);
603}
604
605void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter)
606{
607 unsigned int hlen = iter->hlen;
608 struct iphdr *iph = iter->iph;
609 struct sk_buff *frag;
610
611 frag = iter->frag;
612 frag->ip_summed = CHECKSUM_NONE;
613 skb_reset_transport_header(frag);
614 __skb_push(frag, hlen);
615 skb_reset_network_header(frag);
616 memcpy(skb_network_header(frag), iph, hlen);
617 iter->iph = ip_hdr(frag);
618 iph = iter->iph;
619 iph->tot_len = htons(frag->len);
620 ip_copy_metadata(frag, skb);
621 iter->offset += skb->len - hlen;
622 iph->frag_off = htons(iter->offset >> 3);
623 if (frag->next)
624 iph->frag_off |= htons(IP_MF);
625 /* Ready, complete checksum */
626 ip_send_check(iph);
627}
628EXPORT_SYMBOL(ip_fraglist_prepare);
629
630void ip_frag_init(struct sk_buff *skb, unsigned int hlen,
631 unsigned int ll_rs, unsigned int mtu,
632 struct ip_frag_state *state)
633{
634 struct iphdr *iph = ip_hdr(skb);
635
636 state->hlen = hlen;
637 state->ll_rs = ll_rs;
638 state->mtu = mtu;
639
640 state->left = skb->len - hlen; /* Space per frame */
641 state->ptr = hlen; /* Where to start from */
642
643 state->offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
644 state->not_last_frag = iph->frag_off & htons(IP_MF);
645}
646EXPORT_SYMBOL(ip_frag_init);
647
648static void ip_frag_ipcb(struct sk_buff *from, struct sk_buff *to,
649 bool first_frag, struct ip_frag_state *state)
650{
651 /* Copy the flags to each fragment. */
652 IPCB(to)->flags = IPCB(from)->flags;
653
654 if (IPCB(from)->flags & IPSKB_FRAG_PMTU)
655 state->iph->frag_off |= htons(IP_DF);
656
657 /* ANK: dirty, but effective trick. Upgrade options only if
658 * the segment to be fragmented was THE FIRST (otherwise,
659 * options are already fixed) and make it ONCE
660 * on the initial skb, so that all the following fragments
661 * will inherit fixed options.
662 */
663 if (first_frag)
664 ip_options_fragment(from);
665}
666
667struct sk_buff *ip_frag_next(struct sk_buff *skb, struct ip_frag_state *state)
668{
669 unsigned int len = state->left;
670 struct sk_buff *skb2;
671 struct iphdr *iph;
672
673 len = state->left;
674 /* IF: it doesn't fit, use 'mtu' - the data space left */
675 if (len > state->mtu)
676 len = state->mtu;
677 /* IF: we are not sending up to and including the packet end
678 then align the next start on an eight byte boundary */
679 if (len < state->left) {
680 len &= ~7;
681 }
682
683 /* Allocate buffer */
684 skb2 = alloc_skb(len + state->hlen + state->ll_rs, GFP_ATOMIC);
685 if (!skb2)
686 return ERR_PTR(-ENOMEM);
687
688 /*
689 * Set up data on packet
690 */
691
692 ip_copy_metadata(skb2, skb);
693 skb_reserve(skb2, state->ll_rs);
694 skb_put(skb2, len + state->hlen);
695 skb_reset_network_header(skb2);
696 skb2->transport_header = skb2->network_header + state->hlen;
697
698 /*
699 * Charge the memory for the fragment to any owner
700 * it might possess
701 */
702
703 if (skb->sk)
704 skb_set_owner_w(skb2, skb->sk);
705
706 /*
707 * Copy the packet header into the new buffer.
708 */
709
710 skb_copy_from_linear_data(skb, skb_network_header(skb2), state->hlen);
711
712 /*
713 * Copy a block of the IP datagram.
714 */
715 if (skb_copy_bits(skb, state->ptr, skb_transport_header(skb2), len))
716 BUG();
717 state->left -= len;
718
719 /*
720 * Fill in the new header fields.
721 */
722 iph = ip_hdr(skb2);
723 iph->frag_off = htons((state->offset >> 3));
724
725 /*
726 * Added AC : If we are fragmenting a fragment that's not the
727 * last fragment then keep MF on each bit
728 */
729 if (state->left > 0 || state->not_last_frag)
730 iph->frag_off |= htons(IP_MF);
731 state->ptr += len;
732 state->offset += len;
733
734 iph->tot_len = htons(len + state->hlen);
735
736 ip_send_check(iph);
737
738 return skb2;
739}
740EXPORT_SYMBOL(ip_frag_next);
741
564/*
565 * This IP datagram is too large to be sent in one piece. Break it up into
566 * smaller pieces (each of size equal to IP header plus
567 * a block of the data of the original IP data part) that will yet fit in a
568 * single device frame, and queue such a frame for sending.
569 */
570
571int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
572 int (*output)(struct net *, struct sock *, struct sk_buff *))
573{
574 struct iphdr *iph;
742/*
743 * This IP datagram is too large to be sent in one piece. Break it up into
744 * smaller pieces (each of size equal to IP header plus
745 * a block of the data of the original IP data part) that will yet fit in a
746 * single device frame, and queue such a frame for sending.
747 */
748
749int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
750 int (*output)(struct net *, struct sock *, struct sk_buff *))
751{
752 struct iphdr *iph;
575 int ptr;
576 struct sk_buff *skb2;
753 struct sk_buff *skb2;
577 unsigned int mtu, hlen, left, len, ll_rs;
578 int offset;
579 __be16 not_last_frag;
580 struct rtable *rt = skb_rtable(skb);
754 struct rtable *rt = skb_rtable(skb);
755 unsigned int mtu, hlen, ll_rs;
756 struct ip_fraglist_iter iter;
757 struct ip_frag_state state;
581 int err = 0;
582
583 /* for offloaded checksums cleanup checksum before fragmentation */
584 if (skb->ip_summed == CHECKSUM_PARTIAL &&
585 (err = skb_checksum_help(skb)))
586 goto fail;
587
588 /*

--- 48 unchanged lines hidden (view full) ---

637 if (skb->sk) {
638 frag->sk = skb->sk;
639 frag->destructor = sock_wfree;
640 }
641 skb->truesize -= frag->truesize;
642 }
643
644 /* Everything is OK. Generate! */
758 int err = 0;
759
760 /* for offloaded checksums cleanup checksum before fragmentation */
761 if (skb->ip_summed == CHECKSUM_PARTIAL &&
762 (err = skb_checksum_help(skb)))
763 goto fail;
764
765 /*

--- 48 unchanged lines hidden (view full) ---

814 if (skb->sk) {
815 frag->sk = skb->sk;
816 frag->destructor = sock_wfree;
817 }
818 skb->truesize -= frag->truesize;
819 }
820
821 /* Everything is OK. Generate! */
822 ip_fraglist_init(skb, iph, hlen, &iter);
645
823
646 err = 0;
647 offset = 0;
648 frag = skb_shinfo(skb)->frag_list;
649 skb_frag_list_init(skb);
650 skb->data_len = first_len - skb_headlen(skb);
651 skb->len = first_len;
652 iph->tot_len = htons(first_len);
653 iph->frag_off = htons(IP_MF);
654 ip_send_check(iph);
655
656 for (;;) {
657 /* Prepare header of the next frame,
658 * before previous one went down. */
824 for (;;) {
825 /* Prepare header of the next frame,
826 * before previous one went down. */
659 if (frag) {
660 frag->ip_summed = CHECKSUM_NONE;
661 skb_reset_transport_header(frag);
662 __skb_push(frag, hlen);
663 skb_reset_network_header(frag);
664 memcpy(skb_network_header(frag), iph, hlen);
665 iph = ip_hdr(frag);
666 iph->tot_len = htons(frag->len);
667 ip_copy_metadata(frag, skb);
668 if (offset == 0)
669 ip_options_fragment(frag);
670 offset += skb->len - hlen;
671 iph->frag_off = htons(offset>>3);
672 if (frag->next)
673 iph->frag_off |= htons(IP_MF);
674 /* Ready, complete checksum */
675 ip_send_check(iph);
827 if (iter.frag) {
828 ip_fraglist_ipcb_prepare(skb, &iter);
829 ip_fraglist_prepare(skb, &iter);
676 }
677
678 err = output(net, sk, skb);
679
680 if (!err)
681 IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
830 }
831
832 err = output(net, sk, skb);
833
834 if (!err)
835 IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
682 if (err || !frag)
836 if (err || !iter.frag)
683 break;
684
837 break;
838
685 skb = frag;
686 frag = skb->next;
687 skb_mark_not_on_list(skb);
839 skb = ip_fraglist_next(&iter);
688 }
689
690 if (err == 0) {
691 IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS);
692 return 0;
693 }
694
840 }
841
842 if (err == 0) {
843 IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS);
844 return 0;
845 }
846
695 kfree_skb_list(frag);
847 kfree_skb_list(iter.frag);
696
697 IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
698 return err;
699
700slow_path_clean:
701 skb_walk_frags(skb, frag2) {
702 if (frag2 == frag)
703 break;
704 frag2->sk = NULL;
705 frag2->destructor = NULL;
706 skb->truesize += frag2->truesize;
707 }
708 }
709
710slow_path:
848
849 IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
850 return err;
851
852slow_path_clean:
853 skb_walk_frags(skb, frag2) {
854 if (frag2 == frag)
855 break;
856 frag2->sk = NULL;
857 frag2->destructor = NULL;
858 skb->truesize += frag2->truesize;
859 }
860 }
861
862slow_path:
711 iph = ip_hdr(skb);
712
713 left = skb->len - hlen; /* Space per frame */
714 ptr = hlen; /* Where to start from */
715
716 /*
717 * Fragment the datagram.
718 */
719
863 /*
864 * Fragment the datagram.
865 */
866
720 offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
721 not_last_frag = iph->frag_off & htons(IP_MF);
867 ip_frag_init(skb, hlen, ll_rs, mtu, &state);
722
723 /*
724 * Keep copying data until we run out.
725 */
726
868
869 /*
870 * Keep copying data until we run out.
871 */
872
727 while (left > 0) {
728 len = left;
729 /* IF: it doesn't fit, use 'mtu' - the data space left */
730 if (len > mtu)
731 len = mtu;
732 /* IF: we are not sending up to and including the packet end
733 then align the next start on an eight byte boundary */
734 if (len < left) {
735 len &= ~7;
736 }
873 while (state.left > 0) {
874 bool first_frag = (state.offset == 0);
737
875
738 /* Allocate buffer */
739 skb2 = alloc_skb(len + hlen + ll_rs, GFP_ATOMIC);
740 if (!skb2) {
741 err = -ENOMEM;
876 skb2 = ip_frag_next(skb, &state);
877 if (IS_ERR(skb2)) {
878 err = PTR_ERR(skb2);
742 goto fail;
743 }
879 goto fail;
880 }
881 ip_frag_ipcb(skb, skb2, first_frag, &state);
744
745 /*
882
883 /*
746 * Set up data on packet
747 */
748
749 ip_copy_metadata(skb2, skb);
750 skb_reserve(skb2, ll_rs);
751 skb_put(skb2, len + hlen);
752 skb_reset_network_header(skb2);
753 skb2->transport_header = skb2->network_header + hlen;
754
755 /*
756 * Charge the memory for the fragment to any owner
757 * it might possess
758 */
759
760 if (skb->sk)
761 skb_set_owner_w(skb2, skb->sk);
762
763 /*
764 * Copy the packet header into the new buffer.
765 */
766
767 skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
768
769 /*
770 * Copy a block of the IP datagram.
771 */
772 if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
773 BUG();
774 left -= len;
775
776 /*
777 * Fill in the new header fields.
778 */
779 iph = ip_hdr(skb2);
780 iph->frag_off = htons((offset >> 3));
781
782 if (IPCB(skb)->flags & IPSKB_FRAG_PMTU)
783 iph->frag_off |= htons(IP_DF);
784
785 /* ANK: dirty, but effective trick. Upgrade options only if
786 * the segment to be fragmented was THE FIRST (otherwise,
787 * options are already fixed) and make it ONCE
788 * on the initial skb, so that all the following fragments
789 * will inherit fixed options.
790 */
791 if (offset == 0)
792 ip_options_fragment(skb);
793
794 /*
795 * Added AC : If we are fragmenting a fragment that's not the
796 * last fragment then keep MF on each bit
797 */
798 if (left > 0 || not_last_frag)
799 iph->frag_off |= htons(IP_MF);
800 ptr += len;
801 offset += len;
802
803 /*
804 * Put this fragment into the sending queue.
805 */
884 * Put this fragment into the sending queue.
885 */
806 iph->tot_len = htons(len + hlen);
807
808 ip_send_check(iph);
809
810 err = output(net, sk, skb2);
811 if (err)
812 goto fail;
813
814 IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
815 }
816 consume_skb(skb);
817 IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS);

--- 818 unchanged lines hidden ---
886 err = output(net, sk, skb2);
887 if (err)
888 goto fail;
889
890 IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
891 }
892 consume_skb(skb);
893 IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS);

--- 818 unchanged lines hidden ---