12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 21da177e4SLinus Torvalds /* 31da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX 41da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket 51da177e4SLinus Torvalds * interface as the means of communication with the user level. 61da177e4SLinus Torvalds * 71da177e4SLinus Torvalds * The Internet Protocol (IP) module. 81da177e4SLinus Torvalds * 902c30a84SJesper Juhl * Authors: Ross Biro 101da177e4SLinus Torvalds * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 111da177e4SLinus Torvalds * Donald Becker, <becker@super.org> 12113aa838SAlan Cox * Alan Cox, <alan@lxorguk.ukuu.org.uk> 131da177e4SLinus Torvalds * Richard Underwood 141da177e4SLinus Torvalds * Stefan Becker, <stefanb@yello.ping.de> 151da177e4SLinus Torvalds * Jorge Cwik, <jorge@laser.satlink.net> 161da177e4SLinus Torvalds * Arnt Gulbrandsen, <agulbra@nvg.unit.no> 171da177e4SLinus Torvalds * 181da177e4SLinus Torvalds * Fixes: 191da177e4SLinus Torvalds * Alan Cox : Commented a couple of minor bits of surplus code 201da177e4SLinus Torvalds * Alan Cox : Undefining IP_FORWARD doesn't include the code 211da177e4SLinus Torvalds * (just stops a compiler warning). 221da177e4SLinus Torvalds * Alan Cox : Frames with >=MAX_ROUTE record routes, strict routes or loose routes 231da177e4SLinus Torvalds * are junked rather than corrupting things. 241da177e4SLinus Torvalds * Alan Cox : Frames to bad broadcast subnets are dumped 251da177e4SLinus Torvalds * We used to process them non broadcast and 261da177e4SLinus Torvalds * boy could that cause havoc. 271da177e4SLinus Torvalds * Alan Cox : ip_forward sets the free flag on the 281da177e4SLinus Torvalds * new frame it queues. Still crap because 291da177e4SLinus Torvalds * it copies the frame but at least it 301da177e4SLinus Torvalds * doesn't eat memory too. 311da177e4SLinus Torvalds * Alan Cox : Generic queue code and memory fixes. 321da177e4SLinus Torvalds * Fred Van Kempen : IP fragment support (borrowed from NET2E) 331da177e4SLinus Torvalds * Gerhard Koerting: Forward fragmented frames correctly. 341da177e4SLinus Torvalds * Gerhard Koerting: Fixes to my fix of the above 8-). 351da177e4SLinus Torvalds * Gerhard Koerting: IP interface addressing fix. 361da177e4SLinus Torvalds * Linus Torvalds : More robustness checks 371da177e4SLinus Torvalds * Alan Cox : Even more checks: Still not as robust as it ought to be 381da177e4SLinus Torvalds * Alan Cox : Save IP header pointer for later 391da177e4SLinus Torvalds * Alan Cox : ip option setting 401da177e4SLinus Torvalds * Alan Cox : Use ip_tos/ip_ttl settings 411da177e4SLinus Torvalds * Alan Cox : Fragmentation bogosity removed 421da177e4SLinus Torvalds * (Thanks to Mark.Bush@prg.ox.ac.uk) 431da177e4SLinus Torvalds * Dmitry Gorodchanin : Send of a raw packet crash fix. 441da177e4SLinus Torvalds * Alan Cox : Silly ip bug when an overlength 451da177e4SLinus Torvalds * fragment turns up. Now frees the 461da177e4SLinus Torvalds * queue. 471da177e4SLinus Torvalds * Linus Torvalds/ : Memory leakage on fragmentation 481da177e4SLinus Torvalds * Alan Cox : handling. 491da177e4SLinus Torvalds * Gerhard Koerting: Forwarding uses IP priority hints 501da177e4SLinus Torvalds * Teemu Rantanen : Fragment problems. 511da177e4SLinus Torvalds * Alan Cox : General cleanup, comments and reformat 521da177e4SLinus Torvalds * Alan Cox : SNMP statistics 531da177e4SLinus Torvalds * Alan Cox : BSD address rule semantics. Also see 541da177e4SLinus Torvalds * UDP as there is a nasty checksum issue 551da177e4SLinus Torvalds * if you do things the wrong way. 561da177e4SLinus Torvalds * Alan Cox : Always defrag, moved IP_FORWARD to the config.in file 571da177e4SLinus Torvalds * Alan Cox : IP options adjust sk->priority. 581da177e4SLinus Torvalds * Pedro Roque : Fix mtu/length error in ip_forward. 591da177e4SLinus Torvalds * Alan Cox : Avoid ip_chk_addr when possible. 601da177e4SLinus Torvalds * Richard Underwood : IP multicasting. 611da177e4SLinus Torvalds * Alan Cox : Cleaned up multicast handlers. 621da177e4SLinus Torvalds * Alan Cox : RAW sockets demultiplex in the BSD style. 631da177e4SLinus Torvalds * Gunther Mayer : Fix the SNMP reporting typo 641da177e4SLinus Torvalds * Alan Cox : Always in group 224.0.0.1 651da177e4SLinus Torvalds * Pauline Middelink : Fast ip_checksum update when forwarding 661da177e4SLinus Torvalds * Masquerading support. 671da177e4SLinus Torvalds * Alan Cox : Multicast loopback error for 224.0.0.1 681da177e4SLinus Torvalds * Alan Cox : IP_MULTICAST_LOOP option. 691da177e4SLinus Torvalds * Alan Cox : Use notifiers. 701da177e4SLinus Torvalds * Bjorn Ekwall : Removed ip_csum (from slhc.c too) 711da177e4SLinus Torvalds * Bjorn Ekwall : Moved ip_fast_csum to ip.h (inline!) 721da177e4SLinus Torvalds * Stefan Becker : Send out ICMP HOST REDIRECT 731da177e4SLinus Torvalds * Arnt Gulbrandsen : ip_build_xmit 741da177e4SLinus Torvalds * Alan Cox : Per socket routing cache 751da177e4SLinus Torvalds * Alan Cox : Fixed routing cache, added header cache. 761da177e4SLinus Torvalds * Alan Cox : Loopback didn't work right in original ip_build_xmit - fixed it. 771da177e4SLinus Torvalds * Alan Cox : Only send ICMP_REDIRECT if src/dest are the same net. 781da177e4SLinus Torvalds * Alan Cox : Incoming IP option handling. 791da177e4SLinus Torvalds * Alan Cox : Set saddr on raw output frames as per BSD. 801da177e4SLinus Torvalds * Alan Cox : Stopped broadcast source route explosions. 811da177e4SLinus Torvalds * Alan Cox : Can disable source routing 821da177e4SLinus Torvalds * Takeshi Sone : Masquerading didn't work. 831da177e4SLinus Torvalds * Dave Bonn,Alan Cox : Faster IP forwarding whenever possible. 841da177e4SLinus Torvalds * Alan Cox : Memory leaks, tramples, misc debugging. 851da177e4SLinus Torvalds * Alan Cox : Fixed multicast (by popular demand 8)) 861da177e4SLinus Torvalds * Alan Cox : Fixed forwarding (by even more popular demand 8)) 871da177e4SLinus Torvalds * Alan Cox : Fixed SNMP statistics [I think] 881da177e4SLinus Torvalds * Gerhard Koerting : IP fragmentation forwarding fix 891da177e4SLinus Torvalds * Alan Cox : Device lock against page fault. 901da177e4SLinus Torvalds * Alan Cox : IP_HDRINCL facility. 911da177e4SLinus Torvalds * Werner Almesberger : Zero fragment bug 921da177e4SLinus Torvalds * Alan Cox : RAW IP frame length bug 931da177e4SLinus Torvalds * Alan Cox : Outgoing firewall on build_xmit 941da177e4SLinus Torvalds * A.N.Kuznetsov : IP_OPTIONS support throughout the kernel 951da177e4SLinus Torvalds * Alan Cox : Multicast routing hooks 961da177e4SLinus Torvalds * Jos Vos : Do accounting *before* call_in_firewall 971da177e4SLinus Torvalds * Willy Konynenberg : Transparent proxying support 981da177e4SLinus Torvalds * 991da177e4SLinus Torvalds * To Fix: 1001da177e4SLinus Torvalds * IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient 1011da177e4SLinus Torvalds * and could be made very efficient with the addition of some virtual memory hacks to permit 1021da177e4SLinus Torvalds * the allocation of a buffer that can then be 'grown' by twiddling page tables. 1031da177e4SLinus Torvalds * Output fragmentation wants updating along with the buffer management to use a single 1041da177e4SLinus Torvalds * interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet 1051da177e4SLinus Torvalds * output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause 1061da177e4SLinus Torvalds * fragmentation anyway. 1071da177e4SLinus Torvalds */ 1081da177e4SLinus Torvalds 109afd46503SJoe Perches #define pr_fmt(fmt) "IPv4: " fmt 110afd46503SJoe Perches 1111da177e4SLinus Torvalds #include <linux/module.h> 1121da177e4SLinus Torvalds #include <linux/types.h> 1131da177e4SLinus Torvalds #include <linux/kernel.h> 1141da177e4SLinus Torvalds #include <linux/string.h> 1151da177e4SLinus Torvalds #include <linux/errno.h> 1165a0e3ad6STejun Heo #include <linux/slab.h> 1171da177e4SLinus Torvalds 1181da177e4SLinus Torvalds #include <linux/net.h> 1191da177e4SLinus Torvalds #include <linux/socket.h> 1201da177e4SLinus Torvalds #include <linux/sockios.h> 1211da177e4SLinus Torvalds #include <linux/in.h> 1221da177e4SLinus Torvalds #include <linux/inet.h> 12314c85021SArnaldo Carvalho de Melo #include <linux/inetdevice.h> 1241da177e4SLinus Torvalds #include <linux/netdevice.h> 1251da177e4SLinus Torvalds #include <linux/etherdevice.h> 1260e219ae4SPaolo Abeni #include <linux/indirect_call_wrapper.h> 1271da177e4SLinus Torvalds 1281da177e4SLinus Torvalds #include <net/snmp.h> 1291da177e4SLinus Torvalds #include <net/ip.h> 1301da177e4SLinus Torvalds #include <net/protocol.h> 1311da177e4SLinus Torvalds #include <net/route.h> 1321da177e4SLinus Torvalds #include <linux/skbuff.h> 1331da177e4SLinus Torvalds #include <net/sock.h> 1341da177e4SLinus Torvalds #include <net/arp.h> 1351da177e4SLinus Torvalds #include <net/icmp.h> 1361da177e4SLinus Torvalds #include <net/raw.h> 1371da177e4SLinus Torvalds #include <net/checksum.h> 1381f07d03eSEric Dumazet #include <net/inet_ecn.h> 1391da177e4SLinus Torvalds #include <linux/netfilter_ipv4.h> 1401da177e4SLinus Torvalds #include <net/xfrm.h> 1411da177e4SLinus Torvalds #include <linux/mroute.h> 1421da177e4SLinus Torvalds #include <linux/netlink.h> 143f38a9eb1SThomas Graf #include <net/dst_metadata.h> 1441da177e4SLinus Torvalds 1451da177e4SLinus Torvalds /* 14666018506SEric Dumazet * Process Router Attention IP option (RFC 2113) 1471da177e4SLinus Torvalds */ 148ba57b4dbSDavid S. Miller bool ip_call_ra_chain(struct sk_buff *skb) 1491da177e4SLinus Torvalds { 1501da177e4SLinus Torvalds struct ip_ra_chain *ra; 151eddc9ec5SArnaldo Carvalho de Melo u8 protocol = ip_hdr(skb)->protocol; 1521da177e4SLinus Torvalds struct sock *last = NULL; 153cb84663eSDenis V. Lunev struct net_device *dev = skb->dev; 15437fcbab6SEric W. Biederman struct net *net = dev_net(dev); 1551da177e4SLinus Torvalds 1565796ef75SKirill Tkhai for (ra = rcu_dereference(net->ipv4.ra_chain); ra; ra = rcu_dereference(ra->next)) { 1571da177e4SLinus Torvalds struct sock *sk = ra->sk; 1581da177e4SLinus Torvalds 1591da177e4SLinus Torvalds /* If socket is bound to an interface, only report 1601da177e4SLinus Torvalds * the packet if it came from that interface. 1611da177e4SLinus Torvalds */ 162c720c7e8SEric Dumazet if (sk && inet_sk(sk)->inet_num == protocol && 1631da177e4SLinus Torvalds (!sk->sk_bound_dev_if || 1645796ef75SKirill Tkhai sk->sk_bound_dev_if == dev->ifindex)) { 16556f8a75cSPaul Gortmaker if (ip_is_fragment(ip_hdr(skb))) { 16619bcf9f2SEric W. Biederman if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN)) 167ba57b4dbSDavid S. Miller return true; 1681da177e4SLinus Torvalds } 1691da177e4SLinus Torvalds if (last) { 1701da177e4SLinus Torvalds struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1711da177e4SLinus Torvalds if (skb2) 1721da177e4SLinus Torvalds raw_rcv(last, skb2); 1731da177e4SLinus Torvalds } 1741da177e4SLinus Torvalds last = sk; 1751da177e4SLinus Torvalds } 1761da177e4SLinus Torvalds } 1771da177e4SLinus Torvalds 1781da177e4SLinus Torvalds if (last) { 1791da177e4SLinus Torvalds raw_rcv(last, skb); 180ba57b4dbSDavid S. Miller return true; 1811da177e4SLinus Torvalds } 182ba57b4dbSDavid S. Miller return false; 1831da177e4SLinus Torvalds } 1841da177e4SLinus Torvalds 1850e219ae4SPaolo Abeni INDIRECT_CALLABLE_DECLARE(int udp_rcv(struct sk_buff *)); 1860e219ae4SPaolo Abeni INDIRECT_CALLABLE_DECLARE(int tcp_v4_rcv(struct sk_buff *)); 18768cb7d53SPaolo Abeni void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol) 1881da177e4SLinus Torvalds { 18932613090SAlexey Dobriyan const struct net_protocol *ipprot; 19068cb7d53SPaolo Abeni int raw, ret; 1911da177e4SLinus Torvalds 1921da177e4SLinus Torvalds resubmit: 1937bc54c90SPavel Emelyanov raw = raw_local_deliver(skb, protocol); 1947bc54c90SPavel Emelyanov 195f9242b6bSDavid S. Miller ipprot = rcu_dereference(inet_protos[protocol]); 19600db4124SIan Morris if (ipprot) { 197b59c2701SPatrick McHardy if (!ipprot->no_policy) { 198b59c2701SPatrick McHardy if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 19910580c47SMenglong Dong kfree_skb_reason(skb, 20010580c47SMenglong Dong SKB_DROP_REASON_XFRM_POLICY); 20168cb7d53SPaolo Abeni return; 2021da177e4SLinus Torvalds } 203895b5c9fSFlorian Westphal nf_reset_ct(skb); 204b59c2701SPatrick McHardy } 2050e219ae4SPaolo Abeni ret = INDIRECT_CALL_2(ipprot->handler, tcp_v4_rcv, udp_rcv, 2060e219ae4SPaolo Abeni skb); 2071da177e4SLinus Torvalds if (ret < 0) { 2081da177e4SLinus Torvalds protocol = -ret; 2091da177e4SLinus Torvalds goto resubmit; 2101da177e4SLinus Torvalds } 211b45386efSEric Dumazet __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS); 2121da177e4SLinus Torvalds } else { 2137bc54c90SPavel Emelyanov if (!raw) { 2141da177e4SLinus Torvalds if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 215b45386efSEric Dumazet __IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS); 2161da177e4SLinus Torvalds icmp_send(skb, ICMP_DEST_UNREACH, 2171da177e4SLinus Torvalds ICMP_PROT_UNREACH, 0); 2181da177e4SLinus Torvalds } 21910580c47SMenglong Dong kfree_skb_reason(skb, SKB_DROP_REASON_IP_NOPROTO); 220d8c6f4b9SNeil Horman } else { 221b45386efSEric Dumazet __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS); 222d8c6f4b9SNeil Horman consume_skb(skb); 223d8c6f4b9SNeil Horman } 2241da177e4SLinus Torvalds } 2251da177e4SLinus Torvalds } 22668cb7d53SPaolo Abeni 22768cb7d53SPaolo Abeni static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 22868cb7d53SPaolo Abeni { 229cd14e9b7SMartin KaFai Lau skb_clear_delivery_time(skb); 23068cb7d53SPaolo Abeni __skb_pull(skb, skb_network_header_len(skb)); 23168cb7d53SPaolo Abeni 23268cb7d53SPaolo Abeni rcu_read_lock(); 23368cb7d53SPaolo Abeni ip_protocol_deliver_rcu(net, skb, ip_hdr(skb)->protocol); 2341da177e4SLinus Torvalds rcu_read_unlock(); 2351da177e4SLinus Torvalds 2361da177e4SLinus Torvalds return 0; 2371da177e4SLinus Torvalds } 2381da177e4SLinus Torvalds 2391da177e4SLinus Torvalds /* 2401da177e4SLinus Torvalds * Deliver IP Packets to the higher protocol layers. 2411da177e4SLinus Torvalds */ 2421da177e4SLinus Torvalds int ip_local_deliver(struct sk_buff *skb) 2431da177e4SLinus Torvalds { 2441da177e4SLinus Torvalds /* 2451da177e4SLinus Torvalds * Reassemble IP fragments. 2461da177e4SLinus Torvalds */ 24719bcf9f2SEric W. Biederman struct net *net = dev_net(skb->dev); 2481da177e4SLinus Torvalds 24956f8a75cSPaul Gortmaker if (ip_is_fragment(ip_hdr(skb))) { 25019bcf9f2SEric W. Biederman if (ip_defrag(net, skb, IP_DEFRAG_LOCAL_DELIVER)) 2511da177e4SLinus Torvalds return 0; 2521da177e4SLinus Torvalds } 2531da177e4SLinus Torvalds 25429a26a56SEric W. Biederman return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, 25519bcf9f2SEric W. Biederman net, NULL, skb, skb->dev, NULL, 2561da177e4SLinus Torvalds ip_local_deliver_finish); 2571da177e4SLinus Torvalds } 258e43b2190SBrian Vazquez EXPORT_SYMBOL(ip_local_deliver); 2591da177e4SLinus Torvalds 2608c83f2dfSStephen Suryaputra static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev) 261d245407eSThomas Graf { 262d245407eSThomas Graf struct ip_options *opt; 263b71d1d42SEric Dumazet const struct iphdr *iph; 264d245407eSThomas Graf 265d245407eSThomas Graf /* It looks as overkill, because not all 266d245407eSThomas Graf IP options require packet mangling. 267d245407eSThomas Graf But it is the easiest for now, especially taking 268d245407eSThomas Graf into account that combination of IP options 269d245407eSThomas Graf and running sniffer is extremely rare condition. 270d245407eSThomas Graf --ANK (980813) 271d245407eSThomas Graf */ 272d245407eSThomas Graf if (skb_cow(skb, skb_headroom(skb))) { 273b45386efSEric Dumazet __IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INDISCARDS); 274d245407eSThomas Graf goto drop; 275d245407eSThomas Graf } 276d245407eSThomas Graf 277eddc9ec5SArnaldo Carvalho de Melo iph = ip_hdr(skb); 27822aba383SDenis V. Lunev opt = &(IPCB(skb)->opt); 27922aba383SDenis V. Lunev opt->optlen = iph->ihl*4 - sizeof(struct iphdr); 280d245407eSThomas Graf 281c346dca1SYOSHIFUJI Hideaki if (ip_options_compile(dev_net(dev), opt, skb)) { 282b45386efSEric Dumazet __IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INHDRERRORS); 283d245407eSThomas Graf goto drop; 284d245407eSThomas Graf } 285d245407eSThomas Graf 286d245407eSThomas Graf if (unlikely(opt->srr)) { 2876e8b11b4SEric Dumazet struct in_device *in_dev = __in_dev_get_rcu(dev); 2886e8b11b4SEric Dumazet 289d245407eSThomas Graf if (in_dev) { 290d245407eSThomas Graf if (!IN_DEV_SOURCE_ROUTE(in_dev)) { 291e87cc472SJoe Perches if (IN_DEV_LOG_MARTIANS(in_dev)) 292e87cc472SJoe Perches net_info_ratelimited("source route option %pI4 -> %pI4\n", 293e87cc472SJoe Perches &iph->saddr, 294e87cc472SJoe Perches &iph->daddr); 295d245407eSThomas Graf goto drop; 296d245407eSThomas Graf } 297d245407eSThomas Graf } 298d245407eSThomas Graf 2998c83f2dfSStephen Suryaputra if (ip_options_rcv_srr(skb, dev)) 300d245407eSThomas Graf goto drop; 301d245407eSThomas Graf } 302d245407eSThomas Graf 3036a91395fSDavid S. Miller return false; 304d245407eSThomas Graf drop: 3056a91395fSDavid S. Miller return true; 306d245407eSThomas Graf } 307d245407eSThomas Graf 30802b24941SPaolo Abeni static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph, 30902b24941SPaolo Abeni const struct sk_buff *hint) 31002b24941SPaolo Abeni { 31102b24941SPaolo Abeni return hint && !skb_dst(skb) && ip_hdr(hint)->daddr == iph->daddr && 31202b24941SPaolo Abeni ip_hdr(hint)->tos == iph->tos; 31302b24941SPaolo Abeni } 31402b24941SPaolo Abeni 31597ff7ffbSPaolo Abeni INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *)); 31697ff7ffbSPaolo Abeni INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *)); 3175fa12739SEdward Cree static int ip_rcv_finish_core(struct net *net, struct sock *sk, 31802b24941SPaolo Abeni struct sk_buff *skb, struct net_device *dev, 31902b24941SPaolo Abeni const struct sk_buff *hint) 3201da177e4SLinus Torvalds { 321eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph = ip_hdr(skb); 3227487449cSPaolo Abeni int (*edemux)(struct sk_buff *skb); 323c1f166d1SMenglong Dong int err, drop_reason; 3247487449cSPaolo Abeni struct rtable *rt; 325c1f166d1SMenglong Dong 326c1f166d1SMenglong Dong drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; 3271da177e4SLinus Torvalds 32802b24941SPaolo Abeni if (ip_can_use_hint(skb, iph, hint)) { 32902b24941SPaolo Abeni err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos, 33002b24941SPaolo Abeni dev, hint); 33102b24941SPaolo Abeni if (unlikely(err)) 33202b24941SPaolo Abeni goto drop_error; 33302b24941SPaolo Abeni } 33402b24941SPaolo Abeni 335e21145a9SNikolay Borisov if (net->ipv4.sysctl_ip_early_demux && 33663e51b6aSEric Dumazet !skb_dst(skb) && 33763e51b6aSEric Dumazet !skb->sk && 33863e51b6aSEric Dumazet !ip_is_fragment(iph)) { 33941063e9dSDavid S. Miller const struct net_protocol *ipprot; 34041063e9dSDavid S. Miller int protocol = iph->protocol; 34141063e9dSDavid S. Miller 34241063e9dSDavid S. Miller ipprot = rcu_dereference(inet_protos[protocol]); 343dddb64bcSsubashab@codeaurora.org if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) { 34497ff7ffbSPaolo Abeni err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux, 34597ff7ffbSPaolo Abeni udp_v4_early_demux, skb); 3467487449cSPaolo Abeni if (unlikely(err)) 3477487449cSPaolo Abeni goto drop_error; 3489cb429d6SEric Dumazet /* must reload iph, skb->head might have changed */ 3499cb429d6SEric Dumazet iph = ip_hdr(skb); 3509cb429d6SEric Dumazet } 3516648bd7eSAlexander Duyck } 35241063e9dSDavid S. Miller 353160eb5a6SDavid S. Miller /* 354160eb5a6SDavid S. Miller * Initialise the virtual path cache for the packet. It describes 355160eb5a6SDavid S. Miller * how the packet travels inside Linux networking. 356160eb5a6SDavid S. Miller */ 357f38a9eb1SThomas Graf if (!skb_valid_dst(skb)) { 3587487449cSPaolo Abeni err = ip_route_input_noref(skb, iph->daddr, iph->saddr, 359d6f64d72SMark Tomlinson iph->tos, dev); 3607487449cSPaolo Abeni if (unlikely(err)) 3617487449cSPaolo Abeni goto drop_error; 3622c2910a4SDietmar Eggemann } 3631da177e4SLinus Torvalds 364c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 365adf30907SEric Dumazet if (unlikely(skb_dst(skb)->tclassid)) { 3667a9b2d59SEric Dumazet struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); 367adf30907SEric Dumazet u32 idx = skb_dst(skb)->tclassid; 3681da177e4SLinus Torvalds st[idx&0xFF].o_packets++; 3691da177e4SLinus Torvalds st[idx&0xFF].o_bytes += skb->len; 3701da177e4SLinus Torvalds st[(idx>>16)&0xFF].i_packets++; 3711da177e4SLinus Torvalds st[(idx>>16)&0xFF].i_bytes += skb->len; 3721da177e4SLinus Torvalds } 3731da177e4SLinus Torvalds #endif 3741da177e4SLinus Torvalds 3758c83f2dfSStephen Suryaputra if (iph->ihl > 5 && ip_rcv_options(skb, dev)) 3761da177e4SLinus Torvalds goto drop; 3771da177e4SLinus Torvalds 378511c3f92SEric Dumazet rt = skb_rtable(skb); 379edf391ffSNeil Horman if (rt->rt_type == RTN_MULTICAST) { 380b15084ecSEric Dumazet __IP_UPD_PO_STATS(net, IPSTATS_MIB_INMCAST, skb->len); 38112b74dfaSJohannes Berg } else if (rt->rt_type == RTN_BROADCAST) { 382b15084ecSEric Dumazet __IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len); 38312b74dfaSJohannes Berg } else if (skb->pkt_type == PACKET_BROADCAST || 38412b74dfaSJohannes Berg skb->pkt_type == PACKET_MULTICAST) { 385d6f64d72SMark Tomlinson struct in_device *in_dev = __in_dev_get_rcu(dev); 38612b74dfaSJohannes Berg 38712b74dfaSJohannes Berg /* RFC 1122 3.3.6: 38812b74dfaSJohannes Berg * 38912b74dfaSJohannes Berg * When a host sends a datagram to a link-layer broadcast 39012b74dfaSJohannes Berg * address, the IP destination address MUST be a legal IP 39112b74dfaSJohannes Berg * broadcast or IP multicast address. 39212b74dfaSJohannes Berg * 39312b74dfaSJohannes Berg * A host SHOULD silently discard a datagram that is received 39412b74dfaSJohannes Berg * via a link-layer broadcast (see Section 2.4) but does not 39512b74dfaSJohannes Berg * specify an IP multicast or broadcast destination address. 39612b74dfaSJohannes Berg * 39712b74dfaSJohannes Berg * This doesn't explicitly say L2 *broadcast*, but broadcast is 39812b74dfaSJohannes Berg * in a way a form of multicast and the most common use case for 39912b74dfaSJohannes Berg * this is 802.11 protecting against cross-station spoofing (the 40012b74dfaSJohannes Berg * so-called "hole-196" attack) so do it for both. 40112b74dfaSJohannes Berg */ 40212b74dfaSJohannes Berg if (in_dev && 403c1f166d1SMenglong Dong IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST)) { 404c1f166d1SMenglong Dong drop_reason = SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST; 40512b74dfaSJohannes Berg goto drop; 40612b74dfaSJohannes Berg } 407c1f166d1SMenglong Dong } 4085506b54bSMitsuru Chinen 4095fa12739SEdward Cree return NET_RX_SUCCESS; 4101da177e4SLinus Torvalds 4111da177e4SLinus Torvalds drop: 412c1f166d1SMenglong Dong kfree_skb_reason(skb, drop_reason); 4131da177e4SLinus Torvalds return NET_RX_DROP; 4147487449cSPaolo Abeni 4157487449cSPaolo Abeni drop_error: 416c1f166d1SMenglong Dong if (err == -EXDEV) { 417c1f166d1SMenglong Dong drop_reason = SKB_DROP_REASON_IP_RPFILTER; 4187487449cSPaolo Abeni __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER); 419c1f166d1SMenglong Dong } 4207487449cSPaolo Abeni goto drop; 4211da177e4SLinus Torvalds } 4221da177e4SLinus Torvalds 4235fa12739SEdward Cree static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 4245fa12739SEdward Cree { 425a1fd1ad2SDavid Ahern struct net_device *dev = skb->dev; 426efe6aacaSEdward Cree int ret; 4275fa12739SEdward Cree 428efe6aacaSEdward Cree /* if ingress device is enslaved to an L3 master device pass the 429efe6aacaSEdward Cree * skb to its handler for processing 430efe6aacaSEdward Cree */ 431efe6aacaSEdward Cree skb = l3mdev_ip_rcv(skb); 432efe6aacaSEdward Cree if (!skb) 433efe6aacaSEdward Cree return NET_RX_SUCCESS; 434efe6aacaSEdward Cree 43502b24941SPaolo Abeni ret = ip_rcv_finish_core(net, sk, skb, dev, NULL); 4365fa12739SEdward Cree if (ret != NET_RX_DROP) 4375fa12739SEdward Cree ret = dst_input(skb); 4385fa12739SEdward Cree return ret; 4395fa12739SEdward Cree } 4405fa12739SEdward Cree 4411da177e4SLinus Torvalds /* 4421da177e4SLinus Torvalds * Main IP Receive routine. 4431da177e4SLinus Torvalds */ 44417266ee9SEdward Cree static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) 4451da177e4SLinus Torvalds { 446b71d1d42SEric Dumazet const struct iphdr *iph; 44733cba429SMenglong Dong int drop_reason; 44858615242SThomas Graf u32 len; 4491da177e4SLinus Torvalds 4501da177e4SLinus Torvalds /* When the interface is in promisc. mode, drop all the crap 4511da177e4SLinus Torvalds * that it receives, do not try to analyse it. 4521da177e4SLinus Torvalds */ 45333cba429SMenglong Dong if (skb->pkt_type == PACKET_OTHERHOST) { 454*794c24e9SJeffrey Ji dev_core_stats_rx_otherhost_dropped_inc(skb->dev); 45533cba429SMenglong Dong drop_reason = SKB_DROP_REASON_OTHERHOST; 4561da177e4SLinus Torvalds goto drop; 45733cba429SMenglong Dong } 4581da177e4SLinus Torvalds 459b15084ecSEric Dumazet __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len); 4601da177e4SLinus Torvalds 46151456b29SIan Morris skb = skb_share_check(skb, GFP_ATOMIC); 46251456b29SIan Morris if (!skb) { 463b45386efSEric Dumazet __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); 4641da177e4SLinus Torvalds goto out; 4651da177e4SLinus Torvalds } 4661da177e4SLinus Torvalds 46733cba429SMenglong Dong drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; 4681da177e4SLinus Torvalds if (!pskb_may_pull(skb, sizeof(struct iphdr))) 4691da177e4SLinus Torvalds goto inhdr_error; 4701da177e4SLinus Torvalds 471eddc9ec5SArnaldo Carvalho de Melo iph = ip_hdr(skb); 4721da177e4SLinus Torvalds 4731da177e4SLinus Torvalds /* 474c67fa027SJ.H.M. Dassen (Ray) * RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum. 4751da177e4SLinus Torvalds * 4761da177e4SLinus Torvalds * Is the datagram acceptable? 4771da177e4SLinus Torvalds * 4781da177e4SLinus Torvalds * 1. Length at least the size of an ip header 4791da177e4SLinus Torvalds * 2. Version of 4 4801da177e4SLinus Torvalds * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums] 4811da177e4SLinus Torvalds * 4. Doesn't have a bogus length 4821da177e4SLinus Torvalds */ 4831da177e4SLinus Torvalds 4841da177e4SLinus Torvalds if (iph->ihl < 5 || iph->version != 4) 4851da177e4SLinus Torvalds goto inhdr_error; 4861da177e4SLinus Torvalds 4871f07d03eSEric Dumazet BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1); 4881f07d03eSEric Dumazet BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0); 4891f07d03eSEric Dumazet BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE); 49098f61995SEric Dumazet __IP_ADD_STATS(net, 4911f07d03eSEric Dumazet IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK), 4921f07d03eSEric Dumazet max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); 4931f07d03eSEric Dumazet 4941da177e4SLinus Torvalds if (!pskb_may_pull(skb, iph->ihl*4)) 4951da177e4SLinus Torvalds goto inhdr_error; 4961da177e4SLinus Torvalds 497eddc9ec5SArnaldo Carvalho de Melo iph = ip_hdr(skb); 4981da177e4SLinus Torvalds 499e9c60422SThomas Graf if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) 5006a5dc9e5SEric Dumazet goto csum_error; 5011da177e4SLinus Torvalds 50258615242SThomas Graf len = ntohs(iph->tot_len); 503704aed53SMitsuru Chinen if (skb->len < len) { 50433cba429SMenglong Dong drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; 505b45386efSEric Dumazet __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS); 506704aed53SMitsuru Chinen goto drop; 507704aed53SMitsuru Chinen } else if (len < (iph->ihl*4)) 5081da177e4SLinus Torvalds goto inhdr_error; 5091da177e4SLinus Torvalds 5101da177e4SLinus Torvalds /* Our transport medium may have padded the buffer out. Now we know it 5111da177e4SLinus Torvalds * is IP we can trim to the true length of the frame. 5121da177e4SLinus Torvalds * Note this now means skb->len holds ntohs(iph->tot_len). 5131da177e4SLinus Torvalds */ 5141da177e4SLinus Torvalds if (pskb_trim_rcsum(skb, len)) { 515b45386efSEric Dumazet __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); 5161da177e4SLinus Torvalds goto drop; 5171da177e4SLinus Torvalds } 5181da177e4SLinus Torvalds 5196c57f045SRoss Lagerwall iph = ip_hdr(skb); 52021d1196aSEric Dumazet skb->transport_header = skb->network_header + iph->ihl*4; 52121d1196aSEric Dumazet 52253602f92SStephen Hemminger /* Remove any debris in the socket control block */ 523d569f1d7SGuillaume Chazarain memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 5240b922b7aSDavid Ahern IPCB(skb)->iif = skb->skb_iif; 52553602f92SStephen Hemminger 52671f9dacdSHerbert Xu /* Must drop socket now because of tproxy. */ 527cf7fbe66SJoe Stringer if (!skb_sk_is_prefetched(skb)) 52871f9dacdSHerbert Xu skb_orphan(skb); 52971f9dacdSHerbert Xu 53017266ee9SEdward Cree return skb; 5311da177e4SLinus Torvalds 5326a5dc9e5SEric Dumazet csum_error: 53333cba429SMenglong Dong drop_reason = SKB_DROP_REASON_IP_CSUM; 534b45386efSEric Dumazet __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS); 5351da177e4SLinus Torvalds inhdr_error: 53633cba429SMenglong Dong if (drop_reason == SKB_DROP_REASON_NOT_SPECIFIED) 53733cba429SMenglong Dong drop_reason = SKB_DROP_REASON_IP_INHDR; 538b45386efSEric Dumazet __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); 5391da177e4SLinus Torvalds drop: 54033cba429SMenglong Dong kfree_skb_reason(skb, drop_reason); 5411da177e4SLinus Torvalds out: 54217266ee9SEdward Cree return NULL; 54317266ee9SEdward Cree } 54417266ee9SEdward Cree 54517266ee9SEdward Cree /* 54617266ee9SEdward Cree * IP receive entry point 54717266ee9SEdward Cree */ 54817266ee9SEdward Cree int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, 54917266ee9SEdward Cree struct net_device *orig_dev) 55017266ee9SEdward Cree { 55117266ee9SEdward Cree struct net *net = dev_net(dev); 55217266ee9SEdward Cree 55317266ee9SEdward Cree skb = ip_rcv_core(skb, net); 55417266ee9SEdward Cree if (skb == NULL) 5551da177e4SLinus Torvalds return NET_RX_DROP; 556fb1b6999SYang Wei 55717266ee9SEdward Cree return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, 55817266ee9SEdward Cree net, NULL, skb, dev, NULL, 55917266ee9SEdward Cree ip_rcv_finish); 56017266ee9SEdward Cree } 56117266ee9SEdward Cree 5625fa12739SEdward Cree static void ip_sublist_rcv_finish(struct list_head *head) 56317266ee9SEdward Cree { 56417266ee9SEdward Cree struct sk_buff *skb, *next; 56517266ee9SEdward Cree 5660761680dSJesper Dangaard Brouer list_for_each_entry_safe(skb, next, head, list) { 567992cba7eSDavid S. Miller skb_list_del_init(skb); 5685fa12739SEdward Cree dst_input(skb); 5695fa12739SEdward Cree } 5700761680dSJesper Dangaard Brouer } 5715fa12739SEdward Cree 57202b24941SPaolo Abeni static struct sk_buff *ip_extract_route_hint(const struct net *net, 57302b24941SPaolo Abeni struct sk_buff *skb, int rt_type) 57402b24941SPaolo Abeni { 57502b24941SPaolo Abeni if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST) 57602b24941SPaolo Abeni return NULL; 57702b24941SPaolo Abeni 57802b24941SPaolo Abeni return skb; 57902b24941SPaolo Abeni } 58002b24941SPaolo Abeni 5815fa12739SEdward Cree static void ip_list_rcv_finish(struct net *net, struct sock *sk, 5825fa12739SEdward Cree struct list_head *head) 5835fa12739SEdward Cree { 58402b24941SPaolo Abeni struct sk_buff *skb, *next, *hint = NULL; 5855fa12739SEdward Cree struct dst_entry *curr_dst = NULL; 5865fa12739SEdward Cree struct list_head sublist; 5875fa12739SEdward Cree 588a4ca8b7dSEdward Cree INIT_LIST_HEAD(&sublist); 5895fa12739SEdward Cree list_for_each_entry_safe(skb, next, head, list) { 590a1fd1ad2SDavid Ahern struct net_device *dev = skb->dev; 5915fa12739SEdward Cree struct dst_entry *dst; 5925fa12739SEdward Cree 59322f6bbb7SEdward Cree skb_list_del_init(skb); 594efe6aacaSEdward Cree /* if ingress device is enslaved to an L3 master device pass the 595efe6aacaSEdward Cree * skb to its handler for processing 596efe6aacaSEdward Cree */ 597efe6aacaSEdward Cree skb = l3mdev_ip_rcv(skb); 598efe6aacaSEdward Cree if (!skb) 599efe6aacaSEdward Cree continue; 60002b24941SPaolo Abeni if (ip_rcv_finish_core(net, sk, skb, dev, hint) == NET_RX_DROP) 6015fa12739SEdward Cree continue; 6025fa12739SEdward Cree 6035fa12739SEdward Cree dst = skb_dst(skb); 6045fa12739SEdward Cree if (curr_dst != dst) { 60502b24941SPaolo Abeni hint = ip_extract_route_hint(net, skb, 60602b24941SPaolo Abeni ((struct rtable *)dst)->rt_type); 60702b24941SPaolo Abeni 6085fa12739SEdward Cree /* dispatch old sublist */ 6095fa12739SEdward Cree if (!list_empty(&sublist)) 6105fa12739SEdward Cree ip_sublist_rcv_finish(&sublist); 6115fa12739SEdward Cree /* start new sublist */ 612a4ca8b7dSEdward Cree INIT_LIST_HEAD(&sublist); 6135fa12739SEdward Cree curr_dst = dst; 6145fa12739SEdward Cree } 615a4ca8b7dSEdward Cree list_add_tail(&skb->list, &sublist); 6165fa12739SEdward Cree } 6175fa12739SEdward Cree /* dispatch final sublist */ 618a4ca8b7dSEdward Cree ip_sublist_rcv_finish(&sublist); 6195fa12739SEdward Cree } 6205fa12739SEdward Cree 6215fa12739SEdward Cree static void ip_sublist_rcv(struct list_head *head, struct net_device *dev, 6225fa12739SEdward Cree struct net *net) 6235fa12739SEdward Cree { 62417266ee9SEdward Cree NF_HOOK_LIST(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL, 62517266ee9SEdward Cree head, dev, NULL, ip_rcv_finish); 6265fa12739SEdward Cree ip_list_rcv_finish(net, NULL, head); 62717266ee9SEdward Cree } 62817266ee9SEdward Cree 62917266ee9SEdward Cree /* Receive a list of IP packets */ 63017266ee9SEdward Cree void ip_list_rcv(struct list_head *head, struct packet_type *pt, 63117266ee9SEdward Cree struct net_device *orig_dev) 63217266ee9SEdward Cree { 63317266ee9SEdward Cree struct net_device *curr_dev = NULL; 63417266ee9SEdward Cree struct net *curr_net = NULL; 63517266ee9SEdward Cree struct sk_buff *skb, *next; 63617266ee9SEdward Cree struct list_head sublist; 63717266ee9SEdward Cree 638a4ca8b7dSEdward Cree INIT_LIST_HEAD(&sublist); 63917266ee9SEdward Cree list_for_each_entry_safe(skb, next, head, list) { 64017266ee9SEdward Cree struct net_device *dev = skb->dev; 64117266ee9SEdward Cree struct net *net = dev_net(dev); 64217266ee9SEdward Cree 64322f6bbb7SEdward Cree skb_list_del_init(skb); 64417266ee9SEdward Cree skb = ip_rcv_core(skb, net); 64517266ee9SEdward Cree if (skb == NULL) 64617266ee9SEdward Cree continue; 64717266ee9SEdward Cree 64817266ee9SEdward Cree if (curr_dev != dev || curr_net != net) { 64917266ee9SEdward Cree /* dispatch old sublist */ 65017266ee9SEdward Cree if (!list_empty(&sublist)) 651a4ca8b7dSEdward Cree ip_sublist_rcv(&sublist, curr_dev, curr_net); 65217266ee9SEdward Cree /* start new sublist */ 653a4ca8b7dSEdward Cree INIT_LIST_HEAD(&sublist); 65417266ee9SEdward Cree curr_dev = dev; 65517266ee9SEdward Cree curr_net = net; 65617266ee9SEdward Cree } 657a4ca8b7dSEdward Cree list_add_tail(&skb->list, &sublist); 65817266ee9SEdward Cree } 65917266ee9SEdward Cree /* dispatch final sublist */ 66051210ad5SFlorian Westphal if (!list_empty(&sublist)) 661a4ca8b7dSEdward Cree ip_sublist_rcv(&sublist, curr_dev, curr_net); 6621da177e4SLinus Torvalds } 663