xref: /openbmc/linux/net/ipv4/ip_input.c (revision 98f61995)
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The Internet Protocol (IP) module.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Donald Becker, <becker@super.org>
 *		Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *		Richard Underwood
 *		Stefan Becker, <stefanb@yello.ping.de>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *
 *
 * Fixes:
 *		Alan Cox	:	Commented a couple of minor bits of surplus code
 *		Alan Cox	:	Undefining IP_FORWARD doesn't include the code
 *					(just stops a compiler warning).
 *		Alan Cox	:	Frames with >=MAX_ROUTE record routes, strict routes or loose routes
 *					are junked rather than corrupting things.
 *		Alan Cox	:	Frames to bad broadcast subnets are dumped
 *					We used to process them non broadcast and
 *					boy could that cause havoc.
 *		Alan Cox	:	ip_forward sets the free flag on the
 *					new frame it queues. Still crap because
 *					it copies the frame but at least it
 *					doesn't eat memory too.
 *		Alan Cox	:	Generic queue code and memory fixes.
 *		Fred Van Kempen :	IP fragment support (borrowed from NET2E)
 *		Gerhard Koerting:	Forward fragmented frames correctly.
 *		Gerhard Koerting: 	Fixes to my fix of the above 8-).
 *		Gerhard Koerting:	IP interface addressing fix.
 *		Linus Torvalds	:	More robustness checks
 *		Alan Cox	:	Even more checks: Still not as robust as it ought to be
 *		Alan Cox	:	Save IP header pointer for later
 *		Alan Cox	:	ip option setting
 *		Alan Cox	:	Use ip_tos/ip_ttl settings
 *		Alan Cox	:	Fragmentation bogosity removed
 *					(Thanks to Mark.Bush@prg.ox.ac.uk)
 *		Dmitry Gorodchanin :	Send of a raw packet crash fix.
 *		Alan Cox	:	Silly ip bug when an overlength
 *					fragment turns up. Now frees the
 *					queue.
 *		Linus Torvalds/ :	Memory leakage on fragmentation
 *		Alan Cox	:	handling.
 *		Gerhard Koerting:	Forwarding uses IP priority hints
 *		Teemu Rantanen	:	Fragment problems.
 *		Alan Cox	:	General cleanup, comments and reformat
 *		Alan Cox	:	SNMP statistics
 *		Alan Cox	:	BSD address rule semantics. Also see
 *					UDP as there is a nasty checksum issue
 *					if you do things the wrong way.
 *		Alan Cox	:	Always defrag, moved IP_FORWARD to the config.in file
 *		Alan Cox	: 	IP options adjust sk->priority.
 *		Pedro Roque	:	Fix mtu/length error in ip_forward.
 *		Alan Cox	:	Avoid ip_chk_addr when possible.
 *	Richard Underwood	:	IP multicasting.
 *		Alan Cox	:	Cleaned up multicast handlers.
 *		Alan Cox	:	RAW sockets demultiplex in the BSD style.
 *		Gunther Mayer	:	Fix the SNMP reporting typo
 *		Alan Cox	:	Always in group 224.0.0.1
 *	Pauline Middelink	:	Fast ip_checksum update when forwarding
 *					Masquerading support.
 *		Alan Cox	:	Multicast loopback error for 224.0.0.1
 *		Alan Cox	:	IP_MULTICAST_LOOP option.
 *		Alan Cox	:	Use notifiers.
 *		Bjorn Ekwall	:	Removed ip_csum (from slhc.c too)
 *		Bjorn Ekwall	:	Moved ip_fast_csum to ip.h (inline!)
 *		Stefan Becker   :       Send out ICMP HOST REDIRECT
 *	Arnt Gulbrandsen	:	ip_build_xmit
 *		Alan Cox	:	Per socket routing cache
 *		Alan Cox	:	Fixed routing cache, added header cache.
 *		Alan Cox	:	Loopback didn't work right in original ip_build_xmit - fixed it.
 *		Alan Cox	:	Only send ICMP_REDIRECT if src/dest are the same net.
 *		Alan Cox	:	Incoming IP option handling.
 *		Alan Cox	:	Set saddr on raw output frames as per BSD.
 *		Alan Cox	:	Stopped broadcast source route explosions.
 *		Alan Cox	:	Can disable source routing
 *		Takeshi Sone    :	Masquerading didn't work.
 *	Dave Bonn,Alan Cox	:	Faster IP forwarding whenever possible.
 *		Alan Cox	:	Memory leaks, tramples, misc debugging.
 *		Alan Cox	:	Fixed multicast (by popular demand 8))
 *		Alan Cox	:	Fixed forwarding (by even more popular demand 8))
 *		Alan Cox	:	Fixed SNMP statistics [I think]
 *	Gerhard Koerting	:	IP fragmentation forwarding fix
 *		Alan Cox	:	Device lock against page fault.
 *		Alan Cox	:	IP_HDRINCL facility.
 *	Werner Almesberger	:	Zero fragment bug
 *		Alan Cox	:	RAW IP frame length bug
 *		Alan Cox	:	Outgoing firewall on build_xmit
 *		A.N.Kuznetsov	:	IP_OPTIONS support throughout the kernel
 *		Alan Cox	:	Multicast routing hooks
 *		Jos Vos		:	Do accounting *before* call_in_firewall
 *	Willy Konynenberg	:	Transparent proxying support
 *
 *
 *
 * To Fix:
 *		IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient
 *		and could be made very efficient with the addition of some virtual memory hacks to permit
 *		the allocation of a buffer that can then be 'grown' by twiddling page tables.
 *		Output fragmentation wants updating along with the buffer management to use a single
 *		interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet
 *		output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause
 *		fragmentation anyway.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
1151da177e4SLinus Torvalds 
/* Prefix every format string routed through pr_fmt() with "IPv4: ".
 * Defined ahead of the #include lines; presumably so the kernel logging
 * macros pulled in by those headers pick this definition up - confirm.
 */
#define pr_fmt(fmt) "IPv4: " fmt
117afd46503SJoe Perches 
1181da177e4SLinus Torvalds #include <linux/module.h>
1191da177e4SLinus Torvalds #include <linux/types.h>
1201da177e4SLinus Torvalds #include <linux/kernel.h>
1211da177e4SLinus Torvalds #include <linux/string.h>
1221da177e4SLinus Torvalds #include <linux/errno.h>
1235a0e3ad6STejun Heo #include <linux/slab.h>
1241da177e4SLinus Torvalds 
1251da177e4SLinus Torvalds #include <linux/net.h>
1261da177e4SLinus Torvalds #include <linux/socket.h>
1271da177e4SLinus Torvalds #include <linux/sockios.h>
1281da177e4SLinus Torvalds #include <linux/in.h>
1291da177e4SLinus Torvalds #include <linux/inet.h>
13014c85021SArnaldo Carvalho de Melo #include <linux/inetdevice.h>
1311da177e4SLinus Torvalds #include <linux/netdevice.h>
1321da177e4SLinus Torvalds #include <linux/etherdevice.h>
1331da177e4SLinus Torvalds 
1341da177e4SLinus Torvalds #include <net/snmp.h>
1351da177e4SLinus Torvalds #include <net/ip.h>
1361da177e4SLinus Torvalds #include <net/protocol.h>
1371da177e4SLinus Torvalds #include <net/route.h>
1381da177e4SLinus Torvalds #include <linux/skbuff.h>
1391da177e4SLinus Torvalds #include <net/sock.h>
1401da177e4SLinus Torvalds #include <net/arp.h>
1411da177e4SLinus Torvalds #include <net/icmp.h>
1421da177e4SLinus Torvalds #include <net/raw.h>
1431da177e4SLinus Torvalds #include <net/checksum.h>
1441f07d03eSEric Dumazet #include <net/inet_ecn.h>
1451da177e4SLinus Torvalds #include <linux/netfilter_ipv4.h>
1461da177e4SLinus Torvalds #include <net/xfrm.h>
1471da177e4SLinus Torvalds #include <linux/mroute.h>
1481da177e4SLinus Torvalds #include <linux/netlink.h>
149f38a9eb1SThomas Graf #include <net/dst_metadata.h>
1501da177e4SLinus Torvalds 
1511da177e4SLinus Torvalds /*
15266018506SEric Dumazet  *	Process Router Attention IP option (RFC 2113)
1531da177e4SLinus Torvalds  */
154ba57b4dbSDavid S. Miller bool ip_call_ra_chain(struct sk_buff *skb)
1551da177e4SLinus Torvalds {
1561da177e4SLinus Torvalds 	struct ip_ra_chain *ra;
157eddc9ec5SArnaldo Carvalho de Melo 	u8 protocol = ip_hdr(skb)->protocol;
1581da177e4SLinus Torvalds 	struct sock *last = NULL;
159cb84663eSDenis V. Lunev 	struct net_device *dev = skb->dev;
16037fcbab6SEric W. Biederman 	struct net *net = dev_net(dev);
1611da177e4SLinus Torvalds 
16266018506SEric Dumazet 	for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) {
1631da177e4SLinus Torvalds 		struct sock *sk = ra->sk;
1641da177e4SLinus Torvalds 
1651da177e4SLinus Torvalds 		/* If socket is bound to an interface, only report
1661da177e4SLinus Torvalds 		 * the packet if it came  from that interface.
1671da177e4SLinus Torvalds 		 */
168c720c7e8SEric Dumazet 		if (sk && inet_sk(sk)->inet_num == protocol &&
1691da177e4SLinus Torvalds 		    (!sk->sk_bound_dev_if ||
170cb84663eSDenis V. Lunev 		     sk->sk_bound_dev_if == dev->ifindex) &&
17137fcbab6SEric W. Biederman 		    net_eq(sock_net(sk), net)) {
17256f8a75cSPaul Gortmaker 			if (ip_is_fragment(ip_hdr(skb))) {
17319bcf9f2SEric W. Biederman 				if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN))
174ba57b4dbSDavid S. Miller 					return true;
1751da177e4SLinus Torvalds 			}
1761da177e4SLinus Torvalds 			if (last) {
1771da177e4SLinus Torvalds 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1781da177e4SLinus Torvalds 				if (skb2)
1791da177e4SLinus Torvalds 					raw_rcv(last, skb2);
1801da177e4SLinus Torvalds 			}
1811da177e4SLinus Torvalds 			last = sk;
1821da177e4SLinus Torvalds 		}
1831da177e4SLinus Torvalds 	}
1841da177e4SLinus Torvalds 
1851da177e4SLinus Torvalds 	if (last) {
1861da177e4SLinus Torvalds 		raw_rcv(last, skb);
187ba57b4dbSDavid S. Miller 		return true;
1881da177e4SLinus Torvalds 	}
189ba57b4dbSDavid S. Miller 	return false;
1901da177e4SLinus Torvalds }
1911da177e4SLinus Torvalds 
1920c4b51f0SEric W. Biederman static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
1931da177e4SLinus Torvalds {
19421d1196aSEric Dumazet 	__skb_pull(skb, skb_network_header_len(skb));
1951da177e4SLinus Torvalds 
1961da177e4SLinus Torvalds 	rcu_read_lock();
1971da177e4SLinus Torvalds 	{
198eddc9ec5SArnaldo Carvalho de Melo 		int protocol = ip_hdr(skb)->protocol;
19932613090SAlexey Dobriyan 		const struct net_protocol *ipprot;
200f9242b6bSDavid S. Miller 		int raw;
2011da177e4SLinus Torvalds 
2021da177e4SLinus Torvalds 	resubmit:
2037bc54c90SPavel Emelyanov 		raw = raw_local_deliver(skb, protocol);
2047bc54c90SPavel Emelyanov 
205f9242b6bSDavid S. Miller 		ipprot = rcu_dereference(inet_protos[protocol]);
20600db4124SIan Morris 		if (ipprot) {
2071da177e4SLinus Torvalds 			int ret;
2081da177e4SLinus Torvalds 
209b59c2701SPatrick McHardy 			if (!ipprot->no_policy) {
210b59c2701SPatrick McHardy 				if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
2111da177e4SLinus Torvalds 					kfree_skb(skb);
2121da177e4SLinus Torvalds 					goto out;
2131da177e4SLinus Torvalds 				}
214b59c2701SPatrick McHardy 				nf_reset(skb);
215b59c2701SPatrick McHardy 			}
2161da177e4SLinus Torvalds 			ret = ipprot->handler(skb);
2171da177e4SLinus Torvalds 			if (ret < 0) {
2181da177e4SLinus Torvalds 				protocol = -ret;
2191da177e4SLinus Torvalds 				goto resubmit;
2201da177e4SLinus Torvalds 			}
221b45386efSEric Dumazet 			__IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
2221da177e4SLinus Torvalds 		} else {
2237bc54c90SPavel Emelyanov 			if (!raw) {
2241da177e4SLinus Torvalds 				if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
225b45386efSEric Dumazet 					__IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS);
2261da177e4SLinus Torvalds 					icmp_send(skb, ICMP_DEST_UNREACH,
2271da177e4SLinus Torvalds 						  ICMP_PROT_UNREACH, 0);
2281da177e4SLinus Torvalds 				}
2291da177e4SLinus Torvalds 				kfree_skb(skb);
230d8c6f4b9SNeil Horman 			} else {
231b45386efSEric Dumazet 				__IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
232d8c6f4b9SNeil Horman 				consume_skb(skb);
233d8c6f4b9SNeil Horman 			}
2341da177e4SLinus Torvalds 		}
2351da177e4SLinus Torvalds 	}
2361da177e4SLinus Torvalds  out:
2371da177e4SLinus Torvalds 	rcu_read_unlock();
2381da177e4SLinus Torvalds 
2391da177e4SLinus Torvalds 	return 0;
2401da177e4SLinus Torvalds }
2411da177e4SLinus Torvalds 
2421da177e4SLinus Torvalds /*
2431da177e4SLinus Torvalds  * 	Deliver IP Packets to the higher protocol layers.
2441da177e4SLinus Torvalds  */
2451da177e4SLinus Torvalds int ip_local_deliver(struct sk_buff *skb)
2461da177e4SLinus Torvalds {
2471da177e4SLinus Torvalds 	/*
2481da177e4SLinus Torvalds 	 *	Reassemble IP fragments.
2491da177e4SLinus Torvalds 	 */
25019bcf9f2SEric W. Biederman 	struct net *net = dev_net(skb->dev);
2511da177e4SLinus Torvalds 
25256f8a75cSPaul Gortmaker 	if (ip_is_fragment(ip_hdr(skb))) {
25319bcf9f2SEric W. Biederman 		if (ip_defrag(net, skb, IP_DEFRAG_LOCAL_DELIVER))
2541da177e4SLinus Torvalds 			return 0;
2551da177e4SLinus Torvalds 	}
2561da177e4SLinus Torvalds 
25729a26a56SEric W. Biederman 	return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN,
25819bcf9f2SEric W. Biederman 		       net, NULL, skb, skb->dev, NULL,
2591da177e4SLinus Torvalds 		       ip_local_deliver_finish);
2601da177e4SLinus Torvalds }
2611da177e4SLinus Torvalds 
2626a91395fSDavid S. Miller static inline bool ip_rcv_options(struct sk_buff *skb)
263d245407eSThomas Graf {
264d245407eSThomas Graf 	struct ip_options *opt;
265b71d1d42SEric Dumazet 	const struct iphdr *iph;
266d245407eSThomas Graf 	struct net_device *dev = skb->dev;
267d245407eSThomas Graf 
268d245407eSThomas Graf 	/* It looks as overkill, because not all
269d245407eSThomas Graf 	   IP options require packet mangling.
270d245407eSThomas Graf 	   But it is the easiest for now, especially taking
271d245407eSThomas Graf 	   into account that combination of IP options
272d245407eSThomas Graf 	   and running sniffer is extremely rare condition.
273d245407eSThomas Graf 					      --ANK (980813)
274d245407eSThomas Graf 	*/
275d245407eSThomas Graf 	if (skb_cow(skb, skb_headroom(skb))) {
276b45386efSEric Dumazet 		__IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INDISCARDS);
277d245407eSThomas Graf 		goto drop;
278d245407eSThomas Graf 	}
279d245407eSThomas Graf 
280eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
28122aba383SDenis V. Lunev 	opt = &(IPCB(skb)->opt);
28222aba383SDenis V. Lunev 	opt->optlen = iph->ihl*4 - sizeof(struct iphdr);
283d245407eSThomas Graf 
284c346dca1SYOSHIFUJI Hideaki 	if (ip_options_compile(dev_net(dev), opt, skb)) {
285b45386efSEric Dumazet 		__IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
286d245407eSThomas Graf 		goto drop;
287d245407eSThomas Graf 	}
288d245407eSThomas Graf 
289d245407eSThomas Graf 	if (unlikely(opt->srr)) {
2906e8b11b4SEric Dumazet 		struct in_device *in_dev = __in_dev_get_rcu(dev);
2916e8b11b4SEric Dumazet 
292d245407eSThomas Graf 		if (in_dev) {
293d245407eSThomas Graf 			if (!IN_DEV_SOURCE_ROUTE(in_dev)) {
294e87cc472SJoe Perches 				if (IN_DEV_LOG_MARTIANS(in_dev))
295e87cc472SJoe Perches 					net_info_ratelimited("source route option %pI4 -> %pI4\n",
296e87cc472SJoe Perches 							     &iph->saddr,
297e87cc472SJoe Perches 							     &iph->daddr);
298d245407eSThomas Graf 				goto drop;
299d245407eSThomas Graf 			}
300d245407eSThomas Graf 		}
301d245407eSThomas Graf 
302d245407eSThomas Graf 		if (ip_options_rcv_srr(skb))
303d245407eSThomas Graf 			goto drop;
304d245407eSThomas Graf 	}
305d245407eSThomas Graf 
3066a91395fSDavid S. Miller 	return false;
307d245407eSThomas Graf drop:
3086a91395fSDavid S. Miller 	return true;
309d245407eSThomas Graf }
310d245407eSThomas Graf 
3110c4b51f0SEric W. Biederman static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
3121da177e4SLinus Torvalds {
313eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
3145506b54bSMitsuru Chinen 	struct rtable *rt;
3151da177e4SLinus Torvalds 
316e21145a9SNikolay Borisov 	if (net->ipv4.sysctl_ip_early_demux &&
31763e51b6aSEric Dumazet 	    !skb_dst(skb) &&
31863e51b6aSEric Dumazet 	    !skb->sk &&
31963e51b6aSEric Dumazet 	    !ip_is_fragment(iph)) {
32041063e9dSDavid S. Miller 		const struct net_protocol *ipprot;
32141063e9dSDavid S. Miller 		int protocol = iph->protocol;
32241063e9dSDavid S. Miller 
32341063e9dSDavid S. Miller 		ipprot = rcu_dereference(inet_protos[protocol]);
3249cb429d6SEric Dumazet 		if (ipprot && ipprot->early_demux) {
325160eb5a6SDavid S. Miller 			ipprot->early_demux(skb);
3269cb429d6SEric Dumazet 			/* must reload iph, skb->head might have changed */
3279cb429d6SEric Dumazet 			iph = ip_hdr(skb);
3289cb429d6SEric Dumazet 		}
3296648bd7eSAlexander Duyck 	}
33041063e9dSDavid S. Miller 
331160eb5a6SDavid S. Miller 	/*
332160eb5a6SDavid S. Miller 	 *	Initialise the virtual path cache for the packet. It describes
333160eb5a6SDavid S. Miller 	 *	how the packet travels inside Linux networking.
334160eb5a6SDavid S. Miller 	 */
335f38a9eb1SThomas Graf 	if (!skb_valid_dst(skb)) {
336c6cffba4SDavid S. Miller 		int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
337c10237e0SDavid S. Miller 					       iph->tos, skb->dev);
3383e192beaSThomas Graf 		if (unlikely(err)) {
339251da413SDavid S. Miller 			if (err == -EXDEV)
34038184b3bSEric W. Biederman 				NET_INC_STATS_BH(net, LINUX_MIB_IPRPFILTER);
3411da177e4SLinus Torvalds 			goto drop;
3421da177e4SLinus Torvalds 		}
3432c2910a4SDietmar Eggemann 	}
3441da177e4SLinus Torvalds 
345c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID
346adf30907SEric Dumazet 	if (unlikely(skb_dst(skb)->tclassid)) {
3477a9b2d59SEric Dumazet 		struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
348adf30907SEric Dumazet 		u32 idx = skb_dst(skb)->tclassid;
3491da177e4SLinus Torvalds 		st[idx&0xFF].o_packets++;
3501da177e4SLinus Torvalds 		st[idx&0xFF].o_bytes += skb->len;
3511da177e4SLinus Torvalds 		st[(idx>>16)&0xFF].i_packets++;
3521da177e4SLinus Torvalds 		st[(idx>>16)&0xFF].i_bytes += skb->len;
3531da177e4SLinus Torvalds 	}
3541da177e4SLinus Torvalds #endif
3551da177e4SLinus Torvalds 
356d245407eSThomas Graf 	if (iph->ihl > 5 && ip_rcv_options(skb))
3571da177e4SLinus Torvalds 		goto drop;
3581da177e4SLinus Torvalds 
359511c3f92SEric Dumazet 	rt = skb_rtable(skb);
360edf391ffSNeil Horman 	if (rt->rt_type == RTN_MULTICAST) {
36138184b3bSEric W. Biederman 		IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INMCAST, skb->len);
36212b74dfaSJohannes Berg 	} else if (rt->rt_type == RTN_BROADCAST) {
36338184b3bSEric W. Biederman 		IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INBCAST, skb->len);
36412b74dfaSJohannes Berg 	} else if (skb->pkt_type == PACKET_BROADCAST ||
36512b74dfaSJohannes Berg 		   skb->pkt_type == PACKET_MULTICAST) {
36612b74dfaSJohannes Berg 		struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
36712b74dfaSJohannes Berg 
36812b74dfaSJohannes Berg 		/* RFC 1122 3.3.6:
36912b74dfaSJohannes Berg 		 *
37012b74dfaSJohannes Berg 		 *   When a host sends a datagram to a link-layer broadcast
37112b74dfaSJohannes Berg 		 *   address, the IP destination address MUST be a legal IP
37212b74dfaSJohannes Berg 		 *   broadcast or IP multicast address.
37312b74dfaSJohannes Berg 		 *
37412b74dfaSJohannes Berg 		 *   A host SHOULD silently discard a datagram that is received
37512b74dfaSJohannes Berg 		 *   via a link-layer broadcast (see Section 2.4) but does not
37612b74dfaSJohannes Berg 		 *   specify an IP multicast or broadcast destination address.
37712b74dfaSJohannes Berg 		 *
37812b74dfaSJohannes Berg 		 * This doesn't explicitly say L2 *broadcast*, but broadcast is
37912b74dfaSJohannes Berg 		 * in a way a form of multicast and the most common use case for
38012b74dfaSJohannes Berg 		 * this is 802.11 protecting against cross-station spoofing (the
38112b74dfaSJohannes Berg 		 * so-called "hole-196" attack) so do it for both.
38212b74dfaSJohannes Berg 		 */
38312b74dfaSJohannes Berg 		if (in_dev &&
38412b74dfaSJohannes Berg 		    IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST))
38512b74dfaSJohannes Berg 			goto drop;
38612b74dfaSJohannes Berg 	}
3875506b54bSMitsuru Chinen 
3881da177e4SLinus Torvalds 	return dst_input(skb);
3891da177e4SLinus Torvalds 
3901da177e4SLinus Torvalds drop:
3911da177e4SLinus Torvalds 	kfree_skb(skb);
3921da177e4SLinus Torvalds 	return NET_RX_DROP;
3931da177e4SLinus Torvalds }
3941da177e4SLinus Torvalds 
3951da177e4SLinus Torvalds /*
3961da177e4SLinus Torvalds  * 	Main IP Receive routine.
3971da177e4SLinus Torvalds  */
398f2ccd8faSDavid S. Miller int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
3991da177e4SLinus Torvalds {
400b71d1d42SEric Dumazet 	const struct iphdr *iph;
401e707766cSEric W. Biederman 	struct net *net;
40258615242SThomas Graf 	u32 len;
4031da177e4SLinus Torvalds 
4041da177e4SLinus Torvalds 	/* When the interface is in promisc. mode, drop all the crap
4051da177e4SLinus Torvalds 	 * that it receives, do not try to analyse it.
4061da177e4SLinus Torvalds 	 */
4071da177e4SLinus Torvalds 	if (skb->pkt_type == PACKET_OTHERHOST)
4081da177e4SLinus Torvalds 		goto drop;
4091da177e4SLinus Torvalds 
410edf391ffSNeil Horman 
411e707766cSEric W. Biederman 	net = dev_net(dev);
412e707766cSEric W. Biederman 	IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_IN, skb->len);
4131da177e4SLinus Torvalds 
41451456b29SIan Morris 	skb = skb_share_check(skb, GFP_ATOMIC);
41551456b29SIan Morris 	if (!skb) {
416b45386efSEric Dumazet 		__IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
4171da177e4SLinus Torvalds 		goto out;
4181da177e4SLinus Torvalds 	}
4191da177e4SLinus Torvalds 
4201da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
4211da177e4SLinus Torvalds 		goto inhdr_error;
4221da177e4SLinus Torvalds 
423eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
4241da177e4SLinus Torvalds 
4251da177e4SLinus Torvalds 	/*
426c67fa027SJ.H.M. Dassen (Ray) 	 *	RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum.
4271da177e4SLinus Torvalds 	 *
4281da177e4SLinus Torvalds 	 *	Is the datagram acceptable?
4291da177e4SLinus Torvalds 	 *
4301da177e4SLinus Torvalds 	 *	1.	Length at least the size of an ip header
4311da177e4SLinus Torvalds 	 *	2.	Version of 4
4321da177e4SLinus Torvalds 	 *	3.	Checksums correctly. [Speed optimisation for later, skip loopback checksums]
4331da177e4SLinus Torvalds 	 *	4.	Doesn't have a bogus length
4341da177e4SLinus Torvalds 	 */
4351da177e4SLinus Torvalds 
4361da177e4SLinus Torvalds 	if (iph->ihl < 5 || iph->version != 4)
4371da177e4SLinus Torvalds 		goto inhdr_error;
4381da177e4SLinus Torvalds 
4391f07d03eSEric Dumazet 	BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1);
4401f07d03eSEric Dumazet 	BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0);
4411f07d03eSEric Dumazet 	BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE);
44298f61995SEric Dumazet 	__IP_ADD_STATS(net,
4431f07d03eSEric Dumazet 		       IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK),
4441f07d03eSEric Dumazet 		       max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
4451f07d03eSEric Dumazet 
4461da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, iph->ihl*4))
4471da177e4SLinus Torvalds 		goto inhdr_error;
4481da177e4SLinus Torvalds 
449eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
4501da177e4SLinus Torvalds 
451e9c60422SThomas Graf 	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
4526a5dc9e5SEric Dumazet 		goto csum_error;
4531da177e4SLinus Torvalds 
45458615242SThomas Graf 	len = ntohs(iph->tot_len);
455704aed53SMitsuru Chinen 	if (skb->len < len) {
456b45386efSEric Dumazet 		__IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
457704aed53SMitsuru Chinen 		goto drop;
458704aed53SMitsuru Chinen 	} else if (len < (iph->ihl*4))
4591da177e4SLinus Torvalds 		goto inhdr_error;
4601da177e4SLinus Torvalds 
4611da177e4SLinus Torvalds 	/* Our transport medium may have padded the buffer out. Now we know it
4621da177e4SLinus Torvalds 	 * is IP we can trim to the true length of the frame.
4631da177e4SLinus Torvalds 	 * Note this now means skb->len holds ntohs(iph->tot_len).
4641da177e4SLinus Torvalds 	 */
4651da177e4SLinus Torvalds 	if (pskb_trim_rcsum(skb, len)) {
466b45386efSEric Dumazet 		__IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
4671da177e4SLinus Torvalds 		goto drop;
4681da177e4SLinus Torvalds 	}
4691da177e4SLinus Torvalds 
47021d1196aSEric Dumazet 	skb->transport_header = skb->network_header + iph->ihl*4;
47121d1196aSEric Dumazet 
47253602f92SStephen Hemminger 	/* Remove any debris in the socket control block */
473d569f1d7SGuillaume Chazarain 	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
47453602f92SStephen Hemminger 
47571f9dacdSHerbert Xu 	/* Must drop socket now because of tproxy. */
47671f9dacdSHerbert Xu 	skb_orphan(skb);
47771f9dacdSHerbert Xu 
47829a26a56SEric W. Biederman 	return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
47929a26a56SEric W. Biederman 		       net, NULL, skb, dev, NULL,
4801da177e4SLinus Torvalds 		       ip_rcv_finish);
4811da177e4SLinus Torvalds 
4826a5dc9e5SEric Dumazet csum_error:
483b45386efSEric Dumazet 	__IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
4841da177e4SLinus Torvalds inhdr_error:
485b45386efSEric Dumazet 	__IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
4861da177e4SLinus Torvalds drop:
4871da177e4SLinus Torvalds 	kfree_skb(skb);
4881da177e4SLinus Torvalds out:
4891da177e4SLinus Torvalds 	return NET_RX_DROP;
4901da177e4SLinus Torvalds }
491