xref: /openbmc/linux/net/ipv4/ip_input.c (revision badff6d0)
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The Internet Protocol (IP) module.
 *
 * Version:	$Id: ip_input.c,v 1.55 2002/01/12 07:39:45 davem Exp $
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Donald Becker, <becker@super.org>
 *		Alan Cox, <Alan.Cox@linux.org>
 *		Richard Underwood
 *		Stefan Becker, <stefanb@yello.ping.de>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *
 *
 * Fixes:
 *		Alan Cox	:	Commented a couple of minor bits of surplus code
 *		Alan Cox	:	Undefining IP_FORWARD doesn't include the code
 *					(just stops a compiler warning).
 *		Alan Cox	:	Frames with >=MAX_ROUTE record routes, strict routes or loose routes
 *					are junked rather than corrupting things.
 *		Alan Cox	:	Frames to bad broadcast subnets are dumped
 *					We used to process them non broadcast and
 *					boy could that cause havoc.
 *		Alan Cox	:	ip_forward sets the free flag on the
 *					new frame it queues. Still crap because
 *					it copies the frame but at least it
 *					doesn't eat memory too.
 *		Alan Cox	:	Generic queue code and memory fixes.
 *		Fred Van Kempen :	IP fragment support (borrowed from NET2E)
 *		Gerhard Koerting:	Forward fragmented frames correctly.
 *		Gerhard Koerting: 	Fixes to my fix of the above 8-).
 *		Gerhard Koerting:	IP interface addressing fix.
 *		Linus Torvalds	:	More robustness checks
 *		Alan Cox	:	Even more checks: Still not as robust as it ought to be
 *		Alan Cox	:	Save IP header pointer for later
 *		Alan Cox	:	ip option setting
 *		Alan Cox	:	Use ip_tos/ip_ttl settings
 *		Alan Cox	:	Fragmentation bogosity removed
 *					(Thanks to Mark.Bush@prg.ox.ac.uk)
 *		Dmitry Gorodchanin :	Send of a raw packet crash fix.
 *		Alan Cox	:	Silly ip bug when an overlength
 *					fragment turns up. Now frees the
 *					queue.
 *		Linus Torvalds/ :	Memory leakage on fragmentation
 *		Alan Cox	:	handling.
 *		Gerhard Koerting:	Forwarding uses IP priority hints
 *		Teemu Rantanen	:	Fragment problems.
 *		Alan Cox	:	General cleanup, comments and reformat
 *		Alan Cox	:	SNMP statistics
 *		Alan Cox	:	BSD address rule semantics. Also see
 *					UDP as there is a nasty checksum issue
 *					if you do things the wrong way.
 *		Alan Cox	:	Always defrag, moved IP_FORWARD to the config.in file
 *		Alan Cox	: 	IP options adjust sk->priority.
 *		Pedro Roque	:	Fix mtu/length error in ip_forward.
 *		Alan Cox	:	Avoid ip_chk_addr when possible.
 *	Richard Underwood	:	IP multicasting.
 *		Alan Cox	:	Cleaned up multicast handlers.
 *		Alan Cox	:	RAW sockets demultiplex in the BSD style.
 *		Gunther Mayer	:	Fix the SNMP reporting typo
 *		Alan Cox	:	Always in group 224.0.0.1
 *	Pauline Middelink	:	Fast ip_checksum update when forwarding
 *					Masquerading support.
 *		Alan Cox	:	Multicast loopback error for 224.0.0.1
 *		Alan Cox	:	IP_MULTICAST_LOOP option.
 *		Alan Cox	:	Use notifiers.
 *		Bjorn Ekwall	:	Removed ip_csum (from slhc.c too)
 *		Bjorn Ekwall	:	Moved ip_fast_csum to ip.h (inline!)
 *		Stefan Becker   :       Send out ICMP HOST REDIRECT
 *	Arnt Gulbrandsen	:	ip_build_xmit
 *		Alan Cox	:	Per socket routing cache
 *		Alan Cox	:	Fixed routing cache, added header cache.
 *		Alan Cox	:	Loopback didn't work right in original ip_build_xmit - fixed it.
 *		Alan Cox	:	Only send ICMP_REDIRECT if src/dest are the same net.
 *		Alan Cox	:	Incoming IP option handling.
 *		Alan Cox	:	Set saddr on raw output frames as per BSD.
 *		Alan Cox	:	Stopped broadcast source route explosions.
 *		Alan Cox	:	Can disable source routing
 *		Takeshi Sone    :	Masquerading didn't work.
 *	Dave Bonn,Alan Cox	:	Faster IP forwarding whenever possible.
 *		Alan Cox	:	Memory leaks, tramples, misc debugging.
 *		Alan Cox	:	Fixed multicast (by popular demand 8))
 *		Alan Cox	:	Fixed forwarding (by even more popular demand 8))
 *		Alan Cox	:	Fixed SNMP statistics [I think]
 *	Gerhard Koerting	:	IP fragmentation forwarding fix
 *		Alan Cox	:	Device lock against page fault.
 *		Alan Cox	:	IP_HDRINCL facility.
 *	Werner Almesberger	:	Zero fragment bug
 *		Alan Cox	:	RAW IP frame length bug
 *		Alan Cox	:	Outgoing firewall on build_xmit
 *		A.N.Kuznetsov	:	IP_OPTIONS support throughout the kernel
 *		Alan Cox	:	Multicast routing hooks
 *		Jos Vos		:	Do accounting *before* call_in_firewall
 *	Willy Konynenberg	:	Transparent proxying support
 *
 *
 *
 * To Fix:
 *		IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient
 *		and could be made very efficient with the addition of some virtual memory hacks to permit
 *		the allocation of a buffer that can then be 'grown' by twiddling page tables.
 *		Output fragmentation wants updating along with the buffer management to use a single
 *		interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet
 *		output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause
 *		fragmentation anyway.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
1171da177e4SLinus Torvalds 
1181da177e4SLinus Torvalds #include <asm/system.h>
1191da177e4SLinus Torvalds #include <linux/module.h>
1201da177e4SLinus Torvalds #include <linux/types.h>
1211da177e4SLinus Torvalds #include <linux/kernel.h>
1221da177e4SLinus Torvalds #include <linux/string.h>
1231da177e4SLinus Torvalds #include <linux/errno.h>
1241da177e4SLinus Torvalds 
1251da177e4SLinus Torvalds #include <linux/net.h>
1261da177e4SLinus Torvalds #include <linux/socket.h>
1271da177e4SLinus Torvalds #include <linux/sockios.h>
1281da177e4SLinus Torvalds #include <linux/in.h>
1291da177e4SLinus Torvalds #include <linux/inet.h>
13014c85021SArnaldo Carvalho de Melo #include <linux/inetdevice.h>
1311da177e4SLinus Torvalds #include <linux/netdevice.h>
1321da177e4SLinus Torvalds #include <linux/etherdevice.h>
1331da177e4SLinus Torvalds 
1341da177e4SLinus Torvalds #include <net/snmp.h>
1351da177e4SLinus Torvalds #include <net/ip.h>
1361da177e4SLinus Torvalds #include <net/protocol.h>
1371da177e4SLinus Torvalds #include <net/route.h>
1381da177e4SLinus Torvalds #include <linux/skbuff.h>
1391da177e4SLinus Torvalds #include <net/sock.h>
1401da177e4SLinus Torvalds #include <net/arp.h>
1411da177e4SLinus Torvalds #include <net/icmp.h>
1421da177e4SLinus Torvalds #include <net/raw.h>
1431da177e4SLinus Torvalds #include <net/checksum.h>
1441da177e4SLinus Torvalds #include <linux/netfilter_ipv4.h>
1451da177e4SLinus Torvalds #include <net/xfrm.h>
1461da177e4SLinus Torvalds #include <linux/mroute.h>
1471da177e4SLinus Torvalds #include <linux/netlink.h>
1481da177e4SLinus Torvalds 
1491da177e4SLinus Torvalds /*
1501da177e4SLinus Torvalds  *	SNMP management statistics
1511da177e4SLinus Torvalds  */
1521da177e4SLinus Torvalds 
153ba89966cSEric Dumazet DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics) __read_mostly;
1541da177e4SLinus Torvalds 
1551da177e4SLinus Torvalds /*
1561da177e4SLinus Torvalds  *	Process Router Attention IP option
1571da177e4SLinus Torvalds  */
1581da177e4SLinus Torvalds int ip_call_ra_chain(struct sk_buff *skb)
1591da177e4SLinus Torvalds {
1601da177e4SLinus Torvalds 	struct ip_ra_chain *ra;
161eddc9ec5SArnaldo Carvalho de Melo 	u8 protocol = ip_hdr(skb)->protocol;
1621da177e4SLinus Torvalds 	struct sock *last = NULL;
1631da177e4SLinus Torvalds 
1641da177e4SLinus Torvalds 	read_lock(&ip_ra_lock);
1651da177e4SLinus Torvalds 	for (ra = ip_ra_chain; ra; ra = ra->next) {
1661da177e4SLinus Torvalds 		struct sock *sk = ra->sk;
1671da177e4SLinus Torvalds 
1681da177e4SLinus Torvalds 		/* If socket is bound to an interface, only report
1691da177e4SLinus Torvalds 		 * the packet if it came  from that interface.
1701da177e4SLinus Torvalds 		 */
1711da177e4SLinus Torvalds 		if (sk && inet_sk(sk)->num == protocol &&
1721da177e4SLinus Torvalds 		    (!sk->sk_bound_dev_if ||
1731da177e4SLinus Torvalds 		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
174eddc9ec5SArnaldo Carvalho de Melo 			if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
1751da177e4SLinus Torvalds 				skb = ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN);
1761da177e4SLinus Torvalds 				if (skb == NULL) {
1771da177e4SLinus Torvalds 					read_unlock(&ip_ra_lock);
1781da177e4SLinus Torvalds 					return 1;
1791da177e4SLinus Torvalds 				}
1801da177e4SLinus Torvalds 			}
1811da177e4SLinus Torvalds 			if (last) {
1821da177e4SLinus Torvalds 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1831da177e4SLinus Torvalds 				if (skb2)
1841da177e4SLinus Torvalds 					raw_rcv(last, skb2);
1851da177e4SLinus Torvalds 			}
1861da177e4SLinus Torvalds 			last = sk;
1871da177e4SLinus Torvalds 		}
1881da177e4SLinus Torvalds 	}
1891da177e4SLinus Torvalds 
1901da177e4SLinus Torvalds 	if (last) {
1911da177e4SLinus Torvalds 		raw_rcv(last, skb);
1921da177e4SLinus Torvalds 		read_unlock(&ip_ra_lock);
1931da177e4SLinus Torvalds 		return 1;
1941da177e4SLinus Torvalds 	}
1951da177e4SLinus Torvalds 	read_unlock(&ip_ra_lock);
1961da177e4SLinus Torvalds 	return 0;
1971da177e4SLinus Torvalds }
1981da177e4SLinus Torvalds 
1991da177e4SLinus Torvalds static inline int ip_local_deliver_finish(struct sk_buff *skb)
2001da177e4SLinus Torvalds {
201c9bdd4b5SArnaldo Carvalho de Melo 	__skb_pull(skb, ip_hdrlen(skb));
2021da177e4SLinus Torvalds 
2031da177e4SLinus Torvalds 	/* Point into the IP datagram, just past the header. */
204badff6d0SArnaldo Carvalho de Melo 	skb_reset_transport_header(skb);
2051da177e4SLinus Torvalds 
2061da177e4SLinus Torvalds 	rcu_read_lock();
2071da177e4SLinus Torvalds 	{
2081da177e4SLinus Torvalds 		/* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */
209eddc9ec5SArnaldo Carvalho de Melo 		int protocol = ip_hdr(skb)->protocol;
2101da177e4SLinus Torvalds 		int hash;
2111da177e4SLinus Torvalds 		struct sock *raw_sk;
2121da177e4SLinus Torvalds 		struct net_protocol *ipprot;
2131da177e4SLinus Torvalds 
2141da177e4SLinus Torvalds 	resubmit:
2151da177e4SLinus Torvalds 		hash = protocol & (MAX_INET_PROTOS - 1);
2161da177e4SLinus Torvalds 		raw_sk = sk_head(&raw_v4_htable[hash]);
2171da177e4SLinus Torvalds 
2181da177e4SLinus Torvalds 		/* If there maybe a raw socket we must check - if not we
2191da177e4SLinus Torvalds 		 * don't care less
2201da177e4SLinus Torvalds 		 */
221eddc9ec5SArnaldo Carvalho de Melo 		if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash))
222d13964f4SPatrick McHardy 			raw_sk = NULL;
2231da177e4SLinus Torvalds 
2241da177e4SLinus Torvalds 		if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) {
2251da177e4SLinus Torvalds 			int ret;
2261da177e4SLinus Torvalds 
227b59c2701SPatrick McHardy 			if (!ipprot->no_policy) {
228b59c2701SPatrick McHardy 				if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
2291da177e4SLinus Torvalds 					kfree_skb(skb);
2301da177e4SLinus Torvalds 					goto out;
2311da177e4SLinus Torvalds 				}
232b59c2701SPatrick McHardy 				nf_reset(skb);
233b59c2701SPatrick McHardy 			}
2341da177e4SLinus Torvalds 			ret = ipprot->handler(skb);
2351da177e4SLinus Torvalds 			if (ret < 0) {
2361da177e4SLinus Torvalds 				protocol = -ret;
2371da177e4SLinus Torvalds 				goto resubmit;
2381da177e4SLinus Torvalds 			}
2391da177e4SLinus Torvalds 			IP_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
2401da177e4SLinus Torvalds 		} else {
2411da177e4SLinus Torvalds 			if (!raw_sk) {
2421da177e4SLinus Torvalds 				if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
2431da177e4SLinus Torvalds 					IP_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS);
2441da177e4SLinus Torvalds 					icmp_send(skb, ICMP_DEST_UNREACH,
2451da177e4SLinus Torvalds 						  ICMP_PROT_UNREACH, 0);
2461da177e4SLinus Torvalds 				}
2471da177e4SLinus Torvalds 			} else
2481da177e4SLinus Torvalds 				IP_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
2491da177e4SLinus Torvalds 			kfree_skb(skb);
2501da177e4SLinus Torvalds 		}
2511da177e4SLinus Torvalds 	}
2521da177e4SLinus Torvalds  out:
2531da177e4SLinus Torvalds 	rcu_read_unlock();
2541da177e4SLinus Torvalds 
2551da177e4SLinus Torvalds 	return 0;
2561da177e4SLinus Torvalds }
2571da177e4SLinus Torvalds 
2581da177e4SLinus Torvalds /*
2591da177e4SLinus Torvalds  * 	Deliver IP Packets to the higher protocol layers.
2601da177e4SLinus Torvalds  */
2611da177e4SLinus Torvalds int ip_local_deliver(struct sk_buff *skb)
2621da177e4SLinus Torvalds {
2631da177e4SLinus Torvalds 	/*
2641da177e4SLinus Torvalds 	 *	Reassemble IP fragments.
2651da177e4SLinus Torvalds 	 */
2661da177e4SLinus Torvalds 
267eddc9ec5SArnaldo Carvalho de Melo 	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
2681da177e4SLinus Torvalds 		skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER);
2691da177e4SLinus Torvalds 		if (!skb)
2701da177e4SLinus Torvalds 			return 0;
2711da177e4SLinus Torvalds 	}
2721da177e4SLinus Torvalds 
2731da177e4SLinus Torvalds 	return NF_HOOK(PF_INET, NF_IP_LOCAL_IN, skb, skb->dev, NULL,
2741da177e4SLinus Torvalds 		       ip_local_deliver_finish);
2751da177e4SLinus Torvalds }
2761da177e4SLinus Torvalds 
277d245407eSThomas Graf static inline int ip_rcv_options(struct sk_buff *skb)
278d245407eSThomas Graf {
279d245407eSThomas Graf 	struct ip_options *opt;
280d245407eSThomas Graf 	struct iphdr *iph;
281d245407eSThomas Graf 	struct net_device *dev = skb->dev;
282d245407eSThomas Graf 
283d245407eSThomas Graf 	/* It looks as overkill, because not all
284d245407eSThomas Graf 	   IP options require packet mangling.
285d245407eSThomas Graf 	   But it is the easiest for now, especially taking
286d245407eSThomas Graf 	   into account that combination of IP options
287d245407eSThomas Graf 	   and running sniffer is extremely rare condition.
288d245407eSThomas Graf 					      --ANK (980813)
289d245407eSThomas Graf 	*/
290d245407eSThomas Graf 	if (skb_cow(skb, skb_headroom(skb))) {
291d245407eSThomas Graf 		IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
292d245407eSThomas Graf 		goto drop;
293d245407eSThomas Graf 	}
294d245407eSThomas Graf 
295eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
296d245407eSThomas Graf 
297d245407eSThomas Graf 	if (ip_options_compile(NULL, skb)) {
298d245407eSThomas Graf 		IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
299d245407eSThomas Graf 		goto drop;
300d245407eSThomas Graf 	}
301d245407eSThomas Graf 
302d245407eSThomas Graf 	opt = &(IPCB(skb)->opt);
303d245407eSThomas Graf 	if (unlikely(opt->srr)) {
304d245407eSThomas Graf 		struct in_device *in_dev = in_dev_get(dev);
305d245407eSThomas Graf 		if (in_dev) {
306d245407eSThomas Graf 			if (!IN_DEV_SOURCE_ROUTE(in_dev)) {
307d245407eSThomas Graf 				if (IN_DEV_LOG_MARTIANS(in_dev) &&
308d245407eSThomas Graf 				    net_ratelimit())
309d245407eSThomas Graf 					printk(KERN_INFO "source route option "
310d245407eSThomas Graf 					       "%u.%u.%u.%u -> %u.%u.%u.%u\n",
311d245407eSThomas Graf 					       NIPQUAD(iph->saddr),
312d245407eSThomas Graf 					       NIPQUAD(iph->daddr));
313d245407eSThomas Graf 				in_dev_put(in_dev);
314d245407eSThomas Graf 				goto drop;
315d245407eSThomas Graf 			}
316d245407eSThomas Graf 
317d245407eSThomas Graf 			in_dev_put(in_dev);
318d245407eSThomas Graf 		}
319d245407eSThomas Graf 
320d245407eSThomas Graf 		if (ip_options_rcv_srr(skb))
321d245407eSThomas Graf 			goto drop;
322d245407eSThomas Graf 	}
323d245407eSThomas Graf 
324d245407eSThomas Graf 	return 0;
325d245407eSThomas Graf drop:
326d245407eSThomas Graf 	return -1;
327d245407eSThomas Graf }
328d245407eSThomas Graf 
3291da177e4SLinus Torvalds static inline int ip_rcv_finish(struct sk_buff *skb)
3301da177e4SLinus Torvalds {
331eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
3321da177e4SLinus Torvalds 
3331da177e4SLinus Torvalds 	/*
3341da177e4SLinus Torvalds 	 *	Initialise the virtual path cache for the packet. It describes
3351da177e4SLinus Torvalds 	 *	how the packet travels inside Linux networking.
3361da177e4SLinus Torvalds 	 */
3370182bd2bSHua Zhong 	if (skb->dst == NULL) {
3383e192beaSThomas Graf 		int err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
3393e192beaSThomas Graf 					 skb->dev);
3403e192beaSThomas Graf 		if (unlikely(err)) {
3412c2910a4SDietmar Eggemann 			if (err == -EHOSTUNREACH)
3422c2910a4SDietmar Eggemann 				IP_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
3431da177e4SLinus Torvalds 			goto drop;
3441da177e4SLinus Torvalds 		}
3452c2910a4SDietmar Eggemann 	}
3461da177e4SLinus Torvalds 
3471da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE
3483e192beaSThomas Graf 	if (unlikely(skb->dst->tclassid)) {
3491da177e4SLinus Torvalds 		struct ip_rt_acct *st = ip_rt_acct + 256*smp_processor_id();
3501da177e4SLinus Torvalds 		u32 idx = skb->dst->tclassid;
3511da177e4SLinus Torvalds 		st[idx&0xFF].o_packets++;
3521da177e4SLinus Torvalds 		st[idx&0xFF].o_bytes+=skb->len;
3531da177e4SLinus Torvalds 		st[(idx>>16)&0xFF].i_packets++;
3541da177e4SLinus Torvalds 		st[(idx>>16)&0xFF].i_bytes+=skb->len;
3551da177e4SLinus Torvalds 	}
3561da177e4SLinus Torvalds #endif
3571da177e4SLinus Torvalds 
358d245407eSThomas Graf 	if (iph->ihl > 5 && ip_rcv_options(skb))
3591da177e4SLinus Torvalds 		goto drop;
3601da177e4SLinus Torvalds 
3611da177e4SLinus Torvalds 	return dst_input(skb);
3621da177e4SLinus Torvalds 
3631da177e4SLinus Torvalds drop:
3641da177e4SLinus Torvalds 	kfree_skb(skb);
3651da177e4SLinus Torvalds 	return NET_RX_DROP;
3661da177e4SLinus Torvalds }
3671da177e4SLinus Torvalds 
3681da177e4SLinus Torvalds /*
3691da177e4SLinus Torvalds  * 	Main IP Receive routine.
3701da177e4SLinus Torvalds  */
371f2ccd8faSDavid S. Miller int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
3721da177e4SLinus Torvalds {
3731da177e4SLinus Torvalds 	struct iphdr *iph;
37458615242SThomas Graf 	u32 len;
3751da177e4SLinus Torvalds 
3761da177e4SLinus Torvalds 	/* When the interface is in promisc. mode, drop all the crap
3771da177e4SLinus Torvalds 	 * that it receives, do not try to analyse it.
3781da177e4SLinus Torvalds 	 */
3791da177e4SLinus Torvalds 	if (skb->pkt_type == PACKET_OTHERHOST)
3801da177e4SLinus Torvalds 		goto drop;
3811da177e4SLinus Torvalds 
3821da177e4SLinus Torvalds 	IP_INC_STATS_BH(IPSTATS_MIB_INRECEIVES);
3831da177e4SLinus Torvalds 
3841da177e4SLinus Torvalds 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
3851da177e4SLinus Torvalds 		IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
3861da177e4SLinus Torvalds 		goto out;
3871da177e4SLinus Torvalds 	}
3881da177e4SLinus Torvalds 
3891da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
3901da177e4SLinus Torvalds 		goto inhdr_error;
3911da177e4SLinus Torvalds 
392eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
3931da177e4SLinus Torvalds 
3941da177e4SLinus Torvalds 	/*
3951da177e4SLinus Torvalds 	 *	RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
3961da177e4SLinus Torvalds 	 *
3971da177e4SLinus Torvalds 	 *	Is the datagram acceptable?
3981da177e4SLinus Torvalds 	 *
3991da177e4SLinus Torvalds 	 *	1.	Length at least the size of an ip header
4001da177e4SLinus Torvalds 	 *	2.	Version of 4
4011da177e4SLinus Torvalds 	 *	3.	Checksums correctly. [Speed optimisation for later, skip loopback checksums]
4021da177e4SLinus Torvalds 	 *	4.	Doesn't have a bogus length
4031da177e4SLinus Torvalds 	 */
4041da177e4SLinus Torvalds 
4051da177e4SLinus Torvalds 	if (iph->ihl < 5 || iph->version != 4)
4061da177e4SLinus Torvalds 		goto inhdr_error;
4071da177e4SLinus Torvalds 
4081da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, iph->ihl*4))
4091da177e4SLinus Torvalds 		goto inhdr_error;
4101da177e4SLinus Torvalds 
411eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
4121da177e4SLinus Torvalds 
413e9c60422SThomas Graf 	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
4141da177e4SLinus Torvalds 		goto inhdr_error;
4151da177e4SLinus Torvalds 
41658615242SThomas Graf 	len = ntohs(iph->tot_len);
41758615242SThomas Graf 	if (skb->len < len || len < (iph->ihl*4))
4181da177e4SLinus Torvalds 		goto inhdr_error;
4191da177e4SLinus Torvalds 
4201da177e4SLinus Torvalds 	/* Our transport medium may have padded the buffer out. Now we know it
4211da177e4SLinus Torvalds 	 * is IP we can trim to the true length of the frame.
4221da177e4SLinus Torvalds 	 * Note this now means skb->len holds ntohs(iph->tot_len).
4231da177e4SLinus Torvalds 	 */
4241da177e4SLinus Torvalds 	if (pskb_trim_rcsum(skb, len)) {
4251da177e4SLinus Torvalds 		IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
4261da177e4SLinus Torvalds 		goto drop;
4271da177e4SLinus Torvalds 	}
4281da177e4SLinus Torvalds 
42953602f92SStephen Hemminger 	/* Remove any debris in the socket control block */
430d569f1d7SGuillaume Chazarain 	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
43153602f92SStephen Hemminger 
4321da177e4SLinus Torvalds 	return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL,
4331da177e4SLinus Torvalds 		       ip_rcv_finish);
4341da177e4SLinus Torvalds 
4351da177e4SLinus Torvalds inhdr_error:
4361da177e4SLinus Torvalds 	IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
4371da177e4SLinus Torvalds drop:
4381da177e4SLinus Torvalds 	kfree_skb(skb);
4391da177e4SLinus Torvalds out:
4401da177e4SLinus Torvalds 	return NET_RX_DROP;
4411da177e4SLinus Torvalds }
4421da177e4SLinus Torvalds 
EXPORT_SYMBOL(ip_statistics);