xref: /openbmc/linux/net/core/dev.c (revision 367b8112)
1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <rzsfl@rz.uni-sb.de>
16  *		Alan Cox <gw4pts@gw4pts.ampr.org>
17  *		David Hinds <dahinds@users.sourceforge.net>
18  *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19  *		Adam Sulmicki <adam@cfar.umd.edu>
20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/sched.h>
83 #include <linux/mutex.h>
84 #include <linux/string.h>
85 #include <linux/mm.h>
86 #include <linux/socket.h>
87 #include <linux/sockios.h>
88 #include <linux/errno.h>
89 #include <linux/interrupt.h>
90 #include <linux/if_ether.h>
91 #include <linux/netdevice.h>
92 #include <linux/etherdevice.h>
93 #include <linux/ethtool.h>
94 #include <linux/notifier.h>
95 #include <linux/skbuff.h>
96 #include <net/net_namespace.h>
97 #include <net/sock.h>
98 #include <linux/rtnetlink.h>
99 #include <linux/proc_fs.h>
100 #include <linux/seq_file.h>
101 #include <linux/stat.h>
102 #include <linux/if_bridge.h>
103 #include <linux/if_macvlan.h>
104 #include <net/dst.h>
105 #include <net/pkt_sched.h>
106 #include <net/checksum.h>
107 #include <linux/highmem.h>
108 #include <linux/init.h>
109 #include <linux/kmod.h>
110 #include <linux/module.h>
111 #include <linux/kallsyms.h>
112 #include <linux/netpoll.h>
113 #include <linux/rcupdate.h>
114 #include <linux/delay.h>
115 #include <net/wext.h>
116 #include <net/iw_handler.h>
117 #include <asm/current.h>
118 #include <linux/audit.h>
119 #include <linux/dmaengine.h>
120 #include <linux/err.h>
121 #include <linux/ctype.h>
122 #include <linux/if_arp.h>
123 #include <linux/if_vlan.h>
124 #include <linux/ip.h>
125 #include <net/ip.h>
126 #include <linux/ipv6.h>
127 #include <linux/in.h>
128 #include <linux/jhash.h>
129 #include <linux/random.h>
130 
131 #include "net-sysfs.h"
132 
133 /*
134  *	The list of packet types we will receive (as opposed to discard)
135  *	and the routines to invoke.
136  *
137  *	Why 16? Because with 16 the only overlap we get on a hash of the
138  *	low nibble of the protocol value is RARP/SNAP/X.25.
139  *
140  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
141  *             sure which should go first, but I bet it won't make much
142  *             difference if we are running VLANs.  The good news is that
143  *             this protocol won't be in the list unless compiled in, so
144  *             the average user (w/out VLANs) will not be adversely affected.
145  *             --BLG
146  *
147  *		0800	IP
148  *		8100    802.1Q VLAN
149  *		0001	802.3
150  *		0002	AX.25
151  *		0004	802.2
152  *		8035	RARP
153  *		0005	SNAP
154  *		0805	X.25
155  *		0806	ARP
156  *		8137	IPX
157  *		0009	Localtalk
158  *		86DD	IPv6
159  */
160 
161 #define PTYPE_HASH_SIZE	(16)
162 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
163 
164 static DEFINE_SPINLOCK(ptype_lock);
165 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
166 static struct list_head ptype_all __read_mostly;	/* Taps */
167 
168 #ifdef CONFIG_NET_DMA
169 struct net_dma {
170 	struct dma_client client;
171 	spinlock_t lock;
172 	cpumask_t channel_mask;
173 	struct dma_chan **channels;
174 };
175 
176 static enum dma_state_client
177 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
178 	enum dma_state state);
179 
180 static struct net_dma net_dma = {
181 	.client = {
182 		.event_callback = netdev_dma_event,
183 	},
184 };
185 #endif
186 
187 /*
188  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
189  * semaphore.
190  *
191  * Pure readers hold dev_base_lock for reading.
192  *
193  * Writers must hold the rtnl semaphore while they loop through the
194  * dev_base_head list, and hold dev_base_lock for writing when they do the
195  * actual updates.  This allows pure readers to access the list even
196  * while a writer is preparing to update it.
197  *
198  * To put it another way, dev_base_lock is held for writing only to
199  * protect against pure readers; the rtnl semaphore provides the
200  * protection against other writers.
201  *
202  * See, for example usages, register_netdevice() and
203  * unregister_netdevice(), which must be called with the rtnl
204  * semaphore held.
205  */
206 DEFINE_RWLOCK(dev_base_lock);
207 
208 EXPORT_SYMBOL(dev_base_lock);
209 
210 #define NETDEV_HASHBITS	8
211 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
212 
213 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
214 {
215 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
216 	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
217 }
218 
219 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
220 {
221 	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
222 }
223 
224 /* Device list insertion */
225 static int list_netdevice(struct net_device *dev)
226 {
227 	struct net *net = dev_net(dev);
228 
229 	ASSERT_RTNL();
230 
231 	write_lock_bh(&dev_base_lock);
232 	list_add_tail(&dev->dev_list, &net->dev_base_head);
233 	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
234 	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
235 	write_unlock_bh(&dev_base_lock);
236 	return 0;
237 }
238 
239 /* Device list removal */
240 static void unlist_netdevice(struct net_device *dev)
241 {
242 	ASSERT_RTNL();
243 
244 	/* Unlink dev from the device chain */
245 	write_lock_bh(&dev_base_lock);
246 	list_del(&dev->dev_list);
247 	hlist_del(&dev->name_hlist);
248 	hlist_del(&dev->index_hlist);
249 	write_unlock_bh(&dev_base_lock);
250 }
251 
252 /*
253  *	Our notifier list
254  */
255 
256 static RAW_NOTIFIER_HEAD(netdev_chain);
257 
258 /*
259  *	Device drivers call our routines to queue packets here. We empty the
260  *	queue in the local softnet handler.
261  */
262 
263 DEFINE_PER_CPU(struct softnet_data, softnet_data);
264 
265 #ifdef CONFIG_LOCKDEP
266 /*
267  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
268  * according to dev->type
269  */
270 static const unsigned short netdev_lock_type[] =
271 	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
272 	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
273 	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
274 	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
275 	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
276 	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
277 	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
278 	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
279 	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
280 	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
281 	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
282 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
283 	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
284 	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
285 	 ARPHRD_NONE};
286 
287 static const char *netdev_lock_name[] =
288 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
289 	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
290 	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
291 	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
292 	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
293 	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
294 	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
295 	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
296 	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
297 	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
298 	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
299 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
300 	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
301 	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
302 	 "_xmit_NONE"};
303 
304 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
305 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
306 
307 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
308 {
309 	int i;
310 
311 	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
312 		if (netdev_lock_type[i] == dev_type)
313 			return i;
314 	/* the last key is used by default */
315 	return ARRAY_SIZE(netdev_lock_type) - 1;
316 }
317 
318 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
319 						 unsigned short dev_type)
320 {
321 	int i;
322 
323 	i = netdev_lock_pos(dev_type);
324 	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
325 				   netdev_lock_name[i]);
326 }
327 
328 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
329 {
330 	int i;
331 
332 	i = netdev_lock_pos(dev->type);
333 	lockdep_set_class_and_name(&dev->addr_list_lock,
334 				   &netdev_addr_lock_key[i],
335 				   netdev_lock_name[i]);
336 }
337 #else
338 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
339 						 unsigned short dev_type)
340 {
341 }
342 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
343 {
344 }
345 #endif
346 
347 /*******************************************************************************
348 
349 		Protocol management and registration routines
350 
351 *******************************************************************************/
352 
353 /*
354  *	Add a protocol ID to the list. Now that the input handler is
355  *	smarter we can dispense with all the messy stuff that used to be
356  *	here.
357  *
358  *	BEWARE!!! Protocol handlers that mangle input packets
359  *	MUST BE last in the hash buckets, and protocol handlers that
360  *	only inspect packets MUST start from the promiscuous ptype_all
361  *	chain in net_bh. This holds today; do not change it.
362  *	Explanation: if a protocol handler that mangles packets were
363  *	first in the list, it could not tell that the packet is cloned
364  *	and must be copied-on-write, so it would modify the clone in
365  *	place and subsequent readers would get a broken packet.
366  *							--ANK (980803)
367  */
368 
369 /**
370  *	dev_add_pack - add packet handler
371  *	@pt: packet type declaration
372  *
373  *	Add a protocol handler to the networking stack. The passed &packet_type
374  *	is linked into kernel lists and may not be freed until it has been
375  *	removed from the kernel lists.
376  *
377  *	This call does not sleep, therefore it cannot guarantee that
378  *	all CPUs that are in the middle of receiving packets will see
379  *	the new packet type (until the next received packet).
380  */
381 
382 void dev_add_pack(struct packet_type *pt)
383 {
384 	int hash;
385 
386 	spin_lock_bh(&ptype_lock);
387 	if (pt->type == htons(ETH_P_ALL))
388 		list_add_rcu(&pt->list, &ptype_all);
389 	else {
390 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
391 		list_add_rcu(&pt->list, &ptype_base[hash]);
392 	}
393 	spin_unlock_bh(&ptype_lock);
394 }
395 
396 /**
397  *	__dev_remove_pack	 - remove packet handler
398  *	@pt: packet type declaration
399  *
400  *	Remove a protocol handler that was previously added to the kernel
401  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
402  *	from the kernel lists and can be freed or reused once this function
403  *	returns.
404  *
405  *      The packet type might still be in use by receivers
406  *	and must not be freed until after all the CPUs have gone
407  *	through a quiescent state.
408  */
409 void __dev_remove_pack(struct packet_type *pt)
410 {
411 	struct list_head *head;
412 	struct packet_type *pt1;
413 
414 	spin_lock_bh(&ptype_lock);
415 
416 	if (pt->type == htons(ETH_P_ALL))
417 		head = &ptype_all;
418 	else
419 		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
420 
421 	list_for_each_entry(pt1, head, list) {
422 		if (pt == pt1) {
423 			list_del_rcu(&pt->list);
424 			goto out;
425 		}
426 	}
427 
428 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
429 out:
430 	spin_unlock_bh(&ptype_lock);
431 }
432 /**
433  *	dev_remove_pack	 - remove packet handler
434  *	@pt: packet type declaration
435  *
436  *	Remove a protocol handler that was previously added to the kernel
437  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
438  *	from the kernel lists and can be freed or reused once this function
439  *	returns.
440  *
441  *	This call sleeps to guarantee that no CPU is looking at the packet
442  *	type after return.
443  */
444 void dev_remove_pack(struct packet_type *pt)
445 {
446 	__dev_remove_pack(pt);
447 
448 	synchronize_net();
449 }
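/*
 * Illustrative sketch (not part of this file): a protocol module would
 * typically register and later unregister a handler roughly as below.
 * The handler and packet_type names are made up for the example.
 *
 *	static int example_rcv(struct sk_buff *skb, struct net_device *dev,
 *			       struct packet_type *pt,
 *			       struct net_device *orig_dev)
 *	{
 *		... inspect or consume skb ...
 *		kfree_skb(skb);
 *		return NET_RX_SUCCESS;
 *	}
 *
 *	static struct packet_type example_ptype = {
 *		.type	= htons(ETH_P_IP),   (or htons(ETH_P_ALL) for a tap)
 *		.func	= example_rcv,
 *	};
 *
 *	dev_add_pack(&example_ptype);
 *	...
 *	dev_remove_pack(&example_ptype);
 */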
450 
451 /******************************************************************************
452 
453 		      Device Boot-time Settings Routines
454 
455 *******************************************************************************/
456 
457 /* Boot time configuration table */
458 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
459 
460 /**
461  *	netdev_boot_setup_add	- add new setup entry
462  *	@name: name of the device
463  *	@map: configured settings for the device
464  *
465  *	Adds new setup entry to the dev_boot_setup list.  The function
466  *	returns 0 on error and 1 on success.  This is a generic routine
467  *	for all netdevices.
468  */
469 static int netdev_boot_setup_add(char *name, struct ifmap *map)
470 {
471 	struct netdev_boot_setup *s;
472 	int i;
473 
474 	s = dev_boot_setup;
475 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
476 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
477 			memset(s[i].name, 0, sizeof(s[i].name));
478 			strlcpy(s[i].name, name, IFNAMSIZ);
479 			memcpy(&s[i].map, map, sizeof(s[i].map));
480 			break;
481 		}
482 	}
483 
484 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
485 }
486 
487 /**
488  *	netdev_boot_setup_check	- check boot time settings
489  *	@dev: the netdevice
490  *
491  * 	Check boot time settings for the device.
492  *	The found settings are set for the device to be used
493  *	later in the device probing.
494  *	Returns 0 if no settings are found, 1 if they are.
495  */
496 int netdev_boot_setup_check(struct net_device *dev)
497 {
498 	struct netdev_boot_setup *s = dev_boot_setup;
499 	int i;
500 
501 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
502 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
503 		    !strcmp(dev->name, s[i].name)) {
504 			dev->irq 	= s[i].map.irq;
505 			dev->base_addr 	= s[i].map.base_addr;
506 			dev->mem_start 	= s[i].map.mem_start;
507 			dev->mem_end 	= s[i].map.mem_end;
508 			return 1;
509 		}
510 	}
511 	return 0;
512 }
513 
514 
515 /**
516  *	netdev_boot_base	- get address from boot time settings
517  *	@prefix: prefix for network device
518  *	@unit: id for network device
519  *
520  * 	Check boot time settings for the base address of device.
521  *	The found settings are set for the device to be used
522  *	later in the device probing.
523  *	Returns 0 if no settings found.
524  */
525 unsigned long netdev_boot_base(const char *prefix, int unit)
526 {
527 	const struct netdev_boot_setup *s = dev_boot_setup;
528 	char name[IFNAMSIZ];
529 	int i;
530 
531 	sprintf(name, "%s%d", prefix, unit);
532 
533 	/*
534 	 * If device already registered then return base of 1
535 	 * to indicate not to probe for this interface
536 	 */
537 	if (__dev_get_by_name(&init_net, name))
538 		return 1;
539 
540 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
541 		if (!strcmp(name, s[i].name))
542 			return s[i].map.base_addr;
543 	return 0;
544 }
545 
546 /*
547  * Saves at boot time configured settings for any netdevice.
548  */
549 int __init netdev_boot_setup(char *str)
550 {
551 	int ints[5];
552 	struct ifmap map;
553 
554 	str = get_options(str, ARRAY_SIZE(ints), ints);
555 	if (!str || !*str)
556 		return 0;
557 
558 	/* Save settings */
559 	memset(&map, 0, sizeof(map));
560 	if (ints[0] > 0)
561 		map.irq = ints[1];
562 	if (ints[0] > 1)
563 		map.base_addr = ints[2];
564 	if (ints[0] > 2)
565 		map.mem_start = ints[3];
566 	if (ints[0] > 3)
567 		map.mem_end = ints[4];
568 
569 	/* Add new entry to the list */
570 	return netdev_boot_setup_add(str, &map);
571 }
572 
573 __setup("netdev=", netdev_boot_setup);
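/*
 * Illustrative note (not part of this file): the handler above consumes a
 * kernel command line option of the form
 *
 *	netdev=irq,base_addr,mem_start,mem_end,name
 *
 * for example "netdev=5,0x300,0,0,eth0"; the concrete values here are
 * made up and purely illustrative.
 */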
574 
575 /*******************************************************************************
576 
577 			    Device Interface Subroutines
578 
579 *******************************************************************************/
580 
581 /**
582  *	__dev_get_by_name	- find a device by its name
583  *	@net: the applicable net namespace
584  *	@name: name to find
585  *
586  *	Find an interface by name. Must be called under RTNL semaphore
587  *	or @dev_base_lock. If the name is found a pointer to the device
588  *	is returned. If the name is not found then %NULL is returned. The
589  *	reference counters are not incremented so the caller must be
590  *	careful with locks.
591  */
592 
593 struct net_device *__dev_get_by_name(struct net *net, const char *name)
594 {
595 	struct hlist_node *p;
596 
597 	hlist_for_each(p, dev_name_hash(net, name)) {
598 		struct net_device *dev
599 			= hlist_entry(p, struct net_device, name_hlist);
600 		if (!strncmp(dev->name, name, IFNAMSIZ))
601 			return dev;
602 	}
603 	return NULL;
604 }
605 
606 /**
607  *	dev_get_by_name		- find a device by its name
608  *	@net: the applicable net namespace
609  *	@name: name to find
610  *
611  *	Find an interface by name. This can be called from any
612  *	context and does its own locking. The returned handle has
613  *	the usage count incremented and the caller must use dev_put() to
614  *	release it when it is no longer needed. %NULL is returned if no
615  *	matching device is found.
616  */
617 
618 struct net_device *dev_get_by_name(struct net *net, const char *name)
619 {
620 	struct net_device *dev;
621 
622 	read_lock(&dev_base_lock);
623 	dev = __dev_get_by_name(net, name);
624 	if (dev)
625 		dev_hold(dev);
626 	read_unlock(&dev_base_lock);
627 	return dev;
628 }
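/*
 * Illustrative sketch (not part of this file): callers pair
 * dev_get_by_name() with dev_put() once they are done, e.g.
 *
 *	struct net_device *dev = dev_get_by_name(&init_net, "eth0");
 *	if (dev) {
 *		... use dev ...
 *		dev_put(dev);
 *	}
 *
 * The name "eth0" and the use of &init_net are assumptions for the
 * example only.
 */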
629 
630 /**
631  *	__dev_get_by_index - find a device by its ifindex
632  *	@net: the applicable net namespace
633  *	@ifindex: index of device
634  *
635  *	Search for an interface by index. Returns %NULL if the device
636  *	is not found or a pointer to the device. The device has not
637  *	had its reference counter increased so the caller must be careful
638  *	about locking. The caller must hold either the RTNL semaphore
639  *	or @dev_base_lock.
640  */
641 
642 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
643 {
644 	struct hlist_node *p;
645 
646 	hlist_for_each(p, dev_index_hash(net, ifindex)) {
647 		struct net_device *dev
648 			= hlist_entry(p, struct net_device, index_hlist);
649 		if (dev->ifindex == ifindex)
650 			return dev;
651 	}
652 	return NULL;
653 }
654 
655 
656 /**
657  *	dev_get_by_index - find a device by its ifindex
658  *	@net: the applicable net namespace
659  *	@ifindex: index of device
660  *
661  *	Search for an interface by index. Returns NULL if the device
662  *	is not found or a pointer to the device. The device returned has
663  *	had a reference added and the pointer is safe until the user calls
664  *	dev_put to indicate they have finished with it.
665  */
666 
667 struct net_device *dev_get_by_index(struct net *net, int ifindex)
668 {
669 	struct net_device *dev;
670 
671 	read_lock(&dev_base_lock);
672 	dev = __dev_get_by_index(net, ifindex);
673 	if (dev)
674 		dev_hold(dev);
675 	read_unlock(&dev_base_lock);
676 	return dev;
677 }
678 
679 /**
680  *	dev_getbyhwaddr - find a device by its hardware address
681  *	@net: the applicable net namespace
682  *	@type: media type of device
683  *	@ha: hardware address
684  *
685  *	Search for an interface by MAC address. Returns NULL if the device
686  *	is not found or a pointer to the device. The caller must hold the
687  *	rtnl semaphore. The returned device has not had its ref count increased
688  *	and the caller must therefore be careful about locking
689  *
690  *	BUGS:
691  *	If the API was consistent this would be __dev_get_by_hwaddr
692  */
693 
694 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
695 {
696 	struct net_device *dev;
697 
698 	ASSERT_RTNL();
699 
700 	for_each_netdev(net, dev)
701 		if (dev->type == type &&
702 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
703 			return dev;
704 
705 	return NULL;
706 }
707 
708 EXPORT_SYMBOL(dev_getbyhwaddr);
709 
710 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
711 {
712 	struct net_device *dev;
713 
714 	ASSERT_RTNL();
715 	for_each_netdev(net, dev)
716 		if (dev->type == type)
717 			return dev;
718 
719 	return NULL;
720 }
721 
722 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
723 
724 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
725 {
726 	struct net_device *dev;
727 
728 	rtnl_lock();
729 	dev = __dev_getfirstbyhwtype(net, type);
730 	if (dev)
731 		dev_hold(dev);
732 	rtnl_unlock();
733 	return dev;
734 }
735 
736 EXPORT_SYMBOL(dev_getfirstbyhwtype);
737 
738 /**
739  *	dev_get_by_flags - find any device with given flags
740  *	@net: the applicable net namespace
741  *	@if_flags: IFF_* values
742  *	@mask: bitmask of bits in if_flags to check
743  *
744  *	Search for any interface with the given flags. Returns NULL if a device
745  *	is not found or a pointer to the device. The device returned has
746  *	had a reference added and the pointer is safe until the user calls
747  *	dev_put to indicate they have finished with it.
748  */
749 
750 struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
751 {
752 	struct net_device *dev, *ret;
753 
754 	ret = NULL;
755 	read_lock(&dev_base_lock);
756 	for_each_netdev(net, dev) {
757 		if (((dev->flags ^ if_flags) & mask) == 0) {
758 			dev_hold(dev);
759 			ret = dev;
760 			break;
761 		}
762 	}
763 	read_unlock(&dev_base_lock);
764 	return ret;
765 }
766 
767 /**
768  *	dev_valid_name - check if name is okay for network device
769  *	@name: name string
770  *
771  *	Network device names need to be valid file names
772  *	to allow sysfs to work.  We also disallow any kind of
773  *	whitespace.
774  */
775 int dev_valid_name(const char *name)
776 {
777 	if (*name == '\0')
778 		return 0;
779 	if (strlen(name) >= IFNAMSIZ)
780 		return 0;
781 	if (!strcmp(name, ".") || !strcmp(name, ".."))
782 		return 0;
783 
784 	while (*name) {
785 		if (*name == '/' || isspace(*name))
786 			return 0;
787 		name++;
788 	}
789 	return 1;
790 }
791 
792 /**
793  *	__dev_alloc_name - allocate a name for a device
794  *	@net: network namespace to allocate the device name in
795  *	@name: name format string
796  *	@buf:  scratch buffer and result name string
797  *
798  *	Passed a format string - eg "lt%d" it will try and find a suitable
799  *	id. It scans list of devices to build up a free map, then chooses
800  *	the first empty slot. The caller must hold the dev_base or rtnl lock
801  *	while allocating the name and adding the device in order to avoid
802  *	duplicates.
803  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
804  *	Returns the number of the unit assigned or a negative errno code.
805  */
806 
807 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
808 {
809 	int i = 0;
810 	const char *p;
811 	const int max_netdevices = 8*PAGE_SIZE;
812 	unsigned long *inuse;
813 	struct net_device *d;
814 
815 	p = strnchr(name, IFNAMSIZ-1, '%');
816 	if (p) {
817 		/*
818 		 * Verify the string as this thing may have come from
819 		 * the user.  There must be either one "%d" and no other "%"
820 		 * characters.
821 		 */
822 		if (p[1] != 'd' || strchr(p + 2, '%'))
823 			return -EINVAL;
824 
825 		/* Use one page as a bit array of possible slots */
826 		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
827 		if (!inuse)
828 			return -ENOMEM;
829 
830 		for_each_netdev(net, d) {
831 			if (!sscanf(d->name, name, &i))
832 				continue;
833 			if (i < 0 || i >= max_netdevices)
834 				continue;
835 
836 			/*  avoid cases where sscanf is not exact inverse of printf */
837 			snprintf(buf, IFNAMSIZ, name, i);
838 			if (!strncmp(buf, d->name, IFNAMSIZ))
839 				set_bit(i, inuse);
840 		}
841 
842 		i = find_first_zero_bit(inuse, max_netdevices);
843 		free_page((unsigned long) inuse);
844 	}
845 
846 	snprintf(buf, IFNAMSIZ, name, i);
847 	if (!__dev_get_by_name(net, buf))
848 		return i;
849 
850 	/* It is possible to run out of possible slots
851 	 * when the name is long and there isn't enough space left
852 	 * for the digits, or if all bits are used.
853 	 */
854 	return -ENFILE;
855 }
856 
857 /**
858  *	dev_alloc_name - allocate a name for a device
859  *	@dev: device
860  *	@name: name format string
861  *
862  *	Passed a format string - eg "lt%d" it will try and find a suitable
863  *	id. It scans list of devices to build up a free map, then chooses
864  *	the first empty slot. The caller must hold the dev_base or rtnl lock
865  *	while allocating the name and adding the device in order to avoid
866  *	duplicates.
867  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
868  *	Returns the number of the unit assigned or a negative errno code.
869  */
870 
871 int dev_alloc_name(struct net_device *dev, const char *name)
872 {
873 	char buf[IFNAMSIZ];
874 	struct net *net;
875 	int ret;
876 
877 	BUG_ON(!dev_net(dev));
878 	net = dev_net(dev);
879 	ret = __dev_alloc_name(net, name, buf);
880 	if (ret >= 0)
881 		strlcpy(dev->name, buf, IFNAMSIZ);
882 	return ret;
883 }
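/*
 * Illustrative sketch (not part of this file): a driver wanting an
 * automatically numbered interface name might do
 *
 *	err = dev_alloc_name(dev, "eth%d");
 *	if (err < 0)
 *		goto fail;
 *
 * where "eth%d" and the label fail are just placeholders for the example.
 */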
884 
885 
886 /**
887  *	dev_change_name - change name of a device
888  *	@dev: device
889  *	@newname: name (or format string) must be at least IFNAMSIZ
890  *
891  *	Change the name of a device. Format strings such as "eth%d"
892  *	can be passed for wildcarding.
893  */
894 int dev_change_name(struct net_device *dev, const char *newname)
895 {
896 	char oldname[IFNAMSIZ];
897 	int err = 0;
898 	int ret;
899 	struct net *net;
900 
901 	ASSERT_RTNL();
902 	BUG_ON(!dev_net(dev));
903 
904 	net = dev_net(dev);
905 	if (dev->flags & IFF_UP)
906 		return -EBUSY;
907 
908 	if (!dev_valid_name(newname))
909 		return -EINVAL;
910 
911 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
912 		return 0;
913 
914 	memcpy(oldname, dev->name, IFNAMSIZ);
915 
916 	if (strchr(newname, '%')) {
917 		err = dev_alloc_name(dev, newname);
918 		if (err < 0)
919 			return err;
920 	}
921 	else if (__dev_get_by_name(net, newname))
922 		return -EEXIST;
923 	else
924 		strlcpy(dev->name, newname, IFNAMSIZ);
925 
926 rollback:
927 	ret = device_rename(&dev->dev, dev->name);
928 	if (ret) {
929 		memcpy(dev->name, oldname, IFNAMSIZ);
930 		return ret;
931 	}
932 
933 	write_lock_bh(&dev_base_lock);
934 	hlist_del(&dev->name_hlist);
935 	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
936 	write_unlock_bh(&dev_base_lock);
937 
938 	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
939 	ret = notifier_to_errno(ret);
940 
941 	if (ret) {
942 		if (err) {
943 			printk(KERN_ERR
944 			       "%s: name change rollback failed: %d.\n",
945 			       dev->name, ret);
946 		} else {
947 			err = ret;
948 			memcpy(dev->name, oldname, IFNAMSIZ);
949 			goto rollback;
950 		}
951 	}
952 
953 	return err;
954 }
955 
956 /**
957  *	dev_set_alias - change ifalias of a device
958  *	@dev: device
959  *	@alias: name up to IFALIASZ
960  *	@len: limit of bytes to copy from @alias
961  *
962  *	Set the ifalias for a device.
963  */
964 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
965 {
966 	ASSERT_RTNL();
967 
968 	if (len >= IFALIASZ)
969 		return -EINVAL;
970 
971 	if (!len) {
972 		if (dev->ifalias) {
973 			kfree(dev->ifalias);
974 			dev->ifalias = NULL;
975 		}
976 		return 0;
977 	}
978 
979 	dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
980 	if (!dev->ifalias)
981 		return -ENOMEM;
982 
983 	strlcpy(dev->ifalias, alias, len+1);
984 	return len;
985 }
986 
987 
988 /**
989  *	netdev_features_change - device changes features
990  *	@dev: device to cause notification
991  *
992  *	Called to indicate a device has changed features.
993  */
994 void netdev_features_change(struct net_device *dev)
995 {
996 	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
997 }
998 EXPORT_SYMBOL(netdev_features_change);
999 
1000 /**
1001  *	netdev_state_change - device changes state
1002  *	@dev: device to cause notification
1003  *
1004  *	Called to indicate a device has changed state. This function calls
1005  *	the notifier chains for netdev_chain and sends a NEWLINK message
1006  *	to the routing socket.
1007  */
1008 void netdev_state_change(struct net_device *dev)
1009 {
1010 	if (dev->flags & IFF_UP) {
1011 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
1012 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1013 	}
1014 }
1015 
1016 void netdev_bonding_change(struct net_device *dev)
1017 {
1018 	call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
1019 }
1020 EXPORT_SYMBOL(netdev_bonding_change);
1021 
1022 /**
1023  *	dev_load 	- load a network module
1024  *	@net: the applicable net namespace
1025  *	@name: name of interface
1026  *
1027  *	If a network interface is not present and the process has suitable
1028  *	privileges this function loads the module. If module loading is not
1029  *	available in this kernel then it becomes a nop.
1030  */
1031 
1032 void dev_load(struct net *net, const char *name)
1033 {
1034 	struct net_device *dev;
1035 
1036 	read_lock(&dev_base_lock);
1037 	dev = __dev_get_by_name(net, name);
1038 	read_unlock(&dev_base_lock);
1039 
1040 	if (!dev && capable(CAP_SYS_MODULE))
1041 		request_module("%s", name);
1042 }
1043 
1044 /**
1045  *	dev_open	- prepare an interface for use.
1046  *	@dev:	device to open
1047  *
1048  *	Takes a device from down to up state. The device's private open
1049  *	function is invoked and then the multicast lists are loaded. Finally
1050  *	the device is moved into the up state and a %NETDEV_UP message is
1051  *	sent to the netdev notifier chain.
1052  *
1053  *	Calling this function on an active interface is a nop. On a failure
1054  *	a negative errno code is returned.
1055  */
1056 int dev_open(struct net_device *dev)
1057 {
1058 	int ret = 0;
1059 
1060 	ASSERT_RTNL();
1061 
1062 	/*
1063 	 *	Is it already up?
1064 	 */
1065 
1066 	if (dev->flags & IFF_UP)
1067 		return 0;
1068 
1069 	/*
1070 	 *	Is it even present?
1071 	 */
1072 	if (!netif_device_present(dev))
1073 		return -ENODEV;
1074 
1075 	/*
1076 	 *	Call device private open method
1077 	 */
1078 	set_bit(__LINK_STATE_START, &dev->state);
1079 
1080 	if (dev->validate_addr)
1081 		ret = dev->validate_addr(dev);
1082 
1083 	if (!ret && dev->open)
1084 		ret = dev->open(dev);
1085 
1086 	/*
1087 	 *	If it went open OK then:
1088 	 */
1089 
1090 	if (ret)
1091 		clear_bit(__LINK_STATE_START, &dev->state);
1092 	else {
1093 		/*
1094 		 *	Set the flags.
1095 		 */
1096 		dev->flags |= IFF_UP;
1097 
1098 		/*
1099 		 *	Initialize multicasting status
1100 		 */
1101 		dev_set_rx_mode(dev);
1102 
1103 		/*
1104 		 *	Wakeup transmit queue engine
1105 		 */
1106 		dev_activate(dev);
1107 
1108 		/*
1109 		 *	... and announce new interface.
1110 		 */
1111 		call_netdevice_notifiers(NETDEV_UP, dev);
1112 	}
1113 
1114 	return ret;
1115 }
1116 
1117 /**
1118  *	dev_close - shutdown an interface.
1119  *	@dev: device to shutdown
1120  *
1121  *	This function moves an active device into down state. A
1122  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1123  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1124  *	chain.
1125  */
1126 int dev_close(struct net_device *dev)
1127 {
1128 	ASSERT_RTNL();
1129 
1130 	might_sleep();
1131 
1132 	if (!(dev->flags & IFF_UP))
1133 		return 0;
1134 
1135 	/*
1136 	 *	Tell people we are going down, so that they can
1137 	 *	prepare for death while the device is still operating.
1138 	 */
1139 	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1140 
1141 	clear_bit(__LINK_STATE_START, &dev->state);
1142 
1143 	/* Synchronize to the scheduled poll. We cannot touch the poll list;
1144 	 * it may even be on a different CPU. So just clear netif_running().
1145 	 *
1146 	 * dev->stop() will invoke napi_disable() on all of its
1147 	 * napi_struct instances on this device.
1148 	 */
1149 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
1150 
1151 	dev_deactivate(dev);
1152 
1153 	/*
1154 	 *	Call the device specific close. This cannot fail.
1155 	 *	Only if device is UP
1156 	 *
1157 	 *	We allow it to be called even after a DETACH hot-plug
1158 	 *	event.
1159 	 */
1160 	if (dev->stop)
1161 		dev->stop(dev);
1162 
1163 	/*
1164 	 *	Device is now down.
1165 	 */
1166 
1167 	dev->flags &= ~IFF_UP;
1168 
1169 	/*
1170 	 * Tell people we are down
1171 	 */
1172 	call_netdevice_notifiers(NETDEV_DOWN, dev);
1173 
1174 	return 0;
1175 }
1176 
1177 
1178 /**
1179  *	dev_disable_lro - disable Large Receive Offload on a device
1180  *	@dev: device
1181  *
1182  *	Disable Large Receive Offload (LRO) on a net device.  Must be
1183  *	called under RTNL.  This is needed if received packets may be
1184  *	forwarded to another interface.
1185  */
1186 void dev_disable_lro(struct net_device *dev)
1187 {
1188 	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1189 	    dev->ethtool_ops->set_flags) {
1190 		u32 flags = dev->ethtool_ops->get_flags(dev);
1191 		if (flags & ETH_FLAG_LRO) {
1192 			flags &= ~ETH_FLAG_LRO;
1193 			dev->ethtool_ops->set_flags(dev, flags);
1194 		}
1195 	}
1196 	WARN_ON(dev->features & NETIF_F_LRO);
1197 }
1198 EXPORT_SYMBOL(dev_disable_lro);
1199 
1200 
1201 static int dev_boot_phase = 1;
1202 
1203 /*
1204  *	Device change register/unregister. These are not inline or static
1205  *	as we export them to the world.
1206  */
1207 
1208 /**
1209  *	register_netdevice_notifier - register a network notifier block
1210  *	@nb: notifier
1211  *
1212  *	Register a notifier to be called when network device events occur.
1213  *	The notifier passed is linked into the kernel structures and must
1214  *	not be reused until it has been unregistered. A negative errno code
1215  *	is returned on a failure.
1216  *
1217  * 	When registered, all registration and up events are replayed
1218  *	to the new notifier to allow it to have a race-free
1219  *	view of the network device list.
1220  */
1221 
1222 int register_netdevice_notifier(struct notifier_block *nb)
1223 {
1224 	struct net_device *dev;
1225 	struct net_device *last;
1226 	struct net *net;
1227 	int err;
1228 
1229 	rtnl_lock();
1230 	err = raw_notifier_chain_register(&netdev_chain, nb);
1231 	if (err)
1232 		goto unlock;
1233 	if (dev_boot_phase)
1234 		goto unlock;
1235 	for_each_net(net) {
1236 		for_each_netdev(net, dev) {
1237 			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1238 			err = notifier_to_errno(err);
1239 			if (err)
1240 				goto rollback;
1241 
1242 			if (!(dev->flags & IFF_UP))
1243 				continue;
1244 
1245 			nb->notifier_call(nb, NETDEV_UP, dev);
1246 		}
1247 	}
1248 
1249 unlock:
1250 	rtnl_unlock();
1251 	return err;
1252 
1253 rollback:
1254 	last = dev;
1255 	for_each_net(net) {
1256 		for_each_netdev(net, dev) {
1257 			if (dev == last)
1258 				break;
1259 
1260 			if (dev->flags & IFF_UP) {
1261 				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1262 				nb->notifier_call(nb, NETDEV_DOWN, dev);
1263 			}
1264 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1265 		}
1266 	}
1267 
1268 	raw_notifier_chain_unregister(&netdev_chain, nb);
1269 	goto unlock;
1270 }
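/*
 * Illustrative sketch (not part of this file): a subsystem interested in
 * device events would register a callback roughly like the one below.
 * All names are made up; note that the third argument handed to the
 * callback is the struct net_device pointer itself.
 *
 *	static int example_netdev_event(struct notifier_block *nb,
 *					unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		switch (event) {
 *		case NETDEV_UP:
 *			... device came up ...
 *			break;
 *		case NETDEV_DOWN:
 *			... device went down ...
 *			break;
 *		}
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block example_netdev_notifier = {
 *		.notifier_call = example_netdev_event,
 *	};
 *
 *	register_netdevice_notifier(&example_netdev_notifier);
 */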
1271 
1272 /**
1273  *	unregister_netdevice_notifier - unregister a network notifier block
1274  *	@nb: notifier
1275  *
1276  *	Unregister a notifier previously registered by
1277  *	register_netdevice_notifier(). The notifier is unlinked from the
1278  *	kernel structures and may then be reused. A negative errno code
1279  *	is returned on a failure.
1280  */
1281 
1282 int unregister_netdevice_notifier(struct notifier_block *nb)
1283 {
1284 	int err;
1285 
1286 	rtnl_lock();
1287 	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1288 	rtnl_unlock();
1289 	return err;
1290 }
1291 
1292 /**
1293  *	call_netdevice_notifiers - call all network notifier blocks
1294  *      @val: value passed unmodified to notifier function
1295  *      @dev: net_device pointer passed unmodified to notifier function
1296  *
1297  *	Call all network notifier blocks.  Parameters and return value
1298  *	are as for raw_notifier_call_chain().
1299  */
1300 
1301 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1302 {
1303 	return raw_notifier_call_chain(&netdev_chain, val, dev);
1304 }
1305 
1306 /* When > 0 there are consumers of rx skb time stamps */
1307 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1308 
1309 void net_enable_timestamp(void)
1310 {
1311 	atomic_inc(&netstamp_needed);
1312 }
1313 
1314 void net_disable_timestamp(void)
1315 {
1316 	atomic_dec(&netstamp_needed);
1317 }
1318 
1319 static inline void net_timestamp(struct sk_buff *skb)
1320 {
1321 	if (atomic_read(&netstamp_needed))
1322 		__net_timestamp(skb);
1323 	else
1324 		skb->tstamp.tv64 = 0;
1325 }
1326 
1327 /*
1328  *	Support routine. Sends outgoing frames to any network
1329  *	taps currently in use.
1330  */
1331 
1332 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1333 {
1334 	struct packet_type *ptype;
1335 
1336 	net_timestamp(skb);
1337 
1338 	rcu_read_lock();
1339 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1340 		/* Never send packets back to the socket
1341 		 * they originated from - MvS (miquels@drinkel.ow.org)
1342 		 */
1343 		if ((ptype->dev == dev || !ptype->dev) &&
1344 		    (ptype->af_packet_priv == NULL ||
1345 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1346 			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1347 			if (!skb2)
1348 				break;
1349 
1350 			/* The network header should already be
1351 			   set correctly by the sender, so the check below is
1352 			   just protection against buggy protocols.
1353 			 */
1354 			skb_reset_mac_header(skb2);
1355 
1356 			if (skb_network_header(skb2) < skb2->data ||
1357 			    skb2->network_header > skb2->tail) {
1358 				if (net_ratelimit())
1359 					printk(KERN_CRIT "protocol %04x is "
1360 					       "buggy, dev %s\n",
1361 					       skb2->protocol, dev->name);
1362 				skb_reset_network_header(skb2);
1363 			}
1364 
1365 			skb2->transport_header = skb2->network_header;
1366 			skb2->pkt_type = PACKET_OUTGOING;
1367 			ptype->func(skb2, skb->dev, ptype, skb->dev);
1368 		}
1369 	}
1370 	rcu_read_unlock();
1371 }
1372 
1373 
1374 static inline void __netif_reschedule(struct Qdisc *q)
1375 {
1376 	struct softnet_data *sd;
1377 	unsigned long flags;
1378 
1379 	local_irq_save(flags);
1380 	sd = &__get_cpu_var(softnet_data);
1381 	q->next_sched = sd->output_queue;
1382 	sd->output_queue = q;
1383 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
1384 	local_irq_restore(flags);
1385 }
1386 
1387 void __netif_schedule(struct Qdisc *q)
1388 {
1389 	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1390 		__netif_reschedule(q);
1391 }
1392 EXPORT_SYMBOL(__netif_schedule);
1393 
1394 void dev_kfree_skb_irq(struct sk_buff *skb)
1395 {
1396 	if (atomic_dec_and_test(&skb->users)) {
1397 		struct softnet_data *sd;
1398 		unsigned long flags;
1399 
1400 		local_irq_save(flags);
1401 		sd = &__get_cpu_var(softnet_data);
1402 		skb->next = sd->completion_queue;
1403 		sd->completion_queue = skb;
1404 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1405 		local_irq_restore(flags);
1406 	}
1407 }
1408 EXPORT_SYMBOL(dev_kfree_skb_irq);
1409 
1410 void dev_kfree_skb_any(struct sk_buff *skb)
1411 {
1412 	if (in_irq() || irqs_disabled())
1413 		dev_kfree_skb_irq(skb);
1414 	else
1415 		dev_kfree_skb(skb);
1416 }
1417 EXPORT_SYMBOL(dev_kfree_skb_any);
1418 
1419 
1420 /**
1421  * netif_device_detach - mark device as removed
1422  * @dev: network device
1423  *
1424  * Mark device as removed from the system and therefore no longer available.
1425  */
1426 void netif_device_detach(struct net_device *dev)
1427 {
1428 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1429 	    netif_running(dev)) {
1430 		netif_stop_queue(dev);
1431 	}
1432 }
1433 EXPORT_SYMBOL(netif_device_detach);
1434 
1435 /**
1436  * netif_device_attach - mark device as attached
1437  * @dev: network device
1438  *
1439  * Mark device as attached to the system and restart if needed.
1440  */
1441 void netif_device_attach(struct net_device *dev)
1442 {
1443 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1444 	    netif_running(dev)) {
1445 		netif_wake_queue(dev);
1446 		__netdev_watchdog_up(dev);
1447 	}
1448 }
1449 EXPORT_SYMBOL(netif_device_attach);
1450 
1451 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1452 {
1453 	return ((features & NETIF_F_GEN_CSUM) ||
1454 		((features & NETIF_F_IP_CSUM) &&
1455 		 protocol == htons(ETH_P_IP)) ||
1456 		((features & NETIF_F_IPV6_CSUM) &&
1457 		 protocol == htons(ETH_P_IPV6)));
1458 }
1459 
1460 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1461 {
1462 	if (can_checksum_protocol(dev->features, skb->protocol))
1463 		return true;
1464 
1465 	if (skb->protocol == htons(ETH_P_8021Q)) {
1466 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1467 		if (can_checksum_protocol(dev->features & dev->vlan_features,
1468 					  veh->h_vlan_encapsulated_proto))
1469 			return true;
1470 	}
1471 
1472 	return false;
1473 }
1474 
1475 /*
1476  * Invalidate hardware checksum when packet is to be mangled, and
1477  * complete checksum manually on outgoing path.
1478  */
1479 int skb_checksum_help(struct sk_buff *skb)
1480 {
1481 	__wsum csum;
1482 	int ret = 0, offset;
1483 
1484 	if (skb->ip_summed == CHECKSUM_COMPLETE)
1485 		goto out_set_summed;
1486 
1487 	if (unlikely(skb_shinfo(skb)->gso_size)) {
1488 		/* Let GSO fix up the checksum. */
1489 		goto out_set_summed;
1490 	}
1491 
1492 	offset = skb->csum_start - skb_headroom(skb);
1493 	BUG_ON(offset >= skb_headlen(skb));
1494 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
1495 
1496 	offset += skb->csum_offset;
1497 	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1498 
1499 	if (skb_cloned(skb) &&
1500 	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1501 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1502 		if (ret)
1503 			goto out;
1504 	}
1505 
1506 	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
1507 out_set_summed:
1508 	skb->ip_summed = CHECKSUM_NONE;
1509 out:
1510 	return ret;
1511 }
1512 
1513 /**
1514  *	skb_gso_segment - Perform segmentation on skb.
1515  *	@skb: buffer to segment
1516  *	@features: features for the output path (see dev->features)
1517  *
1518  *	This function segments the given skb and returns a list of segments.
1519  *
1520  *	It may return NULL if the skb requires no segmentation.  This is
1521  *	only possible when GSO is used for verifying header integrity.
1522  */
1523 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1524 {
1525 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1526 	struct packet_type *ptype;
1527 	__be16 type = skb->protocol;
1528 	int err;
1529 
1530 	BUG_ON(skb_shinfo(skb)->frag_list);
1531 
1532 	skb_reset_mac_header(skb);
1533 	skb->mac_len = skb->network_header - skb->mac_header;
1534 	__skb_pull(skb, skb->mac_len);
1535 
1536 	if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
1537 		if (skb_header_cloned(skb) &&
1538 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1539 			return ERR_PTR(err);
1540 	}
1541 
1542 	rcu_read_lock();
1543 	list_for_each_entry_rcu(ptype,
1544 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
1545 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1546 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1547 				err = ptype->gso_send_check(skb);
1548 				segs = ERR_PTR(err);
1549 				if (err || skb_gso_ok(skb, features))
1550 					break;
1551 				__skb_push(skb, (skb->data -
1552 						 skb_network_header(skb)));
1553 			}
1554 			segs = ptype->gso_segment(skb, features);
1555 			break;
1556 		}
1557 	}
1558 	rcu_read_unlock();
1559 
1560 	__skb_push(skb, skb->data - skb_mac_header(skb));
1561 
1562 	return segs;
1563 }
1564 
1565 EXPORT_SYMBOL(skb_gso_segment);
1566 
1567 /* Take action when hardware reception checksum errors are detected. */
1568 #ifdef CONFIG_BUG
1569 void netdev_rx_csum_fault(struct net_device *dev)
1570 {
1571 	if (net_ratelimit()) {
1572 		printk(KERN_ERR "%s: hw csum failure.\n",
1573 			dev ? dev->name : "<unknown>");
1574 		dump_stack();
1575 	}
1576 }
1577 EXPORT_SYMBOL(netdev_rx_csum_fault);
1578 #endif
1579 
1580 /* Actually, we should eliminate this check as soon as we know that:
1581  * 1. An IOMMU is present and can map all the memory.
1582  * 2. No high memory really exists on this machine.
1583  */
1584 
1585 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1586 {
1587 #ifdef CONFIG_HIGHMEM
1588 	int i;
1589 
1590 	if (dev->features & NETIF_F_HIGHDMA)
1591 		return 0;
1592 
1593 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1594 		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1595 			return 1;
1596 
1597 #endif
1598 	return 0;
1599 }
1600 
1601 struct dev_gso_cb {
1602 	void (*destructor)(struct sk_buff *skb);
1603 };
1604 
1605 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1606 
1607 static void dev_gso_skb_destructor(struct sk_buff *skb)
1608 {
1609 	struct dev_gso_cb *cb;
1610 
1611 	do {
1612 		struct sk_buff *nskb = skb->next;
1613 
1614 		skb->next = nskb->next;
1615 		nskb->next = NULL;
1616 		kfree_skb(nskb);
1617 	} while (skb->next);
1618 
1619 	cb = DEV_GSO_CB(skb);
1620 	if (cb->destructor)
1621 		cb->destructor(skb);
1622 }
1623 
1624 /**
1625  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
1626  *	@skb: buffer to segment
1627  *
1628  *	This function segments the given skb and stores the list of segments
1629  *	in skb->next.
1630  */
1631 static int dev_gso_segment(struct sk_buff *skb)
1632 {
1633 	struct net_device *dev = skb->dev;
1634 	struct sk_buff *segs;
1635 	int features = dev->features & ~(illegal_highdma(dev, skb) ?
1636 					 NETIF_F_SG : 0);
1637 
1638 	segs = skb_gso_segment(skb, features);
1639 
1640 	/* Verifying header integrity only. */
1641 	if (!segs)
1642 		return 0;
1643 
1644 	if (IS_ERR(segs))
1645 		return PTR_ERR(segs);
1646 
1647 	skb->next = segs;
1648 	DEV_GSO_CB(skb)->destructor = skb->destructor;
1649 	skb->destructor = dev_gso_skb_destructor;
1650 
1651 	return 0;
1652 }
1653 
1654 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1655 			struct netdev_queue *txq)
1656 {
1657 	if (likely(!skb->next)) {
1658 		if (!list_empty(&ptype_all))
1659 			dev_queue_xmit_nit(skb, dev);
1660 
1661 		if (netif_needs_gso(dev, skb)) {
1662 			if (unlikely(dev_gso_segment(skb)))
1663 				goto out_kfree_skb;
1664 			if (skb->next)
1665 				goto gso;
1666 		}
1667 
1668 		return dev->hard_start_xmit(skb, dev);
1669 	}
1670 
1671 gso:
1672 	do {
1673 		struct sk_buff *nskb = skb->next;
1674 		int rc;
1675 
1676 		skb->next = nskb->next;
1677 		nskb->next = NULL;
1678 		rc = dev->hard_start_xmit(nskb, dev);
1679 		if (unlikely(rc)) {
1680 			nskb->next = skb->next;
1681 			skb->next = nskb;
1682 			return rc;
1683 		}
1684 		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
1685 			return NETDEV_TX_BUSY;
1686 	} while (skb->next);
1687 
1688 	skb->destructor = DEV_GSO_CB(skb)->destructor;
1689 
1690 out_kfree_skb:
1691 	kfree_skb(skb);
1692 	return 0;
1693 }
1694 
1695 static u32 simple_tx_hashrnd;
1696 static int simple_tx_hashrnd_initialized = 0;
1697 
1698 static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
1699 {
1700 	u32 addr1, addr2, ports;
1701 	u32 hash, ihl;
1702 	u8 ip_proto = 0;
1703 
1704 	if (unlikely(!simple_tx_hashrnd_initialized)) {
1705 		get_random_bytes(&simple_tx_hashrnd, 4);
1706 		simple_tx_hashrnd_initialized = 1;
1707 	}
1708 
1709 	switch (skb->protocol) {
1710 	case htons(ETH_P_IP):
1711 		if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
1712 			ip_proto = ip_hdr(skb)->protocol;
1713 		addr1 = ip_hdr(skb)->saddr;
1714 		addr2 = ip_hdr(skb)->daddr;
1715 		ihl = ip_hdr(skb)->ihl;
1716 		break;
1717 	case htons(ETH_P_IPV6):
1718 		ip_proto = ipv6_hdr(skb)->nexthdr;
1719 		addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
1720 		addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
1721 		ihl = (40 >> 2);
1722 		break;
1723 	default:
1724 		return 0;
1725 	}
1726 
1727 
1728 	switch (ip_proto) {
1729 	case IPPROTO_TCP:
1730 	case IPPROTO_UDP:
1731 	case IPPROTO_DCCP:
1732 	case IPPROTO_ESP:
1733 	case IPPROTO_AH:
1734 	case IPPROTO_SCTP:
1735 	case IPPROTO_UDPLITE:
1736 		ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
1737 		break;
1738 
1739 	default:
1740 		ports = 0;
1741 		break;
1742 	}
1743 
1744 	hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
1745 
1746 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1747 }
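/*
 * Illustrative note (not part of this file): the final multiply-and-shift
 * maps the 32-bit hash uniformly onto [0, real_num_tx_queues).  For
 * example, with 8 tx queues a hash of 0x80000000 selects queue
 * ((u64)0x80000000 * 8) >> 32 == 4.  The numbers are made up.
 */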
1748 
1749 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1750 					struct sk_buff *skb)
1751 {
1752 	u16 queue_index = 0;
1753 
1754 	if (dev->select_queue)
1755 		queue_index = dev->select_queue(dev, skb);
1756 	else if (dev->real_num_tx_queues > 1)
1757 		queue_index = simple_tx_hash(dev, skb);
1758 
1759 	skb_set_queue_mapping(skb, queue_index);
1760 	return netdev_get_tx_queue(dev, queue_index);
1761 }
1762 
1763 /**
1764  *	dev_queue_xmit - transmit a buffer
1765  *	@skb: buffer to transmit
1766  *
1767  *	Queue a buffer for transmission to a network device. The caller must
1768  *	have set the device and priority and built the buffer before calling
1769  *	this function. The function can be called from an interrupt.
1770  *
1771  *	A negative errno code is returned on a failure. A success does not
1772  *	guarantee the frame will be transmitted as it may be dropped due
1773  *	to congestion or traffic shaping.
1774  *
1775  * -----------------------------------------------------------------------------------
1776  *      I notice this method can also return errors from the queue disciplines,
1777  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1778  *      be positive.
1779  *
1780  *      Regardless of the return value, the skb is consumed, so it is currently
1781  *      difficult to retry a send to this method.  (You can bump the ref count
1782  *      before sending to hold a reference for retry if you are careful.)
1783  *
1784  *      When calling this method, interrupts MUST be enabled.  This is because
1785  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1786  *          --BLG
1787  */
1788 int dev_queue_xmit(struct sk_buff *skb)
1789 {
1790 	struct net_device *dev = skb->dev;
1791 	struct netdev_queue *txq;
1792 	struct Qdisc *q;
1793 	int rc = -ENOMEM;
1794 
1795 	/* GSO will handle the following emulations directly. */
1796 	if (netif_needs_gso(dev, skb))
1797 		goto gso;
1798 
1799 	if (skb_shinfo(skb)->frag_list &&
1800 	    !(dev->features & NETIF_F_FRAGLIST) &&
1801 	    __skb_linearize(skb))
1802 		goto out_kfree_skb;
1803 
1804 	/* Fragmented skb is linearized if device does not support SG,
1805 	 * or if at least one of fragments is in highmem and device
1806 	 * does not support DMA from it.
1807 	 */
1808 	if (skb_shinfo(skb)->nr_frags &&
1809 	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1810 	    __skb_linearize(skb))
1811 		goto out_kfree_skb;
1812 
1813 	/* If packet is not checksummed and device does not support
1814 	 * checksumming for this protocol, complete checksumming here.
1815 	 */
1816 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
1817 		skb_set_transport_header(skb, skb->csum_start -
1818 					      skb_headroom(skb));
1819 		if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
1820 			goto out_kfree_skb;
1821 	}
1822 
1823 gso:
1824 	/* Disable soft irqs for various locks below. Also
1825 	 * stops preemption for RCU.
1826 	 */
1827 	rcu_read_lock_bh();
1828 
1829 	txq = dev_pick_tx(dev, skb);
1830 	q = rcu_dereference(txq->qdisc);
1831 
1832 #ifdef CONFIG_NET_CLS_ACT
1833 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1834 #endif
1835 	if (q->enqueue) {
1836 		spinlock_t *root_lock = qdisc_lock(q);
1837 
1838 		spin_lock(root_lock);
1839 
1840 		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
1841 			kfree_skb(skb);
1842 			rc = NET_XMIT_DROP;
1843 		} else {
1844 			rc = qdisc_enqueue_root(skb, q);
1845 			qdisc_run(q);
1846 		}
1847 		spin_unlock(root_lock);
1848 
1849 		goto out;
1850 	}
1851 
1852 	/* The device has no queue. Common case for software devices:
1853 	   loopback, all sorts of tunnels...
1854 
1855 	   Really, it is unlikely that netif_tx_lock protection is necessary
1856 	   here.  (f.e. loopback and IP tunnels are clean, ignoring statistics
1857 	   counters.)
1858 	   However, it is possible that they rely on the protection
1859 	   we provide here.
1860 
1861 	   Check this and take the lock. It is not prone to deadlocks.
1862 	   Or just take the noqueue qdisc path; it is even simpler 8)
1863 	 */
1864 	if (dev->flags & IFF_UP) {
1865 		int cpu = smp_processor_id(); /* ok because BHs are off */
1866 
1867 		if (txq->xmit_lock_owner != cpu) {
1868 
1869 			HARD_TX_LOCK(dev, txq, cpu);
1870 
1871 			if (!netif_tx_queue_stopped(txq)) {
1872 				rc = 0;
1873 				if (!dev_hard_start_xmit(skb, dev, txq)) {
1874 					HARD_TX_UNLOCK(dev, txq);
1875 					goto out;
1876 				}
1877 			}
1878 			HARD_TX_UNLOCK(dev, txq);
1879 			if (net_ratelimit())
1880 				printk(KERN_CRIT "Virtual device %s asks to "
1881 				       "queue packet!\n", dev->name);
1882 		} else {
1883 			/* Recursion is detected! It is possible,
1884 			 * unfortunately */
1885 			if (net_ratelimit())
1886 				printk(KERN_CRIT "Dead loop on virtual device "
1887 				       "%s, fix it urgently!\n", dev->name);
1888 		}
1889 	}
1890 
1891 	rc = -ENETDOWN;
1892 	rcu_read_unlock_bh();
1893 
1894 out_kfree_skb:
1895 	kfree_skb(skb);
1896 	return rc;
1897 out:
1898 	rcu_read_unlock_bh();
1899 	return rc;
1900 }
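
/*
 * A minimal usage sketch, assuming a hypothetical caller that has already
 * routed the packet and built its headers; "dev" and "stats" are placeholders.
 * The key points from the comment above are that the skb is consumed whatever
 * the return value is, and that interrupts must be enabled when calling.
 *
 *	skb->dev = dev;				(chosen by routing/neighbour code)
 *	skb->protocol = htons(ETH_P_IP);	(example protocol)
 *	rc = dev_queue_xmit(skb);		(consumes skb; rc may be positive)
 *	if (rc)
 *		stats->tx_dropped++;		(do not touch skb again)
 */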
1901 
1902 
1903 /*=======================================================================
1904 			Receiver routines
1905   =======================================================================*/
1906 
1907 int netdev_max_backlog __read_mostly = 1000;
1908 int netdev_budget __read_mostly = 300;
1909 int weight_p __read_mostly = 64;            /* old backlog weight */
1910 
1911 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1912 
1913 
1914 /**
1915  *	netif_rx	-	post buffer to the network code
1916  *	@skb: buffer to post
1917  *
1918  *	This function receives a packet from a device driver and queues it for
1919  *	the upper (protocol) levels to process.  It always succeeds. The buffer
1920  *	may be dropped during processing for congestion control or by the
1921  *	protocol layers.
1922  *
1923  *	return values:
1924  *	NET_RX_SUCCESS	(no congestion)
1925  *	NET_RX_DROP     (packet was dropped)
1926  *
1927  */
1928 
1929 int netif_rx(struct sk_buff *skb)
1930 {
1931 	struct softnet_data *queue;
1932 	unsigned long flags;
1933 
1934 	/* if netpoll wants it, pretend we never saw it */
1935 	if (netpoll_rx(skb))
1936 		return NET_RX_DROP;
1937 
1938 	if (!skb->tstamp.tv64)
1939 		net_timestamp(skb);
1940 
1941 	/*
1942 	 * The code is rearranged so that the path is the
1943 	 * shortest when the CPU is congested but still operating.
1944 	 */
1945 	local_irq_save(flags);
1946 	queue = &__get_cpu_var(softnet_data);
1947 
1948 	__get_cpu_var(netdev_rx_stat).total++;
1949 	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1950 		if (queue->input_pkt_queue.qlen) {
1951 enqueue:
1952 			__skb_queue_tail(&queue->input_pkt_queue, skb);
1953 			local_irq_restore(flags);
1954 			return NET_RX_SUCCESS;
1955 		}
1956 
1957 		napi_schedule(&queue->backlog);
1958 		goto enqueue;
1959 	}
1960 
1961 	__get_cpu_var(netdev_rx_stat).dropped++;
1962 	local_irq_restore(flags);
1963 
1964 	kfree_skb(skb);
1965 	return NET_RX_DROP;
1966 }
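
/*
 * A minimal sketch of the typical non-NAPI driver receive path feeding this
 * function, assuming hypothetical "dev", "rx_buf" and "len" taken from the
 * hardware; eth_type_trans() fills in skb->protocol and skb->pkt_type first.
 *
 *	skb = dev_alloc_skb(len + NET_IP_ALIGN);
 *	if (!skb) {
 *		dev->stats.rx_dropped++;
 *		return;
 *	}
 *	skb_reserve(skb, NET_IP_ALIGN);
 *	memcpy(skb_put(skb, len), rx_buf, len);
 *	skb->protocol = eth_type_trans(skb, dev);
 *	netif_rx(skb);		(queues for the softirq; may still be dropped later)
 */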
1967 
1968 int netif_rx_ni(struct sk_buff *skb)
1969 {
1970 	int err;
1971 
1972 	preempt_disable();
1973 	err = netif_rx(skb);
1974 	if (local_softirq_pending())
1975 		do_softirq();
1976 	preempt_enable();
1977 
1978 	return err;
1979 }
1980 
1981 EXPORT_SYMBOL(netif_rx_ni);
1982 
1983 static void net_tx_action(struct softirq_action *h)
1984 {
1985 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
1986 
1987 	if (sd->completion_queue) {
1988 		struct sk_buff *clist;
1989 
1990 		local_irq_disable();
1991 		clist = sd->completion_queue;
1992 		sd->completion_queue = NULL;
1993 		local_irq_enable();
1994 
1995 		while (clist) {
1996 			struct sk_buff *skb = clist;
1997 			clist = clist->next;
1998 
1999 			WARN_ON(atomic_read(&skb->users));
2000 			__kfree_skb(skb);
2001 		}
2002 	}
2003 
2004 	if (sd->output_queue) {
2005 		struct Qdisc *head;
2006 
2007 		local_irq_disable();
2008 		head = sd->output_queue;
2009 		sd->output_queue = NULL;
2010 		local_irq_enable();
2011 
2012 		while (head) {
2013 			struct Qdisc *q = head;
2014 			spinlock_t *root_lock;
2015 
2016 			head = head->next_sched;
2017 
2018 			root_lock = qdisc_lock(q);
2019 			if (spin_trylock(root_lock)) {
2020 				smp_mb__before_clear_bit();
2021 				clear_bit(__QDISC_STATE_SCHED,
2022 					  &q->state);
2023 				qdisc_run(q);
2024 				spin_unlock(root_lock);
2025 			} else {
2026 				if (!test_bit(__QDISC_STATE_DEACTIVATED,
2027 					      &q->state)) {
2028 					__netif_reschedule(q);
2029 				} else {
2030 					smp_mb__before_clear_bit();
2031 					clear_bit(__QDISC_STATE_SCHED,
2032 						  &q->state);
2033 				}
2034 			}
2035 		}
2036 	}
2037 }
2038 
2039 static inline int deliver_skb(struct sk_buff *skb,
2040 			      struct packet_type *pt_prev,
2041 			      struct net_device *orig_dev)
2042 {
2043 	atomic_inc(&skb->users);
2044 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2045 }
2046 
2047 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
2048 /* These hooks defined here for ATM */
2049 struct net_bridge;
2050 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
2051 						unsigned char *addr);
2052 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
2053 
2054 /*
2055  * If the bridge module is loaded, call the bridging hook.
2056  * Returns NULL if the packet was consumed.
2057  */
2058 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2059 					struct sk_buff *skb) __read_mostly;
2060 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2061 					    struct packet_type **pt_prev, int *ret,
2062 					    struct net_device *orig_dev)
2063 {
2064 	struct net_bridge_port *port;
2065 
2066 	if (skb->pkt_type == PACKET_LOOPBACK ||
2067 	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
2068 		return skb;
2069 
2070 	if (*pt_prev) {
2071 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2072 		*pt_prev = NULL;
2073 	}
2074 
2075 	return br_handle_frame_hook(port, skb);
2076 }
2077 #else
2078 #define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
2079 #endif
2080 
2081 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2082 struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2083 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2084 
2085 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2086 					     struct packet_type **pt_prev,
2087 					     int *ret,
2088 					     struct net_device *orig_dev)
2089 {
2090 	if (skb->dev->macvlan_port == NULL)
2091 		return skb;
2092 
2093 	if (*pt_prev) {
2094 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2095 		*pt_prev = NULL;
2096 	}
2097 	return macvlan_handle_frame_hook(skb);
2098 }
2099 #else
2100 #define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
2101 #endif
2102 
2103 #ifdef CONFIG_NET_CLS_ACT
2104 /* TODO: Maybe we should just force sch_ingress to be compiled in
2105  * when CONFIG_NET_CLS_ACT is?  Otherwise we pay for some useless
2106  * instructions (a compare and two extra stores) when it is not
2107  * configured but CONFIG_NET_CLS_ACT is.
2108  * NOTE: This doesn't remove any functionality; if you don't have
2109  * the ingress scheduler, you just can't add policies on ingress.
2110  *
2111  */
2112 static int ing_filter(struct sk_buff *skb)
2113 {
2114 	struct net_device *dev = skb->dev;
2115 	u32 ttl = G_TC_RTTL(skb->tc_verd);
2116 	struct netdev_queue *rxq;
2117 	int result = TC_ACT_OK;
2118 	struct Qdisc *q;
2119 
2120 	if (MAX_RED_LOOP < ttl++) {
2121 		printk(KERN_WARNING
2122 		       "Redir loop detected, dropping packet (%d->%d)\n",
2123 		       skb->iif, dev->ifindex);
2124 		return TC_ACT_SHOT;
2125 	}
2126 
2127 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2128 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2129 
2130 	rxq = &dev->rx_queue;
2131 
2132 	q = rxq->qdisc;
2133 	if (q != &noop_qdisc) {
2134 		spin_lock(qdisc_lock(q));
2135 		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2136 			result = qdisc_enqueue_root(skb, q);
2137 		spin_unlock(qdisc_lock(q));
2138 	}
2139 
2140 	return result;
2141 }
2142 
2143 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2144 					 struct packet_type **pt_prev,
2145 					 int *ret, struct net_device *orig_dev)
2146 {
2147 	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
2148 		goto out;
2149 
2150 	if (*pt_prev) {
2151 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2152 		*pt_prev = NULL;
2153 	} else {
2154 		/* Huh? Why does turning on AF_PACKET affect this? */
2155 		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2156 	}
2157 
2158 	switch (ing_filter(skb)) {
2159 	case TC_ACT_SHOT:
2160 	case TC_ACT_STOLEN:
2161 		kfree_skb(skb);
2162 		return NULL;
2163 	}
2164 
2165 out:
2166 	skb->tc_verd = 0;
2167 	return skb;
2168 }
2169 #endif
2170 
2171 /*
2172  * 	netif_nit_deliver - deliver received packets to network taps
2173  * 	@skb: buffer
2174  *
2175  * 	This function is used to deliver incoming packets to network
2176  * 	taps. It should be used when the normal netif_receive_skb path
2177  * 	is bypassed, for example because of VLAN acceleration.
2178  */
2179 void netif_nit_deliver(struct sk_buff *skb)
2180 {
2181 	struct packet_type *ptype;
2182 
2183 	if (list_empty(&ptype_all))
2184 		return;
2185 
2186 	skb_reset_network_header(skb);
2187 	skb_reset_transport_header(skb);
2188 	skb->mac_len = skb->network_header - skb->mac_header;
2189 
2190 	rcu_read_lock();
2191 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2192 		if (!ptype->dev || ptype->dev == skb->dev)
2193 			deliver_skb(skb, ptype, skb->dev);
2194 	}
2195 	rcu_read_unlock();
2196 }
2197 
2198 /**
2199  *	netif_receive_skb - process receive buffer from network
2200  *	@skb: buffer to process
2201  *
2202  *	netif_receive_skb() is the main receive data processing function.
2203  *	It always succeeds. The buffer may be dropped during processing
2204  *	for congestion control or by the protocol layers.
2205  *
2206  *	This function may only be called from softirq context and interrupts
2207  *	should be enabled.
2208  *
2209  *	Return values (usually ignored):
2210  *	NET_RX_SUCCESS: no congestion
2211  *	NET_RX_DROP: packet was dropped
2212  */
2213 int netif_receive_skb(struct sk_buff *skb)
2214 {
2215 	struct packet_type *ptype, *pt_prev;
2216 	struct net_device *orig_dev;
2217 	struct net_device *null_or_orig;
2218 	int ret = NET_RX_DROP;
2219 	__be16 type;
2220 
2221 	/* if we've gotten here through NAPI, check netpoll */
2222 	if (netpoll_receive_skb(skb))
2223 		return NET_RX_DROP;
2224 
2225 	if (!skb->tstamp.tv64)
2226 		net_timestamp(skb);
2227 
2228 	if (!skb->iif)
2229 		skb->iif = skb->dev->ifindex;
2230 
2231 	null_or_orig = NULL;
2232 	orig_dev = skb->dev;
2233 	if (orig_dev->master) {
2234 		if (skb_bond_should_drop(skb))
2235 			null_or_orig = orig_dev; /* deliver only exact match */
2236 		else
2237 			skb->dev = orig_dev->master;
2238 	}
2239 
2240 	__get_cpu_var(netdev_rx_stat).total++;
2241 
2242 	skb_reset_network_header(skb);
2243 	skb_reset_transport_header(skb);
2244 	skb->mac_len = skb->network_header - skb->mac_header;
2245 
2246 	pt_prev = NULL;
2247 
2248 	rcu_read_lock();
2249 
2250 	/* Don't receive packets in an exiting network namespace */
2251 	if (!net_alive(dev_net(skb->dev)))
2252 		goto out;
2253 
2254 #ifdef CONFIG_NET_CLS_ACT
2255 	if (skb->tc_verd & TC_NCLS) {
2256 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2257 		goto ncls;
2258 	}
2259 #endif
2260 
2261 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2262 		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2263 		    ptype->dev == orig_dev) {
2264 			if (pt_prev)
2265 				ret = deliver_skb(skb, pt_prev, orig_dev);
2266 			pt_prev = ptype;
2267 		}
2268 	}
2269 
2270 #ifdef CONFIG_NET_CLS_ACT
2271 	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2272 	if (!skb)
2273 		goto out;
2274 ncls:
2275 #endif
2276 
2277 	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2278 	if (!skb)
2279 		goto out;
2280 	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2281 	if (!skb)
2282 		goto out;
2283 
2284 	type = skb->protocol;
2285 	list_for_each_entry_rcu(ptype,
2286 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2287 		if (ptype->type == type &&
2288 		    (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2289 		     ptype->dev == orig_dev)) {
2290 			if (pt_prev)
2291 				ret = deliver_skb(skb, pt_prev, orig_dev);
2292 			pt_prev = ptype;
2293 		}
2294 	}
2295 
2296 	if (pt_prev) {
2297 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2298 	} else {
2299 		kfree_skb(skb);
2300 		/* Jamal, now you will not be able to escape explaining
2301 		 * to me how you were going to use this. :-)
2302 		 */
2303 		ret = NET_RX_DROP;
2304 	}
2305 
2306 out:
2307 	rcu_read_unlock();
2308 	return ret;
2309 }
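
/*
 * A minimal sketch of a NAPI driver's ->poll() handing frames to
 * netif_receive_skb() from softirq context; "struct my_priv",
 * "my_next_rx_skb" and "priv->netdev" are hypothetical driver-private names.
 *
 *	static int my_poll(struct napi_struct *napi, int budget)
 *	{
 *		struct my_priv *priv = container_of(napi, struct my_priv, napi);
 *		struct sk_buff *skb;
 *		int work = 0;
 *
 *		while (work < budget && (skb = my_next_rx_skb(priv)) != NULL) {
 *			skb->protocol = eth_type_trans(skb, priv->netdev);
 *			netif_receive_skb(skb);
 *			work++;
 *		}
 *		if (work < budget)
 *			napi_complete(napi);	(then re-enable the device RX irq)
 *		return work;
 *	}
 */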
2310 
2311 /* Network device is going away, flush any packets still pending  */
2312 static void flush_backlog(void *arg)
2313 {
2314 	struct net_device *dev = arg;
2315 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2316 	struct sk_buff *skb, *tmp;
2317 
2318 	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2319 		if (skb->dev == dev) {
2320 			__skb_unlink(skb, &queue->input_pkt_queue);
2321 			kfree_skb(skb);
2322 		}
2323 }
2324 
2325 static int process_backlog(struct napi_struct *napi, int quota)
2326 {
2327 	int work = 0;
2328 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2329 	unsigned long start_time = jiffies;
2330 
2331 	napi->weight = weight_p;
2332 	do {
2333 		struct sk_buff *skb;
2334 
2335 		local_irq_disable();
2336 		skb = __skb_dequeue(&queue->input_pkt_queue);
2337 		if (!skb) {
2338 			__napi_complete(napi);
2339 			local_irq_enable();
2340 			break;
2341 		}
2342 		local_irq_enable();
2343 
2344 		netif_receive_skb(skb);
2345 	} while (++work < quota && jiffies == start_time);
2346 
2347 	return work;
2348 }
2349 
2350 /**
2351  * __napi_schedule - schedule for receive
2352  * @n: entry to schedule
2353  *
2354  * The entry's receive function will be scheduled to run
2355  */
2356 void __napi_schedule(struct napi_struct *n)
2357 {
2358 	unsigned long flags;
2359 
2360 	local_irq_save(flags);
2361 	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2362 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2363 	local_irq_restore(flags);
2364 }
2365 EXPORT_SYMBOL(__napi_schedule);
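
/*
 * A minimal sketch of the usual interrupt-side counterpart, assuming a
 * hypothetical driver with a "struct my_priv" embedding a napi_struct: mask
 * the device RX interrupt, then let the softirq do the work via NAPI.
 *
 *	static irqreturn_t my_irq_handler(int irq, void *dev_id)
 *	{
 *		struct my_priv *priv = dev_id;
 *
 *		my_disable_rx_irq(priv);	(hypothetical hardware masking)
 *		if (napi_schedule_prep(&priv->napi))
 *			__napi_schedule(&priv->napi);
 *		return IRQ_HANDLED;
 *	}
 */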
2366 
2367 
2368 static void net_rx_action(struct softirq_action *h)
2369 {
2370 	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2371 	unsigned long start_time = jiffies;
2372 	int budget = netdev_budget;
2373 	void *have;
2374 
2375 	local_irq_disable();
2376 
2377 	while (!list_empty(list)) {
2378 		struct napi_struct *n;
2379 		int work, weight;
2380 
2381 		/* If softirq window is exhausted then punt.
2382 		 *
2383 		 * Note that this is a slight policy change from the
2384 		 * previous NAPI code, which would allow up to 2
2385 		 * jiffies to pass before breaking out.  The test
2386 		 * used to be "jiffies - start_time > 1".
2387 		 */
2388 		if (unlikely(budget <= 0 || jiffies != start_time))
2389 			goto softnet_break;
2390 
2391 		local_irq_enable();
2392 
2393 		/* Even though interrupts have been re-enabled, this
2394 		 * access is safe because interrupts can only add new
2395 		 * entries to the tail of this list, and only ->poll()
2396 		 * calls can remove this head entry from the list.
2397 		 */
2398 		n = list_entry(list->next, struct napi_struct, poll_list);
2399 
2400 		have = netpoll_poll_lock(n);
2401 
2402 		weight = n->weight;
2403 
2404 		/* This NAPI_STATE_SCHED test is for avoiding a race
2405 		 * with netpoll's poll_napi().  Only the entity which
2406 		 * obtains the lock and sees NAPI_STATE_SCHED set will
2407 		 * actually make the ->poll() call.  Therefore we avoid
2408 		 * accidentally calling ->poll() when NAPI is not scheduled.
2409 		 */
2410 		work = 0;
2411 		if (test_bit(NAPI_STATE_SCHED, &n->state))
2412 			work = n->poll(n, weight);
2413 
2414 		WARN_ON_ONCE(work > weight);
2415 
2416 		budget -= work;
2417 
2418 		local_irq_disable();
2419 
2420 		/* Drivers must not modify the NAPI state if they
2421 		 * consume the entire weight.  In such cases this code
2422 		 * still "owns" the NAPI instance and therefore can
2423 		 * move the instance around on the list at-will.
2424 		 */
2425 		if (unlikely(work == weight)) {
2426 			if (unlikely(napi_disable_pending(n)))
2427 				__napi_complete(n);
2428 			else
2429 				list_move_tail(&n->poll_list, list);
2430 		}
2431 
2432 		netpoll_poll_unlock(have);
2433 	}
2434 out:
2435 	local_irq_enable();
2436 
2437 #ifdef CONFIG_NET_DMA
2438 	/*
2439 	 * There may not be any more sk_buffs coming right now, so push
2440 	 * any pending DMA copies to hardware
2441 	 */
2442 	if (!cpus_empty(net_dma.channel_mask)) {
2443 		int chan_idx;
2444 		for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) {
2445 			struct dma_chan *chan = net_dma.channels[chan_idx];
2446 			if (chan)
2447 				dma_async_memcpy_issue_pending(chan);
2448 		}
2449 	}
2450 #endif
2451 
2452 	return;
2453 
2454 softnet_break:
2455 	__get_cpu_var(netdev_rx_stat).time_squeeze++;
2456 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2457 	goto out;
2458 }
2459 
2460 static gifconf_func_t * gifconf_list [NPROTO];
2461 
2462 /**
2463  *	register_gifconf	-	register a SIOCGIF handler
2464  *	@family: Address family
2465  *	@gifconf: Function handler
2466  *
2467  *	Register protocol dependent address dumping routines. The handler
2468  *	that is passed must not be freed or reused until it has been replaced
2469  *	by another handler.
2470  */
2471 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2472 {
2473 	if (family >= NPROTO)
2474 		return -EINVAL;
2475 	gifconf_list[family] = gifconf;
2476 	return 0;
2477 }
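
/*
 * A minimal sketch of how an address family registers its handler, modelled
 * loosely on what PF_INET does with its own gifconf routine at init time;
 * "my_af_init" and "my_gifconf" are hypothetical names.
 *
 *	static int my_gifconf(struct net_device *dev, char __user *buf, int len)
 *	{
 *		return 0;	(write per-device records into buf, return bytes used)
 *	}
 *
 *	static int __init my_af_init(void)
 *	{
 *		return register_gifconf(PF_INET, my_gifconf);
 *	}
 */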
2478 
2479 
2480 /*
2481  *	Map an interface index to its name (SIOCGIFNAME)
2482  */
2483 
2484 /*
2485  *	We need this ioctl for efficient implementation of the
2486  *	if_indextoname() function required by the IPv6 API.  Without
2487  *	it, we would have to search all the interfaces to find a
2488  *	match.  --pb
2489  */
2490 
2491 static int dev_ifname(struct net *net, struct ifreq __user *arg)
2492 {
2493 	struct net_device *dev;
2494 	struct ifreq ifr;
2495 
2496 	/*
2497 	 *	Fetch the caller's info block.
2498 	 */
2499 
2500 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2501 		return -EFAULT;
2502 
2503 	read_lock(&dev_base_lock);
2504 	dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2505 	if (!dev) {
2506 		read_unlock(&dev_base_lock);
2507 		return -ENODEV;
2508 	}
2509 
2510 	strcpy(ifr.ifr_name, dev->name);
2511 	read_unlock(&dev_base_lock);
2512 
2513 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2514 		return -EFAULT;
2515 	return 0;
2516 }
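
/*
 * A user-space sketch of the SIOCGIFNAME call served above, roughly what
 * libc's if_indextoname() boils down to; assumes an ordinary POSIX program,
 * with "ifindex" supplied by the caller.
 *
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	ifr.ifr_ifindex = ifindex;
 *	if (ioctl(fd, SIOCGIFNAME, &ifr) == 0)
 *		printf("ifindex %d is %s\n", ifindex, ifr.ifr_name);
 */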
2517 
2518 /*
2519  *	Perform a SIOCGIFCONF call. This structure will change
2520  *	size eventually, and there is nothing I can do about it.
2521  *	Thus we will need a 'compatibility mode'.
2522  */
2523 
2524 static int dev_ifconf(struct net *net, char __user *arg)
2525 {
2526 	struct ifconf ifc;
2527 	struct net_device *dev;
2528 	char __user *pos;
2529 	int len;
2530 	int total;
2531 	int i;
2532 
2533 	/*
2534 	 *	Fetch the caller's info block.
2535 	 */
2536 
2537 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2538 		return -EFAULT;
2539 
2540 	pos = ifc.ifc_buf;
2541 	len = ifc.ifc_len;
2542 
2543 	/*
2544 	 *	Loop over the interfaces, and write an info block for each.
2545 	 */
2546 
2547 	total = 0;
2548 	for_each_netdev(net, dev) {
2549 		for (i = 0; i < NPROTO; i++) {
2550 			if (gifconf_list[i]) {
2551 				int done;
2552 				if (!pos)
2553 					done = gifconf_list[i](dev, NULL, 0);
2554 				else
2555 					done = gifconf_list[i](dev, pos + total,
2556 							       len - total);
2557 				if (done < 0)
2558 					return -EFAULT;
2559 				total += done;
2560 			}
2561 		}
2562 	}
2563 
2564 	/*
2565 	 *	All done.  Write the updated control block back to the caller.
2566 	 */
2567 	ifc.ifc_len = total;
2568 
2569 	/*
2570 	 * 	Both BSD and Solaris return 0 here, so we do too.
2571 	 */
2572 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2573 }
2574 
2575 #ifdef CONFIG_PROC_FS
2576 /*
2577  *	This is invoked by the /proc filesystem handler to display a device
2578  *	in detail.
2579  */
2580 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2581 	__acquires(dev_base_lock)
2582 {
2583 	struct net *net = seq_file_net(seq);
2584 	loff_t off;
2585 	struct net_device *dev;
2586 
2587 	read_lock(&dev_base_lock);
2588 	if (!*pos)
2589 		return SEQ_START_TOKEN;
2590 
2591 	off = 1;
2592 	for_each_netdev(net, dev)
2593 		if (off++ == *pos)
2594 			return dev;
2595 
2596 	return NULL;
2597 }
2598 
2599 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2600 {
2601 	struct net *net = seq_file_net(seq);
2602 	++*pos;
2603 	return v == SEQ_START_TOKEN ?
2604 		first_net_device(net) : next_net_device((struct net_device *)v);
2605 }
2606 
2607 void dev_seq_stop(struct seq_file *seq, void *v)
2608 	__releases(dev_base_lock)
2609 {
2610 	read_unlock(&dev_base_lock);
2611 }
2612 
2613 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2614 {
2615 	struct net_device_stats *stats = dev->get_stats(dev);
2616 
2617 	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2618 		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2619 		   dev->name, stats->rx_bytes, stats->rx_packets,
2620 		   stats->rx_errors,
2621 		   stats->rx_dropped + stats->rx_missed_errors,
2622 		   stats->rx_fifo_errors,
2623 		   stats->rx_length_errors + stats->rx_over_errors +
2624 		    stats->rx_crc_errors + stats->rx_frame_errors,
2625 		   stats->rx_compressed, stats->multicast,
2626 		   stats->tx_bytes, stats->tx_packets,
2627 		   stats->tx_errors, stats->tx_dropped,
2628 		   stats->tx_fifo_errors, stats->collisions,
2629 		   stats->tx_carrier_errors +
2630 		    stats->tx_aborted_errors +
2631 		    stats->tx_window_errors +
2632 		    stats->tx_heartbeat_errors,
2633 		   stats->tx_compressed);
2634 }
2635 
2636 /*
2637  *	Called from the PROCfs module. This now uses the new arbitrary sized
2638  *	/proc/net interface to create /proc/net/dev
2639  */
2640 static int dev_seq_show(struct seq_file *seq, void *v)
2641 {
2642 	if (v == SEQ_START_TOKEN)
2643 		seq_puts(seq, "Inter-|   Receive                            "
2644 			      "                    |  Transmit\n"
2645 			      " face |bytes    packets errs drop fifo frame "
2646 			      "compressed multicast|bytes    packets errs "
2647 			      "drop fifo colls carrier compressed\n");
2648 	else
2649 		dev_seq_printf_stats(seq, v);
2650 	return 0;
2651 }
2652 
2653 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2654 {
2655 	struct netif_rx_stats *rc = NULL;
2656 
2657 	while (*pos < nr_cpu_ids)
2658 		if (cpu_online(*pos)) {
2659 			rc = &per_cpu(netdev_rx_stat, *pos);
2660 			break;
2661 		} else
2662 			++*pos;
2663 	return rc;
2664 }
2665 
2666 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2667 {
2668 	return softnet_get_online(pos);
2669 }
2670 
2671 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2672 {
2673 	++*pos;
2674 	return softnet_get_online(pos);
2675 }
2676 
2677 static void softnet_seq_stop(struct seq_file *seq, void *v)
2678 {
2679 }
2680 
2681 static int softnet_seq_show(struct seq_file *seq, void *v)
2682 {
2683 	struct netif_rx_stats *s = v;
2684 
2685 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2686 		   s->total, s->dropped, s->time_squeeze, 0,
2687 		   0, 0, 0, 0, /* was fastroute */
2688 		   s->cpu_collision );
2689 	return 0;
2690 }
2691 
2692 static const struct seq_operations dev_seq_ops = {
2693 	.start = dev_seq_start,
2694 	.next  = dev_seq_next,
2695 	.stop  = dev_seq_stop,
2696 	.show  = dev_seq_show,
2697 };
2698 
2699 static int dev_seq_open(struct inode *inode, struct file *file)
2700 {
2701 	return seq_open_net(inode, file, &dev_seq_ops,
2702 			    sizeof(struct seq_net_private));
2703 }
2704 
2705 static const struct file_operations dev_seq_fops = {
2706 	.owner	 = THIS_MODULE,
2707 	.open    = dev_seq_open,
2708 	.read    = seq_read,
2709 	.llseek  = seq_lseek,
2710 	.release = seq_release_net,
2711 };
2712 
2713 static const struct seq_operations softnet_seq_ops = {
2714 	.start = softnet_seq_start,
2715 	.next  = softnet_seq_next,
2716 	.stop  = softnet_seq_stop,
2717 	.show  = softnet_seq_show,
2718 };
2719 
2720 static int softnet_seq_open(struct inode *inode, struct file *file)
2721 {
2722 	return seq_open(file, &softnet_seq_ops);
2723 }
2724 
2725 static const struct file_operations softnet_seq_fops = {
2726 	.owner	 = THIS_MODULE,
2727 	.open    = softnet_seq_open,
2728 	.read    = seq_read,
2729 	.llseek  = seq_lseek,
2730 	.release = seq_release,
2731 };
2732 
2733 static void *ptype_get_idx(loff_t pos)
2734 {
2735 	struct packet_type *pt = NULL;
2736 	loff_t i = 0;
2737 	int t;
2738 
2739 	list_for_each_entry_rcu(pt, &ptype_all, list) {
2740 		if (i == pos)
2741 			return pt;
2742 		++i;
2743 	}
2744 
2745 	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
2746 		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2747 			if (i == pos)
2748 				return pt;
2749 			++i;
2750 		}
2751 	}
2752 	return NULL;
2753 }
2754 
2755 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2756 	__acquires(RCU)
2757 {
2758 	rcu_read_lock();
2759 	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2760 }
2761 
2762 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2763 {
2764 	struct packet_type *pt;
2765 	struct list_head *nxt;
2766 	int hash;
2767 
2768 	++*pos;
2769 	if (v == SEQ_START_TOKEN)
2770 		return ptype_get_idx(0);
2771 
2772 	pt = v;
2773 	nxt = pt->list.next;
2774 	if (pt->type == htons(ETH_P_ALL)) {
2775 		if (nxt != &ptype_all)
2776 			goto found;
2777 		hash = 0;
2778 		nxt = ptype_base[0].next;
2779 	} else
2780 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
2781 
2782 	while (nxt == &ptype_base[hash]) {
2783 		if (++hash >= PTYPE_HASH_SIZE)
2784 			return NULL;
2785 		nxt = ptype_base[hash].next;
2786 	}
2787 found:
2788 	return list_entry(nxt, struct packet_type, list);
2789 }
2790 
2791 static void ptype_seq_stop(struct seq_file *seq, void *v)
2792 	__releases(RCU)
2793 {
2794 	rcu_read_unlock();
2795 }
2796 
2797 static void ptype_seq_decode(struct seq_file *seq, void *sym)
2798 {
2799 #ifdef CONFIG_KALLSYMS
2800 	unsigned long offset = 0, symsize;
2801 	const char *symname;
2802 	char *modname;
2803 	char namebuf[128];
2804 
2805 	symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
2806 				  &modname, namebuf);
2807 
2808 	if (symname) {
2809 		char *delim = ":";
2810 
2811 		if (!modname)
2812 			modname = delim = "";
2813 		seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
2814 			   symname, offset);
2815 		return;
2816 	}
2817 #endif
2818 
2819 	seq_printf(seq, "[%p]", sym);
2820 }
2821 
2822 static int ptype_seq_show(struct seq_file *seq, void *v)
2823 {
2824 	struct packet_type *pt = v;
2825 
2826 	if (v == SEQ_START_TOKEN)
2827 		seq_puts(seq, "Type Device      Function\n");
2828 	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
2829 		if (pt->type == htons(ETH_P_ALL))
2830 			seq_puts(seq, "ALL ");
2831 		else
2832 			seq_printf(seq, "%04x", ntohs(pt->type));
2833 
2834 		seq_printf(seq, " %-8s ",
2835 			   pt->dev ? pt->dev->name : "");
2836 		ptype_seq_decode(seq,  pt->func);
2837 		seq_putc(seq, '\n');
2838 	}
2839 
2840 	return 0;
2841 }
2842 
2843 static const struct seq_operations ptype_seq_ops = {
2844 	.start = ptype_seq_start,
2845 	.next  = ptype_seq_next,
2846 	.stop  = ptype_seq_stop,
2847 	.show  = ptype_seq_show,
2848 };
2849 
2850 static int ptype_seq_open(struct inode *inode, struct file *file)
2851 {
2852 	return seq_open_net(inode, file, &ptype_seq_ops,
2853 			sizeof(struct seq_net_private));
2854 }
2855 
2856 static const struct file_operations ptype_seq_fops = {
2857 	.owner	 = THIS_MODULE,
2858 	.open    = ptype_seq_open,
2859 	.read    = seq_read,
2860 	.llseek  = seq_lseek,
2861 	.release = seq_release_net,
2862 };
2863 
2864 
2865 static int __net_init dev_proc_net_init(struct net *net)
2866 {
2867 	int rc = -ENOMEM;
2868 
2869 	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
2870 		goto out;
2871 	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
2872 		goto out_dev;
2873 	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
2874 		goto out_softnet;
2875 
2876 	if (wext_proc_init(net))
2877 		goto out_ptype;
2878 	rc = 0;
2879 out:
2880 	return rc;
2881 out_ptype:
2882 	proc_net_remove(net, "ptype");
2883 out_softnet:
2884 	proc_net_remove(net, "softnet_stat");
2885 out_dev:
2886 	proc_net_remove(net, "dev");
2887 	goto out;
2888 }
2889 
2890 static void __net_exit dev_proc_net_exit(struct net *net)
2891 {
2892 	wext_proc_exit(net);
2893 
2894 	proc_net_remove(net, "ptype");
2895 	proc_net_remove(net, "softnet_stat");
2896 	proc_net_remove(net, "dev");
2897 }
2898 
2899 static struct pernet_operations __net_initdata dev_proc_ops = {
2900 	.init = dev_proc_net_init,
2901 	.exit = dev_proc_net_exit,
2902 };
2903 
2904 static int __init dev_proc_init(void)
2905 {
2906 	return register_pernet_subsys(&dev_proc_ops);
2907 }
2908 #else
2909 #define dev_proc_init() 0
2910 #endif	/* CONFIG_PROC_FS */
2911 
2912 
2913 /**
2914  *	netdev_set_master	-	set up master/slave pair
2915  *	@slave: slave device
2916  *	@master: new master device
2917  *
2918  *	Changes the master device of the slave. Pass %NULL to break the
2919  *	bonding. The caller must hold the RTNL semaphore. On a failure
2920  *	a negative errno code is returned. On success the reference counts
2921  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2922  *	function returns zero.
2923  */
2924 int netdev_set_master(struct net_device *slave, struct net_device *master)
2925 {
2926 	struct net_device *old = slave->master;
2927 
2928 	ASSERT_RTNL();
2929 
2930 	if (master) {
2931 		if (old)
2932 			return -EBUSY;
2933 		dev_hold(master);
2934 	}
2935 
2936 	slave->master = master;
2937 
2938 	synchronize_net();
2939 
2940 	if (old)
2941 		dev_put(old);
2942 
2943 	if (master)
2944 		slave->flags |= IFF_SLAVE;
2945 	else
2946 		slave->flags &= ~IFF_SLAVE;
2947 
2948 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2949 	return 0;
2950 }
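
/*
 * A minimal sketch of the enslave/release pattern this helper supports, in
 * the style of the bonding driver; "bond_dev" and "slave_dev" are
 * placeholders and the bonding driver's own bookkeeping is omitted.
 *
 *	rtnl_lock();
 *	err = netdev_set_master(slave_dev, bond_dev);	(enslave: refcount + IFF_SLAVE)
 *	rtnl_unlock();
 *
 *	rtnl_lock();
 *	netdev_set_master(slave_dev, NULL);		(release: break the pairing)
 *	rtnl_unlock();
 */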
2951 
2952 static void dev_change_rx_flags(struct net_device *dev, int flags)
2953 {
2954 	if (dev->flags & IFF_UP && dev->change_rx_flags)
2955 		dev->change_rx_flags(dev, flags);
2956 }
2957 
2958 static int __dev_set_promiscuity(struct net_device *dev, int inc)
2959 {
2960 	unsigned short old_flags = dev->flags;
2961 
2962 	ASSERT_RTNL();
2963 
2964 	dev->flags |= IFF_PROMISC;
2965 	dev->promiscuity += inc;
2966 	if (dev->promiscuity == 0) {
2967 		/*
2968 		 * Avoid overflow.
2969 		 * If inc causes overflow, untouch promisc and return error.
2970 		 */
2971 		if (inc < 0)
2972 			dev->flags &= ~IFF_PROMISC;
2973 		else {
2974 			dev->promiscuity -= inc;
2975 			printk(KERN_WARNING "%s: promiscuity count would overflow, "
2976 				"set promiscuity failed; promiscuous mode on "
2977 				"this device may be broken.\n", dev->name);
2978 			return -EOVERFLOW;
2979 		}
2980 	}
2981 	if (dev->flags != old_flags) {
2982 		printk(KERN_INFO "device %s %s promiscuous mode\n",
2983 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2984 							       "left");
2985 		if (audit_enabled)
2986 			audit_log(current->audit_context, GFP_ATOMIC,
2987 				AUDIT_ANOM_PROMISCUOUS,
2988 				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
2989 				dev->name, (dev->flags & IFF_PROMISC),
2990 				(old_flags & IFF_PROMISC),
2991 				audit_get_loginuid(current),
2992 				current->uid, current->gid,
2993 				audit_get_sessionid(current));
2994 
2995 		dev_change_rx_flags(dev, IFF_PROMISC);
2996 	}
2997 	return 0;
2998 }
2999 
3000 /**
3001  *	dev_set_promiscuity	- update promiscuity count on a device
3002  *	@dev: device
3003  *	@inc: modifier
3004  *
3005  *	Add or remove promiscuity from a device. While the count in the device
3006  *	remains above zero the interface remains promiscuous. Once it hits zero
3007  *	the device reverts back to normal filtering operation. A negative inc
3008  *	value is used to drop promiscuity on the device.
3009  *	Return 0 if successful or a negative errno code on error.
3010  */
3011 int dev_set_promiscuity(struct net_device *dev, int inc)
3012 {
3013 	unsigned short old_flags = dev->flags;
3014 	int err;
3015 
3016 	err = __dev_set_promiscuity(dev, inc);
3017 	if (err < 0)
3018 		return err;
3019 	if (dev->flags != old_flags)
3020 		dev_set_rx_mode(dev);
3021 	return err;
3022 }
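
/*
 * A minimal sketch of the reference-counted usage, similar to what a packet
 * tap does while it is bound to a device: take one promiscuity reference
 * while listening and drop it again on teardown, always under the RTNL.
 *
 *	rtnl_lock();
 *	err = dev_set_promiscuity(dev, 1);	(take a reference)
 *	rtnl_unlock();
 *
 *	rtnl_lock();
 *	dev_set_promiscuity(dev, -1);		(later: drop it again)
 *	rtnl_unlock();
 */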
3023 
3024 /**
3025  *	dev_set_allmulti	- update allmulti count on a device
3026  *	@dev: device
3027  *	@inc: modifier
3028  *
3029  *	Add or remove reception of all multicast frames to a device. While the
3030  *	count in the device remains above zero the interface remains listening
3031  *	to all multicast frames. Once it hits zero the device reverts to normal
3032  *	filtering operation. A negative @inc value is used to drop the counter
3033  *	when releasing a resource needing all multicasts.
3034  *	Return 0 if successful or a negative errno code on error.
3035  */
3036 
3037 int dev_set_allmulti(struct net_device *dev, int inc)
3038 {
3039 	unsigned short old_flags = dev->flags;
3040 
3041 	ASSERT_RTNL();
3042 
3043 	dev->flags |= IFF_ALLMULTI;
3044 	dev->allmulti += inc;
3045 	if (dev->allmulti == 0) {
3046 		/*
3047 		 * Avoid overflow.
3048 		 * If inc causes overflow, untouch allmulti and return error.
3049 		 */
3050 		if (inc < 0)
3051 			dev->flags &= ~IFF_ALLMULTI;
3052 		else {
3053 			dev->allmulti -= inc;
3054 			printk(KERN_WARNING "%s: allmulti count would overflow, "
3055 				"set allmulti failed; allmulti on this "
3056 				"device may be broken.\n", dev->name);
3057 			return -EOVERFLOW;
3058 		}
3059 	}
3060 	if (dev->flags ^ old_flags) {
3061 		dev_change_rx_flags(dev, IFF_ALLMULTI);
3062 		dev_set_rx_mode(dev);
3063 	}
3064 	return 0;
3065 }
3066 
3067 /*
3068  *	Upload unicast and multicast address lists to device and
3069  *	configure RX filtering. When the device doesn't support unicast
3070  *	filtering it is put in promiscuous mode while unicast addresses
3071  *	are present.
3072  */
3073 void __dev_set_rx_mode(struct net_device *dev)
3074 {
3075 	/* dev_open will call this function so the list will stay sane. */
3076 	if (!(dev->flags&IFF_UP))
3077 		return;
3078 
3079 	if (!netif_device_present(dev))
3080 		return;
3081 
3082 	if (dev->set_rx_mode)
3083 		dev->set_rx_mode(dev);
3084 	else {
3085 		/* Unicast address changes may only happen under the rtnl,
3086 		 * therefore calling __dev_set_promiscuity here is safe.
3087 		 */
3088 		if (dev->uc_count > 0 && !dev->uc_promisc) {
3089 			__dev_set_promiscuity(dev, 1);
3090 			dev->uc_promisc = 1;
3091 		} else if (dev->uc_count == 0 && dev->uc_promisc) {
3092 			__dev_set_promiscuity(dev, -1);
3093 			dev->uc_promisc = 0;
3094 		}
3095 
3096 		if (dev->set_multicast_list)
3097 			dev->set_multicast_list(dev);
3098 	}
3099 }
3100 
3101 void dev_set_rx_mode(struct net_device *dev)
3102 {
3103 	netif_addr_lock_bh(dev);
3104 	__dev_set_rx_mode(dev);
3105 	netif_addr_unlock_bh(dev);
3106 }
3107 
3108 int __dev_addr_delete(struct dev_addr_list **list, int *count,
3109 		      void *addr, int alen, int glbl)
3110 {
3111 	struct dev_addr_list *da;
3112 
3113 	for (; (da = *list) != NULL; list = &da->next) {
3114 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3115 		    alen == da->da_addrlen) {
3116 			if (glbl) {
3117 				int old_glbl = da->da_gusers;
3118 				da->da_gusers = 0;
3119 				if (old_glbl == 0)
3120 					break;
3121 			}
3122 			if (--da->da_users)
3123 				return 0;
3124 
3125 			*list = da->next;
3126 			kfree(da);
3127 			(*count)--;
3128 			return 0;
3129 		}
3130 	}
3131 	return -ENOENT;
3132 }
3133 
3134 int __dev_addr_add(struct dev_addr_list **list, int *count,
3135 		   void *addr, int alen, int glbl)
3136 {
3137 	struct dev_addr_list *da;
3138 
3139 	for (da = *list; da != NULL; da = da->next) {
3140 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3141 		    da->da_addrlen == alen) {
3142 			if (glbl) {
3143 				int old_glbl = da->da_gusers;
3144 				da->da_gusers = 1;
3145 				if (old_glbl)
3146 					return 0;
3147 			}
3148 			da->da_users++;
3149 			return 0;
3150 		}
3151 	}
3152 
3153 	da = kzalloc(sizeof(*da), GFP_ATOMIC);
3154 	if (da == NULL)
3155 		return -ENOMEM;
3156 	memcpy(da->da_addr, addr, alen);
3157 	da->da_addrlen = alen;
3158 	da->da_users = 1;
3159 	da->da_gusers = glbl ? 1 : 0;
3160 	da->next = *list;
3161 	*list = da;
3162 	(*count)++;
3163 	return 0;
3164 }
3165 
3166 /**
3167  *	dev_unicast_delete	- Release secondary unicast address.
3168  *	@dev: device
3169  *	@addr: address to delete
3170  *	@alen: length of @addr
3171  *
3172  *	Release reference to a secondary unicast address and remove it
3173  *	from the device if the reference count drops to zero.
3174  *
3175  * 	The caller must hold the rtnl_mutex.
3176  */
3177 int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
3178 {
3179 	int err;
3180 
3181 	ASSERT_RTNL();
3182 
3183 	netif_addr_lock_bh(dev);
3184 	err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3185 	if (!err)
3186 		__dev_set_rx_mode(dev);
3187 	netif_addr_unlock_bh(dev);
3188 	return err;
3189 }
3190 EXPORT_SYMBOL(dev_unicast_delete);
3191 
3192 /**
3193  *	dev_unicast_add		- add a secondary unicast address
3194  *	@dev: device
3195  *	@addr: address to add
3196  *	@alen: length of @addr
3197  *
3198  *	Add a secondary unicast address to the device or increase
3199  *	the reference count if it already exists.
3200  *
3201  *	The caller must hold the rtnl_mutex.
3202  */
3203 int dev_unicast_add(struct net_device *dev, void *addr, int alen)
3204 {
3205 	int err;
3206 
3207 	ASSERT_RTNL();
3208 
3209 	netif_addr_lock_bh(dev);
3210 	err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3211 	if (!err)
3212 		__dev_set_rx_mode(dev);
3213 	netif_addr_unlock_bh(dev);
3214 	return err;
3215 }
3216 EXPORT_SYMBOL(dev_unicast_add);
3217 
3218 int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3219 		    struct dev_addr_list **from, int *from_count)
3220 {
3221 	struct dev_addr_list *da, *next;
3222 	int err = 0;
3223 
3224 	da = *from;
3225 	while (da != NULL) {
3226 		next = da->next;
3227 		if (!da->da_synced) {
3228 			err = __dev_addr_add(to, to_count,
3229 					     da->da_addr, da->da_addrlen, 0);
3230 			if (err < 0)
3231 				break;
3232 			da->da_synced = 1;
3233 			da->da_users++;
3234 		} else if (da->da_users == 1) {
3235 			__dev_addr_delete(to, to_count,
3236 					  da->da_addr, da->da_addrlen, 0);
3237 			__dev_addr_delete(from, from_count,
3238 					  da->da_addr, da->da_addrlen, 0);
3239 		}
3240 		da = next;
3241 	}
3242 	return err;
3243 }
3244 
3245 void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3246 		       struct dev_addr_list **from, int *from_count)
3247 {
3248 	struct dev_addr_list *da, *next;
3249 
3250 	da = *from;
3251 	while (da != NULL) {
3252 		next = da->next;
3253 		if (da->da_synced) {
3254 			__dev_addr_delete(to, to_count,
3255 					  da->da_addr, da->da_addrlen, 0);
3256 			da->da_synced = 0;
3257 			__dev_addr_delete(from, from_count,
3258 					  da->da_addr, da->da_addrlen, 0);
3259 		}
3260 		da = next;
3261 	}
3262 }
3263 
3264 /**
3265  *	dev_unicast_sync - Synchronize device's unicast list to another device
3266  *	@to: destination device
3267  *	@from: source device
3268  *
3269  *	Add newly added addresses to the destination device and release
3270  *	addresses that have no users left. The source device must be
3271  *	locked by netif_tx_lock_bh.
3272  *
3273  *	This function is intended to be called from the dev->set_rx_mode
3274  *	function of layered software devices.
3275  */
3276 int dev_unicast_sync(struct net_device *to, struct net_device *from)
3277 {
3278 	int err = 0;
3279 
3280 	netif_addr_lock_bh(to);
3281 	err = __dev_addr_sync(&to->uc_list, &to->uc_count,
3282 			      &from->uc_list, &from->uc_count);
3283 	if (!err)
3284 		__dev_set_rx_mode(to);
3285 	netif_addr_unlock_bh(to);
3286 	return err;
3287 }
3288 EXPORT_SYMBOL(dev_unicast_sync);
3289 
3290 /**
3291  *	dev_unicast_unsync - Remove synchronized addresses from the destination device
3292  *	@to: destination device
3293  *	@from: source device
3294  *
3295  *	Remove all addresses that were added to the destination device by
3296  *	dev_unicast_sync(). This function is intended to be called from the
3297  *	dev->stop function of layered software devices.
3298  */
3299 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3300 {
3301 	netif_addr_lock_bh(from);
3302 	netif_addr_lock(to);
3303 
3304 	__dev_addr_unsync(&to->uc_list, &to->uc_count,
3305 			  &from->uc_list, &from->uc_count);
3306 	__dev_set_rx_mode(to);
3307 
3308 	netif_addr_unlock(to);
3309 	netif_addr_unlock_bh(from);
3310 }
3311 EXPORT_SYMBOL(dev_unicast_unsync);
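
/*
 * A minimal sketch of the layered-device pattern the two helpers above are
 * meant for (VLAN/macvlan style): sync from the upper device's set_rx_mode,
 * unsync from its stop routine.  "upper_dev", "lower_dev" and
 * "get_lower_dev" are hypothetical.
 *
 *	static void upper_set_rx_mode(struct net_device *upper_dev)
 *	{
 *		struct net_device *lower_dev = get_lower_dev(upper_dev);
 *
 *		dev_unicast_sync(lower_dev, upper_dev);
 *	}
 *
 *	static int upper_stop(struct net_device *upper_dev)
 *	{
 *		struct net_device *lower_dev = get_lower_dev(upper_dev);
 *
 *		dev_unicast_unsync(lower_dev, upper_dev);
 *		return 0;
 *	}
 */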
3312 
3313 static void __dev_addr_discard(struct dev_addr_list **list)
3314 {
3315 	struct dev_addr_list *tmp;
3316 
3317 	while (*list != NULL) {
3318 		tmp = *list;
3319 		*list = tmp->next;
3320 		if (tmp->da_users > tmp->da_gusers)
3321 			printk("__dev_addr_discard: address leakage! "
3322 			       "da_users=%d\n", tmp->da_users);
3323 		kfree(tmp);
3324 	}
3325 }
3326 
3327 static void dev_addr_discard(struct net_device *dev)
3328 {
3329 	netif_addr_lock_bh(dev);
3330 
3331 	__dev_addr_discard(&dev->uc_list);
3332 	dev->uc_count = 0;
3333 
3334 	__dev_addr_discard(&dev->mc_list);
3335 	dev->mc_count = 0;
3336 
3337 	netif_addr_unlock_bh(dev);
3338 }
3339 
3340 /**
3341  *	dev_get_flags - get flags reported to userspace
3342  *	@dev: device
3343  *
3344  *	Get the combination of flag bits exported through APIs to userspace.
3345  */
3346 unsigned dev_get_flags(const struct net_device *dev)
3347 {
3348 	unsigned flags;
3349 
3350 	flags = (dev->flags & ~(IFF_PROMISC |
3351 				IFF_ALLMULTI |
3352 				IFF_RUNNING |
3353 				IFF_LOWER_UP |
3354 				IFF_DORMANT)) |
3355 		(dev->gflags & (IFF_PROMISC |
3356 				IFF_ALLMULTI));
3357 
3358 	if (netif_running(dev)) {
3359 		if (netif_oper_up(dev))
3360 			flags |= IFF_RUNNING;
3361 		if (netif_carrier_ok(dev))
3362 			flags |= IFF_LOWER_UP;
3363 		if (netif_dormant(dev))
3364 			flags |= IFF_DORMANT;
3365 	}
3366 
3367 	return flags;
3368 }
3369 
3370 /**
3371  *	dev_change_flags - change device settings
3372  *	@dev: device
3373  *	@flags: device state flags
3374  *
3375  *	Change settings on device based state flags. The flags are
3376  *	Change settings on a device based on state flags. The flags are
3377  */
3378 int dev_change_flags(struct net_device *dev, unsigned flags)
3379 {
3380 	int ret, changes;
3381 	int old_flags = dev->flags;
3382 
3383 	ASSERT_RTNL();
3384 
3385 	/*
3386 	 *	Set the flags on our device.
3387 	 */
3388 
3389 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3390 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3391 			       IFF_AUTOMEDIA)) |
3392 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3393 				    IFF_ALLMULTI));
3394 
3395 	/*
3396 	 *	Load in the correct multicast list now the flags have changed.
3397 	 */
3398 
3399 	if ((old_flags ^ flags) & IFF_MULTICAST)
3400 		dev_change_rx_flags(dev, IFF_MULTICAST);
3401 
3402 	dev_set_rx_mode(dev);
3403 
3404 	/*
3405 	 *	Have we downed the interface? We handle IFF_UP ourselves
3406 	 *	according to user attempts to set it, rather than blindly
3407 	 *	setting it.
3408 	 */
3409 
3410 	ret = 0;
3411 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
3412 		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3413 
3414 		if (!ret)
3415 			dev_set_rx_mode(dev);
3416 	}
3417 
3418 	if (dev->flags & IFF_UP &&
3419 	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3420 					  IFF_VOLATILE)))
3421 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
3422 
3423 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
3424 		int inc = (flags & IFF_PROMISC) ? +1 : -1;
3425 		dev->gflags ^= IFF_PROMISC;
3426 		dev_set_promiscuity(dev, inc);
3427 	}
3428 
3429 	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3430 	   is important. Some (broken) drivers set IFF_PROMISC when
3431 	   IFF_ALLMULTI is requested, without asking us and without reporting it.
3432 	 */
3433 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3434 		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3435 		dev->gflags ^= IFF_ALLMULTI;
3436 		dev_set_allmulti(dev, inc);
3437 	}
3438 
3439 	/* Exclude state transition flags, already notified */
3440 	changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3441 	if (changes)
3442 		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
3443 
3444 	return ret;
3445 }
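
/*
 * A minimal sketch of bringing an interface administratively up from inside
 * the kernel, which is what the SIOCSIFFLAGS path below does on behalf of
 * tools such as ifconfig; "dev" is a placeholder and the RTNL must be held.
 *
 *	rtnl_lock();
 *	err = dev_change_flags(dev, dev_get_flags(dev) | IFF_UP);
 *	rtnl_unlock();		(dev_open() was triggered if the device was down)
 */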
3446 
3447 /**
3448  *	dev_set_mtu - Change maximum transfer unit
3449  *	@dev: device
3450  *	@new_mtu: new transfer unit
3451  *
3452  *	Change the maximum transfer size of the network device.
3453  */
3454 int dev_set_mtu(struct net_device *dev, int new_mtu)
3455 {
3456 	int err;
3457 
3458 	if (new_mtu == dev->mtu)
3459 		return 0;
3460 
3461 	/*	MTU must not be negative.	 */
3462 	if (new_mtu < 0)
3463 		return -EINVAL;
3464 
3465 	if (!netif_device_present(dev))
3466 		return -ENODEV;
3467 
3468 	err = 0;
3469 	if (dev->change_mtu)
3470 		err = dev->change_mtu(dev, new_mtu);
3471 	else
3472 		dev->mtu = new_mtu;
3473 	if (!err && dev->flags & IFF_UP)
3474 		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
3475 	return err;
3476 }
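
/*
 * A minimal sketch of an in-kernel MTU change, normally done under the RTNL
 * (the SIOCSIFMTU path below holds it); the NETDEV_CHANGEMTU notifier lets
 * protocols react if the device is up.  "dev" is a placeholder.
 *
 *	rtnl_lock();
 *	err = dev_set_mtu(dev, 9000);	(e.g. switch to jumbo frames)
 *	rtnl_unlock();
 */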
3477 
3478 /**
3479  *	dev_set_mac_address - Change Media Access Control Address
3480  *	@dev: device
3481  *	@sa: new address
3482  *
3483  *	Change the hardware (MAC) address of the device
3484  */
3485 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3486 {
3487 	int err;
3488 
3489 	if (!dev->set_mac_address)
3490 		return -EOPNOTSUPP;
3491 	if (sa->sa_family != dev->type)
3492 		return -EINVAL;
3493 	if (!netif_device_present(dev))
3494 		return -ENODEV;
3495 	err = dev->set_mac_address(dev, sa);
3496 	if (!err)
3497 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3498 	return err;
3499 }
3500 
3501 /*
3502  *	Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
3503  */
3504 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
3505 {
3506 	int err;
3507 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3508 
3509 	if (!dev)
3510 		return -ENODEV;
3511 
3512 	switch (cmd) {
3513 		case SIOCGIFFLAGS:	/* Get interface flags */
3514 			ifr->ifr_flags = dev_get_flags(dev);
3515 			return 0;
3516 
3517 		case SIOCGIFMETRIC:	/* Get the metric on the interface
3518 					   (currently unused) */
3519 			ifr->ifr_metric = 0;
3520 			return 0;
3521 
3522 		case SIOCGIFMTU:	/* Get the MTU of a device */
3523 			ifr->ifr_mtu = dev->mtu;
3524 			return 0;
3525 
3526 		case SIOCGIFHWADDR:
3527 			if (!dev->addr_len)
3528 				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3529 			else
3530 				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3531 				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3532 			ifr->ifr_hwaddr.sa_family = dev->type;
3533 			return 0;
3534 
3535 		case SIOCGIFSLAVE:
3536 			err = -EINVAL;
3537 			break;
3538 
3539 		case SIOCGIFMAP:
3540 			ifr->ifr_map.mem_start = dev->mem_start;
3541 			ifr->ifr_map.mem_end   = dev->mem_end;
3542 			ifr->ifr_map.base_addr = dev->base_addr;
3543 			ifr->ifr_map.irq       = dev->irq;
3544 			ifr->ifr_map.dma       = dev->dma;
3545 			ifr->ifr_map.port      = dev->if_port;
3546 			return 0;
3547 
3548 		case SIOCGIFINDEX:
3549 			ifr->ifr_ifindex = dev->ifindex;
3550 			return 0;
3551 
3552 		case SIOCGIFTXQLEN:
3553 			ifr->ifr_qlen = dev->tx_queue_len;
3554 			return 0;
3555 
3556 		default:
3557 			/* dev_ioctl() should ensure this case
3558 			 * is never reached
3559 			 */
3560 			WARN_ON(1);
3561 			err = -EINVAL;
3562 			break;
3563 
3564 	}
3565 	return err;
3566 }
3567 
3568 /*
3569  *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
3570  */
3571 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3572 {
3573 	int err;
3574 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3575 
3576 	if (!dev)
3577 		return -ENODEV;
3578 
3579 	switch (cmd) {
3580 		case SIOCSIFFLAGS:	/* Set interface flags */
3581 			return dev_change_flags(dev, ifr->ifr_flags);
3582 
3583 		case SIOCSIFMETRIC:	/* Set the metric on the interface
3584 					   (currently unused) */
3585 			return -EOPNOTSUPP;
3586 
3587 		case SIOCSIFMTU:	/* Set the MTU of a device */
3588 			return dev_set_mtu(dev, ifr->ifr_mtu);
3589 
3590 		case SIOCSIFHWADDR:
3591 			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3592 
3593 		case SIOCSIFHWBROADCAST:
3594 			if (ifr->ifr_hwaddr.sa_family != dev->type)
3595 				return -EINVAL;
3596 			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3597 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3598 			call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3599 			return 0;
3600 
3601 		case SIOCSIFMAP:
3602 			if (dev->set_config) {
3603 				if (!netif_device_present(dev))
3604 					return -ENODEV;
3605 				return dev->set_config(dev, &ifr->ifr_map);
3606 			}
3607 			return -EOPNOTSUPP;
3608 
3609 		case SIOCADDMULTI:
3610 			if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
3611 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3612 				return -EINVAL;
3613 			if (!netif_device_present(dev))
3614 				return -ENODEV;
3615 			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3616 					  dev->addr_len, 1);
3617 
3618 		case SIOCDELMULTI:
3619 			if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
3620 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3621 				return -EINVAL;
3622 			if (!netif_device_present(dev))
3623 				return -ENODEV;
3624 			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3625 					     dev->addr_len, 1);
3626 
3627 		case SIOCSIFTXQLEN:
3628 			if (ifr->ifr_qlen < 0)
3629 				return -EINVAL;
3630 			dev->tx_queue_len = ifr->ifr_qlen;
3631 			return 0;
3632 
3633 		case SIOCSIFNAME:
3634 			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3635 			return dev_change_name(dev, ifr->ifr_newname);
3636 
3637 		/*
3638 		 *	Unknown or private ioctl
3639 		 */
3640 
3641 		default:
3642 			if ((cmd >= SIOCDEVPRIVATE &&
3643 			    cmd <= SIOCDEVPRIVATE + 15) ||
3644 			    cmd == SIOCBONDENSLAVE ||
3645 			    cmd == SIOCBONDRELEASE ||
3646 			    cmd == SIOCBONDSETHWADDR ||
3647 			    cmd == SIOCBONDSLAVEINFOQUERY ||
3648 			    cmd == SIOCBONDINFOQUERY ||
3649 			    cmd == SIOCBONDCHANGEACTIVE ||
3650 			    cmd == SIOCGMIIPHY ||
3651 			    cmd == SIOCGMIIREG ||
3652 			    cmd == SIOCSMIIREG ||
3653 			    cmd == SIOCBRADDIF ||
3654 			    cmd == SIOCBRDELIF ||
3655 			    cmd == SIOCWANDEV) {
3656 				err = -EOPNOTSUPP;
3657 				if (dev->do_ioctl) {
3658 					if (netif_device_present(dev))
3659 						err = dev->do_ioctl(dev, ifr,
3660 								    cmd);
3661 					else
3662 						err = -ENODEV;
3663 				}
3664 			} else
3665 				err = -EINVAL;
3666 
3667 	}
3668 	return err;
3669 }
3670 
3671 /*
3672  *	This function handles all "interface"-type I/O control requests. The actual
3673  *	'doing' part of this is dev_ifsioc above.
3674  */
3675 
3676 /**
3677  *	dev_ioctl	-	network device ioctl
3678  *	@net: the applicable net namespace
3679  *	@cmd: command to issue
3680  *	@arg: pointer to a struct ifreq in user space
3681  *
3682  *	Issue ioctl functions to devices. This is normally called by the
3683  *	user space syscall interfaces but can sometimes be useful for
3684  *	other purposes. The return value is the return from the syscall if
3685  *	positive or a negative errno code on error.
3686  */
3687 
3688 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3689 {
3690 	struct ifreq ifr;
3691 	int ret;
3692 	char *colon;
3693 
3694 	/* One special case: SIOCGIFCONF takes ifconf argument
3695 	   and requires shared lock, because it sleeps writing
3696 	   to user space.
3697 	 */
3698 
3699 	if (cmd == SIOCGIFCONF) {
3700 		rtnl_lock();
3701 		ret = dev_ifconf(net, (char __user *) arg);
3702 		rtnl_unlock();
3703 		return ret;
3704 	}
3705 	if (cmd == SIOCGIFNAME)
3706 		return dev_ifname(net, (struct ifreq __user *)arg);
3707 
3708 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3709 		return -EFAULT;
3710 
3711 	ifr.ifr_name[IFNAMSIZ-1] = 0;
3712 
3713 	colon = strchr(ifr.ifr_name, ':');
3714 	if (colon)
3715 		*colon = 0;
3716 
3717 	/*
3718 	 *	See which interface the caller is talking about.
3719 	 */
3720 
3721 	switch (cmd) {
3722 		/*
3723 		 *	These ioctl calls:
3724 		 *	- can be done by all.
3725 		 *	- atomic and do not require locking.
3726 		 *	- return a value
3727 		 */
3728 		case SIOCGIFFLAGS:
3729 		case SIOCGIFMETRIC:
3730 		case SIOCGIFMTU:
3731 		case SIOCGIFHWADDR:
3732 		case SIOCGIFSLAVE:
3733 		case SIOCGIFMAP:
3734 		case SIOCGIFINDEX:
3735 		case SIOCGIFTXQLEN:
3736 			dev_load(net, ifr.ifr_name);
3737 			read_lock(&dev_base_lock);
3738 			ret = dev_ifsioc_locked(net, &ifr, cmd);
3739 			read_unlock(&dev_base_lock);
3740 			if (!ret) {
3741 				if (colon)
3742 					*colon = ':';
3743 				if (copy_to_user(arg, &ifr,
3744 						 sizeof(struct ifreq)))
3745 					ret = -EFAULT;
3746 			}
3747 			return ret;
3748 
3749 		case SIOCETHTOOL:
3750 			dev_load(net, ifr.ifr_name);
3751 			rtnl_lock();
3752 			ret = dev_ethtool(net, &ifr);
3753 			rtnl_unlock();
3754 			if (!ret) {
3755 				if (colon)
3756 					*colon = ':';
3757 				if (copy_to_user(arg, &ifr,
3758 						 sizeof(struct ifreq)))
3759 					ret = -EFAULT;
3760 			}
3761 			return ret;
3762 
3763 		/*
3764 		 *	These ioctl calls:
3765 		 *	- require superuser power.
3766 		 *	- require strict serialization.
3767 		 *	- return a value
3768 		 */
3769 		case SIOCGMIIPHY:
3770 		case SIOCGMIIREG:
3771 		case SIOCSIFNAME:
3772 			if (!capable(CAP_NET_ADMIN))
3773 				return -EPERM;
3774 			dev_load(net, ifr.ifr_name);
3775 			rtnl_lock();
3776 			ret = dev_ifsioc(net, &ifr, cmd);
3777 			rtnl_unlock();
3778 			if (!ret) {
3779 				if (colon)
3780 					*colon = ':';
3781 				if (copy_to_user(arg, &ifr,
3782 						 sizeof(struct ifreq)))
3783 					ret = -EFAULT;
3784 			}
3785 			return ret;
3786 
3787 		/*
3788 		 *	These ioctl calls:
3789 		 *	- require superuser power.
3790 		 *	- require strict serialization.
3791 		 *	- do not return a value
3792 		 */
3793 		case SIOCSIFFLAGS:
3794 		case SIOCSIFMETRIC:
3795 		case SIOCSIFMTU:
3796 		case SIOCSIFMAP:
3797 		case SIOCSIFHWADDR:
3798 		case SIOCSIFSLAVE:
3799 		case SIOCADDMULTI:
3800 		case SIOCDELMULTI:
3801 		case SIOCSIFHWBROADCAST:
3802 		case SIOCSIFTXQLEN:
3803 		case SIOCSMIIREG:
3804 		case SIOCBONDENSLAVE:
3805 		case SIOCBONDRELEASE:
3806 		case SIOCBONDSETHWADDR:
3807 		case SIOCBONDCHANGEACTIVE:
3808 		case SIOCBRADDIF:
3809 		case SIOCBRDELIF:
3810 			if (!capable(CAP_NET_ADMIN))
3811 				return -EPERM;
3812 			/* fall through */
3813 		case SIOCBONDSLAVEINFOQUERY:
3814 		case SIOCBONDINFOQUERY:
3815 			dev_load(net, ifr.ifr_name);
3816 			rtnl_lock();
3817 			ret = dev_ifsioc(net, &ifr, cmd);
3818 			rtnl_unlock();
3819 			return ret;
3820 
3821 		case SIOCGIFMEM:
3822 			/* Get the per device memory space. We can add this but
3823 			 * currently do not support it */
3824 		case SIOCSIFMEM:
3825 			/* Set the per device memory buffer space.
3826 			 * Not applicable in our case */
3827 		case SIOCSIFLINK:
3828 			return -EINVAL;
3829 
3830 		/*
3831 		 *	Unknown or private ioctl.
3832 		 */
3833 		default:
3834 			if (cmd == SIOCWANDEV ||
3835 			    (cmd >= SIOCDEVPRIVATE &&
3836 			     cmd <= SIOCDEVPRIVATE + 15)) {
3837 				dev_load(net, ifr.ifr_name);
3838 				rtnl_lock();
3839 				ret = dev_ifsioc(net, &ifr, cmd);
3840 				rtnl_unlock();
3841 				if (!ret && copy_to_user(arg, &ifr,
3842 							 sizeof(struct ifreq)))
3843 					ret = -EFAULT;
3844 				return ret;
3845 			}
3846 			/* Take care of Wireless Extensions */
3847 			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
3848 				return wext_handle_ioctl(net, &ifr, cmd, arg);
3849 			return -EINVAL;
3850 	}
3851 }
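
/*
 * A user-space sketch of the most common request served here, SIOCGIFFLAGS,
 * which is handled by dev_ifsioc_locked() above; assumes an ordinary POSIX
 * program and the interface name "eth0".
 *
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	if (ioctl(fd, SIOCGIFFLAGS, &ifr) == 0 && (ifr.ifr_flags & IFF_UP))
 *		printf("eth0 is up\n");
 */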
3852 
3853 
3854 /**
3855  *	dev_new_index	-	allocate an ifindex
3856  *	@net: the applicable net namespace
3857  *
3858  *	Returns a suitable unique value for a new device interface
3859  *	number.  The caller must hold the rtnl semaphore or the
3860  *	dev_base_lock to be sure it remains unique.
3861  */
3862 static int dev_new_index(struct net *net)
3863 {
3864 	static int ifindex;
3865 	for (;;) {
3866 		if (++ifindex <= 0)
3867 			ifindex = 1;
3868 		if (!__dev_get_by_index(net, ifindex))
3869 			return ifindex;
3870 	}
3871 }
3872 
3873 /* Delayed registration/unregisteration */
3874 static LIST_HEAD(net_todo_list);
3875 
3876 static void net_set_todo(struct net_device *dev)
3877 {
3878 	list_add_tail(&dev->todo_list, &net_todo_list);
3879 }
3880 
3881 static void rollback_registered(struct net_device *dev)
3882 {
3883 	BUG_ON(dev_boot_phase);
3884 	ASSERT_RTNL();
3885 
3886 	/* Some devices call this without ever having registered, to unwind a failed initialization. */
3887 	if (dev->reg_state == NETREG_UNINITIALIZED) {
3888 		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3889 				  "was registered\n", dev->name, dev);
3890 
3891 		WARN_ON(1);
3892 		return;
3893 	}
3894 
3895 	BUG_ON(dev->reg_state != NETREG_REGISTERED);
3896 
3897 	/* If device is running, close it first. */
3898 	dev_close(dev);
3899 
3900 	/* And unlink it from device chain. */
3901 	unlist_netdevice(dev);
3902 
3903 	dev->reg_state = NETREG_UNREGISTERING;
3904 
3905 	synchronize_net();
3906 
3907 	/* Shutdown queueing discipline. */
3908 	dev_shutdown(dev);
3909 
3910 
3911 	/* Notify protocols that we are about to destroy
3912 	   this device. They should clean up all of their state.
3913 	*/
3914 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
3915 
3916 	/*
3917 	 *	Flush the unicast and multicast chains
3918 	 */
3919 	dev_addr_discard(dev);
3920 
3921 	if (dev->uninit)
3922 		dev->uninit(dev);
3923 
3924 	/* Notifier chain MUST detach us from master device. */
3925 	WARN_ON(dev->master);
3926 
3927 	/* Remove entries from kobject tree */
3928 	netdev_unregister_kobject(dev);
3929 
3930 	synchronize_net();
3931 
3932 	dev_put(dev);
3933 }
3934 
3935 static void __netdev_init_queue_locks_one(struct net_device *dev,
3936 					  struct netdev_queue *dev_queue,
3937 					  void *_unused)
3938 {
3939 	spin_lock_init(&dev_queue->_xmit_lock);
3940 	netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
3941 	dev_queue->xmit_lock_owner = -1;
3942 }
3943 
3944 static void netdev_init_queue_locks(struct net_device *dev)
3945 {
3946 	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
3947 	__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
3948 }
3949 
3950 unsigned long netdev_fix_features(unsigned long features, const char *name)
3951 {
3952 	/* Fix illegal SG+CSUM combinations. */
3953 	if ((features & NETIF_F_SG) &&
3954 	    !(features & NETIF_F_ALL_CSUM)) {
3955 		if (name)
3956 			printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
3957 			       "checksum feature.\n", name);
3958 		features &= ~NETIF_F_SG;
3959 	}
3960 
3961 	/* TSO requires that SG is present as well. */
3962 	if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
3963 		if (name)
3964 			printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
3965 			       "SG feature.\n", name);
3966 		features &= ~NETIF_F_TSO;
3967 	}
3968 
3969 	if (features & NETIF_F_UFO) {
3970 		if (!(features & NETIF_F_GEN_CSUM)) {
3971 			if (name)
3972 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
3973 				       "since no NETIF_F_HW_CSUM feature.\n",
3974 				       name);
3975 			features &= ~NETIF_F_UFO;
3976 		}
3977 
3978 		if (!(features & NETIF_F_SG)) {
3979 			if (name)
3980 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
3981 				       "since no NETIF_F_SG feature.\n", name);
3982 			features &= ~NETIF_F_UFO;
3983 		}
3984 	}
3985 
3986 	return features;
3987 }
3988 EXPORT_SYMBOL(netdev_fix_features);
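
/*
 * Editorial sketch (not part of the original file): how a hypothetical
 * driver could use netdev_fix_features() to sanitise a requested feature
 * mask before committing it.  example_apply_features() is an invented
 * name used for illustration only.
 */
#if 0
static void example_apply_features(struct net_device *dev,
				   unsigned long wanted)
{
	/* Drop combinations the core considers illegal, e.g. SG or TSO
	 * without a usable checksum offload, logging against dev->name. */
	dev->features = netdev_fix_features(wanted, dev->name);
}
#endif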
3989 
3990 /**
3991  *	register_netdevice	- register a network device
3992  *	@dev: device to register
3993  *
3994  *	Take a completed network device structure and add it to the kernel
3995  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3996  *	chain. 0 is returned on success. A negative errno code is returned
3997  *	on a failure to set up the device, or if the name is a duplicate.
3998  *
3999  *	Callers must hold the rtnl semaphore. You may want
4000  *	register_netdev() instead of this.
4001  *
4002  *	BUGS:
4003  *	The locking appears insufficient to guarantee two parallel registers
4004  *	will not get the same name.
4005  */
4006 
4007 int register_netdevice(struct net_device *dev)
4008 {
4009 	struct hlist_head *head;
4010 	struct hlist_node *p;
4011 	int ret;
4012 	struct net *net;
4013 
4014 	BUG_ON(dev_boot_phase);
4015 	ASSERT_RTNL();
4016 
4017 	might_sleep();
4018 
4019 	/* When net_devices are persistent, this will be fatal. */
4020 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
4021 	BUG_ON(!dev_net(dev));
4022 	net = dev_net(dev);
4023 
4024 	spin_lock_init(&dev->addr_list_lock);
4025 	netdev_set_addr_lockdep_class(dev);
4026 	netdev_init_queue_locks(dev);
4027 
4028 	dev->iflink = -1;
4029 
4030 	/* Init, if this function is available */
4031 	if (dev->init) {
4032 		ret = dev->init(dev);
4033 		if (ret) {
4034 			if (ret > 0)
4035 				ret = -EIO;
4036 			goto out;
4037 		}
4038 	}
4039 
4040 	if (!dev_valid_name(dev->name)) {
4041 		ret = -EINVAL;
4042 		goto err_uninit;
4043 	}
4044 
4045 	dev->ifindex = dev_new_index(net);
4046 	if (dev->iflink == -1)
4047 		dev->iflink = dev->ifindex;
4048 
4049 	/* Check for existence of name */
4050 	head = dev_name_hash(net, dev->name);
4051 	hlist_for_each(p, head) {
4052 		struct net_device *d
4053 			= hlist_entry(p, struct net_device, name_hlist);
4054 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
4055 			ret = -EEXIST;
4056 			goto err_uninit;
4057 		}
4058 	}
4059 
4060 	/* Fix illegal checksum combinations */
4061 	if ((dev->features & NETIF_F_HW_CSUM) &&
4062 	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4063 		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
4064 		       dev->name);
4065 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
4066 	}
4067 
4068 	if ((dev->features & NETIF_F_NO_CSUM) &&
4069 	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4070 		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
4071 		       dev->name);
4072 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
4073 	}
4074 
4075 	dev->features = netdev_fix_features(dev->features, dev->name);
4076 
4077 	/* Enable software GSO if SG is supported. */
4078 	if (dev->features & NETIF_F_SG)
4079 		dev->features |= NETIF_F_GSO;
4080 
4081 	netdev_initialize_kobject(dev);
4082 	ret = netdev_register_kobject(dev);
4083 	if (ret)
4084 		goto err_uninit;
4085 	dev->reg_state = NETREG_REGISTERED;
4086 
4087 	/*
4088 	 *	Default initial state at registration is that the
4089 	 *	device is present.
4090 	 */
4091 
4092 	set_bit(__LINK_STATE_PRESENT, &dev->state);
4093 
4094 	dev_init_scheduler(dev);
4095 	dev_hold(dev);
4096 	list_netdevice(dev);
4097 
4098 	/* Notify protocols, that a new device appeared. */
4099 	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
4100 	ret = notifier_to_errno(ret);
4101 	if (ret) {
4102 		rollback_registered(dev);
4103 		dev->reg_state = NETREG_UNREGISTERED;
4104 	}
4105 
4106 out:
4107 	return ret;
4108 
4109 err_uninit:
4110 	if (dev->uninit)
4111 		dev->uninit(dev);
4112 	goto out;
4113 }
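
/*
 * Editorial sketch: register_netdevice() is meant for callers that already
 * hold the RTNL, e.g. code creating a device in response to a netlink
 * request.  example_create_under_rtnl() is a hypothetical caller.
 */
#if 0
static int example_create_under_rtnl(struct net_device *dev)
{
	ASSERT_RTNL();			/* caller must hold the rtnl semaphore */
	return register_netdevice(dev);	/* 0 on success, -errno on failure */
}
#endif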
4114 
4115 /**
4116  *	register_netdev	- register a network device
4117  *	@dev: device to register
4118  *
4119  *	Take a completed network device structure and add it to the kernel
4120  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4121  *	chain. 0 is returned on success. A negative errno code is returned
4122  *	on a failure to set up the device, or if the name is a duplicate.
4123  *
4124  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
4125  *	and expands the device name if you passed a format string to
4126  *	alloc_netdev.
4127  */
4128 int register_netdev(struct net_device *dev)
4129 {
4130 	int err;
4131 
4132 	rtnl_lock();
4133 
4134 	/*
4135 	 * If the name is a format string the caller wants us to do a
4136 	 * name allocation.
4137 	 */
4138 	if (strchr(dev->name, '%')) {
4139 		err = dev_alloc_name(dev, dev->name);
4140 		if (err < 0)
4141 			goto out;
4142 	}
4143 
4144 	err = register_netdevice(dev);
4145 out:
4146 	rtnl_unlock();
4147 	return err;
4148 }
4149 EXPORT_SYMBOL(register_netdev);
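
/*
 * Editorial sketch of the usual driver probe sequence around
 * register_netdev().  The names example_probe and struct example_priv are
 * invented for illustration; alloc_etherdev() reserves the private area
 * behind the net_device and leaves an "eth%d" name template for
 * register_netdev() to expand.
 */
#if 0
struct example_priv {
	int example_field;
};

static int example_probe(void)
{
	struct net_device *dev;
	int err;

	dev = alloc_etherdev(sizeof(struct example_priv));
	if (!dev)
		return -ENOMEM;

	err = register_netdev(dev);
	if (err) {
		free_netdev(dev);	/* never registered: plain free */
		return err;
	}
	return 0;
}
#endif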
4150 
4151 /*
4152  * netdev_wait_allrefs - wait until all references are gone.
4153  *
4154  * This is called when unregistering network devices.
4155  *
4156  * Any protocol or device that holds a reference should register
4157  * for netdevice notification, and clean up and put back the
4158  * reference if they receive an UNREGISTER event.
4159  * We can get stuck here if buggy protocols don't correctly
4160  * call dev_put.
4161  */
4162 static void netdev_wait_allrefs(struct net_device *dev)
4163 {
4164 	unsigned long rebroadcast_time, warning_time;
4165 
4166 	rebroadcast_time = warning_time = jiffies;
4167 	while (atomic_read(&dev->refcnt) != 0) {
4168 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
4169 			rtnl_lock();
4170 
4171 			/* Rebroadcast unregister notification */
4172 			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4173 
4174 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
4175 				     &dev->state)) {
4176 				/* We must not have linkwatch events
4177 				 * pending on unregister. If this
4178 				 * happens, we simply run the queue
4179 				 * unscheduled, resulting in a noop
4180 				 * for this device.
4181 				 */
4182 				linkwatch_run_queue();
4183 			}
4184 
4185 			__rtnl_unlock();
4186 
4187 			rebroadcast_time = jiffies;
4188 		}
4189 
4190 		msleep(250);
4191 
4192 		if (time_after(jiffies, warning_time + 10 * HZ)) {
4193 			printk(KERN_EMERG "unregister_netdevice: "
4194 			       "waiting for %s to become free. Usage "
4195 			       "count = %d\n",
4196 			       dev->name, atomic_read(&dev->refcnt));
4197 			warning_time = jiffies;
4198 		}
4199 	}
4200 }
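
/*
 * Editorial sketch of the cooperating side that the comment above relies
 * on: a protocol holding device references registers a netdevice notifier
 * and drops its references on NETDEV_UNREGISTER, otherwise
 * netdev_wait_allrefs() keeps rebroadcasting and warning.
 * example_netdev_event is a hypothetical handler.
 */
#if 0
static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	if (event == NETDEV_UNREGISTER) {
		/* tear down per-device state and release the reference
		 * taken earlier with dev_hold() */
		dev_put(dev);
	}
	return NOTIFY_DONE;
}
#endif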
4201 
4202 /* The sequence is:
4203  *
4204  *	rtnl_lock();
4205  *	...
4206  *	register_netdevice(x1);
4207  *	register_netdevice(x2);
4208  *	...
4209  *	unregister_netdevice(y1);
4210  *	unregister_netdevice(y2);
4211  *      ...
4212  *	rtnl_unlock();
4213  *	free_netdev(y1);
4214  *	free_netdev(y2);
4215  *
4216  * We are invoked by rtnl_unlock().
4217  * This allows us to deal with problems:
4218  * 1) We can delete sysfs objects which invoke hotplug
4219  *    without deadlocking with linkwatch via keventd.
4220  * 2) Since we run with the RTNL semaphore not held, we can sleep
4221  *    safely in order to wait for the netdev refcnt to drop to zero.
4222  *
4223  * We must not return until all unregister events added during
4224  * the interval the lock was held have been completed.
4225  */
4226 void netdev_run_todo(void)
4227 {
4228 	struct list_head list;
4229 
4230 	/* Snapshot list, allow later requests */
4231 	list_replace_init(&net_todo_list, &list);
4232 
4233 	__rtnl_unlock();
4234 
4235 	while (!list_empty(&list)) {
4236 		struct net_device *dev
4237 			= list_entry(list.next, struct net_device, todo_list);
4238 		list_del(&dev->todo_list);
4239 
4240 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
4241 			printk(KERN_ERR "network todo '%s' but state %d\n",
4242 			       dev->name, dev->reg_state);
4243 			dump_stack();
4244 			continue;
4245 		}
4246 
4247 		dev->reg_state = NETREG_UNREGISTERED;
4248 
4249 		on_each_cpu(flush_backlog, dev, 1);
4250 
4251 		netdev_wait_allrefs(dev);
4252 
4253 		/* paranoia */
4254 		BUG_ON(atomic_read(&dev->refcnt));
4255 		WARN_ON(dev->ip_ptr);
4256 		WARN_ON(dev->ip6_ptr);
4257 		WARN_ON(dev->dn_ptr);
4258 
4259 		if (dev->destructor)
4260 			dev->destructor(dev);
4261 
4262 		/* Free network device */
4263 		kobject_put(&dev->dev.kobj);
4264 	}
4265 }
4266 
4267 static struct net_device_stats *internal_stats(struct net_device *dev)
4268 {
4269 	return &dev->stats;
4270 }
4271 
4272 static void netdev_init_one_queue(struct net_device *dev,
4273 				  struct netdev_queue *queue,
4274 				  void *_unused)
4275 {
4276 	queue->dev = dev;
4277 }
4278 
4279 static void netdev_init_queues(struct net_device *dev)
4280 {
4281 	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
4282 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
4283 	spin_lock_init(&dev->tx_global_lock);
4284 }
4285 
4286 /**
4287  *	alloc_netdev_mq - allocate network device
4288  *	@sizeof_priv:	size of private data to allocate space for
4289  *	@name:		device name format string
4290  *	@setup:		callback to initialize device
4291  *	@queue_count:	the number of subqueues to allocate
4292  *
4293  *	Allocates a struct net_device with private data area for driver use
4294  *	and performs basic initialization.  Also allocates subqueue structs
4295  *	for each queue on the device at the end of the netdevice.
4296  */
4297 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
4298 		void (*setup)(struct net_device *), unsigned int queue_count)
4299 {
4300 	struct netdev_queue *tx;
4301 	struct net_device *dev;
4302 	size_t alloc_size;
4303 	void *p;
4304 
4305 	BUG_ON(strlen(name) >= sizeof(dev->name));
4306 
4307 	alloc_size = sizeof(struct net_device);
4308 	if (sizeof_priv) {
4309 		/* ensure 32-byte alignment of private area */
4310 		alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
4311 		alloc_size += sizeof_priv;
4312 	}
4313 	/* ensure 32-byte alignment of whole construct */
4314 	alloc_size += NETDEV_ALIGN_CONST;
4315 
4316 	p = kzalloc(alloc_size, GFP_KERNEL);
4317 	if (!p) {
4318 		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
4319 		return NULL;
4320 	}
4321 
4322 	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
4323 	if (!tx) {
4324 		printk(KERN_ERR "alloc_netdev: Unable to allocate "
4325 		       "tx qdiscs.\n");
4326 		kfree(p);
4327 		return NULL;
4328 	}
4329 
4330 	dev = (struct net_device *)
4331 		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
4332 	dev->padded = (char *)dev - (char *)p;
4333 	dev_net_set(dev, &init_net);
4334 
4335 	dev->_tx = tx;
4336 	dev->num_tx_queues = queue_count;
4337 	dev->real_num_tx_queues = queue_count;
4338 
4339 	if (sizeof_priv) {
4340 		dev->priv = ((char *)dev +
4341 			     ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
4342 			      & ~NETDEV_ALIGN_CONST));
4343 	}
4344 
4345 	dev->gso_max_size = GSO_MAX_SIZE;
4346 
4347 	netdev_init_queues(dev);
4348 
4349 	dev->get_stats = internal_stats;
4350 	netpoll_netdev_init(dev);
4351 	setup(dev);
4352 	strcpy(dev->name, name);
4353 	return dev;
4354 }
4355 EXPORT_SYMBOL(alloc_netdev_mq);
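
/*
 * Editorial sketch: allocating a multiqueue Ethernet-style device with
 * alloc_netdev_mq().  The private size, queue count and "mq%d" name
 * template are illustrative values only; ether_setup() is the usual
 * setup callback for Ethernet-like devices.
 */
#if 0
static struct net_device *example_alloc_multiqueue(void)
{
	/* 128 bytes of private data, 4 TX subqueues, name expanded later
	 * by register_netdev()/dev_alloc_name(). */
	return alloc_netdev_mq(128, "mq%d", ether_setup, 4);
}
#endif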
4356 
4357 /**
4358  *	free_netdev - free network device
4359  *	@dev: device
4360  *
4361  *	This function does the last stage of destroying an allocated device
4362  * 	interface. The reference to the device object is released.
4363  *	If this is the last reference then it will be freed.
4364  */
4365 void free_netdev(struct net_device *dev)
4366 {
4367 	release_net(dev_net(dev));
4368 
4369 	kfree(dev->_tx);
4370 
4371 	/*  Compatibility with error handling in drivers */
4372 	if (dev->reg_state == NETREG_UNINITIALIZED) {
4373 		kfree((char *)dev - dev->padded);
4374 		return;
4375 	}
4376 
4377 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
4378 	dev->reg_state = NETREG_RELEASED;
4379 
4380 	/* will free via device release */
4381 	put_device(&dev->dev);
4382 }
4383 
4384 /**
4385  *	synchronize_net -  Synchronize with packet receive processing
4386  *
4387  *	Wait for packets currently being received to be done.
4388  *	Does not block later packets from starting.
4389  */
4390 void synchronize_net(void)
4391 {
4392 	might_sleep();
4393 	synchronize_rcu();
4394 }
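
/*
 * Editorial sketch of the "unpublish, synchronize, free" pattern this
 * helper supports; example_retire_handler() is hypothetical.
 */
#if 0
static void example_retire_handler(struct packet_type *pt)
{
	__dev_remove_pack(pt);	/* unhook; does not wait for receivers */
	synchronize_net();	/* wait for in-flight receive processing */
	kfree(pt);		/* nothing can reference it any more */
}
#endif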
4395 
4396 /**
4397  *	unregister_netdevice - remove device from the kernel
4398  *	@dev: device
4399  *
4400  *	This function shuts down a device interface and removes it
4401  *	from the kernel tables.
4402  *
4403  *	Callers must hold the rtnl semaphore.  You may want
4404  *	unregister_netdev() instead of this.
4405  */
4406 
4407 void unregister_netdevice(struct net_device *dev)
4408 {
4409 	ASSERT_RTNL();
4410 
4411 	rollback_registered(dev);
4412 	/* Finish processing unregister after unlock */
4413 	net_set_todo(dev);
4414 }
4415 
4416 /**
4417  *	unregister_netdev - remove device from the kernel
4418  *	@dev: device
4419  *
4420  *	This function shuts down a device interface and removes it
4421  *	from the kernel tables.
4422  *
4423  *	This is just a wrapper for unregister_netdevice that takes
4424  *	the rtnl semaphore.  In general you want to use this and not
4425  *	unregister_netdevice.
4426  */
4427 void unregister_netdev(struct net_device *dev)
4428 {
4429 	rtnl_lock();
4430 	unregister_netdevice(dev);
4431 	rtnl_unlock();
4432 }
4433 
4434 EXPORT_SYMBOL(unregister_netdev);
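
/*
 * Editorial sketch of the usual driver teardown order; example_remove()
 * is a hypothetical counterpart to the probe sketch shown earlier.
 */
#if 0
static void example_remove(struct net_device *dev)
{
	/* Takes the RTNL; the todo list run from rtnl_unlock() waits for
	 * all outstanding references before returning. */
	unregister_netdev(dev);
	/* Drops the reference taken at allocation; the memory goes away
	 * with the last kobject reference. */
	free_netdev(dev);
}
#endif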
4435 
4436 /**
4437  *	dev_change_net_namespace - move device to a different network namespace
4438  *	@dev: device
4439  *	@net: network namespace
4440  *	@pat: If not NULL name pattern to try if the current device name
4441  *	      is already taken in the destination network namespace.
4442  *
4443  *	This function shuts down a device interface and moves it
4444  *	to a new network namespace. On success 0 is returned, on
4445  *	a failure a negative errno code is returned.
4446  *
4447  *	Callers must hold the rtnl semaphore.
4448  */
4449 
4450 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
4451 {
4452 	char buf[IFNAMSIZ];
4453 	const char *destname;
4454 	int err;
4455 
4456 	ASSERT_RTNL();
4457 
4458 	/* Don't allow namespace local devices to be moved. */
4459 	err = -EINVAL;
4460 	if (dev->features & NETIF_F_NETNS_LOCAL)
4461 		goto out;
4462 
4463 	/* Ensure the device has been registered */
4464 	err = -EINVAL;
4465 	if (dev->reg_state != NETREG_REGISTERED)
4466 		goto out;
4467 
4468 	/* Get out if there is nothing to do */
4469 	err = 0;
4470 	if (net_eq(dev_net(dev), net))
4471 		goto out;
4472 
4473 	/* Pick the destination device name, and ensure
4474 	 * we can use it in the destination network namespace.
4475 	 */
4476 	err = -EEXIST;
4477 	destname = dev->name;
4478 	if (__dev_get_by_name(net, destname)) {
4479 		/* We get here if we can't use the current device name */
4480 		if (!pat)
4481 			goto out;
4482 		if (!dev_valid_name(pat))
4483 			goto out;
4484 		if (strchr(pat, '%')) {
4485 			if (__dev_alloc_name(net, pat, buf) < 0)
4486 				goto out;
4487 			destname = buf;
4488 		} else
4489 			destname = pat;
4490 		if (__dev_get_by_name(net, destname))
4491 			goto out;
4492 	}
4493 
4494 	/*
4495 	 * And now a mini version of register_netdevice and unregister_netdevice.
4496 	 */
4497 
4498 	/* If device is running close it first. */
4499 	dev_close(dev);
4500 
4501 	/* And unlink it from device chain */
4502 	err = -ENODEV;
4503 	unlist_netdevice(dev);
4504 
4505 	synchronize_net();
4506 
4507 	/* Shutdown queueing discipline. */
4508 	dev_shutdown(dev);
4509 
4510 	/* Notify protocols that we are about to destroy
4511 	   this device. They should clean up all of their state.
4512 	*/
4513 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4514 
4515 	/*
4516 	 *	Flush the unicast and multicast chains
4517 	 */
4518 	dev_addr_discard(dev);
4519 
4520 	/* Actually switch the network namespace */
4521 	dev_net_set(dev, net);
4522 
4523 	/* Assign the new device name */
4524 	if (destname != dev->name)
4525 		strcpy(dev->name, destname);
4526 
4527 	/* If there is an ifindex conflict assign a new one */
4528 	if (__dev_get_by_index(net, dev->ifindex)) {
4529 		int iflink = (dev->iflink == dev->ifindex);
4530 		dev->ifindex = dev_new_index(net);
4531 		if (iflink)
4532 			dev->iflink = dev->ifindex;
4533 	}
4534 
4535 	/* Fixup kobjects */
4536 	netdev_unregister_kobject(dev);
4537 	err = netdev_register_kobject(dev);
4538 	WARN_ON(err);
4539 
4540 	/* Add the device back in the hashes */
4541 	list_netdevice(dev);
4542 
4543 	/* Notify protocols, that a new device appeared. */
4544 	call_netdevice_notifiers(NETDEV_REGISTER, dev);
4545 
4546 	synchronize_net();
4547 	err = 0;
4548 out:
4549 	return err;
4550 }
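
/*
 * Editorial sketch: moving a device into another namespace under the RTNL,
 * with a "dev%d" fallback pattern in case dev->name is already taken in
 * the target namespace.  example_move_dev() is hypothetical.
 */
#if 0
static int example_move_dev(struct net_device *dev, struct net *target)
{
	int err;

	rtnl_lock();
	err = dev_change_net_namespace(dev, target, "dev%d");
	rtnl_unlock();
	return err;
}
#endif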
4551 
4552 static int dev_cpu_callback(struct notifier_block *nfb,
4553 			    unsigned long action,
4554 			    void *ocpu)
4555 {
4556 	struct sk_buff **list_skb;
4557 	struct Qdisc **list_net;
4558 	struct sk_buff *skb;
4559 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
4560 	struct softnet_data *sd, *oldsd;
4561 
4562 	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
4563 		return NOTIFY_OK;
4564 
4565 	local_irq_disable();
4566 	cpu = smp_processor_id();
4567 	sd = &per_cpu(softnet_data, cpu);
4568 	oldsd = &per_cpu(softnet_data, oldcpu);
4569 
4570 	/* Find end of our completion_queue. */
4571 	list_skb = &sd->completion_queue;
4572 	while (*list_skb)
4573 		list_skb = &(*list_skb)->next;
4574 	/* Append completion queue from offline CPU. */
4575 	*list_skb = oldsd->completion_queue;
4576 	oldsd->completion_queue = NULL;
4577 
4578 	/* Find end of our output_queue. */
4579 	list_net = &sd->output_queue;
4580 	while (*list_net)
4581 		list_net = &(*list_net)->next_sched;
4582 	/* Append output queue from offline CPU. */
4583 	*list_net = oldsd->output_queue;
4584 	oldsd->output_queue = NULL;
4585 
4586 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
4587 	local_irq_enable();
4588 
4589 	/* Process offline CPU's input_pkt_queue */
4590 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
4591 		netif_rx(skb);
4592 
4593 	return NOTIFY_OK;
4594 }
4595 
4596 #ifdef CONFIG_NET_DMA
4597 /**
4598  * net_dma_rebalance - try to maintain one DMA channel per CPU
4599  * @net_dma: DMA client and associated data (lock, channels, channel_mask)
4600  *
4601  * This is called when the number of channels allocated to the net_dma client
4602  * changes.  The net_dma client tries to have one DMA channel per CPU.
4603  */
4604 
4605 static void net_dma_rebalance(struct net_dma *net_dma)
4606 {
4607 	unsigned int cpu, i, n, chan_idx;
4608 	struct dma_chan *chan;
4609 
4610 	if (cpus_empty(net_dma->channel_mask)) {
4611 		for_each_online_cpu(cpu)
4612 			rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
4613 		return;
4614 	}
4615 
4616 	i = 0;
4617 	cpu = first_cpu(cpu_online_map);
4618 
4619 	for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
4620 		chan = net_dma->channels[chan_idx];
4621 
4622 		n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
4623 		   + (i < (num_online_cpus() %
4624 			cpus_weight(net_dma->channel_mask)) ? 1 : 0));
4625 
4626 		while (n) {
4627 			per_cpu(softnet_data, cpu).net_dma = chan;
4628 			cpu = next_cpu(cpu, cpu_online_map);
4629 			n--;
4630 		}
4631 		i++;
4632 	}
4633 }
4634 
4635 /**
4636  * netdev_dma_event - event callback for the net_dma_client
4637  * @client: should always be net_dma_client
4638  * @chan: DMA channel for the event
4639  * @state: DMA state to be handled
4640  */
4641 static enum dma_state_client
4642 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
4643 	enum dma_state state)
4644 {
4645 	int i, found = 0, pos = -1;
4646 	struct net_dma *net_dma =
4647 		container_of(client, struct net_dma, client);
4648 	enum dma_state_client ack = DMA_DUP; /* default: take no action */
4649 
4650 	spin_lock(&net_dma->lock);
4651 	switch (state) {
4652 	case DMA_RESOURCE_AVAILABLE:
4653 		for (i = 0; i < nr_cpu_ids; i++)
4654 			if (net_dma->channels[i] == chan) {
4655 				found = 1;
4656 				break;
4657 			} else if (net_dma->channels[i] == NULL && pos < 0)
4658 				pos = i;
4659 
4660 		if (!found && pos >= 0) {
4661 			ack = DMA_ACK;
4662 			net_dma->channels[pos] = chan;
4663 			cpu_set(pos, net_dma->channel_mask);
4664 			net_dma_rebalance(net_dma);
4665 		}
4666 		break;
4667 	case DMA_RESOURCE_REMOVED:
4668 		for (i = 0; i < nr_cpu_ids; i++)
4669 			if (net_dma->channels[i] == chan) {
4670 				found = 1;
4671 				pos = i;
4672 				break;
4673 			}
4674 
4675 		if (found) {
4676 			ack = DMA_ACK;
4677 			cpu_clear(pos, net_dma->channel_mask);
4678 			net_dma->channels[i] = NULL;
4679 			net_dma_rebalance(net_dma);
4680 		}
4681 		break;
4682 	default:
4683 		break;
4684 	}
4685 	spin_unlock(&net_dma->lock);
4686 
4687 	return ack;
4688 }
4689 
4690 /**
4691  * netdev_dma_register - register the networking subsystem as a DMA client
4692  */
4693 static int __init netdev_dma_register(void)
4694 {
4695 	net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct dma_chan *),
4696 								GFP_KERNEL);
4697 	if (unlikely(!net_dma.channels)) {
4698 		printk(KERN_NOTICE
4699 				"netdev_dma: no memory for net_dma.channels\n");
4700 		return -ENOMEM;
4701 	}
4702 	spin_lock_init(&net_dma.lock);
4703 	dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
4704 	dma_async_client_register(&net_dma.client);
4705 	dma_async_client_chan_request(&net_dma.client);
4706 	return 0;
4707 }
4708 
4709 #else
4710 static int __init netdev_dma_register(void) { return -ENODEV; }
4711 #endif /* CONFIG_NET_DMA */
4712 
4713 /**
4714  *	netdev_increment_features - increment feature set by one
4715  *	@all: current feature set
4716  *	@one: new feature set
4717  *	@mask: mask feature set
4718  *
4719  *	Computes a new feature set after adding a device with feature set
4720  *	@one to the master device with current feature set @all.  Will not
4721  *	enable anything that is off in @mask. Returns the new feature set.
4722  */
4723 unsigned long netdev_increment_features(unsigned long all, unsigned long one,
4724 					unsigned long mask)
4725 {
4726 	/* If device needs checksumming, downgrade to it. */
4727 	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4728 		all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
4729 	else if (mask & NETIF_F_ALL_CSUM) {
4730 		/* If one device supports v4/v6 checksumming, set for all. */
4731 		if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
4732 		    !(all & NETIF_F_GEN_CSUM)) {
4733 			all &= ~NETIF_F_ALL_CSUM;
4734 			all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
4735 		}
4736 
4737 		/* If one device supports hw checksumming, set for all. */
4738 		if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
4739 			all &= ~NETIF_F_ALL_CSUM;
4740 			all |= NETIF_F_HW_CSUM;
4741 		}
4742 	}
4743 
4744 	one |= NETIF_F_ALL_CSUM;
4745 
4746 	one |= all & NETIF_F_ONE_FOR_ALL;
4747 	all &= one | NETIF_F_LLTX | NETIF_F_GSO;
4748 	all |= one & mask & NETIF_F_ONE_FOR_ALL;
4749 
4750 	return all;
4751 }
4752 EXPORT_SYMBOL(netdev_increment_features);
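
/*
 * Editorial sketch: how a master device (bonding-style) might fold one
 * slave's feature set into its own with netdev_increment_features().
 * The mask argument limits what the result may enable; using the master's
 * current features as the mask is an illustrative choice only.
 */
#if 0
static void example_fold_slave_features(struct net_device *master,
					struct net_device *slave)
{
	master->features = netdev_increment_features(master->features,
						     slave->features,
						     master->features);
}
#endif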
4753 
4754 static struct hlist_head *netdev_create_hash(void)
4755 {
4756 	int i;
4757 	struct hlist_head *hash;
4758 
4759 	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
4760 	if (hash != NULL)
4761 		for (i = 0; i < NETDEV_HASHENTRIES; i++)
4762 			INIT_HLIST_HEAD(&hash[i]);
4763 
4764 	return hash;
4765 }
4766 
4767 /* Initialize per network namespace state */
4768 static int __net_init netdev_init(struct net *net)
4769 {
4770 	INIT_LIST_HEAD(&net->dev_base_head);
4771 
4772 	net->dev_name_head = netdev_create_hash();
4773 	if (net->dev_name_head == NULL)
4774 		goto err_name;
4775 
4776 	net->dev_index_head = netdev_create_hash();
4777 	if (net->dev_index_head == NULL)
4778 		goto err_idx;
4779 
4780 	return 0;
4781 
4782 err_idx:
4783 	kfree(net->dev_name_head);
4784 err_name:
4785 	return -ENOMEM;
4786 }
4787 
4788 /**
4789  *	netdev_drivername - network driver for the device
4790  *	@dev: network device
4791  *	@buffer: buffer for resulting name
4792  *	@len: size of buffer
4793  *
4794  *	Determine network driver for device.
4795  */
4796 char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
4797 {
4798 	const struct device_driver *driver;
4799 	const struct device *parent;
4800 
4801 	if (len <= 0 || !buffer)
4802 		return buffer;
4803 	buffer[0] = 0;
4804 
4805 	parent = dev->dev.parent;
4806 
4807 	if (!parent)
4808 		return buffer;
4809 
4810 	driver = parent->driver;
4811 	if (driver && driver->name)
4812 		strlcpy(buffer, driver->name, len);
4813 	return buffer;
4814 }
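
/*
 * Editorial sketch: netdev_drivername() used in a diagnostic message, much
 * as watchdog-style code does.  example_report_driver() is hypothetical.
 */
#if 0
static void example_report_driver(struct net_device *dev)
{
	char drivername[64];

	printk(KERN_INFO "%s: handled by driver %s\n", dev->name,
	       netdev_drivername(dev, drivername, sizeof(drivername)));
}
#endif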
4815 
4816 static void __net_exit netdev_exit(struct net *net)
4817 {
4818 	kfree(net->dev_name_head);
4819 	kfree(net->dev_index_head);
4820 }
4821 
4822 static struct pernet_operations __net_initdata netdev_net_ops = {
4823 	.init = netdev_init,
4824 	.exit = netdev_exit,
4825 };
4826 
4827 static void __net_exit default_device_exit(struct net *net)
4828 {
4829 	struct net_device *dev, *next;
4830 	/*
4831 	 * Push all migratable network devices back to the
4832 	 * initial network namespace
4833 	 */
4834 	rtnl_lock();
4835 	for_each_netdev_safe(net, dev, next) {
4836 		int err;
4837 		char fb_name[IFNAMSIZ];
4838 
4839 		/* Ignore unmovable devices (e.g. loopback) */
4840 		if (dev->features & NETIF_F_NETNS_LOCAL)
4841 			continue;
4842 
4843 		/* Push remaining network devices to init_net */
4844 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
4845 		err = dev_change_net_namespace(dev, &init_net, fb_name);
4846 		if (err) {
4847 			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
4848 				__func__, dev->name, err);
4849 			BUG();
4850 		}
4851 	}
4852 	rtnl_unlock();
4853 }
4854 
4855 static struct pernet_operations __net_initdata default_device_ops = {
4856 	.exit = default_device_exit,
4857 };
4858 
4859 /*
4860  *	Initialize the DEV module. At boot time this walks the device list and
4861  *	unhooks any devices that fail to initialize (normally hardware not
4862  *	present) and leaves us with a valid list of present and active devices.
4863  *
4864  */
4865 
4866 /*
4867  *       This is called single threaded during boot, so no need
4868  *       to take the rtnl semaphore.
4869  */
4870 static int __init net_dev_init(void)
4871 {
4872 	int i, rc = -ENOMEM;
4873 
4874 	BUG_ON(!dev_boot_phase);
4875 
4876 	if (dev_proc_init())
4877 		goto out;
4878 
4879 	if (netdev_kobject_init())
4880 		goto out;
4881 
4882 	INIT_LIST_HEAD(&ptype_all);
4883 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
4884 		INIT_LIST_HEAD(&ptype_base[i]);
4885 
4886 	if (register_pernet_subsys(&netdev_net_ops))
4887 		goto out;
4888 
4889 	if (register_pernet_device(&default_device_ops))
4890 		goto out;
4891 
4892 	/*
4893 	 *	Initialise the packet receive queues.
4894 	 */
4895 
4896 	for_each_possible_cpu(i) {
4897 		struct softnet_data *queue;
4898 
4899 		queue = &per_cpu(softnet_data, i);
4900 		skb_queue_head_init(&queue->input_pkt_queue);
4901 		queue->completion_queue = NULL;
4902 		INIT_LIST_HEAD(&queue->poll_list);
4903 
4904 		queue->backlog.poll = process_backlog;
4905 		queue->backlog.weight = weight_p;
4906 	}
4907 
4908 	netdev_dma_register();
4909 
4910 	dev_boot_phase = 0;
4911 
4912 	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
4913 	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
4914 
4915 	hotcpu_notifier(dev_cpu_callback, 0);
4916 	dst_init();
4917 	dev_mcast_init();
4918 	rc = 0;
4919 out:
4920 	return rc;
4921 }
4922 
4923 subsys_initcall(net_dev_init);
4924 
4925 EXPORT_SYMBOL(__dev_get_by_index);
4926 EXPORT_SYMBOL(__dev_get_by_name);
4927 EXPORT_SYMBOL(__dev_remove_pack);
4928 EXPORT_SYMBOL(dev_valid_name);
4929 EXPORT_SYMBOL(dev_add_pack);
4930 EXPORT_SYMBOL(dev_alloc_name);
4931 EXPORT_SYMBOL(dev_close);
4932 EXPORT_SYMBOL(dev_get_by_flags);
4933 EXPORT_SYMBOL(dev_get_by_index);
4934 EXPORT_SYMBOL(dev_get_by_name);
4935 EXPORT_SYMBOL(dev_open);
4936 EXPORT_SYMBOL(dev_queue_xmit);
4937 EXPORT_SYMBOL(dev_remove_pack);
4938 EXPORT_SYMBOL(dev_set_allmulti);
4939 EXPORT_SYMBOL(dev_set_promiscuity);
4940 EXPORT_SYMBOL(dev_change_flags);
4941 EXPORT_SYMBOL(dev_set_mtu);
4942 EXPORT_SYMBOL(dev_set_mac_address);
4943 EXPORT_SYMBOL(free_netdev);
4944 EXPORT_SYMBOL(netdev_boot_setup_check);
4945 EXPORT_SYMBOL(netdev_set_master);
4946 EXPORT_SYMBOL(netdev_state_change);
4947 EXPORT_SYMBOL(netif_receive_skb);
4948 EXPORT_SYMBOL(netif_rx);
4949 EXPORT_SYMBOL(register_gifconf);
4950 EXPORT_SYMBOL(register_netdevice);
4951 EXPORT_SYMBOL(register_netdevice_notifier);
4952 EXPORT_SYMBOL(skb_checksum_help);
4953 EXPORT_SYMBOL(synchronize_net);
4954 EXPORT_SYMBOL(unregister_netdevice);
4955 EXPORT_SYMBOL(unregister_netdevice_notifier);
4956 EXPORT_SYMBOL(net_enable_timestamp);
4957 EXPORT_SYMBOL(net_disable_timestamp);
4958 EXPORT_SYMBOL(dev_get_flags);
4959 
4960 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
4961 EXPORT_SYMBOL(br_handle_frame_hook);
4962 EXPORT_SYMBOL(br_fdb_get_hook);
4963 EXPORT_SYMBOL(br_fdb_put_hook);
4964 #endif
4965 
4966 EXPORT_SYMBOL(dev_load);
4967 
4968 EXPORT_PER_CPU_SYMBOL(softnet_data);
4969