xref: /openbmc/linux/net/core/dev.c (revision e868d61272caa648214046a096e5a6bfc068dc8c)
1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <rzsfl@rz.uni-sb.de>
16  *		Alan Cox <gw4pts@gw4pts.ampr.org>
17  *		David Hinds <dahinds@users.sourceforge.net>
18  *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19  *		Adam Sulmicki <adam@cfar.umd.edu>
20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/sched.h>
83 #include <linux/mutex.h>
84 #include <linux/string.h>
85 #include <linux/mm.h>
86 #include <linux/socket.h>
87 #include <linux/sockios.h>
88 #include <linux/errno.h>
89 #include <linux/interrupt.h>
90 #include <linux/if_ether.h>
91 #include <linux/netdevice.h>
92 #include <linux/etherdevice.h>
93 #include <linux/notifier.h>
94 #include <linux/skbuff.h>
95 #include <net/sock.h>
96 #include <linux/rtnetlink.h>
97 #include <linux/proc_fs.h>
98 #include <linux/seq_file.h>
99 #include <linux/stat.h>
100 #include <linux/if_bridge.h>
101 #include <net/dst.h>
102 #include <net/pkt_sched.h>
103 #include <net/checksum.h>
104 #include <linux/highmem.h>
105 #include <linux/init.h>
106 #include <linux/kmod.h>
107 #include <linux/module.h>
108 #include <linux/kallsyms.h>
109 #include <linux/netpoll.h>
110 #include <linux/rcupdate.h>
111 #include <linux/delay.h>
112 #include <net/wext.h>
113 #include <net/iw_handler.h>
114 #include <asm/current.h>
115 #include <linux/audit.h>
116 #include <linux/dmaengine.h>
117 #include <linux/err.h>
118 #include <linux/ctype.h>
119 
120 /*
121  *	The list of packet types we will receive (as opposed to discard)
122  *	and the routines to invoke.
123  *
124  *	Why 16. Because with 16 the only overlap we get on a hash of the
125  *	low nibble of the protocol value is RARP/SNAP/X.25.
126  *
127  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
128  *             sure which should go first, but I bet it won't make much
129  *             difference if we are running VLANs.  The good news is that
130  *             this protocol won't be in the list unless compiled in, so
131  *             the average user (w/out VLANs) will not be adversely affected.
132  *             --BLG
133  *
134  *		0800	IP
135  *		8100    802.1Q VLAN
136  *		0001	802.3
137  *		0002	AX.25
138  *		0004	802.2
139  *		8035	RARP
140  *		0005	SNAP
141  *		0805	X.25
142  *		0806	ARP
143  *		8137	IPX
144  *		0009	Localtalk
145  *		86DD	IPv6
146  */
147 
148 static DEFINE_SPINLOCK(ptype_lock);
149 static struct list_head ptype_base[16] __read_mostly;	/* 16 way hashed list */
150 static struct list_head ptype_all __read_mostly;	/* Taps */
151 
152 #ifdef CONFIG_NET_DMA
153 static struct dma_client *net_dma_client;
154 static unsigned int net_dma_count;
155 static spinlock_t net_dma_event_lock;
156 #endif
157 
158 /*
159  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
160  * semaphore.
161  *
162  * Pure readers hold dev_base_lock for reading.
163  *
164  * Writers must hold the rtnl semaphore while they loop through the
165  * dev_base_head list, and hold dev_base_lock for writing when they do the
166  * actual updates.  This allows pure readers to access the list even
167  * while a writer is preparing to update it.
168  *
169  * To put it another way, dev_base_lock is held for writing only to
170  * protect against pure readers; the rtnl semaphore provides the
171  * protection against other writers.
172  *
173  * See, for example usages, register_netdevice() and
174  * unregister_netdevice(), which must be called with the rtnl
175  * semaphore held.
176  */
177 LIST_HEAD(dev_base_head);
178 DEFINE_RWLOCK(dev_base_lock);
179 
180 EXPORT_SYMBOL(dev_base_head);
181 EXPORT_SYMBOL(dev_base_lock);
182 
183 #define NETDEV_HASHBITS	8
184 static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
185 static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
186 
187 static inline struct hlist_head *dev_name_hash(const char *name)
188 {
189 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
190 	return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
191 }
192 
193 static inline struct hlist_head *dev_index_hash(int ifindex)
194 {
195 	return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
196 }
197 
198 /*
199  *	Our notifier list
200  */
201 
202 static RAW_NOTIFIER_HEAD(netdev_chain);
203 
204 /*
205  *	Device drivers call our routines to queue packets here. We empty the
206  *	queue in the local softnet handler.
207  */
208 DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
209 
210 #ifdef CONFIG_SYSFS
211 extern int netdev_sysfs_init(void);
212 extern int netdev_register_sysfs(struct net_device *);
213 extern void netdev_unregister_sysfs(struct net_device *);
214 #else
215 #define netdev_sysfs_init()	 	(0)
216 #define netdev_register_sysfs(dev)	(0)
217 #define	netdev_unregister_sysfs(dev)	do { } while(0)
218 #endif
219 
220 
221 /*******************************************************************************
222 
223 		Protocol management and registration routines
224 
225 *******************************************************************************/
226 
227 /*
228  *	Add a protocol ID to the list. Now that the input handler is
229  *	smarter we can dispense with all the messy stuff that used to be
230  *	here.
231  *
232  *	BEWARE!!! Protocol handlers, mangling input packets,
233  *	MUST BE last in hash buckets and checking protocol handlers
234  *	MUST start from promiscuous ptype_all chain in net_bh.
235  *	It is true now, do not change it.
236  *	Explanation follows: if protocol handler, mangling packet, will
237  *	be the first on list, it is not able to sense, that packet
238  *	is cloned and should be copied-on-write, so that it will
239  *	change it and subsequent readers will get broken packet.
240  *							--ANK (980803)
241  */
242 
243 /**
244  *	dev_add_pack - add packet handler
245  *	@pt: packet type declaration
246  *
247  *	Add a protocol handler to the networking stack. The passed &packet_type
248  *	is linked into kernel lists and may not be freed until it has been
249  *	removed from the kernel lists.
250  *
251  *	This call does not sleep therefore it can not
252  *	guarantee all CPU's that are in middle of receiving packets
253  *	will see the new packet type (until the next received packet).
254  */
255 
256 void dev_add_pack(struct packet_type *pt)
257 {
258 	int hash;
259 
260 	spin_lock_bh(&ptype_lock);
261 	if (pt->type == htons(ETH_P_ALL))
262 		list_add_rcu(&pt->list, &ptype_all);
263 	else {
264 		hash = ntohs(pt->type) & 15;
265 		list_add_rcu(&pt->list, &ptype_base[hash]);
266 	}
267 	spin_unlock_bh(&ptype_lock);
268 }
269 
270 /**
271  *	__dev_remove_pack	 - remove packet handler
272  *	@pt: packet type declaration
273  *
274  *	Remove a protocol handler that was previously added to the kernel
275  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
276  *	from the kernel lists and can be freed or reused once this function
277  *	returns.
278  *
279  *      The packet type might still be in use by receivers
280  *	and must not be freed until after all the CPU's have gone
281  *	through a quiescent state.
282  */
283 void __dev_remove_pack(struct packet_type *pt)
284 {
285 	struct list_head *head;
286 	struct packet_type *pt1;
287 
288 	spin_lock_bh(&ptype_lock);
289 
290 	if (pt->type == htons(ETH_P_ALL))
291 		head = &ptype_all;
292 	else
293 		head = &ptype_base[ntohs(pt->type) & 15];
294 
295 	list_for_each_entry(pt1, head, list) {
296 		if (pt == pt1) {
297 			list_del_rcu(&pt->list);
298 			goto out;
299 		}
300 	}
301 
302 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
303 out:
304 	spin_unlock_bh(&ptype_lock);
305 }
306 /**
307  *	dev_remove_pack	 - remove packet handler
308  *	@pt: packet type declaration
309  *
310  *	Remove a protocol handler that was previously added to the kernel
311  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
312  *	from the kernel lists and can be freed or reused once this function
313  *	returns.
314  *
315  *	This call sleeps to guarantee that no CPU is looking at the packet
316  *	type after return.
317  */
318 void dev_remove_pack(struct packet_type *pt)
319 {
320 	__dev_remove_pack(pt);
321 
322 	synchronize_net();
323 }
324 
325 /******************************************************************************
326 
327 		      Device Boot-time Settings Routines
328 
329 *******************************************************************************/
330 
331 /* Boot time configuration table */
332 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
333 
334 /**
335  *	netdev_boot_setup_add	- add new setup entry
336  *	@name: name of the device
337  *	@map: configured settings for the device
338  *
339  *	Adds new setup entry to the dev_boot_setup list.  The function
340  *	returns 0 on error and 1 on success.  This is a generic routine to
341  *	all netdevices.
342  */
343 static int netdev_boot_setup_add(char *name, struct ifmap *map)
344 {
345 	struct netdev_boot_setup *s;
346 	int i;
347 
348 	s = dev_boot_setup;
349 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
350 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
351 			memset(s[i].name, 0, sizeof(s[i].name));
352 			strcpy(s[i].name, name);
353 			memcpy(&s[i].map, map, sizeof(s[i].map));
354 			break;
355 		}
356 	}
357 
358 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
359 }
360 
361 /**
362  *	netdev_boot_setup_check	- check boot time settings
363  *	@dev: the netdevice
364  *
365  * 	Check boot time settings for the device.
366  *	The found settings are set for the device to be used
367  *	later in the device probing.
368  *	Returns 0 if no settings found, 1 if they are.
369  */
370 int netdev_boot_setup_check(struct net_device *dev)
371 {
372 	struct netdev_boot_setup *s = dev_boot_setup;
373 	int i;
374 
375 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
376 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
377 		    !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
378 			dev->irq 	= s[i].map.irq;
379 			dev->base_addr 	= s[i].map.base_addr;
380 			dev->mem_start 	= s[i].map.mem_start;
381 			dev->mem_end 	= s[i].map.mem_end;
382 			return 1;
383 		}
384 	}
385 	return 0;
386 }
387 
388 
389 /**
390  *	netdev_boot_base	- get address from boot time settings
391  *	@prefix: prefix for network device
392  *	@unit: id for network device
393  *
394  * 	Check boot time settings for the base address of device.
395  *	The found settings are set for the device to be used
396  *	later in the device probing.
397  *	Returns 0 if no settings found.
398  */
399 unsigned long netdev_boot_base(const char *prefix, int unit)
400 {
401 	const struct netdev_boot_setup *s = dev_boot_setup;
402 	char name[IFNAMSIZ];
403 	int i;
404 
405 	sprintf(name, "%s%d", prefix, unit);
406 
407 	/*
408 	 * If device already registered then return base of 1
409 	 * to indicate not to probe for this interface
410 	 */
411 	if (__dev_get_by_name(name))
412 		return 1;
413 
414 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
415 		if (!strcmp(name, s[i].name))
416 			return s[i].map.base_addr;
417 	return 0;
418 }
419 
420 /*
421  * Saves at boot time configured settings for any netdevice.
422  */
423 int __init netdev_boot_setup(char *str)
424 {
425 	int ints[5];
426 	struct ifmap map;
427 
428 	str = get_options(str, ARRAY_SIZE(ints), ints);
429 	if (!str || !*str)
430 		return 0;
431 
432 	/* Save settings */
433 	memset(&map, 0, sizeof(map));
434 	if (ints[0] > 0)
435 		map.irq = ints[1];
436 	if (ints[0] > 1)
437 		map.base_addr = ints[2];
438 	if (ints[0] > 2)
439 		map.mem_start = ints[3];
440 	if (ints[0] > 3)
441 		map.mem_end = ints[4];
442 
443 	/* Add new entry to the list */
444 	return netdev_boot_setup_add(str, &map);
445 }
446 
447 __setup("netdev=", netdev_boot_setup);
448 
449 /*******************************************************************************
450 
451 			    Device Interface Subroutines
452 
453 *******************************************************************************/
454 
455 /**
456  *	__dev_get_by_name	- find a device by its name
457  *	@name: name to find
458  *
459  *	Find an interface by name. Must be called under RTNL semaphore
460  *	or @dev_base_lock. If the name is found a pointer to the device
461  *	is returned. If the name is not found then %NULL is returned. The
462  *	reference counters are not incremented so the caller must be
463  *	careful with locks.
464  */
465 
466 struct net_device *__dev_get_by_name(const char *name)
467 {
468 	struct hlist_node *p;
469 
470 	hlist_for_each(p, dev_name_hash(name)) {
471 		struct net_device *dev
472 			= hlist_entry(p, struct net_device, name_hlist);
473 		if (!strncmp(dev->name, name, IFNAMSIZ))
474 			return dev;
475 	}
476 	return NULL;
477 }
478 
479 /**
480  *	dev_get_by_name		- find a device by its name
481  *	@name: name to find
482  *
483  *	Find an interface by name. This can be called from any
484  *	context and does its own locking. The returned handle has
485  *	the usage count incremented and the caller must use dev_put() to
486  *	release it when it is no longer needed. %NULL is returned if no
487  *	matching device is found.
488  */
489 
490 struct net_device *dev_get_by_name(const char *name)
491 {
492 	struct net_device *dev;
493 
494 	read_lock(&dev_base_lock);
495 	dev = __dev_get_by_name(name);
496 	if (dev)
497 		dev_hold(dev);
498 	read_unlock(&dev_base_lock);
499 	return dev;
500 }
501 
502 /**
503  *	__dev_get_by_index - find a device by its ifindex
504  *	@ifindex: index of device
505  *
506  *	Search for an interface by index. Returns %NULL if the device
507  *	is not found or a pointer to the device. The device has not
508  *	had its reference counter increased so the caller must be careful
509  *	about locking. The caller must hold either the RTNL semaphore
510  *	or @dev_base_lock.
511  */
512 
513 struct net_device *__dev_get_by_index(int ifindex)
514 {
515 	struct hlist_node *p;
516 
517 	hlist_for_each(p, dev_index_hash(ifindex)) {
518 		struct net_device *dev
519 			= hlist_entry(p, struct net_device, index_hlist);
520 		if (dev->ifindex == ifindex)
521 			return dev;
522 	}
523 	return NULL;
524 }
525 
526 
527 /**
528  *	dev_get_by_index - find a device by its ifindex
529  *	@ifindex: index of device
530  *
531  *	Search for an interface by index. Returns NULL if the device
532  *	is not found or a pointer to the device. The device returned has
533  *	had a reference added and the pointer is safe until the user calls
534  *	dev_put to indicate they have finished with it.
535  */
536 
537 struct net_device *dev_get_by_index(int ifindex)
538 {
539 	struct net_device *dev;
540 
541 	read_lock(&dev_base_lock);
542 	dev = __dev_get_by_index(ifindex);
543 	if (dev)
544 		dev_hold(dev);
545 	read_unlock(&dev_base_lock);
546 	return dev;
547 }
548 
549 /**
550  *	dev_getbyhwaddr - find a device by its hardware address
551  *	@type: media type of device
552  *	@ha: hardware address
553  *
554  *	Search for an interface by MAC address. Returns NULL if the device
555  *	is not found or a pointer to the device. The caller must hold the
556  *	rtnl semaphore. The returned device has not had its ref count increased
557  *	and the caller must therefore be careful about locking
558  *
559  *	BUGS:
560  *	If the API was consistent this would be __dev_get_by_hwaddr
561  */
562 
563 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
564 {
565 	struct net_device *dev;
566 
567 	ASSERT_RTNL();
568 
569 	for_each_netdev(dev)
570 		if (dev->type == type &&
571 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
572 			return dev;
573 
574 	return NULL;
575 }
576 
577 EXPORT_SYMBOL(dev_getbyhwaddr);
578 
579 struct net_device *__dev_getfirstbyhwtype(unsigned short type)
580 {
581 	struct net_device *dev;
582 
583 	ASSERT_RTNL();
584 	for_each_netdev(dev)
585 		if (dev->type == type)
586 			return dev;
587 
588 	return NULL;
589 }
590 
591 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
592 
593 struct net_device *dev_getfirstbyhwtype(unsigned short type)
594 {
595 	struct net_device *dev;
596 
597 	rtnl_lock();
598 	dev = __dev_getfirstbyhwtype(type);
599 	if (dev)
600 		dev_hold(dev);
601 	rtnl_unlock();
602 	return dev;
603 }
604 
605 EXPORT_SYMBOL(dev_getfirstbyhwtype);
606 
607 /**
608  *	dev_get_by_flags - find any device with given flags
609  *	@if_flags: IFF_* values
610  *	@mask: bitmask of bits in if_flags to check
611  *
612  *	Search for any interface with the given flags. Returns NULL if a device
613  *	is not found or a pointer to the device. The device returned has
614  *	had a reference added and the pointer is safe until the user calls
615  *	dev_put to indicate they have finished with it.
616  */
617 
618 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
619 {
620 	struct net_device *dev, *ret;
621 
622 	ret = NULL;
623 	read_lock(&dev_base_lock);
624 	for_each_netdev(dev) {
625 		if (((dev->flags ^ if_flags) & mask) == 0) {
626 			dev_hold(dev);
627 			ret = dev;
628 			break;
629 		}
630 	}
631 	read_unlock(&dev_base_lock);
632 	return ret;
633 }
634 
635 /**
636  *	dev_valid_name - check if name is okay for network device
637  *	@name: name string
638  *
639  *	Network device names need to be valid file names to
640  *	to allow sysfs to work.  We also disallow any kind of
641  *	whitespace.
642  */
643 int dev_valid_name(const char *name)
644 {
645 	if (*name == '\0')
646 		return 0;
647 	if (strlen(name) >= IFNAMSIZ)
648 		return 0;
649 	if (!strcmp(name, ".") || !strcmp(name, ".."))
650 		return 0;
651 
652 	while (*name) {
653 		if (*name == '/' || isspace(*name))
654 			return 0;
655 		name++;
656 	}
657 	return 1;
658 }
659 
660 /**
661  *	dev_alloc_name - allocate a name for a device
662  *	@dev: device
663  *	@name: name format string
664  *
665  *	Passed a format string - eg "lt%d" it will try and find a suitable
666  *	id. It scans list of devices to build up a free map, then chooses
667  *	the first empty slot. The caller must hold the dev_base or rtnl lock
668  *	while allocating the name and adding the device in order to avoid
669  *	duplicates.
670  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
671  *	Returns the number of the unit assigned or a negative errno code.
672  */
673 
674 int dev_alloc_name(struct net_device *dev, const char *name)
675 {
676 	int i = 0;
677 	char buf[IFNAMSIZ];
678 	const char *p;
679 	const int max_netdevices = 8*PAGE_SIZE;
680 	long *inuse;
681 	struct net_device *d;
682 
683 	p = strnchr(name, IFNAMSIZ-1, '%');
684 	if (p) {
685 		/*
686 		 * Verify the string as this thing may have come from
687 		 * the user.  There must be either one "%d" and no other "%"
688 		 * characters.
689 		 */
690 		if (p[1] != 'd' || strchr(p + 2, '%'))
691 			return -EINVAL;
692 
693 		/* Use one page as a bit array of possible slots */
694 		inuse = (long *) get_zeroed_page(GFP_ATOMIC);
695 		if (!inuse)
696 			return -ENOMEM;
697 
698 		for_each_netdev(d) {
699 			if (!sscanf(d->name, name, &i))
700 				continue;
701 			if (i < 0 || i >= max_netdevices)
702 				continue;
703 
704 			/*  avoid cases where sscanf is not exact inverse of printf */
705 			snprintf(buf, sizeof(buf), name, i);
706 			if (!strncmp(buf, d->name, IFNAMSIZ))
707 				set_bit(i, inuse);
708 		}
709 
710 		i = find_first_zero_bit(inuse, max_netdevices);
711 		free_page((unsigned long) inuse);
712 	}
713 
714 	snprintf(buf, sizeof(buf), name, i);
715 	if (!__dev_get_by_name(buf)) {
716 		strlcpy(dev->name, buf, IFNAMSIZ);
717 		return i;
718 	}
719 
720 	/* It is possible to run out of possible slots
721 	 * when the name is long and there isn't enough space left
722 	 * for the digits, or if all bits are used.
723 	 */
724 	return -ENFILE;
725 }
726 
727 
728 /**
729  *	dev_change_name - change name of a device
730  *	@dev: device
731  *	@newname: name (or format string) must be at least IFNAMSIZ
732  *
733  *	Change name of a device, can pass format strings "eth%d".
734  *	for wildcarding.
735  */
736 int dev_change_name(struct net_device *dev, char *newname)
737 {
738 	int err = 0;
739 
740 	ASSERT_RTNL();
741 
742 	if (dev->flags & IFF_UP)
743 		return -EBUSY;
744 
745 	if (!dev_valid_name(newname))
746 		return -EINVAL;
747 
748 	if (strchr(newname, '%')) {
749 		err = dev_alloc_name(dev, newname);
750 		if (err < 0)
751 			return err;
752 		strcpy(newname, dev->name);
753 	}
754 	else if (__dev_get_by_name(newname))
755 		return -EEXIST;
756 	else
757 		strlcpy(dev->name, newname, IFNAMSIZ);
758 
759 	device_rename(&dev->dev, dev->name);
760 	hlist_del(&dev->name_hlist);
761 	hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
762 	raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
763 
764 	return err;
765 }
766 
767 /**
768  *	netdev_features_change - device changes features
769  *	@dev: device to cause notification
770  *
771  *	Called to indicate a device has changed features.
772  */
773 void netdev_features_change(struct net_device *dev)
774 {
775 	raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
776 }
777 EXPORT_SYMBOL(netdev_features_change);
778 
779 /**
780  *	netdev_state_change - device changes state
781  *	@dev: device to cause notification
782  *
783  *	Called to indicate a device has changed state. This function calls
784  *	the notifier chains for netdev_chain and sends a NEWLINK message
785  *	to the routing socket.
786  */
787 void netdev_state_change(struct net_device *dev)
788 {
789 	if (dev->flags & IFF_UP) {
790 		raw_notifier_call_chain(&netdev_chain,
791 				NETDEV_CHANGE, dev);
792 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
793 	}
794 }
795 
796 /**
797  *	dev_load 	- load a network module
798  *	@name: name of interface
799  *
800  *	If a network interface is not present and the process has suitable
801  *	privileges this function loads the module. If module loading is not
802  *	available in this kernel then it becomes a nop.
803  */
804 
805 void dev_load(const char *name)
806 {
807 	struct net_device *dev;
808 
809 	read_lock(&dev_base_lock);
810 	dev = __dev_get_by_name(name);
811 	read_unlock(&dev_base_lock);
812 
813 	if (!dev && capable(CAP_SYS_MODULE))
814 		request_module("%s", name);
815 }
816 
817 static int default_rebuild_header(struct sk_buff *skb)
818 {
819 	printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
820 	       skb->dev ? skb->dev->name : "NULL!!!");
821 	kfree_skb(skb);
822 	return 1;
823 }
824 
825 /**
826  *	dev_open	- prepare an interface for use.
827  *	@dev:	device to open
828  *
829  *	Takes a device from down to up state. The device's private open
830  *	function is invoked and then the multicast lists are loaded. Finally
831  *	the device is moved into the up state and a %NETDEV_UP message is
832  *	sent to the netdev notifier chain.
833  *
834  *	Calling this function on an active interface is a nop. On a failure
835  *	a negative errno code is returned.
836  */
837 int dev_open(struct net_device *dev)
838 {
839 	int ret = 0;
840 
841 	/*
842 	 *	Is it already up?
843 	 */
844 
845 	if (dev->flags & IFF_UP)
846 		return 0;
847 
848 	/*
849 	 *	Is it even present?
850 	 */
851 	if (!netif_device_present(dev))
852 		return -ENODEV;
853 
854 	/*
855 	 *	Call device private open method
856 	 */
857 	set_bit(__LINK_STATE_START, &dev->state);
858 	if (dev->open) {
859 		ret = dev->open(dev);
860 		if (ret)
861 			clear_bit(__LINK_STATE_START, &dev->state);
862 	}
863 
864 	/*
865 	 *	If it went open OK then:
866 	 */
867 
868 	if (!ret) {
869 		/*
870 		 *	Set the flags.
871 		 */
872 		dev->flags |= IFF_UP;
873 
874 		/*
875 		 *	Initialize multicasting status
876 		 */
877 		dev_mc_upload(dev);
878 
879 		/*
880 		 *	Wakeup transmit queue engine
881 		 */
882 		dev_activate(dev);
883 
884 		/*
885 		 *	... and announce new interface.
886 		 */
887 		raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
888 	}
889 	return ret;
890 }
891 
892 /**
893  *	dev_close - shutdown an interface.
894  *	@dev: device to shutdown
895  *
896  *	This function moves an active device into down state. A
897  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
898  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
899  *	chain.
900  */
901 int dev_close(struct net_device *dev)
902 {
903 	if (!(dev->flags & IFF_UP))
904 		return 0;
905 
906 	/*
907 	 *	Tell people we are going down, so that they can
908 	 *	prepare to death, when device is still operating.
909 	 */
910 	raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
911 
912 	dev_deactivate(dev);
913 
914 	clear_bit(__LINK_STATE_START, &dev->state);
915 
916 	/* Synchronize to scheduled poll. We cannot touch poll list,
917 	 * it can be even on different cpu. So just clear netif_running(),
918 	 * and wait when poll really will happen. Actually, the best place
919 	 * for this is inside dev->stop() after device stopped its irq
920 	 * engine, but this requires more changes in devices. */
921 
922 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
923 	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
924 		/* No hurry. */
925 		msleep(1);
926 	}
927 
928 	/*
929 	 *	Call the device specific close. This cannot fail.
930 	 *	Only if device is UP
931 	 *
932 	 *	We allow it to be called even after a DETACH hot-plug
933 	 *	event.
934 	 */
935 	if (dev->stop)
936 		dev->stop(dev);
937 
938 	/*
939 	 *	Device is now down.
940 	 */
941 
942 	dev->flags &= ~IFF_UP;
943 
944 	/*
945 	 * Tell people we are down
946 	 */
947 	raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
948 
949 	return 0;
950 }
951 
952 
953 /*
954  *	Device change register/unregister. These are not inline or static
955  *	as we export them to the world.
956  */
957 
958 /**
959  *	register_netdevice_notifier - register a network notifier block
960  *	@nb: notifier
961  *
962  *	Register a notifier to be called when network device events occur.
963  *	The notifier passed is linked into the kernel structures and must
964  *	not be reused until it has been unregistered. A negative errno code
965  *	is returned on a failure.
966  *
967  * 	When registered all registration and up events are replayed
968  *	to the new notifier to allow device to have a race free
969  *	view of the network device list.
970  */
971 
972 int register_netdevice_notifier(struct notifier_block *nb)
973 {
974 	struct net_device *dev;
975 	int err;
976 
977 	rtnl_lock();
978 	err = raw_notifier_chain_register(&netdev_chain, nb);
979 	if (!err) {
980 		for_each_netdev(dev) {
981 			nb->notifier_call(nb, NETDEV_REGISTER, dev);
982 
983 			if (dev->flags & IFF_UP)
984 				nb->notifier_call(nb, NETDEV_UP, dev);
985 		}
986 	}
987 	rtnl_unlock();
988 	return err;
989 }
990 
991 /**
992  *	unregister_netdevice_notifier - unregister a network notifier block
993  *	@nb: notifier
994  *
995  *	Unregister a notifier previously registered by
996  *	register_netdevice_notifier(). The notifier is unlinked into the
997  *	kernel structures and may then be reused. A negative errno code
998  *	is returned on a failure.
999  */
1000 
1001 int unregister_netdevice_notifier(struct notifier_block *nb)
1002 {
1003 	int err;
1004 
1005 	rtnl_lock();
1006 	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1007 	rtnl_unlock();
1008 	return err;
1009 }
1010 
1011 /**
1012  *	call_netdevice_notifiers - call all network notifier blocks
1013  *      @val: value passed unmodified to notifier function
1014  *      @v:   pointer passed unmodified to notifier function
1015  *
1016  *	Call all network notifier blocks.  Parameters and return value
1017  *	are as for raw_notifier_call_chain().
1018  */
1019 
1020 int call_netdevice_notifiers(unsigned long val, void *v)
1021 {
1022 	return raw_notifier_call_chain(&netdev_chain, val, v);
1023 }
1024 
1025 /* When > 0 there are consumers of rx skb time stamps */
1026 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1027 
1028 void net_enable_timestamp(void)
1029 {
1030 	atomic_inc(&netstamp_needed);
1031 }
1032 
1033 void net_disable_timestamp(void)
1034 {
1035 	atomic_dec(&netstamp_needed);
1036 }
1037 
1038 static inline void net_timestamp(struct sk_buff *skb)
1039 {
1040 	if (atomic_read(&netstamp_needed))
1041 		__net_timestamp(skb);
1042 	else
1043 		skb->tstamp.tv64 = 0;
1044 }
1045 
1046 /*
1047  *	Support routine. Sends outgoing frames to any network
1048  *	taps currently in use.
1049  */
1050 
1051 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1052 {
1053 	struct packet_type *ptype;
1054 
1055 	net_timestamp(skb);
1056 
1057 	rcu_read_lock();
1058 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1059 		/* Never send packets back to the socket
1060 		 * they originated from - MvS (miquels@drinkel.ow.org)
1061 		 */
1062 		if ((ptype->dev == dev || !ptype->dev) &&
1063 		    (ptype->af_packet_priv == NULL ||
1064 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1065 			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1066 			if (!skb2)
1067 				break;
1068 
1069 			/* skb->nh should be correctly
1070 			   set by sender, so that the second statement is
1071 			   just protection against buggy protocols.
1072 			 */
1073 			skb_reset_mac_header(skb2);
1074 
1075 			if (skb_network_header(skb2) < skb2->data ||
1076 			    skb2->network_header > skb2->tail) {
1077 				if (net_ratelimit())
1078 					printk(KERN_CRIT "protocol %04x is "
1079 					       "buggy, dev %s\n",
1080 					       skb2->protocol, dev->name);
1081 				skb_reset_network_header(skb2);
1082 			}
1083 
1084 			skb2->transport_header = skb2->network_header;
1085 			skb2->pkt_type = PACKET_OUTGOING;
1086 			ptype->func(skb2, skb->dev, ptype, skb->dev);
1087 		}
1088 	}
1089 	rcu_read_unlock();
1090 }
1091 
1092 
1093 void __netif_schedule(struct net_device *dev)
1094 {
1095 	if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
1096 		unsigned long flags;
1097 		struct softnet_data *sd;
1098 
1099 		local_irq_save(flags);
1100 		sd = &__get_cpu_var(softnet_data);
1101 		dev->next_sched = sd->output_queue;
1102 		sd->output_queue = dev;
1103 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1104 		local_irq_restore(flags);
1105 	}
1106 }
1107 EXPORT_SYMBOL(__netif_schedule);
1108 
1109 void __netif_rx_schedule(struct net_device *dev)
1110 {
1111 	unsigned long flags;
1112 
1113 	local_irq_save(flags);
1114 	dev_hold(dev);
1115 	list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
1116 	if (dev->quota < 0)
1117 		dev->quota += dev->weight;
1118 	else
1119 		dev->quota = dev->weight;
1120 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
1121 	local_irq_restore(flags);
1122 }
1123 EXPORT_SYMBOL(__netif_rx_schedule);
1124 
1125 void dev_kfree_skb_any(struct sk_buff *skb)
1126 {
1127 	if (in_irq() || irqs_disabled())
1128 		dev_kfree_skb_irq(skb);
1129 	else
1130 		dev_kfree_skb(skb);
1131 }
1132 EXPORT_SYMBOL(dev_kfree_skb_any);
1133 
1134 
1135 /* Hot-plugging. */
1136 void netif_device_detach(struct net_device *dev)
1137 {
1138 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1139 	    netif_running(dev)) {
1140 		netif_stop_queue(dev);
1141 	}
1142 }
1143 EXPORT_SYMBOL(netif_device_detach);
1144 
1145 void netif_device_attach(struct net_device *dev)
1146 {
1147 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1148 	    netif_running(dev)) {
1149 		netif_wake_queue(dev);
1150 		__netdev_watchdog_up(dev);
1151 	}
1152 }
1153 EXPORT_SYMBOL(netif_device_attach);
1154 
1155 
1156 /*
1157  * Invalidate hardware checksum when packet is to be mangled, and
1158  * complete checksum manually on outgoing path.
1159  */
1160 int skb_checksum_help(struct sk_buff *skb)
1161 {
1162 	__wsum csum;
1163 	int ret = 0, offset;
1164 
1165 	if (skb->ip_summed == CHECKSUM_COMPLETE)
1166 		goto out_set_summed;
1167 
1168 	if (unlikely(skb_shinfo(skb)->gso_size)) {
1169 		/* Let GSO fix up the checksum. */
1170 		goto out_set_summed;
1171 	}
1172 
1173 	if (skb_cloned(skb)) {
1174 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1175 		if (ret)
1176 			goto out;
1177 	}
1178 
1179 	offset = skb->csum_start - skb_headroom(skb);
1180 	BUG_ON(offset > (int)skb->len);
1181 	csum = skb_checksum(skb, offset, skb->len-offset, 0);
1182 
1183 	offset = skb_headlen(skb) - offset;
1184 	BUG_ON(offset <= 0);
1185 	BUG_ON(skb->csum_offset + 2 > offset);
1186 
1187 	*(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) =
1188 		csum_fold(csum);
1189 out_set_summed:
1190 	skb->ip_summed = CHECKSUM_NONE;
1191 out:
1192 	return ret;
1193 }
1194 
1195 /**
1196  *	skb_gso_segment - Perform segmentation on skb.
1197  *	@skb: buffer to segment
1198  *	@features: features for the output path (see dev->features)
1199  *
1200  *	This function segments the given skb and returns a list of segments.
1201  *
1202  *	It may return NULL if the skb requires no segmentation.  This is
1203  *	only possible when GSO is used for verifying header integrity.
1204  */
1205 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1206 {
1207 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1208 	struct packet_type *ptype;
1209 	__be16 type = skb->protocol;
1210 	int err;
1211 
1212 	BUG_ON(skb_shinfo(skb)->frag_list);
1213 
1214 	skb_reset_mac_header(skb);
1215 	skb->mac_len = skb->network_header - skb->mac_header;
1216 	__skb_pull(skb, skb->mac_len);
1217 
1218 	if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
1219 		if (skb_header_cloned(skb) &&
1220 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1221 			return ERR_PTR(err);
1222 	}
1223 
1224 	rcu_read_lock();
1225 	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
1226 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1227 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1228 				err = ptype->gso_send_check(skb);
1229 				segs = ERR_PTR(err);
1230 				if (err || skb_gso_ok(skb, features))
1231 					break;
1232 				__skb_push(skb, (skb->data -
1233 						 skb_network_header(skb)));
1234 			}
1235 			segs = ptype->gso_segment(skb, features);
1236 			break;
1237 		}
1238 	}
1239 	rcu_read_unlock();
1240 
1241 	__skb_push(skb, skb->data - skb_mac_header(skb));
1242 
1243 	return segs;
1244 }
1245 
1246 EXPORT_SYMBOL(skb_gso_segment);
1247 
1248 /* Take action when hardware reception checksum errors are detected. */
1249 #ifdef CONFIG_BUG
1250 void netdev_rx_csum_fault(struct net_device *dev)
1251 {
1252 	if (net_ratelimit()) {
1253 		printk(KERN_ERR "%s: hw csum failure.\n",
1254 			dev ? dev->name : "<unknown>");
1255 		dump_stack();
1256 	}
1257 }
1258 EXPORT_SYMBOL(netdev_rx_csum_fault);
1259 #endif
1260 
1261 /* Actually, we should eliminate this check as soon as we know, that:
1262  * 1. IOMMU is present and allows to map all the memory.
1263  * 2. No high memory really exists on this machine.
1264  */
1265 
1266 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1267 {
1268 #ifdef CONFIG_HIGHMEM
1269 	int i;
1270 
1271 	if (dev->features & NETIF_F_HIGHDMA)
1272 		return 0;
1273 
1274 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1275 		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1276 			return 1;
1277 
1278 #endif
1279 	return 0;
1280 }
1281 
1282 struct dev_gso_cb {
1283 	void (*destructor)(struct sk_buff *skb);
1284 };
1285 
1286 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1287 
1288 static void dev_gso_skb_destructor(struct sk_buff *skb)
1289 {
1290 	struct dev_gso_cb *cb;
1291 
1292 	do {
1293 		struct sk_buff *nskb = skb->next;
1294 
1295 		skb->next = nskb->next;
1296 		nskb->next = NULL;
1297 		kfree_skb(nskb);
1298 	} while (skb->next);
1299 
1300 	cb = DEV_GSO_CB(skb);
1301 	if (cb->destructor)
1302 		cb->destructor(skb);
1303 }
1304 
1305 /**
1306  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
1307  *	@skb: buffer to segment
1308  *
1309  *	This function segments the given skb and stores the list of segments
1310  *	in skb->next.
1311  */
1312 static int dev_gso_segment(struct sk_buff *skb)
1313 {
1314 	struct net_device *dev = skb->dev;
1315 	struct sk_buff *segs;
1316 	int features = dev->features & ~(illegal_highdma(dev, skb) ?
1317 					 NETIF_F_SG : 0);
1318 
1319 	segs = skb_gso_segment(skb, features);
1320 
1321 	/* Verifying header integrity only. */
1322 	if (!segs)
1323 		return 0;
1324 
1325 	if (unlikely(IS_ERR(segs)))
1326 		return PTR_ERR(segs);
1327 
1328 	skb->next = segs;
1329 	DEV_GSO_CB(skb)->destructor = skb->destructor;
1330 	skb->destructor = dev_gso_skb_destructor;
1331 
1332 	return 0;
1333 }
1334 
1335 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
1336 {
1337 	if (likely(!skb->next)) {
1338 		if (!list_empty(&ptype_all))
1339 			dev_queue_xmit_nit(skb, dev);
1340 
1341 		if (netif_needs_gso(dev, skb)) {
1342 			if (unlikely(dev_gso_segment(skb)))
1343 				goto out_kfree_skb;
1344 			if (skb->next)
1345 				goto gso;
1346 		}
1347 
1348 		return dev->hard_start_xmit(skb, dev);
1349 	}
1350 
1351 gso:
1352 	do {
1353 		struct sk_buff *nskb = skb->next;
1354 		int rc;
1355 
1356 		skb->next = nskb->next;
1357 		nskb->next = NULL;
1358 		rc = dev->hard_start_xmit(nskb, dev);
1359 		if (unlikely(rc)) {
1360 			nskb->next = skb->next;
1361 			skb->next = nskb;
1362 			return rc;
1363 		}
1364 		if (unlikely(netif_queue_stopped(dev) && skb->next))
1365 			return NETDEV_TX_BUSY;
1366 	} while (skb->next);
1367 
1368 	skb->destructor = DEV_GSO_CB(skb)->destructor;
1369 
1370 out_kfree_skb:
1371 	kfree_skb(skb);
1372 	return 0;
1373 }
1374 
1375 #define HARD_TX_LOCK(dev, cpu) {			\
1376 	if ((dev->features & NETIF_F_LLTX) == 0) {	\
1377 		netif_tx_lock(dev);			\
1378 	}						\
1379 }
1380 
1381 #define HARD_TX_UNLOCK(dev) {				\
1382 	if ((dev->features & NETIF_F_LLTX) == 0) {	\
1383 		netif_tx_unlock(dev);			\
1384 	}						\
1385 }
1386 
1387 /**
1388  *	dev_queue_xmit - transmit a buffer
1389  *	@skb: buffer to transmit
1390  *
1391  *	Queue a buffer for transmission to a network device. The caller must
1392  *	have set the device and priority and built the buffer before calling
1393  *	this function. The function can be called from an interrupt.
1394  *
1395  *	A negative errno code is returned on a failure. A success does not
1396  *	guarantee the frame will be transmitted as it may be dropped due
1397  *	to congestion or traffic shaping.
1398  *
1399  * -----------------------------------------------------------------------------------
1400  *      I notice this method can also return errors from the queue disciplines,
1401  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1402  *      be positive.
1403  *
1404  *      Regardless of the return value, the skb is consumed, so it is currently
1405  *      difficult to retry a send to this method.  (You can bump the ref count
1406  *      before sending to hold a reference for retry if you are careful.)
1407  *
1408  *      When calling this method, interrupts MUST be enabled.  This is because
1409  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1410  *          --BLG
1411  */
1412 
1413 int dev_queue_xmit(struct sk_buff *skb)
1414 {
1415 	struct net_device *dev = skb->dev;
1416 	struct Qdisc *q;
1417 	int rc = -ENOMEM;
1418 
1419 	/* GSO will handle the following emulations directly. */
1420 	if (netif_needs_gso(dev, skb))
1421 		goto gso;
1422 
1423 	if (skb_shinfo(skb)->frag_list &&
1424 	    !(dev->features & NETIF_F_FRAGLIST) &&
1425 	    __skb_linearize(skb))
1426 		goto out_kfree_skb;
1427 
1428 	/* Fragmented skb is linearized if device does not support SG,
1429 	 * or if at least one of fragments is in highmem and device
1430 	 * does not support DMA from it.
1431 	 */
1432 	if (skb_shinfo(skb)->nr_frags &&
1433 	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1434 	    __skb_linearize(skb))
1435 		goto out_kfree_skb;
1436 
1437 	/* If packet is not checksummed and device does not support
1438 	 * checksumming for this protocol, complete checksumming here.
1439 	 */
1440 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
1441 		skb_set_transport_header(skb, skb->csum_start -
1442 					      skb_headroom(skb));
1443 
1444 		if (!(dev->features & NETIF_F_GEN_CSUM) &&
1445 		    (!(dev->features & NETIF_F_IP_CSUM) ||
1446 		     skb->protocol != htons(ETH_P_IP)))
1447 			if (skb_checksum_help(skb))
1448 				goto out_kfree_skb;
1449 	}
1450 
1451 gso:
1452 	spin_lock_prefetch(&dev->queue_lock);
1453 
1454 	/* Disable soft irqs for various locks below. Also
1455 	 * stops preemption for RCU.
1456 	 */
1457 	rcu_read_lock_bh();
1458 
1459 	/* Updates of qdisc are serialized by queue_lock.
1460 	 * The struct Qdisc which is pointed to by qdisc is now a
1461 	 * rcu structure - it may be accessed without acquiring
1462 	 * a lock (but the structure may be stale.) The freeing of the
1463 	 * qdisc will be deferred until it's known that there are no
1464 	 * more references to it.
1465 	 *
1466 	 * If the qdisc has an enqueue function, we still need to
1467 	 * hold the queue_lock before calling it, since queue_lock
1468 	 * also serializes access to the device queue.
1469 	 */
1470 
1471 	q = rcu_dereference(dev->qdisc);
1472 #ifdef CONFIG_NET_CLS_ACT
1473 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1474 #endif
1475 	if (q->enqueue) {
1476 		/* Grab device queue */
1477 		spin_lock(&dev->queue_lock);
1478 		q = dev->qdisc;
1479 		if (q->enqueue) {
1480 			rc = q->enqueue(skb, q);
1481 			qdisc_run(dev);
1482 			spin_unlock(&dev->queue_lock);
1483 
1484 			rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1485 			goto out;
1486 		}
1487 		spin_unlock(&dev->queue_lock);
1488 	}
1489 
1490 	/* The device has no queue. Common case for software devices:
1491 	   loopback, all the sorts of tunnels...
1492 
1493 	   Really, it is unlikely that netif_tx_lock protection is necessary
1494 	   here.  (f.e. loopback and IP tunnels are clean ignoring statistics
1495 	   counters.)
1496 	   However, it is possible, that they rely on protection
1497 	   made by us here.
1498 
1499 	   Check this and shot the lock. It is not prone from deadlocks.
1500 	   Either shot noqueue qdisc, it is even simpler 8)
1501 	 */
1502 	if (dev->flags & IFF_UP) {
1503 		int cpu = smp_processor_id(); /* ok because BHs are off */
1504 
1505 		if (dev->xmit_lock_owner != cpu) {
1506 
1507 			HARD_TX_LOCK(dev, cpu);
1508 
1509 			if (!netif_queue_stopped(dev)) {
1510 				rc = 0;
1511 				if (!dev_hard_start_xmit(skb, dev)) {
1512 					HARD_TX_UNLOCK(dev);
1513 					goto out;
1514 				}
1515 			}
1516 			HARD_TX_UNLOCK(dev);
1517 			if (net_ratelimit())
1518 				printk(KERN_CRIT "Virtual device %s asks to "
1519 				       "queue packet!\n", dev->name);
1520 		} else {
1521 			/* Recursion is detected! It is possible,
1522 			 * unfortunately */
1523 			if (net_ratelimit())
1524 				printk(KERN_CRIT "Dead loop on virtual device "
1525 				       "%s, fix it urgently!\n", dev->name);
1526 		}
1527 	}
1528 
1529 	rc = -ENETDOWN;
1530 	rcu_read_unlock_bh();
1531 
1532 out_kfree_skb:
1533 	kfree_skb(skb);
1534 	return rc;
1535 out:
1536 	rcu_read_unlock_bh();
1537 	return rc;
1538 }
1539 
1540 
1541 /*=======================================================================
1542 			Receiver routines
1543   =======================================================================*/
1544 
1545 int netdev_max_backlog __read_mostly = 1000;
1546 int netdev_budget __read_mostly = 300;
1547 int weight_p __read_mostly = 64;            /* old backlog weight */
1548 
1549 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1550 
1551 
1552 /**
1553  *	netif_rx	-	post buffer to the network code
1554  *	@skb: buffer to post
1555  *
1556  *	This function receives a packet from a device driver and queues it for
1557  *	the upper (protocol) levels to process.  It always succeeds. The buffer
1558  *	may be dropped during processing for congestion control or by the
1559  *	protocol layers.
1560  *
1561  *	return values:
1562  *	NET_RX_SUCCESS	(no congestion)
1563  *	NET_RX_CN_LOW   (low congestion)
1564  *	NET_RX_CN_MOD   (moderate congestion)
1565  *	NET_RX_CN_HIGH  (high congestion)
1566  *	NET_RX_DROP     (packet was dropped)
1567  *
1568  */
1569 
1570 int netif_rx(struct sk_buff *skb)
1571 {
1572 	struct softnet_data *queue;
1573 	unsigned long flags;
1574 
1575 	/* if netpoll wants it, pretend we never saw it */
1576 	if (netpoll_rx(skb))
1577 		return NET_RX_DROP;
1578 
1579 	if (!skb->tstamp.tv64)
1580 		net_timestamp(skb);
1581 
1582 	/*
1583 	 * The code is rearranged so that the path is the most
1584 	 * short when CPU is congested, but is still operating.
1585 	 */
1586 	local_irq_save(flags);
1587 	queue = &__get_cpu_var(softnet_data);
1588 
1589 	__get_cpu_var(netdev_rx_stat).total++;
1590 	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1591 		if (queue->input_pkt_queue.qlen) {
1592 enqueue:
1593 			dev_hold(skb->dev);
1594 			__skb_queue_tail(&queue->input_pkt_queue, skb);
1595 			local_irq_restore(flags);
1596 			return NET_RX_SUCCESS;
1597 		}
1598 
1599 		netif_rx_schedule(&queue->backlog_dev);
1600 		goto enqueue;
1601 	}
1602 
1603 	__get_cpu_var(netdev_rx_stat).dropped++;
1604 	local_irq_restore(flags);
1605 
1606 	kfree_skb(skb);
1607 	return NET_RX_DROP;
1608 }
1609 
1610 int netif_rx_ni(struct sk_buff *skb)
1611 {
1612 	int err;
1613 
1614 	preempt_disable();
1615 	err = netif_rx(skb);
1616 	if (local_softirq_pending())
1617 		do_softirq();
1618 	preempt_enable();
1619 
1620 	return err;
1621 }
1622 
1623 EXPORT_SYMBOL(netif_rx_ni);
1624 
1625 static inline struct net_device *skb_bond(struct sk_buff *skb)
1626 {
1627 	struct net_device *dev = skb->dev;
1628 
1629 	if (dev->master) {
1630 		if (skb_bond_should_drop(skb)) {
1631 			kfree_skb(skb);
1632 			return NULL;
1633 		}
1634 		skb->dev = dev->master;
1635 	}
1636 
1637 	return dev;
1638 }
1639 
1640 static void net_tx_action(struct softirq_action *h)
1641 {
1642 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
1643 
1644 	if (sd->completion_queue) {
1645 		struct sk_buff *clist;
1646 
1647 		local_irq_disable();
1648 		clist = sd->completion_queue;
1649 		sd->completion_queue = NULL;
1650 		local_irq_enable();
1651 
1652 		while (clist) {
1653 			struct sk_buff *skb = clist;
1654 			clist = clist->next;
1655 
1656 			BUG_TRAP(!atomic_read(&skb->users));
1657 			__kfree_skb(skb);
1658 		}
1659 	}
1660 
1661 	if (sd->output_queue) {
1662 		struct net_device *head;
1663 
1664 		local_irq_disable();
1665 		head = sd->output_queue;
1666 		sd->output_queue = NULL;
1667 		local_irq_enable();
1668 
1669 		while (head) {
1670 			struct net_device *dev = head;
1671 			head = head->next_sched;
1672 
1673 			smp_mb__before_clear_bit();
1674 			clear_bit(__LINK_STATE_SCHED, &dev->state);
1675 
1676 			if (spin_trylock(&dev->queue_lock)) {
1677 				qdisc_run(dev);
1678 				spin_unlock(&dev->queue_lock);
1679 			} else {
1680 				netif_schedule(dev);
1681 			}
1682 		}
1683 	}
1684 }
1685 
1686 static inline int deliver_skb(struct sk_buff *skb,
1687 			      struct packet_type *pt_prev,
1688 			      struct net_device *orig_dev)
1689 {
1690 	atomic_inc(&skb->users);
1691 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1692 }
1693 
1694 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
1695 /* These hooks defined here for ATM */
1696 struct net_bridge;
1697 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
1698 						unsigned char *addr);
1699 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
1700 
1701 /*
1702  * If bridge module is loaded call bridging hook.
1703  *  returns NULL if packet was consumed.
1704  */
1705 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
1706 					struct sk_buff *skb) __read_mostly;
1707 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
1708 					    struct packet_type **pt_prev, int *ret,
1709 					    struct net_device *orig_dev)
1710 {
1711 	struct net_bridge_port *port;
1712 
1713 	if (skb->pkt_type == PACKET_LOOPBACK ||
1714 	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
1715 		return skb;
1716 
1717 	if (*pt_prev) {
1718 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
1719 		*pt_prev = NULL;
1720 	}
1721 
1722 	return br_handle_frame_hook(port, skb);
1723 }
1724 #else
1725 #define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
1726 #endif
1727 
1728 #ifdef CONFIG_NET_CLS_ACT
1729 /* TODO: Maybe we should just force sch_ingress to be compiled in
1730  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
1731  * a compare and 2 stores extra right now if we dont have it on
1732  * but have CONFIG_NET_CLS_ACT
1733  * NOTE: This doesnt stop any functionality; if you dont have
1734  * the ingress scheduler, you just cant add policies on ingress.
1735  *
1736  */
1737 static int ing_filter(struct sk_buff *skb)
1738 {
1739 	struct Qdisc *q;
1740 	struct net_device *dev = skb->dev;
1741 	int result = TC_ACT_OK;
1742 
1743 	if (dev->qdisc_ingress) {
1744 		__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
1745 		if (MAX_RED_LOOP < ttl++) {
1746 			printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n",
1747 				skb->iif, skb->dev->ifindex);
1748 			return TC_ACT_SHOT;
1749 		}
1750 
1751 		skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
1752 
1753 		skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
1754 
1755 		spin_lock(&dev->ingress_lock);
1756 		if ((q = dev->qdisc_ingress) != NULL)
1757 			result = q->enqueue(skb, q);
1758 		spin_unlock(&dev->ingress_lock);
1759 
1760 	}
1761 
1762 	return result;
1763 }
1764 #endif
1765 
1766 int netif_receive_skb(struct sk_buff *skb)
1767 {
1768 	struct packet_type *ptype, *pt_prev;
1769 	struct net_device *orig_dev;
1770 	int ret = NET_RX_DROP;
1771 	__be16 type;
1772 
1773 	/* if we've gotten here through NAPI, check netpoll */
1774 	if (skb->dev->poll && netpoll_rx(skb))
1775 		return NET_RX_DROP;
1776 
1777 	if (!skb->tstamp.tv64)
1778 		net_timestamp(skb);
1779 
1780 	if (!skb->iif)
1781 		skb->iif = skb->dev->ifindex;
1782 
1783 	orig_dev = skb_bond(skb);
1784 
1785 	if (!orig_dev)
1786 		return NET_RX_DROP;
1787 
1788 	__get_cpu_var(netdev_rx_stat).total++;
1789 
1790 	skb_reset_network_header(skb);
1791 	skb_reset_transport_header(skb);
1792 	skb->mac_len = skb->network_header - skb->mac_header;
1793 
1794 	pt_prev = NULL;
1795 
1796 	rcu_read_lock();
1797 
1798 #ifdef CONFIG_NET_CLS_ACT
1799 	if (skb->tc_verd & TC_NCLS) {
1800 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
1801 		goto ncls;
1802 	}
1803 #endif
1804 
1805 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1806 		if (!ptype->dev || ptype->dev == skb->dev) {
1807 			if (pt_prev)
1808 				ret = deliver_skb(skb, pt_prev, orig_dev);
1809 			pt_prev = ptype;
1810 		}
1811 	}
1812 
1813 #ifdef CONFIG_NET_CLS_ACT
1814 	if (pt_prev) {
1815 		ret = deliver_skb(skb, pt_prev, orig_dev);
1816 		pt_prev = NULL; /* noone else should process this after*/
1817 	} else {
1818 		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1819 	}
1820 
1821 	ret = ing_filter(skb);
1822 
1823 	if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
1824 		kfree_skb(skb);
1825 		goto out;
1826 	}
1827 
1828 	skb->tc_verd = 0;
1829 ncls:
1830 #endif
1831 
1832 	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
1833 	if (!skb)
1834 		goto out;
1835 
1836 	type = skb->protocol;
1837 	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
1838 		if (ptype->type == type &&
1839 		    (!ptype->dev || ptype->dev == skb->dev)) {
1840 			if (pt_prev)
1841 				ret = deliver_skb(skb, pt_prev, orig_dev);
1842 			pt_prev = ptype;
1843 		}
1844 	}
1845 
1846 	if (pt_prev) {
1847 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1848 	} else {
1849 		kfree_skb(skb);
1850 		/* Jamal, now you will not able to escape explaining
1851 		 * me how you were going to use this. :-)
1852 		 */
1853 		ret = NET_RX_DROP;
1854 	}
1855 
1856 out:
1857 	rcu_read_unlock();
1858 	return ret;
1859 }
1860 
1861 static int process_backlog(struct net_device *backlog_dev, int *budget)
1862 {
1863 	int work = 0;
1864 	int quota = min(backlog_dev->quota, *budget);
1865 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
1866 	unsigned long start_time = jiffies;
1867 
1868 	backlog_dev->weight = weight_p;
1869 	for (;;) {
1870 		struct sk_buff *skb;
1871 		struct net_device *dev;
1872 
1873 		local_irq_disable();
1874 		skb = __skb_dequeue(&queue->input_pkt_queue);
1875 		if (!skb)
1876 			goto job_done;
1877 		local_irq_enable();
1878 
1879 		dev = skb->dev;
1880 
1881 		netif_receive_skb(skb);
1882 
1883 		dev_put(dev);
1884 
1885 		work++;
1886 
1887 		if (work >= quota || jiffies - start_time > 1)
1888 			break;
1889 
1890 	}
1891 
1892 	backlog_dev->quota -= work;
1893 	*budget -= work;
1894 	return -1;
1895 
1896 job_done:
1897 	backlog_dev->quota -= work;
1898 	*budget -= work;
1899 
1900 	list_del(&backlog_dev->poll_list);
1901 	smp_mb__before_clear_bit();
1902 	netif_poll_enable(backlog_dev);
1903 
1904 	local_irq_enable();
1905 	return 0;
1906 }
1907 
1908 static void net_rx_action(struct softirq_action *h)
1909 {
1910 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
1911 	unsigned long start_time = jiffies;
1912 	int budget = netdev_budget;
1913 	void *have;
1914 
1915 	local_irq_disable();
1916 
1917 	while (!list_empty(&queue->poll_list)) {
1918 		struct net_device *dev;
1919 
1920 		if (budget <= 0 || jiffies - start_time > 1)
1921 			goto softnet_break;
1922 
1923 		local_irq_enable();
1924 
1925 		dev = list_entry(queue->poll_list.next,
1926 				 struct net_device, poll_list);
1927 		have = netpoll_poll_lock(dev);
1928 
1929 		if (dev->quota <= 0 || dev->poll(dev, &budget)) {
1930 			netpoll_poll_unlock(have);
1931 			local_irq_disable();
1932 			list_move_tail(&dev->poll_list, &queue->poll_list);
1933 			if (dev->quota < 0)
1934 				dev->quota += dev->weight;
1935 			else
1936 				dev->quota = dev->weight;
1937 		} else {
1938 			netpoll_poll_unlock(have);
1939 			dev_put(dev);
1940 			local_irq_disable();
1941 		}
1942 	}
1943 out:
1944 #ifdef CONFIG_NET_DMA
1945 	/*
1946 	 * There may not be any more sk_buffs coming right now, so push
1947 	 * any pending DMA copies to hardware
1948 	 */
1949 	if (net_dma_client) {
1950 		struct dma_chan *chan;
1951 		rcu_read_lock();
1952 		list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node)
1953 			dma_async_memcpy_issue_pending(chan);
1954 		rcu_read_unlock();
1955 	}
1956 #endif
1957 	local_irq_enable();
1958 	return;
1959 
1960 softnet_break:
1961 	__get_cpu_var(netdev_rx_stat).time_squeeze++;
1962 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
1963 	goto out;
1964 }
1965 
1966 static gifconf_func_t * gifconf_list [NPROTO];
1967 
1968 /**
1969  *	register_gifconf	-	register a SIOCGIF handler
1970  *	@family: Address family
1971  *	@gifconf: Function handler
1972  *
1973  *	Register protocol dependent address dumping routines. The handler
1974  *	that is passed must not be freed or reused until it has been replaced
1975  *	by another handler.
1976  */
1977 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
1978 {
1979 	if (family >= NPROTO)
1980 		return -EINVAL;
1981 	gifconf_list[family] = gifconf;
1982 	return 0;
1983 }
1984 
1985 
1986 /*
1987  *	Map an interface index to its name (SIOCGIFNAME)
1988  */
1989 
1990 /*
1991  *	We need this ioctl for efficient implementation of the
1992  *	if_indextoname() function required by the IPv6 API.  Without
1993  *	it, we would have to search all the interfaces to find a
1994  *	match.  --pb
1995  */
1996 
1997 static int dev_ifname(struct ifreq __user *arg)
1998 {
1999 	struct net_device *dev;
2000 	struct ifreq ifr;
2001 
2002 	/*
2003 	 *	Fetch the caller's info block.
2004 	 */
2005 
2006 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2007 		return -EFAULT;
2008 
2009 	read_lock(&dev_base_lock);
2010 	dev = __dev_get_by_index(ifr.ifr_ifindex);
2011 	if (!dev) {
2012 		read_unlock(&dev_base_lock);
2013 		return -ENODEV;
2014 	}
2015 
2016 	strcpy(ifr.ifr_name, dev->name);
2017 	read_unlock(&dev_base_lock);
2018 
2019 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2020 		return -EFAULT;
2021 	return 0;
2022 }
2023 
2024 /*
2025  *	Perform a SIOCGIFCONF call. This structure will change
2026  *	size eventually, and there is nothing I can do about it.
2027  *	Thus we will need a 'compatibility mode'.
2028  */
2029 
2030 static int dev_ifconf(char __user *arg)
2031 {
2032 	struct ifconf ifc;
2033 	struct net_device *dev;
2034 	char __user *pos;
2035 	int len;
2036 	int total;
2037 	int i;
2038 
2039 	/*
2040 	 *	Fetch the caller's info block.
2041 	 */
2042 
2043 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2044 		return -EFAULT;
2045 
2046 	pos = ifc.ifc_buf;
2047 	len = ifc.ifc_len;
2048 
2049 	/*
2050 	 *	Loop over the interfaces, and write an info block for each.
2051 	 */
2052 
2053 	total = 0;
2054 	for_each_netdev(dev) {
2055 		for (i = 0; i < NPROTO; i++) {
2056 			if (gifconf_list[i]) {
2057 				int done;
2058 				if (!pos)
2059 					done = gifconf_list[i](dev, NULL, 0);
2060 				else
2061 					done = gifconf_list[i](dev, pos + total,
2062 							       len - total);
2063 				if (done < 0)
2064 					return -EFAULT;
2065 				total += done;
2066 			}
2067 		}
2068 	}
2069 
2070 	/*
2071 	 *	All done.  Write the updated control block back to the caller.
2072 	 */
2073 	ifc.ifc_len = total;
2074 
2075 	/*
2076 	 * 	Both BSD and Solaris return 0 here, so we do too.
2077 	 */
2078 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2079 }
2080 
2081 #ifdef CONFIG_PROC_FS
2082 /*
2083  *	This is invoked by the /proc filesystem handler to display a device
2084  *	in detail.
2085  */
2086 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2087 {
2088 	loff_t off;
2089 	struct net_device *dev;
2090 
2091 	read_lock(&dev_base_lock);
2092 	if (!*pos)
2093 		return SEQ_START_TOKEN;
2094 
2095 	off = 1;
2096 	for_each_netdev(dev)
2097 		if (off++ == *pos)
2098 			return dev;
2099 
2100 	return NULL;
2101 }
2102 
2103 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2104 {
2105 	++*pos;
2106 	return v == SEQ_START_TOKEN ?
2107 		first_net_device() : next_net_device((struct net_device *)v);
2108 }
2109 
2110 void dev_seq_stop(struct seq_file *seq, void *v)
2111 {
2112 	read_unlock(&dev_base_lock);
2113 }
2114 
2115 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2116 {
2117 	struct net_device_stats *stats = dev->get_stats(dev);
2118 
2119 	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2120 		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2121 		   dev->name, stats->rx_bytes, stats->rx_packets,
2122 		   stats->rx_errors,
2123 		   stats->rx_dropped + stats->rx_missed_errors,
2124 		   stats->rx_fifo_errors,
2125 		   stats->rx_length_errors + stats->rx_over_errors +
2126 		    stats->rx_crc_errors + stats->rx_frame_errors,
2127 		   stats->rx_compressed, stats->multicast,
2128 		   stats->tx_bytes, stats->tx_packets,
2129 		   stats->tx_errors, stats->tx_dropped,
2130 		   stats->tx_fifo_errors, stats->collisions,
2131 		   stats->tx_carrier_errors +
2132 		    stats->tx_aborted_errors +
2133 		    stats->tx_window_errors +
2134 		    stats->tx_heartbeat_errors,
2135 		   stats->tx_compressed);
2136 }
2137 
2138 /*
2139  *	Called from the PROCfs module. This now uses the new arbitrary sized
2140  *	/proc/net interface to create /proc/net/dev
2141  */
2142 static int dev_seq_show(struct seq_file *seq, void *v)
2143 {
2144 	if (v == SEQ_START_TOKEN)
2145 		seq_puts(seq, "Inter-|   Receive                            "
2146 			      "                    |  Transmit\n"
2147 			      " face |bytes    packets errs drop fifo frame "
2148 			      "compressed multicast|bytes    packets errs "
2149 			      "drop fifo colls carrier compressed\n");
2150 	else
2151 		dev_seq_printf_stats(seq, v);
2152 	return 0;
2153 }
2154 
2155 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2156 {
2157 	struct netif_rx_stats *rc = NULL;
2158 
2159 	while (*pos < NR_CPUS)
2160 		if (cpu_online(*pos)) {
2161 			rc = &per_cpu(netdev_rx_stat, *pos);
2162 			break;
2163 		} else
2164 			++*pos;
2165 	return rc;
2166 }
2167 
2168 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2169 {
2170 	return softnet_get_online(pos);
2171 }
2172 
2173 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2174 {
2175 	++*pos;
2176 	return softnet_get_online(pos);
2177 }
2178 
2179 static void softnet_seq_stop(struct seq_file *seq, void *v)
2180 {
2181 }
2182 
2183 static int softnet_seq_show(struct seq_file *seq, void *v)
2184 {
2185 	struct netif_rx_stats *s = v;
2186 
2187 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2188 		   s->total, s->dropped, s->time_squeeze, 0,
2189 		   0, 0, 0, 0, /* was fastroute */
2190 		   s->cpu_collision );
2191 	return 0;
2192 }
2193 
2194 static const struct seq_operations dev_seq_ops = {
2195 	.start = dev_seq_start,
2196 	.next  = dev_seq_next,
2197 	.stop  = dev_seq_stop,
2198 	.show  = dev_seq_show,
2199 };
2200 
2201 static int dev_seq_open(struct inode *inode, struct file *file)
2202 {
2203 	return seq_open(file, &dev_seq_ops);
2204 }
2205 
2206 static const struct file_operations dev_seq_fops = {
2207 	.owner	 = THIS_MODULE,
2208 	.open    = dev_seq_open,
2209 	.read    = seq_read,
2210 	.llseek  = seq_lseek,
2211 	.release = seq_release,
2212 };
2213 
2214 static const struct seq_operations softnet_seq_ops = {
2215 	.start = softnet_seq_start,
2216 	.next  = softnet_seq_next,
2217 	.stop  = softnet_seq_stop,
2218 	.show  = softnet_seq_show,
2219 };
2220 
2221 static int softnet_seq_open(struct inode *inode, struct file *file)
2222 {
2223 	return seq_open(file, &softnet_seq_ops);
2224 }
2225 
2226 static const struct file_operations softnet_seq_fops = {
2227 	.owner	 = THIS_MODULE,
2228 	.open    = softnet_seq_open,
2229 	.read    = seq_read,
2230 	.llseek  = seq_lseek,
2231 	.release = seq_release,
2232 };
2233 
2234 static void *ptype_get_idx(loff_t pos)
2235 {
2236 	struct packet_type *pt = NULL;
2237 	loff_t i = 0;
2238 	int t;
2239 
2240 	list_for_each_entry_rcu(pt, &ptype_all, list) {
2241 		if (i == pos)
2242 			return pt;
2243 		++i;
2244 	}
2245 
2246 	for (t = 0; t < 16; t++) {
2247 		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2248 			if (i == pos)
2249 				return pt;
2250 			++i;
2251 		}
2252 	}
2253 	return NULL;
2254 }
2255 
2256 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2257 {
2258 	rcu_read_lock();
2259 	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2260 }
2261 
2262 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2263 {
2264 	struct packet_type *pt;
2265 	struct list_head *nxt;
2266 	int hash;
2267 
2268 	++*pos;
2269 	if (v == SEQ_START_TOKEN)
2270 		return ptype_get_idx(0);
2271 
2272 	pt = v;
2273 	nxt = pt->list.next;
2274 	if (pt->type == htons(ETH_P_ALL)) {
2275 		if (nxt != &ptype_all)
2276 			goto found;
2277 		hash = 0;
2278 		nxt = ptype_base[0].next;
2279 	} else
2280 		hash = ntohs(pt->type) & 15;
2281 
2282 	while (nxt == &ptype_base[hash]) {
2283 		if (++hash >= 16)
2284 			return NULL;
2285 		nxt = ptype_base[hash].next;
2286 	}
2287 found:
2288 	return list_entry(nxt, struct packet_type, list);
2289 }
2290 
2291 static void ptype_seq_stop(struct seq_file *seq, void *v)
2292 {
2293 	rcu_read_unlock();
2294 }
2295 
2296 static void ptype_seq_decode(struct seq_file *seq, void *sym)
2297 {
2298 #ifdef CONFIG_KALLSYMS
2299 	unsigned long offset = 0, symsize;
2300 	const char *symname;
2301 	char *modname;
2302 	char namebuf[128];
2303 
2304 	symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
2305 				  &modname, namebuf);
2306 
2307 	if (symname) {
2308 		char *delim = ":";
2309 
2310 		if (!modname)
2311 			modname = delim = "";
2312 		seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
2313 			   symname, offset);
2314 		return;
2315 	}
2316 #endif
2317 
2318 	seq_printf(seq, "[%p]", sym);
2319 }
2320 
2321 static int ptype_seq_show(struct seq_file *seq, void *v)
2322 {
2323 	struct packet_type *pt = v;
2324 
2325 	if (v == SEQ_START_TOKEN)
2326 		seq_puts(seq, "Type Device      Function\n");
2327 	else {
2328 		if (pt->type == htons(ETH_P_ALL))
2329 			seq_puts(seq, "ALL ");
2330 		else
2331 			seq_printf(seq, "%04x", ntohs(pt->type));
2332 
2333 		seq_printf(seq, " %-8s ",
2334 			   pt->dev ? pt->dev->name : "");
2335 		ptype_seq_decode(seq,  pt->func);
2336 		seq_putc(seq, '\n');
2337 	}
2338 
2339 	return 0;
2340 }
2341 
2342 static const struct seq_operations ptype_seq_ops = {
2343 	.start = ptype_seq_start,
2344 	.next  = ptype_seq_next,
2345 	.stop  = ptype_seq_stop,
2346 	.show  = ptype_seq_show,
2347 };
2348 
2349 static int ptype_seq_open(struct inode *inode, struct file *file)
2350 {
2351 	return seq_open(file, &ptype_seq_ops);
2352 }
2353 
2354 static const struct file_operations ptype_seq_fops = {
2355 	.owner	 = THIS_MODULE,
2356 	.open    = ptype_seq_open,
2357 	.read    = seq_read,
2358 	.llseek  = seq_lseek,
2359 	.release = seq_release,
2360 };
2361 
2362 
2363 static int __init dev_proc_init(void)
2364 {
2365 	int rc = -ENOMEM;
2366 
2367 	if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2368 		goto out;
2369 	if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2370 		goto out_dev;
2371 	if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops))
2372 		goto out_dev2;
2373 
2374 	if (wext_proc_init())
2375 		goto out_softnet;
2376 	rc = 0;
2377 out:
2378 	return rc;
2379 out_softnet:
2380 	proc_net_remove("ptype");
2381 out_dev2:
2382 	proc_net_remove("softnet_stat");
2383 out_dev:
2384 	proc_net_remove("dev");
2385 	goto out;
2386 }
2387 #else
2388 #define dev_proc_init() 0
2389 #endif	/* CONFIG_PROC_FS */
2390 
2391 
2392 /**
2393  *	netdev_set_master	-	set up master/slave pair
2394  *	@slave: slave device
2395  *	@master: new master device
2396  *
2397  *	Changes the master device of the slave. Pass %NULL to break the
2398  *	bonding. The caller must hold the RTNL semaphore. On a failure
2399  *	a negative errno code is returned. On success the reference counts
2400  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2401  *	function returns zero.
2402  */
2403 int netdev_set_master(struct net_device *slave, struct net_device *master)
2404 {
2405 	struct net_device *old = slave->master;
2406 
2407 	ASSERT_RTNL();
2408 
2409 	if (master) {
2410 		if (old)
2411 			return -EBUSY;
2412 		dev_hold(master);
2413 	}
2414 
2415 	slave->master = master;
2416 
2417 	synchronize_net();
2418 
2419 	if (old)
2420 		dev_put(old);
2421 
2422 	if (master)
2423 		slave->flags |= IFF_SLAVE;
2424 	else
2425 		slave->flags &= ~IFF_SLAVE;
2426 
2427 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2428 	return 0;
2429 }
2430 
2431 /**
2432  *	dev_set_promiscuity	- update promiscuity count on a device
2433  *	@dev: device
2434  *	@inc: modifier
2435  *
2436  *	Add or remove promiscuity from a device. While the count in the device
2437  *	remains above zero the interface remains promiscuous. Once it hits zero
2438  *	the device reverts back to normal filtering operation. A negative inc
2439  *	value is used to drop promiscuity on the device.
2440  */
2441 void dev_set_promiscuity(struct net_device *dev, int inc)
2442 {
2443 	unsigned short old_flags = dev->flags;
2444 
2445 	if ((dev->promiscuity += inc) == 0)
2446 		dev->flags &= ~IFF_PROMISC;
2447 	else
2448 		dev->flags |= IFF_PROMISC;
2449 	if (dev->flags != old_flags) {
2450 		dev_mc_upload(dev);
2451 		printk(KERN_INFO "device %s %s promiscuous mode\n",
2452 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2453 							       "left");
2454 		audit_log(current->audit_context, GFP_ATOMIC,
2455 			AUDIT_ANOM_PROMISCUOUS,
2456 			"dev=%s prom=%d old_prom=%d auid=%u",
2457 			dev->name, (dev->flags & IFF_PROMISC),
2458 			(old_flags & IFF_PROMISC),
2459 			audit_get_loginuid(current->audit_context));
2460 	}
2461 }
2462 
2463 /**
2464  *	dev_set_allmulti	- update allmulti count on a device
2465  *	@dev: device
2466  *	@inc: modifier
2467  *
2468  *	Add or remove reception of all multicast frames to a device. While the
2469  *	count in the device remains above zero the interface remains listening
2470  *	to all interfaces. Once it hits zero the device reverts back to normal
2471  *	filtering operation. A negative @inc value is used to drop the counter
2472  *	when releasing a resource needing all multicasts.
2473  */
2474 
2475 void dev_set_allmulti(struct net_device *dev, int inc)
2476 {
2477 	unsigned short old_flags = dev->flags;
2478 
2479 	dev->flags |= IFF_ALLMULTI;
2480 	if ((dev->allmulti += inc) == 0)
2481 		dev->flags &= ~IFF_ALLMULTI;
2482 	if (dev->flags ^ old_flags)
2483 		dev_mc_upload(dev);
2484 }
2485 
2486 unsigned dev_get_flags(const struct net_device *dev)
2487 {
2488 	unsigned flags;
2489 
2490 	flags = (dev->flags & ~(IFF_PROMISC |
2491 				IFF_ALLMULTI |
2492 				IFF_RUNNING |
2493 				IFF_LOWER_UP |
2494 				IFF_DORMANT)) |
2495 		(dev->gflags & (IFF_PROMISC |
2496 				IFF_ALLMULTI));
2497 
2498 	if (netif_running(dev)) {
2499 		if (netif_oper_up(dev))
2500 			flags |= IFF_RUNNING;
2501 		if (netif_carrier_ok(dev))
2502 			flags |= IFF_LOWER_UP;
2503 		if (netif_dormant(dev))
2504 			flags |= IFF_DORMANT;
2505 	}
2506 
2507 	return flags;
2508 }
2509 
2510 int dev_change_flags(struct net_device *dev, unsigned flags)
2511 {
2512 	int ret;
2513 	int old_flags = dev->flags;
2514 
2515 	/*
2516 	 *	Set the flags on our device.
2517 	 */
2518 
2519 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2520 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2521 			       IFF_AUTOMEDIA)) |
2522 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2523 				    IFF_ALLMULTI));
2524 
2525 	/*
2526 	 *	Load in the correct multicast list now the flags have changed.
2527 	 */
2528 
2529 	dev_mc_upload(dev);
2530 
2531 	/*
2532 	 *	Have we downed the interface. We handle IFF_UP ourselves
2533 	 *	according to user attempts to set it, rather than blindly
2534 	 *	setting it.
2535 	 */
2536 
2537 	ret = 0;
2538 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
2539 		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2540 
2541 		if (!ret)
2542 			dev_mc_upload(dev);
2543 	}
2544 
2545 	if (dev->flags & IFF_UP &&
2546 	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2547 					  IFF_VOLATILE)))
2548 		raw_notifier_call_chain(&netdev_chain,
2549 				NETDEV_CHANGE, dev);
2550 
2551 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
2552 		int inc = (flags & IFF_PROMISC) ? +1 : -1;
2553 		dev->gflags ^= IFF_PROMISC;
2554 		dev_set_promiscuity(dev, inc);
2555 	}
2556 
2557 	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2558 	   is important. Some (broken) drivers set IFF_PROMISC, when
2559 	   IFF_ALLMULTI is requested not asking us and not reporting.
2560 	 */
2561 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2562 		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2563 		dev->gflags ^= IFF_ALLMULTI;
2564 		dev_set_allmulti(dev, inc);
2565 	}
2566 
2567 	if (old_flags ^ dev->flags)
2568 		rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
2569 
2570 	return ret;
2571 }
2572 
2573 int dev_set_mtu(struct net_device *dev, int new_mtu)
2574 {
2575 	int err;
2576 
2577 	if (new_mtu == dev->mtu)
2578 		return 0;
2579 
2580 	/*	MTU must be positive.	 */
2581 	if (new_mtu < 0)
2582 		return -EINVAL;
2583 
2584 	if (!netif_device_present(dev))
2585 		return -ENODEV;
2586 
2587 	err = 0;
2588 	if (dev->change_mtu)
2589 		err = dev->change_mtu(dev, new_mtu);
2590 	else
2591 		dev->mtu = new_mtu;
2592 	if (!err && dev->flags & IFF_UP)
2593 		raw_notifier_call_chain(&netdev_chain,
2594 				NETDEV_CHANGEMTU, dev);
2595 	return err;
2596 }
2597 
2598 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2599 {
2600 	int err;
2601 
2602 	if (!dev->set_mac_address)
2603 		return -EOPNOTSUPP;
2604 	if (sa->sa_family != dev->type)
2605 		return -EINVAL;
2606 	if (!netif_device_present(dev))
2607 		return -ENODEV;
2608 	err = dev->set_mac_address(dev, sa);
2609 	if (!err)
2610 		raw_notifier_call_chain(&netdev_chain,
2611 				NETDEV_CHANGEADDR, dev);
2612 	return err;
2613 }
2614 
2615 /*
2616  *	Perform the SIOCxIFxxx calls.
2617  */
2618 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2619 {
2620 	int err;
2621 	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2622 
2623 	if (!dev)
2624 		return -ENODEV;
2625 
2626 	switch (cmd) {
2627 		case SIOCGIFFLAGS:	/* Get interface flags */
2628 			ifr->ifr_flags = dev_get_flags(dev);
2629 			return 0;
2630 
2631 		case SIOCSIFFLAGS:	/* Set interface flags */
2632 			return dev_change_flags(dev, ifr->ifr_flags);
2633 
2634 		case SIOCGIFMETRIC:	/* Get the metric on the interface
2635 					   (currently unused) */
2636 			ifr->ifr_metric = 0;
2637 			return 0;
2638 
2639 		case SIOCSIFMETRIC:	/* Set the metric on the interface
2640 					   (currently unused) */
2641 			return -EOPNOTSUPP;
2642 
2643 		case SIOCGIFMTU:	/* Get the MTU of a device */
2644 			ifr->ifr_mtu = dev->mtu;
2645 			return 0;
2646 
2647 		case SIOCSIFMTU:	/* Set the MTU of a device */
2648 			return dev_set_mtu(dev, ifr->ifr_mtu);
2649 
2650 		case SIOCGIFHWADDR:
2651 			if (!dev->addr_len)
2652 				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
2653 			else
2654 				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2655 				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2656 			ifr->ifr_hwaddr.sa_family = dev->type;
2657 			return 0;
2658 
2659 		case SIOCSIFHWADDR:
2660 			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
2661 
2662 		case SIOCSIFHWBROADCAST:
2663 			if (ifr->ifr_hwaddr.sa_family != dev->type)
2664 				return -EINVAL;
2665 			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2666 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2667 			raw_notifier_call_chain(&netdev_chain,
2668 					    NETDEV_CHANGEADDR, dev);
2669 			return 0;
2670 
2671 		case SIOCGIFMAP:
2672 			ifr->ifr_map.mem_start = dev->mem_start;
2673 			ifr->ifr_map.mem_end   = dev->mem_end;
2674 			ifr->ifr_map.base_addr = dev->base_addr;
2675 			ifr->ifr_map.irq       = dev->irq;
2676 			ifr->ifr_map.dma       = dev->dma;
2677 			ifr->ifr_map.port      = dev->if_port;
2678 			return 0;
2679 
2680 		case SIOCSIFMAP:
2681 			if (dev->set_config) {
2682 				if (!netif_device_present(dev))
2683 					return -ENODEV;
2684 				return dev->set_config(dev, &ifr->ifr_map);
2685 			}
2686 			return -EOPNOTSUPP;
2687 
2688 		case SIOCADDMULTI:
2689 			if (!dev->set_multicast_list ||
2690 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2691 				return -EINVAL;
2692 			if (!netif_device_present(dev))
2693 				return -ENODEV;
2694 			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
2695 					  dev->addr_len, 1);
2696 
2697 		case SIOCDELMULTI:
2698 			if (!dev->set_multicast_list ||
2699 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2700 				return -EINVAL;
2701 			if (!netif_device_present(dev))
2702 				return -ENODEV;
2703 			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2704 					     dev->addr_len, 1);
2705 
2706 		case SIOCGIFINDEX:
2707 			ifr->ifr_ifindex = dev->ifindex;
2708 			return 0;
2709 
2710 		case SIOCGIFTXQLEN:
2711 			ifr->ifr_qlen = dev->tx_queue_len;
2712 			return 0;
2713 
2714 		case SIOCSIFTXQLEN:
2715 			if (ifr->ifr_qlen < 0)
2716 				return -EINVAL;
2717 			dev->tx_queue_len = ifr->ifr_qlen;
2718 			return 0;
2719 
2720 		case SIOCSIFNAME:
2721 			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
2722 			return dev_change_name(dev, ifr->ifr_newname);
2723 
2724 		/*
2725 		 *	Unknown or private ioctl
2726 		 */
2727 
2728 		default:
2729 			if ((cmd >= SIOCDEVPRIVATE &&
2730 			    cmd <= SIOCDEVPRIVATE + 15) ||
2731 			    cmd == SIOCBONDENSLAVE ||
2732 			    cmd == SIOCBONDRELEASE ||
2733 			    cmd == SIOCBONDSETHWADDR ||
2734 			    cmd == SIOCBONDSLAVEINFOQUERY ||
2735 			    cmd == SIOCBONDINFOQUERY ||
2736 			    cmd == SIOCBONDCHANGEACTIVE ||
2737 			    cmd == SIOCGMIIPHY ||
2738 			    cmd == SIOCGMIIREG ||
2739 			    cmd == SIOCSMIIREG ||
2740 			    cmd == SIOCBRADDIF ||
2741 			    cmd == SIOCBRDELIF ||
2742 			    cmd == SIOCWANDEV) {
2743 				err = -EOPNOTSUPP;
2744 				if (dev->do_ioctl) {
2745 					if (netif_device_present(dev))
2746 						err = dev->do_ioctl(dev, ifr,
2747 								    cmd);
2748 					else
2749 						err = -ENODEV;
2750 				}
2751 			} else
2752 				err = -EINVAL;
2753 
2754 	}
2755 	return err;
2756 }
2757 
2758 /*
2759  *	This function handles all "interface"-type I/O control requests. The actual
2760  *	'doing' part of this is dev_ifsioc above.
2761  */
2762 
2763 /**
2764  *	dev_ioctl	-	network device ioctl
2765  *	@cmd: command to issue
2766  *	@arg: pointer to a struct ifreq in user space
2767  *
2768  *	Issue ioctl functions to devices. This is normally called by the
2769  *	user space syscall interfaces but can sometimes be useful for
2770  *	other purposes. The return value is the return from the syscall if
2771  *	positive or a negative errno code on error.
2772  */
2773 
2774 int dev_ioctl(unsigned int cmd, void __user *arg)
2775 {
2776 	struct ifreq ifr;
2777 	int ret;
2778 	char *colon;
2779 
2780 	/* One special case: SIOCGIFCONF takes ifconf argument
2781 	   and requires shared lock, because it sleeps writing
2782 	   to user space.
2783 	 */
2784 
2785 	if (cmd == SIOCGIFCONF) {
2786 		rtnl_lock();
2787 		ret = dev_ifconf((char __user *) arg);
2788 		rtnl_unlock();
2789 		return ret;
2790 	}
2791 	if (cmd == SIOCGIFNAME)
2792 		return dev_ifname((struct ifreq __user *)arg);
2793 
2794 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2795 		return -EFAULT;
2796 
2797 	ifr.ifr_name[IFNAMSIZ-1] = 0;
2798 
2799 	colon = strchr(ifr.ifr_name, ':');
2800 	if (colon)
2801 		*colon = 0;
2802 
2803 	/*
2804 	 *	See which interface the caller is talking about.
2805 	 */
2806 
2807 	switch (cmd) {
2808 		/*
2809 		 *	These ioctl calls:
2810 		 *	- can be done by all.
2811 		 *	- atomic and do not require locking.
2812 		 *	- return a value
2813 		 */
2814 		case SIOCGIFFLAGS:
2815 		case SIOCGIFMETRIC:
2816 		case SIOCGIFMTU:
2817 		case SIOCGIFHWADDR:
2818 		case SIOCGIFSLAVE:
2819 		case SIOCGIFMAP:
2820 		case SIOCGIFINDEX:
2821 		case SIOCGIFTXQLEN:
2822 			dev_load(ifr.ifr_name);
2823 			read_lock(&dev_base_lock);
2824 			ret = dev_ifsioc(&ifr, cmd);
2825 			read_unlock(&dev_base_lock);
2826 			if (!ret) {
2827 				if (colon)
2828 					*colon = ':';
2829 				if (copy_to_user(arg, &ifr,
2830 						 sizeof(struct ifreq)))
2831 					ret = -EFAULT;
2832 			}
2833 			return ret;
2834 
2835 		case SIOCETHTOOL:
2836 			dev_load(ifr.ifr_name);
2837 			rtnl_lock();
2838 			ret = dev_ethtool(&ifr);
2839 			rtnl_unlock();
2840 			if (!ret) {
2841 				if (colon)
2842 					*colon = ':';
2843 				if (copy_to_user(arg, &ifr,
2844 						 sizeof(struct ifreq)))
2845 					ret = -EFAULT;
2846 			}
2847 			return ret;
2848 
2849 		/*
2850 		 *	These ioctl calls:
2851 		 *	- require superuser power.
2852 		 *	- require strict serialization.
2853 		 *	- return a value
2854 		 */
2855 		case SIOCGMIIPHY:
2856 		case SIOCGMIIREG:
2857 		case SIOCSIFNAME:
2858 			if (!capable(CAP_NET_ADMIN))
2859 				return -EPERM;
2860 			dev_load(ifr.ifr_name);
2861 			rtnl_lock();
2862 			ret = dev_ifsioc(&ifr, cmd);
2863 			rtnl_unlock();
2864 			if (!ret) {
2865 				if (colon)
2866 					*colon = ':';
2867 				if (copy_to_user(arg, &ifr,
2868 						 sizeof(struct ifreq)))
2869 					ret = -EFAULT;
2870 			}
2871 			return ret;
2872 
2873 		/*
2874 		 *	These ioctl calls:
2875 		 *	- require superuser power.
2876 		 *	- require strict serialization.
2877 		 *	- do not return a value
2878 		 */
2879 		case SIOCSIFFLAGS:
2880 		case SIOCSIFMETRIC:
2881 		case SIOCSIFMTU:
2882 		case SIOCSIFMAP:
2883 		case SIOCSIFHWADDR:
2884 		case SIOCSIFSLAVE:
2885 		case SIOCADDMULTI:
2886 		case SIOCDELMULTI:
2887 		case SIOCSIFHWBROADCAST:
2888 		case SIOCSIFTXQLEN:
2889 		case SIOCSMIIREG:
2890 		case SIOCBONDENSLAVE:
2891 		case SIOCBONDRELEASE:
2892 		case SIOCBONDSETHWADDR:
2893 		case SIOCBONDCHANGEACTIVE:
2894 		case SIOCBRADDIF:
2895 		case SIOCBRDELIF:
2896 			if (!capable(CAP_NET_ADMIN))
2897 				return -EPERM;
2898 			/* fall through */
2899 		case SIOCBONDSLAVEINFOQUERY:
2900 		case SIOCBONDINFOQUERY:
2901 			dev_load(ifr.ifr_name);
2902 			rtnl_lock();
2903 			ret = dev_ifsioc(&ifr, cmd);
2904 			rtnl_unlock();
2905 			return ret;
2906 
2907 		case SIOCGIFMEM:
2908 			/* Get the per device memory space. We can add this but
2909 			 * currently do not support it */
2910 		case SIOCSIFMEM:
2911 			/* Set the per device memory buffer space.
2912 			 * Not applicable in our case */
2913 		case SIOCSIFLINK:
2914 			return -EINVAL;
2915 
2916 		/*
2917 		 *	Unknown or private ioctl.
2918 		 */
2919 		default:
2920 			if (cmd == SIOCWANDEV ||
2921 			    (cmd >= SIOCDEVPRIVATE &&
2922 			     cmd <= SIOCDEVPRIVATE + 15)) {
2923 				dev_load(ifr.ifr_name);
2924 				rtnl_lock();
2925 				ret = dev_ifsioc(&ifr, cmd);
2926 				rtnl_unlock();
2927 				if (!ret && copy_to_user(arg, &ifr,
2928 							 sizeof(struct ifreq)))
2929 					ret = -EFAULT;
2930 				return ret;
2931 			}
2932 			/* Take care of Wireless Extensions */
2933 			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
2934 				return wext_handle_ioctl(&ifr, cmd, arg);
2935 			return -EINVAL;
2936 	}
2937 }
2938 
2939 
2940 /**
2941  *	dev_new_index	-	allocate an ifindex
2942  *
2943  *	Returns a suitable unique value for a new device interface
2944  *	number.  The caller must hold the rtnl semaphore or the
2945  *	dev_base_lock to be sure it remains unique.
2946  */
2947 static int dev_new_index(void)
2948 {
2949 	static int ifindex;
2950 	for (;;) {
2951 		if (++ifindex <= 0)
2952 			ifindex = 1;
2953 		if (!__dev_get_by_index(ifindex))
2954 			return ifindex;
2955 	}
2956 }
2957 
2958 static int dev_boot_phase = 1;
2959 
2960 /* Delayed registration/unregisteration */
2961 static DEFINE_SPINLOCK(net_todo_list_lock);
2962 static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
2963 
2964 static void net_set_todo(struct net_device *dev)
2965 {
2966 	spin_lock(&net_todo_list_lock);
2967 	list_add_tail(&dev->todo_list, &net_todo_list);
2968 	spin_unlock(&net_todo_list_lock);
2969 }
2970 
2971 /**
2972  *	register_netdevice	- register a network device
2973  *	@dev: device to register
2974  *
2975  *	Take a completed network device structure and add it to the kernel
2976  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2977  *	chain. 0 is returned on success. A negative errno code is returned
2978  *	on a failure to set up the device, or if the name is a duplicate.
2979  *
2980  *	Callers must hold the rtnl semaphore. You may want
2981  *	register_netdev() instead of this.
2982  *
2983  *	BUGS:
2984  *	The locking appears insufficient to guarantee two parallel registers
2985  *	will not get the same name.
2986  */
2987 
2988 int register_netdevice(struct net_device *dev)
2989 {
2990 	struct hlist_head *head;
2991 	struct hlist_node *p;
2992 	int ret;
2993 
2994 	BUG_ON(dev_boot_phase);
2995 	ASSERT_RTNL();
2996 
2997 	might_sleep();
2998 
2999 	/* When net_device's are persistent, this will be fatal. */
3000 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
3001 
3002 	spin_lock_init(&dev->queue_lock);
3003 	spin_lock_init(&dev->_xmit_lock);
3004 	dev->xmit_lock_owner = -1;
3005 	spin_lock_init(&dev->ingress_lock);
3006 
3007 	dev->iflink = -1;
3008 
3009 	/* Init, if this function is available */
3010 	if (dev->init) {
3011 		ret = dev->init(dev);
3012 		if (ret) {
3013 			if (ret > 0)
3014 				ret = -EIO;
3015 			goto out;
3016 		}
3017 	}
3018 
3019 	if (!dev_valid_name(dev->name)) {
3020 		ret = -EINVAL;
3021 		goto out;
3022 	}
3023 
3024 	dev->ifindex = dev_new_index();
3025 	if (dev->iflink == -1)
3026 		dev->iflink = dev->ifindex;
3027 
3028 	/* Check for existence of name */
3029 	head = dev_name_hash(dev->name);
3030 	hlist_for_each(p, head) {
3031 		struct net_device *d
3032 			= hlist_entry(p, struct net_device, name_hlist);
3033 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
3034 			ret = -EEXIST;
3035 			goto out;
3036 		}
3037 	}
3038 
3039 	/* Fix illegal SG+CSUM combinations. */
3040 	if ((dev->features & NETIF_F_SG) &&
3041 	    !(dev->features & NETIF_F_ALL_CSUM)) {
3042 		printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
3043 		       dev->name);
3044 		dev->features &= ~NETIF_F_SG;
3045 	}
3046 
3047 	/* TSO requires that SG is present as well. */
3048 	if ((dev->features & NETIF_F_TSO) &&
3049 	    !(dev->features & NETIF_F_SG)) {
3050 		printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
3051 		       dev->name);
3052 		dev->features &= ~NETIF_F_TSO;
3053 	}
3054 	if (dev->features & NETIF_F_UFO) {
3055 		if (!(dev->features & NETIF_F_HW_CSUM)) {
3056 			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3057 					"NETIF_F_HW_CSUM feature.\n",
3058 							dev->name);
3059 			dev->features &= ~NETIF_F_UFO;
3060 		}
3061 		if (!(dev->features & NETIF_F_SG)) {
3062 			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3063 					"NETIF_F_SG feature.\n",
3064 					dev->name);
3065 			dev->features &= ~NETIF_F_UFO;
3066 		}
3067 	}
3068 
3069 	/*
3070 	 *	nil rebuild_header routine,
3071 	 *	that should be never called and used as just bug trap.
3072 	 */
3073 
3074 	if (!dev->rebuild_header)
3075 		dev->rebuild_header = default_rebuild_header;
3076 
3077 	ret = netdev_register_sysfs(dev);
3078 	if (ret)
3079 		goto out;
3080 	dev->reg_state = NETREG_REGISTERED;
3081 
3082 	/*
3083 	 *	Default initial state at registry is that the
3084 	 *	device is present.
3085 	 */
3086 
3087 	set_bit(__LINK_STATE_PRESENT, &dev->state);
3088 
3089 	dev_init_scheduler(dev);
3090 	write_lock_bh(&dev_base_lock);
3091 	list_add_tail(&dev->dev_list, &dev_base_head);
3092 	hlist_add_head(&dev->name_hlist, head);
3093 	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
3094 	dev_hold(dev);
3095 	write_unlock_bh(&dev_base_lock);
3096 
3097 	/* Notify protocols, that a new device appeared. */
3098 	raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
3099 
3100 	ret = 0;
3101 
3102 out:
3103 	return ret;
3104 }
3105 
3106 /**
3107  *	register_netdev	- register a network device
3108  *	@dev: device to register
3109  *
3110  *	Take a completed network device structure and add it to the kernel
3111  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3112  *	chain. 0 is returned on success. A negative errno code is returned
3113  *	on a failure to set up the device, or if the name is a duplicate.
3114  *
3115  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
3116  *	and expands the device name if you passed a format string to
3117  *	alloc_netdev.
3118  */
3119 int register_netdev(struct net_device *dev)
3120 {
3121 	int err;
3122 
3123 	rtnl_lock();
3124 
3125 	/*
3126 	 * If the name is a format string the caller wants us to do a
3127 	 * name allocation.
3128 	 */
3129 	if (strchr(dev->name, '%')) {
3130 		err = dev_alloc_name(dev, dev->name);
3131 		if (err < 0)
3132 			goto out;
3133 	}
3134 
3135 	err = register_netdevice(dev);
3136 out:
3137 	rtnl_unlock();
3138 	return err;
3139 }
3140 EXPORT_SYMBOL(register_netdev);
3141 
3142 /*
3143  * netdev_wait_allrefs - wait until all references are gone.
3144  *
3145  * This is called when unregistering network devices.
3146  *
3147  * Any protocol or device that holds a reference should register
3148  * for netdevice notification, and cleanup and put back the
3149  * reference if they receive an UNREGISTER event.
3150  * We can get stuck here if buggy protocols don't correctly
3151  * call dev_put.
3152  */
3153 static void netdev_wait_allrefs(struct net_device *dev)
3154 {
3155 	unsigned long rebroadcast_time, warning_time;
3156 
3157 	rebroadcast_time = warning_time = jiffies;
3158 	while (atomic_read(&dev->refcnt) != 0) {
3159 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
3160 			rtnl_lock();
3161 
3162 			/* Rebroadcast unregister notification */
3163 			raw_notifier_call_chain(&netdev_chain,
3164 					    NETDEV_UNREGISTER, dev);
3165 
3166 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
3167 				     &dev->state)) {
3168 				/* We must not have linkwatch events
3169 				 * pending on unregister. If this
3170 				 * happens, we simply run the queue
3171 				 * unscheduled, resulting in a noop
3172 				 * for this device.
3173 				 */
3174 				linkwatch_run_queue();
3175 			}
3176 
3177 			__rtnl_unlock();
3178 
3179 			rebroadcast_time = jiffies;
3180 		}
3181 
3182 		msleep(250);
3183 
3184 		if (time_after(jiffies, warning_time + 10 * HZ)) {
3185 			printk(KERN_EMERG "unregister_netdevice: "
3186 			       "waiting for %s to become free. Usage "
3187 			       "count = %d\n",
3188 			       dev->name, atomic_read(&dev->refcnt));
3189 			warning_time = jiffies;
3190 		}
3191 	}
3192 }
3193 
3194 /* The sequence is:
3195  *
3196  *	rtnl_lock();
3197  *	...
3198  *	register_netdevice(x1);
3199  *	register_netdevice(x2);
3200  *	...
3201  *	unregister_netdevice(y1);
3202  *	unregister_netdevice(y2);
3203  *      ...
3204  *	rtnl_unlock();
3205  *	free_netdev(y1);
3206  *	free_netdev(y2);
3207  *
3208  * We are invoked by rtnl_unlock() after it drops the semaphore.
3209  * This allows us to deal with problems:
3210  * 1) We can delete sysfs objects which invoke hotplug
3211  *    without deadlocking with linkwatch via keventd.
3212  * 2) Since we run with the RTNL semaphore not held, we can sleep
3213  *    safely in order to wait for the netdev refcnt to drop to zero.
3214  */
3215 static DEFINE_MUTEX(net_todo_run_mutex);
3216 void netdev_run_todo(void)
3217 {
3218 	struct list_head list;
3219 
3220 	/* Need to guard against multiple cpu's getting out of order. */
3221 	mutex_lock(&net_todo_run_mutex);
3222 
3223 	/* Not safe to do outside the semaphore.  We must not return
3224 	 * until all unregister events invoked by the local processor
3225 	 * have been completed (either by this todo run, or one on
3226 	 * another cpu).
3227 	 */
3228 	if (list_empty(&net_todo_list))
3229 		goto out;
3230 
3231 	/* Snapshot list, allow later requests */
3232 	spin_lock(&net_todo_list_lock);
3233 	list_replace_init(&net_todo_list, &list);
3234 	spin_unlock(&net_todo_list_lock);
3235 
3236 	while (!list_empty(&list)) {
3237 		struct net_device *dev
3238 			= list_entry(list.next, struct net_device, todo_list);
3239 		list_del(&dev->todo_list);
3240 
3241 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
3242 			printk(KERN_ERR "network todo '%s' but state %d\n",
3243 			       dev->name, dev->reg_state);
3244 			dump_stack();
3245 			continue;
3246 		}
3247 
3248 		netdev_unregister_sysfs(dev);
3249 		dev->reg_state = NETREG_UNREGISTERED;
3250 
3251 		netdev_wait_allrefs(dev);
3252 
3253 		/* paranoia */
3254 		BUG_ON(atomic_read(&dev->refcnt));
3255 		BUG_TRAP(!dev->ip_ptr);
3256 		BUG_TRAP(!dev->ip6_ptr);
3257 		BUG_TRAP(!dev->dn_ptr);
3258 
3259 		/* It must be the very last action,
3260 		 * after this 'dev' may point to freed up memory.
3261 		 */
3262 		if (dev->destructor)
3263 			dev->destructor(dev);
3264 	}
3265 
3266 out:
3267 	mutex_unlock(&net_todo_run_mutex);
3268 }
3269 
3270 static struct net_device_stats *internal_stats(struct net_device *dev)
3271 {
3272 	return &dev->stats;
3273 }
3274 
3275 /**
3276  *	alloc_netdev - allocate network device
3277  *	@sizeof_priv:	size of private data to allocate space for
3278  *	@name:		device name format string
3279  *	@setup:		callback to initialize device
3280  *
3281  *	Allocates a struct net_device with private data area for driver use
3282  *	and performs basic initialization.
3283  */
3284 struct net_device *alloc_netdev(int sizeof_priv, const char *name,
3285 		void (*setup)(struct net_device *))
3286 {
3287 	void *p;
3288 	struct net_device *dev;
3289 	int alloc_size;
3290 
3291 	BUG_ON(strlen(name) >= sizeof(dev->name));
3292 
3293 	/* ensure 32-byte alignment of both the device and private area */
3294 	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
3295 	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3296 
3297 	p = kzalloc(alloc_size, GFP_KERNEL);
3298 	if (!p) {
3299 		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
3300 		return NULL;
3301 	}
3302 
3303 	dev = (struct net_device *)
3304 		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3305 	dev->padded = (char *)dev - (char *)p;
3306 
3307 	if (sizeof_priv)
3308 		dev->priv = netdev_priv(dev);
3309 
3310 	dev->get_stats = internal_stats;
3311 	setup(dev);
3312 	strcpy(dev->name, name);
3313 	return dev;
3314 }
3315 EXPORT_SYMBOL(alloc_netdev);
3316 
3317 /**
3318  *	free_netdev - free network device
3319  *	@dev: device
3320  *
3321  *	This function does the last stage of destroying an allocated device
3322  * 	interface. The reference to the device object is released.
3323  *	If this is the last reference then it will be freed.
3324  */
3325 void free_netdev(struct net_device *dev)
3326 {
3327 #ifdef CONFIG_SYSFS
3328 	/*  Compatibility with error handling in drivers */
3329 	if (dev->reg_state == NETREG_UNINITIALIZED) {
3330 		kfree((char *)dev - dev->padded);
3331 		return;
3332 	}
3333 
3334 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3335 	dev->reg_state = NETREG_RELEASED;
3336 
3337 	/* will free via device release */
3338 	put_device(&dev->dev);
3339 #else
3340 	kfree((char *)dev - dev->padded);
3341 #endif
3342 }
3343 
3344 /* Synchronize with packet receive processing. */
3345 void synchronize_net(void)
3346 {
3347 	might_sleep();
3348 	synchronize_rcu();
3349 }
3350 
3351 /**
3352  *	unregister_netdevice - remove device from the kernel
3353  *	@dev: device
3354  *
3355  *	This function shuts down a device interface and removes it
3356  *	from the kernel tables. On success 0 is returned, on a failure
3357  *	a negative errno code is returned.
3358  *
3359  *	Callers must hold the rtnl semaphore.  You may want
3360  *	unregister_netdev() instead of this.
3361  */
3362 
3363 void unregister_netdevice(struct net_device *dev)
3364 {
3365 	BUG_ON(dev_boot_phase);
3366 	ASSERT_RTNL();
3367 
3368 	/* Some devices call without registering for initialization unwind. */
3369 	if (dev->reg_state == NETREG_UNINITIALIZED) {
3370 		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3371 				  "was registered\n", dev->name, dev);
3372 
3373 		WARN_ON(1);
3374 		return;
3375 	}
3376 
3377 	BUG_ON(dev->reg_state != NETREG_REGISTERED);
3378 
3379 	/* If device is running, close it first. */
3380 	if (dev->flags & IFF_UP)
3381 		dev_close(dev);
3382 
3383 	/* And unlink it from device chain. */
3384 	write_lock_bh(&dev_base_lock);
3385 	list_del(&dev->dev_list);
3386 	hlist_del(&dev->name_hlist);
3387 	hlist_del(&dev->index_hlist);
3388 	write_unlock_bh(&dev_base_lock);
3389 
3390 	dev->reg_state = NETREG_UNREGISTERING;
3391 
3392 	synchronize_net();
3393 
3394 	/* Shutdown queueing discipline. */
3395 	dev_shutdown(dev);
3396 
3397 
3398 	/* Notify protocols, that we are about to destroy
3399 	   this device. They should clean all the things.
3400 	*/
3401 	raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3402 
3403 	/*
3404 	 *	Flush the multicast chain
3405 	 */
3406 	dev_mc_discard(dev);
3407 
3408 	if (dev->uninit)
3409 		dev->uninit(dev);
3410 
3411 	/* Notifier chain MUST detach us from master device. */
3412 	BUG_TRAP(!dev->master);
3413 
3414 	/* Finish processing unregister after unlock */
3415 	net_set_todo(dev);
3416 
3417 	synchronize_net();
3418 
3419 	dev_put(dev);
3420 }
3421 
3422 /**
3423  *	unregister_netdev - remove device from the kernel
3424  *	@dev: device
3425  *
3426  *	This function shuts down a device interface and removes it
3427  *	from the kernel tables. On success 0 is returned, on a failure
3428  *	a negative errno code is returned.
3429  *
3430  *	This is just a wrapper for unregister_netdevice that takes
3431  *	the rtnl semaphore.  In general you want to use this and not
3432  *	unregister_netdevice.
3433  */
3434 void unregister_netdev(struct net_device *dev)
3435 {
3436 	rtnl_lock();
3437 	unregister_netdevice(dev);
3438 	rtnl_unlock();
3439 }
3440 
3441 EXPORT_SYMBOL(unregister_netdev);
3442 
3443 static int dev_cpu_callback(struct notifier_block *nfb,
3444 			    unsigned long action,
3445 			    void *ocpu)
3446 {
3447 	struct sk_buff **list_skb;
3448 	struct net_device **list_net;
3449 	struct sk_buff *skb;
3450 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
3451 	struct softnet_data *sd, *oldsd;
3452 
3453 	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
3454 		return NOTIFY_OK;
3455 
3456 	local_irq_disable();
3457 	cpu = smp_processor_id();
3458 	sd = &per_cpu(softnet_data, cpu);
3459 	oldsd = &per_cpu(softnet_data, oldcpu);
3460 
3461 	/* Find end of our completion_queue. */
3462 	list_skb = &sd->completion_queue;
3463 	while (*list_skb)
3464 		list_skb = &(*list_skb)->next;
3465 	/* Append completion queue from offline CPU. */
3466 	*list_skb = oldsd->completion_queue;
3467 	oldsd->completion_queue = NULL;
3468 
3469 	/* Find end of our output_queue. */
3470 	list_net = &sd->output_queue;
3471 	while (*list_net)
3472 		list_net = &(*list_net)->next_sched;
3473 	/* Append output queue from offline CPU. */
3474 	*list_net = oldsd->output_queue;
3475 	oldsd->output_queue = NULL;
3476 
3477 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
3478 	local_irq_enable();
3479 
3480 	/* Process offline CPU's input_pkt_queue */
3481 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
3482 		netif_rx(skb);
3483 
3484 	return NOTIFY_OK;
3485 }
3486 
3487 #ifdef CONFIG_NET_DMA
3488 /**
3489  * net_dma_rebalance -
3490  * This is called when the number of channels allocated to the net_dma_client
3491  * changes.  The net_dma_client tries to have one DMA channel per CPU.
3492  */
3493 static void net_dma_rebalance(void)
3494 {
3495 	unsigned int cpu, i, n;
3496 	struct dma_chan *chan;
3497 
3498 	if (net_dma_count == 0) {
3499 		for_each_online_cpu(cpu)
3500 			rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
3501 		return;
3502 	}
3503 
3504 	i = 0;
3505 	cpu = first_cpu(cpu_online_map);
3506 
3507 	rcu_read_lock();
3508 	list_for_each_entry(chan, &net_dma_client->channels, client_node) {
3509 		n = ((num_online_cpus() / net_dma_count)
3510 		   + (i < (num_online_cpus() % net_dma_count) ? 1 : 0));
3511 
3512 		while(n) {
3513 			per_cpu(softnet_data, cpu).net_dma = chan;
3514 			cpu = next_cpu(cpu, cpu_online_map);
3515 			n--;
3516 		}
3517 		i++;
3518 	}
3519 	rcu_read_unlock();
3520 }
3521 
3522 /**
3523  * netdev_dma_event - event callback for the net_dma_client
3524  * @client: should always be net_dma_client
3525  * @chan: DMA channel for the event
3526  * @event: event type
3527  */
3528 static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
3529 	enum dma_event event)
3530 {
3531 	spin_lock(&net_dma_event_lock);
3532 	switch (event) {
3533 	case DMA_RESOURCE_ADDED:
3534 		net_dma_count++;
3535 		net_dma_rebalance();
3536 		break;
3537 	case DMA_RESOURCE_REMOVED:
3538 		net_dma_count--;
3539 		net_dma_rebalance();
3540 		break;
3541 	default:
3542 		break;
3543 	}
3544 	spin_unlock(&net_dma_event_lock);
3545 }
3546 
3547 /**
3548  * netdev_dma_regiser - register the networking subsystem as a DMA client
3549  */
3550 static int __init netdev_dma_register(void)
3551 {
3552 	spin_lock_init(&net_dma_event_lock);
3553 	net_dma_client = dma_async_client_register(netdev_dma_event);
3554 	if (net_dma_client == NULL)
3555 		return -ENOMEM;
3556 
3557 	dma_async_client_chan_request(net_dma_client, num_online_cpus());
3558 	return 0;
3559 }
3560 
3561 #else
3562 static int __init netdev_dma_register(void) { return -ENODEV; }
3563 #endif /* CONFIG_NET_DMA */
3564 
3565 /*
3566  *	Initialize the DEV module. At boot time this walks the device list and
3567  *	unhooks any devices that fail to initialise (normally hardware not
3568  *	present) and leaves us with a valid list of present and active devices.
3569  *
3570  */
3571 
3572 /*
3573  *       This is called single threaded during boot, so no need
3574  *       to take the rtnl semaphore.
3575  */
3576 static int __init net_dev_init(void)
3577 {
3578 	int i, rc = -ENOMEM;
3579 
3580 	BUG_ON(!dev_boot_phase);
3581 
3582 	if (dev_proc_init())
3583 		goto out;
3584 
3585 	if (netdev_sysfs_init())
3586 		goto out;
3587 
3588 	INIT_LIST_HEAD(&ptype_all);
3589 	for (i = 0; i < 16; i++)
3590 		INIT_LIST_HEAD(&ptype_base[i]);
3591 
3592 	for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3593 		INIT_HLIST_HEAD(&dev_name_head[i]);
3594 
3595 	for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3596 		INIT_HLIST_HEAD(&dev_index_head[i]);
3597 
3598 	/*
3599 	 *	Initialise the packet receive queues.
3600 	 */
3601 
3602 	for_each_possible_cpu(i) {
3603 		struct softnet_data *queue;
3604 
3605 		queue = &per_cpu(softnet_data, i);
3606 		skb_queue_head_init(&queue->input_pkt_queue);
3607 		queue->completion_queue = NULL;
3608 		INIT_LIST_HEAD(&queue->poll_list);
3609 		set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3610 		queue->backlog_dev.weight = weight_p;
3611 		queue->backlog_dev.poll = process_backlog;
3612 		atomic_set(&queue->backlog_dev.refcnt, 1);
3613 	}
3614 
3615 	netdev_dma_register();
3616 
3617 	dev_boot_phase = 0;
3618 
3619 	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3620 	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3621 
3622 	hotcpu_notifier(dev_cpu_callback, 0);
3623 	dst_init();
3624 	dev_mcast_init();
3625 	rc = 0;
3626 out:
3627 	return rc;
3628 }
3629 
3630 subsys_initcall(net_dev_init);
3631 
3632 EXPORT_SYMBOL(__dev_get_by_index);
3633 EXPORT_SYMBOL(__dev_get_by_name);
3634 EXPORT_SYMBOL(__dev_remove_pack);
3635 EXPORT_SYMBOL(dev_valid_name);
3636 EXPORT_SYMBOL(dev_add_pack);
3637 EXPORT_SYMBOL(dev_alloc_name);
3638 EXPORT_SYMBOL(dev_close);
3639 EXPORT_SYMBOL(dev_get_by_flags);
3640 EXPORT_SYMBOL(dev_get_by_index);
3641 EXPORT_SYMBOL(dev_get_by_name);
3642 EXPORT_SYMBOL(dev_open);
3643 EXPORT_SYMBOL(dev_queue_xmit);
3644 EXPORT_SYMBOL(dev_remove_pack);
3645 EXPORT_SYMBOL(dev_set_allmulti);
3646 EXPORT_SYMBOL(dev_set_promiscuity);
3647 EXPORT_SYMBOL(dev_change_flags);
3648 EXPORT_SYMBOL(dev_set_mtu);
3649 EXPORT_SYMBOL(dev_set_mac_address);
3650 EXPORT_SYMBOL(free_netdev);
3651 EXPORT_SYMBOL(netdev_boot_setup_check);
3652 EXPORT_SYMBOL(netdev_set_master);
3653 EXPORT_SYMBOL(netdev_state_change);
3654 EXPORT_SYMBOL(netif_receive_skb);
3655 EXPORT_SYMBOL(netif_rx);
3656 EXPORT_SYMBOL(register_gifconf);
3657 EXPORT_SYMBOL(register_netdevice);
3658 EXPORT_SYMBOL(register_netdevice_notifier);
3659 EXPORT_SYMBOL(skb_checksum_help);
3660 EXPORT_SYMBOL(synchronize_net);
3661 EXPORT_SYMBOL(unregister_netdevice);
3662 EXPORT_SYMBOL(unregister_netdevice_notifier);
3663 EXPORT_SYMBOL(net_enable_timestamp);
3664 EXPORT_SYMBOL(net_disable_timestamp);
3665 EXPORT_SYMBOL(dev_get_flags);
3666 
3667 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
3668 EXPORT_SYMBOL(br_handle_frame_hook);
3669 EXPORT_SYMBOL(br_fdb_get_hook);
3670 EXPORT_SYMBOL(br_fdb_put_hook);
3671 #endif
3672 
3673 #ifdef CONFIG_KMOD
3674 EXPORT_SYMBOL(dev_load);
3675 #endif
3676 
3677 EXPORT_PER_CPU_SYMBOL(softnet_data);
3678