xref: /openbmc/linux/net/core/dev.c (revision d5cb9783536a41df9f9cba5b0a1d78047ed787f7)
/*
 * 	NET3	Protocol independent device support routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *	Derived from the non IP parts of dev.c 1.0.19
 * 		Authors:	Ross Biro
 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *	Additional Authors:
 *		Florian la Roche <rzsfl@rz.uni-sb.de>
 *		Alan Cox <gw4pts@gw4pts.ampr.org>
 *		David Hinds <dahinds@users.sourceforge.net>
 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *		Adam Sulmicki <adam@cfar.umd.edu>
 *              Pekka Riikonen <priikone@poesidon.pspt.fi>
 *
 *	Changes:
 *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
 *              			to 2 if register_netdev gets called
 *              			before net_dev_init & also removed a
 *              			few lines of code in the process.
 *		Alan Cox	:	device private ioctl copies fields back.
 *		Alan Cox	:	Transmit queue code does relevant
 *					stunts to keep the queue safe.
 *		Alan Cox	:	Fixed double lock.
 *		Alan Cox	:	Fixed promisc NULL pointer trap
 *		????????	:	Support the full private ioctl range
 *		Alan Cox	:	Moved ioctl permission check into
 *					drivers
 *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
 *		Alan Cox	:	100 backlog just doesn't cut it when
 *					you start doing multicast video 8)
 *		Alan Cox	:	Rewrote net_bh and list manager.
 *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
 *		Alan Cox	:	Took out transmit every packet pass
 *					Saved a few bytes in the ioctl handler
 *		Alan Cox	:	Network driver sets packet type before
 *					calling netif_rx. Saves a function
 *					call a packet.
 *		Alan Cox	:	Hashed net_bh()
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
 *		Alan Cox	:	Device lock protection.
 *		Alan Cox	: 	Fixed nasty side effect of device close
 *					changes.
 *		Rudi Cilibrasi	:	Pass the right thing to
 *					set_mac_address()
 *		Dave Miller	:	32bit quantity for the device lock to
 *					make it work out on a Sparc.
 *		Bjorn Ekwall	:	Added KERNELD hack.
 *		Alan Cox	:	Cleaned up the backlog initialise.
 *		Craig Metz	:	SIOCGIFCONF fix if space for under
 *					1 device.
 *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
 *					is no device open function.
 *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
 *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
 *		Cyrus Durgin	:	Cleaned for KMOD
 *		Adam Sulmicki   :	Bug Fix : Network Device Unload
 *					A network device unload needs to purge
 *					the backlog queue.
 *	Paul Rusty Russell	:	SIOCSIFNAME
 *              Pekka Riikonen  :	Netdev boot-time settings code
 *              Andrew Morton   :       Make unregister_netdevice wait
 *              			indefinitely on dev->refcnt
 * 		J Hadi Salim	:	- Backlog queue sampling
 *				        - netif_rx() feedback
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/config.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <linux/if_bridge.h>
#include <linux/divert.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#ifdef CONFIG_NET_RADIO
#include <linux/wireless.h>		/* Note : will define WIRELESS_EXT */
#include <net/iw_handler.h>
#endif	/* CONFIG_NET_RADIO */
#include <asm/current.h>

/*
 *	The list of packet types we will receive (as opposed to discard)
 *	and the routines to invoke.
 *
 *	Why 16. Because with 16 the only overlap we get on a hash of the
 *	low nibble of the protocol value is RARP/SNAP/X.25.
 *
 *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 *             sure which should go first, but I bet it won't make much
 *             difference if we are running VLANs.  The good news is that
 *             this protocol won't be in the list unless compiled in, so
 *             the average user (w/out VLANs) will not be adversely affected.
 *             --BLG
 *
 *		0800	IP
 *		8100    802.1Q VLAN
 *		0001	802.3
 *		0002	AX.25
 *		0004	802.2
 *		8035	RARP
 *		0005	SNAP
 *		0805	X.25
 *		0806	ARP
 *		8137	IPX
 *		0009	Localtalk
 *		86DD	IPv6
 */

static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[16];	/* 16 way hashed list */
static struct list_head ptype_all;		/* Taps */
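
/*
 * Illustrative arithmetic for the comment above (not in the original
 * file): hashing on the low nibble puts RARP (0x8035), SNAP (0x0005)
 * and X.25 (0x0805) in the same bucket, since
 *
 *	0x8035 & 15 == 0x0005 & 15 == 0x0805 & 15 == 5
 *
 * and, as the NOTE observes, 802.1Q VLAN (0x8100) now shares bucket 0
 * with IP (0x0800).  The remaining types in the table each get a
 * bucket of their own.
 */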

/*
 * The @dev_base list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading.
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See register_netdevice() and unregister_netdevice() for example
 * usages; both must be called with the rtnl semaphore held.
 */
struct net_device *dev_base;
static struct net_device **dev_tail = &dev_base;
DEFINE_RWLOCK(dev_base_lock);

EXPORT_SYMBOL(dev_base);
EXPORT_SYMBOL(dev_base_lock);

#define NETDEV_HASHBITS	8
static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];

static inline struct hlist_head *dev_name_hash(const char *name)
{
	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
	return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
}

static inline struct hlist_head *dev_index_hash(int ifindex)
{
	return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
}

/*
 *	Our notifier list
 */

static struct notifier_block *netdev_chain;

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */
DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };

#ifdef CONFIG_SYSFS
extern int netdev_sysfs_init(void);
extern int netdev_register_sysfs(struct net_device *);
extern void netdev_unregister_sysfs(struct net_device *);
#else
#define netdev_sysfs_init()		(0)
#define netdev_register_sysfs(dev)	(0)
#define	netdev_unregister_sysfs(dev)	do { } while(0)
#endif


/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/

/*
 *	For efficiency
 */

int netdev_nit;

/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers, mangling input packets,
 *	MUST BE last in hash buckets and checking protocol handlers
 *	MUST start from promiscuous ptype_all chain in net_bh.
 *	It is true now, do not change it.
 *	Explanation follows: if a protocol handler that mangles packets
 *	is first on the list, it is not able to sense that the packet
 *	is cloned and should be copied-on-write, so it will change it
 *	and subsequent readers will get a broken packet.
 *							--ANK (980803)
 */

/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep, therefore it can not
 *	guarantee that all CPUs that are in the middle of receiving
 *	packets will see the new packet type (until the next received
 *	packet).
 */

void dev_add_pack(struct packet_type *pt)
{
	int hash;

	spin_lock_bh(&ptype_lock);
	if (pt->type == htons(ETH_P_ALL)) {
		netdev_nit++;
		list_add_rcu(&pt->list, &ptype_all);
	} else {
		hash = ntohs(pt->type) & 15;
		list_add_rcu(&pt->list, &ptype_base[hash]);
	}
	spin_unlock_bh(&ptype_lock);
}
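
/*
 * Example usage (an illustrative sketch, not part of the original
 * file; the handler and variable names are made up):
 *
 *	static int my_proto_rcv(struct sk_buff *skb, struct net_device *dev,
 *				struct packet_type *pt,
 *				struct net_device *orig_dev)
 *	{
 *		... process skb ...
 *		kfree_skb(skb);
 *		return 0;
 *	}
 *
 *	static struct packet_type my_proto = {
 *		.type = __constant_htons(ETH_P_IP),
 *		.func = my_proto_rcv,
 *	};
 *
 *	dev_add_pack(&my_proto);
 *
 * The matching teardown is dev_remove_pack(&my_proto); see below.
 */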

/**
 *	__dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *      The packet type might still be in use by receivers
 *	and must not be freed until after all the CPUs have gone
 *	through a quiescent state.
 */
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head;
	struct packet_type *pt1;

	spin_lock_bh(&ptype_lock);

	if (pt->type == htons(ETH_P_ALL)) {
		netdev_nit--;
		head = &ptype_all;
	} else
		head = &ptype_base[ntohs(pt->type) & 15];

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
	spin_unlock_bh(&ptype_lock);
}

/**
 *	dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	synchronize_net();
}
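
/*
 * Sketch of the intended split between the two calls above when a
 * caller batches several removals (an illustrative assumption, not
 * from the original file): __dev_remove_pack() may be called
 * repeatedly, followed by a single grace period before the memory is
 * reused:
 *
 *	__dev_remove_pack(&proto_a);
 *	__dev_remove_pack(&proto_b);
 *	synchronize_net();
 *	// now safe: no CPU can still be looking at proto_a or proto_b
 */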

/******************************************************************************

		      Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 *	netdev_boot_setup_add	- add new setup entry
 *	@name: name of the device
 *	@map: configured settings for the device
 *
 *	Adds new setup entry to the dev_boot_setup list.  The function
 *	returns 0 on error and 1 on success.  This is a generic routine for
 *	all netdevices.
 */
static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
	struct netdev_boot_setup *s;
	int i;

	s = dev_boot_setup;
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
			memset(s[i].name, 0, sizeof(s[i].name));
			strcpy(s[i].name, name);
			memcpy(&s[i].map, map, sizeof(s[i].map));
			break;
		}
	}

	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

/**
 *	netdev_boot_setup_check	- check boot time settings
 *	@dev: the netdevice
 *
 * 	Check boot time settings for the device.
 *	Any settings found are applied to the device, to be used
 *	later during device probing.
 *	Returns 0 if no settings were found, 1 if they were.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
	struct netdev_boot_setup *s = dev_boot_setup;
	int i;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
		    !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
			dev->irq 	= s[i].map.irq;
			dev->base_addr 	= s[i].map.base_addr;
			dev->mem_start 	= s[i].map.mem_start;
			dev->mem_end 	= s[i].map.mem_end;
			return 1;
		}
	}
	return 0;
}


/**
 *	netdev_boot_base	- get address from boot time settings
 *	@prefix: prefix for network device
 *	@unit: id for network device
 *
 * 	Check boot time settings for the base address of the device,
 *	to be used later during device probing.
 *	Returns 0 if no settings are found.
 */
unsigned long netdev_boot_base(const char *prefix, int unit)
{
	const struct netdev_boot_setup *s = dev_boot_setup;
	char name[IFNAMSIZ];
	int i;

	sprintf(name, "%s%d", prefix, unit);

	/*
	 * If device already registered then return base of 1
	 * to indicate not to probe for this interface
	 */
	if (__dev_get_by_name(name))
		return 1;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
		if (!strcmp(name, s[i].name))
			return s[i].map.base_addr;
	return 0;
}

/*
 * Saves the settings configured at boot time for any netdevice.
 */
int __init netdev_boot_setup(char *str)
{
	int ints[5];
	struct ifmap map;

	str = get_options(str, ARRAY_SIZE(ints), ints);
	if (!str || !*str)
		return 0;

	/* Save settings */
	memset(&map, 0, sizeof(map));
	if (ints[0] > 0)
		map.irq = ints[1];
	if (ints[0] > 1)
		map.base_addr = ints[2];
	if (ints[0] > 2)
		map.mem_start = ints[3];
	if (ints[0] > 3)
		map.mem_end = ints[4];

	/* Add new entry to the list */
	return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);
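
/*
 * Example of the accepted syntax (illustrative; the device name and
 * values are made up).  A kernel command line containing
 *
 *	netdev=5,0x340,0,0,eth1
 *
 * is parsed by get_options() above into ints[] = { 4, 5, 0x340, 0, 0 }
 * with the remainder "eth1", i.e. map.irq = 5, map.base_addr = 0x340,
 * map.mem_start = map.mem_end = 0, stored under the name "eth1".
 */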

/*******************************************************************************

			    Device Interface Subroutines

*******************************************************************************/

/**
 *	__dev_get_by_name	- find a device by its name
 *	@name: name to find
 *
 *	Find an interface by name. Must be called under RTNL semaphore
 *	or @dev_base_lock. If the name is found a pointer to the device
 *	is returned. If the name is not found then %NULL is returned. The
 *	reference counters are not incremented so the caller must be
 *	careful with locks.
 */

struct net_device *__dev_get_by_name(const char *name)
{
	struct hlist_node *p;

	hlist_for_each(p, dev_name_hash(name)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, name_hlist);
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;
	}
	return NULL;
}

/**
 *	dev_get_by_name		- find a device by its name
 *	@name: name to find
 *
 *	Find an interface by name. This can be called from any
 *	context and does its own locking. The returned handle has
 *	the usage count incremented and the caller must use dev_put() to
 *	release it when it is no longer needed. %NULL is returned if no
 *	matching device is found.
 */

struct net_device *dev_get_by_name(const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(name);
	if (dev)
		dev_hold(dev);
	read_unlock(&dev_base_lock);
	return dev;
}
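
/*
 * Typical usage (an illustrative sketch, not part of the original
 * file; dev_get_by_index() below follows the same hold/put pattern):
 *
 *	struct net_device *dev = dev_get_by_name("eth0");
 *	if (dev) {
 *		... use dev ...
 *		dev_put(dev);
 *	}
 */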

/**
 *	__dev_get_by_index - find a device by its ifindex
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold either the RTNL semaphore
 *	or @dev_base_lock.
 */

struct net_device *__dev_get_by_index(int ifindex)
{
	struct hlist_node *p;

	hlist_for_each(p, dev_index_hash(ifindex)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, index_hlist);
		if (dev->ifindex == ifindex)
			return dev;
	}
	return NULL;
}


/**
 *	dev_get_by_index - find a device by its ifindex
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns NULL if the device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_index(int ifindex)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_index(ifindex);
	if (dev)
		dev_hold(dev);
	read_unlock(&dev_base_lock);
	return dev;
}

/**
 *	dev_getbyhwaddr - find a device by its hardware address
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address. Returns NULL if the device
 *	is not found or a pointer to the device. The caller must hold the
 *	rtnl semaphore. The returned device has not had its ref count increased
 *	and the caller must therefore be careful about locking
 *
 *	BUGS:
 *	If the API was consistent this would be __dev_get_by_hwaddr
 */

struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
{
	struct net_device *dev;

	ASSERT_RTNL();

	for (dev = dev_base; dev; dev = dev->next)
		if (dev->type == type &&
		    !memcmp(dev->dev_addr, ha, dev->addr_len))
			break;
	return dev;
}

EXPORT_SYMBOL(dev_getbyhwaddr);
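
/*
 * Illustrative caller (a sketch; the address bytes are made up).  The
 * rtnl semaphore must be held around both the lookup and the use of
 * the unreferenced result:
 *
 *	char ha[ETH_ALEN] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
 *	struct net_device *dev;
 *
 *	rtnl_lock();
 *	dev = dev_getbyhwaddr(ARPHRD_ETHER, ha);
 *	if (dev)
 *		... use dev; no reference is held ...
 *	rtnl_unlock();
 */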

struct net_device *dev_getfirstbyhwtype(unsigned short type)
{
	struct net_device *dev;

	rtnl_lock();
	for (dev = dev_base; dev; dev = dev->next) {
		if (dev->type == type) {
			dev_hold(dev);
			break;
		}
	}
	rtnl_unlock();
	return dev;
}

EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 *	dev_get_by_flags - find any device with given flags
 *	@if_flags: IFF_* values
 *	@mask: bitmask of bits in if_flags to check
 *
 *	Search for any interface with the given flags. Returns NULL if a device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	for (dev = dev_base; dev != NULL; dev = dev->next) {
		if (((dev->flags ^ if_flags) & mask) == 0) {
			dev_hold(dev);
			break;
		}
	}
	read_unlock(&dev_base_lock);
	return dev;
}

/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names
 *	to allow sysfs to work.
 */
static int dev_valid_name(const char *name)
{
	return !(*name == '\0'
		 || !strcmp(name, ".")
		 || !strcmp(name, "..")
		 || strchr(name, '/'));
}

/**
 *	dev_alloc_name - allocate a name for a device
 *	@dev: device
 *	@name: name format string
 *
 *	Passed a format string - eg "lt%d" - it will try and find a suitable
 *	id. Not efficient for many devices, not called a lot. The caller
 *	must hold the dev_base or rtnl lock while allocating the name and
 *	adding the device in order to avoid duplicates. Returns the number
 *	of the unit assigned or a negative errno code.
 */

int dev_alloc_name(struct net_device *dev, const char *name)
{
	int i = 0;
	char buf[IFNAMSIZ];
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;
	long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user.  There must be either one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		for (d = dev_base; d; d = d->next) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/*  avoid cases where sscanf is not exact inverse of printf */
			snprintf(buf, sizeof(buf), name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	snprintf(buf, sizeof(buf), name, i);
	if (!__dev_get_by_name(buf)) {
		strlcpy(dev->name, buf, IFNAMSIZ);
		return i;
	}

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}
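
/*
 * Example (illustrative, not from the original file): with "eth0" and
 * "eth2" already registered,
 *
 *	err = dev_alloc_name(dev, "eth%d");
 *
 * scans the in-use bit array, picks the first free unit (1 here),
 * writes "eth1" into dev->name and returns 1; a negative errno is
 * returned on failure as described above.
 */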


/**
 *	dev_change_name - change name of a device
 *	@dev: device
 *	@newname: name (or format string) must be at least IFNAMSIZ
 *
 *	Change name of a device, can pass format strings "eth%d"
 *	for wildcarding.
 */
int dev_change_name(struct net_device *dev, char *newname)
{
	int err = 0;

	ASSERT_RTNL();

	if (dev->flags & IFF_UP)
		return -EBUSY;

	if (!dev_valid_name(newname))
		return -EINVAL;

	if (strchr(newname, '%')) {
		err = dev_alloc_name(dev, newname);
		if (err < 0)
			return err;
		strcpy(newname, dev->name);
	}
	else if (__dev_get_by_name(newname))
		return -EEXIST;
	else
		strlcpy(dev->name, newname, IFNAMSIZ);

	err = class_device_rename(&dev->class_dev, dev->name);
	if (!err) {
		hlist_del(&dev->name_hlist);
		hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
		notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
	}

	return err;
}

/**
 *	netdev_features_change - device changes features
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed features.
 */
void netdev_features_change(struct net_device *dev)
{
	notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);

/**
 *	netdev_state_change - device changes state
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed state. This function calls
 *	the notifier chains for netdev_chain and sends a NEWLINK message
 *	to the routing socket.
 */
void netdev_state_change(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
	}
}

/**
 *	dev_load 	- load a network module
 *	@name: name of interface
 *
 *	If a network interface is not present and the process has suitable
 *	privileges this function loads the module. If module loading is not
 *	available in this kernel then it becomes a nop.
 */

void dev_load(const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(name);
	read_unlock(&dev_base_lock);

	if (!dev && capable(CAP_SYS_MODULE))
		request_module("%s", name);
}

static int default_rebuild_header(struct sk_buff *skb)
{
	printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
	       skb->dev ? skb->dev->name : "NULL!!!");
	kfree_skb(skb);
	return 1;
}


/**
 *	dev_open	- prepare an interface for use.
 *	@dev:	device to open
 *
 *	Takes a device from down to up state. The device's private open
 *	function is invoked and then the multicast lists are loaded. Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop. On a failure
 *	a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
	int ret = 0;

	/*
	 *	Is it already up?
	 */

	if (dev->flags & IFF_UP)
		return 0;

	/*
	 *	Is it even present?
	 */
	if (!netif_device_present(dev))
		return -ENODEV;

	/*
	 *	Call device private open method
	 */
	set_bit(__LINK_STATE_START, &dev->state);
	if (dev->open) {
		ret = dev->open(dev);
		if (ret)
			clear_bit(__LINK_STATE_START, &dev->state);
	}

 	/*
	 *	If it went open OK then:
	 */

	if (!ret) {
		/*
		 *	Set the flags.
		 */
		dev->flags |= IFF_UP;

		/*
		 *	Initialize multicasting status
		 */
		dev_mc_upload(dev);

		/*
		 *	Wakeup transmit queue engine
		 */
		dev_activate(dev);

		/*
		 *	... and announce new interface.
		 */
		notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
	}
	return ret;
}

/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state. A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *	chain.
 */
int dev_close(struct net_device *dev)
{
	if (!(dev->flags & IFF_UP))
		return 0;

	/*
	 *	Tell people we are going down, so that they can
	 *	prepare for death while the device is still operating.
	 */
	notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);

	dev_deactivate(dev);

	clear_bit(__LINK_STATE_START, &dev->state);

	/* Synchronize to scheduled poll. We cannot touch the poll list,
	 * which can even be on a different cpu. So just clear
	 * netif_running() and wait until the poll really happens.
	 * Actually, the best place for this is inside dev->stop() after
	 * the device has stopped its irq engine, but this requires more
	 * changes in devices. */

	smp_mb__after_clear_bit(); /* Commit netif_running(). */
	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
		/* No hurry. */
		msleep(1);
	}

	/*
	 *	Call the device specific close. This cannot fail.
	 *	Only if device is UP
	 *
	 *	We allow it to be called even after a DETACH hot-plug
	 *	event.
	 */
	if (dev->stop)
		dev->stop(dev);

	/*
	 *	Device is now down.
	 */

	dev->flags &= ~IFF_UP;

	/*
	 * Tell people we are down
	 */
	notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);

	return 0;
}
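
/*
 * A minimal bring-up/tear-down sketch (illustrative, not from the
 * original file).  dev_open() and dev_close() are normally invoked
 * with the rtnl semaphore held by their callers:
 *
 *	rtnl_lock();
 *	err = dev_open(dev);	// NETDEV_UP sent on success
 *	...
 *	dev_close(dev);		// NETDEV_GOING_DOWN, then NETDEV_DOWN
 *	rtnl_unlock();
 */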


/*
 *	Device change register/unregister. These are not inline or static
 *	as we export them to the world.
 */

/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register a notifier to be called when network device events occur.
 *	The notifier passed is linked into the kernel structures and must
 *	not be reused until it has been unregistered. A negative errno code
 *	is returned on a failure.
 *
 * 	When registered, all registration and up events are replayed
 *	to the new notifier to give it a race-free view of the network
 *	device list.
 */

int register_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	int err;

	rtnl_lock();
	err = notifier_chain_register(&netdev_chain, nb);
	if (!err) {
		for (dev = dev_base; dev; dev = dev->next) {
			nb->notifier_call(nb, NETDEV_REGISTER, dev);

			if (dev->flags & IFF_UP)
				nb->notifier_call(nb, NETDEV_UP, dev);
		}
	}
	rtnl_unlock();
	return err;
}
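
/*
 * Example notifier (an illustrative sketch; the function and block
 * names are made up).  Note the replay described above: NETDEV_REGISTER
 * and NETDEV_UP arrive for already-present devices at registration time:
 *
 *	static int my_netdev_event(struct notifier_block *nb,
 *				   unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UP)
 *			printk(KERN_INFO "%s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_netdev_notifier = {
 *		.notifier_call = my_netdev_event,
 *	};
 *
 *	register_netdevice_notifier(&my_netdev_notifier);
 */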

/**
 *	unregister_netdevice_notifier - unregister a network notifier block
 *	@nb: notifier
 *
 *	Unregister a notifier previously registered by
 *	register_netdevice_notifier(). The notifier is unlinked from the
 *	kernel structures and may then be reused. A negative errno code
 *	is returned on a failure.
 */

int unregister_netdevice_notifier(struct notifier_block *nb)
{
	return notifier_chain_unregister(&netdev_chain, nb);
}

/**
 *	call_netdevice_notifiers - call all network notifier blocks
 *      @val: value passed unmodified to notifier function
 *      @v:   pointer passed unmodified to notifier function
 *
 *	Call all network notifier blocks.  Parameters and return value
 *	are as for notifier_call_chain().
 */

int call_netdevice_notifiers(unsigned long val, void *v)
{
	return notifier_call_chain(&netdev_chain, val, v);
}

/* When > 0 there are consumers of rx skb time stamps */
static atomic_t netstamp_needed = ATOMIC_INIT(0);

void net_enable_timestamp(void)
{
	atomic_inc(&netstamp_needed);
}

void net_disable_timestamp(void)
{
	atomic_dec(&netstamp_needed);
}

void __net_timestamp(struct sk_buff *skb)
{
	struct timeval tv;

	do_gettimeofday(&tv);
	skb_set_timestamp(skb, &tv);
}
EXPORT_SYMBOL(__net_timestamp);

static inline void net_timestamp(struct sk_buff *skb)
{
	if (atomic_read(&netstamp_needed))
		__net_timestamp(skb);
	else {
		skb->tstamp.off_sec = 0;
		skb->tstamp.off_usec = 0;
	}
}

/*
 *	Support routine. Sends outgoing frames to any network
 *	taps currently in use.
 */

void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
	struct packet_type *ptype;

	net_timestamp(skb);

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		/* Never send packets back to the socket
		 * they originated from - MvS (miquels@drinkel.ow.org)
		 */
		if ((ptype->dev == dev || !ptype->dev) &&
		    (ptype->af_packet_priv == NULL ||
		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (!skb2)
				break;

			/* skb->nh should be correctly
			   set by sender, so that the second statement is
			   just protection against buggy protocols.
			 */
			skb2->mac.raw = skb2->data;

			if (skb2->nh.raw < skb2->data ||
			    skb2->nh.raw > skb2->tail) {
				if (net_ratelimit())
					printk(KERN_CRIT "protocol %04x is "
					       "buggy, dev %s\n",
					       skb2->protocol, dev->name);
				skb2->nh.raw = skb2->data;
			}

			skb2->h.raw = skb2->nh.raw;
			skb2->pkt_type = PACKET_OUTGOING;
			ptype->func(skb2, skb->dev, ptype, skb->dev);
		}
	}
	rcu_read_unlock();
}

/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 */
int skb_checksum_help(struct sk_buff *skb, int inward)
{
	unsigned int csum;
	int ret = 0, offset = skb->h.raw - skb->data;

	if (inward) {
		skb->ip_summed = CHECKSUM_NONE;
		goto out;
	}

	if (skb_cloned(skb)) {
		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		if (ret)
			goto out;
	}

	if (offset > (int)skb->len)
		BUG();
	csum = skb_checksum(skb, offset, skb->len-offset, 0);

	offset = skb->tail - skb->h.raw;
	if (offset <= 0)
		BUG();
	if (skb->csum + 2 > offset)
		BUG();

	*(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
	skb->ip_summed = CHECKSUM_NONE;
out:
	return ret;
}

#ifdef CONFIG_HIGHMEM
/* Actually, we should eliminate this check as soon as we know that:
 * 1. An IOMMU is present and can map all of the memory.
 * 2. No high memory really exists on this machine.
 */

static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
	int i;

	if (dev->features & NETIF_F_HIGHDMA)
		return 0;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
			return 1;

	return 0;
}
#else
#define illegal_highdma(dev, skb)	(0)
#endif

/* Keep head the same: replace data */
int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask)
{
	unsigned int size;
	u8 *data;
	long offset;
	struct skb_shared_info *ninfo;
	int headerlen = skb->data - skb->head;
	int expand = (skb->tail + skb->data_len) - skb->end;

	if (skb_shared(skb))
		BUG();

	if (expand <= 0)
		expand = 0;

	size = skb->end - skb->head + expand;
	size = SKB_DATA_ALIGN(size);
	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
	if (!data)
		return -ENOMEM;

	/* Copy entire thing */
	if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
		BUG();

	/* Set up shinfo */
	ninfo = (struct skb_shared_info*)(data + size);
	atomic_set(&ninfo->dataref, 1);
	ninfo->tso_size = skb_shinfo(skb)->tso_size;
	ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
	ninfo->nr_frags = 0;
	ninfo->frag_list = NULL;

	/* Offset between the two in bytes */
	offset = data - skb->head;

	/* Free old data. */
	skb_release_data(skb);

	skb->head = data;
	skb->end  = data + size;

	/* Set up new pointers */
	skb->h.raw   += offset;
	skb->nh.raw  += offset;
	skb->mac.raw += offset;
	skb->tail    += offset;
	skb->data    += offset;

	/* We are no longer a clone, even if we were. */
	skb->cloned    = 0;

	skb->tail     += skb->data_len;
	skb->data_len  = 0;
	return 0;
}

#define HARD_TX_LOCK(dev, cpu) {			\
	if ((dev->features & NETIF_F_LLTX) == 0) {	\
		spin_lock(&dev->xmit_lock);		\
		dev->xmit_lock_owner = cpu;		\
	}						\
}

#define HARD_TX_UNLOCK(dev) {				\
	if ((dev->features & NETIF_F_LLTX) == 0) {	\
		dev->xmit_lock_owner = -1;		\
		spin_unlock(&dev->xmit_lock);		\
	}						\
}

/**
 *	dev_queue_xmit - transmit a buffer
 *	@skb: buffer to transmit
 *
 *	Queue a buffer for transmission to a network device. The caller must
 *	have set the device and priority and built the buffer before calling
 *	this function. The function can be called from an interrupt.
 *
 *	A negative errno code is returned on a failure. A success does not
 *	guarantee the frame will be transmitted as it may be dropped due
 *	to congestion or traffic shaping.
 *
 * -----------------------------------------------------------------------------------
 *      I notice this method can also return errors from the queue disciplines,
 *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
 *      be positive.
 *
 *      Regardless of the return value, the skb is consumed, so it is currently
 *      difficult to retry a send to this method.  (You can bump the ref count
 *      before sending to hold a reference for retry if you are careful.)
 *
 *      When calling this method, interrupts MUST be enabled.  This is because
 *      the BH enable code must have IRQs enabled so that it will not deadlock.
 *          --BLG
 */

int dev_queue_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct Qdisc *q;
	int rc = -ENOMEM;

	if (skb_shinfo(skb)->frag_list &&
	    !(dev->features & NETIF_F_FRAGLIST) &&
	    __skb_linearize(skb, GFP_ATOMIC))
		goto out_kfree_skb;

	/* Fragmented skb is linearized if device does not support SG,
	 * or if at least one of fragments is in highmem and device
	 * does not support DMA from it.
	 */
	if (skb_shinfo(skb)->nr_frags &&
	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
	    __skb_linearize(skb, GFP_ATOMIC))
		goto out_kfree_skb;

	/* If packet is not checksummed and device does not support
	 * checksumming for this protocol, complete checksumming here.
	 */
	if (skb->ip_summed == CHECKSUM_HW &&
	    (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
	     (!(dev->features & NETIF_F_IP_CSUM) ||
	      skb->protocol != htons(ETH_P_IP))))
		if (skb_checksum_help(skb, 0))
			goto out_kfree_skb;

	spin_lock_prefetch(&dev->queue_lock);

	/* Disable soft irqs for various locks below. Also
	 * stops preemption for RCU.
	 */
	local_bh_disable();

	/* Updates of qdisc are serialized by queue_lock.
	 * The struct Qdisc which is pointed to by qdisc is now a
	 * rcu structure - it may be accessed without acquiring
	 * a lock (but the structure may be stale.) The freeing of the
	 * qdisc will be deferred until it's known that there are no
	 * more references to it.
	 *
	 * If the qdisc has an enqueue function, we still need to
	 * hold the queue_lock before calling it, since queue_lock
	 * also serializes access to the device queue.
	 */

	q = rcu_dereference(dev->qdisc);
#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
#endif
	if (q->enqueue) {
		/* Grab device queue */
		spin_lock(&dev->queue_lock);

		rc = q->enqueue(skb, q);

		qdisc_run(dev);

		spin_unlock(&dev->queue_lock);
		rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
		goto out;
	}

	/* The device has no queue. Common case for software devices:
	   loopback, all the sorts of tunnels...

	   Really, it is unlikely that xmit_lock protection is necessary here.
	   (f.e. loopback and IP tunnels are clean ignoring statistics
	   counters.)
	   However, it is possible that they rely on the protection
	   made by us here.

	   Check this and take the lock. It is not prone to deadlocks.
	   Alternatively, attach a noqueue qdisc; that is even simpler 8)
	 */
	if (dev->flags & IFF_UP) {
		int cpu = smp_processor_id(); /* ok because BHs are off */

		if (dev->xmit_lock_owner != cpu) {

			HARD_TX_LOCK(dev, cpu);

			if (!netif_queue_stopped(dev)) {
				if (netdev_nit)
					dev_queue_xmit_nit(skb, dev);

				rc = 0;
				if (!dev->hard_start_xmit(skb, dev)) {
					HARD_TX_UNLOCK(dev);
					goto out;
				}
			}
			HARD_TX_UNLOCK(dev);
			if (net_ratelimit())
				printk(KERN_CRIT "Virtual device %s asks to "
				       "queue packet!\n", dev->name);
		} else {
			/* Recursion is detected! It is possible,
			 * unfortunately */
			if (net_ratelimit())
				printk(KERN_CRIT "Dead loop on virtual device "
				       "%s, fix it urgently!\n", dev->name);
		}
	}

	rc = -ENETDOWN;
	local_bh_enable();

out_kfree_skb:
	kfree_skb(skb);
	return rc;
out:
	local_bh_enable();
	return rc;
}
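
/*
 * Minimal caller sketch for dev_queue_xmit() (illustrative, not from
 * the original file).  The caller builds the frame and sets skb->dev
 * before handing it over; the skb is consumed whatever the outcome:
 *
 *	skb->dev = dev;
 *	skb->priority = 0;
 *	rc = dev_queue_xmit(skb);	// negative errno, or a positive
 *					// NET_XMIT_* code from the qdisc
 */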


/*=======================================================================
			Receiver routines
  =======================================================================*/

int netdev_max_backlog = 1000;
int netdev_budget = 300;
int weight_p = 64;            /* old backlog weight */

DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };


/**
 *	netif_rx	-	post buffer to the network code
 *	@skb: buffer to post
 *
 *	This function receives a packet from a device driver and queues it for
 *	the upper (protocol) levels to process.  It always succeeds. The buffer
 *	may be dropped during processing for congestion control or by the
 *	protocol layers.
 *
 *	return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_CN_LOW   (low congestion)
 *	NET_RX_CN_MOD   (moderate congestion)
 *	NET_RX_CN_HIGH  (high congestion)
 *	NET_RX_DROP     (packet was dropped)
 *
 */

int netif_rx(struct sk_buff *skb)
{
	struct softnet_data *queue;
	unsigned long flags;

	/* if netpoll wants it, pretend we never saw it */
	if (netpoll_rx(skb))
		return NET_RX_DROP;

	if (!skb->tstamp.off_sec)
		net_timestamp(skb);

	/*
	 * The code is rearranged so that the path is shortest when
	 * the CPU is congested but still operating.
	 */
	local_irq_save(flags);
	queue = &__get_cpu_var(softnet_data);

	__get_cpu_var(netdev_rx_stat).total++;
	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
		if (queue->input_pkt_queue.qlen) {
enqueue:
			dev_hold(skb->dev);
			__skb_queue_tail(&queue->input_pkt_queue, skb);
			local_irq_restore(flags);
			return NET_RX_SUCCESS;
		}

		netif_rx_schedule(&queue->backlog_dev);
		goto enqueue;
	}

	__get_cpu_var(netdev_rx_stat).dropped++;
	local_irq_restore(flags);

	kfree_skb(skb);
	return NET_RX_DROP;
}

int netif_rx_ni(struct sk_buff *skb)
{
	int err;

	preempt_disable();
	err = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();
	preempt_enable();

	return err;
}

EXPORT_SYMBOL(netif_rx_ni);
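
/*
 * Typical driver receive path feeding netif_rx() (an illustrative
 * sketch; the length and device variables are assumptions):
 *
 *	skb = dev_alloc_skb(len + 2);
 *	if (skb) {
 *		skb_reserve(skb, 2);	// align the IP header
 *		... copy len bytes of frame data into the skb ...
 *		skb_put(skb, len);
 *		skb->protocol = eth_type_trans(skb, dev);
 *		netif_rx(skb);		// use netif_rx_ni() from
 *					// process context
 *	}
 */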

static inline struct net_device *skb_bond(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	if (dev->master)
		skb->dev = dev->master;

	return dev;
}

static void net_tx_action(struct softirq_action *h)
{
	struct softnet_data *sd = &__get_cpu_var(softnet_data);

	if (sd->completion_queue) {
		struct sk_buff *clist;

		local_irq_disable();
		clist = sd->completion_queue;
		sd->completion_queue = NULL;
		local_irq_enable();

		while (clist) {
			struct sk_buff *skb = clist;
			clist = clist->next;

			BUG_TRAP(!atomic_read(&skb->users));
			__kfree_skb(skb);
		}
	}

	if (sd->output_queue) {
		struct net_device *head;

		local_irq_disable();
		head = sd->output_queue;
		sd->output_queue = NULL;
		local_irq_enable();

		while (head) {
			struct net_device *dev = head;
			head = head->next_sched;

			smp_mb__before_clear_bit();
			clear_bit(__LINK_STATE_SCHED, &dev->state);

			if (spin_trylock(&dev->queue_lock)) {
				qdisc_run(dev);
				spin_unlock(&dev->queue_lock);
			} else {
				netif_schedule(dev);
			}
		}
	}
}

static __inline__ int deliver_skb(struct sk_buff *skb,
				  struct packet_type *pt_prev,
				  struct net_device *orig_dev)
{
	atomic_inc(&skb->users);
	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}

#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
						unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);

static __inline__ int handle_bridge(struct sk_buff **pskb,
				    struct packet_type **pt_prev, int *ret,
				    struct net_device *orig_dev)
{
	struct net_bridge_port *port;

	if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
	    (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
		return 0;

	if (*pt_prev) {
		*ret = deliver_skb(*pskb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, pskb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev)	(0)
#endif

#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * whenever CONFIG_NET_CLS_ACT is?  Otherwise we pay some useless
 * instructions (a compare and two extra stores) right now if it is
 * not configured but CONFIG_NET_CLS_ACT is.
 * NOTE: This doesn't stop any functionality; if you don't have
 * the ingress scheduler, you just can't add policies on ingress.
 */
static int ing_filter(struct sk_buff *skb)
{
	struct Qdisc *q;
	struct net_device *dev = skb->dev;
	int result = TC_ACT_OK;

	if (dev->qdisc_ingress) {
		__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
		if (MAX_RED_LOOP < ttl++) {
			printk("Redir loop detected Dropping packet (%s->%s)\n",
				skb->input_dev->name, skb->dev->name);
			return TC_ACT_SHOT;
		}

		skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);

		skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);

		spin_lock(&dev->ingress_lock);
		if ((q = dev->qdisc_ingress) != NULL)
			result = q->enqueue(skb, q);
		spin_unlock(&dev->ingress_lock);

	}

	return result;
}
#endif

int netif_receive_skb(struct sk_buff *skb)
{
	struct packet_type *ptype, *pt_prev;
	struct net_device *orig_dev;
	int ret = NET_RX_DROP;
	unsigned short type;

	/* if we've gotten here through NAPI, check netpoll */
	if (skb->dev->poll && netpoll_rx(skb))
		return NET_RX_DROP;

	if (!skb->tstamp.off_sec)
		net_timestamp(skb);

	if (!skb->input_dev)
		skb->input_dev = skb->dev;

	orig_dev = skb_bond(skb);

	__get_cpu_var(netdev_rx_stat).total++;

	skb->h.raw = skb->nh.raw = skb->data;
	skb->mac_len = skb->nh.raw - skb->mac.raw;

	pt_prev = NULL;

	rcu_read_lock();

#ifdef CONFIG_NET_CLS_ACT
	if (skb->tc_verd & TC_NCLS) {
		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
		goto ncls;
	}
#endif

	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		if (!ptype->dev || ptype->dev == skb->dev) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

#ifdef CONFIG_NET_CLS_ACT
	if (pt_prev) {
		ret = deliver_skb(skb, pt_prev, orig_dev);
		pt_prev = NULL; /* no one else should process this after */
	} else {
		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
	}

	ret = ing_filter(skb);

	if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
		kfree_skb(skb);
		goto out;
	}

	skb->tc_verd = 0;
ncls:
#endif

	handle_diverter(skb);

	if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
		goto out;

	type = skb->protocol;
	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
		if (ptype->type == type &&
		    (!ptype->dev || ptype->dev == skb->dev)) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

	if (pt_prev) {
		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
	} else {
		kfree_skb(skb);
		/* Jamal, now you will not be able to escape explaining
		 * to me how you were going to use this. :-)
		 */
		ret = NET_RX_DROP;
	}

out:
	rcu_read_unlock();
	return ret;
}

static int process_backlog(struct net_device *backlog_dev, int *budget)
{
	int work = 0;
	int quota = min(backlog_dev->quota, *budget);
	struct softnet_data *queue = &__get_cpu_var(softnet_data);
	unsigned long start_time = jiffies;

	backlog_dev->weight = weight_p;
	for (;;) {
		struct sk_buff *skb;
		struct net_device *dev;

		local_irq_disable();
		skb = __skb_dequeue(&queue->input_pkt_queue);
		if (!skb)
			goto job_done;
		local_irq_enable();

		dev = skb->dev;

		netif_receive_skb(skb);

		dev_put(dev);

		work++;

		if (work >= quota || jiffies - start_time > 1)
			break;

	}

	backlog_dev->quota -= work;
	*budget -= work;
	return -1;

job_done:
	backlog_dev->quota -= work;
	*budget -= work;

	list_del(&backlog_dev->poll_list);
	smp_mb__before_clear_bit();
	netif_poll_enable(backlog_dev);

	local_irq_enable();
	return 0;
}

static void net_rx_action(struct softirq_action *h)
{
	struct softnet_data *queue = &__get_cpu_var(softnet_data);
	unsigned long start_time = jiffies;
	int budget = netdev_budget;
	void *have;

	local_irq_disable();

	while (!list_empty(&queue->poll_list)) {
		struct net_device *dev;

		if (budget <= 0 || jiffies - start_time > 1)
			goto softnet_break;

		local_irq_enable();

		dev = list_entry(queue->poll_list.next,
				 struct net_device, poll_list);
		have = netpoll_poll_lock(dev);

		if (dev->quota <= 0 || dev->poll(dev, &budget)) {
			netpoll_poll_unlock(have);
			local_irq_disable();
			list_del(&dev->poll_list);
			list_add_tail(&dev->poll_list, &queue->poll_list);
			if (dev->quota < 0)
				dev->quota += dev->weight;
			else
				dev->quota = dev->weight;
		} else {
			netpoll_poll_unlock(have);
			dev_put(dev);
			local_irq_disable();
		}
	}
out:
	local_irq_enable();
	return;

softnet_break:
	__get_cpu_var(netdev_rx_stat).time_squeeze++;
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	goto out;
}

static gifconf_func_t * gifconf_list [NPROTO];

/**
 *	register_gifconf	-	register a SIOCGIF handler
 *	@family: Address family
 *	@gifconf: Function handler
 *
 *	Register protocol dependent address dumping routines. The handler
 *	that is passed must not be freed or reused until it has been replaced
 *	by another handler.
 */
int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
{
	if (family >= NPROTO)
		return -EINVAL;
	gifconf_list[family] = gifconf;
	return 0;
}


/*
 *	Map an interface index to its name (SIOCGIFNAME)
 */

/*
 *	We need this ioctl for efficient implementation of the
 *	if_indextoname() function required by the IPv6 API.  Without
 *	it, we would have to search all the interfaces to find a
 *	match.  --pb
 */

static int dev_ifname(struct ifreq __user *arg)
{
	struct net_device *dev;
	struct ifreq ifr;

	/*
	 *	Fetch the caller's info block.
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		return -EFAULT;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_index(ifr.ifr_ifindex);
	if (!dev) {
		read_unlock(&dev_base_lock);
		return -ENODEV;
	}

	strcpy(ifr.ifr_name, dev->name);
	read_unlock(&dev_base_lock);

	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
		return -EFAULT;
	return 0;
}
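
/*
 * The matching user space call (an illustrative sketch; fd is any
 * open socket):
 *
 *	struct ifreq ifr;
 *
 *	ifr.ifr_ifindex = 1;
 *	if (ioctl(fd, SIOCGIFNAME, &ifr) == 0)
 *		printf("ifindex 1 is %s\n", ifr.ifr_name);
 *
 * which is essentially what if_indextoname() boils down to.
 */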
1808 
1809 /*
1810  *	Perform a SIOCGIFCONF call. This structure will change
1811  *	size eventually, and there is nothing I can do about it.
1812  *	Thus we will need a 'compatibility mode'.
1813  */
1814 
1815 static int dev_ifconf(char __user *arg)
1816 {
1817 	struct ifconf ifc;
1818 	struct net_device *dev;
1819 	char __user *pos;
1820 	int len;
1821 	int total;
1822 	int i;
1823 
1824 	/*
1825 	 *	Fetch the caller's info block.
1826 	 */
1827 
1828 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
1829 		return -EFAULT;
1830 
1831 	pos = ifc.ifc_buf;
1832 	len = ifc.ifc_len;
1833 
1834 	/*
1835 	 *	Loop over the interfaces, and write an info block for each.
1836 	 */
1837 
1838 	total = 0;
1839 	for (dev = dev_base; dev; dev = dev->next) {
1840 		for (i = 0; i < NPROTO; i++) {
1841 			if (gifconf_list[i]) {
1842 				int done;
1843 				if (!pos)
1844 					done = gifconf_list[i](dev, NULL, 0);
1845 				else
1846 					done = gifconf_list[i](dev, pos + total,
1847 							       len - total);
1848 				if (done < 0)
1849 					return -EFAULT;
1850 				total += done;
1851 			}
1852 		}
1853   	}
1854 
1855 	/*
1856 	 *	All done.  Write the updated control block back to the caller.
1857 	 */
1858 	ifc.ifc_len = total;
1859 
1860 	/*
1861 	 * 	Both BSD and Solaris return 0 here, so we do too.
1862 	 */
1863 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
1864 }
1865 
1866 #ifdef CONFIG_PROC_FS
1867 /*
1868  *	This is invoked by the /proc filesystem handler to display a device
1869  *	in detail.
1870  */
1871 static __inline__ struct net_device *dev_get_idx(loff_t pos)
1872 {
1873 	struct net_device *dev;
1874 	loff_t i;
1875 
1876 	for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
1877 
1878 	return i == pos ? dev : NULL;
1879 }
1880 
1881 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
1882 {
1883 	read_lock(&dev_base_lock);
1884 	return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
1885 }
1886 
1887 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1888 {
1889 	++*pos;
1890 	return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
1891 }
1892 
1893 void dev_seq_stop(struct seq_file *seq, void *v)
1894 {
1895 	read_unlock(&dev_base_lock);
1896 }
1897 
1898 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
1899 {
1900 	if (dev->get_stats) {
1901 		struct net_device_stats *stats = dev->get_stats(dev);
1902 
1903 		seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
1904 				"%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
1905 			   dev->name, stats->rx_bytes, stats->rx_packets,
1906 			   stats->rx_errors,
1907 			   stats->rx_dropped + stats->rx_missed_errors,
1908 			   stats->rx_fifo_errors,
1909 			   stats->rx_length_errors + stats->rx_over_errors +
1910 			     stats->rx_crc_errors + stats->rx_frame_errors,
1911 			   stats->rx_compressed, stats->multicast,
1912 			   stats->tx_bytes, stats->tx_packets,
1913 			   stats->tx_errors, stats->tx_dropped,
1914 			   stats->tx_fifo_errors, stats->collisions,
1915 			   stats->tx_carrier_errors +
1916 			     stats->tx_aborted_errors +
1917 			     stats->tx_window_errors +
1918 			     stats->tx_heartbeat_errors,
1919 			   stats->tx_compressed);
1920 	} else
1921 		seq_printf(seq, "%6s: No statistics available.\n", dev->name);
1922 }
1923 
1924 /*
1925  *	Called from the PROCfs module. This now uses the new arbitrary sized
1926  *	/proc/net interface to create /proc/net/dev
1927  */
1928 static int dev_seq_show(struct seq_file *seq, void *v)
1929 {
1930 	if (v == SEQ_START_TOKEN)
1931 		seq_puts(seq, "Inter-|   Receive                            "
1932 			      "                    |  Transmit\n"
1933 			      " face |bytes    packets errs drop fifo frame "
1934 			      "compressed multicast|bytes    packets errs "
1935 			      "drop fifo colls carrier compressed\n");
1936 	else
1937 		dev_seq_printf_stats(seq, v);
1938 	return 0;
1939 }
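/*
 * Column map for the table above, as produced by dev_seq_printf_stats():
 * the receive "drop" column is rx_dropped + rx_missed_errors and "frame"
 * sums the rx_length/rx_over/rx_crc/rx_frame error counters; on the
 * transmit side, "carrier" sums the tx_carrier/tx_aborted/tx_window/
 * tx_heartbeat error counters. All other columns map one-to-one onto
 * struct net_device_stats fields.
 */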
1940 
1941 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
1942 {
1943 	struct netif_rx_stats *rc = NULL;
1944 
1945 	while (*pos < NR_CPUS)
1946 	       	if (cpu_online(*pos)) {
1947 			rc = &per_cpu(netdev_rx_stat, *pos);
1948 			break;
1949 		} else
1950 			++*pos;
1951 	return rc;
1952 }
1953 
1954 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
1955 {
1956 	return softnet_get_online(pos);
1957 }
1958 
1959 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1960 {
1961 	++*pos;
1962 	return softnet_get_online(pos);
1963 }
1964 
1965 static void softnet_seq_stop(struct seq_file *seq, void *v)
1966 {
1967 }
1968 
1969 static int softnet_seq_show(struct seq_file *seq, void *v)
1970 {
1971 	struct netif_rx_stats *s = v;
1972 
1973 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
1974 		   s->total, s->dropped, s->time_squeeze, 0,
1975 		   0, 0, 0, 0, /* was fastroute */
1976 		   s->cpu_collision);
1977 	return 0;
1978 }
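/*
 * Each line of /proc/net/softnet_stat thus carries, for one online CPU,
 * nine hex words: total packets processed, packets dropped, the number
 * of times the softirq budget was exhausted (time_squeeze), five zero
 * placeholders (the last four being the old fastroute slots), and
 * finally cpu_collision.
 */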
1979 
1980 static struct seq_operations dev_seq_ops = {
1981 	.start = dev_seq_start,
1982 	.next  = dev_seq_next,
1983 	.stop  = dev_seq_stop,
1984 	.show  = dev_seq_show,
1985 };
1986 
1987 static int dev_seq_open(struct inode *inode, struct file *file)
1988 {
1989 	return seq_open(file, &dev_seq_ops);
1990 }
1991 
1992 static struct file_operations dev_seq_fops = {
1993 	.owner	 = THIS_MODULE,
1994 	.open    = dev_seq_open,
1995 	.read    = seq_read,
1996 	.llseek  = seq_lseek,
1997 	.release = seq_release,
1998 };
1999 
2000 static struct seq_operations softnet_seq_ops = {
2001 	.start = softnet_seq_start,
2002 	.next  = softnet_seq_next,
2003 	.stop  = softnet_seq_stop,
2004 	.show  = softnet_seq_show,
2005 };
2006 
2007 static int softnet_seq_open(struct inode *inode, struct file *file)
2008 {
2009 	return seq_open(file, &softnet_seq_ops);
2010 }
2011 
2012 static struct file_operations softnet_seq_fops = {
2013 	.owner	 = THIS_MODULE,
2014 	.open    = softnet_seq_open,
2015 	.read    = seq_read,
2016 	.llseek  = seq_lseek,
2017 	.release = seq_release,
2018 };
2019 
2020 #ifdef WIRELESS_EXT
2021 extern int wireless_proc_init(void);
2022 #else
2023 #define wireless_proc_init() 0
2024 #endif
2025 
2026 static int __init dev_proc_init(void)
2027 {
2028 	int rc = -ENOMEM;
2029 
2030 	if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2031 		goto out;
2032 	if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2033 		goto out_dev;
2034 	if (wireless_proc_init())
2035 		goto out_softnet;
2036 	rc = 0;
2037 out:
2038 	return rc;
2039 out_softnet:
2040 	proc_net_remove("softnet_stat");
2041 out_dev:
2042 	proc_net_remove("dev");
2043 	goto out;
2044 }
2045 #else
2046 #define dev_proc_init() 0
2047 #endif	/* CONFIG_PROC_FS */
2048 
2049 
2050 /**
2051  *	netdev_set_master	-	set up master/slave pair
2052  *	@slave: slave device
2053  *	@master: new master device
2054  *
2055  *	Changes the master device of the slave. Pass %NULL to break the
2056  *	bonding. The caller must hold the RTNL semaphore. On a failure
2057  *	a negative errno code is returned. On success the reference counts
2058  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2059  *	function returns zero.
2060  */
2061 int netdev_set_master(struct net_device *slave, struct net_device *master)
2062 {
2063 	struct net_device *old = slave->master;
2064 
2065 	ASSERT_RTNL();
2066 
2067 	if (master) {
2068 		if (old)
2069 			return -EBUSY;
2070 		dev_hold(master);
2071 	}
2072 
2073 	slave->master = master;
2074 
2075 	synchronize_net();
2076 
2077 	if (old)
2078 		dev_put(old);
2079 
2080 	if (master)
2081 		slave->flags |= IFF_SLAVE;
2082 	else
2083 		slave->flags &= ~IFF_SLAVE;
2084 
2085 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2086 	return 0;
2087 }
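/*
 * A minimal sketch of how a master/slave driver such as bonding might
 * call this; the function names and device pointers are hypothetical,
 * but the RTNL locking is exactly what the comment above requires.
 */
#if 0	/* illustrative only, never compiled */
static int example_enslave(struct net_device *bond, struct net_device *port)
{
	int err;

	rtnl_lock();
	err = netdev_set_master(port, bond);	/* bond the pair */
	rtnl_unlock();
	return err;
}

static void example_release(struct net_device *port)
{
	rtnl_lock();
	netdev_set_master(port, NULL);		/* break the bonding */
	rtnl_unlock();
}
#endif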
2088 
2089 /**
2090  *	dev_set_promiscuity	- update promiscuity count on a device
2091  *	@dev: device
2092  *	@inc: modifier
2093  *
2094  *	Add or remove promiscuity from a device. While the count in the device
2095  *	remains above zero the interface remains promiscuous. Once it hits zero
2096  *	the device reverts to normal filtering operation. A negative @inc
2097  *	value is used to drop promiscuity on the device.
2098  */
2099 void dev_set_promiscuity(struct net_device *dev, int inc)
2100 {
2101 	unsigned short old_flags = dev->flags;
2102 
2103 	if ((dev->promiscuity += inc) == 0)
2104 		dev->flags &= ~IFF_PROMISC;
2105 	else
2106 		dev->flags |= IFF_PROMISC;
2107 	if (dev->flags != old_flags) {
2108 		dev_mc_upload(dev);
2109 		printk(KERN_INFO "device %s %s promiscuous mode\n",
2110 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2111 		       					       "left");
2112 	}
2113 }
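/*
 * A short usage sketch with a hypothetical capture module: every +1
 * passed here must eventually be balanced by a -1, since the device
 * only leaves promiscuous mode once the count drops back to zero.
 * Callers normally run under the RTNL semaphore, as assumed here.
 */
#if 0	/* illustrative only, never compiled */
static void example_capture_start(struct net_device *dev)
{
	rtnl_lock();
	dev_set_promiscuity(dev, 1);	/* count 0 -> 1: enter promisc */
	rtnl_unlock();
}

static void example_capture_stop(struct net_device *dev)
{
	rtnl_lock();
	dev_set_promiscuity(dev, -1);	/* count 1 -> 0: leave promisc */
	rtnl_unlock();
}
#endif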
2114 
2115 /**
2116  *	dev_set_allmulti	- update allmulti count on a device
2117  *	@dev: device
2118  *	@inc: modifier
2119  *
2120  *	Add or remove reception of all multicast frames on a device. While the
2121  *	count in the device remains above zero the interface keeps receiving
2122  *	all multicast frames. Once it hits zero the device reverts to normal
2123  *	filtering operation. A negative @inc value is used to drop the counter
2124  *	when releasing a resource needing all multicasts.
2125  */
2126 
2127 void dev_set_allmulti(struct net_device *dev, int inc)
2128 {
2129 	unsigned short old_flags = dev->flags;
2130 
2131 	dev->flags |= IFF_ALLMULTI;
2132 	if ((dev->allmulti += inc) == 0)
2133 		dev->flags &= ~IFF_ALLMULTI;
2134 	if (dev->flags ^ old_flags)
2135 		dev_mc_upload(dev);
2136 }
2137 
2138 unsigned dev_get_flags(const struct net_device *dev)
2139 {
2140 	unsigned flags;
2141 
2142 	flags = (dev->flags & ~(IFF_PROMISC |
2143 				IFF_ALLMULTI |
2144 				IFF_RUNNING)) |
2145 		(dev->gflags & (IFF_PROMISC |
2146 				IFF_ALLMULTI));
2147 
2148 	if (netif_running(dev) && netif_carrier_ok(dev))
2149 		flags |= IFF_RUNNING;
2150 
2151 	return flags;
2152 }
2153 
2154 int dev_change_flags(struct net_device *dev, unsigned flags)
2155 {
2156 	int ret;
2157 	int old_flags = dev->flags;
2158 
2159 	/*
2160 	 *	Set the flags on our device.
2161 	 */
2162 
2163 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2164 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2165 			       IFF_AUTOMEDIA)) |
2166 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2167 				    IFF_ALLMULTI));
2168 
2169 	/*
2170 	 *	Load in the correct multicast list now the flags have changed.
2171 	 */
2172 
2173 	dev_mc_upload(dev);
2174 
2175 	/*
2176 	 *	Have we downed the interface? We handle IFF_UP ourselves
2177 	 *	according to user attempts to set it, rather than blindly
2178 	 *	setting it.
2179 	 */
2180 
2181 	ret = 0;
2182 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
2183 		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2184 
2185 		if (!ret)
2186 			dev_mc_upload(dev);
2187 	}
2188 
2189 	if (dev->flags & IFF_UP &&
2190 	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2191 					  IFF_VOLATILE)))
2192 		notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
2193 
2194 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
2195 		int inc = (flags & IFF_PROMISC) ? +1 : -1;
2196 		dev->gflags ^= IFF_PROMISC;
2197 		dev_set_promiscuity(dev, inc);
2198 	}
2199 
2200 	/* NOTE: the order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2201 	   is important. Some (broken) drivers set IFF_PROMISC when
2202 	   IFF_ALLMULTI is requested, without asking us and without reporting.
2203 	 */
2204 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2205 		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2206 		dev->gflags ^= IFF_ALLMULTI;
2207 		dev_set_allmulti(dev, inc);
2208 	}
2209 
2210 	if (old_flags ^ dev->flags)
2211 		rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
2212 
2213 	return ret;
2214 }
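/*
 * A one-line sketch, assuming a hypothetical caller that already holds
 * the RTNL semaphore: toggling IFF_UP through this helper is what
 * ultimately invokes dev_open()/dev_close() above.
 */
#if 0	/* illustrative only, never compiled */
static int example_bring_up(struct net_device *dev)
{
	return dev_change_flags(dev, dev->flags | IFF_UP);
}
#endif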
2215 
2216 int dev_set_mtu(struct net_device *dev, int new_mtu)
2217 {
2218 	int err;
2219 
2220 	if (new_mtu == dev->mtu)
2221 		return 0;
2222 
2223 	/*	MTU must not be negative.	 */
2224 	if (new_mtu < 0)
2225 		return -EINVAL;
2226 
2227 	if (!netif_device_present(dev))
2228 		return -ENODEV;
2229 
2230 	err = 0;
2231 	if (dev->change_mtu)
2232 		err = dev->change_mtu(dev, new_mtu);
2233 	else
2234 		dev->mtu = new_mtu;
2235 	if (!err && dev->flags & IFF_UP)
2236 		notifier_call_chain(&netdev_chain,
2237 				    NETDEV_CHANGEMTU, dev);
2238 	return err;
2239 }
2240 
2241 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2242 {
2243 	int err;
2244 
2245 	if (!dev->set_mac_address)
2246 		return -EOPNOTSUPP;
2247 	if (sa->sa_family != dev->type)
2248 		return -EINVAL;
2249 	if (!netif_device_present(dev))
2250 		return -ENODEV;
2251 	err = dev->set_mac_address(dev, sa);
2252 	if (!err)
2253 		notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
2254 	return err;
2255 }
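/*
 * A small sketch of feeding this helper with a made-up, locally
 * administered MAC address; sa_family must match dev->type (ARPHRD_ETHER
 * for Ethernet) or the check above fails with -EINVAL.
 */
#if 0	/* illustrative only, never compiled */
static int example_set_mac(struct net_device *dev)
{
	struct sockaddr sa;
	unsigned char mac[6] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };

	sa.sa_family = dev->type;
	memcpy(sa.sa_data, mac, sizeof(mac));
	return dev_set_mac_address(dev, &sa);
}
#endif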
2256 
2257 /*
2258  *	Perform the SIOCxIFxxx calls.
2259  */
2260 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2261 {
2262 	int err;
2263 	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2264 
2265 	if (!dev)
2266 		return -ENODEV;
2267 
2268 	switch (cmd) {
2269 		case SIOCGIFFLAGS:	/* Get interface flags */
2270 			ifr->ifr_flags = dev_get_flags(dev);
2271 			return 0;
2272 
2273 		case SIOCSIFFLAGS:	/* Set interface flags */
2274 			return dev_change_flags(dev, ifr->ifr_flags);
2275 
2276 		case SIOCGIFMETRIC:	/* Get the metric on the interface
2277 					   (currently unused) */
2278 			ifr->ifr_metric = 0;
2279 			return 0;
2280 
2281 		case SIOCSIFMETRIC:	/* Set the metric on the interface
2282 					   (currently unused) */
2283 			return -EOPNOTSUPP;
2284 
2285 		case SIOCGIFMTU:	/* Get the MTU of a device */
2286 			ifr->ifr_mtu = dev->mtu;
2287 			return 0;
2288 
2289 		case SIOCSIFMTU:	/* Set the MTU of a device */
2290 			return dev_set_mtu(dev, ifr->ifr_mtu);
2291 
2292 		case SIOCGIFHWADDR:
2293 			if (!dev->addr_len)
2294 				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
2295 			else
2296 				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2297 				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2298 			ifr->ifr_hwaddr.sa_family = dev->type;
2299 			return 0;
2300 
2301 		case SIOCSIFHWADDR:
2302 			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
2303 
2304 		case SIOCSIFHWBROADCAST:
2305 			if (ifr->ifr_hwaddr.sa_family != dev->type)
2306 				return -EINVAL;
2307 			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2308 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2309 			notifier_call_chain(&netdev_chain,
2310 					    NETDEV_CHANGEADDR, dev);
2311 			return 0;
2312 
2313 		case SIOCGIFMAP:
2314 			ifr->ifr_map.mem_start = dev->mem_start;
2315 			ifr->ifr_map.mem_end   = dev->mem_end;
2316 			ifr->ifr_map.base_addr = dev->base_addr;
2317 			ifr->ifr_map.irq       = dev->irq;
2318 			ifr->ifr_map.dma       = dev->dma;
2319 			ifr->ifr_map.port      = dev->if_port;
2320 			return 0;
2321 
2322 		case SIOCSIFMAP:
2323 			if (dev->set_config) {
2324 				if (!netif_device_present(dev))
2325 					return -ENODEV;
2326 				return dev->set_config(dev, &ifr->ifr_map);
2327 			}
2328 			return -EOPNOTSUPP;
2329 
2330 		case SIOCADDMULTI:
2331 			if (!dev->set_multicast_list ||
2332 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2333 				return -EINVAL;
2334 			if (!netif_device_present(dev))
2335 				return -ENODEV;
2336 			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
2337 					  dev->addr_len, 1);
2338 
2339 		case SIOCDELMULTI:
2340 			if (!dev->set_multicast_list ||
2341 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2342 				return -EINVAL;
2343 			if (!netif_device_present(dev))
2344 				return -ENODEV;
2345 			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2346 					     dev->addr_len, 1);
2347 
2348 		case SIOCGIFINDEX:
2349 			ifr->ifr_ifindex = dev->ifindex;
2350 			return 0;
2351 
2352 		case SIOCGIFTXQLEN:
2353 			ifr->ifr_qlen = dev->tx_queue_len;
2354 			return 0;
2355 
2356 		case SIOCSIFTXQLEN:
2357 			if (ifr->ifr_qlen < 0)
2358 				return -EINVAL;
2359 			dev->tx_queue_len = ifr->ifr_qlen;
2360 			return 0;
2361 
2362 		case SIOCSIFNAME:
2363 			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
2364 			return dev_change_name(dev, ifr->ifr_newname);
2365 
2366 		/*
2367 		 *	Unknown or private ioctl
2368 		 */
2369 
2370 		default:
2371 			if ((cmd >= SIOCDEVPRIVATE &&
2372 			    cmd <= SIOCDEVPRIVATE + 15) ||
2373 			    cmd == SIOCBONDENSLAVE ||
2374 			    cmd == SIOCBONDRELEASE ||
2375 			    cmd == SIOCBONDSETHWADDR ||
2376 			    cmd == SIOCBONDSLAVEINFOQUERY ||
2377 			    cmd == SIOCBONDINFOQUERY ||
2378 			    cmd == SIOCBONDCHANGEACTIVE ||
2379 			    cmd == SIOCGMIIPHY ||
2380 			    cmd == SIOCGMIIREG ||
2381 			    cmd == SIOCSMIIREG ||
2382 			    cmd == SIOCBRADDIF ||
2383 			    cmd == SIOCBRDELIF ||
2384 			    cmd == SIOCWANDEV) {
2385 				err = -EOPNOTSUPP;
2386 				if (dev->do_ioctl) {
2387 					if (netif_device_present(dev))
2388 						err = dev->do_ioctl(dev, ifr,
2389 								    cmd);
2390 					else
2391 						err = -ENODEV;
2392 				}
2393 			} else
2394 				err = -EINVAL;
2395 
2396 	}
2397 	return err;
2398 }
2399 
2400 /*
2401  *	This function handles all "interface"-type I/O control requests. The actual
2402  *	'doing' part of this is dev_ifsioc above.
2403  */
2404 
2405 /**
2406  *	dev_ioctl	-	network device ioctl
2407  *	@cmd: command to issue
2408  *	@arg: pointer to a struct ifreq in user space
2409  *
2410  *	Issue ioctl functions to devices. This is normally called by the
2411  *	user space syscall interfaces but can sometimes be useful for
2412  *	other purposes. The return value is the handler's result if
2413  *	non-negative, or a negative errno code on error.
2414  */
2415 
2416 int dev_ioctl(unsigned int cmd, void __user *arg)
2417 {
2418 	struct ifreq ifr;
2419 	int ret;
2420 	char *colon;
2421 
2422 	/* One special case: SIOCGIFCONF takes an ifconf argument
2423 	   and requires a shared lock, because it sleeps while writing
2424 	   to user space.
2425 	 */
2426 
2427 	if (cmd == SIOCGIFCONF) {
2428 		rtnl_shlock();
2429 		ret = dev_ifconf((char __user *) arg);
2430 		rtnl_shunlock();
2431 		return ret;
2432 	}
2433 	if (cmd == SIOCGIFNAME)
2434 		return dev_ifname((struct ifreq __user *)arg);
2435 
2436 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2437 		return -EFAULT;
2438 
2439 	ifr.ifr_name[IFNAMSIZ-1] = 0;
2440 
2441 	colon = strchr(ifr.ifr_name, ':');
2442 	if (colon)
2443 		*colon = 0;
2444 
2445 	/*
2446 	 *	See which interface the caller is talking about.
2447 	 */
2448 
2449 	switch (cmd) {
2450 		/*
2451 		 *	These ioctl calls:
2452 		 *	- can be done by all.
2453 		 *	- atomic and do not require locking.
2454 		 *	- return a value
2455 		 */
2456 		case SIOCGIFFLAGS:
2457 		case SIOCGIFMETRIC:
2458 		case SIOCGIFMTU:
2459 		case SIOCGIFHWADDR:
2460 		case SIOCGIFSLAVE:
2461 		case SIOCGIFMAP:
2462 		case SIOCGIFINDEX:
2463 		case SIOCGIFTXQLEN:
2464 			dev_load(ifr.ifr_name);
2465 			read_lock(&dev_base_lock);
2466 			ret = dev_ifsioc(&ifr, cmd);
2467 			read_unlock(&dev_base_lock);
2468 			if (!ret) {
2469 				if (colon)
2470 					*colon = ':';
2471 				if (copy_to_user(arg, &ifr,
2472 						 sizeof(struct ifreq)))
2473 					ret = -EFAULT;
2474 			}
2475 			return ret;
2476 
2477 		case SIOCETHTOOL:
2478 			dev_load(ifr.ifr_name);
2479 			rtnl_lock();
2480 			ret = dev_ethtool(&ifr);
2481 			rtnl_unlock();
2482 			if (!ret) {
2483 				if (colon)
2484 					*colon = ':';
2485 				if (copy_to_user(arg, &ifr,
2486 						 sizeof(struct ifreq)))
2487 					ret = -EFAULT;
2488 			}
2489 			return ret;
2490 
2491 		/*
2492 		 *	These ioctl calls:
2493 		 *	- require superuser power.
2494 		 *	- require strict serialization.
2495 		 *	- return a value
2496 		 */
2497 		case SIOCGMIIPHY:
2498 		case SIOCGMIIREG:
2499 		case SIOCSIFNAME:
2500 			if (!capable(CAP_NET_ADMIN))
2501 				return -EPERM;
2502 			dev_load(ifr.ifr_name);
2503 			rtnl_lock();
2504 			ret = dev_ifsioc(&ifr, cmd);
2505 			rtnl_unlock();
2506 			if (!ret) {
2507 				if (colon)
2508 					*colon = ':';
2509 				if (copy_to_user(arg, &ifr,
2510 						 sizeof(struct ifreq)))
2511 					ret = -EFAULT;
2512 			}
2513 			return ret;
2514 
2515 		/*
2516 		 *	These ioctl calls:
2517 		 *	- require superuser power.
2518 		 *	- require strict serialization.
2519 		 *	- do not return a value
2520 		 */
2521 		case SIOCSIFFLAGS:
2522 		case SIOCSIFMETRIC:
2523 		case SIOCSIFMTU:
2524 		case SIOCSIFMAP:
2525 		case SIOCSIFHWADDR:
2526 		case SIOCSIFSLAVE:
2527 		case SIOCADDMULTI:
2528 		case SIOCDELMULTI:
2529 		case SIOCSIFHWBROADCAST:
2530 		case SIOCSIFTXQLEN:
2531 		case SIOCSMIIREG:
2532 		case SIOCBONDENSLAVE:
2533 		case SIOCBONDRELEASE:
2534 		case SIOCBONDSETHWADDR:
2535 		case SIOCBONDSLAVEINFOQUERY:
2536 		case SIOCBONDINFOQUERY:
2537 		case SIOCBONDCHANGEACTIVE:
2538 		case SIOCBRADDIF:
2539 		case SIOCBRDELIF:
2540 			if (!capable(CAP_NET_ADMIN))
2541 				return -EPERM;
2542 			dev_load(ifr.ifr_name);
2543 			rtnl_lock();
2544 			ret = dev_ifsioc(&ifr, cmd);
2545 			rtnl_unlock();
2546 			return ret;
2547 
2548 		case SIOCGIFMEM:
2549 			/* Get the per device memory space. We can add this but
2550 			 * currently do not support it */
2551 		case SIOCSIFMEM:
2552 			/* Set the per device memory buffer space.
2553 			 * Not applicable in our case */
2554 		case SIOCSIFLINK:
2555 			return -EINVAL;
2556 
2557 		/*
2558 		 *	Unknown or private ioctl.
2559 		 */
2560 		default:
2561 			if (cmd == SIOCWANDEV ||
2562 			    (cmd >= SIOCDEVPRIVATE &&
2563 			     cmd <= SIOCDEVPRIVATE + 15)) {
2564 				dev_load(ifr.ifr_name);
2565 				rtnl_lock();
2566 				ret = dev_ifsioc(&ifr, cmd);
2567 				rtnl_unlock();
2568 				if (!ret && copy_to_user(arg, &ifr,
2569 							 sizeof(struct ifreq)))
2570 					ret = -EFAULT;
2571 				return ret;
2572 			}
2573 #ifdef WIRELESS_EXT
2574 			/* Take care of Wireless Extensions */
2575 			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2576 				/* If command is `set a parameter', or
2577 				 * `get the encoding parameters', check if
2578 				 * the user has the right to do it */
2579 				if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) {
2580 					if (!capable(CAP_NET_ADMIN))
2581 						return -EPERM;
2582 				}
2583 				dev_load(ifr.ifr_name);
2584 				rtnl_lock();
2585 				/* Follow me in net/core/wireless.c */
2586 				ret = wireless_process_ioctl(&ifr, cmd);
2587 				rtnl_unlock();
2588 				if (IW_IS_GET(cmd) &&
2589 				    copy_to_user(arg, &ifr,
2590 					    	 sizeof(struct ifreq)))
2591 					ret = -EFAULT;
2592 				return ret;
2593 			}
2594 #endif	/* WIRELESS_EXT */
2595 			return -EINVAL;
2596 	}
2597 }
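/*
 * The userspace view of this entry point, as a hedged sketch: any
 * socket file descriptor will do, and "eth0" is just an example name.
 */
#if 0	/* illustrative only, never compiled */
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>

static int example_get_flags(int fd, short *flags)
{
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
	if (ioctl(fd, SIOCGIFFLAGS, &ifr) < 0)
		return -1;
	*flags = ifr.ifr_flags;
	return 0;
}
#endif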
2598 
2599 
2600 /**
2601  *	dev_new_index	-	allocate an ifindex
2602  *
2603  *	Returns a suitable unique value for a new device interface
2604  *	number.  The caller must hold the rtnl semaphore or the
2605  *	dev_base_lock to be sure it remains unique.
2606  */
2607 static int dev_new_index(void)
2608 {
2609 	static int ifindex;
2610 	for (;;) {
2611 		if (++ifindex <= 0)
2612 			ifindex = 1;
2613 		if (!__dev_get_by_index(ifindex))
2614 			return ifindex;
2615 	}
2616 }
2617 
2618 static int dev_boot_phase = 1;
2619 
2620 /* Delayed registration/unregistration */
2621 static DEFINE_SPINLOCK(net_todo_list_lock);
2622 static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
2623 
2624 static inline void net_set_todo(struct net_device *dev)
2625 {
2626 	spin_lock(&net_todo_list_lock);
2627 	list_add_tail(&dev->todo_list, &net_todo_list);
2628 	spin_unlock(&net_todo_list_lock);
2629 }
2630 
2631 /**
2632  *	register_netdevice	- register a network device
2633  *	@dev: device to register
2634  *
2635  *	Take a completed network device structure and add it to the kernel
2636  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2637  *	chain. 0 is returned on success. A negative errno code is returned
2638  *	on a failure to set up the device, or if the name is a duplicate.
2639  *
2640  *	Callers must hold the rtnl semaphore. You may want
2641  *	register_netdev() instead of this.
2642  *
2643  *	BUGS:
2644  *	The locking appears insufficient to guarantee two parallel registers
2645  *	will not get the same name.
2646  */
2647 
2648 int register_netdevice(struct net_device *dev)
2649 {
2650 	struct hlist_head *head;
2651 	struct hlist_node *p;
2652 	int ret;
2653 
2654 	BUG_ON(dev_boot_phase);
2655 	ASSERT_RTNL();
2656 
2657 	/* When net_devices are persistent, this will be fatal. */
2658 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
2659 
2660 	spin_lock_init(&dev->queue_lock);
2661 	spin_lock_init(&dev->xmit_lock);
2662 	dev->xmit_lock_owner = -1;
2663 #ifdef CONFIG_NET_CLS_ACT
2664 	spin_lock_init(&dev->ingress_lock);
2665 #endif
2666 
2667 	ret = alloc_divert_blk(dev);
2668 	if (ret)
2669 		goto out;
2670 
2671 	dev->iflink = -1;
2672 
2673 	/* Init, if this function is available */
2674 	if (dev->init) {
2675 		ret = dev->init(dev);
2676 		if (ret) {
2677 			if (ret > 0)
2678 				ret = -EIO;
2679 			goto out_err;
2680 		}
2681 	}
2682 
2683 	if (!dev_valid_name(dev->name)) {
2684 		ret = -EINVAL;
2685 		goto out_err;
2686 	}
2687 
2688 	dev->ifindex = dev_new_index();
2689 	if (dev->iflink == -1)
2690 		dev->iflink = dev->ifindex;
2691 
2692 	/* Check for existence of name */
2693 	head = dev_name_hash(dev->name);
2694 	hlist_for_each(p, head) {
2695 		struct net_device *d
2696 			= hlist_entry(p, struct net_device, name_hlist);
2697 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
2698 			ret = -EEXIST;
2699 			goto out_err;
2700 		}
2701 	}
2702 
2703 	/* Fix illegal SG+CSUM combinations. */
2704 	if ((dev->features & NETIF_F_SG) &&
2705 	    !(dev->features & (NETIF_F_IP_CSUM |
2706 			       NETIF_F_NO_CSUM |
2707 			       NETIF_F_HW_CSUM))) {
2708 		printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
2709 		       dev->name);
2710 		dev->features &= ~NETIF_F_SG;
2711 	}
2712 
2713 	/* TSO requires that SG is present as well. */
2714 	if ((dev->features & NETIF_F_TSO) &&
2715 	    !(dev->features & NETIF_F_SG)) {
2716 		printk("%s: Dropping NETIF_F_TSO since no SG feature.\n",
2717 		       dev->name);
2718 		dev->features &= ~NETIF_F_TSO;
2719 	}
2720 	if (dev->features & NETIF_F_UFO) {
2721 		if (!(dev->features & NETIF_F_HW_CSUM)) {
2722 			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
2723 					"NETIF_F_HW_CSUM feature.\n",
2724 							dev->name);
2725 			dev->features &= ~NETIF_F_UFO;
2726 		}
2727 		if (!(dev->features & NETIF_F_SG)) {
2728 			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
2729 					"NETIF_F_SG feature.\n",
2730 					dev->name);
2731 			dev->features &= ~NETIF_F_UFO;
2732 		}
2733 	}
2734 
2735 	/*
2736 	 *	Install a nil rebuild_header routine; it should never be
2737 	 *	called and is used purely as a bug trap.
2738 	 */
2739 
2740 	if (!dev->rebuild_header)
2741 		dev->rebuild_header = default_rebuild_header;
2742 
2743 	/*
2744 	 *	Default initial state at registration is that the
2745 	 *	device is present.
2746 	 */
2747 
2748 	set_bit(__LINK_STATE_PRESENT, &dev->state);
2749 
2750 	dev->next = NULL;
2751 	dev_init_scheduler(dev);
2752 	write_lock_bh(&dev_base_lock);
2753 	*dev_tail = dev;
2754 	dev_tail = &dev->next;
2755 	hlist_add_head(&dev->name_hlist, head);
2756 	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
2757 	dev_hold(dev);
2758 	dev->reg_state = NETREG_REGISTERING;
2759 	write_unlock_bh(&dev_base_lock);
2760 
2761 	/* Notify protocols that a new device has appeared. */
2762 	notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
2763 
2764 	/* Finish registration after unlock */
2765 	net_set_todo(dev);
2766 	ret = 0;
2767 
2768 out:
2769 	return ret;
2770 out_err:
2771 	free_divert_blk(dev);
2772 	goto out;
2773 }
2774 
2775 /**
2776  *	register_netdev	- register a network device
2777  *	@dev: device to register
2778  *
2779  *	Take a completed network device structure and add it to the kernel
2780  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2781  *	chain. 0 is returned on success. A negative errno code is returned
2782  *	on a failure to set up the device, or if the name is a duplicate.
2783  *
2784  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
2785  *	and expands the device name if you passed a format string to
2786  *	alloc_netdev.
2787  */
2788 int register_netdev(struct net_device *dev)
2789 {
2790 	int err;
2791 
2792 	rtnl_lock();
2793 
2794 	/*
2795 	 * If the name is a format string the caller wants us to do a
2796 	 * name allocation.
2797 	 */
2798 	if (strchr(dev->name, '%')) {
2799 		err = dev_alloc_name(dev, dev->name);
2800 		if (err < 0)
2801 			goto out;
2802 	}
2803 
2804 	/*
2805 	 * Back compatibility hook. Kill this one in 2.5
2806 	 */
2807 	if (dev->name[0] == 0 || dev->name[0] == ' ') {
2808 		err = dev_alloc_name(dev, "eth%d");
2809 		if (err < 0)
2810 			goto out;
2811 	}
2812 
2813 	err = register_netdevice(dev);
2814 out:
2815 	rtnl_unlock();
2816 	return err;
2817 }
2818 EXPORT_SYMBOL(register_netdev);
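/*
 * A compact registration sketch with a hypothetical driver: the "%d" in
 * the name asks register_netdev() to run the dev_alloc_name() path, and
 * a registration failure must be unwound with free_netdev().
 */
#if 0	/* illustrative only, never compiled */
struct example_priv {
	int dummy;
};

static void example_setup(struct net_device *dev)
{
	ether_setup(dev);	/* typical Ethernet defaults */
}

static int example_probe(void)
{
	struct net_device *dev;
	int err;

	dev = alloc_netdev(sizeof(struct example_priv), "example%d",
			   example_setup);
	if (!dev)
		return -ENOMEM;

	err = register_netdev(dev);
	if (err) {
		free_netdev(dev);
		return err;
	}
	return 0;
}
#endif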
2819 
2820 /*
2821  * netdev_wait_allrefs - wait until all references are gone.
2822  *
2823  * This is called when unregistering network devices.
2824  *
2825  * Any protocol or device that holds a reference should register
2826  * for netdevice notification, and cleanup and put back the
2827  * reference if they receive an UNREGISTER event.
2828  * We can get stuck here if buggy protocols don't correctly
2829  * call dev_put.
2830  */
2831 static void netdev_wait_allrefs(struct net_device *dev)
2832 {
2833 	unsigned long rebroadcast_time, warning_time;
2834 
2835 	rebroadcast_time = warning_time = jiffies;
2836 	while (atomic_read(&dev->refcnt) != 0) {
2837 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
2838 			rtnl_shlock();
2839 
2840 			/* Rebroadcast unregister notification */
2841 			notifier_call_chain(&netdev_chain,
2842 					    NETDEV_UNREGISTER, dev);
2843 
2844 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
2845 				     &dev->state)) {
2846 				/* We must not have linkwatch events
2847 				 * pending on unregister. If this
2848 				 * happens, we simply run the queue
2849 				 * unscheduled, resulting in a noop
2850 				 * for this device.
2851 				 */
2852 				linkwatch_run_queue();
2853 			}
2854 
2855 			rtnl_shunlock();
2856 
2857 			rebroadcast_time = jiffies;
2858 		}
2859 
2860 		msleep(250);
2861 
2862 		if (time_after(jiffies, warning_time + 10 * HZ)) {
2863 			printk(KERN_EMERG "unregister_netdevice: "
2864 			       "waiting for %s to become free. Usage "
2865 			       "count = %d\n",
2866 			       dev->name, atomic_read(&dev->refcnt));
2867 			warning_time = jiffies;
2868 		}
2869 	}
2870 }
2871 
2872 /* The sequence is:
2873  *
2874  *	rtnl_lock();
2875  *	...
2876  *	register_netdevice(x1);
2877  *	register_netdevice(x2);
2878  *	...
2879  *	unregister_netdevice(y1);
2880  *	unregister_netdevice(y2);
2881  *      ...
2882  *	rtnl_unlock();
2883  *	free_netdev(y1);
2884  *	free_netdev(y2);
2885  *
2886  * We are invoked by rtnl_unlock() after it drops the semaphore.
2887  * This allows us to deal with problems:
2888  * 1) We can create/delete sysfs objects which invoke hotplug
2889  *    without deadlocking with linkwatch via keventd.
2890  * 2) Since we run with the RTNL semaphore not held, we can sleep
2891  *    safely in order to wait for the netdev refcnt to drop to zero.
2892  */
2893 static DECLARE_MUTEX(net_todo_run_mutex);
2894 void netdev_run_todo(void)
2895 {
2896 	struct list_head list = LIST_HEAD_INIT(list);
2897 	int err;
2898 
2899 
2900 	/* Need to guard against multiple CPUs getting out of order. */
2901 	down(&net_todo_run_mutex);
2902 
2903 	/* Not safe to do outside the semaphore.  We must not return
2904 	 * until all unregister events invoked by the local processor
2905 	 * have been completed (either by this todo run, or one on
2906 	 * another cpu).
2907 	 */
2908 	if (list_empty(&net_todo_list))
2909 		goto out;
2910 
2911 	/* Snapshot list, allow later requests */
2912 	spin_lock(&net_todo_list_lock);
2913 	list_splice_init(&net_todo_list, &list);
2914 	spin_unlock(&net_todo_list_lock);
2915 
2916 	while (!list_empty(&list)) {
2917 		struct net_device *dev
2918 			= list_entry(list.next, struct net_device, todo_list);
2919 		list_del(&dev->todo_list);
2920 
2921 		switch(dev->reg_state) {
2922 		case NETREG_REGISTERING:
2923 			err = netdev_register_sysfs(dev);
2924 			if (err)
2925 				printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
2926 				       dev->name, err);
2927 			dev->reg_state = NETREG_REGISTERED;
2928 			break;
2929 
2930 		case NETREG_UNREGISTERING:
2931 			netdev_unregister_sysfs(dev);
2932 			dev->reg_state = NETREG_UNREGISTERED;
2933 
2934 			netdev_wait_allrefs(dev);
2935 
2936 			/* paranoia */
2937 			BUG_ON(atomic_read(&dev->refcnt));
2938 			BUG_TRAP(!dev->ip_ptr);
2939 			BUG_TRAP(!dev->ip6_ptr);
2940 			BUG_TRAP(!dev->dn_ptr);
2941 
2942 
2943 			/* It must be the very last action,
2944 			 * after this 'dev' may point to freed up memory.
2945 			 */
2946 			if (dev->destructor)
2947 				dev->destructor(dev);
2948 			break;
2949 
2950 		default:
2951 			printk(KERN_ERR "network todo '%s' but state %d\n",
2952 			       dev->name, dev->reg_state);
2953 			break;
2954 		}
2955 	}
2956 
2957 out:
2958 	up(&net_todo_run_mutex);
2959 }
2960 
2961 /**
2962  *	alloc_netdev - allocate network device
2963  *	@sizeof_priv:	size of private data to allocate space for
2964  *	@name:		device name format string
2965  *	@setup:		callback to initialize device
2966  *
2967  *	Allocates a struct net_device with private data area for driver use
2968  *	and performs basic initialization.
2969  */
2970 struct net_device *alloc_netdev(int sizeof_priv, const char *name,
2971 		void (*setup)(struct net_device *))
2972 {
2973 	void *p;
2974 	struct net_device *dev;
2975 	int alloc_size;
2976 
2977 	/* ensure 32-byte alignment of both the device and private area */
2978 	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
2979 	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
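	/*
	 * Worked example of the rounding above (NETDEV_ALIGN is 32, so
	 * NETDEV_ALIGN_CONST is 31): a hypothetical sizeof(*dev) of 1000
	 * becomes (1000 + 31) & ~31 = 1024, and the extra CONST bytes
	 * added after sizeof_priv leave room to realign the private
	 * area the same way below.
	 */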
2980 
2981 	p = kmalloc(alloc_size, GFP_KERNEL);
2982 	if (!p) {
2983 		printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
2984 		return NULL;
2985 	}
2986 	memset(p, 0, alloc_size);
2987 
2988 	dev = (struct net_device *)
2989 		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
2990 	dev->padded = (char *)dev - (char *)p;
2991 
2992 	if (sizeof_priv)
2993 		dev->priv = netdev_priv(dev);
2994 
2995 	setup(dev);
2996 	strcpy(dev->name, name);
2997 	return dev;
2998 }
2999 EXPORT_SYMBOL(alloc_netdev);
3000 
3001 /**
3002  *	free_netdev - free network device
3003  *	@dev: device
3004  *
3005  *	This function does the last stage of destroying an allocated device
3006  * 	interface. The reference to the device object is released.
3007  *	If this is the last reference then it will be freed.
3008  */
3009 void free_netdev(struct net_device *dev)
3010 {
3011 #ifdef CONFIG_SYSFS
3012 	/*  Compatibility with error handling in drivers */
3013 	if (dev->reg_state == NETREG_UNINITIALIZED) {
3014 		kfree((char *)dev - dev->padded);
3015 		return;
3016 	}
3017 
3018 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3019 	dev->reg_state = NETREG_RELEASED;
3020 
3021 	/* will free via class release */
3022 	class_device_put(&dev->class_dev);
3023 #else
3024 	kfree((char *)dev - dev->padded);
3025 #endif
3026 }
3027 
3028 /* Synchronize with packet receive processing. */
3029 void synchronize_net(void)
3030 {
3031 	might_sleep();
3032 	synchronize_rcu();
3033 }
3034 
3035 /**
3036  *	unregister_netdevice - remove device from the kernel
3037  *	@dev: device
3038  *
3039  *	This function shuts down a device interface and removes it
3040  *	from the kernel tables. On success 0 is returned, on a failure
3041  *	a negative errno code is returned.
3042  *
3043  *	Callers must hold the rtnl semaphore.  You may want
3044  *	unregister_netdev() instead of this.
3045  */
3046 
3047 int unregister_netdevice(struct net_device *dev)
3048 {
3049 	struct net_device *d, **dp;
3050 
3051 	BUG_ON(dev_boot_phase);
3052 	ASSERT_RTNL();
3053 
3054 	/* Some devices call this without registering, to unwind failed initialization. */
3055 	if (dev->reg_state == NETREG_UNINITIALIZED) {
3056 		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3057 				  "was registered\n", dev->name, dev);
3058 		return -ENODEV;
3059 	}
3060 
3061 	BUG_ON(dev->reg_state != NETREG_REGISTERED);
3062 
3063 	/* If device is running, close it first. */
3064 	if (dev->flags & IFF_UP)
3065 		dev_close(dev);
3066 
3067 	/* And unlink it from device chain. */
3068 	for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
3069 		if (d == dev) {
3070 			write_lock_bh(&dev_base_lock);
3071 			hlist_del(&dev->name_hlist);
3072 			hlist_del(&dev->index_hlist);
3073 			if (dev_tail == &dev->next)
3074 				dev_tail = dp;
3075 			*dp = d->next;
3076 			write_unlock_bh(&dev_base_lock);
3077 			break;
3078 		}
3079 	}
3080 	if (!d) {
3081 		printk(KERN_ERR "unregister net_device: '%s' not found\n",
3082 		       dev->name);
3083 		return -ENODEV;
3084 	}
3085 
3086 	dev->reg_state = NETREG_UNREGISTERING;
3087 
3088 	synchronize_net();
3089 
3090 	/* Shutdown queueing discipline. */
3091 	dev_shutdown(dev);
3092 
3093 
3094 	/* Notify protocols that we are about to destroy
3095 	   this device. They should clean up all of their state.
3096 	*/
3097 	notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3098 
3099 	/*
3100 	 *	Flush the multicast chain
3101 	 */
3102 	dev_mc_discard(dev);
3103 
3104 	if (dev->uninit)
3105 		dev->uninit(dev);
3106 
3107 	/* Notifier chain MUST detach us from master device. */
3108 	BUG_TRAP(!dev->master);
3109 
3110 	free_divert_blk(dev);
3111 
3112 	/* Finish processing unregister after unlock */
3113 	net_set_todo(dev);
3114 
3115 	synchronize_net();
3116 
3117 	dev_put(dev);
3118 	return 0;
3119 }
3120 
3121 /**
3122  *	unregister_netdev - remove device from the kernel
3123  *	@dev: device
3124  *
3125  *	This function shuts down a device interface and removes it
3126  *	from the kernel tables. On success 0 is returned, on a failure
3127  *	a negative errno code is returned.
3128  *
3129  *	This is just a wrapper for unregister_netdevice that takes
3130  *	the rtnl semaphore.  In general you want to use this and not
3131  *	unregister_netdevice.
3132  */
3133 void unregister_netdev(struct net_device *dev)
3134 {
3135 	rtnl_lock();
3136 	unregister_netdevice(dev);
3137 	rtnl_unlock();
3138 }
3139 
3140 EXPORT_SYMBOL(unregister_netdev);
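/*
 * The matching teardown for the registration sketch earlier, assuming
 * the same hypothetical driver: unregister first (this wrapper takes
 * and drops the RTNL semaphore itself, so the todo queue has run by
 * the time it returns), then release the memory.
 */
#if 0	/* illustrative only, never compiled */
static void example_remove(struct net_device *dev)
{
	unregister_netdev(dev);
	free_netdev(dev);	/* safe once unregistration has completed */
}
#endif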
3141 
3142 #ifdef CONFIG_HOTPLUG_CPU
3143 static int dev_cpu_callback(struct notifier_block *nfb,
3144 			    unsigned long action,
3145 			    void *ocpu)
3146 {
3147 	struct sk_buff **list_skb;
3148 	struct net_device **list_net;
3149 	struct sk_buff *skb;
3150 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
3151 	struct softnet_data *sd, *oldsd;
3152 
3153 	if (action != CPU_DEAD)
3154 		return NOTIFY_OK;
3155 
3156 	local_irq_disable();
3157 	cpu = smp_processor_id();
3158 	sd = &per_cpu(softnet_data, cpu);
3159 	oldsd = &per_cpu(softnet_data, oldcpu);
3160 
3161 	/* Find end of our completion_queue. */
3162 	list_skb = &sd->completion_queue;
3163 	while (*list_skb)
3164 		list_skb = &(*list_skb)->next;
3165 	/* Append completion queue from offline CPU. */
3166 	*list_skb = oldsd->completion_queue;
3167 	oldsd->completion_queue = NULL;
3168 
3169 	/* Find end of our output_queue. */
3170 	list_net = &sd->output_queue;
3171 	while (*list_net)
3172 		list_net = &(*list_net)->next_sched;
3173 	/* Append output queue from offline CPU. */
3174 	*list_net = oldsd->output_queue;
3175 	oldsd->output_queue = NULL;
3176 
3177 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
3178 	local_irq_enable();
3179 
3180 	/* Process offline CPU's input_pkt_queue */
3181 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
3182 		netif_rx(skb);
3183 
3184 	return NOTIFY_OK;
3185 }
3186 #endif /* CONFIG_HOTPLUG_CPU */
3187 
3188 
3189 /*
3190  *	Initialize the DEV module. At boot time this walks the device list and
3191  *	unhooks any devices that fail to initialize (normally hardware not
3192  *	present), leaving us with a valid list of present and active devices.
3193  *
3194  */
3195 
3196 /*
3197  *       This is called single-threaded during boot, so no need
3198  *       to take the rtnl semaphore.
3199  */
3200 static int __init net_dev_init(void)
3201 {
3202 	int i, rc = -ENOMEM;
3203 
3204 	BUG_ON(!dev_boot_phase);
3205 
3206 	net_random_init();
3207 
3208 	if (dev_proc_init())
3209 		goto out;
3210 
3211 	if (netdev_sysfs_init())
3212 		goto out;
3213 
3214 	INIT_LIST_HEAD(&ptype_all);
3215 	for (i = 0; i < 16; i++)
3216 		INIT_LIST_HEAD(&ptype_base[i]);
3217 
3218 	for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3219 		INIT_HLIST_HEAD(&dev_name_head[i]);
3220 
3221 	for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3222 		INIT_HLIST_HEAD(&dev_index_head[i]);
3223 
3224 	/*
3225 	 *	Initialise the packet receive queues.
3226 	 */
3227 
3228 	for (i = 0; i < NR_CPUS; i++) {
3229 		struct softnet_data *queue;
3230 
3231 		queue = &per_cpu(softnet_data, i);
3232 		skb_queue_head_init(&queue->input_pkt_queue);
3233 		queue->completion_queue = NULL;
3234 		INIT_LIST_HEAD(&queue->poll_list);
3235 		set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3236 		queue->backlog_dev.weight = weight_p;
3237 		queue->backlog_dev.poll = process_backlog;
3238 		atomic_set(&queue->backlog_dev.refcnt, 1);
3239 	}
3240 
3241 	dev_boot_phase = 0;
3242 
3243 	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3244 	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3245 
3246 	hotcpu_notifier(dev_cpu_callback, 0);
3247 	dst_init();
3248 	dev_mcast_init();
3249 	rc = 0;
3250 out:
3251 	return rc;
3252 }
3253 
3254 subsys_initcall(net_dev_init);
3255 
3256 EXPORT_SYMBOL(__dev_get_by_index);
3257 EXPORT_SYMBOL(__dev_get_by_name);
3258 EXPORT_SYMBOL(__dev_remove_pack);
3259 EXPORT_SYMBOL(__skb_linearize);
3260 EXPORT_SYMBOL(dev_add_pack);
3261 EXPORT_SYMBOL(dev_alloc_name);
3262 EXPORT_SYMBOL(dev_close);
3263 EXPORT_SYMBOL(dev_get_by_flags);
3264 EXPORT_SYMBOL(dev_get_by_index);
3265 EXPORT_SYMBOL(dev_get_by_name);
3266 EXPORT_SYMBOL(dev_ioctl);
3267 EXPORT_SYMBOL(dev_open);
3268 EXPORT_SYMBOL(dev_queue_xmit);
3269 EXPORT_SYMBOL(dev_remove_pack);
3270 EXPORT_SYMBOL(dev_set_allmulti);
3271 EXPORT_SYMBOL(dev_set_promiscuity);
3272 EXPORT_SYMBOL(dev_change_flags);
3273 EXPORT_SYMBOL(dev_set_mtu);
3274 EXPORT_SYMBOL(dev_set_mac_address);
3275 EXPORT_SYMBOL(free_netdev);
3276 EXPORT_SYMBOL(netdev_boot_setup_check);
3277 EXPORT_SYMBOL(netdev_set_master);
3278 EXPORT_SYMBOL(netdev_state_change);
3279 EXPORT_SYMBOL(netif_receive_skb);
3280 EXPORT_SYMBOL(netif_rx);
3281 EXPORT_SYMBOL(register_gifconf);
3282 EXPORT_SYMBOL(register_netdevice);
3283 EXPORT_SYMBOL(register_netdevice_notifier);
3284 EXPORT_SYMBOL(skb_checksum_help);
3285 EXPORT_SYMBOL(synchronize_net);
3286 EXPORT_SYMBOL(unregister_netdevice);
3287 EXPORT_SYMBOL(unregister_netdevice_notifier);
3288 EXPORT_SYMBOL(net_enable_timestamp);
3289 EXPORT_SYMBOL(net_disable_timestamp);
3290 EXPORT_SYMBOL(dev_get_flags);
3291 
3292 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
3293 EXPORT_SYMBOL(br_handle_frame_hook);
3294 EXPORT_SYMBOL(br_fdb_get_hook);
3295 EXPORT_SYMBOL(br_fdb_put_hook);
3296 #endif
3297 
3298 #ifdef CONFIG_KMOD
3299 EXPORT_SYMBOL(dev_load);
3300 #endif
3301 
3302 EXPORT_PER_CPU_SYMBOL(softnet_data);
3303