xref: /openbmc/linux/net/core/dev.c (revision dba0a918722ee0f0ba3442575e4448c3ab622be4)
1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <rzsfl@rz.uni-sb.de>
16  *		Alan Cox <gw4pts@gw4pts.ampr.org>
17  *		David Hinds <dahinds@users.sourceforge.net>
18  *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19  *		Adam Sulmicki <adam@cfar.umd.edu>
20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/sched.h>
83 #include <linux/mutex.h>
84 #include <linux/string.h>
85 #include <linux/mm.h>
86 #include <linux/socket.h>
87 #include <linux/sockios.h>
88 #include <linux/errno.h>
89 #include <linux/interrupt.h>
90 #include <linux/if_ether.h>
91 #include <linux/netdevice.h>
92 #include <linux/etherdevice.h>
93 #include <linux/ethtool.h>
94 #include <linux/notifier.h>
95 #include <linux/skbuff.h>
96 #include <net/net_namespace.h>
97 #include <net/sock.h>
98 #include <linux/rtnetlink.h>
99 #include <linux/proc_fs.h>
100 #include <linux/seq_file.h>
101 #include <linux/stat.h>
102 #include <linux/if_bridge.h>
103 #include <linux/if_macvlan.h>
104 #include <net/dst.h>
105 #include <net/pkt_sched.h>
106 #include <net/checksum.h>
107 #include <linux/highmem.h>
108 #include <linux/init.h>
109 #include <linux/kmod.h>
110 #include <linux/module.h>
111 #include <linux/kallsyms.h>
112 #include <linux/netpoll.h>
113 #include <linux/rcupdate.h>
114 #include <linux/delay.h>
115 #include <net/wext.h>
116 #include <net/iw_handler.h>
117 #include <asm/current.h>
118 #include <linux/audit.h>
119 #include <linux/dmaengine.h>
120 #include <linux/err.h>
121 #include <linux/ctype.h>
122 #include <linux/if_arp.h>
123 #include <linux/if_vlan.h>
124 #include <linux/ip.h>
125 #include <net/ip.h>
126 #include <linux/ipv6.h>
127 #include <linux/in.h>
128 #include <linux/jhash.h>
129 #include <linux/random.h>
130 
131 #include "net-sysfs.h"
132 
133 /*
134  *	The list of packet types we will receive (as opposed to discard)
135  *	and the routines to invoke.
136  *
137  *	Why 16? Because with 16 the only overlap we get on a hash of the
138  *	low nibble of the protocol value is RARP/SNAP/X.25.
139  *
140  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
141  *             sure which should go first, but I bet it won't make much
142  *             difference if we are running VLANs.  The good news is that
143  *             this protocol won't be in the list unless compiled in, so
144  *             the average user (w/out VLANs) will not be adversely affected.
145  *             --BLG
146  *
147  *		0800	IP
148  *		8100    802.1Q VLAN
149  *		0001	802.3
150  *		0002	AX.25
151  *		0004	802.2
152  *		8035	RARP
153  *		0005	SNAP
154  *		0805	X.25
155  *		0806	ARP
156  *		8137	IPX
157  *		0009	Localtalk
158  *		86DD	IPv6
159  */
160 
161 #define PTYPE_HASH_SIZE	(16)
162 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
163 
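/*
 * For example, with the hash used below (the low nibble of the host-order
 * protocol value), ETH_P_IP (0x0800) lands in bucket 0 and ARP (0x0806)
 * in bucket 6, while RARP (0x8035), SNAP (0x0005) and X.25 (0x0805) all
 * share bucket 5, which is the overlap mentioned in the comment above.
 */
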
164 static DEFINE_SPINLOCK(ptype_lock);
165 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
166 static struct list_head ptype_all __read_mostly;	/* Taps */
167 
168 #ifdef CONFIG_NET_DMA
169 struct net_dma {
170 	struct dma_client client;
171 	spinlock_t lock;
172 	cpumask_t channel_mask;
173 	struct dma_chan **channels;
174 };
175 
176 static enum dma_state_client
177 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
178 	enum dma_state state);
179 
180 static struct net_dma net_dma = {
181 	.client = {
182 		.event_callback = netdev_dma_event,
183 	},
184 };
185 #endif
186 
187 /*
188  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
189  * semaphore.
190  *
191  * Pure readers hold dev_base_lock for reading.
192  *
193  * Writers must hold the rtnl semaphore while they loop through the
194  * dev_base_head list, and hold dev_base_lock for writing when they do the
195  * actual updates.  This allows pure readers to access the list even
196  * while a writer is preparing to update it.
197  *
198  * To put it another way, dev_base_lock is held for writing only to
199  * protect against pure readers; the rtnl semaphore provides the
200  * protection against other writers.
201  *
202  * See, for example usages, register_netdevice() and
203  * unregister_netdevice(), which must be called with the rtnl
204  * semaphore held.
205  */
206 DEFINE_RWLOCK(dev_base_lock);
207 
208 EXPORT_SYMBOL(dev_base_lock);
209 
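/*
 * A minimal sketch of the rules above (illustrative only; net, dev and
 * do_something() are placeholders, not part of this file).  A pure
 * reader only needs dev_base_lock for reading:
 *
 *	read_lock(&dev_base_lock);
 *	for_each_netdev(net, dev)
 *		do_something(dev);
 *	read_unlock(&dev_base_lock);
 *
 * A writer holds the rtnl semaphore for the whole update and takes
 * dev_base_lock for writing only around the actual list change, as
 * list_netdevice()/unlist_netdevice() below do.
 */
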
210 #define NETDEV_HASHBITS	8
211 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
212 
213 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
214 {
215 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
216 	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
217 }
218 
219 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
220 {
221 	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
222 }
223 
224 /* Device list insertion */
225 static int list_netdevice(struct net_device *dev)
226 {
227 	struct net *net = dev_net(dev);
228 
229 	ASSERT_RTNL();
230 
231 	write_lock_bh(&dev_base_lock);
232 	list_add_tail(&dev->dev_list, &net->dev_base_head);
233 	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
234 	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
235 	write_unlock_bh(&dev_base_lock);
236 	return 0;
237 }
238 
239 /* Device list removal */
240 static void unlist_netdevice(struct net_device *dev)
241 {
242 	ASSERT_RTNL();
243 
244 	/* Unlink dev from the device chain */
245 	write_lock_bh(&dev_base_lock);
246 	list_del(&dev->dev_list);
247 	hlist_del(&dev->name_hlist);
248 	hlist_del(&dev->index_hlist);
249 	write_unlock_bh(&dev_base_lock);
250 }
251 
252 /*
253  *	Our notifier list
254  */
255 
256 static RAW_NOTIFIER_HEAD(netdev_chain);
257 
258 /*
259  *	Device drivers call our routines to queue packets here. We empty the
260  *	queue in the local softnet handler.
261  */
262 
263 DEFINE_PER_CPU(struct softnet_data, softnet_data);
264 
265 #ifdef CONFIG_LOCKDEP
266 /*
267  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
268  * according to dev->type
269  */
270 static const unsigned short netdev_lock_type[] =
271 	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
272 	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
273 	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
274 	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
275 	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
276 	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
277 	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
278 	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
279 	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
280 	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
281 	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
282 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
283 	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
284 	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
285 	 ARPHRD_NONE};
286 
287 static const char *netdev_lock_name[] =
288 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
289 	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
290 	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
291 	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
292 	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
293 	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
294 	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
295 	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
296 	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
297 	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
298 	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
299 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
300 	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
301 	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
302 	 "_xmit_NONE"};
303 
304 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
305 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
306 
307 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
308 {
309 	int i;
310 
311 	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
312 		if (netdev_lock_type[i] == dev_type)
313 			return i;
314 	/* the last key is used by default */
315 	return ARRAY_SIZE(netdev_lock_type) - 1;
316 }
317 
318 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
319 						 unsigned short dev_type)
320 {
321 	int i;
322 
323 	i = netdev_lock_pos(dev_type);
324 	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
325 				   netdev_lock_name[i]);
326 }
327 
328 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
329 {
330 	int i;
331 
332 	i = netdev_lock_pos(dev->type);
333 	lockdep_set_class_and_name(&dev->addr_list_lock,
334 				   &netdev_addr_lock_key[i],
335 				   netdev_lock_name[i]);
336 }
337 #else
338 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
339 						 unsigned short dev_type)
340 {
341 }
342 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
343 {
344 }
345 #endif
346 
347 /*******************************************************************************
348 
349 		Protocol management and registration routines
350 
351 *******************************************************************************/
352 
353 /*
354  *	Add a protocol ID to the list. Now that the input handler is
355  *	smarter we can dispense with all the messy stuff that used to be
356  *	here.
357  *
358  *	BEWARE!!! Protocol handlers that mangle input packets
359  *	MUST BE last in the hash buckets, and dispatch to protocol
360  *	handlers MUST start from the promiscuous ptype_all chain in net_bh.
361  *	This is still true; do not change it.
362  *	Explanation: if a packet-mangling handler were first in the
363  *	list, it could not tell that the packet is cloned and must be
364  *	copied before writing, so it would modify the clone in place
365  *	and subsequent readers would get a broken packet.
366  *							--ANK (980803)
367  */
368 
369 /**
370  *	dev_add_pack - add packet handler
371  *	@pt: packet type declaration
372  *
373  *	Add a protocol handler to the networking stack. The passed &packet_type
374  *	is linked into kernel lists and may not be freed until it has been
375  *	removed from the kernel lists.
376  *
377  *	This call does not sleep, therefore it cannot guarantee that
378  *	all CPUs currently in the middle of receiving a packet will
379  *	see the new packet type (they will on the next received packet).
380  */
381 
382 void dev_add_pack(struct packet_type *pt)
383 {
384 	int hash;
385 
386 	spin_lock_bh(&ptype_lock);
387 	if (pt->type == htons(ETH_P_ALL))
388 		list_add_rcu(&pt->list, &ptype_all);
389 	else {
390 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
391 		list_add_rcu(&pt->list, &ptype_base[hash]);
392 	}
393 	spin_unlock_bh(&ptype_lock);
394 }
395 
396 /**
397  *	__dev_remove_pack	 - remove packet handler
398  *	@pt: packet type declaration
399  *
400  *	Remove a protocol handler that was previously added to the kernel
401  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
402  *	from the kernel lists and can be freed or reused once this function
403  *	returns.
404  *
405  *      The packet type might still be in use by receivers
406  *	and must not be freed until after all the CPUs have gone
407  *	through a quiescent state.
408  */
409 void __dev_remove_pack(struct packet_type *pt)
410 {
411 	struct list_head *head;
412 	struct packet_type *pt1;
413 
414 	spin_lock_bh(&ptype_lock);
415 
416 	if (pt->type == htons(ETH_P_ALL))
417 		head = &ptype_all;
418 	else
419 		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
420 
421 	list_for_each_entry(pt1, head, list) {
422 		if (pt == pt1) {
423 			list_del_rcu(&pt->list);
424 			goto out;
425 		}
426 	}
427 
428 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
429 out:
430 	spin_unlock_bh(&ptype_lock);
431 }
432 /**
433  *	dev_remove_pack	 - remove packet handler
434  *	@pt: packet type declaration
435  *
436  *	Remove a protocol handler that was previously added to the kernel
437  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
438  *	from the kernel lists and can be freed or reused once this function
439  *	returns.
440  *
441  *	This call sleeps to guarantee that no CPU is looking at the packet
442  *	type after return.
443  */
444 void dev_remove_pack(struct packet_type *pt)
445 {
446 	__dev_remove_pack(pt);
447 
448 	synchronize_net();
449 }
450 
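/*
 * Illustrative use of dev_add_pack()/dev_remove_pack() (a sketch only;
 * my_rcv and my_packet_type are hypothetical names, not part of this
 * file):
 *
 *	static int my_rcv(struct sk_buff *skb, struct net_device *dev,
 *			  struct packet_type *pt, struct net_device *orig_dev);
 *
 *	static struct packet_type my_packet_type __read_mostly = {
 *		.type	= __constant_htons(ETH_P_IP),
 *		.func	= my_rcv,
 *	};
 *
 *	dev_add_pack(&my_packet_type);
 *	...
 *	dev_remove_pack(&my_packet_type);
 */
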
451 /******************************************************************************
452 
453 		      Device Boot-time Settings Routines
454 
455 *******************************************************************************/
456 
457 /* Boot time configuration table */
458 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
459 
460 /**
461  *	netdev_boot_setup_add	- add new setup entry
462  *	@name: name of the device
463  *	@map: configured settings for the device
464  *
465  *	Adds a new setup entry to the dev_boot_setup list.  The function
466  *	returns 0 on error and 1 on success.  This is a generic routine
467  *	for all netdevices.
468  */
469 static int netdev_boot_setup_add(char *name, struct ifmap *map)
470 {
471 	struct netdev_boot_setup *s;
472 	int i;
473 
474 	s = dev_boot_setup;
475 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
476 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
477 			memset(s[i].name, 0, sizeof(s[i].name));
478 			strlcpy(s[i].name, name, IFNAMSIZ);
479 			memcpy(&s[i].map, map, sizeof(s[i].map));
480 			break;
481 		}
482 	}
483 
484 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
485 }
486 
487 /**
488  *	netdev_boot_setup_check	- check boot time settings
489  *	@dev: the netdevice
490  *
491  * 	Check boot time settings for the device.
492  *	The found settings are set for the device to be used
493  *	Any settings found are applied to the device so that they can
494  *	be used later during device probing.
495  *	Returns 1 if settings are found, 0 otherwise.
496 int netdev_boot_setup_check(struct net_device *dev)
497 {
498 	struct netdev_boot_setup *s = dev_boot_setup;
499 	int i;
500 
501 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
502 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
503 		    !strcmp(dev->name, s[i].name)) {
504 			dev->irq 	= s[i].map.irq;
505 			dev->base_addr 	= s[i].map.base_addr;
506 			dev->mem_start 	= s[i].map.mem_start;
507 			dev->mem_end 	= s[i].map.mem_end;
508 			return 1;
509 		}
510 	}
511 	return 0;
512 }
513 
514 
515 /**
516  *	netdev_boot_base	- get address from boot time settings
517  *	@prefix: prefix for network device
518  *	@unit: id for network device
519  *
520  * 	Check boot time settings for the base address of the device.
521  *	Returns the configured base address, 1 if the device is already
522  *	registered (so it should not be probed), or 0 if no settings
523  *	are found.
524  */
525 unsigned long netdev_boot_base(const char *prefix, int unit)
526 {
527 	const struct netdev_boot_setup *s = dev_boot_setup;
528 	char name[IFNAMSIZ];
529 	int i;
530 
531 	sprintf(name, "%s%d", prefix, unit);
532 
533 	/*
534 	 * If device already registered then return base of 1
535 	 * to indicate not to probe for this interface
536 	 */
537 	if (__dev_get_by_name(&init_net, name))
538 		return 1;
539 
540 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
541 		if (!strcmp(name, s[i].name))
542 			return s[i].map.base_addr;
543 	return 0;
544 }
545 
546 /*
547  * Saves the settings configured at boot time for any netdevice.
548  */
549 int __init netdev_boot_setup(char *str)
550 {
551 	int ints[5];
552 	struct ifmap map;
553 
554 	str = get_options(str, ARRAY_SIZE(ints), ints);
555 	if (!str || !*str)
556 		return 0;
557 
558 	/* Save settings */
559 	memset(&map, 0, sizeof(map));
560 	if (ints[0] > 0)
561 		map.irq = ints[1];
562 	if (ints[0] > 1)
563 		map.base_addr = ints[2];
564 	if (ints[0] > 2)
565 		map.mem_start = ints[3];
566 	if (ints[0] > 3)
567 		map.mem_end = ints[4];
568 
569 	/* Add new entry to the list */
570 	return netdev_boot_setup_add(str, &map);
571 }
572 
573 __setup("netdev=", netdev_boot_setup);
574 
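/*
 * For example, booting with
 *
 *	netdev=9,0x300,0,0,eth0
 *
 * records irq 9 and I/O base 0x300 under the name "eth0" (illustrative
 * values); a driver can pick them up later via netdev_boot_setup_check().
 */
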
575 /*******************************************************************************
576 
577 			    Device Interface Subroutines
578 
579 *******************************************************************************/
580 
581 /**
582  *	__dev_get_by_name	- find a device by its name
583  *	@net: the applicable net namespace
584  *	@name: name to find
585  *
586  *	Find an interface by name. Must be called under RTNL semaphore
587  *	or @dev_base_lock. If the name is found a pointer to the device
588  *	is returned. If the name is not found then %NULL is returned. The
589  *	reference counters are not incremented so the caller must be
590  *	careful with locks.
591  */
592 
593 struct net_device *__dev_get_by_name(struct net *net, const char *name)
594 {
595 	struct hlist_node *p;
596 
597 	hlist_for_each(p, dev_name_hash(net, name)) {
598 		struct net_device *dev
599 			= hlist_entry(p, struct net_device, name_hlist);
600 		if (!strncmp(dev->name, name, IFNAMSIZ))
601 			return dev;
602 	}
603 	return NULL;
604 }
605 
606 /**
607  *	dev_get_by_name		- find a device by its name
608  *	@net: the applicable net namespace
609  *	@name: name to find
610  *
611  *	Find an interface by name. This can be called from any
612  *	context and does its own locking. The returned handle has
613  *	the usage count incremented and the caller must use dev_put() to
614  *	release it when it is no longer needed. %NULL is returned if no
615  *	matching device is found.
616  */
617 
618 struct net_device *dev_get_by_name(struct net *net, const char *name)
619 {
620 	struct net_device *dev;
621 
622 	read_lock(&dev_base_lock);
623 	dev = __dev_get_by_name(net, name);
624 	if (dev)
625 		dev_hold(dev);
626 	read_unlock(&dev_base_lock);
627 	return dev;
628 }
629 
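/*
 * Typical usage of dev_get_by_name() (a sketch; "eth0" is only an
 * example name):
 *
 *	struct net_device *dev = dev_get_by_name(net, "eth0");
 *	if (dev) {
 *		... use dev ...
 *		dev_put(dev);
 *	}
 */
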
630 /**
631  *	__dev_get_by_index - find a device by its ifindex
632  *	@net: the applicable net namespace
633  *	@ifindex: index of device
634  *
635  *	Search for an interface by index. Returns a pointer to the device,
636  *	or %NULL if it is not found. The device has not
637  *	had its reference counter increased so the caller must be careful
638  *	about locking. The caller must hold either the RTNL semaphore
639  *	or @dev_base_lock.
640  */
641 
642 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
643 {
644 	struct hlist_node *p;
645 
646 	hlist_for_each(p, dev_index_hash(net, ifindex)) {
647 		struct net_device *dev
648 			= hlist_entry(p, struct net_device, index_hlist);
649 		if (dev->ifindex == ifindex)
650 			return dev;
651 	}
652 	return NULL;
653 }
654 
655 
656 /**
657  *	dev_get_by_index - find a device by its ifindex
658  *	@net: the applicable net namespace
659  *	@ifindex: index of device
660  *
661  *	Search for an interface by index. Returns a pointer to the device,
662  *	or NULL if it is not found. The device returned has
663  *	had a reference added and the pointer is safe until the user calls
664  *	dev_put to indicate they have finished with it.
665  */
666 
667 struct net_device *dev_get_by_index(struct net *net, int ifindex)
668 {
669 	struct net_device *dev;
670 
671 	read_lock(&dev_base_lock);
672 	dev = __dev_get_by_index(net, ifindex);
673 	if (dev)
674 		dev_hold(dev);
675 	read_unlock(&dev_base_lock);
676 	return dev;
677 }
678 
679 /**
680  *	dev_getbyhwaddr - find a device by its hardware address
681  *	@net: the applicable net namespace
682  *	@type: media type of device
683  *	@ha: hardware address
684  *
685  *	Search for an interface by MAC address. Returns a pointer to the
686  *	device, or NULL if it is not found. The caller must hold the
687  *	rtnl semaphore. The returned device has not had its ref count increased
688  *	and the caller must therefore be careful about locking.
689  *
690  *	BUGS:
691  *	If the API was consistent this would be __dev_get_by_hwaddr
692  */
693 
694 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
695 {
696 	struct net_device *dev;
697 
698 	ASSERT_RTNL();
699 
700 	for_each_netdev(net, dev)
701 		if (dev->type == type &&
702 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
703 			return dev;
704 
705 	return NULL;
706 }
707 
708 EXPORT_SYMBOL(dev_getbyhwaddr);
709 
710 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
711 {
712 	struct net_device *dev;
713 
714 	ASSERT_RTNL();
715 	for_each_netdev(net, dev)
716 		if (dev->type == type)
717 			return dev;
718 
719 	return NULL;
720 }
721 
722 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
723 
724 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
725 {
726 	struct net_device *dev;
727 
728 	rtnl_lock();
729 	dev = __dev_getfirstbyhwtype(net, type);
730 	if (dev)
731 		dev_hold(dev);
732 	rtnl_unlock();
733 	return dev;
734 }
735 
736 EXPORT_SYMBOL(dev_getfirstbyhwtype);
737 
738 /**
739  *	dev_get_by_flags - find any device with given flags
740  *	@net: the applicable net namespace
741  *	@if_flags: IFF_* values
742  *	@mask: bitmask of bits in if_flags to check
743  *
744  *	Search for any interface with the given flags. Returns a pointer to the
745  *	first matching device, or NULL if none is found. The device returned has
746  *	had a reference added and the pointer is safe until the user calls
747  *	dev_put to indicate they have finished with it.
748  */
749 
750 struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
751 {
752 	struct net_device *dev, *ret;
753 
754 	ret = NULL;
755 	read_lock(&dev_base_lock);
756 	for_each_netdev(net, dev) {
757 		if (((dev->flags ^ if_flags) & mask) == 0) {
758 			dev_hold(dev);
759 			ret = dev;
760 			break;
761 		}
762 	}
763 	read_unlock(&dev_base_lock);
764 	return ret;
765 }
766 
767 /**
768  *	dev_valid_name - check if name is okay for network device
769  *	@name: name string
770  *
771  *	Network device names need to be valid file names
772  *	to allow sysfs to work.  We also disallow any kind of
773  *	whitespace.
774  */
775 int dev_valid_name(const char *name)
776 {
777 	if (*name == '\0')
778 		return 0;
779 	if (strlen(name) >= IFNAMSIZ)
780 		return 0;
781 	if (!strcmp(name, ".") || !strcmp(name, ".."))
782 		return 0;
783 
784 	while (*name) {
785 		if (*name == '/' || isspace(*name))
786 			return 0;
787 		name++;
788 	}
789 	return 1;
790 }
791 
792 /**
793  *	__dev_alloc_name - allocate a name for a device
794  *	@net: network namespace to allocate the device name in
795  *	@name: name format string
796  *	@buf:  scratch buffer and result name string
797  *
798  *	Passed a format string - eg "lt%d" - it will try to find a suitable
799  *	id. It scans the list of devices to build a map of ids in use, then
800  *	chooses the first free slot. The caller must hold the dev_base or rtnl lock
801  *	while allocating the name and adding the device in order to avoid
802  *	duplicates.
803  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
804  *	Returns the number of the unit assigned or a negative errno code.
805  */
806 
807 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
808 {
809 	int i = 0;
810 	const char *p;
811 	const int max_netdevices = 8*PAGE_SIZE;
812 	unsigned long *inuse;
813 	struct net_device *d;
814 
815 	p = strnchr(name, IFNAMSIZ-1, '%');
816 	if (p) {
817 		/*
818 		 * Verify the string as this thing may have come from
819 		 * the user.  There must be exactly one "%d" and no other "%"
820 		 * characters.
821 		 */
822 		if (p[1] != 'd' || strchr(p + 2, '%'))
823 			return -EINVAL;
824 
825 		/* Use one page as a bit array of possible slots */
826 		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
827 		if (!inuse)
828 			return -ENOMEM;
829 
830 		for_each_netdev(net, d) {
831 			if (!sscanf(d->name, name, &i))
832 				continue;
833 			if (i < 0 || i >= max_netdevices)
834 				continue;
835 
836 			/*  avoid cases where sscanf is not exact inverse of printf */
837 			snprintf(buf, IFNAMSIZ, name, i);
838 			if (!strncmp(buf, d->name, IFNAMSIZ))
839 				set_bit(i, inuse);
840 		}
841 
842 		i = find_first_zero_bit(inuse, max_netdevices);
843 		free_page((unsigned long) inuse);
844 	}
845 
846 	snprintf(buf, IFNAMSIZ, name, i);
847 	if (!__dev_get_by_name(net, buf))
848 		return i;
849 
850 	/* It is possible to run out of possible slots
851 	 * when the name is long and there isn't enough space left
852 	 * for the digits, or if all bits are used.
853 	 */
854 	return -ENFILE;
855 }
856 
857 /**
858  *	dev_alloc_name - allocate a name for a device
859  *	@dev: device
860  *	@name: name format string
861  *
862  *	Passed a format string - eg "lt%d" - it will try to find a suitable
863  *	id. It scans the list of devices to build a map of ids in use, then
864  *	chooses the first free slot. The caller must hold the dev_base or rtnl lock
865  *	while allocating the name and adding the device in order to avoid
866  *	duplicates.
867  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
868  *	Returns the number of the unit assigned or a negative errno code.
869  */
870 
871 int dev_alloc_name(struct net_device *dev, const char *name)
872 {
873 	char buf[IFNAMSIZ];
874 	struct net *net;
875 	int ret;
876 
877 	BUG_ON(!dev_net(dev));
878 	net = dev_net(dev);
879 	ret = __dev_alloc_name(net, name, buf);
880 	if (ret >= 0)
881 		strlcpy(dev->name, buf, IFNAMSIZ);
882 	return ret;
883 }
884 
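/*
 * For example, if "eth0" and "eth2" already exist in the namespace, then
 * dev_alloc_name(dev, "eth%d") sets dev->name to "eth1" and returns 1
 * (the unit number chosen).  Illustrative only.
 */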
885 
886 /**
887  *	dev_change_name - change name of a device
888  *	@dev: device
889  *	@newname: name (or format string) must be at least IFNAMSIZ
890  *
891  *	Change the name of a device; a format string such as "eth%d"
892  *	can be passed for wildcarding.
893  */
894 int dev_change_name(struct net_device *dev, const char *newname)
895 {
896 	char oldname[IFNAMSIZ];
897 	int err = 0;
898 	int ret;
899 	struct net *net;
900 
901 	ASSERT_RTNL();
902 	BUG_ON(!dev_net(dev));
903 
904 	net = dev_net(dev);
905 	if (dev->flags & IFF_UP)
906 		return -EBUSY;
907 
908 	if (!dev_valid_name(newname))
909 		return -EINVAL;
910 
911 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
912 		return 0;
913 
914 	memcpy(oldname, dev->name, IFNAMSIZ);
915 
916 	if (strchr(newname, '%')) {
917 		err = dev_alloc_name(dev, newname);
918 		if (err < 0)
919 			return err;
920 	}
921 	else if (__dev_get_by_name(net, newname))
922 		return -EEXIST;
923 	else
924 		strlcpy(dev->name, newname, IFNAMSIZ);
925 
926 rollback:
927 	ret = device_rename(&dev->dev, dev->name);
928 	if (ret) {
929 		memcpy(dev->name, oldname, IFNAMSIZ);
930 		return ret;
931 	}
932 
933 	write_lock_bh(&dev_base_lock);
934 	hlist_del(&dev->name_hlist);
935 	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
936 	write_unlock_bh(&dev_base_lock);
937 
938 	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
939 	ret = notifier_to_errno(ret);
940 
941 	if (ret) {
942 		if (err) {
943 			printk(KERN_ERR
944 			       "%s: name change rollback failed: %d.\n",
945 			       dev->name, ret);
946 		} else {
947 			err = ret;
948 			memcpy(dev->name, oldname, IFNAMSIZ);
949 			goto rollback;
950 		}
951 	}
952 
953 	return err;
954 }
955 
956 /**
957  *	dev_set_alias - change ifalias of a device
958  *	@dev: device
959  *	@alias: name up to IFALIASZ
960  *	@len: limit of bytes to copy from info
961  *
962  *	Set ifalias for a device,
963  *	Set the ifalias for a device.
964 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
965 {
966 	ASSERT_RTNL();
967 
968 	if (len >= IFALIASZ)
969 		return -EINVAL;
970 
971 	if (!len) {
972 		if (dev->ifalias) {
973 			kfree(dev->ifalias);
974 			dev->ifalias = NULL;
975 		}
976 		return 0;
977 	}
978 
979 	dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
980 	if (!dev->ifalias)
981 		return -ENOMEM;
982 
983 	strlcpy(dev->ifalias, alias, len+1);
984 	return len;
985 }
986 
987 
988 /**
989  *	netdev_features_change - device changes features
990  *	@dev: device to cause notification
991  *
992  *	Called to indicate a device has changed features.
993  */
994 void netdev_features_change(struct net_device *dev)
995 {
996 	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
997 }
998 EXPORT_SYMBOL(netdev_features_change);
999 
1000 /**
1001  *	netdev_state_change - device changes state
1002  *	@dev: device to cause notification
1003  *
1004  *	Called to indicate a device has changed state. This function calls
1005  *	the notifier chains for netdev_chain and sends a NEWLINK message
1006  *	to the routing socket.
1007  */
1008 void netdev_state_change(struct net_device *dev)
1009 {
1010 	if (dev->flags & IFF_UP) {
1011 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
1012 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1013 	}
1014 }
1015 
1016 void netdev_bonding_change(struct net_device *dev)
1017 {
1018 	call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
1019 }
1020 EXPORT_SYMBOL(netdev_bonding_change);
1021 
1022 /**
1023  *	dev_load 	- load a network module
1024  *	@net: the applicable net namespace
1025  *	@name: name of interface
1026  *
1027  *	If a network interface is not present and the process has suitable
1028  *	privileges this function loads the module. If module loading is not
1029  *	available in this kernel then it becomes a nop.
1030  */
1031 
1032 void dev_load(struct net *net, const char *name)
1033 {
1034 	struct net_device *dev;
1035 
1036 	read_lock(&dev_base_lock);
1037 	dev = __dev_get_by_name(net, name);
1038 	read_unlock(&dev_base_lock);
1039 
1040 	if (!dev && capable(CAP_SYS_MODULE))
1041 		request_module("%s", name);
1042 }
1043 
1044 /**
1045  *	dev_open	- prepare an interface for use.
1046  *	@dev:	device to open
1047  *
1048  *	Takes a device from down to up state. The device's private open
1049  *	function is invoked and then the multicast lists are loaded. Finally
1050  *	the device is moved into the up state and a %NETDEV_UP message is
1051  *	sent to the netdev notifier chain.
1052  *
1053  *	Calling this function on an active interface is a nop. On a failure
1054  *	a negative errno code is returned.
1055  */
1056 int dev_open(struct net_device *dev)
1057 {
1058 	int ret = 0;
1059 
1060 	ASSERT_RTNL();
1061 
1062 	/*
1063 	 *	Is it already up?
1064 	 */
1065 
1066 	if (dev->flags & IFF_UP)
1067 		return 0;
1068 
1069 	/*
1070 	 *	Is it even present?
1071 	 */
1072 	if (!netif_device_present(dev))
1073 		return -ENODEV;
1074 
1075 	/*
1076 	 *	Call device private open method
1077 	 */
1078 	set_bit(__LINK_STATE_START, &dev->state);
1079 
1080 	if (dev->validate_addr)
1081 		ret = dev->validate_addr(dev);
1082 
1083 	if (!ret && dev->open)
1084 		ret = dev->open(dev);
1085 
1086 	/*
1087 	 *	If it went open OK then:
1088 	 */
1089 
1090 	if (ret)
1091 		clear_bit(__LINK_STATE_START, &dev->state);
1092 	else {
1093 		/*
1094 		 *	Set the flags.
1095 		 */
1096 		dev->flags |= IFF_UP;
1097 
1098 		/*
1099 		 *	Initialize multicasting status
1100 		 */
1101 		dev_set_rx_mode(dev);
1102 
1103 		/*
1104 		 *	Wakeup transmit queue engine
1105 		 */
1106 		dev_activate(dev);
1107 
1108 		/*
1109 		 *	... and announce new interface.
1110 		 */
1111 		call_netdevice_notifiers(NETDEV_UP, dev);
1112 	}
1113 
1114 	return ret;
1115 }
1116 
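/*
 * Both dev_open() and dev_close() must be called with the rtnl
 * semaphore held, e.g. (illustrative):
 *
 *	rtnl_lock();
 *	err = dev_open(dev);
 *	rtnl_unlock();
 */
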
1117 /**
1118  *	dev_close - shutdown an interface.
1119  *	@dev: device to shutdown
1120  *
1121  *	This function moves an active device into down state. A
1122  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1123  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1124  *	chain.
1125  */
1126 int dev_close(struct net_device *dev)
1127 {
1128 	ASSERT_RTNL();
1129 
1130 	might_sleep();
1131 
1132 	if (!(dev->flags & IFF_UP))
1133 		return 0;
1134 
1135 	/*
1136 	 *	Tell people we are going down, so that they can
1137 	 *	prepare for it while the device is still operating.
1138 	 */
1139 	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1140 
1141 	clear_bit(__LINK_STATE_START, &dev->state);
1142 
1143 	/* Synchronize to scheduled poll. We cannot touch the poll list,
1144 	 * it may even be on a different cpu. So just clear netif_running().
1145 	 *
1146 	 * dev->stop() will invoke napi_disable() on all of its
1147 	 * napi_struct instances on this device.
1148 	 */
1149 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
1150 
1151 	dev_deactivate(dev);
1152 
1153 	/*
1154 	 *	Call the device specific close. This cannot fail and is
1155 	 *	only done if the device is UP.
1156 	 *
1157 	 *	We allow it to be called even after a DETACH hot-plug
1158 	 *	event.
1159 	 */
1160 	if (dev->stop)
1161 		dev->stop(dev);
1162 
1163 	/*
1164 	 *	Device is now down.
1165 	 */
1166 
1167 	dev->flags &= ~IFF_UP;
1168 
1169 	/*
1170 	 * Tell people we are down
1171 	 */
1172 	call_netdevice_notifiers(NETDEV_DOWN, dev);
1173 
1174 	return 0;
1175 }
1176 
1177 
1178 /**
1179  *	dev_disable_lro - disable Large Receive Offload on a device
1180  *	@dev: device
1181  *
1182  *	Disable Large Receive Offload (LRO) on a net device.  Must be
1183  *	called under RTNL.  This is needed if received packets may be
1184  *	forwarded to another interface.
1185  */
1186 void dev_disable_lro(struct net_device *dev)
1187 {
1188 	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1189 	    dev->ethtool_ops->set_flags) {
1190 		u32 flags = dev->ethtool_ops->get_flags(dev);
1191 		if (flags & ETH_FLAG_LRO) {
1192 			flags &= ~ETH_FLAG_LRO;
1193 			dev->ethtool_ops->set_flags(dev, flags);
1194 		}
1195 	}
1196 	WARN_ON(dev->features & NETIF_F_LRO);
1197 }
1198 EXPORT_SYMBOL(dev_disable_lro);
1199 
1200 
1201 static int dev_boot_phase = 1;
1202 
1203 /*
1204  *	Device change register/unregister. These are not inline or static
1205  *	as we export them to the world.
1206  */
1207 
1208 /**
1209  *	register_netdevice_notifier - register a network notifier block
1210  *	@nb: notifier
1211  *
1212  *	Register a notifier to be called when network device events occur.
1213  *	The notifier passed is linked into the kernel structures and must
1214  *	not be reused until it has been unregistered. A negative errno code
1215  *	is returned on a failure.
1216  *
1217  * 	When registered, all registration and up events are replayed
1218  *	to the new notifier to give it a race-free
1219  *	view of the network device list.
1220  */
1221 
1222 int register_netdevice_notifier(struct notifier_block *nb)
1223 {
1224 	struct net_device *dev;
1225 	struct net_device *last;
1226 	struct net *net;
1227 	int err;
1228 
1229 	rtnl_lock();
1230 	err = raw_notifier_chain_register(&netdev_chain, nb);
1231 	if (err)
1232 		goto unlock;
1233 	if (dev_boot_phase)
1234 		goto unlock;
1235 	for_each_net(net) {
1236 		for_each_netdev(net, dev) {
1237 			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1238 			err = notifier_to_errno(err);
1239 			if (err)
1240 				goto rollback;
1241 
1242 			if (!(dev->flags & IFF_UP))
1243 				continue;
1244 
1245 			nb->notifier_call(nb, NETDEV_UP, dev);
1246 		}
1247 	}
1248 
1249 unlock:
1250 	rtnl_unlock();
1251 	return err;
1252 
1253 rollback:
1254 	last = dev;
1255 	for_each_net(net) {
1256 		for_each_netdev(net, dev) {
1257 			if (dev == last)
1258 				break;
1259 
1260 			if (dev->flags & IFF_UP) {
1261 				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1262 				nb->notifier_call(nb, NETDEV_DOWN, dev);
1263 			}
1264 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1265 		}
1266 	}
1267 
1268 	raw_notifier_chain_unregister(&netdev_chain, nb);
1269 	goto unlock;
1270 }
1271 
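/*
 * Illustrative notifier usage (a sketch; my_netdev_event and my_notifier
 * are hypothetical, not part of this file):
 *
 *	static int my_netdev_event(struct notifier_block *nb,
 *				   unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UP)
 *			printk(KERN_INFO "%s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_notifier = {
 *		.notifier_call = my_netdev_event,
 *	};
 *
 *	register_netdevice_notifier(&my_notifier);
 */
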
1272 /**
1273  *	unregister_netdevice_notifier - unregister a network notifier block
1274  *	@nb: notifier
1275  *
1276  *	Unregister a notifier previously registered by
1277  *	register_netdevice_notifier(). The notifier is unlinked from the
1278  *	kernel structures and may then be reused. A negative errno code
1279  *	is returned on a failure.
1280  */
1281 
1282 int unregister_netdevice_notifier(struct notifier_block *nb)
1283 {
1284 	int err;
1285 
1286 	rtnl_lock();
1287 	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1288 	rtnl_unlock();
1289 	return err;
1290 }
1291 
1292 /**
1293  *	call_netdevice_notifiers - call all network notifier blocks
1294  *      @val: value passed unmodified to notifier function
1295  *      @dev: net_device pointer passed unmodified to notifier function
1296  *
1297  *	Call all network notifier blocks.  Parameters and return value
1298  *	are as for raw_notifier_call_chain().
1299  */
1300 
1301 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1302 {
1303 	return raw_notifier_call_chain(&netdev_chain, val, dev);
1304 }
1305 
1306 /* When > 0 there are consumers of rx skb time stamps */
1307 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1308 
1309 void net_enable_timestamp(void)
1310 {
1311 	atomic_inc(&netstamp_needed);
1312 }
1313 
1314 void net_disable_timestamp(void)
1315 {
1316 	atomic_dec(&netstamp_needed);
1317 }
1318 
1319 static inline void net_timestamp(struct sk_buff *skb)
1320 {
1321 	if (atomic_read(&netstamp_needed))
1322 		__net_timestamp(skb);
1323 	else
1324 		skb->tstamp.tv64 = 0;
1325 }
1326 
1327 /*
1328  *	Support routine. Sends outgoing frames to any network
1329  *	taps currently in use.
1330  */
1331 
1332 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1333 {
1334 	struct packet_type *ptype;
1335 
1336 	net_timestamp(skb);
1337 
1338 	rcu_read_lock();
1339 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1340 		/* Never send packets back to the socket
1341 		 * they originated from - MvS (miquels@drinkel.ow.org)
1342 		 */
1343 		if ((ptype->dev == dev || !ptype->dev) &&
1344 		    (ptype->af_packet_priv == NULL ||
1345 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1346 			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1347 			if (!skb2)
1348 				break;
1349 
1350 			/* The network header should already be
1351 			   set correctly by the sender; the check below is
1352 			   just protection against buggy protocols.
1353 			 */
1354 			skb_reset_mac_header(skb2);
1355 
1356 			if (skb_network_header(skb2) < skb2->data ||
1357 			    skb2->network_header > skb2->tail) {
1358 				if (net_ratelimit())
1359 					printk(KERN_CRIT "protocol %04x is "
1360 					       "buggy, dev %s\n",
1361 					       skb2->protocol, dev->name);
1362 				skb_reset_network_header(skb2);
1363 			}
1364 
1365 			skb2->transport_header = skb2->network_header;
1366 			skb2->pkt_type = PACKET_OUTGOING;
1367 			ptype->func(skb2, skb->dev, ptype, skb->dev);
1368 		}
1369 	}
1370 	rcu_read_unlock();
1371 }
1372 
1373 
1374 static inline void __netif_reschedule(struct Qdisc *q)
1375 {
1376 	struct softnet_data *sd;
1377 	unsigned long flags;
1378 
1379 	local_irq_save(flags);
1380 	sd = &__get_cpu_var(softnet_data);
1381 	q->next_sched = sd->output_queue;
1382 	sd->output_queue = q;
1383 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
1384 	local_irq_restore(flags);
1385 }
1386 
1387 void __netif_schedule(struct Qdisc *q)
1388 {
1389 	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1390 		__netif_reschedule(q);
1391 }
1392 EXPORT_SYMBOL(__netif_schedule);
1393 
1394 void dev_kfree_skb_irq(struct sk_buff *skb)
1395 {
1396 	if (atomic_dec_and_test(&skb->users)) {
1397 		struct softnet_data *sd;
1398 		unsigned long flags;
1399 
1400 		local_irq_save(flags);
1401 		sd = &__get_cpu_var(softnet_data);
1402 		skb->next = sd->completion_queue;
1403 		sd->completion_queue = skb;
1404 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1405 		local_irq_restore(flags);
1406 	}
1407 }
1408 EXPORT_SYMBOL(dev_kfree_skb_irq);
1409 
1410 void dev_kfree_skb_any(struct sk_buff *skb)
1411 {
1412 	if (in_irq() || irqs_disabled())
1413 		dev_kfree_skb_irq(skb);
1414 	else
1415 		dev_kfree_skb(skb);
1416 }
1417 EXPORT_SYMBOL(dev_kfree_skb_any);
1418 
1419 
1420 /**
1421  * netif_device_detach - mark device as removed
1422  * @dev: network device
1423  *
1424  * Mark the device as removed from the system and therefore no longer available.
1425  */
1426 void netif_device_detach(struct net_device *dev)
1427 {
1428 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1429 	    netif_running(dev)) {
1430 		netif_stop_queue(dev);
1431 	}
1432 }
1433 EXPORT_SYMBOL(netif_device_detach);
1434 
1435 /**
1436  * netif_device_attach - mark device as attached
1437  * @dev: network device
1438  *
1439  * Mark the device as attached to the system and restart the transmit queue if needed.
1440  */
1441 void netif_device_attach(struct net_device *dev)
1442 {
1443 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1444 	    netif_running(dev)) {
1445 		netif_wake_queue(dev);
1446 		__netdev_watchdog_up(dev);
1447 	}
1448 }
1449 EXPORT_SYMBOL(netif_device_attach);
1450 
1451 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1452 {
1453 	return ((features & NETIF_F_GEN_CSUM) ||
1454 		((features & NETIF_F_IP_CSUM) &&
1455 		 protocol == htons(ETH_P_IP)) ||
1456 		((features & NETIF_F_IPV6_CSUM) &&
1457 		 protocol == htons(ETH_P_IPV6)));
1458 }
1459 
1460 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1461 {
1462 	if (can_checksum_protocol(dev->features, skb->protocol))
1463 		return true;
1464 
1465 	if (skb->protocol == htons(ETH_P_8021Q)) {
1466 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1467 		if (can_checksum_protocol(dev->features & dev->vlan_features,
1468 					  veh->h_vlan_encapsulated_proto))
1469 			return true;
1470 	}
1471 
1472 	return false;
1473 }
1474 
1475 /*
1476  * Invalidate hardware checksum when packet is to be mangled, and
1477  * complete checksum manually on outgoing path.
1478  */
1479 int skb_checksum_help(struct sk_buff *skb)
1480 {
1481 	__wsum csum;
1482 	int ret = 0, offset;
1483 
1484 	if (skb->ip_summed == CHECKSUM_COMPLETE)
1485 		goto out_set_summed;
1486 
1487 	if (unlikely(skb_shinfo(skb)->gso_size)) {
1488 		/* Let GSO fix up the checksum. */
1489 		goto out_set_summed;
1490 	}
1491 
1492 	offset = skb->csum_start - skb_headroom(skb);
1493 	BUG_ON(offset >= skb_headlen(skb));
1494 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
1495 
1496 	offset += skb->csum_offset;
1497 	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1498 
1499 	if (skb_cloned(skb) &&
1500 	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1501 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1502 		if (ret)
1503 			goto out;
1504 	}
1505 
1506 	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
1507 out_set_summed:
1508 	skb->ip_summed = CHECKSUM_NONE;
1509 out:
1510 	return ret;
1511 }
1512 
1513 /**
1514  *	skb_gso_segment - Perform segmentation on skb.
1515  *	@skb: buffer to segment
1516  *	@features: features for the output path (see dev->features)
1517  *
1518  *	This function segments the given skb and returns a list of segments.
1519  *
1520  *	It may return NULL if the skb requires no segmentation.  This is
1521  *	only possible when GSO is used for verifying header integrity.
1522  */
1523 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1524 {
1525 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1526 	struct packet_type *ptype;
1527 	__be16 type = skb->protocol;
1528 	int err;
1529 
1530 	BUG_ON(skb_shinfo(skb)->frag_list);
1531 
1532 	skb_reset_mac_header(skb);
1533 	skb->mac_len = skb->network_header - skb->mac_header;
1534 	__skb_pull(skb, skb->mac_len);
1535 
1536 	if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
1537 		if (skb_header_cloned(skb) &&
1538 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1539 			return ERR_PTR(err);
1540 	}
1541 
1542 	rcu_read_lock();
1543 	list_for_each_entry_rcu(ptype,
1544 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
1545 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1546 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1547 				err = ptype->gso_send_check(skb);
1548 				segs = ERR_PTR(err);
1549 				if (err || skb_gso_ok(skb, features))
1550 					break;
1551 				__skb_push(skb, (skb->data -
1552 						 skb_network_header(skb)));
1553 			}
1554 			segs = ptype->gso_segment(skb, features);
1555 			break;
1556 		}
1557 	}
1558 	rcu_read_unlock();
1559 
1560 	__skb_push(skb, skb->data - skb_mac_header(skb));
1561 
1562 	return segs;
1563 }
1564 
1565 EXPORT_SYMBOL(skb_gso_segment);
1566 
1567 /* Take action when hardware reception checksum errors are detected. */
1568 #ifdef CONFIG_BUG
1569 void netdev_rx_csum_fault(struct net_device *dev)
1570 {
1571 	if (net_ratelimit()) {
1572 		printk(KERN_ERR "%s: hw csum failure.\n",
1573 			dev ? dev->name : "<unknown>");
1574 		dump_stack();
1575 	}
1576 }
1577 EXPORT_SYMBOL(netdev_rx_csum_fault);
1578 #endif
1579 
1580 /* Actually, we should eliminate this check as soon as we know that:
1581  * 1. An IOMMU is present and can map all of the memory, or
1582  * 2. No high memory really exists on this machine.
1583  */
1584 
1585 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1586 {
1587 #ifdef CONFIG_HIGHMEM
1588 	int i;
1589 
1590 	if (dev->features & NETIF_F_HIGHDMA)
1591 		return 0;
1592 
1593 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1594 		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1595 			return 1;
1596 
1597 #endif
1598 	return 0;
1599 }
1600 
1601 struct dev_gso_cb {
1602 	void (*destructor)(struct sk_buff *skb);
1603 };
1604 
1605 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1606 
1607 static void dev_gso_skb_destructor(struct sk_buff *skb)
1608 {
1609 	struct dev_gso_cb *cb;
1610 
1611 	do {
1612 		struct sk_buff *nskb = skb->next;
1613 
1614 		skb->next = nskb->next;
1615 		nskb->next = NULL;
1616 		kfree_skb(nskb);
1617 	} while (skb->next);
1618 
1619 	cb = DEV_GSO_CB(skb);
1620 	if (cb->destructor)
1621 		cb->destructor(skb);
1622 }
1623 
1624 /**
1625  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
1626  *	@skb: buffer to segment
1627  *
1628  *	This function segments the given skb and stores the list of segments
1629  *	in skb->next.
1630  */
1631 static int dev_gso_segment(struct sk_buff *skb)
1632 {
1633 	struct net_device *dev = skb->dev;
1634 	struct sk_buff *segs;
1635 	int features = dev->features & ~(illegal_highdma(dev, skb) ?
1636 					 NETIF_F_SG : 0);
1637 
1638 	segs = skb_gso_segment(skb, features);
1639 
1640 	/* Verifying header integrity only. */
1641 	if (!segs)
1642 		return 0;
1643 
1644 	if (IS_ERR(segs))
1645 		return PTR_ERR(segs);
1646 
1647 	skb->next = segs;
1648 	DEV_GSO_CB(skb)->destructor = skb->destructor;
1649 	skb->destructor = dev_gso_skb_destructor;
1650 
1651 	return 0;
1652 }
1653 
1654 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1655 			struct netdev_queue *txq)
1656 {
1657 	if (likely(!skb->next)) {
1658 		if (!list_empty(&ptype_all))
1659 			dev_queue_xmit_nit(skb, dev);
1660 
1661 		if (netif_needs_gso(dev, skb)) {
1662 			if (unlikely(dev_gso_segment(skb)))
1663 				goto out_kfree_skb;
1664 			if (skb->next)
1665 				goto gso;
1666 		}
1667 
1668 		return dev->hard_start_xmit(skb, dev);
1669 	}
1670 
1671 gso:
1672 	do {
1673 		struct sk_buff *nskb = skb->next;
1674 		int rc;
1675 
1676 		skb->next = nskb->next;
1677 		nskb->next = NULL;
1678 		rc = dev->hard_start_xmit(nskb, dev);
1679 		if (unlikely(rc)) {
1680 			nskb->next = skb->next;
1681 			skb->next = nskb;
1682 			return rc;
1683 		}
1684 		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
1685 			return NETDEV_TX_BUSY;
1686 	} while (skb->next);
1687 
1688 	skb->destructor = DEV_GSO_CB(skb)->destructor;
1689 
1690 out_kfree_skb:
1691 	kfree_skb(skb);
1692 	return 0;
1693 }
1694 
1695 static u32 simple_tx_hashrnd;
1696 static int simple_tx_hashrnd_initialized = 0;
1697 
1698 static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
1699 {
1700 	u32 addr1, addr2, ports;
1701 	u32 hash, ihl;
1702 	u8 ip_proto = 0;
1703 
1704 	if (unlikely(!simple_tx_hashrnd_initialized)) {
1705 		get_random_bytes(&simple_tx_hashrnd, 4);
1706 		simple_tx_hashrnd_initialized = 1;
1707 	}
1708 
1709 	switch (skb->protocol) {
1710 	case htons(ETH_P_IP):
1711 		if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
1712 			ip_proto = ip_hdr(skb)->protocol;
1713 		addr1 = ip_hdr(skb)->saddr;
1714 		addr2 = ip_hdr(skb)->daddr;
1715 		ihl = ip_hdr(skb)->ihl;
1716 		break;
1717 	case htons(ETH_P_IPV6):
1718 		ip_proto = ipv6_hdr(skb)->nexthdr;
1719 		addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
1720 		addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
1721 		ihl = (40 >> 2);
1722 		break;
1723 	default:
1724 		return 0;
1725 	}
1726 
1727 
1728 	switch (ip_proto) {
1729 	case IPPROTO_TCP:
1730 	case IPPROTO_UDP:
1731 	case IPPROTO_DCCP:
1732 	case IPPROTO_ESP:
1733 	case IPPROTO_AH:
1734 	case IPPROTO_SCTP:
1735 	case IPPROTO_UDPLITE:
1736 		ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
1737 		break;
1738 
1739 	default:
1740 		ports = 0;
1741 		break;
1742 	}
1743 
1744 	hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
1745 
1746 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1747 }
1748 
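/*
 * The last line of simple_tx_hash() maps the 32-bit hash uniformly onto
 * the available queues: for example, with dev->real_num_tx_queues == 4,
 * a hash of 0x80000000 selects queue ((u64)0x80000000 * 4) >> 32 == 2.
 */
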
1749 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1750 					struct sk_buff *skb)
1751 {
1752 	u16 queue_index = 0;
1753 
1754 	if (dev->select_queue)
1755 		queue_index = dev->select_queue(dev, skb);
1756 	else if (dev->real_num_tx_queues > 1)
1757 		queue_index = simple_tx_hash(dev, skb);
1758 
1759 	skb_set_queue_mapping(skb, queue_index);
1760 	return netdev_get_tx_queue(dev, queue_index);
1761 }
1762 
1763 /**
1764  *	dev_queue_xmit - transmit a buffer
1765  *	@skb: buffer to transmit
1766  *
1767  *	Queue a buffer for transmission to a network device. The caller must
1768  *	have set the device and priority and built the buffer before calling
1769  *	this function. The function can be called from an interrupt.
1770  *
1771  *	A negative errno code is returned on a failure. A success does not
1772  *	guarantee the frame will be transmitted as it may be dropped due
1773  *	to congestion or traffic shaping.
1774  *
1775  * -----------------------------------------------------------------------------------
1776  *      I notice this method can also return errors from the queue disciplines,
1777  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1778  *      be positive.
1779  *
1780  *      Regardless of the return value, the skb is consumed, so it is currently
1781  *      difficult to retry a send to this method.  (You can bump the ref count
1782  *      before sending to hold a reference for retry if you are careful.)
1783  *
1784  *      When calling this method, interrupts MUST be enabled.  This is because
1785  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1786  *          --BLG
1787  */
1788 int dev_queue_xmit(struct sk_buff *skb)
1789 {
1790 	struct net_device *dev = skb->dev;
1791 	struct netdev_queue *txq;
1792 	struct Qdisc *q;
1793 	int rc = -ENOMEM;
1794 
1795 	/* GSO will handle the following emulations directly. */
1796 	if (netif_needs_gso(dev, skb))
1797 		goto gso;
1798 
1799 	if (skb_shinfo(skb)->frag_list &&
1800 	    !(dev->features & NETIF_F_FRAGLIST) &&
1801 	    __skb_linearize(skb))
1802 		goto out_kfree_skb;
1803 
1804 	/* Fragmented skb is linearized if device does not support SG,
1805 	 * or if at least one of the fragments is in highmem and the device
1806 	 * does not support DMA from it.
1807 	 */
1808 	if (skb_shinfo(skb)->nr_frags &&
1809 	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1810 	    __skb_linearize(skb))
1811 		goto out_kfree_skb;
1812 
1813 	/* If packet is not checksummed and device does not support
1814 	 * checksumming for this protocol, complete checksumming here.
1815 	 */
1816 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
1817 		skb_set_transport_header(skb, skb->csum_start -
1818 					      skb_headroom(skb));
1819 		if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
1820 			goto out_kfree_skb;
1821 	}
1822 
1823 gso:
1824 	/* Disable soft irqs for various locks below. Also
1825 	 * stops preemption for RCU.
1826 	 */
1827 	rcu_read_lock_bh();
1828 
1829 	txq = dev_pick_tx(dev, skb);
1830 	q = rcu_dereference(txq->qdisc);
1831 
1832 #ifdef CONFIG_NET_CLS_ACT
1833 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1834 #endif
1835 	if (q->enqueue) {
1836 		spinlock_t *root_lock = qdisc_lock(q);
1837 
1838 		spin_lock(root_lock);
1839 
1840 		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
1841 			kfree_skb(skb);
1842 			rc = NET_XMIT_DROP;
1843 		} else {
1844 			rc = qdisc_enqueue_root(skb, q);
1845 			qdisc_run(q);
1846 		}
1847 		spin_unlock(root_lock);
1848 
1849 		goto out;
1850 	}
1851 
1852 	/* The device has no queue. This is the common case for software
1853 	   devices: loopback, all sorts of tunnels...
1854 
1855 	   Really, it is unlikely that netif_tx_lock protection is necessary
1856 	   here.  (e.g. loopback and IP tunnels are clean, ignoring statistics
1857 	   counters.)
1858 	   However, it is possible that they rely on the protection
1859 	   we take here.
1860 
1861 	   Check this and take the lock; it is not prone to deadlocks.
1862 	   Shooting down the noqueue qdisc would be even simpler 8)
1863 	 */
1864 	if (dev->flags & IFF_UP) {
1865 		int cpu = smp_processor_id(); /* ok because BHs are off */
1866 
1867 		if (txq->xmit_lock_owner != cpu) {
1868 
1869 			HARD_TX_LOCK(dev, txq, cpu);
1870 
1871 			if (!netif_tx_queue_stopped(txq)) {
1872 				rc = 0;
1873 				if (!dev_hard_start_xmit(skb, dev, txq)) {
1874 					HARD_TX_UNLOCK(dev, txq);
1875 					goto out;
1876 				}
1877 			}
1878 			HARD_TX_UNLOCK(dev, txq);
1879 			if (net_ratelimit())
1880 				printk(KERN_CRIT "Virtual device %s asks to "
1881 				       "queue packet!\n", dev->name);
1882 		} else {
1883 			/* Recursion detected! This can happen,
1884 			 * unfortunately. */
1885 			if (net_ratelimit())
1886 				printk(KERN_CRIT "Dead loop on virtual device "
1887 				       "%s, fix it urgently!\n", dev->name);
1888 		}
1889 	}
1890 
1891 	rc = -ENETDOWN;
1892 	rcu_read_unlock_bh();
1893 
1894 out_kfree_skb:
1895 	kfree_skb(skb);
1896 	return rc;
1897 out:
1898 	rcu_read_unlock_bh();
1899 	return rc;
1900 }
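/*
 * Illustrative caller sketch (hedged; the surrounding context is assumed,
 * not taken from this file).  A protocol layer that has already built a
 * frame hands it off like this.  Note that the skb is consumed whether or
 * not the call succeeds, and that qdiscs may return positive NET_XMIT_*
 * codes, so the return value cannot be used to retry with the same skb:
 *
 *	skb->dev = dev;				// device chosen by the caller
 *	skb->protocol = htons(ETH_P_IP);
 *	rc = dev_queue_xmit(skb);
 *	if (rc)					// NET_XMIT_DROP etc.; skb is already gone
 *		stats->tx_dropped++;		// hypothetical accounting only
 */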
1901 
1902 
1903 /*=======================================================================
1904 			Receiver routines
1905   =======================================================================*/
1906 
1907 int netdev_max_backlog __read_mostly = 1000;
1908 int netdev_budget __read_mostly = 300;
1909 int weight_p __read_mostly = 64;            /* old backlog weight */
1910 
1911 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1912 
1913 
1914 /**
1915  *	netif_rx	-	post buffer to the network code
1916  *	@skb: buffer to post
1917  *
1918  *	This function receives a packet from a device driver and queues it for
1919  *	the upper (protocol) levels to process.  It always succeeds. The buffer
1920  *	may be dropped during processing for congestion control or by the
1921  *	protocol layers.
1922  *
1923  *	return values:
1924  *	NET_RX_SUCCESS	(no congestion)
1925  *	NET_RX_DROP     (packet was dropped)
1926  *
1927  */
1928 
1929 int netif_rx(struct sk_buff *skb)
1930 {
1931 	struct softnet_data *queue;
1932 	unsigned long flags;
1933 
1934 	/* if netpoll wants it, pretend we never saw it */
1935 	if (netpoll_rx(skb))
1936 		return NET_RX_DROP;
1937 
1938 	if (!skb->tstamp.tv64)
1939 		net_timestamp(skb);
1940 
1941 	/*
1942 	 * The code is rearranged so that the path is the shortest
1943 	 * when the CPU is congested but still operating.
1944 	 */
1945 	local_irq_save(flags);
1946 	queue = &__get_cpu_var(softnet_data);
1947 
1948 	__get_cpu_var(netdev_rx_stat).total++;
1949 	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1950 		if (queue->input_pkt_queue.qlen) {
1951 enqueue:
1952 			__skb_queue_tail(&queue->input_pkt_queue, skb);
1953 			local_irq_restore(flags);
1954 			return NET_RX_SUCCESS;
1955 		}
1956 
1957 		napi_schedule(&queue->backlog);
1958 		goto enqueue;
1959 	}
1960 
1961 	__get_cpu_var(netdev_rx_stat).dropped++;
1962 	local_irq_restore(flags);
1963 
1964 	kfree_skb(skb);
1965 	return NET_RX_DROP;
1966 }
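/*
 * Illustrative driver receive sketch (hedged; pkt_len, rx_buf and dev are
 * hypothetical driver state).  A non-NAPI driver typically builds an skb
 * in its interrupt handler and feeds it to netif_rx(), which queues it on
 * this CPU's backlog for later softirq processing:
 *
 *	skb = dev_alloc_skb(pkt_len + NET_IP_ALIGN);
 *	if (skb) {
 *		skb_reserve(skb, NET_IP_ALIGN);		// align the IP header
 *		memcpy(skb_put(skb, pkt_len), rx_buf, pkt_len);
 *		skb->protocol = eth_type_trans(skb, dev);
 *		netif_rx(skb);				// may drop under congestion
 *	}
 */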
1967 
1968 int netif_rx_ni(struct sk_buff *skb)
1969 {
1970 	int err;
1971 
1972 	preempt_disable();
1973 	err = netif_rx(skb);
1974 	if (local_softirq_pending())
1975 		do_softirq();
1976 	preempt_enable();
1977 
1978 	return err;
1979 }
1980 
1981 EXPORT_SYMBOL(netif_rx_ni);
1982 
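/*
 * TX softirq handler: frees skbs that were queued on the per-CPU
 * completion_queue by dev_kfree_skb_irq() (they cannot be freed from
 * hard-IRQ context), and re-runs qdiscs that __netif_schedule() placed on
 * the per-CPU output_queue.
 */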
1983 static void net_tx_action(struct softirq_action *h)
1984 {
1985 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
1986 
1987 	if (sd->completion_queue) {
1988 		struct sk_buff *clist;
1989 
1990 		local_irq_disable();
1991 		clist = sd->completion_queue;
1992 		sd->completion_queue = NULL;
1993 		local_irq_enable();
1994 
1995 		while (clist) {
1996 			struct sk_buff *skb = clist;
1997 			clist = clist->next;
1998 
1999 			WARN_ON(atomic_read(&skb->users));
2000 			__kfree_skb(skb);
2001 		}
2002 	}
2003 
2004 	if (sd->output_queue) {
2005 		struct Qdisc *head;
2006 
2007 		local_irq_disable();
2008 		head = sd->output_queue;
2009 		sd->output_queue = NULL;
2010 		local_irq_enable();
2011 
2012 		while (head) {
2013 			struct Qdisc *q = head;
2014 			spinlock_t *root_lock;
2015 
2016 			head = head->next_sched;
2017 
2018 			root_lock = qdisc_lock(q);
2019 			if (spin_trylock(root_lock)) {
2020 				smp_mb__before_clear_bit();
2021 				clear_bit(__QDISC_STATE_SCHED,
2022 					  &q->state);
2023 				qdisc_run(q);
2024 				spin_unlock(root_lock);
2025 			} else {
2026 				if (!test_bit(__QDISC_STATE_DEACTIVATED,
2027 					      &q->state)) {
2028 					__netif_reschedule(q);
2029 				} else {
2030 					smp_mb__before_clear_bit();
2031 					clear_bit(__QDISC_STATE_SCHED,
2032 						  &q->state);
2033 				}
2034 			}
2035 		}
2036 	}
2037 }
2038 
2039 static inline int deliver_skb(struct sk_buff *skb,
2040 			      struct packet_type *pt_prev,
2041 			      struct net_device *orig_dev)
2042 {
2043 	atomic_inc(&skb->users);
2044 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2045 }
2046 
2047 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
2048 /* These hooks defined here for ATM */
2049 struct net_bridge;
2050 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
2051 						unsigned char *addr);
2052 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
2053 
2054 /*
2055  * If the bridge module is loaded, call the bridging hook.
2056  * Returns NULL if the packet was consumed.
2057  */
2058 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2059 					struct sk_buff *skb) __read_mostly;
2060 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2061 					    struct packet_type **pt_prev, int *ret,
2062 					    struct net_device *orig_dev)
2063 {
2064 	struct net_bridge_port *port;
2065 
2066 	if (skb->pkt_type == PACKET_LOOPBACK ||
2067 	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
2068 		return skb;
2069 
2070 	if (*pt_prev) {
2071 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2072 		*pt_prev = NULL;
2073 	}
2074 
2075 	return br_handle_frame_hook(port, skb);
2076 }
2077 #else
2078 #define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
2079 #endif
2080 
2081 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2082 struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2083 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2084 
2085 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2086 					     struct packet_type **pt_prev,
2087 					     int *ret,
2088 					     struct net_device *orig_dev)
2089 {
2090 	if (skb->dev->macvlan_port == NULL)
2091 		return skb;
2092 
2093 	if (*pt_prev) {
2094 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2095 		*pt_prev = NULL;
2096 	}
2097 	return macvlan_handle_frame_hook(skb);
2098 }
2099 #else
2100 #define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
2101 #endif
2102 
2103 #ifdef CONFIG_NET_CLS_ACT
2104 /* TODO: Maybe we should just force sch_ingress to be compiled in
2105  * whenever CONFIG_NET_CLS_ACT is?  Otherwise we execute some useless
2106  * instructions (a compare and two extra stores) when it is not enabled
2107  * but CONFIG_NET_CLS_ACT is.
2108  * NOTE: This doesn't remove any functionality; if you don't have
2109  * the ingress scheduler, you just can't add policies on ingress.
2110  */
2112 static int ing_filter(struct sk_buff *skb)
2113 {
2114 	struct net_device *dev = skb->dev;
2115 	u32 ttl = G_TC_RTTL(skb->tc_verd);
2116 	struct netdev_queue *rxq;
2117 	int result = TC_ACT_OK;
2118 	struct Qdisc *q;
2119 
2120 	if (MAX_RED_LOOP < ttl++) {
2121 		printk(KERN_WARNING
2122 		       "Redir loop detected Dropping packet (%d->%d)\n",
2123 		       skb->iif, dev->ifindex);
2124 		return TC_ACT_SHOT;
2125 	}
2126 
2127 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2128 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2129 
2130 	rxq = &dev->rx_queue;
2131 
2132 	q = rxq->qdisc;
2133 	if (q != &noop_qdisc) {
2134 		spin_lock(qdisc_lock(q));
2135 		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2136 			result = qdisc_enqueue_root(skb, q);
2137 		spin_unlock(qdisc_lock(q));
2138 	}
2139 
2140 	return result;
2141 }
2142 
2143 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2144 					 struct packet_type **pt_prev,
2145 					 int *ret, struct net_device *orig_dev)
2146 {
2147 	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
2148 		goto out;
2149 
2150 	if (*pt_prev) {
2151 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2152 		*pt_prev = NULL;
2153 	} else {
2154 		/* Huh? Why does turning on AF_PACKET affect this? */
2155 		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2156 	}
2157 
2158 	switch (ing_filter(skb)) {
2159 	case TC_ACT_SHOT:
2160 	case TC_ACT_STOLEN:
2161 		kfree_skb(skb);
2162 		return NULL;
2163 	}
2164 
2165 out:
2166 	skb->tc_verd = 0;
2167 	return skb;
2168 }
2169 #endif
2170 
2171 /*
2172  * 	netif_nit_deliver - deliver received packets to network taps
2173  * 	@skb: buffer
2174  *
2175  * 	This function is used to deliver incoming packets to network
2176  * 	taps. It should be used when the normal netif_receive_skb path
2177  * 	is bypassed, for example because of VLAN acceleration.
2178  */
2179 void netif_nit_deliver(struct sk_buff *skb)
2180 {
2181 	struct packet_type *ptype;
2182 
2183 	if (list_empty(&ptype_all))
2184 		return;
2185 
2186 	skb_reset_network_header(skb);
2187 	skb_reset_transport_header(skb);
2188 	skb->mac_len = skb->network_header - skb->mac_header;
2189 
2190 	rcu_read_lock();
2191 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2192 		if (!ptype->dev || ptype->dev == skb->dev)
2193 			deliver_skb(skb, ptype, skb->dev);
2194 	}
2195 	rcu_read_unlock();
2196 }
2197 
2198 /**
2199  *	netif_receive_skb - process receive buffer from network
2200  *	@skb: buffer to process
2201  *
2202  *	netif_receive_skb() is the main receive data processing function.
2203  *	It always succeeds. The buffer may be dropped during processing
2204  *	for congestion control or by the protocol layers.
2205  *
2206  *	This function may only be called from softirq context and interrupts
2207  *	should be enabled.
2208  *
2209  *	Return values (usually ignored):
2210  *	NET_RX_SUCCESS: no congestion
2211  *	NET_RX_DROP: packet was dropped
2212  */
2213 int netif_receive_skb(struct sk_buff *skb)
2214 {
2215 	struct packet_type *ptype, *pt_prev;
2216 	struct net_device *orig_dev;
2217 	struct net_device *null_or_orig;
2218 	int ret = NET_RX_DROP;
2219 	__be16 type;
2220 
2221 	if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
2222 		return NET_RX_SUCCESS;
2223 
2224 	/* if we've gotten here through NAPI, check netpoll */
2225 	if (netpoll_receive_skb(skb))
2226 		return NET_RX_DROP;
2227 
2228 	if (!skb->tstamp.tv64)
2229 		net_timestamp(skb);
2230 
2231 	if (!skb->iif)
2232 		skb->iif = skb->dev->ifindex;
2233 
2234 	null_or_orig = NULL;
2235 	orig_dev = skb->dev;
2236 	if (orig_dev->master) {
2237 		if (skb_bond_should_drop(skb))
2238 			null_or_orig = orig_dev; /* deliver only exact match */
2239 		else
2240 			skb->dev = orig_dev->master;
2241 	}
2242 
2243 	__get_cpu_var(netdev_rx_stat).total++;
2244 
2245 	skb_reset_network_header(skb);
2246 	skb_reset_transport_header(skb);
2247 	skb->mac_len = skb->network_header - skb->mac_header;
2248 
2249 	pt_prev = NULL;
2250 
2251 	rcu_read_lock();
2252 
2253 	/* Don't receive packets in an exiting network namespace */
2254 	if (!net_alive(dev_net(skb->dev)))
2255 		goto out;
2256 
2257 #ifdef CONFIG_NET_CLS_ACT
2258 	if (skb->tc_verd & TC_NCLS) {
2259 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2260 		goto ncls;
2261 	}
2262 #endif
2263 
2264 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2265 		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2266 		    ptype->dev == orig_dev) {
2267 			if (pt_prev)
2268 				ret = deliver_skb(skb, pt_prev, orig_dev);
2269 			pt_prev = ptype;
2270 		}
2271 	}
2272 
2273 #ifdef CONFIG_NET_CLS_ACT
2274 	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2275 	if (!skb)
2276 		goto out;
2277 ncls:
2278 #endif
2279 
2280 	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2281 	if (!skb)
2282 		goto out;
2283 	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2284 	if (!skb)
2285 		goto out;
2286 
2287 	type = skb->protocol;
2288 	list_for_each_entry_rcu(ptype,
2289 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2290 		if (ptype->type == type &&
2291 		    (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2292 		     ptype->dev == orig_dev)) {
2293 			if (pt_prev)
2294 				ret = deliver_skb(skb, pt_prev, orig_dev);
2295 			pt_prev = ptype;
2296 		}
2297 	}
2298 
2299 	if (pt_prev) {
2300 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2301 	} else {
2302 		kfree_skb(skb);
2303 		/* Jamal, now you will not be able to escape explaining
2304 		 * to me how you were going to use this. :-)
2305 		 */
2306 		ret = NET_RX_DROP;
2307 	}
2308 
2309 out:
2310 	rcu_read_unlock();
2311 	return ret;
2312 }
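/*
 * Illustrative NAPI delivery sketch (hedged; the mydrv_* helpers and
 * adapter are hypothetical).  NAPI drivers call netif_receive_skb()
 * directly from their ->poll() callback, i.e. from softirq context with
 * interrupts enabled, instead of going through netif_rx():
 *
 *	while (work < budget && mydrv_frame_ready(adapter)) {
 *		struct sk_buff *skb = mydrv_build_skb(adapter);
 *
 *		skb->protocol = eth_type_trans(skb, adapter->netdev);
 *		netif_receive_skb(skb);
 *		work++;
 *	}
 */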
2313 
2314 /* Network device is going away, flush any packets still pending  */
2315 static void flush_backlog(void *arg)
2316 {
2317 	struct net_device *dev = arg;
2318 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2319 	struct sk_buff *skb, *tmp;
2320 
2321 	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2322 		if (skb->dev == dev) {
2323 			__skb_unlink(skb, &queue->input_pkt_queue);
2324 			kfree_skb(skb);
2325 		}
2326 }
2327 
2328 static int process_backlog(struct napi_struct *napi, int quota)
2329 {
2330 	int work = 0;
2331 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2332 	unsigned long start_time = jiffies;
2333 
2334 	napi->weight = weight_p;
2335 	do {
2336 		struct sk_buff *skb;
2337 
2338 		local_irq_disable();
2339 		skb = __skb_dequeue(&queue->input_pkt_queue);
2340 		if (!skb) {
2341 			__napi_complete(napi);
2342 			local_irq_enable();
2343 			break;
2344 		}
2345 		local_irq_enable();
2346 
2347 		netif_receive_skb(skb);
2348 	} while (++work < quota && jiffies == start_time);
2349 
2350 	return work;
2351 }
2352 
2353 /**
2354  * __napi_schedule - schedule for receive
2355  * @n: entry to schedule
2356  *
2357  * The entry's receive function will be scheduled to run
2358  */
2359 void __napi_schedule(struct napi_struct *n)
2360 {
2361 	unsigned long flags;
2362 
2363 	local_irq_save(flags);
2364 	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2365 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2366 	local_irq_restore(flags);
2367 }
2368 EXPORT_SYMBOL(__napi_schedule);
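/*
 * Illustrative interrupt-handler sketch (hedged; the mydrv_* names are
 * hypothetical).  A NAPI driver masks its RX interrupt and schedules its
 * napi_struct; napi_schedule() ends up in __napi_schedule() above, and
 * net_rx_action() below later invokes the driver's ->poll():
 *
 *	static irqreturn_t mydrv_interrupt(int irq, void *dev_id)
 *	{
 *		struct mydrv_adapter *adapter = dev_id;
 *
 *		mydrv_mask_rx_irq(adapter);
 *		napi_schedule(&adapter->napi);
 *		return IRQ_HANDLED;
 *	}
 */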
2369 
2370 
2371 static void net_rx_action(struct softirq_action *h)
2372 {
2373 	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2374 	unsigned long start_time = jiffies;
2375 	int budget = netdev_budget;
2376 	void *have;
2377 
2378 	local_irq_disable();
2379 
2380 	while (!list_empty(list)) {
2381 		struct napi_struct *n;
2382 		int work, weight;
2383 
2384 		/* If softirq window is exhausted then punt.
2385 		 *
2386 		 * Note that this is a slight policy change from the
2387 		 * previous NAPI code, which would allow up to 2
2388 		 * jiffies to pass before breaking out.  The test
2389 		 * used to be "jiffies - start_time > 1".
2390 		 */
2391 		if (unlikely(budget <= 0 || jiffies != start_time))
2392 			goto softnet_break;
2393 
2394 		local_irq_enable();
2395 
2396 		/* Even though interrupts have been re-enabled, this
2397 		 * access is safe because interrupts can only add new
2398 		 * entries to the tail of this list, and only ->poll()
2399 		 * calls can remove this head entry from the list.
2400 		 */
2401 		n = list_entry(list->next, struct napi_struct, poll_list);
2402 
2403 		have = netpoll_poll_lock(n);
2404 
2405 		weight = n->weight;
2406 
2407 		/* This NAPI_STATE_SCHED test is for avoiding a race
2408 		 * with netpoll's poll_napi().  Only the entity which
2409 		 * obtains the lock and sees NAPI_STATE_SCHED set will
2410 		 * actually make the ->poll() call.  Therefore we avoid
2411 		 * accidentally calling ->poll() when NAPI is not scheduled.
2412 		 */
2413 		work = 0;
2414 		if (test_bit(NAPI_STATE_SCHED, &n->state))
2415 			work = n->poll(n, weight);
2416 
2417 		WARN_ON_ONCE(work > weight);
2418 
2419 		budget -= work;
2420 
2421 		local_irq_disable();
2422 
2423 		/* Drivers must not modify the NAPI state if they
2424 		 * consume the entire weight.  In such cases this code
2425 		 * still "owns" the NAPI instance and therefore can
2426 		 * move the instance around on the list at will.
2427 		 */
2428 		if (unlikely(work == weight)) {
2429 			if (unlikely(napi_disable_pending(n)))
2430 				__napi_complete(n);
2431 			else
2432 				list_move_tail(&n->poll_list, list);
2433 		}
2434 
2435 		netpoll_poll_unlock(have);
2436 	}
2437 out:
2438 	local_irq_enable();
2439 
2440 #ifdef CONFIG_NET_DMA
2441 	/*
2442 	 * There may not be any more sk_buffs coming right now, so push
2443 	 * any pending DMA copies to hardware
2444 	 */
2445 	if (!cpus_empty(net_dma.channel_mask)) {
2446 		int chan_idx;
2447 		for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) {
2448 			struct dma_chan *chan = net_dma.channels[chan_idx];
2449 			if (chan)
2450 				dma_async_memcpy_issue_pending(chan);
2451 		}
2452 	}
2453 #endif
2454 
2455 	return;
2456 
2457 softnet_break:
2458 	__get_cpu_var(netdev_rx_stat).time_squeeze++;
2459 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2460 	goto out;
2461 }
2462 
2463 static gifconf_func_t * gifconf_list [NPROTO];
2464 
2465 /**
2466  *	register_gifconf	-	register a SIOCGIF handler
2467  *	@family: Address family
2468  *	@gifconf: Function handler
2469  *
2470  *	Register protocol dependent address dumping routines. The handler
2471  *	that is passed must not be freed or reused until it has been replaced
2472  *	by another handler.
2473  */
2474 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2475 {
2476 	if (family >= NPROTO)
2477 		return -EINVAL;
2478 	gifconf_list[family] = gifconf;
2479 	return 0;
2480 }
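/*
 * Illustrative registration sketch (hedged; the myproto_* names and
 * PF_MYPROTO are hypothetical).  An address family provides a handler with
 * the gifconf_func_t prototype that writes one ifreq per address into the
 * user buffer (or just reports the space needed when bufptr is NULL), and
 * registers it once at init time:
 *
 *	static int myproto_gifconf(struct net_device *dev,
 *				   char __user *bufptr, int len);
 *
 *	register_gifconf(PF_MYPROTO, myproto_gifconf);
 */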
2481 
2482 
2483 /*
2484  *	Map an interface index to its name (SIOCGIFNAME)
2485  */
2486 
2487 /*
2488  *	We need this ioctl for efficient implementation of the
2489  *	if_indextoname() function required by the IPv6 API.  Without
2490  *	it, we would have to search all the interfaces to find a
2491  *	match.  --pb
2492  */
2493 
2494 static int dev_ifname(struct net *net, struct ifreq __user *arg)
2495 {
2496 	struct net_device *dev;
2497 	struct ifreq ifr;
2498 
2499 	/*
2500 	 *	Fetch the caller's info block.
2501 	 */
2502 
2503 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2504 		return -EFAULT;
2505 
2506 	read_lock(&dev_base_lock);
2507 	dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2508 	if (!dev) {
2509 		read_unlock(&dev_base_lock);
2510 		return -ENODEV;
2511 	}
2512 
2513 	strcpy(ifr.ifr_name, dev->name);
2514 	read_unlock(&dev_base_lock);
2515 
2516 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2517 		return -EFAULT;
2518 	return 0;
2519 }
2520 
2521 /*
2522  *	Perform a SIOCGIFCONF call. This structure will change
2523  *	size eventually, and there is nothing I can do about it.
2524  *	Thus we will need a 'compatibility mode'.
2525  */
2526 
2527 static int dev_ifconf(struct net *net, char __user *arg)
2528 {
2529 	struct ifconf ifc;
2530 	struct net_device *dev;
2531 	char __user *pos;
2532 	int len;
2533 	int total;
2534 	int i;
2535 
2536 	/*
2537 	 *	Fetch the caller's info block.
2538 	 */
2539 
2540 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2541 		return -EFAULT;
2542 
2543 	pos = ifc.ifc_buf;
2544 	len = ifc.ifc_len;
2545 
2546 	/*
2547 	 *	Loop over the interfaces, and write an info block for each.
2548 	 */
2549 
2550 	total = 0;
2551 	for_each_netdev(net, dev) {
2552 		for (i = 0; i < NPROTO; i++) {
2553 			if (gifconf_list[i]) {
2554 				int done;
2555 				if (!pos)
2556 					done = gifconf_list[i](dev, NULL, 0);
2557 				else
2558 					done = gifconf_list[i](dev, pos + total,
2559 							       len - total);
2560 				if (done < 0)
2561 					return -EFAULT;
2562 				total += done;
2563 			}
2564 		}
2565 	}
2566 
2567 	/*
2568 	 *	All done.  Write the updated control block back to the caller.
2569 	 */
2570 	ifc.ifc_len = total;
2571 
2572 	/*
2573 	 * 	Both BSD and Solaris return 0 here, so we do too.
2574 	 */
2575 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2576 }
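/*
 * Userspace view, for reference (hedged, illustrative snippet; not kernel
 * code).  Callers commonly issue SIOCGIFCONF twice: once with a NULL
 * buffer so that only the required length is computed, then again with a
 * buffer of that size:
 *
 *	struct ifconf ifc = { .ifc_buf = NULL };
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	ioctl(fd, SIOCGIFCONF, &ifc);		// ifc.ifc_len = bytes needed
 *	ifc.ifc_buf = malloc(ifc.ifc_len);
 *	ioctl(fd, SIOCGIFCONF, &ifc);		// fills the ifreq entries
 */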
2577 
2578 #ifdef CONFIG_PROC_FS
2579 /*
2580  *	This is invoked by the /proc filesystem handler to display a device
2581  *	in detail.
2582  */
2583 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2584 	__acquires(dev_base_lock)
2585 {
2586 	struct net *net = seq_file_net(seq);
2587 	loff_t off;
2588 	struct net_device *dev;
2589 
2590 	read_lock(&dev_base_lock);
2591 	if (!*pos)
2592 		return SEQ_START_TOKEN;
2593 
2594 	off = 1;
2595 	for_each_netdev(net, dev)
2596 		if (off++ == *pos)
2597 			return dev;
2598 
2599 	return NULL;
2600 }
2601 
2602 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2603 {
2604 	struct net *net = seq_file_net(seq);
2605 	++*pos;
2606 	return v == SEQ_START_TOKEN ?
2607 		first_net_device(net) : next_net_device((struct net_device *)v);
2608 }
2609 
2610 void dev_seq_stop(struct seq_file *seq, void *v)
2611 	__releases(dev_base_lock)
2612 {
2613 	read_unlock(&dev_base_lock);
2614 }
2615 
2616 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2617 {
2618 	struct net_device_stats *stats = dev->get_stats(dev);
2619 
2620 	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2621 		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2622 		   dev->name, stats->rx_bytes, stats->rx_packets,
2623 		   stats->rx_errors,
2624 		   stats->rx_dropped + stats->rx_missed_errors,
2625 		   stats->rx_fifo_errors,
2626 		   stats->rx_length_errors + stats->rx_over_errors +
2627 		    stats->rx_crc_errors + stats->rx_frame_errors,
2628 		   stats->rx_compressed, stats->multicast,
2629 		   stats->tx_bytes, stats->tx_packets,
2630 		   stats->tx_errors, stats->tx_dropped,
2631 		   stats->tx_fifo_errors, stats->collisions,
2632 		   stats->tx_carrier_errors +
2633 		    stats->tx_aborted_errors +
2634 		    stats->tx_window_errors +
2635 		    stats->tx_heartbeat_errors,
2636 		   stats->tx_compressed);
2637 }
2638 
2639 /*
2640  *	Called from the PROCfs module. This now uses the new arbitrary sized
2641  *	/proc/net interface to create /proc/net/dev
2642  */
2643 static int dev_seq_show(struct seq_file *seq, void *v)
2644 {
2645 	if (v == SEQ_START_TOKEN)
2646 		seq_puts(seq, "Inter-|   Receive                            "
2647 			      "                    |  Transmit\n"
2648 			      " face |bytes    packets errs drop fifo frame "
2649 			      "compressed multicast|bytes    packets errs "
2650 			      "drop fifo colls carrier compressed\n");
2651 	else
2652 		dev_seq_printf_stats(seq, v);
2653 	return 0;
2654 }
2655 
2656 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2657 {
2658 	struct netif_rx_stats *rc = NULL;
2659 
2660 	while (*pos < nr_cpu_ids)
2661 		if (cpu_online(*pos)) {
2662 			rc = &per_cpu(netdev_rx_stat, *pos);
2663 			break;
2664 		} else
2665 			++*pos;
2666 	return rc;
2667 }
2668 
2669 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2670 {
2671 	return softnet_get_online(pos);
2672 }
2673 
2674 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2675 {
2676 	++*pos;
2677 	return softnet_get_online(pos);
2678 }
2679 
2680 static void softnet_seq_stop(struct seq_file *seq, void *v)
2681 {
2682 }
2683 
2684 static int softnet_seq_show(struct seq_file *seq, void *v)
2685 {
2686 	struct netif_rx_stats *s = v;
2687 
2688 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2689 		   s->total, s->dropped, s->time_squeeze, 0,
2690 		   0, 0, 0, 0, /* was fastroute */
2691 		   s->cpu_collision );
2692 	return 0;
2693 }
2694 
2695 static const struct seq_operations dev_seq_ops = {
2696 	.start = dev_seq_start,
2697 	.next  = dev_seq_next,
2698 	.stop  = dev_seq_stop,
2699 	.show  = dev_seq_show,
2700 };
2701 
2702 static int dev_seq_open(struct inode *inode, struct file *file)
2703 {
2704 	return seq_open_net(inode, file, &dev_seq_ops,
2705 			    sizeof(struct seq_net_private));
2706 }
2707 
2708 static const struct file_operations dev_seq_fops = {
2709 	.owner	 = THIS_MODULE,
2710 	.open    = dev_seq_open,
2711 	.read    = seq_read,
2712 	.llseek  = seq_lseek,
2713 	.release = seq_release_net,
2714 };
2715 
2716 static const struct seq_operations softnet_seq_ops = {
2717 	.start = softnet_seq_start,
2718 	.next  = softnet_seq_next,
2719 	.stop  = softnet_seq_stop,
2720 	.show  = softnet_seq_show,
2721 };
2722 
2723 static int softnet_seq_open(struct inode *inode, struct file *file)
2724 {
2725 	return seq_open(file, &softnet_seq_ops);
2726 }
2727 
2728 static const struct file_operations softnet_seq_fops = {
2729 	.owner	 = THIS_MODULE,
2730 	.open    = softnet_seq_open,
2731 	.read    = seq_read,
2732 	.llseek  = seq_lseek,
2733 	.release = seq_release,
2734 };
2735 
2736 static void *ptype_get_idx(loff_t pos)
2737 {
2738 	struct packet_type *pt = NULL;
2739 	loff_t i = 0;
2740 	int t;
2741 
2742 	list_for_each_entry_rcu(pt, &ptype_all, list) {
2743 		if (i == pos)
2744 			return pt;
2745 		++i;
2746 	}
2747 
2748 	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
2749 		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2750 			if (i == pos)
2751 				return pt;
2752 			++i;
2753 		}
2754 	}
2755 	return NULL;
2756 }
2757 
2758 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2759 	__acquires(RCU)
2760 {
2761 	rcu_read_lock();
2762 	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2763 }
2764 
2765 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2766 {
2767 	struct packet_type *pt;
2768 	struct list_head *nxt;
2769 	int hash;
2770 
2771 	++*pos;
2772 	if (v == SEQ_START_TOKEN)
2773 		return ptype_get_idx(0);
2774 
2775 	pt = v;
2776 	nxt = pt->list.next;
2777 	if (pt->type == htons(ETH_P_ALL)) {
2778 		if (nxt != &ptype_all)
2779 			goto found;
2780 		hash = 0;
2781 		nxt = ptype_base[0].next;
2782 	} else
2783 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
2784 
2785 	while (nxt == &ptype_base[hash]) {
2786 		if (++hash >= PTYPE_HASH_SIZE)
2787 			return NULL;
2788 		nxt = ptype_base[hash].next;
2789 	}
2790 found:
2791 	return list_entry(nxt, struct packet_type, list);
2792 }
2793 
2794 static void ptype_seq_stop(struct seq_file *seq, void *v)
2795 	__releases(RCU)
2796 {
2797 	rcu_read_unlock();
2798 }
2799 
2800 static void ptype_seq_decode(struct seq_file *seq, void *sym)
2801 {
2802 #ifdef CONFIG_KALLSYMS
2803 	unsigned long offset = 0, symsize;
2804 	const char *symname;
2805 	char *modname;
2806 	char namebuf[128];
2807 
2808 	symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
2809 				  &modname, namebuf);
2810 
2811 	if (symname) {
2812 		char *delim = ":";
2813 
2814 		if (!modname)
2815 			modname = delim = "";
2816 		seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
2817 			   symname, offset);
2818 		return;
2819 	}
2820 #endif
2821 
2822 	seq_printf(seq, "[%p]", sym);
2823 }
2824 
2825 static int ptype_seq_show(struct seq_file *seq, void *v)
2826 {
2827 	struct packet_type *pt = v;
2828 
2829 	if (v == SEQ_START_TOKEN)
2830 		seq_puts(seq, "Type Device      Function\n");
2831 	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
2832 		if (pt->type == htons(ETH_P_ALL))
2833 			seq_puts(seq, "ALL ");
2834 		else
2835 			seq_printf(seq, "%04x", ntohs(pt->type));
2836 
2837 		seq_printf(seq, " %-8s ",
2838 			   pt->dev ? pt->dev->name : "");
2839 		ptype_seq_decode(seq,  pt->func);
2840 		seq_putc(seq, '\n');
2841 	}
2842 
2843 	return 0;
2844 }
2845 
2846 static const struct seq_operations ptype_seq_ops = {
2847 	.start = ptype_seq_start,
2848 	.next  = ptype_seq_next,
2849 	.stop  = ptype_seq_stop,
2850 	.show  = ptype_seq_show,
2851 };
2852 
2853 static int ptype_seq_open(struct inode *inode, struct file *file)
2854 {
2855 	return seq_open_net(inode, file, &ptype_seq_ops,
2856 			sizeof(struct seq_net_private));
2857 }
2858 
2859 static const struct file_operations ptype_seq_fops = {
2860 	.owner	 = THIS_MODULE,
2861 	.open    = ptype_seq_open,
2862 	.read    = seq_read,
2863 	.llseek  = seq_lseek,
2864 	.release = seq_release_net,
2865 };
2866 
2867 
2868 static int __net_init dev_proc_net_init(struct net *net)
2869 {
2870 	int rc = -ENOMEM;
2871 
2872 	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
2873 		goto out;
2874 	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
2875 		goto out_dev;
2876 	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
2877 		goto out_softnet;
2878 
2879 	if (wext_proc_init(net))
2880 		goto out_ptype;
2881 	rc = 0;
2882 out:
2883 	return rc;
2884 out_ptype:
2885 	proc_net_remove(net, "ptype");
2886 out_softnet:
2887 	proc_net_remove(net, "softnet_stat");
2888 out_dev:
2889 	proc_net_remove(net, "dev");
2890 	goto out;
2891 }
2892 
2893 static void __net_exit dev_proc_net_exit(struct net *net)
2894 {
2895 	wext_proc_exit(net);
2896 
2897 	proc_net_remove(net, "ptype");
2898 	proc_net_remove(net, "softnet_stat");
2899 	proc_net_remove(net, "dev");
2900 }
2901 
2902 static struct pernet_operations __net_initdata dev_proc_ops = {
2903 	.init = dev_proc_net_init,
2904 	.exit = dev_proc_net_exit,
2905 };
2906 
2907 static int __init dev_proc_init(void)
2908 {
2909 	return register_pernet_subsys(&dev_proc_ops);
2910 }
2911 #else
2912 #define dev_proc_init() 0
2913 #endif	/* CONFIG_PROC_FS */
2914 
2915 
2916 /**
2917  *	netdev_set_master	-	set up master/slave pair
2918  *	@slave: slave device
2919  *	@master: new master device
2920  *
2921  *	Changes the master device of the slave. Pass %NULL to break the
2922  *	bonding. The caller must hold the RTNL semaphore. On a failure
2923  *	a negative errno code is returned. On success the reference counts
2924  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2925  *	function returns zero.
2926  */
2927 int netdev_set_master(struct net_device *slave, struct net_device *master)
2928 {
2929 	struct net_device *old = slave->master;
2930 
2931 	ASSERT_RTNL();
2932 
2933 	if (master) {
2934 		if (old)
2935 			return -EBUSY;
2936 		dev_hold(master);
2937 	}
2938 
2939 	slave->master = master;
2940 
2941 	synchronize_net();
2942 
2943 	if (old)
2944 		dev_put(old);
2945 
2946 	if (master)
2947 		slave->flags |= IFF_SLAVE;
2948 	else
2949 		slave->flags &= ~IFF_SLAVE;
2950 
2951 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2952 	return 0;
2953 }
2954 
2955 static void dev_change_rx_flags(struct net_device *dev, int flags)
2956 {
2957 	if (dev->flags & IFF_UP && dev->change_rx_flags)
2958 		dev->change_rx_flags(dev, flags);
2959 }
2960 
2961 static int __dev_set_promiscuity(struct net_device *dev, int inc)
2962 {
2963 	unsigned short old_flags = dev->flags;
2964 
2965 	ASSERT_RTNL();
2966 
2967 	dev->flags |= IFF_PROMISC;
2968 	dev->promiscuity += inc;
2969 	if (dev->promiscuity == 0) {
2970 		/*
2971 		 * Avoid overflow.
2972 		 * If inc causes overflow, untouch promisc and return error.
2973 		 */
2974 		if (inc < 0)
2975 			dev->flags &= ~IFF_PROMISC;
2976 		else {
2977 			dev->promiscuity -= inc;
2978 			printk(KERN_WARNING "%s: promiscuity touches roof, "
2979 				"set promiscuity failed, promiscuity feature "
2980 				"of device might be broken.\n", dev->name);
2981 			return -EOVERFLOW;
2982 		}
2983 	}
2984 	if (dev->flags != old_flags) {
2985 		printk(KERN_INFO "device %s %s promiscuous mode\n",
2986 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2987 							       "left");
2988 		if (audit_enabled)
2989 			audit_log(current->audit_context, GFP_ATOMIC,
2990 				AUDIT_ANOM_PROMISCUOUS,
2991 				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
2992 				dev->name, (dev->flags & IFF_PROMISC),
2993 				(old_flags & IFF_PROMISC),
2994 				audit_get_loginuid(current),
2995 				current->uid, current->gid,
2996 				audit_get_sessionid(current));
2997 
2998 		dev_change_rx_flags(dev, IFF_PROMISC);
2999 	}
3000 	return 0;
3001 }
3002 
3003 /**
3004  *	dev_set_promiscuity	- update promiscuity count on a device
3005  *	@dev: device
3006  *	@inc: modifier
3007  *
3008  *	Add or remove promiscuity from a device. While the count in the device
3009  *	remains above zero the interface remains promiscuous. Once it hits zero
3010  *	the device reverts back to normal filtering operation. A negative inc
3011  *	value is used to drop promiscuity on the device.
3012  *	Return 0 if successful or a negative errno code on error.
3013  */
3014 int dev_set_promiscuity(struct net_device *dev, int inc)
3015 {
3016 	unsigned short old_flags = dev->flags;
3017 	int err;
3018 
3019 	err = __dev_set_promiscuity(dev, inc);
3020 	if (err < 0)
3021 		return err;
3022 	if (dev->flags != old_flags)
3023 		dev_set_rx_mode(dev);
3024 	return err;
3025 }
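/*
 * Illustrative use (hedged): a packet-capture style caller takes a
 * promiscuity reference while it is listening and drops it afterwards; the
 * device only leaves promiscuous mode once all such references are gone.
 * The RTNL must be held around both calls:
 *
 *	rtnl_lock();
 *	err = dev_set_promiscuity(dev, 1);
 *	rtnl_unlock();
 *	...
 *	rtnl_lock();
 *	dev_set_promiscuity(dev, -1);
 *	rtnl_unlock();
 */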
3026 
3027 /**
3028  *	dev_set_allmulti	- update allmulti count on a device
3029  *	@dev: device
3030  *	@inc: modifier
3031  *
3032  *	Add or remove reception of all multicast frames to a device. While the
3033  *	count in the device remains above zero the interface remains listening
3034  *	to all interfaces. Once it hits zero the device reverts back to normal
3035  *	filtering operation. A negative @inc value is used to drop the counter
3036  *	when releasing a resource needing all multicasts.
3037  *	Return 0 if successful or a negative errno code on error.
3038  */
3039 
3040 int dev_set_allmulti(struct net_device *dev, int inc)
3041 {
3042 	unsigned short old_flags = dev->flags;
3043 
3044 	ASSERT_RTNL();
3045 
3046 	dev->flags |= IFF_ALLMULTI;
3047 	dev->allmulti += inc;
3048 	if (dev->allmulti == 0) {
3049 		/*
3050 		 * Avoid overflow.
3051 		 * If inc causes overflow, untouch allmulti and return error.
3052 		 */
3053 		if (inc < 0)
3054 			dev->flags &= ~IFF_ALLMULTI;
3055 		else {
3056 			dev->allmulti -= inc;
3057 			printk(KERN_WARNING "%s: allmulti touches roof, "
3058 				"set allmulti failed, allmulti feature of "
3059 				"device might be broken.\n", dev->name);
3060 			return -EOVERFLOW;
3061 		}
3062 	}
3063 	if (dev->flags ^ old_flags) {
3064 		dev_change_rx_flags(dev, IFF_ALLMULTI);
3065 		dev_set_rx_mode(dev);
3066 	}
3067 	return 0;
3068 }
3069 
3070 /*
3071  *	Upload unicast and multicast address lists to device and
3072  *	configure RX filtering. When the device doesn't support unicast
3073  *	filtering it is put in promiscuous mode while unicast addresses
3074  *	are present.
3075  */
3076 void __dev_set_rx_mode(struct net_device *dev)
3077 {
3078 	/* dev_open will call this function so the list will stay sane. */
3079 	if (!(dev->flags&IFF_UP))
3080 		return;
3081 
3082 	if (!netif_device_present(dev))
3083 		return;
3084 
3085 	if (dev->set_rx_mode)
3086 		dev->set_rx_mode(dev);
3087 	else {
3088 		/* Unicast addresses changes may only happen under the rtnl,
3089 		 * therefore calling __dev_set_promiscuity here is safe.
3090 		 */
3091 		if (dev->uc_count > 0 && !dev->uc_promisc) {
3092 			__dev_set_promiscuity(dev, 1);
3093 			dev->uc_promisc = 1;
3094 		} else if (dev->uc_count == 0 && dev->uc_promisc) {
3095 			__dev_set_promiscuity(dev, -1);
3096 			dev->uc_promisc = 0;
3097 		}
3098 
3099 		if (dev->set_multicast_list)
3100 			dev->set_multicast_list(dev);
3101 	}
3102 }
3103 
3104 void dev_set_rx_mode(struct net_device *dev)
3105 {
3106 	netif_addr_lock_bh(dev);
3107 	__dev_set_rx_mode(dev);
3108 	netif_addr_unlock_bh(dev);
3109 }
3110 
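/*
 * The helpers below maintain the reference-counted secondary address lists
 * (dev->uc_list and dev->mc_list).  Each dev_addr_list entry tracks its
 * users in da_users (and "global" users in da_gusers); an entry is only
 * unlinked and freed when da_users drops to zero.  Callers are expected to
 * hold the device's address lock (netif_addr_lock).
 */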
3111 int __dev_addr_delete(struct dev_addr_list **list, int *count,
3112 		      void *addr, int alen, int glbl)
3113 {
3114 	struct dev_addr_list *da;
3115 
3116 	for (; (da = *list) != NULL; list = &da->next) {
3117 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3118 		    alen == da->da_addrlen) {
3119 			if (glbl) {
3120 				int old_glbl = da->da_gusers;
3121 				da->da_gusers = 0;
3122 				if (old_glbl == 0)
3123 					break;
3124 			}
3125 			if (--da->da_users)
3126 				return 0;
3127 
3128 			*list = da->next;
3129 			kfree(da);
3130 			(*count)--;
3131 			return 0;
3132 		}
3133 	}
3134 	return -ENOENT;
3135 }
3136 
3137 int __dev_addr_add(struct dev_addr_list **list, int *count,
3138 		   void *addr, int alen, int glbl)
3139 {
3140 	struct dev_addr_list *da;
3141 
3142 	for (da = *list; da != NULL; da = da->next) {
3143 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3144 		    da->da_addrlen == alen) {
3145 			if (glbl) {
3146 				int old_glbl = da->da_gusers;
3147 				da->da_gusers = 1;
3148 				if (old_glbl)
3149 					return 0;
3150 			}
3151 			da->da_users++;
3152 			return 0;
3153 		}
3154 	}
3155 
3156 	da = kzalloc(sizeof(*da), GFP_ATOMIC);
3157 	if (da == NULL)
3158 		return -ENOMEM;
3159 	memcpy(da->da_addr, addr, alen);
3160 	da->da_addrlen = alen;
3161 	da->da_users = 1;
3162 	da->da_gusers = glbl ? 1 : 0;
3163 	da->next = *list;
3164 	*list = da;
3165 	(*count)++;
3166 	return 0;
3167 }
3168 
3169 /**
3170  *	dev_unicast_delete	- Release secondary unicast address.
3171  *	@dev: device
3172  *	@addr: address to delete
3173  *	@alen: length of @addr
3174  *
3175  *	Release reference to a secondary unicast address and remove it
3176  *	from the device if the reference count drops to zero.
3177  *
3178  * 	The caller must hold the rtnl_mutex.
3179  */
3180 int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
3181 {
3182 	int err;
3183 
3184 	ASSERT_RTNL();
3185 
3186 	netif_addr_lock_bh(dev);
3187 	err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3188 	if (!err)
3189 		__dev_set_rx_mode(dev);
3190 	netif_addr_unlock_bh(dev);
3191 	return err;
3192 }
3193 EXPORT_SYMBOL(dev_unicast_delete);
3194 
3195 /**
3196  *	dev_unicast_add		- add a secondary unicast address
3197  *	@dev: device
3198  *	@addr: address to add
3199  *	@alen: length of @addr
3200  *
3201  *	Add a secondary unicast address to the device or increase
3202  *	the reference count if it already exists.
3203  *
3204  *	The caller must hold the rtnl_mutex.
3205  */
3206 int dev_unicast_add(struct net_device *dev, void *addr, int alen)
3207 {
3208 	int err;
3209 
3210 	ASSERT_RTNL();
3211 
3212 	netif_addr_lock_bh(dev);
3213 	err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3214 	if (!err)
3215 		__dev_set_rx_mode(dev);
3216 	netif_addr_unlock_bh(dev);
3217 	return err;
3218 }
3219 EXPORT_SYMBOL(dev_unicast_add);
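/*
 * Illustrative use (hedged; the lower_dev/upper_dev naming is
 * hypothetical): a stacked device that needs its own MAC address accepted
 * by the underlying device adds it as a secondary unicast address, and
 * removes it again on teardown.  Both calls require the RTNL:
 *
 *	rtnl_lock();
 *	err = dev_unicast_add(lower_dev, upper_dev->dev_addr, ETH_ALEN);
 *	...
 *	dev_unicast_delete(lower_dev, upper_dev->dev_addr, ETH_ALEN);
 *	rtnl_unlock();
 */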
3220 
3221 int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3222 		    struct dev_addr_list **from, int *from_count)
3223 {
3224 	struct dev_addr_list *da, *next;
3225 	int err = 0;
3226 
3227 	da = *from;
3228 	while (da != NULL) {
3229 		next = da->next;
3230 		if (!da->da_synced) {
3231 			err = __dev_addr_add(to, to_count,
3232 					     da->da_addr, da->da_addrlen, 0);
3233 			if (err < 0)
3234 				break;
3235 			da->da_synced = 1;
3236 			da->da_users++;
3237 		} else if (da->da_users == 1) {
3238 			__dev_addr_delete(to, to_count,
3239 					  da->da_addr, da->da_addrlen, 0);
3240 			__dev_addr_delete(from, from_count,
3241 					  da->da_addr, da->da_addrlen, 0);
3242 		}
3243 		da = next;
3244 	}
3245 	return err;
3246 }
3247 
3248 void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3249 		       struct dev_addr_list **from, int *from_count)
3250 {
3251 	struct dev_addr_list *da, *next;
3252 
3253 	da = *from;
3254 	while (da != NULL) {
3255 		next = da->next;
3256 		if (da->da_synced) {
3257 			__dev_addr_delete(to, to_count,
3258 					  da->da_addr, da->da_addrlen, 0);
3259 			da->da_synced = 0;
3260 			__dev_addr_delete(from, from_count,
3261 					  da->da_addr, da->da_addrlen, 0);
3262 		}
3263 		da = next;
3264 	}
3265 }
3266 
3267 /**
3268  *	dev_unicast_sync - Synchronize device's unicast list to another device
3269  *	@to: destination device
3270  *	@from: source device
3271  *
3272  *	Add newly added addresses to the destination device and release
3273  *	addresses that have no users left. The source device must be
3274  *	locked by netif_tx_lock_bh.
3275  *
3276  *	This function is intended to be called from the dev->set_rx_mode
3277  *	function of layered software devices.
3278  */
3279 int dev_unicast_sync(struct net_device *to, struct net_device *from)
3280 {
3281 	int err = 0;
3282 
3283 	netif_addr_lock_bh(to);
3284 	err = __dev_addr_sync(&to->uc_list, &to->uc_count,
3285 			      &from->uc_list, &from->uc_count);
3286 	if (!err)
3287 		__dev_set_rx_mode(to);
3288 	netif_addr_unlock_bh(to);
3289 	return err;
3290 }
3291 EXPORT_SYMBOL(dev_unicast_sync);
3292 
3293 /**
3294  *	dev_unicast_unsync - Remove synchronized addresses from the destination device
3295  *	@to: destination device
3296  *	@from: source device
3297  *
3298  *	Remove all addresses that were added to the destination device by
3299  *	dev_unicast_sync(). This function is intended to be called from the
3300  *	dev->stop function of layered software devices.
3301  */
3302 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3303 {
3304 	netif_addr_lock_bh(from);
3305 	netif_addr_lock(to);
3306 
3307 	__dev_addr_unsync(&to->uc_list, &to->uc_count,
3308 			  &from->uc_list, &from->uc_count);
3309 	__dev_set_rx_mode(to);
3310 
3311 	netif_addr_unlock(to);
3312 	netif_addr_unlock_bh(from);
3313 }
3314 EXPORT_SYMBOL(dev_unicast_unsync);
3315 
3316 static void __dev_addr_discard(struct dev_addr_list **list)
3317 {
3318 	struct dev_addr_list *tmp;
3319 
3320 	while (*list != NULL) {
3321 		tmp = *list;
3322 		*list = tmp->next;
3323 		if (tmp->da_users > tmp->da_gusers)
3324 			printk("__dev_addr_discard: address leakage! "
3325 			       "da_users=%d\n", tmp->da_users);
3326 		kfree(tmp);
3327 	}
3328 }
3329 
3330 static void dev_addr_discard(struct net_device *dev)
3331 {
3332 	netif_addr_lock_bh(dev);
3333 
3334 	__dev_addr_discard(&dev->uc_list);
3335 	dev->uc_count = 0;
3336 
3337 	__dev_addr_discard(&dev->mc_list);
3338 	dev->mc_count = 0;
3339 
3340 	netif_addr_unlock_bh(dev);
3341 }
3342 
3343 /**
3344  *	dev_get_flags - get flags reported to userspace
3345  *	@dev: device
3346  *
3347  *	Get the combination of flag bits exported through APIs to userspace.
3348  */
3349 unsigned dev_get_flags(const struct net_device *dev)
3350 {
3351 	unsigned flags;
3352 
3353 	flags = (dev->flags & ~(IFF_PROMISC |
3354 				IFF_ALLMULTI |
3355 				IFF_RUNNING |
3356 				IFF_LOWER_UP |
3357 				IFF_DORMANT)) |
3358 		(dev->gflags & (IFF_PROMISC |
3359 				IFF_ALLMULTI));
3360 
3361 	if (netif_running(dev)) {
3362 		if (netif_oper_up(dev))
3363 			flags |= IFF_RUNNING;
3364 		if (netif_carrier_ok(dev))
3365 			flags |= IFF_LOWER_UP;
3366 		if (netif_dormant(dev))
3367 			flags |= IFF_DORMANT;
3368 	}
3369 
3370 	return flags;
3371 }
3372 
3373 /**
3374  *	dev_change_flags - change device settings
3375  *	@dev: device
3376  *	@flags: device state flags
3377  *
3378  *	Change settings on device based state flags. The flags are
3379  *	in the userspace exported format.
3380  */
3381 int dev_change_flags(struct net_device *dev, unsigned flags)
3382 {
3383 	int ret, changes;
3384 	int old_flags = dev->flags;
3385 
3386 	ASSERT_RTNL();
3387 
3388 	/*
3389 	 *	Set the flags on our device.
3390 	 */
3391 
3392 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3393 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3394 			       IFF_AUTOMEDIA)) |
3395 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3396 				    IFF_ALLMULTI));
3397 
3398 	/*
3399 	 *	Load in the correct multicast list now the flags have changed.
3400 	 */
3401 
3402 	if ((old_flags ^ flags) & IFF_MULTICAST)
3403 		dev_change_rx_flags(dev, IFF_MULTICAST);
3404 
3405 	dev_set_rx_mode(dev);
3406 
3407 	/*
3408 	 *	Have we downed the interface?  We handle IFF_UP ourselves
3409 	 *	according to user attempts to set it, rather than blindly
3410 	 *	setting it.
3411 	 */
3412 
3413 	ret = 0;
3414 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
3415 		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3416 
3417 		if (!ret)
3418 			dev_set_rx_mode(dev);
3419 	}
3420 
3421 	if (dev->flags & IFF_UP &&
3422 	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3423 					  IFF_VOLATILE)))
3424 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
3425 
3426 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
3427 		int inc = (flags & IFF_PROMISC) ? +1 : -1;
3428 		dev->gflags ^= IFF_PROMISC;
3429 		dev_set_promiscuity(dev, inc);
3430 	}
3431 
3432 	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3433 	   is important. Some (broken) drivers set IFF_PROMISC when
3434 	   IFF_ALLMULTI is requested, without asking us and without reporting it.
3435 	 */
3436 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3437 		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3438 		dev->gflags ^= IFF_ALLMULTI;
3439 		dev_set_allmulti(dev, inc);
3440 	}
3441 
3442 	/* Exclude state transition flags, already notified */
3443 	changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3444 	if (changes)
3445 		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
3446 
3447 	return ret;
3448 }
3449 
3450 /**
3451  *	dev_set_mtu - Change maximum transfer unit
3452  *	@dev: device
3453  *	@new_mtu: new transfer unit
3454  *
3455  *	Change the maximum transfer size of the network device.
3456  */
3457 int dev_set_mtu(struct net_device *dev, int new_mtu)
3458 {
3459 	int err;
3460 
3461 	if (new_mtu == dev->mtu)
3462 		return 0;
3463 
3464 	/*	MTU must be positive.	 */
3465 	if (new_mtu < 0)
3466 		return -EINVAL;
3467 
3468 	if (!netif_device_present(dev))
3469 		return -ENODEV;
3470 
3471 	err = 0;
3472 	if (dev->change_mtu)
3473 		err = dev->change_mtu(dev, new_mtu);
3474 	else
3475 		dev->mtu = new_mtu;
3476 	if (!err && dev->flags & IFF_UP)
3477 		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
3478 	return err;
3479 }
3480 
3481 /**
3482  *	dev_set_mac_address - Change Media Access Control Address
3483  *	@dev: device
3484  *	@sa: new address
3485  *
3486  *	Change the hardware (MAC) address of the device
3487  */
3488 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3489 {
3490 	int err;
3491 
3492 	if (!dev->set_mac_address)
3493 		return -EOPNOTSUPP;
3494 	if (sa->sa_family != dev->type)
3495 		return -EINVAL;
3496 	if (!netif_device_present(dev))
3497 		return -ENODEV;
3498 	err = dev->set_mac_address(dev, sa);
3499 	if (!err)
3500 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3501 	return err;
3502 }
3503 
3504 /*
3505  *	Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
3506  */
3507 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
3508 {
3509 	int err;
3510 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3511 
3512 	if (!dev)
3513 		return -ENODEV;
3514 
3515 	switch (cmd) {
3516 		case SIOCGIFFLAGS:	/* Get interface flags */
3517 			ifr->ifr_flags = dev_get_flags(dev);
3518 			return 0;
3519 
3520 		case SIOCGIFMETRIC:	/* Get the metric on the interface
3521 					   (currently unused) */
3522 			ifr->ifr_metric = 0;
3523 			return 0;
3524 
3525 		case SIOCGIFMTU:	/* Get the MTU of a device */
3526 			ifr->ifr_mtu = dev->mtu;
3527 			return 0;
3528 
3529 		case SIOCGIFHWADDR:
3530 			if (!dev->addr_len)
3531 				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3532 			else
3533 				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3534 				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3535 			ifr->ifr_hwaddr.sa_family = dev->type;
3536 			return 0;
3537 
3538 		case SIOCGIFSLAVE:
3539 			err = -EINVAL;
3540 			break;
3541 
3542 		case SIOCGIFMAP:
3543 			ifr->ifr_map.mem_start = dev->mem_start;
3544 			ifr->ifr_map.mem_end   = dev->mem_end;
3545 			ifr->ifr_map.base_addr = dev->base_addr;
3546 			ifr->ifr_map.irq       = dev->irq;
3547 			ifr->ifr_map.dma       = dev->dma;
3548 			ifr->ifr_map.port      = dev->if_port;
3549 			return 0;
3550 
3551 		case SIOCGIFINDEX:
3552 			ifr->ifr_ifindex = dev->ifindex;
3553 			return 0;
3554 
3555 		case SIOCGIFTXQLEN:
3556 			ifr->ifr_qlen = dev->tx_queue_len;
3557 			return 0;
3558 
3559 		default:
3560 			/* dev_ioctl() should ensure this case
3561 			 * is never reached
3562 			 */
3563 			WARN_ON(1);
3564 			err = -EINVAL;
3565 			break;
3566 
3567 	}
3568 	return err;
3569 }
3570 
3571 /*
3572  *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
3573  */
3574 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3575 {
3576 	int err;
3577 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3578 
3579 	if (!dev)
3580 		return -ENODEV;
3581 
3582 	switch (cmd) {
3583 		case SIOCSIFFLAGS:	/* Set interface flags */
3584 			return dev_change_flags(dev, ifr->ifr_flags);
3585 
3586 		case SIOCSIFMETRIC:	/* Set the metric on the interface
3587 					   (currently unused) */
3588 			return -EOPNOTSUPP;
3589 
3590 		case SIOCSIFMTU:	/* Set the MTU of a device */
3591 			return dev_set_mtu(dev, ifr->ifr_mtu);
3592 
3593 		case SIOCSIFHWADDR:
3594 			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3595 
3596 		case SIOCSIFHWBROADCAST:
3597 			if (ifr->ifr_hwaddr.sa_family != dev->type)
3598 				return -EINVAL;
3599 			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3600 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3601 			call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3602 			return 0;
3603 
3604 		case SIOCSIFMAP:
3605 			if (dev->set_config) {
3606 				if (!netif_device_present(dev))
3607 					return -ENODEV;
3608 				return dev->set_config(dev, &ifr->ifr_map);
3609 			}
3610 			return -EOPNOTSUPP;
3611 
3612 		case SIOCADDMULTI:
3613 			if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
3614 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3615 				return -EINVAL;
3616 			if (!netif_device_present(dev))
3617 				return -ENODEV;
3618 			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3619 					  dev->addr_len, 1);
3620 
3621 		case SIOCDELMULTI:
3622 			if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
3623 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3624 				return -EINVAL;
3625 			if (!netif_device_present(dev))
3626 				return -ENODEV;
3627 			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3628 					     dev->addr_len, 1);
3629 
3630 		case SIOCSIFTXQLEN:
3631 			if (ifr->ifr_qlen < 0)
3632 				return -EINVAL;
3633 			dev->tx_queue_len = ifr->ifr_qlen;
3634 			return 0;
3635 
3636 		case SIOCSIFNAME:
3637 			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3638 			return dev_change_name(dev, ifr->ifr_newname);
3639 
3640 		/*
3641 		 *	Unknown or private ioctl
3642 		 */
3643 
3644 		default:
3645 			if ((cmd >= SIOCDEVPRIVATE &&
3646 			    cmd <= SIOCDEVPRIVATE + 15) ||
3647 			    cmd == SIOCBONDENSLAVE ||
3648 			    cmd == SIOCBONDRELEASE ||
3649 			    cmd == SIOCBONDSETHWADDR ||
3650 			    cmd == SIOCBONDSLAVEINFOQUERY ||
3651 			    cmd == SIOCBONDINFOQUERY ||
3652 			    cmd == SIOCBONDCHANGEACTIVE ||
3653 			    cmd == SIOCGMIIPHY ||
3654 			    cmd == SIOCGMIIREG ||
3655 			    cmd == SIOCSMIIREG ||
3656 			    cmd == SIOCBRADDIF ||
3657 			    cmd == SIOCBRDELIF ||
3658 			    cmd == SIOCWANDEV) {
3659 				err = -EOPNOTSUPP;
3660 				if (dev->do_ioctl) {
3661 					if (netif_device_present(dev))
3662 						err = dev->do_ioctl(dev, ifr,
3663 								    cmd);
3664 					else
3665 						err = -ENODEV;
3666 				}
3667 			} else
3668 				err = -EINVAL;
3669 
3670 	}
3671 	return err;
3672 }
3673 
3674 /*
3675  *	This function handles all "interface"-type I/O control requests. The actual
3676  *	'doing' part of this is dev_ifsioc above.
3677  */
3678 
3679 /**
3680  *	dev_ioctl	-	network device ioctl
3681  *	@net: the applicable net namespace
3682  *	@cmd: command to issue
3683  *	@arg: pointer to a struct ifreq in user space
3684  *
3685  *	Issue ioctl functions to devices. This is normally called by the
3686  *	user space syscall interfaces but can sometimes be useful for
3687  *	other purposes. The return value is the return from the syscall if
3688  *	positive or a negative errno code on error.
3689  */
3690 
3691 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3692 {
3693 	struct ifreq ifr;
3694 	int ret;
3695 	char *colon;
3696 
3697 	/* One special case: SIOCGIFCONF takes ifconf argument
3698 	   and requires shared lock, because it sleeps writing
3699 	   to user space.
3700 	 */
3701 
3702 	if (cmd == SIOCGIFCONF) {
3703 		rtnl_lock();
3704 		ret = dev_ifconf(net, (char __user *) arg);
3705 		rtnl_unlock();
3706 		return ret;
3707 	}
3708 	if (cmd == SIOCGIFNAME)
3709 		return dev_ifname(net, (struct ifreq __user *)arg);
3710 
3711 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3712 		return -EFAULT;
3713 
3714 	ifr.ifr_name[IFNAMSIZ-1] = 0;
3715 
3716 	colon = strchr(ifr.ifr_name, ':');
3717 	if (colon)
3718 		*colon = 0;
3719 
3720 	/*
3721 	 *	See which interface the caller is talking about.
3722 	 */
3723 
3724 	switch (cmd) {
3725 		/*
3726 		 *	These ioctl calls:
3727 		 *	- can be done by all.
3728 		 *	- atomic and do not require locking.
3729 		 *	- return a value
3730 		 */
3731 		case SIOCGIFFLAGS:
3732 		case SIOCGIFMETRIC:
3733 		case SIOCGIFMTU:
3734 		case SIOCGIFHWADDR:
3735 		case SIOCGIFSLAVE:
3736 		case SIOCGIFMAP:
3737 		case SIOCGIFINDEX:
3738 		case SIOCGIFTXQLEN:
3739 			dev_load(net, ifr.ifr_name);
3740 			read_lock(&dev_base_lock);
3741 			ret = dev_ifsioc_locked(net, &ifr, cmd);
3742 			read_unlock(&dev_base_lock);
3743 			if (!ret) {
3744 				if (colon)
3745 					*colon = ':';
3746 				if (copy_to_user(arg, &ifr,
3747 						 sizeof(struct ifreq)))
3748 					ret = -EFAULT;
3749 			}
3750 			return ret;
3751 
3752 		case SIOCETHTOOL:
3753 			dev_load(net, ifr.ifr_name);
3754 			rtnl_lock();
3755 			ret = dev_ethtool(net, &ifr);
3756 			rtnl_unlock();
3757 			if (!ret) {
3758 				if (colon)
3759 					*colon = ':';
3760 				if (copy_to_user(arg, &ifr,
3761 						 sizeof(struct ifreq)))
3762 					ret = -EFAULT;
3763 			}
3764 			return ret;
3765 
3766 		/*
3767 		 *	These ioctl calls:
3768 		 *	- require superuser power.
3769 		 *	- require strict serialization.
3770 		 *	- return a value
3771 		 */
3772 		case SIOCGMIIPHY:
3773 		case SIOCGMIIREG:
3774 		case SIOCSIFNAME:
3775 			if (!capable(CAP_NET_ADMIN))
3776 				return -EPERM;
3777 			dev_load(net, ifr.ifr_name);
3778 			rtnl_lock();
3779 			ret = dev_ifsioc(net, &ifr, cmd);
3780 			rtnl_unlock();
3781 			if (!ret) {
3782 				if (colon)
3783 					*colon = ':';
3784 				if (copy_to_user(arg, &ifr,
3785 						 sizeof(struct ifreq)))
3786 					ret = -EFAULT;
3787 			}
3788 			return ret;
3789 
3790 		/*
3791 		 *	These ioctl calls:
3792 		 *	- require superuser power.
3793 		 *	- require strict serialization.
3794 		 *	- do not return a value
3795 		 */
3796 		case SIOCSIFFLAGS:
3797 		case SIOCSIFMETRIC:
3798 		case SIOCSIFMTU:
3799 		case SIOCSIFMAP:
3800 		case SIOCSIFHWADDR:
3801 		case SIOCSIFSLAVE:
3802 		case SIOCADDMULTI:
3803 		case SIOCDELMULTI:
3804 		case SIOCSIFHWBROADCAST:
3805 		case SIOCSIFTXQLEN:
3806 		case SIOCSMIIREG:
3807 		case SIOCBONDENSLAVE:
3808 		case SIOCBONDRELEASE:
3809 		case SIOCBONDSETHWADDR:
3810 		case SIOCBONDCHANGEACTIVE:
3811 		case SIOCBRADDIF:
3812 		case SIOCBRDELIF:
3813 			if (!capable(CAP_NET_ADMIN))
3814 				return -EPERM;
3815 			/* fall through */
3816 		case SIOCBONDSLAVEINFOQUERY:
3817 		case SIOCBONDINFOQUERY:
3818 			dev_load(net, ifr.ifr_name);
3819 			rtnl_lock();
3820 			ret = dev_ifsioc(net, &ifr, cmd);
3821 			rtnl_unlock();
3822 			return ret;
3823 
3824 		case SIOCGIFMEM:
3825 			/* Get the per device memory space. We can add this but
3826 			 * currently do not support it */
3827 		case SIOCSIFMEM:
3828 			/* Set the per device memory buffer space.
3829 			 * Not applicable in our case */
3830 		case SIOCSIFLINK:
3831 			return -EINVAL;
3832 
3833 		/*
3834 		 *	Unknown or private ioctl.
3835 		 */
3836 		default:
3837 			if (cmd == SIOCWANDEV ||
3838 			    (cmd >= SIOCDEVPRIVATE &&
3839 			     cmd <= SIOCDEVPRIVATE + 15)) {
3840 				dev_load(net, ifr.ifr_name);
3841 				rtnl_lock();
3842 				ret = dev_ifsioc(net, &ifr, cmd);
3843 				rtnl_unlock();
3844 				if (!ret && copy_to_user(arg, &ifr,
3845 							 sizeof(struct ifreq)))
3846 					ret = -EFAULT;
3847 				return ret;
3848 			}
3849 			/* Take care of Wireless Extensions */
3850 			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
3851 				return wext_handle_ioctl(net, &ifr, cmd, arg);
3852 			return -EINVAL;
3853 	}
3854 }
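/*
 * Userspace view, for reference (hedged, illustrative snippet; not kernel
 * code, and mac[] is the caller's own buffer).  One of the read-only
 * requests dispatched above to dev_ifsioc_locked():
 *
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *	if (ioctl(fd, SIOCGIFHWADDR, &ifr) == 0)
 *		memcpy(mac, ifr.ifr_hwaddr.sa_data, ETH_ALEN);
 */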
3855 
3856 
3857 /**
3858  *	dev_new_index	-	allocate an ifindex
3859  *	@net: the applicable net namespace
3860  *
3861  *	Returns a suitable unique value for a new device interface
3862  *	number.  The caller must hold the rtnl semaphore or the
3863  *	dev_base_lock to be sure it remains unique.
3864  */
3865 static int dev_new_index(struct net *net)
3866 {
3867 	static int ifindex;
3868 	for (;;) {
3869 		if (++ifindex <= 0)
3870 			ifindex = 1;
3871 		if (!__dev_get_by_index(net, ifindex))
3872 			return ifindex;
3873 	}
3874 }
3875 
3876 /* Delayed registration/unregistration */
3877 static LIST_HEAD(net_todo_list);
3878 
3879 static void net_set_todo(struct net_device *dev)
3880 {
3881 	list_add_tail(&dev->todo_list, &net_todo_list);
3882 }
3883 
3884 static void rollback_registered(struct net_device *dev)
3885 {
3886 	BUG_ON(dev_boot_phase);
3887 	ASSERT_RTNL();
3888 
3889 	/* Some devices call this without ever registering, to unwind a failed initialization. */
3890 	if (dev->reg_state == NETREG_UNINITIALIZED) {
3891 		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3892 				  "was registered\n", dev->name, dev);
3893 
3894 		WARN_ON(1);
3895 		return;
3896 	}
3897 
3898 	BUG_ON(dev->reg_state != NETREG_REGISTERED);
3899 
3900 	/* If device is running, close it first. */
3901 	dev_close(dev);
3902 
3903 	/* And unlink it from device chain. */
3904 	unlist_netdevice(dev);
3905 
3906 	dev->reg_state = NETREG_UNREGISTERING;
3907 
3908 	synchronize_net();
3909 
3910 	/* Shutdown queueing discipline. */
3911 	dev_shutdown(dev);
3912 
3913 
3914 	/* Notify protocols that we are about to destroy
3915 	   this device. They should clean up all of their state.
3916 	*/
3917 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
3918 
3919 	/*
3920 	 *	Flush the unicast and multicast chains
3921 	 */
3922 	dev_addr_discard(dev);
3923 
3924 	if (dev->uninit)
3925 		dev->uninit(dev);
3926 
3927 	/* Notifier chain MUST detach us from master device. */
3928 	WARN_ON(dev->master);
3929 
3930 	/* Remove entries from kobject tree */
3931 	netdev_unregister_kobject(dev);
3932 
3933 	synchronize_net();
3934 
3935 	dev_put(dev);
3936 }
3937 
3938 static void __netdev_init_queue_locks_one(struct net_device *dev,
3939 					  struct netdev_queue *dev_queue,
3940 					  void *_unused)
3941 {
3942 	spin_lock_init(&dev_queue->_xmit_lock);
3943 	netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
3944 	dev_queue->xmit_lock_owner = -1;
3945 }
3946 
3947 static void netdev_init_queue_locks(struct net_device *dev)
3948 {
3949 	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
3950 	__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
3951 }
3952 
3953 unsigned long netdev_fix_features(unsigned long features, const char *name)
3954 {
3955 	/* Fix illegal SG+CSUM combinations. */
3956 	if ((features & NETIF_F_SG) &&
3957 	    !(features & NETIF_F_ALL_CSUM)) {
3958 		if (name)
3959 			printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
3960 			       "checksum feature.\n", name);
3961 		features &= ~NETIF_F_SG;
3962 	}
3963 
3964 	/* TSO requires that SG is present as well. */
3965 	if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
3966 		if (name)
3967 			printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
3968 			       "SG feature.\n", name);
3969 		features &= ~NETIF_F_TSO;
3970 	}
3971 
3972 	if (features & NETIF_F_UFO) {
3973 		if (!(features & NETIF_F_GEN_CSUM)) {
3974 			if (name)
3975 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
3976 				       "since no NETIF_F_HW_CSUM feature.\n",
3977 				       name);
3978 			features &= ~NETIF_F_UFO;
3979 		}
3980 
3981 		if (!(features & NETIF_F_SG)) {
3982 			if (name)
3983 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
3984 				       "since no NETIF_F_SG feature.\n", name);
3985 			features &= ~NETIF_F_UFO;
3986 		}
3987 	}
3988 
3989 	return features;
3990 }
3991 EXPORT_SYMBOL(netdev_fix_features);
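
/*
 * Illustrative sketch (editorial addition, not part of this revision of
 * dev.c): a driver can run its advertised flags through
 * netdev_fix_features() before registration so that inconsistent
 * combinations (e.g. TSO without SG) are dropped with a console notice.
 * The driver name "mydrv" is hypothetical.
 */
static void mydrv_sanitize_features(struct net_device *dev)
{
	dev->features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_HW_CSUM;
	/* drops TSO again if SG were missing, UFO without HW_CSUM, etc. */
	dev->features = netdev_fix_features(dev->features, dev->name);
}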
3992 
3993 /**
3994  *	register_netdevice	- register a network device
3995  *	@dev: device to register
3996  *
3997  *	Take a completed network device structure and add it to the kernel
3998  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3999  *	chain. 0 is returned on success. A negative errno code is returned
4000  *	on a failure to set up the device, or if the name is a duplicate.
4001  *
4002  *	Callers must hold the rtnl semaphore. You may want
4003  *	register_netdev() instead of this.
4004  *
4005  *	BUGS:
4006  *	The locking appears insufficient to guarantee two parallel registers
4007  *	will not get the same name.
4008  */
4009 
4010 int register_netdevice(struct net_device *dev)
4011 {
4012 	struct hlist_head *head;
4013 	struct hlist_node *p;
4014 	int ret;
4015 	struct net *net;
4016 
4017 	BUG_ON(dev_boot_phase);
4018 	ASSERT_RTNL();
4019 
4020 	might_sleep();
4021 
4022 	/* When net_devices are persistent, this will be fatal. */
4023 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
4024 	BUG_ON(!dev_net(dev));
4025 	net = dev_net(dev);
4026 
4027 	spin_lock_init(&dev->addr_list_lock);
4028 	netdev_set_addr_lockdep_class(dev);
4029 	netdev_init_queue_locks(dev);
4030 
4031 	dev->iflink = -1;
4032 
4033 	/* Init, if this function is available */
4034 	if (dev->init) {
4035 		ret = dev->init(dev);
4036 		if (ret) {
4037 			if (ret > 0)
4038 				ret = -EIO;
4039 			goto out;
4040 		}
4041 	}
4042 
4043 	if (!dev_valid_name(dev->name)) {
4044 		ret = -EINVAL;
4045 		goto err_uninit;
4046 	}
4047 
4048 	dev->ifindex = dev_new_index(net);
4049 	if (dev->iflink == -1)
4050 		dev->iflink = dev->ifindex;
4051 
4052 	/* Check for existence of name */
4053 	head = dev_name_hash(net, dev->name);
4054 	hlist_for_each(p, head) {
4055 		struct net_device *d
4056 			= hlist_entry(p, struct net_device, name_hlist);
4057 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
4058 			ret = -EEXIST;
4059 			goto err_uninit;
4060 		}
4061 	}
4062 
4063 	/* Fix illegal checksum combinations */
4064 	if ((dev->features & NETIF_F_HW_CSUM) &&
4065 	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4066 		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
4067 		       dev->name);
4068 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
4069 	}
4070 
4071 	if ((dev->features & NETIF_F_NO_CSUM) &&
4072 	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4073 		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
4074 		       dev->name);
4075 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
4076 	}
4077 
4078 	dev->features = netdev_fix_features(dev->features, dev->name);
4079 
4080 	/* Enable software GSO if SG is supported. */
4081 	if (dev->features & NETIF_F_SG)
4082 		dev->features |= NETIF_F_GSO;
4083 
4084 	netdev_initialize_kobject(dev);
4085 	ret = netdev_register_kobject(dev);
4086 	if (ret)
4087 		goto err_uninit;
4088 	dev->reg_state = NETREG_REGISTERED;
4089 
4090 	/*
4091 	 *	Default initial state at registration is that the
4092 	 *	device is present.
4093 	 */
4094 
4095 	set_bit(__LINK_STATE_PRESENT, &dev->state);
4096 
4097 	dev_init_scheduler(dev);
4098 	dev_hold(dev);
4099 	list_netdevice(dev);
4100 
4101 	/* Notify protocols, that a new device appeared. */
4102 	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
4103 	ret = notifier_to_errno(ret);
4104 	if (ret) {
4105 		rollback_registered(dev);
4106 		dev->reg_state = NETREG_UNREGISTERED;
4107 	}
4108 
4109 out:
4110 	return ret;
4111 
4112 err_uninit:
4113 	if (dev->uninit)
4114 		dev->uninit(dev);
4115 	goto out;
4116 }
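
/*
 * Illustrative sketch (editorial addition): register_netdevice() is the
 * variant for callers that already hold the rtnl semaphore, e.g. a
 * registration path driven from rtnetlink.  The helper name below is
 * hypothetical.
 */
static int mydrv_register_locked(struct net_device *dev)
{
	ASSERT_RTNL();			/* document the locking contract */
	return register_netdevice(dev);
}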
4117 
4118 /**
4119  *	register_netdev	- register a network device
4120  *	@dev: device to register
4121  *
4122  *	Take a completed network device structure and add it to the kernel
4123  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4124  *	chain. 0 is returned on success. A negative errno code is returned
4125  *	on a failure to set up the device, or if the name is a duplicate.
4126  *
4127  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
4128  *	and expands the device name if you passed a format string to
4129  *	alloc_netdev.
4130  */
4131 int register_netdev(struct net_device *dev)
4132 {
4133 	int err;
4134 
4135 	rtnl_lock();
4136 
4137 	/*
4138 	 * If the name is a format string the caller wants us to do a
4139 	 * name allocation.
4140 	 */
4141 	if (strchr(dev->name, '%')) {
4142 		err = dev_alloc_name(dev, dev->name);
4143 		if (err < 0)
4144 			goto out;
4145 	}
4146 
4147 	err = register_netdevice(dev);
4148 out:
4149 	rtnl_unlock();
4150 	return err;
4151 }
4152 EXPORT_SYMBOL(register_netdev);
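
/*
 * Illustrative sketch (editorial addition): the usual unlocked driver
 * sequence built on register_netdev().  It takes rtnl_lock() itself and
 * expands the "%d" in the name template.  The private structure, setup
 * callback and name template are hypothetical; alloc_netdev() is the
 * single-queue wrapper around alloc_netdev_mq() below.
 */
struct mydrv_priv {
	int dummy;
};

static void mydrv_setup(struct net_device *dev)
{
	ether_setup(dev);		/* standard Ethernet defaults */
}

static int __init mydrv_probe(void)
{
	struct net_device *dev;
	int err;

	dev = alloc_netdev(sizeof(struct mydrv_priv), "mydev%d", mydrv_setup);
	if (!dev)
		return -ENOMEM;

	err = register_netdev(dev);	/* locks rtnl, picks e.g. "mydev0" */
	if (err) {
		free_netdev(dev);	/* undo the allocation on failure */
		return err;
	}
	return 0;
}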
4153 
4154 /*
4155  * netdev_wait_allrefs - wait until all references are gone.
4156  *
4157  * This is called when unregistering network devices.
4158  *
4159  * Any protocol or device that holds a reference should register
4160  * for netdevice notification, and cleanup and put back the
4161  * reference if they receive an UNREGISTER event.
4162  * We can get stuck here if buggy protocols don't correctly
4163  * call dev_put.
4164  */
4165 static void netdev_wait_allrefs(struct net_device *dev)
4166 {
4167 	unsigned long rebroadcast_time, warning_time;
4168 
4169 	rebroadcast_time = warning_time = jiffies;
4170 	while (atomic_read(&dev->refcnt) != 0) {
4171 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
4172 			rtnl_lock();
4173 
4174 			/* Rebroadcast unregister notification */
4175 			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4176 
4177 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
4178 				     &dev->state)) {
4179 				/* We must not have linkwatch events
4180 				 * pending on unregister. If this
4181 				 * happens, we simply run the queue
4182 				 * unscheduled, resulting in a noop
4183 				 * for this device.
4184 				 */
4185 				linkwatch_run_queue();
4186 			}
4187 
4188 			__rtnl_unlock();
4189 
4190 			rebroadcast_time = jiffies;
4191 		}
4192 
4193 		msleep(250);
4194 
4195 		if (time_after(jiffies, warning_time + 10 * HZ)) {
4196 			printk(KERN_EMERG "unregister_netdevice: "
4197 			       "waiting for %s to become free. Usage "
4198 			       "count = %d\n",
4199 			       dev->name, atomic_read(&dev->refcnt));
4200 			warning_time = jiffies;
4201 		}
4202 	}
4203 }
4204 
4205 /* The sequence is:
4206  *
4207  *	rtnl_lock();
4208  *	...
4209  *	register_netdevice(x1);
4210  *	register_netdevice(x2);
4211  *	...
4212  *	unregister_netdevice(y1);
4213  *	unregister_netdevice(y2);
4214  *      ...
4215  *	rtnl_unlock();
4216  *	free_netdev(y1);
4217  *	free_netdev(y2);
4218  *
4219  * We are invoked by rtnl_unlock().
4220  * This allows us to deal with problems:
4221  * 1) We can delete sysfs objects which invoke hotplug
4222  *    without deadlocking with linkwatch via keventd.
4223  * 2) Since we run with the RTNL semaphore not held, we can sleep
4224  *    safely in order to wait for the netdev refcnt to drop to zero.
4225  *
4226  * We must not return until all unregister events added during
4227  * the interval the lock was held have been completed.
4228  */
4229 void netdev_run_todo(void)
4230 {
4231 	struct list_head list;
4232 
4233 	/* Snapshot list, allow later requests */
4234 	list_replace_init(&net_todo_list, &list);
4235 
4236 	__rtnl_unlock();
4237 
4238 	while (!list_empty(&list)) {
4239 		struct net_device *dev
4240 			= list_entry(list.next, struct net_device, todo_list);
4241 		list_del(&dev->todo_list);
4242 
4243 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
4244 			printk(KERN_ERR "network todo '%s' but state %d\n",
4245 			       dev->name, dev->reg_state);
4246 			dump_stack();
4247 			continue;
4248 		}
4249 
4250 		dev->reg_state = NETREG_UNREGISTERED;
4251 
4252 		on_each_cpu(flush_backlog, dev, 1);
4253 
4254 		netdev_wait_allrefs(dev);
4255 
4256 		/* paranoia */
4257 		BUG_ON(atomic_read(&dev->refcnt));
4258 		WARN_ON(dev->ip_ptr);
4259 		WARN_ON(dev->ip6_ptr);
4260 		WARN_ON(dev->dn_ptr);
4261 
4262 		if (dev->destructor)
4263 			dev->destructor(dev);
4264 
4265 		/* Free network device */
4266 		kobject_put(&dev->dev.kobj);
4267 	}
4268 }
4269 
4270 static struct net_device_stats *internal_stats(struct net_device *dev)
4271 {
4272 	return &dev->stats;
4273 }
4274 
4275 static void netdev_init_one_queue(struct net_device *dev,
4276 				  struct netdev_queue *queue,
4277 				  void *_unused)
4278 {
4279 	queue->dev = dev;
4280 }
4281 
4282 static void netdev_init_queues(struct net_device *dev)
4283 {
4284 	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
4285 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
4286 	spin_lock_init(&dev->tx_global_lock);
4287 }
4288 
4289 /**
4290  *	alloc_netdev_mq - allocate network device
4291  *	@sizeof_priv:	size of private data to allocate space for
4292  *	@name:		device name format string
4293  *	@setup:		callback to initialize device
4294  *	@queue_count:	the number of subqueues to allocate
4295  *
4296  *	Allocates a struct net_device with private data area for driver use
4297  *	and performs basic initialization.  Also allocates subqueue structs
4298  *	for each queue on the device at the end of the netdevice.
4299  */
4300 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
4301 		void (*setup)(struct net_device *), unsigned int queue_count)
4302 {
4303 	struct netdev_queue *tx;
4304 	struct net_device *dev;
4305 	size_t alloc_size;
4306 	void *p;
4307 
4308 	BUG_ON(strlen(name) >= sizeof(dev->name));
4309 
4310 	alloc_size = sizeof(struct net_device);
4311 	if (sizeof_priv) {
4312 		/* ensure 32-byte alignment of private area */
4313 		alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
4314 		alloc_size += sizeof_priv;
4315 	}
4316 	/* ensure 32-byte alignment of whole construct */
4317 	alloc_size += NETDEV_ALIGN_CONST;
4318 
4319 	p = kzalloc(alloc_size, GFP_KERNEL);
4320 	if (!p) {
4321 		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
4322 		return NULL;
4323 	}
4324 
4325 	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
4326 	if (!tx) {
4327 		printk(KERN_ERR "alloc_netdev: Unable to allocate "
4328 		       "tx qdiscs.\n");
4329 		kfree(p);
4330 		return NULL;
4331 	}
4332 
4333 	dev = (struct net_device *)
4334 		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
4335 	dev->padded = (char *)dev - (char *)p;
4336 	dev_net_set(dev, &init_net);
4337 
4338 	dev->_tx = tx;
4339 	dev->num_tx_queues = queue_count;
4340 	dev->real_num_tx_queues = queue_count;
4341 
4342 	if (sizeof_priv) {
4343 		dev->priv = ((char *)dev +
4344 			     ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
4345 			      & ~NETDEV_ALIGN_CONST));
4346 	}
4347 
4348 	dev->gso_max_size = GSO_MAX_SIZE;
4349 
4350 	netdev_init_queues(dev);
4351 
4352 	dev->get_stats = internal_stats;
4353 	netpoll_netdev_init(dev);
4354 	setup(dev);
4355 	strcpy(dev->name, name);
4356 	return dev;
4357 }
4358 EXPORT_SYMBOL(alloc_netdev_mq);
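
/*
 * Illustrative sketch (editorial addition): allocating a device with
 * several transmit queues.  The queue count, names and private struct
 * are hypothetical; the driver may later lower real_num_tx_queues if
 * fewer queues turn out to be usable.
 */
struct mymq_priv {
	unsigned int txq_count;
};

static struct net_device *mymq_alloc(void)
{
	struct net_device *dev;

	dev = alloc_netdev_mq(sizeof(struct mymq_priv), "mq%d",
			      ether_setup, 4);
	if (!dev)
		return NULL;

	/* the whole allocation is zeroed, so the private area starts clean */
	((struct mymq_priv *)netdev_priv(dev))->txq_count =
						dev->real_num_tx_queues;
	return dev;
}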
4359 
4360 /**
4361  *	free_netdev - free network device
4362  *	@dev: device
4363  *
4364  *	This function does the last stage of destroying an allocated device
4365  * 	interface. The reference to the device object is released.
4366  *	If this is the last reference then it will be freed.
4367  */
4368 void free_netdev(struct net_device *dev)
4369 {
4370 	release_net(dev_net(dev));
4371 
4372 	kfree(dev->_tx);
4373 
4374 	/*  Compatibility with error handling in drivers */
4375 	if (dev->reg_state == NETREG_UNINITIALIZED) {
4376 		kfree((char *)dev - dev->padded);
4377 		return;
4378 	}
4379 
4380 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
4381 	dev->reg_state = NETREG_RELEASED;
4382 
4383 	/* will free via device release */
4384 	put_device(&dev->dev);
4385 }
4386 
4387 /**
4388  *	synchronize_net -  Synchronize with packet receive processing
4389  *
4390  *	Wait for packets currently being received to be done.
4391  *	Does not block later packets from starting.
4392  */
4393 void synchronize_net(void)
4394 {
4395 	might_sleep();
4396 	synchronize_rcu();
4397 }
4398 
4399 /**
4400  *	unregister_netdevice - remove device from the kernel
4401  *	@dev: device
4402  *
4403  *	This function shuts down a device interface and removes it
4404  *	from the kernel tables.
4405  *
4406  *	Callers must hold the rtnl semaphore.  You may want
4407  *	unregister_netdev() instead of this.
4408  */
4409 
4410 void unregister_netdevice(struct net_device *dev)
4411 {
4412 	ASSERT_RTNL();
4413 
4414 	rollback_registered(dev);
4415 	/* Finish processing unregister after unlock */
4416 	net_set_todo(dev);
4417 }
4418 
4419 /**
4420  *	unregister_netdev - remove device from the kernel
4421  *	@dev: device
4422  *
4423  *	This function shuts down a device interface and removes it
4424  *	from the kernel tables.
4425  *
4426  *	This is just a wrapper for unregister_netdevice that takes
4427  *	the rtnl semaphore.  In general you want to use this and not
4428  *	unregister_netdevice.
4429  */
4430 void unregister_netdev(struct net_device *dev)
4431 {
4432 	rtnl_lock();
4433 	unregister_netdevice(dev);
4434 	rtnl_unlock();
4435 }
4436 
4437 EXPORT_SYMBOL(unregister_netdev);
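
/*
 * Illustrative sketch (editorial addition): the mirror image of the
 * registration sketch above.  unregister_netdev() takes rtnl and, via
 * the todo list, waits until all references are gone; only then may the
 * memory be returned with free_netdev().  The helper name is
 * hypothetical.
 */
static void mydrv_remove(struct net_device *dev)
{
	unregister_netdev(dev);		/* detach from the stack, wait */
	free_netdev(dev);		/* then release the memory */
}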
4438 
4439 /**
4440  *	dev_change_net_namespace - move device to a different network namespace
4441  *	@dev: device
4442  *	@net: network namespace
4443  *	@pat: If not NULL, name pattern to try if the current device name
4444  *	      is already taken in the destination network namespace.
4445  *
4446  *	This function shuts down a device interface and moves it
4447  *	to a new network namespace. On success 0 is returned, on
4448  *	a failure a negative errno code is returned.
4449  *
4450  *	Callers must hold the rtnl semaphore.
4451  */
4452 
4453 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
4454 {
4455 	char buf[IFNAMSIZ];
4456 	const char *destname;
4457 	int err;
4458 
4459 	ASSERT_RTNL();
4460 
4461 	/* Don't allow namespace local devices to be moved. */
4462 	err = -EINVAL;
4463 	if (dev->features & NETIF_F_NETNS_LOCAL)
4464 		goto out;
4465 
4466 	/* Ensure the device has been registered */
4467 	err = -EINVAL;
4468 	if (dev->reg_state != NETREG_REGISTERED)
4469 		goto out;
4470 
4471 	/* Get out if there is nothing to do */
4472 	err = 0;
4473 	if (net_eq(dev_net(dev), net))
4474 		goto out;
4475 
4476 	/* Pick the destination device name, and ensure
4477 	 * we can use it in the destination network namespace.
4478 	 */
4479 	err = -EEXIST;
4480 	destname = dev->name;
4481 	if (__dev_get_by_name(net, destname)) {
4482 		/* We get here if we can't use the current device name */
4483 		if (!pat)
4484 			goto out;
4485 		if (!dev_valid_name(pat))
4486 			goto out;
4487 		if (strchr(pat, '%')) {
4488 			if (__dev_alloc_name(net, pat, buf) < 0)
4489 				goto out;
4490 			destname = buf;
4491 		} else
4492 			destname = pat;
4493 		if (__dev_get_by_name(net, destname))
4494 			goto out;
4495 	}
4496 
4497 	/*
4498 	 * And now a mini version of register_netdevice and unregister_netdevice.
4499 	 */
4500 
4501 	/* If device is running close it first. */
4502 	dev_close(dev);
4503 
4504 	/* And unlink it from device chain */
4505 	err = -ENODEV;
4506 	unlist_netdevice(dev);
4507 
4508 	synchronize_net();
4509 
4510 	/* Shutdown queueing discipline. */
4511 	dev_shutdown(dev);
4512 
4513 	/* Notify protocols that we are about to destroy
4514 	   this device. They should clean up all of their state.
4515 	*/
4516 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4517 
4518 	/*
4519 	 *	Flush the unicast and multicast chains
4520 	 */
4521 	dev_addr_discard(dev);
4522 
4523 	/* Actually switch the network namespace */
4524 	dev_net_set(dev, net);
4525 
4526 	/* Assign the new device name */
4527 	if (destname != dev->name)
4528 		strcpy(dev->name, destname);
4529 
4530 	/* If there is an ifindex conflict assign a new one */
4531 	if (__dev_get_by_index(net, dev->ifindex)) {
4532 		int iflink = (dev->iflink == dev->ifindex);
4533 		dev->ifindex = dev_new_index(net);
4534 		if (iflink)
4535 			dev->iflink = dev->ifindex;
4536 	}
4537 
4538 	/* Fixup kobjects */
4539 	netdev_unregister_kobject(dev);
4540 	err = netdev_register_kobject(dev);
4541 	WARN_ON(err);
4542 
4543 	/* Add the device back in the hashes */
4544 	list_netdevice(dev);
4545 
4546 	/* Notify protocols, that a new device appeared. */
4547 	call_netdevice_notifiers(NETDEV_REGISTER, dev);
4548 
4549 	synchronize_net();
4550 	err = 0;
4551 out:
4552 	return err;
4553 }
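
/*
 * Illustrative sketch (editorial addition): moving a device into another
 * namespace.  The caller must hold rtnl; the "eth%d" pattern lets the
 * core pick a fresh name if the current one is already taken in @net.
 * The helper name is hypothetical.
 */
static int mydrv_move_to_ns(struct net_device *dev, struct net *net)
{
	int err;

	rtnl_lock();
	err = dev_change_net_namespace(dev, net, "eth%d");
	rtnl_unlock();
	return err;
}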
4554 
4555 static int dev_cpu_callback(struct notifier_block *nfb,
4556 			    unsigned long action,
4557 			    void *ocpu)
4558 {
4559 	struct sk_buff **list_skb;
4560 	struct Qdisc **list_net;
4561 	struct sk_buff *skb;
4562 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
4563 	struct softnet_data *sd, *oldsd;
4564 
4565 	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
4566 		return NOTIFY_OK;
4567 
4568 	local_irq_disable();
4569 	cpu = smp_processor_id();
4570 	sd = &per_cpu(softnet_data, cpu);
4571 	oldsd = &per_cpu(softnet_data, oldcpu);
4572 
4573 	/* Find end of our completion_queue. */
4574 	list_skb = &sd->completion_queue;
4575 	while (*list_skb)
4576 		list_skb = &(*list_skb)->next;
4577 	/* Append completion queue from offline CPU. */
4578 	*list_skb = oldsd->completion_queue;
4579 	oldsd->completion_queue = NULL;
4580 
4581 	/* Find end of our output_queue. */
4582 	list_net = &sd->output_queue;
4583 	while (*list_net)
4584 		list_net = &(*list_net)->next_sched;
4585 	/* Append output queue from offline CPU. */
4586 	*list_net = oldsd->output_queue;
4587 	oldsd->output_queue = NULL;
4588 
4589 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
4590 	local_irq_enable();
4591 
4592 	/* Process offline CPU's input_pkt_queue */
4593 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
4594 		netif_rx(skb);
4595 
4596 	return NOTIFY_OK;
4597 }
4598 
4599 #ifdef CONFIG_NET_DMA
4600 /**
4601  * net_dma_rebalance - try to maintain one DMA channel per CPU
4602  * @net_dma: DMA client and associated data (lock, channels, channel_mask)
4603  *
4604  * This is called when the number of channels allocated to the net_dma client
4605  * changes.  The net_dma client tries to have one DMA channel per CPU.
4606  */
4607 
4608 static void net_dma_rebalance(struct net_dma *net_dma)
4609 {
4610 	unsigned int cpu, i, n, chan_idx;
4611 	struct dma_chan *chan;
4612 
4613 	if (cpus_empty(net_dma->channel_mask)) {
4614 		for_each_online_cpu(cpu)
4615 			rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
4616 		return;
4617 	}
4618 
4619 	i = 0;
4620 	cpu = first_cpu(cpu_online_map);
4621 
4622 	for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
4623 		chan = net_dma->channels[chan_idx];
4624 
4625 		n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
4626 		   + (i < (num_online_cpus() %
4627 			cpus_weight(net_dma->channel_mask)) ? 1 : 0));
4628 
4629 		while (n) {
4630 			per_cpu(softnet_data, cpu).net_dma = chan;
4631 			cpu = next_cpu(cpu, cpu_online_map);
4632 			n--;
4633 		}
4634 		i++;
4635 	}
4636 }
4637 
4638 /**
4639  * netdev_dma_event - event callback for the net_dma_client
4640  * @client: should always be net_dma_client
4641  * @chan: DMA channel for the event
4642  * @state: DMA state to be handled
4643  */
4644 static enum dma_state_client
4645 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
4646 	enum dma_state state)
4647 {
4648 	int i, found = 0, pos = -1;
4649 	struct net_dma *net_dma =
4650 		container_of(client, struct net_dma, client);
4651 	enum dma_state_client ack = DMA_DUP; /* default: take no action */
4652 
4653 	spin_lock(&net_dma->lock);
4654 	switch (state) {
4655 	case DMA_RESOURCE_AVAILABLE:
4656 		for (i = 0; i < nr_cpu_ids; i++)
4657 			if (net_dma->channels[i] == chan) {
4658 				found = 1;
4659 				break;
4660 			} else if (net_dma->channels[i] == NULL && pos < 0)
4661 				pos = i;
4662 
4663 		if (!found && pos >= 0) {
4664 			ack = DMA_ACK;
4665 			net_dma->channels[pos] = chan;
4666 			cpu_set(pos, net_dma->channel_mask);
4667 			net_dma_rebalance(net_dma);
4668 		}
4669 		break;
4670 	case DMA_RESOURCE_REMOVED:
4671 		for (i = 0; i < nr_cpu_ids; i++)
4672 			if (net_dma->channels[i] == chan) {
4673 				found = 1;
4674 				pos = i;
4675 				break;
4676 			}
4677 
4678 		if (found) {
4679 			ack = DMA_ACK;
4680 			cpu_clear(pos, net_dma->channel_mask);
4681 			net_dma->channels[i] = NULL;
4682 			net_dma_rebalance(net_dma);
4683 		}
4684 		break;
4685 	default:
4686 		break;
4687 	}
4688 	spin_unlock(&net_dma->lock);
4689 
4690 	return ack;
4691 }
4692 
4693 /**
4694  * netdev_dma_register - register the networking subsystem as a DMA client
4695  */
4696 static int __init netdev_dma_register(void)
4697 {
4698 	net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct net_dma),
4699 								GFP_KERNEL);
4700 	if (unlikely(!net_dma.channels)) {
4701 		printk(KERN_NOTICE
4702 				"netdev_dma: no memory for net_dma.channels\n");
4703 		return -ENOMEM;
4704 	}
4705 	spin_lock_init(&net_dma.lock);
4706 	dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
4707 	dma_async_client_register(&net_dma.client);
4708 	dma_async_client_chan_request(&net_dma.client);
4709 	return 0;
4710 }
4711 
4712 #else
4713 static int __init netdev_dma_register(void) { return -ENODEV; }
4714 #endif /* CONFIG_NET_DMA */
4715 
4716 /**
4717  *	netdev_increment_features - increment feature set by one
4718  *	@all: current feature set
4719  *	@one: new feature set
4720  *	@mask: mask feature set
4721  *
4722  *	Computes a new feature set after adding a device with feature set
4723  *	@one to the master device with current feature set @all.  Will not
4724  *	enable anything that is off in @mask. Returns the new feature set.
4725  */
4726 unsigned long netdev_increment_features(unsigned long all, unsigned long one,
4727 					unsigned long mask)
4728 {
4729 	/* If device needs checksumming, downgrade to it. */
4730 	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4731 		all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
4732 	else if (mask & NETIF_F_ALL_CSUM) {
4733 		/* If one device supports v4/v6 checksumming, set for all. */
4734 		if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
4735 		    !(all & NETIF_F_GEN_CSUM)) {
4736 			all &= ~NETIF_F_ALL_CSUM;
4737 			all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
4738 		}
4739 
4740 		/* If one device supports hw checksumming, set for all. */
4741 		if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
4742 			all &= ~NETIF_F_ALL_CSUM;
4743 			all |= NETIF_F_HW_CSUM;
4744 		}
4745 	}
4746 
4747 	one |= NETIF_F_ALL_CSUM;
4748 
4749 	one |= all & NETIF_F_ONE_FOR_ALL;
4750 	all &= one | NETIF_F_LLTX | NETIF_F_GSO;
4751 	all |= one & mask & NETIF_F_ONE_FOR_ALL;
4752 
4753 	return all;
4754 }
4755 EXPORT_SYMBOL(netdev_increment_features);
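
/*
 * Illustrative sketch (editorial addition): how a master device such as
 * a bond or bridge might fold its slaves' feature sets together.  The
 * slave list, its layout and the permissive starting value are
 * assumptions made for the example; only netdev_increment_features()
 * itself comes from this file.
 */
struct myslave {
	struct list_head	list;
	struct net_device	*dev;
};

static unsigned long mymaster_compute_features(struct list_head *slaves,
					       unsigned long mask)
{
	unsigned long features = mask;	/* start permissive, then narrow */
	struct myslave *s;

	list_for_each_entry(s, slaves, list)
		features = netdev_increment_features(features,
						     s->dev->features, mask);
	return features;
}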
4756 
4757 static struct hlist_head *netdev_create_hash(void)
4758 {
4759 	int i;
4760 	struct hlist_head *hash;
4761 
4762 	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
4763 	if (hash != NULL)
4764 		for (i = 0; i < NETDEV_HASHENTRIES; i++)
4765 			INIT_HLIST_HEAD(&hash[i]);
4766 
4767 	return hash;
4768 }
4769 
4770 /* Initialize per network namespace state */
4771 static int __net_init netdev_init(struct net *net)
4772 {
4773 	INIT_LIST_HEAD(&net->dev_base_head);
4774 
4775 	net->dev_name_head = netdev_create_hash();
4776 	if (net->dev_name_head == NULL)
4777 		goto err_name;
4778 
4779 	net->dev_index_head = netdev_create_hash();
4780 	if (net->dev_index_head == NULL)
4781 		goto err_idx;
4782 
4783 	return 0;
4784 
4785 err_idx:
4786 	kfree(net->dev_name_head);
4787 err_name:
4788 	return -ENOMEM;
4789 }
4790 
4791 /**
4792  *	netdev_drivername - network driver for the device
4793  *	@dev: network device
4794  *	@buffer: buffer for resulting name
4795  *	@len: size of buffer
4796  *
4797  *	Determine network driver for device.
4798  */
4799 char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
4800 {
4801 	const struct device_driver *driver;
4802 	const struct device *parent;
4803 
4804 	if (len <= 0 || !buffer)
4805 		return buffer;
4806 	buffer[0] = 0;
4807 
4808 	parent = dev->dev.parent;
4809 
4810 	if (!parent)
4811 		return buffer;
4812 
4813 	driver = parent->driver;
4814 	if (driver && driver->name)
4815 		strlcpy(buffer, driver->name, len);
4816 	return buffer;
4817 }
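
/*
 * Illustrative sketch (editorial addition): netdev_drivername() is handy
 * when a diagnostic should name the driver rather than only the
 * interface, e.g. in a transmit-watchdog style message.  The buffer
 * size and message text are arbitrary.
 */
static void mydrv_report_stall(struct net_device *dev)
{
	char drivername[64];

	printk(KERN_WARNING "%s (%s): transmit queue stalled\n",
	       dev->name, netdev_drivername(dev, drivername, 64));
}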
4818 
4819 static void __net_exit netdev_exit(struct net *net)
4820 {
4821 	kfree(net->dev_name_head);
4822 	kfree(net->dev_index_head);
4823 }
4824 
4825 static struct pernet_operations __net_initdata netdev_net_ops = {
4826 	.init = netdev_init,
4827 	.exit = netdev_exit,
4828 };
4829 
4830 static void __net_exit default_device_exit(struct net *net)
4831 {
4832 	struct net_device *dev, *next;
4833 	/*
4834 	 * Push all migratable network devices back to the
4835 	 * initial network namespace
4836 	 */
4837 	rtnl_lock();
4838 	for_each_netdev_safe(net, dev, next) {
4839 		int err;
4840 		char fb_name[IFNAMSIZ];
4841 
4842 		/* Ignore unmovable devices (e.g. loopback) */
4843 		if (dev->features & NETIF_F_NETNS_LOCAL)
4844 			continue;
4845 
4846 		/* Push remaining network devices to init_net */
4847 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
4848 		err = dev_change_net_namespace(dev, &init_net, fb_name);
4849 		if (err) {
4850 			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
4851 				__func__, dev->name, err);
4852 			BUG();
4853 		}
4854 	}
4855 	rtnl_unlock();
4856 }
4857 
4858 static struct pernet_operations __net_initdata default_device_ops = {
4859 	.exit = default_device_exit,
4860 };
4861 
4862 /*
4863  *	Initialize the DEV module. At boot time this walks the device list and
4864  *	unhooks any devices that fail to initialise (normally hardware not
4865  *	present) and leaves us with a valid list of present and active devices.
4866  *
4867  */
4868 
4869 /*
4870  *       This is called single threaded during boot, so no need
4871  *       to take the rtnl semaphore.
4872  */
4873 static int __init net_dev_init(void)
4874 {
4875 	int i, rc = -ENOMEM;
4876 
4877 	BUG_ON(!dev_boot_phase);
4878 
4879 	if (dev_proc_init())
4880 		goto out;
4881 
4882 	if (netdev_kobject_init())
4883 		goto out;
4884 
4885 	INIT_LIST_HEAD(&ptype_all);
4886 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
4887 		INIT_LIST_HEAD(&ptype_base[i]);
4888 
4889 	if (register_pernet_subsys(&netdev_net_ops))
4890 		goto out;
4891 
4892 	if (register_pernet_device(&default_device_ops))
4893 		goto out;
4894 
4895 	/*
4896 	 *	Initialise the packet receive queues.
4897 	 */
4898 
4899 	for_each_possible_cpu(i) {
4900 		struct softnet_data *queue;
4901 
4902 		queue = &per_cpu(softnet_data, i);
4903 		skb_queue_head_init(&queue->input_pkt_queue);
4904 		queue->completion_queue = NULL;
4905 		INIT_LIST_HEAD(&queue->poll_list);
4906 
4907 		queue->backlog.poll = process_backlog;
4908 		queue->backlog.weight = weight_p;
4909 	}
4910 
4911 	netdev_dma_register();
4912 
4913 	dev_boot_phase = 0;
4914 
4915 	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
4916 	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
4917 
4918 	hotcpu_notifier(dev_cpu_callback, 0);
4919 	dst_init();
4920 	dev_mcast_init();
4921 	rc = 0;
4922 out:
4923 	return rc;
4924 }
4925 
4926 subsys_initcall(net_dev_init);
4927 
4928 EXPORT_SYMBOL(__dev_get_by_index);
4929 EXPORT_SYMBOL(__dev_get_by_name);
4930 EXPORT_SYMBOL(__dev_remove_pack);
4931 EXPORT_SYMBOL(dev_valid_name);
4932 EXPORT_SYMBOL(dev_add_pack);
4933 EXPORT_SYMBOL(dev_alloc_name);
4934 EXPORT_SYMBOL(dev_close);
4935 EXPORT_SYMBOL(dev_get_by_flags);
4936 EXPORT_SYMBOL(dev_get_by_index);
4937 EXPORT_SYMBOL(dev_get_by_name);
4938 EXPORT_SYMBOL(dev_open);
4939 EXPORT_SYMBOL(dev_queue_xmit);
4940 EXPORT_SYMBOL(dev_remove_pack);
4941 EXPORT_SYMBOL(dev_set_allmulti);
4942 EXPORT_SYMBOL(dev_set_promiscuity);
4943 EXPORT_SYMBOL(dev_change_flags);
4944 EXPORT_SYMBOL(dev_set_mtu);
4945 EXPORT_SYMBOL(dev_set_mac_address);
4946 EXPORT_SYMBOL(free_netdev);
4947 EXPORT_SYMBOL(netdev_boot_setup_check);
4948 EXPORT_SYMBOL(netdev_set_master);
4949 EXPORT_SYMBOL(netdev_state_change);
4950 EXPORT_SYMBOL(netif_receive_skb);
4951 EXPORT_SYMBOL(netif_rx);
4952 EXPORT_SYMBOL(register_gifconf);
4953 EXPORT_SYMBOL(register_netdevice);
4954 EXPORT_SYMBOL(register_netdevice_notifier);
4955 EXPORT_SYMBOL(skb_checksum_help);
4956 EXPORT_SYMBOL(synchronize_net);
4957 EXPORT_SYMBOL(unregister_netdevice);
4958 EXPORT_SYMBOL(unregister_netdevice_notifier);
4959 EXPORT_SYMBOL(net_enable_timestamp);
4960 EXPORT_SYMBOL(net_disable_timestamp);
4961 EXPORT_SYMBOL(dev_get_flags);
4962 
4963 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
4964 EXPORT_SYMBOL(br_handle_frame_hook);
4965 EXPORT_SYMBOL(br_fdb_get_hook);
4966 EXPORT_SYMBOL(br_fdb_put_hook);
4967 #endif
4968 
4969 EXPORT_SYMBOL(dev_load);
4970 
4971 EXPORT_PER_CPU_SYMBOL(softnet_data);
4972