xref: /openbmc/linux/net/core/dev.c (revision eeda3fd64f75bcbfaa70ce946513abaf3f23b8e0)
1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <rzsfl@rz.uni-sb.de>
16  *		Alan Cox <gw4pts@gw4pts.ampr.org>
17  *		David Hinds <dahinds@users.sourceforge.net>
18  *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19  *		Adam Sulmicki <adam@cfar.umd.edu>
20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/sched.h>
83 #include <linux/mutex.h>
84 #include <linux/string.h>
85 #include <linux/mm.h>
86 #include <linux/socket.h>
87 #include <linux/sockios.h>
88 #include <linux/errno.h>
89 #include <linux/interrupt.h>
90 #include <linux/if_ether.h>
91 #include <linux/netdevice.h>
92 #include <linux/etherdevice.h>
93 #include <linux/ethtool.h>
94 #include <linux/notifier.h>
95 #include <linux/skbuff.h>
96 #include <net/net_namespace.h>
97 #include <net/sock.h>
98 #include <linux/rtnetlink.h>
99 #include <linux/proc_fs.h>
100 #include <linux/seq_file.h>
101 #include <linux/stat.h>
102 #include <linux/if_bridge.h>
103 #include <linux/if_macvlan.h>
104 #include <net/dst.h>
105 #include <net/pkt_sched.h>
106 #include <net/checksum.h>
107 #include <linux/highmem.h>
108 #include <linux/init.h>
109 #include <linux/kmod.h>
110 #include <linux/module.h>
111 #include <linux/netpoll.h>
112 #include <linux/rcupdate.h>
113 #include <linux/delay.h>
114 #include <net/wext.h>
115 #include <net/iw_handler.h>
116 #include <asm/current.h>
117 #include <linux/audit.h>
118 #include <linux/dmaengine.h>
119 #include <linux/err.h>
120 #include <linux/ctype.h>
121 #include <linux/if_arp.h>
122 #include <linux/if_vlan.h>
123 #include <linux/ip.h>
124 #include <net/ip.h>
125 #include <linux/ipv6.h>
126 #include <linux/in.h>
127 #include <linux/jhash.h>
128 #include <linux/random.h>
129 
130 #include "net-sysfs.h"
131 
132 /*
133  *	The list of packet types we will receive (as opposed to discard)
134  *	and the routines to invoke.
135  *
136  *	Why 16? Because with 16 the only overlap we get on a hash of the
137  *	low nibble of the protocol value is RARP/SNAP/X.25.
138  *
139  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
140  *             sure which should go first, but I bet it won't make much
141  *             difference if we are running VLANs.  The good news is that
142  *             this protocol won't be in the list unless compiled in, so
143  *             the average user (w/out VLANs) will not be adversely affected.
144  *             --BLG
145  *
146  *		0800	IP
147  *		8100    802.1Q VLAN
148  *		0001	802.3
149  *		0002	AX.25
150  *		0004	802.2
151  *		8035	RARP
152  *		0005	SNAP
153  *		0805	X.25
154  *		0806	ARP
155  *		8137	IPX
156  *		0009	Localtalk
157  *		86DD	IPv6
158  */
159 
160 #define PTYPE_HASH_SIZE	(16)
161 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
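/*
 * Worked example (added for illustration, not part of the original file):
 * a non-ETH_P_ALL handler lands in the bucket given by the low nibble of
 * the host-order protocol value, e.g.
 *
 *	ntohs(htons(ETH_P_IP))   & PTYPE_HASH_MASK  ==  0x0800 & 0xf  ==  0
 *	ntohs(htons(ETH_P_ARP))  & PTYPE_HASH_MASK  ==  0x0806 & 0xf  ==  6
 *	ntohs(htons(ETH_P_RARP)) & PTYPE_HASH_MASK  ==  0x8035 & 0xf  ==  5
 *
 * RARP (0x8035), SNAP (0x0005) and X.25 (0x0805) all end in 5, which is
 * the overlap the comment above refers to.
 */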
162 
163 static DEFINE_SPINLOCK(ptype_lock);
164 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
165 static struct list_head ptype_all __read_mostly;	/* Taps */
166 
167 #ifdef CONFIG_NET_DMA
168 struct net_dma {
169 	struct dma_client client;
170 	spinlock_t lock;
171 	cpumask_t channel_mask;
172 	struct dma_chan **channels;
173 };
174 
175 static enum dma_state_client
176 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
177 	enum dma_state state);
178 
179 static struct net_dma net_dma = {
180 	.client = {
181 		.event_callback = netdev_dma_event,
182 	},
183 };
184 #endif
185 
186 /*
187  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
188  * semaphore.
189  *
190  * Pure readers hold dev_base_lock for reading.
191  *
192  * Writers must hold the rtnl semaphore while they loop through the
193  * dev_base_head list, and hold dev_base_lock for writing when they do the
194  * actual updates.  This allows pure readers to access the list even
195  * while a writer is preparing to update it.
196  *
197  * To put it another way, dev_base_lock is held for writing only to
198  * protect against pure readers; the rtnl semaphore provides the
199  * protection against other writers.
200  *
201  * See, for example usages, register_netdevice() and
202  * unregister_netdevice(), which must be called with the rtnl
203  * semaphore held.
204  */
205 DEFINE_RWLOCK(dev_base_lock);
206 
207 EXPORT_SYMBOL(dev_base_lock);
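/*
 * A minimal sketch of the two access patterns described above (added for
 * illustration; do_something_readonly() is a hypothetical helper):
 *
 *	// pure reader: holding dev_base_lock for reading is sufficient
 *	read_lock(&dev_base_lock);
 *	for_each_netdev(net, dev)
 *		do_something_readonly(dev);
 *	read_unlock(&dev_base_lock);
 *
 *	// writer: rtnl excludes other writers, dev_base_lock (write side)
 *	// covers the actual list update, as in list_netdevice() below
 *	rtnl_lock();
 *	write_lock_bh(&dev_base_lock);
 *	list_add_tail(&dev->dev_list, &net->dev_base_head);
 *	write_unlock_bh(&dev_base_lock);
 *	rtnl_unlock();
 */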
208 
209 #define NETDEV_HASHBITS	8
210 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
211 
212 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
213 {
214 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
215 	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
216 }
217 
218 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
219 {
220 	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
221 }
222 
223 /* Device list insertion */
224 static int list_netdevice(struct net_device *dev)
225 {
226 	struct net *net = dev_net(dev);
227 
228 	ASSERT_RTNL();
229 
230 	write_lock_bh(&dev_base_lock);
231 	list_add_tail(&dev->dev_list, &net->dev_base_head);
232 	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
233 	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
234 	write_unlock_bh(&dev_base_lock);
235 	return 0;
236 }
237 
238 /* Device list removal */
239 static void unlist_netdevice(struct net_device *dev)
240 {
241 	ASSERT_RTNL();
242 
243 	/* Unlink dev from the device chain */
244 	write_lock_bh(&dev_base_lock);
245 	list_del(&dev->dev_list);
246 	hlist_del(&dev->name_hlist);
247 	hlist_del(&dev->index_hlist);
248 	write_unlock_bh(&dev_base_lock);
249 }
250 
251 /*
252  *	Our notifier list
253  */
254 
255 static RAW_NOTIFIER_HEAD(netdev_chain);
256 
257 /*
258  *	Device drivers call our routines to queue packets here. We empty the
259  *	queue in the local softnet handler.
260  */
261 
262 DEFINE_PER_CPU(struct softnet_data, softnet_data);
263 
264 #ifdef CONFIG_LOCKDEP
265 /*
266  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
267  * according to dev->type
268  */
269 static const unsigned short netdev_lock_type[] =
270 	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
271 	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
272 	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
273 	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
274 	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
275 	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
276 	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
277 	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
278 	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
279 	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
280 	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
281 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
282 	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
283 	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
284 	 ARPHRD_NONE};
285 
286 static const char *netdev_lock_name[] =
287 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
288 	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
289 	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
290 	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
291 	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
292 	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
293 	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
294 	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
295 	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
296 	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
297 	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
298 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
299 	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
300 	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
301 	 "_xmit_NONE"};
302 
303 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
304 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
305 
306 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
307 {
308 	int i;
309 
310 	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
311 		if (netdev_lock_type[i] == dev_type)
312 			return i;
313 	/* the last key is used by default */
314 	return ARRAY_SIZE(netdev_lock_type) - 1;
315 }
316 
317 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
318 						 unsigned short dev_type)
319 {
320 	int i;
321 
322 	i = netdev_lock_pos(dev_type);
323 	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
324 				   netdev_lock_name[i]);
325 }
326 
327 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
328 {
329 	int i;
330 
331 	i = netdev_lock_pos(dev->type);
332 	lockdep_set_class_and_name(&dev->addr_list_lock,
333 				   &netdev_addr_lock_key[i],
334 				   netdev_lock_name[i]);
335 }
336 #else
337 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
338 						 unsigned short dev_type)
339 {
340 }
341 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
342 {
343 }
344 #endif
345 
346 /*******************************************************************************
347 
348 		Protocol management and registration routines
349 
350 *******************************************************************************/
351 
352 /*
353  *	Add a protocol ID to the list. Now that the input handler is
354  *	smarter we can dispense with all the messy stuff that used to be
355  *	here.
356  *
357  *	BEWARE!!! Protocol handlers that mangle input packets
358  *	MUST BE last in the hash buckets, and checking of protocol handlers
359  *	MUST start from the promiscuous ptype_all chain in net_bh.
360  *	This is true now, do not change it.
361  *	Explanation follows: if a protocol handler that mangles the packet
362  *	is first on the list, it is not able to sense that the packet
363  *	is cloned and should be copied-on-write, so it will
364  *	change it and subsequent readers will get a broken packet.
365  *							--ANK (980803)
366  */
367 
368 /**
369  *	dev_add_pack - add packet handler
370  *	@pt: packet type declaration
371  *
372  *	Add a protocol handler to the networking stack. The passed &packet_type
373  *	is linked into kernel lists and may not be freed until it has been
374  *	removed from the kernel lists.
375  *
376  *	This call does not sleep, therefore it cannot
377  *	guarantee that all CPUs that are in the middle of receiving packets
378  *	will see the new packet type (until the next received packet).
379  */
380 
381 void dev_add_pack(struct packet_type *pt)
382 {
383 	int hash;
384 
385 	spin_lock_bh(&ptype_lock);
386 	if (pt->type == htons(ETH_P_ALL))
387 		list_add_rcu(&pt->list, &ptype_all);
388 	else {
389 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
390 		list_add_rcu(&pt->list, &ptype_base[hash]);
391 	}
392 	spin_unlock_bh(&ptype_lock);
393 }
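/*
 * Illustrative usage sketch (not part of the original file; all names
 * below are hypothetical): a module that wants to see every IPv4 frame
 * registers a &packet_type once and removes it again on unload.
 *
 *	static int my_rcv(struct sk_buff *skb, struct net_device *dev,
 *			  struct packet_type *pt, struct net_device *orig_dev)
 *	{
 *		// inspect the packet, then release our clone of it
 *		kfree_skb(skb);
 *		return 0;
 *	}
 *
 *	static struct packet_type my_ptype = {
 *		.type = __constant_htons(ETH_P_IP),
 *		.func = my_rcv,
 *	};
 *
 *	dev_add_pack(&my_ptype);	// on module load
 *	dev_remove_pack(&my_ptype);	// on unload; may sleep (see below)
 */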
394 
395 /**
396  *	__dev_remove_pack	 - remove packet handler
397  *	@pt: packet type declaration
398  *
399  *	Remove a protocol handler that was previously added to the kernel
400  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
401  *	from the kernel lists and can be freed or reused once this function
402  *	returns.
403  *
404  *      The packet type might still be in use by receivers
405  *	and must not be freed until after all the CPUs have gone
406  *	through a quiescent state.
407  */
408 void __dev_remove_pack(struct packet_type *pt)
409 {
410 	struct list_head *head;
411 	struct packet_type *pt1;
412 
413 	spin_lock_bh(&ptype_lock);
414 
415 	if (pt->type == htons(ETH_P_ALL))
416 		head = &ptype_all;
417 	else
418 		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
419 
420 	list_for_each_entry(pt1, head, list) {
421 		if (pt == pt1) {
422 			list_del_rcu(&pt->list);
423 			goto out;
424 		}
425 	}
426 
427 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
428 out:
429 	spin_unlock_bh(&ptype_lock);
430 }
431 /**
432  *	dev_remove_pack	 - remove packet handler
433  *	@pt: packet type declaration
434  *
435  *	Remove a protocol handler that was previously added to the kernel
436  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
437  *	from the kernel lists and can be freed or reused once this function
438  *	returns.
439  *
440  *	This call sleeps to guarantee that no CPU is looking at the packet
441  *	type after return.
442  */
443 void dev_remove_pack(struct packet_type *pt)
444 {
445 	__dev_remove_pack(pt);
446 
447 	synchronize_net();
448 }
449 
450 /******************************************************************************
451 
452 		      Device Boot-time Settings Routines
453 
454 *******************************************************************************/
455 
456 /* Boot time configuration table */
457 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
458 
459 /**
460  *	netdev_boot_setup_add	- add new setup entry
461  *	@name: name of the device
462  *	@map: configured settings for the device
463  *
464  *	Adds a new setup entry to the dev_boot_setup list.  The function
465  *	returns 0 on error and 1 on success.  This is a generic routine for
466  *	all netdevices.
467  */
468 static int netdev_boot_setup_add(char *name, struct ifmap *map)
469 {
470 	struct netdev_boot_setup *s;
471 	int i;
472 
473 	s = dev_boot_setup;
474 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
475 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
476 			memset(s[i].name, 0, sizeof(s[i].name));
477 			strlcpy(s[i].name, name, IFNAMSIZ);
478 			memcpy(&s[i].map, map, sizeof(s[i].map));
479 			break;
480 		}
481 	}
482 
483 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
484 }
485 
486 /**
487  *	netdev_boot_setup_check	- check boot time settings
488  *	@dev: the netdevice
489  *
490  * 	Check boot time settings for the device.
491  *	The found settings are set for the device to be used
492  *	later in the device probing.
493  *	Returns 0 if no settings are found, 1 if they are found.
494  */
495 int netdev_boot_setup_check(struct net_device *dev)
496 {
497 	struct netdev_boot_setup *s = dev_boot_setup;
498 	int i;
499 
500 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
501 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
502 		    !strcmp(dev->name, s[i].name)) {
503 			dev->irq 	= s[i].map.irq;
504 			dev->base_addr 	= s[i].map.base_addr;
505 			dev->mem_start 	= s[i].map.mem_start;
506 			dev->mem_end 	= s[i].map.mem_end;
507 			return 1;
508 		}
509 	}
510 	return 0;
511 }
512 
513 
514 /**
515  *	netdev_boot_base	- get address from boot time settings
516  *	@prefix: prefix for network device
517  *	@unit: id for network device
518  *
519  * 	Check boot time settings for the base address of device.
520  *	The found settings are set for the device to be used
521  *	later in the device probing.
522  *	Returns 0 if no settings found.
523  */
524 unsigned long netdev_boot_base(const char *prefix, int unit)
525 {
526 	const struct netdev_boot_setup *s = dev_boot_setup;
527 	char name[IFNAMSIZ];
528 	int i;
529 
530 	sprintf(name, "%s%d", prefix, unit);
531 
532 	/*
533 	 * If device already registered then return base of 1
534 	 * to indicate not to probe for this interface
535 	 */
536 	if (__dev_get_by_name(&init_net, name))
537 		return 1;
538 
539 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
540 		if (!strcmp(name, s[i].name))
541 			return s[i].map.base_addr;
542 	return 0;
543 }
544 
545 /*
546  * Saves the settings configured at boot time for any netdevice.
547  */
548 int __init netdev_boot_setup(char *str)
549 {
550 	int ints[5];
551 	struct ifmap map;
552 
553 	str = get_options(str, ARRAY_SIZE(ints), ints);
554 	if (!str || !*str)
555 		return 0;
556 
557 	/* Save settings */
558 	memset(&map, 0, sizeof(map));
559 	if (ints[0] > 0)
560 		map.irq = ints[1];
561 	if (ints[0] > 1)
562 		map.base_addr = ints[2];
563 	if (ints[0] > 2)
564 		map.mem_start = ints[3];
565 	if (ints[0] > 3)
566 		map.mem_end = ints[4];
567 
568 	/* Add new entry to the list */
569 	return netdev_boot_setup_add(str, &map);
570 }
571 
572 __setup("netdev=", netdev_boot_setup);
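/*
 * Example (illustrative): the boot parameter registered above takes up to
 * four integers followed by the interface name,
 *
 *	netdev=<irq>,<base_addr>,<mem_start>,<mem_end>,<name>
 *
 * so booting with "netdev=9,0x300,0,0,eth0" records irq 9 and I/O base
 * 0x300 to be applied when a device later registers as eth0.
 */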
573 
574 /*******************************************************************************
575 
576 			    Device Interface Subroutines
577 
578 *******************************************************************************/
579 
580 /**
581  *	__dev_get_by_name	- find a device by its name
582  *	@net: the applicable net namespace
583  *	@name: name to find
584  *
585  *	Find an interface by name. Must be called under RTNL semaphore
586  *	or @dev_base_lock. If the name is found a pointer to the device
587  *	is returned. If the name is not found then %NULL is returned. The
588  *	reference counters are not incremented so the caller must be
589  *	careful with locks.
590  */
591 
592 struct net_device *__dev_get_by_name(struct net *net, const char *name)
593 {
594 	struct hlist_node *p;
595 
596 	hlist_for_each(p, dev_name_hash(net, name)) {
597 		struct net_device *dev
598 			= hlist_entry(p, struct net_device, name_hlist);
599 		if (!strncmp(dev->name, name, IFNAMSIZ))
600 			return dev;
601 	}
602 	return NULL;
603 }
604 
605 /**
606  *	dev_get_by_name		- find a device by its name
607  *	@net: the applicable net namespace
608  *	@name: name to find
609  *
610  *	Find an interface by name. This can be called from any
611  *	context and does its own locking. The returned handle has
612  *	the usage count incremented and the caller must use dev_put() to
613  *	release it when it is no longer needed. %NULL is returned if no
614  *	matching device is found.
615  */
616 
617 struct net_device *dev_get_by_name(struct net *net, const char *name)
618 {
619 	struct net_device *dev;
620 
621 	read_lock(&dev_base_lock);
622 	dev = __dev_get_by_name(net, name);
623 	if (dev)
624 		dev_hold(dev);
625 	read_unlock(&dev_base_lock);
626 	return dev;
627 }
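/*
 * Typical call pattern (illustrative sketch): callers that cannot hold
 * RTNL or dev_base_lock take a reference and drop it when finished.
 *
 *	struct net_device *dev = dev_get_by_name(net, "eth0");
 *	if (!dev)
 *		return -ENODEV;
 *	// ... use dev ...
 *	dev_put(dev);
 */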
628 
629 /**
630  *	__dev_get_by_index - find a device by its ifindex
631  *	@net: the applicable net namespace
632  *	@ifindex: index of device
633  *
634  *	Search for an interface by index. Returns a pointer to the device,
635  *	or %NULL if it is not found. The device has not had its reference
636  *	counter increased, so the caller must be careful about locking.
637  *	The caller must hold either the RTNL semaphore
638  *	or @dev_base_lock.
639  */
640 
641 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
642 {
643 	struct hlist_node *p;
644 
645 	hlist_for_each(p, dev_index_hash(net, ifindex)) {
646 		struct net_device *dev
647 			= hlist_entry(p, struct net_device, index_hlist);
648 		if (dev->ifindex == ifindex)
649 			return dev;
650 	}
651 	return NULL;
652 }
653 
654 
655 /**
656  *	dev_get_by_index - find a device by its ifindex
657  *	@net: the applicable net namespace
658  *	@ifindex: index of device
659  *
660  *	Search for an interface by index. Returns a pointer to the device,
661  *	or NULL if it is not found. The device returned has
662  *	had a reference added and the pointer is safe until the user calls
663  *	dev_put to indicate they have finished with it.
664  */
665 
666 struct net_device *dev_get_by_index(struct net *net, int ifindex)
667 {
668 	struct net_device *dev;
669 
670 	read_lock(&dev_base_lock);
671 	dev = __dev_get_by_index(net, ifindex);
672 	if (dev)
673 		dev_hold(dev);
674 	read_unlock(&dev_base_lock);
675 	return dev;
676 }
677 
678 /**
679  *	dev_getbyhwaddr - find a device by its hardware address
680  *	@net: the applicable net namespace
681  *	@type: media type of device
682  *	@ha: hardware address
683  *
684  *	Search for an interface by MAC address. Returns a pointer to the
685  *	device, or NULL if it is not found. The caller must hold the
686  *	rtnl semaphore. The returned device has not had its ref count
687  *	increased and the caller must therefore be careful about locking.
688  *
689  *	BUGS:
690  *	If the API was consistent this would be __dev_get_by_hwaddr
691  */
692 
693 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
694 {
695 	struct net_device *dev;
696 
697 	ASSERT_RTNL();
698 
699 	for_each_netdev(net, dev)
700 		if (dev->type == type &&
701 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
702 			return dev;
703 
704 	return NULL;
705 }
706 
707 EXPORT_SYMBOL(dev_getbyhwaddr);
708 
709 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
710 {
711 	struct net_device *dev;
712 
713 	ASSERT_RTNL();
714 	for_each_netdev(net, dev)
715 		if (dev->type == type)
716 			return dev;
717 
718 	return NULL;
719 }
720 
721 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
722 
723 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
724 {
725 	struct net_device *dev;
726 
727 	rtnl_lock();
728 	dev = __dev_getfirstbyhwtype(net, type);
729 	if (dev)
730 		dev_hold(dev);
731 	rtnl_unlock();
732 	return dev;
733 }
734 
735 EXPORT_SYMBOL(dev_getfirstbyhwtype);
736 
737 /**
738  *	dev_get_by_flags - find any device with given flags
739  *	@net: the applicable net namespace
740  *	@if_flags: IFF_* values
741  *	@mask: bitmask of bits in if_flags to check
742  *
743  *	Search for any interface with the given flags. Returns a pointer to
744  *	the device, or NULL if no match is found. The device returned has
745  *	had a reference added and the pointer is safe until the user calls
746  *	dev_put to indicate they have finished with it.
747  */
748 
749 struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
750 {
751 	struct net_device *dev, *ret;
752 
753 	ret = NULL;
754 	read_lock(&dev_base_lock);
755 	for_each_netdev(net, dev) {
756 		if (((dev->flags ^ if_flags) & mask) == 0) {
757 			dev_hold(dev);
758 			ret = dev;
759 			break;
760 		}
761 	}
762 	read_unlock(&dev_base_lock);
763 	return ret;
764 }
765 
766 /**
767  *	dev_valid_name - check if name is okay for network device
768  *	@name: name string
769  *
770  *	Network device names need to be valid file names to
771  *	allow sysfs to work.  We also disallow any kind of
772  *	whitespace.
773  */
774 int dev_valid_name(const char *name)
775 {
776 	if (*name == '\0')
777 		return 0;
778 	if (strlen(name) >= IFNAMSIZ)
779 		return 0;
780 	if (!strcmp(name, ".") || !strcmp(name, ".."))
781 		return 0;
782 
783 	while (*name) {
784 		if (*name == '/' || isspace(*name))
785 			return 0;
786 		name++;
787 	}
788 	return 1;
789 }
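/*
 * For example (illustrative): "eth0", "br-lan" and "vlan.100" are valid,
 * while "", ".", "..", "a/b", "my if" (embedded whitespace) and any name
 * of IFNAMSIZ or more characters are rejected.
 */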
790 
791 /**
792  *	__dev_alloc_name - allocate a name for a device
793  *	@net: network namespace to allocate the device name in
794  *	@name: name format string
795  *	@buf:  scratch buffer and result name string
796  *
797  *	Passed a format string - eg "lt%d" it will try and find a suitable
798  *	id. It scans list of devices to build up a free map, then chooses
799  *	the first empty slot. The caller must hold the dev_base or rtnl lock
800  *	while allocating the name and adding the device in order to avoid
801  *	duplicates.
802  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
803  *	Returns the number of the unit assigned or a negative errno code.
804  */
805 
806 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
807 {
808 	int i = 0;
809 	const char *p;
810 	const int max_netdevices = 8*PAGE_SIZE;
811 	unsigned long *inuse;
812 	struct net_device *d;
813 
814 	p = strnchr(name, IFNAMSIZ-1, '%');
815 	if (p) {
816 		/*
817 		 * Verify the string as this thing may have come from
818 		 * the user.  There must be either one "%d" and no other "%"
819 		 * characters.
820 		 */
821 		if (p[1] != 'd' || strchr(p + 2, '%'))
822 			return -EINVAL;
823 
824 		/* Use one page as a bit array of possible slots */
825 		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
826 		if (!inuse)
827 			return -ENOMEM;
828 
829 		for_each_netdev(net, d) {
830 			if (!sscanf(d->name, name, &i))
831 				continue;
832 			if (i < 0 || i >= max_netdevices)
833 				continue;
834 
835 			/*  avoid cases where sscanf is not exact inverse of printf */
836 			snprintf(buf, IFNAMSIZ, name, i);
837 			if (!strncmp(buf, d->name, IFNAMSIZ))
838 				set_bit(i, inuse);
839 		}
840 
841 		i = find_first_zero_bit(inuse, max_netdevices);
842 		free_page((unsigned long) inuse);
843 	}
844 
845 	snprintf(buf, IFNAMSIZ, name, i);
846 	if (!__dev_get_by_name(net, buf))
847 		return i;
848 
849 	/* It is possible to run out of possible slots
850 	 * when the name is long and there isn't enough space left
851 	 * for the digits, or if all bits are used.
852 	 */
853 	return -ENFILE;
854 }
855 
856 /**
857  *	dev_alloc_name - allocate a name for a device
858  *	@dev: device
859  *	@name: name format string
860  *
861  *	Passed a format string - eg "lt%d" it will try and find a suitable
862  *	id. It scans list of devices to build up a free map, then chooses
863  *	the first empty slot. The caller must hold the dev_base or rtnl lock
864  *	while allocating the name and adding the device in order to avoid
865  *	duplicates.
866  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
867  *	Returns the number of the unit assigned or a negative errno code.
868  */
869 
870 int dev_alloc_name(struct net_device *dev, const char *name)
871 {
872 	char buf[IFNAMSIZ];
873 	struct net *net;
874 	int ret;
875 
876 	BUG_ON(!dev_net(dev));
877 	net = dev_net(dev);
878 	ret = __dev_alloc_name(net, name, buf);
879 	if (ret >= 0)
880 		strlcpy(dev->name, buf, IFNAMSIZ);
881 	return ret;
882 }
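/*
 * Illustrative use: a driver that does not care about the unit number
 * lets the core pick one.  With "eth%d", if eth0 and eth1 already exist
 * the call below sets dev->name to "eth2" and returns 2.
 *
 *	err = dev_alloc_name(dev, "eth%d");
 *	if (err < 0)
 *		goto fail;
 */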
883 
884 
885 /**
886  *	dev_change_name - change name of a device
887  *	@dev: device
888  *	@newname: name (or format string) must be at least IFNAMSIZ
889  *
890  *	Change name of a device, can pass format strings "eth%d"
891  *	for wildcarding.
892  */
893 int dev_change_name(struct net_device *dev, const char *newname)
894 {
895 	char oldname[IFNAMSIZ];
896 	int err = 0;
897 	int ret;
898 	struct net *net;
899 
900 	ASSERT_RTNL();
901 	BUG_ON(!dev_net(dev));
902 
903 	net = dev_net(dev);
904 	if (dev->flags & IFF_UP)
905 		return -EBUSY;
906 
907 	if (!dev_valid_name(newname))
908 		return -EINVAL;
909 
910 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
911 		return 0;
912 
913 	memcpy(oldname, dev->name, IFNAMSIZ);
914 
915 	if (strchr(newname, '%')) {
916 		err = dev_alloc_name(dev, newname);
917 		if (err < 0)
918 			return err;
919 	}
920 	else if (__dev_get_by_name(net, newname))
921 		return -EEXIST;
922 	else
923 		strlcpy(dev->name, newname, IFNAMSIZ);
924 
925 rollback:
926 	/* For now only devices in the initial network namespace
927 	 * are in sysfs.
928 	 */
929 	if (net == &init_net) {
930 		ret = device_rename(&dev->dev, dev->name);
931 		if (ret) {
932 			memcpy(dev->name, oldname, IFNAMSIZ);
933 			return ret;
934 		}
935 	}
936 
937 	write_lock_bh(&dev_base_lock);
938 	hlist_del(&dev->name_hlist);
939 	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
940 	write_unlock_bh(&dev_base_lock);
941 
942 	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
943 	ret = notifier_to_errno(ret);
944 
945 	if (ret) {
946 		if (err) {
947 			printk(KERN_ERR
948 			       "%s: name change rollback failed: %d.\n",
949 			       dev->name, ret);
950 		} else {
951 			err = ret;
952 			memcpy(dev->name, oldname, IFNAMSIZ);
953 			goto rollback;
954 		}
955 	}
956 
957 	return err;
958 }
959 
960 /**
961  *	dev_set_alias - change ifalias of a device
962  *	@dev: device
963  *	@alias: name up to IFALIASZ
964  *	@len: limit of bytes to copy from info
965  *
966  *	Set ifalias for a device.
967  */
968 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
969 {
970 	ASSERT_RTNL();
971 
972 	if (len >= IFALIASZ)
973 		return -EINVAL;
974 
975 	if (!len) {
976 		if (dev->ifalias) {
977 			kfree(dev->ifalias);
978 			dev->ifalias = NULL;
979 		}
980 		return 0;
981 	}
982 
983 	dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
984 	if (!dev->ifalias)
985 		return -ENOMEM;
986 
987 	strlcpy(dev->ifalias, alias, len+1);
988 	return len;
989 }
990 
991 
992 /**
993  *	netdev_features_change - device changes features
994  *	@dev: device to cause notification
995  *
996  *	Called to indicate a device has changed features.
997  */
998 void netdev_features_change(struct net_device *dev)
999 {
1000 	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1001 }
1002 EXPORT_SYMBOL(netdev_features_change);
1003 
1004 /**
1005  *	netdev_state_change - device changes state
1006  *	@dev: device to cause notification
1007  *
1008  *	Called to indicate a device has changed state. This function calls
1009  *	the notifier chains for netdev_chain and sends a NEWLINK message
1010  *	to the routing socket.
1011  */
1012 void netdev_state_change(struct net_device *dev)
1013 {
1014 	if (dev->flags & IFF_UP) {
1015 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
1016 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1017 	}
1018 }
1019 
1020 void netdev_bonding_change(struct net_device *dev)
1021 {
1022 	call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
1023 }
1024 EXPORT_SYMBOL(netdev_bonding_change);
1025 
1026 /**
1027  *	dev_load 	- load a network module
1028  *	@net: the applicable net namespace
1029  *	@name: name of interface
1030  *
1031  *	If a network interface is not present and the process has suitable
1032  *	privileges this function loads the module. If module loading is not
1033  *	available in this kernel then it becomes a nop.
1034  */
1035 
1036 void dev_load(struct net *net, const char *name)
1037 {
1038 	struct net_device *dev;
1039 
1040 	read_lock(&dev_base_lock);
1041 	dev = __dev_get_by_name(net, name);
1042 	read_unlock(&dev_base_lock);
1043 
1044 	if (!dev && capable(CAP_SYS_MODULE))
1045 		request_module("%s", name);
1046 }
1047 
1048 /**
1049  *	dev_open	- prepare an interface for use.
1050  *	@dev:	device to open
1051  *
1052  *	Takes a device from down to up state. The device's private open
1053  *	function is invoked and then the multicast lists are loaded. Finally
1054  *	the device is moved into the up state and a %NETDEV_UP message is
1055  *	sent to the netdev notifier chain.
1056  *
1057  *	Calling this function on an active interface is a nop. On a failure
1058  *	a negative errno code is returned.
1059  */
1060 int dev_open(struct net_device *dev)
1061 {
1062 	const struct net_device_ops *ops = dev->netdev_ops;
1063 	int ret = 0;
1064 
1065 	ASSERT_RTNL();
1066 
1067 	/*
1068 	 *	Is it already up?
1069 	 */
1070 
1071 	if (dev->flags & IFF_UP)
1072 		return 0;
1073 
1074 	/*
1075 	 *	Is it even present?
1076 	 */
1077 	if (!netif_device_present(dev))
1078 		return -ENODEV;
1079 
1080 	/*
1081 	 *	Call device private open method
1082 	 */
1083 	set_bit(__LINK_STATE_START, &dev->state);
1084 
1085 	if (ops->ndo_validate_addr)
1086 		ret = ops->ndo_validate_addr(dev);
1087 
1088 	if (!ret && ops->ndo_open)
1089 		ret = ops->ndo_open(dev);
1090 
1091 	/*
1092 	 *	If it went open OK then:
1093 	 */
1094 
1095 	if (ret)
1096 		clear_bit(__LINK_STATE_START, &dev->state);
1097 	else {
1098 		/*
1099 		 *	Set the flags.
1100 		 */
1101 		dev->flags |= IFF_UP;
1102 
1103 		/*
1104 		 *	Initialize multicasting status
1105 		 */
1106 		dev_set_rx_mode(dev);
1107 
1108 		/*
1109 		 *	Wakeup transmit queue engine
1110 		 */
1111 		dev_activate(dev);
1112 
1113 		/*
1114 		 *	... and announce new interface.
1115 		 */
1116 		call_netdevice_notifiers(NETDEV_UP, dev);
1117 	}
1118 
1119 	return ret;
1120 }
1121 
1122 /**
1123  *	dev_close - shutdown an interface.
1124  *	@dev: device to shutdown
1125  *
1126  *	This function moves an active device into down state. A
1127  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1128  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1129  *	chain.
1130  */
1131 int dev_close(struct net_device *dev)
1132 {
1133 	const struct net_device_ops *ops = dev->netdev_ops;
1134 	ASSERT_RTNL();
1135 
1136 	might_sleep();
1137 
1138 	if (!(dev->flags & IFF_UP))
1139 		return 0;
1140 
1141 	/*
1142 	 *	Tell people we are going down, so that they can
1143 	 *	prepare for it while the device is still operating.
1144 	 */
1145 	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1146 
1147 	clear_bit(__LINK_STATE_START, &dev->state);
1148 
1149 	/* Synchronize to scheduled poll. We cannot touch poll list,
1150 	 * it can be even on different cpu. So just clear netif_running().
1151 	 *
1152 	 * dev->stop() will invoke napi_disable() on all of its
1153 	 * napi_struct instances on this device.
1154 	 */
1155 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
1156 
1157 	dev_deactivate(dev);
1158 
1159 	/*
1160 	 *	Call the device specific close. This cannot fail.
1161 	 *	Only if device is UP
1162 	 *
1163 	 *	We allow it to be called even after a DETACH hot-plug
1164 	 *	event.
1165 	 */
1166 	if (ops->ndo_stop)
1167 		ops->ndo_stop(dev);
1168 
1169 	/*
1170 	 *	Device is now down.
1171 	 */
1172 
1173 	dev->flags &= ~IFF_UP;
1174 
1175 	/*
1176 	 * Tell people we are down
1177 	 */
1178 	call_netdevice_notifiers(NETDEV_DOWN, dev);
1179 
1180 	return 0;
1181 }
1182 
1183 
1184 /**
1185  *	dev_disable_lro - disable Large Receive Offload on a device
1186  *	@dev: device
1187  *
1188  *	Disable Large Receive Offload (LRO) on a net device.  Must be
1189  *	called under RTNL.  This is needed if received packets may be
1190  *	forwarded to another interface.
1191  */
1192 void dev_disable_lro(struct net_device *dev)
1193 {
1194 	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1195 	    dev->ethtool_ops->set_flags) {
1196 		u32 flags = dev->ethtool_ops->get_flags(dev);
1197 		if (flags & ETH_FLAG_LRO) {
1198 			flags &= ~ETH_FLAG_LRO;
1199 			dev->ethtool_ops->set_flags(dev, flags);
1200 		}
1201 	}
1202 	WARN_ON(dev->features & NETIF_F_LRO);
1203 }
1204 EXPORT_SYMBOL(dev_disable_lro);
1205 
1206 
1207 static int dev_boot_phase = 1;
1208 
1209 /*
1210  *	Device change register/unregister. These are not inline or static
1211  *	as we export them to the world.
1212  */
1213 
1214 /**
1215  *	register_netdevice_notifier - register a network notifier block
1216  *	@nb: notifier
1217  *
1218  *	Register a notifier to be called when network device events occur.
1219  *	The notifier passed is linked into the kernel structures and must
1220  *	not be reused until it has been unregistered. A negative errno code
1221  *	is returned on a failure.
1222  *
1223  * 	When registered, all registration and up events are replayed
1224  *	to the new notifier to allow the caller to have a race-free
1225  *	view of the network device list.
1226  */
1227 
1228 int register_netdevice_notifier(struct notifier_block *nb)
1229 {
1230 	struct net_device *dev;
1231 	struct net_device *last;
1232 	struct net *net;
1233 	int err;
1234 
1235 	rtnl_lock();
1236 	err = raw_notifier_chain_register(&netdev_chain, nb);
1237 	if (err)
1238 		goto unlock;
1239 	if (dev_boot_phase)
1240 		goto unlock;
1241 	for_each_net(net) {
1242 		for_each_netdev(net, dev) {
1243 			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1244 			err = notifier_to_errno(err);
1245 			if (err)
1246 				goto rollback;
1247 
1248 			if (!(dev->flags & IFF_UP))
1249 				continue;
1250 
1251 			nb->notifier_call(nb, NETDEV_UP, dev);
1252 		}
1253 	}
1254 
1255 unlock:
1256 	rtnl_unlock();
1257 	return err;
1258 
1259 rollback:
1260 	last = dev;
1261 	for_each_net(net) {
1262 		for_each_netdev(net, dev) {
1263 			if (dev == last)
1264 				break;
1265 
1266 			if (dev->flags & IFF_UP) {
1267 				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1268 				nb->notifier_call(nb, NETDEV_DOWN, dev);
1269 			}
1270 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1271 		}
1272 	}
1273 
1274 	raw_notifier_chain_unregister(&netdev_chain, nb);
1275 	goto unlock;
1276 }
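/*
 * Illustrative usage sketch (all names hypothetical): a subsystem that
 * tracks interfaces registers a notifier block and switches on the event.
 * The replay described above means already-registered and already-up
 * devices are reported to it as well.
 *
 *	static int my_netdev_event(struct notifier_block *nb,
 *				   unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		switch (event) {
 *		case NETDEV_REGISTER:
 *		case NETDEV_UP:
 *			// start tracking dev
 *			break;
 *		case NETDEV_UNREGISTER:
 *			// stop tracking dev
 *			break;
 *		}
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_nb = {
 *		.notifier_call = my_netdev_event,
 *	};
 *
 *	register_netdevice_notifier(&my_nb);
 */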
1277 
1278 /**
1279  *	unregister_netdevice_notifier - unregister a network notifier block
1280  *	@nb: notifier
1281  *
1282  *	Unregister a notifier previously registered by
1283  *	register_netdevice_notifier(). The notifier is unlinked from the
1284  *	kernel structures and may then be reused. A negative errno code
1285  *	is returned on a failure.
1286  */
1287 
1288 int unregister_netdevice_notifier(struct notifier_block *nb)
1289 {
1290 	int err;
1291 
1292 	rtnl_lock();
1293 	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1294 	rtnl_unlock();
1295 	return err;
1296 }
1297 
1298 /**
1299  *	call_netdevice_notifiers - call all network notifier blocks
1300  *      @val: value passed unmodified to notifier function
1301  *      @dev: net_device pointer passed unmodified to notifier function
1302  *
1303  *	Call all network notifier blocks.  Parameters and return value
1304  *	are as for raw_notifier_call_chain().
1305  */
1306 
1307 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1308 {
1309 	return raw_notifier_call_chain(&netdev_chain, val, dev);
1310 }
1311 
1312 /* When > 0 there are consumers of rx skb time stamps */
1313 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1314 
1315 void net_enable_timestamp(void)
1316 {
1317 	atomic_inc(&netstamp_needed);
1318 }
1319 
1320 void net_disable_timestamp(void)
1321 {
1322 	atomic_dec(&netstamp_needed);
1323 }
1324 
1325 static inline void net_timestamp(struct sk_buff *skb)
1326 {
1327 	if (atomic_read(&netstamp_needed))
1328 		__net_timestamp(skb);
1329 	else
1330 		skb->tstamp.tv64 = 0;
1331 }
1332 
1333 /*
1334  *	Support routine. Sends outgoing frames to any network
1335  *	taps currently in use.
1336  */
1337 
1338 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1339 {
1340 	struct packet_type *ptype;
1341 
1342 	net_timestamp(skb);
1343 
1344 	rcu_read_lock();
1345 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1346 		/* Never send packets back to the socket
1347 		 * they originated from - MvS (miquels@drinkel.ow.org)
1348 		 */
1349 		if ((ptype->dev == dev || !ptype->dev) &&
1350 		    (ptype->af_packet_priv == NULL ||
1351 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1352 			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1353 			if (!skb2)
1354 				break;
1355 
1356 			/* skb->nh should be correctly
1357 			   set by the sender, so that the check below is
1358 			   just protection against buggy protocols.
1359 			 */
1360 			skb_reset_mac_header(skb2);
1361 
1362 			if (skb_network_header(skb2) < skb2->data ||
1363 			    skb2->network_header > skb2->tail) {
1364 				if (net_ratelimit())
1365 					printk(KERN_CRIT "protocol %04x is "
1366 					       "buggy, dev %s\n",
1367 					       skb2->protocol, dev->name);
1368 				skb_reset_network_header(skb2);
1369 			}
1370 
1371 			skb2->transport_header = skb2->network_header;
1372 			skb2->pkt_type = PACKET_OUTGOING;
1373 			ptype->func(skb2, skb->dev, ptype, skb->dev);
1374 		}
1375 	}
1376 	rcu_read_unlock();
1377 }
1378 
1379 
1380 static inline void __netif_reschedule(struct Qdisc *q)
1381 {
1382 	struct softnet_data *sd;
1383 	unsigned long flags;
1384 
1385 	local_irq_save(flags);
1386 	sd = &__get_cpu_var(softnet_data);
1387 	q->next_sched = sd->output_queue;
1388 	sd->output_queue = q;
1389 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
1390 	local_irq_restore(flags);
1391 }
1392 
1393 void __netif_schedule(struct Qdisc *q)
1394 {
1395 	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1396 		__netif_reschedule(q);
1397 }
1398 EXPORT_SYMBOL(__netif_schedule);
1399 
1400 void dev_kfree_skb_irq(struct sk_buff *skb)
1401 {
1402 	if (atomic_dec_and_test(&skb->users)) {
1403 		struct softnet_data *sd;
1404 		unsigned long flags;
1405 
1406 		local_irq_save(flags);
1407 		sd = &__get_cpu_var(softnet_data);
1408 		skb->next = sd->completion_queue;
1409 		sd->completion_queue = skb;
1410 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1411 		local_irq_restore(flags);
1412 	}
1413 }
1414 EXPORT_SYMBOL(dev_kfree_skb_irq);
1415 
1416 void dev_kfree_skb_any(struct sk_buff *skb)
1417 {
1418 	if (in_irq() || irqs_disabled())
1419 		dev_kfree_skb_irq(skb);
1420 	else
1421 		dev_kfree_skb(skb);
1422 }
1423 EXPORT_SYMBOL(dev_kfree_skb_any);
1424 
1425 
1426 /**
1427  * netif_device_detach - mark device as removed
1428  * @dev: network device
1429  *
1430  * Mark device as removed from the system and therefore no longer available.
1431  */
1432 void netif_device_detach(struct net_device *dev)
1433 {
1434 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1435 	    netif_running(dev)) {
1436 		netif_stop_queue(dev);
1437 	}
1438 }
1439 EXPORT_SYMBOL(netif_device_detach);
1440 
1441 /**
1442  * netif_device_attach - mark device as attached
1443  * @dev: network device
1444  *
1445  * Mark device as attached to the system and restart if needed.
1446  */
1447 void netif_device_attach(struct net_device *dev)
1448 {
1449 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1450 	    netif_running(dev)) {
1451 		netif_wake_queue(dev);
1452 		__netdev_watchdog_up(dev);
1453 	}
1454 }
1455 EXPORT_SYMBOL(netif_device_attach);
1456 
1457 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1458 {
1459 	return ((features & NETIF_F_GEN_CSUM) ||
1460 		((features & NETIF_F_IP_CSUM) &&
1461 		 protocol == htons(ETH_P_IP)) ||
1462 		((features & NETIF_F_IPV6_CSUM) &&
1463 		 protocol == htons(ETH_P_IPV6)));
1464 }
1465 
1466 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1467 {
1468 	if (can_checksum_protocol(dev->features, skb->protocol))
1469 		return true;
1470 
1471 	if (skb->protocol == htons(ETH_P_8021Q)) {
1472 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1473 		if (can_checksum_protocol(dev->features & dev->vlan_features,
1474 					  veh->h_vlan_encapsulated_proto))
1475 			return true;
1476 	}
1477 
1478 	return false;
1479 }
1480 
1481 /*
1482  * Invalidate hardware checksum when packet is to be mangled, and
1483  * complete checksum manually on outgoing path.
1484  */
1485 int skb_checksum_help(struct sk_buff *skb)
1486 {
1487 	__wsum csum;
1488 	int ret = 0, offset;
1489 
1490 	if (skb->ip_summed == CHECKSUM_COMPLETE)
1491 		goto out_set_summed;
1492 
1493 	if (unlikely(skb_shinfo(skb)->gso_size)) {
1494 		/* Let GSO fix up the checksum. */
1495 		goto out_set_summed;
1496 	}
1497 
1498 	offset = skb->csum_start - skb_headroom(skb);
1499 	BUG_ON(offset >= skb_headlen(skb));
1500 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
1501 
1502 	offset += skb->csum_offset;
1503 	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1504 
1505 	if (skb_cloned(skb) &&
1506 	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1507 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1508 		if (ret)
1509 			goto out;
1510 	}
1511 
1512 	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
1513 out_set_summed:
1514 	skb->ip_summed = CHECKSUM_NONE;
1515 out:
1516 	return ret;
1517 }
1518 
1519 /**
1520  *	skb_gso_segment - Perform segmentation on skb.
1521  *	@skb: buffer to segment
1522  *	@features: features for the output path (see dev->features)
1523  *
1524  *	This function segments the given skb and returns a list of segments.
1525  *
1526  *	It may return NULL if the skb requires no segmentation.  This is
1527  *	only possible when GSO is used for verifying header integrity.
1528  */
1529 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1530 {
1531 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1532 	struct packet_type *ptype;
1533 	__be16 type = skb->protocol;
1534 	int err;
1535 
1536 	BUG_ON(skb_shinfo(skb)->frag_list);
1537 
1538 	skb_reset_mac_header(skb);
1539 	skb->mac_len = skb->network_header - skb->mac_header;
1540 	__skb_pull(skb, skb->mac_len);
1541 
1542 	if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
1543 		if (skb_header_cloned(skb) &&
1544 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1545 			return ERR_PTR(err);
1546 	}
1547 
1548 	rcu_read_lock();
1549 	list_for_each_entry_rcu(ptype,
1550 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
1551 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1552 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1553 				err = ptype->gso_send_check(skb);
1554 				segs = ERR_PTR(err);
1555 				if (err || skb_gso_ok(skb, features))
1556 					break;
1557 				__skb_push(skb, (skb->data -
1558 						 skb_network_header(skb)));
1559 			}
1560 			segs = ptype->gso_segment(skb, features);
1561 			break;
1562 		}
1563 	}
1564 	rcu_read_unlock();
1565 
1566 	__skb_push(skb, skb->data - skb_mac_header(skb));
1567 
1568 	return segs;
1569 }
1570 
1571 EXPORT_SYMBOL(skb_gso_segment);
1572 
1573 /* Take action when hardware reception checksum errors are detected. */
1574 #ifdef CONFIG_BUG
1575 void netdev_rx_csum_fault(struct net_device *dev)
1576 {
1577 	if (net_ratelimit()) {
1578 		printk(KERN_ERR "%s: hw csum failure.\n",
1579 			dev ? dev->name : "<unknown>");
1580 		dump_stack();
1581 	}
1582 }
1583 EXPORT_SYMBOL(netdev_rx_csum_fault);
1584 #endif
1585 
1586 /* Actually, we should eliminate this check as soon as we know that:
1587  * 1. The IOMMU is present and can map all the memory.
1588  * 2. No high memory really exists on this machine.
1589  */
1590 
1591 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1592 {
1593 #ifdef CONFIG_HIGHMEM
1594 	int i;
1595 
1596 	if (dev->features & NETIF_F_HIGHDMA)
1597 		return 0;
1598 
1599 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1600 		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1601 			return 1;
1602 
1603 #endif
1604 	return 0;
1605 }
1606 
1607 struct dev_gso_cb {
1608 	void (*destructor)(struct sk_buff *skb);
1609 };
1610 
1611 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1612 
1613 static void dev_gso_skb_destructor(struct sk_buff *skb)
1614 {
1615 	struct dev_gso_cb *cb;
1616 
1617 	do {
1618 		struct sk_buff *nskb = skb->next;
1619 
1620 		skb->next = nskb->next;
1621 		nskb->next = NULL;
1622 		kfree_skb(nskb);
1623 	} while (skb->next);
1624 
1625 	cb = DEV_GSO_CB(skb);
1626 	if (cb->destructor)
1627 		cb->destructor(skb);
1628 }
1629 
1630 /**
1631  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
1632  *	@skb: buffer to segment
1633  *
1634  *	This function segments the given skb and stores the list of segments
1635  *	in skb->next.
1636  */
1637 static int dev_gso_segment(struct sk_buff *skb)
1638 {
1639 	struct net_device *dev = skb->dev;
1640 	struct sk_buff *segs;
1641 	int features = dev->features & ~(illegal_highdma(dev, skb) ?
1642 					 NETIF_F_SG : 0);
1643 
1644 	segs = skb_gso_segment(skb, features);
1645 
1646 	/* Verifying header integrity only. */
1647 	if (!segs)
1648 		return 0;
1649 
1650 	if (IS_ERR(segs))
1651 		return PTR_ERR(segs);
1652 
1653 	skb->next = segs;
1654 	DEV_GSO_CB(skb)->destructor = skb->destructor;
1655 	skb->destructor = dev_gso_skb_destructor;
1656 
1657 	return 0;
1658 }
1659 
1660 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1661 			struct netdev_queue *txq)
1662 {
1663 	if (likely(!skb->next)) {
1664 		if (!list_empty(&ptype_all))
1665 			dev_queue_xmit_nit(skb, dev);
1666 
1667 		if (netif_needs_gso(dev, skb)) {
1668 			if (unlikely(dev_gso_segment(skb)))
1669 				goto out_kfree_skb;
1670 			if (skb->next)
1671 				goto gso;
1672 		}
1673 
1674 		return dev->hard_start_xmit(skb, dev);
1675 	}
1676 
1677 gso:
1678 	do {
1679 		struct sk_buff *nskb = skb->next;
1680 		int rc;
1681 
1682 		skb->next = nskb->next;
1683 		nskb->next = NULL;
1684 		rc = dev->hard_start_xmit(nskb, dev);
1685 		if (unlikely(rc)) {
1686 			nskb->next = skb->next;
1687 			skb->next = nskb;
1688 			return rc;
1689 		}
1690 		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
1691 			return NETDEV_TX_BUSY;
1692 	} while (skb->next);
1693 
1694 	skb->destructor = DEV_GSO_CB(skb)->destructor;
1695 
1696 out_kfree_skb:
1697 	kfree_skb(skb);
1698 	return 0;
1699 }
1700 
1701 static u32 simple_tx_hashrnd;
1702 static int simple_tx_hashrnd_initialized = 0;
1703 
1704 static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
1705 {
1706 	u32 addr1, addr2, ports;
1707 	u32 hash, ihl;
1708 	u8 ip_proto = 0;
1709 
1710 	if (unlikely(!simple_tx_hashrnd_initialized)) {
1711 		get_random_bytes(&simple_tx_hashrnd, 4);
1712 		simple_tx_hashrnd_initialized = 1;
1713 	}
1714 
1715 	switch (skb->protocol) {
1716 	case htons(ETH_P_IP):
1717 		if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
1718 			ip_proto = ip_hdr(skb)->protocol;
1719 		addr1 = ip_hdr(skb)->saddr;
1720 		addr2 = ip_hdr(skb)->daddr;
1721 		ihl = ip_hdr(skb)->ihl;
1722 		break;
1723 	case htons(ETH_P_IPV6):
1724 		ip_proto = ipv6_hdr(skb)->nexthdr;
1725 		addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
1726 		addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
1727 		ihl = (40 >> 2);
1728 		break;
1729 	default:
1730 		return 0;
1731 	}
1732 
1733 
1734 	switch (ip_proto) {
1735 	case IPPROTO_TCP:
1736 	case IPPROTO_UDP:
1737 	case IPPROTO_DCCP:
1738 	case IPPROTO_ESP:
1739 	case IPPROTO_AH:
1740 	case IPPROTO_SCTP:
1741 	case IPPROTO_UDPLITE:
1742 		ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
1743 		break;
1744 
1745 	default:
1746 		ports = 0;
1747 		break;
1748 	}
1749 
1750 	hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
1751 
1752 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1753 }
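/*
 * The final multiply-and-shift above maps the 32-bit hash uniformly onto
 * [0, real_num_tx_queues) without a modulo.  Worked example (illustrative):
 * with 8 tx queues and hash == 0x80000000,
 *
 *	((u64)0x80000000 * 8) >> 32  ==  4
 *
 * so the skb is steered to queue 4.
 */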
1754 
1755 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1756 					struct sk_buff *skb)
1757 {
1758 	u16 queue_index = 0;
1759 
1760 	if (dev->select_queue)
1761 		queue_index = dev->select_queue(dev, skb);
1762 	else if (dev->real_num_tx_queues > 1)
1763 		queue_index = simple_tx_hash(dev, skb);
1764 
1765 	skb_set_queue_mapping(skb, queue_index);
1766 	return netdev_get_tx_queue(dev, queue_index);
1767 }
1768 
1769 /**
1770  *	dev_queue_xmit - transmit a buffer
1771  *	@skb: buffer to transmit
1772  *
1773  *	Queue a buffer for transmission to a network device. The caller must
1774  *	have set the device and priority and built the buffer before calling
1775  *	this function. The function can be called from an interrupt.
1776  *
1777  *	A negative errno code is returned on a failure. A success does not
1778  *	guarantee the frame will be transmitted as it may be dropped due
1779  *	to congestion or traffic shaping.
1780  *
1781  * -----------------------------------------------------------------------------------
1782  *      I notice this method can also return errors from the queue disciplines,
1783  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1784  *      be positive.
1785  *
1786  *      Regardless of the return value, the skb is consumed, so it is currently
1787  *      difficult to retry a send to this method.  (You can bump the ref count
1788  *      before sending to hold a reference for retry if you are careful.)
1789  *
1790  *      When calling this method, interrupts MUST be enabled.  This is because
1791  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1792  *          --BLG
1793  */
1794 int dev_queue_xmit(struct sk_buff *skb)
1795 {
1796 	struct net_device *dev = skb->dev;
1797 	struct netdev_queue *txq;
1798 	struct Qdisc *q;
1799 	int rc = -ENOMEM;
1800 
1801 	/* GSO will handle the following emulations directly. */
1802 	if (netif_needs_gso(dev, skb))
1803 		goto gso;
1804 
1805 	if (skb_shinfo(skb)->frag_list &&
1806 	    !(dev->features & NETIF_F_FRAGLIST) &&
1807 	    __skb_linearize(skb))
1808 		goto out_kfree_skb;
1809 
1810 	/* Fragmented skb is linearized if device does not support SG,
1811 	 * or if at least one of fragments is in highmem and device
1812 	 * does not support DMA from it.
1813 	 */
1814 	if (skb_shinfo(skb)->nr_frags &&
1815 	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1816 	    __skb_linearize(skb))
1817 		goto out_kfree_skb;
1818 
1819 	/* If packet is not checksummed and device does not support
1820 	 * checksumming for this protocol, complete checksumming here.
1821 	 */
1822 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
1823 		skb_set_transport_header(skb, skb->csum_start -
1824 					      skb_headroom(skb));
1825 		if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
1826 			goto out_kfree_skb;
1827 	}
1828 
1829 gso:
1830 	/* Disable soft irqs for various locks below. Also
1831 	 * stops preemption for RCU.
1832 	 */
1833 	rcu_read_lock_bh();
1834 
1835 	txq = dev_pick_tx(dev, skb);
1836 	q = rcu_dereference(txq->qdisc);
1837 
1838 #ifdef CONFIG_NET_CLS_ACT
1839 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1840 #endif
1841 	if (q->enqueue) {
1842 		spinlock_t *root_lock = qdisc_lock(q);
1843 
1844 		spin_lock(root_lock);
1845 
1846 		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
1847 			kfree_skb(skb);
1848 			rc = NET_XMIT_DROP;
1849 		} else {
1850 			rc = qdisc_enqueue_root(skb, q);
1851 			qdisc_run(q);
1852 		}
1853 		spin_unlock(root_lock);
1854 
1855 		goto out;
1856 	}
1857 
1858 	/* The device has no queue. Common case for software devices:
1859 	   loopback, all the sorts of tunnels...
1860 
1861 	   Really, it is unlikely that netif_tx_lock protection is necessary
1862 	   here.  (f.e. loopback and IP tunnels are clean ignoring statistics
1863 	   counters.)
1864 	   However, it is possible that they rely on protection
1865 	   made by us here.
1866 
1867 	   Check this and take the lock. It is not prone to deadlocks.
1868 	   Or take the noqueue qdisc path, it is even simpler 8)
1869 	 */
1870 	if (dev->flags & IFF_UP) {
1871 		int cpu = smp_processor_id(); /* ok because BHs are off */
1872 
1873 		if (txq->xmit_lock_owner != cpu) {
1874 
1875 			HARD_TX_LOCK(dev, txq, cpu);
1876 
1877 			if (!netif_tx_queue_stopped(txq)) {
1878 				rc = 0;
1879 				if (!dev_hard_start_xmit(skb, dev, txq)) {
1880 					HARD_TX_UNLOCK(dev, txq);
1881 					goto out;
1882 				}
1883 			}
1884 			HARD_TX_UNLOCK(dev, txq);
1885 			if (net_ratelimit())
1886 				printk(KERN_CRIT "Virtual device %s asks to "
1887 				       "queue packet!\n", dev->name);
1888 		} else {
1889 			/* Recursion is detected! It is possible,
1890 			 * unfortunately */
1891 			if (net_ratelimit())
1892 				printk(KERN_CRIT "Dead loop on virtual device "
1893 				       "%s, fix it urgently!\n", dev->name);
1894 		}
1895 	}
1896 
1897 	rc = -ENETDOWN;
1898 	rcu_read_unlock_bh();
1899 
1900 out_kfree_skb:
1901 	kfree_skb(skb);
1902 	return rc;
1903 out:
1904 	rcu_read_unlock_bh();
1905 	return rc;
1906 }
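
/*
 * Editor's sketch (not part of the original file): a hypothetical helper
 * illustrating the dev_queue_xmit() contract documented above.  The skb is
 * always consumed, so a caller that wants to inspect the result and retry
 * must take its own extra reference first.  example_xmit() and its use of
 * TC_PRIO_CONTROL are assumptions for illustration only; the declarations
 * used come from headers this file already pulls in (<linux/netdevice.h>,
 * <linux/skbuff.h>) plus <linux/pkt_sched.h> for the priority constant.
 */
static int example_xmit(struct sk_buff *skb, struct net_device *dev)
{
	int rc;

	skb->dev = dev;			 /* caller must set the device ...   */
	skb->priority = TC_PRIO_CONTROL; /* ... and the priority             */

	skb_get(skb);			 /* hold a reference across the call */
	rc = dev_queue_xmit(skb);	 /* consumes one reference           */
	if (rc < 0 || rc == NET_XMIT_DROP)
		pr_debug("example_xmit: frame not queued, rc=%d\n", rc);

	kfree_skb(skb);			 /* drop our extra reference         */
	return rc;
}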
1907 
1908 
1909 /*=======================================================================
1910 			Receiver routines
1911   =======================================================================*/
1912 
1913 int netdev_max_backlog __read_mostly = 1000;
1914 int netdev_budget __read_mostly = 300;
1915 int weight_p __read_mostly = 64;            /* old backlog weight */
1916 
1917 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1918 
1919 
1920 /**
1921  *	netif_rx	-	post buffer to the network code
1922  *	@skb: buffer to post
1923  *
1924  *	This function receives a packet from a device driver and queues it for
1925  *	the upper (protocol) levels to process.  It always succeeds. The buffer
1926  *	may be dropped during processing for congestion control or by the
1927  *	protocol layers.
1928  *
1929  *	return values:
1930  *	NET_RX_SUCCESS	(no congestion)
1931  *	NET_RX_DROP     (packet was dropped)
1932  *
1933  */
1934 
1935 int netif_rx(struct sk_buff *skb)
1936 {
1937 	struct softnet_data *queue;
1938 	unsigned long flags;
1939 
1940 	/* if netpoll wants it, pretend we never saw it */
1941 	if (netpoll_rx(skb))
1942 		return NET_RX_DROP;
1943 
1944 	if (!skb->tstamp.tv64)
1945 		net_timestamp(skb);
1946 
1947 	/*
1948 	 * The code is arranged so that the path is shortest when
1949 	 * the CPU is congested but still operating.
1950 	 */
1951 	local_irq_save(flags);
1952 	queue = &__get_cpu_var(softnet_data);
1953 
1954 	__get_cpu_var(netdev_rx_stat).total++;
1955 	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1956 		if (queue->input_pkt_queue.qlen) {
1957 enqueue:
1958 			__skb_queue_tail(&queue->input_pkt_queue, skb);
1959 			local_irq_restore(flags);
1960 			return NET_RX_SUCCESS;
1961 		}
1962 
1963 		napi_schedule(&queue->backlog);
1964 		goto enqueue;
1965 	}
1966 
1967 	__get_cpu_var(netdev_rx_stat).dropped++;
1968 	local_irq_restore(flags);
1969 
1970 	kfree_skb(skb);
1971 	return NET_RX_DROP;
1972 }
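
/*
 * Editor's sketch (not part of the original file): how a simple non-NAPI
 * driver receive path might hand a frame to the stack with netif_rx(),
 * typically from its interrupt handler.  example_rx_one() and the way the
 * frame data arrives are hypothetical; netdev_alloc_skb(), skb_reserve(),
 * skb_put() and eth_type_trans() (the latter from <linux/etherdevice.h>)
 * are the usual helpers for this.
 */
static void example_rx_one(struct net_device *dev, const void *data,
			   unsigned int len)
{
	struct sk_buff *skb = netdev_alloc_skb(dev, len + NET_IP_ALIGN);

	if (!skb) {
		dev->stats.rx_dropped++;
		return;
	}
	skb_reserve(skb, NET_IP_ALIGN);		/* align the IP header */
	memcpy(skb_put(skb, len), data, len);
	skb->protocol = eth_type_trans(skb, dev);

	if (netif_rx(skb) == NET_RX_DROP)
		dev->stats.rx_dropped++;
}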
1973 
1974 int netif_rx_ni(struct sk_buff *skb)
1975 {
1976 	int err;
1977 
1978 	preempt_disable();
1979 	err = netif_rx(skb);
1980 	if (local_softirq_pending())
1981 		do_softirq();
1982 	preempt_enable();
1983 
1984 	return err;
1985 }
1986 
1987 EXPORT_SYMBOL(netif_rx_ni);
1988 
1989 static void net_tx_action(struct softirq_action *h)
1990 {
1991 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
1992 
1993 	if (sd->completion_queue) {
1994 		struct sk_buff *clist;
1995 
1996 		local_irq_disable();
1997 		clist = sd->completion_queue;
1998 		sd->completion_queue = NULL;
1999 		local_irq_enable();
2000 
2001 		while (clist) {
2002 			struct sk_buff *skb = clist;
2003 			clist = clist->next;
2004 
2005 			WARN_ON(atomic_read(&skb->users));
2006 			__kfree_skb(skb);
2007 		}
2008 	}
2009 
2010 	if (sd->output_queue) {
2011 		struct Qdisc *head;
2012 
2013 		local_irq_disable();
2014 		head = sd->output_queue;
2015 		sd->output_queue = NULL;
2016 		local_irq_enable();
2017 
2018 		while (head) {
2019 			struct Qdisc *q = head;
2020 			spinlock_t *root_lock;
2021 
2022 			head = head->next_sched;
2023 
2024 			root_lock = qdisc_lock(q);
2025 			if (spin_trylock(root_lock)) {
2026 				smp_mb__before_clear_bit();
2027 				clear_bit(__QDISC_STATE_SCHED,
2028 					  &q->state);
2029 				qdisc_run(q);
2030 				spin_unlock(root_lock);
2031 			} else {
2032 				if (!test_bit(__QDISC_STATE_DEACTIVATED,
2033 					      &q->state)) {
2034 					__netif_reschedule(q);
2035 				} else {
2036 					smp_mb__before_clear_bit();
2037 					clear_bit(__QDISC_STATE_SCHED,
2038 						  &q->state);
2039 				}
2040 			}
2041 		}
2042 	}
2043 }
2044 
2045 static inline int deliver_skb(struct sk_buff *skb,
2046 			      struct packet_type *pt_prev,
2047 			      struct net_device *orig_dev)
2048 {
2049 	atomic_inc(&skb->users);
2050 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2051 }
2052 
2053 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
2054 /* These hooks are defined here for ATM */
2055 struct net_bridge;
2056 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
2057 						unsigned char *addr);
2058 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
2059 
2060 /*
2061  * If the bridge module is loaded, call the bridging hook.
2062  * Returns NULL if the packet was consumed.
2063  */
2064 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2065 					struct sk_buff *skb) __read_mostly;
2066 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2067 					    struct packet_type **pt_prev, int *ret,
2068 					    struct net_device *orig_dev)
2069 {
2070 	struct net_bridge_port *port;
2071 
2072 	if (skb->pkt_type == PACKET_LOOPBACK ||
2073 	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
2074 		return skb;
2075 
2076 	if (*pt_prev) {
2077 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2078 		*pt_prev = NULL;
2079 	}
2080 
2081 	return br_handle_frame_hook(port, skb);
2082 }
2083 #else
2084 #define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
2085 #endif
2086 
2087 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2088 struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2089 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2090 
2091 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2092 					     struct packet_type **pt_prev,
2093 					     int *ret,
2094 					     struct net_device *orig_dev)
2095 {
2096 	if (skb->dev->macvlan_port == NULL)
2097 		return skb;
2098 
2099 	if (*pt_prev) {
2100 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2101 		*pt_prev = NULL;
2102 	}
2103 	return macvlan_handle_frame_hook(skb);
2104 }
2105 #else
2106 #define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
2107 #endif
2108 
2109 #ifdef CONFIG_NET_CLS_ACT
2110 /* TODO: Maybe we should just force sch_ingress to be compiled in
2111  * when CONFIG_NET_CLS_ACT is?  Otherwise we currently execute some
2112  * useless instructions (a compare and two extra stores) when it is
2113  * not enabled but CONFIG_NET_CLS_ACT is.
2114  * NOTE: This doesn't remove any functionality; if you don't have
2115  * the ingress scheduler, you just can't add policies on ingress.
2116  *
2117  */
2118 static int ing_filter(struct sk_buff *skb)
2119 {
2120 	struct net_device *dev = skb->dev;
2121 	u32 ttl = G_TC_RTTL(skb->tc_verd);
2122 	struct netdev_queue *rxq;
2123 	int result = TC_ACT_OK;
2124 	struct Qdisc *q;
2125 
2126 	if (MAX_RED_LOOP < ttl++) {
2127 		printk(KERN_WARNING
2128 		       "Redir loop detected, dropping packet (%d->%d)\n",
2129 		       skb->iif, dev->ifindex);
2130 		return TC_ACT_SHOT;
2131 	}
2132 
2133 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2134 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2135 
2136 	rxq = &dev->rx_queue;
2137 
2138 	q = rxq->qdisc;
2139 	if (q != &noop_qdisc) {
2140 		spin_lock(qdisc_lock(q));
2141 		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2142 			result = qdisc_enqueue_root(skb, q);
2143 		spin_unlock(qdisc_lock(q));
2144 	}
2145 
2146 	return result;
2147 }
2148 
2149 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2150 					 struct packet_type **pt_prev,
2151 					 int *ret, struct net_device *orig_dev)
2152 {
2153 	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
2154 		goto out;
2155 
2156 	if (*pt_prev) {
2157 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2158 		*pt_prev = NULL;
2159 	} else {
2160 		/* Huh? Why does turning on AF_PACKET affect this? */
2161 		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2162 	}
2163 
2164 	switch (ing_filter(skb)) {
2165 	case TC_ACT_SHOT:
2166 	case TC_ACT_STOLEN:
2167 		kfree_skb(skb);
2168 		return NULL;
2169 	}
2170 
2171 out:
2172 	skb->tc_verd = 0;
2173 	return skb;
2174 }
2175 #endif
2176 
2177 /*
2178  * 	netif_nit_deliver - deliver received packets to network taps
2179  * 	@skb: buffer
2180  *
2181  * 	This function is used to deliver incoming packets to network
2182  * 	taps. It should be used when the normal netif_receive_skb path
2183  * 	is bypassed, for example because of VLAN acceleration.
2184  */
2185 void netif_nit_deliver(struct sk_buff *skb)
2186 {
2187 	struct packet_type *ptype;
2188 
2189 	if (list_empty(&ptype_all))
2190 		return;
2191 
2192 	skb_reset_network_header(skb);
2193 	skb_reset_transport_header(skb);
2194 	skb->mac_len = skb->network_header - skb->mac_header;
2195 
2196 	rcu_read_lock();
2197 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2198 		if (!ptype->dev || ptype->dev == skb->dev)
2199 			deliver_skb(skb, ptype, skb->dev);
2200 	}
2201 	rcu_read_unlock();
2202 }
2203 
2204 /**
2205  *	netif_receive_skb - process receive buffer from network
2206  *	@skb: buffer to process
2207  *
2208  *	netif_receive_skb() is the main receive data processing function.
2209  *	It always succeeds. The buffer may be dropped during processing
2210  *	for congestion control or by the protocol layers.
2211  *
2212  *	This function may only be called from softirq context and interrupts
2213  *	should be enabled.
2214  *
2215  *	Return values (usually ignored):
2216  *	NET_RX_SUCCESS: no congestion
2217  *	NET_RX_DROP: packet was dropped
2218  */
2219 int netif_receive_skb(struct sk_buff *skb)
2220 {
2221 	struct packet_type *ptype, *pt_prev;
2222 	struct net_device *orig_dev;
2223 	struct net_device *null_or_orig;
2224 	int ret = NET_RX_DROP;
2225 	__be16 type;
2226 
2227 	if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
2228 		return NET_RX_SUCCESS;
2229 
2230 	/* if we've gotten here through NAPI, check netpoll */
2231 	if (netpoll_receive_skb(skb))
2232 		return NET_RX_DROP;
2233 
2234 	if (!skb->tstamp.tv64)
2235 		net_timestamp(skb);
2236 
2237 	if (!skb->iif)
2238 		skb->iif = skb->dev->ifindex;
2239 
2240 	null_or_orig = NULL;
2241 	orig_dev = skb->dev;
2242 	if (orig_dev->master) {
2243 		if (skb_bond_should_drop(skb))
2244 			null_or_orig = orig_dev; /* deliver only exact match */
2245 		else
2246 			skb->dev = orig_dev->master;
2247 	}
2248 
2249 	__get_cpu_var(netdev_rx_stat).total++;
2250 
2251 	skb_reset_network_header(skb);
2252 	skb_reset_transport_header(skb);
2253 	skb->mac_len = skb->network_header - skb->mac_header;
2254 
2255 	pt_prev = NULL;
2256 
2257 	rcu_read_lock();
2258 
2259 	/* Don't receive packets in an exiting network namespace */
2260 	if (!net_alive(dev_net(skb->dev))) {
2261 		kfree_skb(skb);
2262 		goto out;
2263 	}
2264 
2265 #ifdef CONFIG_NET_CLS_ACT
2266 	if (skb->tc_verd & TC_NCLS) {
2267 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2268 		goto ncls;
2269 	}
2270 #endif
2271 
2272 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2273 		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2274 		    ptype->dev == orig_dev) {
2275 			if (pt_prev)
2276 				ret = deliver_skb(skb, pt_prev, orig_dev);
2277 			pt_prev = ptype;
2278 		}
2279 	}
2280 
2281 #ifdef CONFIG_NET_CLS_ACT
2282 	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2283 	if (!skb)
2284 		goto out;
2285 ncls:
2286 #endif
2287 
2288 	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2289 	if (!skb)
2290 		goto out;
2291 	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2292 	if (!skb)
2293 		goto out;
2294 
2295 	type = skb->protocol;
2296 	list_for_each_entry_rcu(ptype,
2297 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2298 		if (ptype->type == type &&
2299 		    (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2300 		     ptype->dev == orig_dev)) {
2301 			if (pt_prev)
2302 				ret = deliver_skb(skb, pt_prev, orig_dev);
2303 			pt_prev = ptype;
2304 		}
2305 	}
2306 
2307 	if (pt_prev) {
2308 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2309 	} else {
2310 		kfree_skb(skb);
2311 		/* Jamal, now you will not be able to escape explaining
2312 		 * to me how you were going to use this. :-)
2313 		 */
2314 		ret = NET_RX_DROP;
2315 	}
2316 
2317 out:
2318 	rcu_read_unlock();
2319 	return ret;
2320 }
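
/*
 * Editor's sketch (not part of the original file): a minimal NAPI ->poll()
 * handler showing the context netif_receive_skb() expects -- softirq
 * context, interrupts enabled, at most @budget packets per call.
 * example_fetch_skb() stands in for a driver routine that pulls one
 * completed frame off the RX ring and is purely hypothetical.
 */
static struct sk_buff *example_fetch_skb(struct napi_struct *napi);

static int example_poll(struct napi_struct *napi, int budget)
{
	int work = 0;
	struct sk_buff *skb;

	while (work < budget && (skb = example_fetch_skb(napi)) != NULL) {
		skb->protocol = eth_type_trans(skb, skb->dev);
		netif_receive_skb(skb);
		work++;
	}

	if (work < budget)
		napi_complete(napi);	/* done: re-enable RX interrupts here */

	return work;
}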
2321 
2322 /* Network device is going away, flush any packets still pending  */
2323 static void flush_backlog(void *arg)
2324 {
2325 	struct net_device *dev = arg;
2326 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2327 	struct sk_buff *skb, *tmp;
2328 
2329 	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2330 		if (skb->dev == dev) {
2331 			__skb_unlink(skb, &queue->input_pkt_queue);
2332 			kfree_skb(skb);
2333 		}
2334 }
2335 
2336 static int process_backlog(struct napi_struct *napi, int quota)
2337 {
2338 	int work = 0;
2339 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2340 	unsigned long start_time = jiffies;
2341 
2342 	napi->weight = weight_p;
2343 	do {
2344 		struct sk_buff *skb;
2345 
2346 		local_irq_disable();
2347 		skb = __skb_dequeue(&queue->input_pkt_queue);
2348 		if (!skb) {
2349 			__napi_complete(napi);
2350 			local_irq_enable();
2351 			break;
2352 		}
2353 		local_irq_enable();
2354 
2355 		netif_receive_skb(skb);
2356 	} while (++work < quota && jiffies == start_time);
2357 
2358 	return work;
2359 }
2360 
2361 /**
2362  * __napi_schedule - schedule for receive
2363  * @n: entry to schedule
2364  *
2365  * The entry's receive function will be scheduled to run
2366  */
2367 void __napi_schedule(struct napi_struct *n)
2368 {
2369 	unsigned long flags;
2370 
2371 	local_irq_save(flags);
2372 	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2373 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2374 	local_irq_restore(flags);
2375 }
2376 EXPORT_SYMBOL(__napi_schedule);
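
/*
 * Editor's sketch (not part of the original file): the usual caller of the
 * scheduling primitive above -- a device RX interrupt that masks its own
 * interrupts and arms NAPI via napi_schedule(), which ends up in
 * __napi_schedule() once NAPI_STATE_SCHED has been claimed.  The private
 * structure and example_mask_rx_irqs() are hypothetical; irqreturn_t and
 * IRQ_HANDLED come from <linux/interrupt.h> in a real driver.
 */
struct example_priv {
	struct napi_struct napi;
	/* ... device registers, RX/TX rings, etc. ... */
};

static void example_mask_rx_irqs(struct example_priv *priv);

static irqreturn_t example_interrupt(int irq, void *dev_id)
{
	struct example_priv *priv = dev_id;

	example_mask_rx_irqs(priv);	/* stop further RX interrupts        */
	napi_schedule(&priv->napi);	/* ->poll() runs from NET_RX_SOFTIRQ */
	return IRQ_HANDLED;
}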
2377 
2378 
2379 static void net_rx_action(struct softirq_action *h)
2380 {
2381 	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2382 	unsigned long time_limit = jiffies + 2;
2383 	int budget = netdev_budget;
2384 	void *have;
2385 
2386 	local_irq_disable();
2387 
2388 	while (!list_empty(list)) {
2389 		struct napi_struct *n;
2390 		int work, weight;
2391 
2392 		/* If the softirq window is exhausted then punt.
2393 		 * Allow this to run for 2 jiffies, which allows
2394 		 * an average latency of 1.5/HZ.
2395 		 */
2396 		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
2397 			goto softnet_break;
2398 
2399 		local_irq_enable();
2400 
2401 		/* Even though interrupts have been re-enabled, this
2402 		 * access is safe because interrupts can only add new
2403 		 * entries to the tail of this list, and only ->poll()
2404 		 * calls can remove this head entry from the list.
2405 		 */
2406 		n = list_entry(list->next, struct napi_struct, poll_list);
2407 
2408 		have = netpoll_poll_lock(n);
2409 
2410 		weight = n->weight;
2411 
2412 		/* This NAPI_STATE_SCHED test is for avoiding a race
2413 		 * with netpoll's poll_napi().  Only the entity which
2414 		 * obtains the lock and sees NAPI_STATE_SCHED set will
2415 		 * actually make the ->poll() call.  Therefore we avoid
2416 		 * accidentally calling ->poll() when NAPI is not scheduled.
2417 		 */
2418 		work = 0;
2419 		if (test_bit(NAPI_STATE_SCHED, &n->state))
2420 			work = n->poll(n, weight);
2421 
2422 		WARN_ON_ONCE(work > weight);
2423 
2424 		budget -= work;
2425 
2426 		local_irq_disable();
2427 
2428 		/* Drivers must not modify the NAPI state if they
2429 		 * consume the entire weight.  In such cases this code
2430 		 * still "owns" the NAPI instance and therefore can
2431 		 * move the instance around on the list at-will.
2432 		 */
2433 		if (unlikely(work == weight)) {
2434 			if (unlikely(napi_disable_pending(n)))
2435 				__napi_complete(n);
2436 			else
2437 				list_move_tail(&n->poll_list, list);
2438 		}
2439 
2440 		netpoll_poll_unlock(have);
2441 	}
2442 out:
2443 	local_irq_enable();
2444 
2445 #ifdef CONFIG_NET_DMA
2446 	/*
2447 	 * There may not be any more sk_buffs coming right now, so push
2448 	 * any pending DMA copies to hardware
2449 	 */
2450 	if (!cpus_empty(net_dma.channel_mask)) {
2451 		int chan_idx;
2452 		for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) {
2453 			struct dma_chan *chan = net_dma.channels[chan_idx];
2454 			if (chan)
2455 				dma_async_memcpy_issue_pending(chan);
2456 		}
2457 	}
2458 #endif
2459 
2460 	return;
2461 
2462 softnet_break:
2463 	__get_cpu_var(netdev_rx_stat).time_squeeze++;
2464 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2465 	goto out;
2466 }
2467 
2468 static gifconf_func_t *gifconf_list[NPROTO];
2469 
2470 /**
2471  *	register_gifconf	-	register a SIOCGIF handler
2472  *	@family: Address family
2473  *	@gifconf: Function handler
2474  *
2475  *	Register protocol dependent address dumping routines. The handler
2476  *	that is passed must not be freed or reused until it has been replaced
2477  *	by another handler.
2478  */
2479 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2480 {
2481 	if (family >= NPROTO)
2482 		return -EINVAL;
2483 	gifconf_list[family] = gifconf;
2484 	return 0;
2485 }
2486 
2487 
2488 /*
2489  *	Map an interface index to its name (SIOCGIFNAME)
2490  */
2491 
2492 /*
2493  *	We need this ioctl for efficient implementation of the
2494  *	if_indextoname() function required by the IPv6 API.  Without
2495  *	it, we would have to search all the interfaces to find a
2496  *	match.  --pb
2497  */
2498 
2499 static int dev_ifname(struct net *net, struct ifreq __user *arg)
2500 {
2501 	struct net_device *dev;
2502 	struct ifreq ifr;
2503 
2504 	/*
2505 	 *	Fetch the caller's info block.
2506 	 */
2507 
2508 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2509 		return -EFAULT;
2510 
2511 	read_lock(&dev_base_lock);
2512 	dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2513 	if (!dev) {
2514 		read_unlock(&dev_base_lock);
2515 		return -ENODEV;
2516 	}
2517 
2518 	strcpy(ifr.ifr_name, dev->name);
2519 	read_unlock(&dev_base_lock);
2520 
2521 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2522 		return -EFAULT;
2523 	return 0;
2524 }
2525 
2526 /*
2527  *	Perform a SIOCGIFCONF call. This structure will change
2528  *	size eventually, and there is nothing I can do about it.
2529  *	Thus we will need a 'compatibility mode'.
2530  */
2531 
2532 static int dev_ifconf(struct net *net, char __user *arg)
2533 {
2534 	struct ifconf ifc;
2535 	struct net_device *dev;
2536 	char __user *pos;
2537 	int len;
2538 	int total;
2539 	int i;
2540 
2541 	/*
2542 	 *	Fetch the caller's info block.
2543 	 */
2544 
2545 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2546 		return -EFAULT;
2547 
2548 	pos = ifc.ifc_buf;
2549 	len = ifc.ifc_len;
2550 
2551 	/*
2552 	 *	Loop over the interfaces, and write an info block for each.
2553 	 */
2554 
2555 	total = 0;
2556 	for_each_netdev(net, dev) {
2557 		for (i = 0; i < NPROTO; i++) {
2558 			if (gifconf_list[i]) {
2559 				int done;
2560 				if (!pos)
2561 					done = gifconf_list[i](dev, NULL, 0);
2562 				else
2563 					done = gifconf_list[i](dev, pos + total,
2564 							       len - total);
2565 				if (done < 0)
2566 					return -EFAULT;
2567 				total += done;
2568 			}
2569 		}
2570 	}
2571 
2572 	/*
2573 	 *	All done.  Write the updated control block back to the caller.
2574 	 */
2575 	ifc.ifc_len = total;
2576 
2577 	/*
2578 	 * 	Both BSD and Solaris return 0 here, so we do too.
2579 	 */
2580 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2581 }
2582 
2583 #ifdef CONFIG_PROC_FS
2584 /*
2585  *	This is invoked by the /proc filesystem handler to display a device
2586  *	in detail.
2587  */
2588 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2589 	__acquires(dev_base_lock)
2590 {
2591 	struct net *net = seq_file_net(seq);
2592 	loff_t off;
2593 	struct net_device *dev;
2594 
2595 	read_lock(&dev_base_lock);
2596 	if (!*pos)
2597 		return SEQ_START_TOKEN;
2598 
2599 	off = 1;
2600 	for_each_netdev(net, dev)
2601 		if (off++ == *pos)
2602 			return dev;
2603 
2604 	return NULL;
2605 }
2606 
2607 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2608 {
2609 	struct net *net = seq_file_net(seq);
2610 	++*pos;
2611 	return v == SEQ_START_TOKEN ?
2612 		first_net_device(net) : next_net_device((struct net_device *)v);
2613 }
2614 
2615 void dev_seq_stop(struct seq_file *seq, void *v)
2616 	__releases(dev_base_lock)
2617 {
2618 	read_unlock(&dev_base_lock);
2619 }
2620 
2621 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2622 {
2623 	const struct net_device_stats *stats = dev_get_stats(dev);
2624 
2625 	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2626 		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2627 		   dev->name, stats->rx_bytes, stats->rx_packets,
2628 		   stats->rx_errors,
2629 		   stats->rx_dropped + stats->rx_missed_errors,
2630 		   stats->rx_fifo_errors,
2631 		   stats->rx_length_errors + stats->rx_over_errors +
2632 		    stats->rx_crc_errors + stats->rx_frame_errors,
2633 		   stats->rx_compressed, stats->multicast,
2634 		   stats->tx_bytes, stats->tx_packets,
2635 		   stats->tx_errors, stats->tx_dropped,
2636 		   stats->tx_fifo_errors, stats->collisions,
2637 		   stats->tx_carrier_errors +
2638 		    stats->tx_aborted_errors +
2639 		    stats->tx_window_errors +
2640 		    stats->tx_heartbeat_errors,
2641 		   stats->tx_compressed);
2642 }
2643 
2644 /*
2645  *	Called from the PROCfs module. This now uses the new arbitrary sized
2646  *	/proc/net interface to create /proc/net/dev
2647  */
2648 static int dev_seq_show(struct seq_file *seq, void *v)
2649 {
2650 	if (v == SEQ_START_TOKEN)
2651 		seq_puts(seq, "Inter-|   Receive                            "
2652 			      "                    |  Transmit\n"
2653 			      " face |bytes    packets errs drop fifo frame "
2654 			      "compressed multicast|bytes    packets errs "
2655 			      "drop fifo colls carrier compressed\n");
2656 	else
2657 		dev_seq_printf_stats(seq, v);
2658 	return 0;
2659 }
2660 
2661 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2662 {
2663 	struct netif_rx_stats *rc = NULL;
2664 
2665 	while (*pos < nr_cpu_ids)
2666 		if (cpu_online(*pos)) {
2667 			rc = &per_cpu(netdev_rx_stat, *pos);
2668 			break;
2669 		} else
2670 			++*pos;
2671 	return rc;
2672 }
2673 
2674 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2675 {
2676 	return softnet_get_online(pos);
2677 }
2678 
2679 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2680 {
2681 	++*pos;
2682 	return softnet_get_online(pos);
2683 }
2684 
2685 static void softnet_seq_stop(struct seq_file *seq, void *v)
2686 {
2687 }
2688 
2689 static int softnet_seq_show(struct seq_file *seq, void *v)
2690 {
2691 	struct netif_rx_stats *s = v;
2692 
2693 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2694 		   s->total, s->dropped, s->time_squeeze, 0,
2695 		   0, 0, 0, 0, /* was fastroute */
2696 		   s->cpu_collision );
2697 	return 0;
2698 }
2699 
2700 static const struct seq_operations dev_seq_ops = {
2701 	.start = dev_seq_start,
2702 	.next  = dev_seq_next,
2703 	.stop  = dev_seq_stop,
2704 	.show  = dev_seq_show,
2705 };
2706 
2707 static int dev_seq_open(struct inode *inode, struct file *file)
2708 {
2709 	return seq_open_net(inode, file, &dev_seq_ops,
2710 			    sizeof(struct seq_net_private));
2711 }
2712 
2713 static const struct file_operations dev_seq_fops = {
2714 	.owner	 = THIS_MODULE,
2715 	.open    = dev_seq_open,
2716 	.read    = seq_read,
2717 	.llseek  = seq_lseek,
2718 	.release = seq_release_net,
2719 };
2720 
2721 static const struct seq_operations softnet_seq_ops = {
2722 	.start = softnet_seq_start,
2723 	.next  = softnet_seq_next,
2724 	.stop  = softnet_seq_stop,
2725 	.show  = softnet_seq_show,
2726 };
2727 
2728 static int softnet_seq_open(struct inode *inode, struct file *file)
2729 {
2730 	return seq_open(file, &softnet_seq_ops);
2731 }
2732 
2733 static const struct file_operations softnet_seq_fops = {
2734 	.owner	 = THIS_MODULE,
2735 	.open    = softnet_seq_open,
2736 	.read    = seq_read,
2737 	.llseek  = seq_lseek,
2738 	.release = seq_release,
2739 };
2740 
2741 static void *ptype_get_idx(loff_t pos)
2742 {
2743 	struct packet_type *pt = NULL;
2744 	loff_t i = 0;
2745 	int t;
2746 
2747 	list_for_each_entry_rcu(pt, &ptype_all, list) {
2748 		if (i == pos)
2749 			return pt;
2750 		++i;
2751 	}
2752 
2753 	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
2754 		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2755 			if (i == pos)
2756 				return pt;
2757 			++i;
2758 		}
2759 	}
2760 	return NULL;
2761 }
2762 
2763 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2764 	__acquires(RCU)
2765 {
2766 	rcu_read_lock();
2767 	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2768 }
2769 
2770 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2771 {
2772 	struct packet_type *pt;
2773 	struct list_head *nxt;
2774 	int hash;
2775 
2776 	++*pos;
2777 	if (v == SEQ_START_TOKEN)
2778 		return ptype_get_idx(0);
2779 
2780 	pt = v;
2781 	nxt = pt->list.next;
2782 	if (pt->type == htons(ETH_P_ALL)) {
2783 		if (nxt != &ptype_all)
2784 			goto found;
2785 		hash = 0;
2786 		nxt = ptype_base[0].next;
2787 	} else
2788 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
2789 
2790 	while (nxt == &ptype_base[hash]) {
2791 		if (++hash >= PTYPE_HASH_SIZE)
2792 			return NULL;
2793 		nxt = ptype_base[hash].next;
2794 	}
2795 found:
2796 	return list_entry(nxt, struct packet_type, list);
2797 }
2798 
2799 static void ptype_seq_stop(struct seq_file *seq, void *v)
2800 	__releases(RCU)
2801 {
2802 	rcu_read_unlock();
2803 }
2804 
2805 static int ptype_seq_show(struct seq_file *seq, void *v)
2806 {
2807 	struct packet_type *pt = v;
2808 
2809 	if (v == SEQ_START_TOKEN)
2810 		seq_puts(seq, "Type Device      Function\n");
2811 	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
2812 		if (pt->type == htons(ETH_P_ALL))
2813 			seq_puts(seq, "ALL ");
2814 		else
2815 			seq_printf(seq, "%04x", ntohs(pt->type));
2816 
2817 		seq_printf(seq, " %-8s %pF\n",
2818 			   pt->dev ? pt->dev->name : "", pt->func);
2819 	}
2820 
2821 	return 0;
2822 }
2823 
2824 static const struct seq_operations ptype_seq_ops = {
2825 	.start = ptype_seq_start,
2826 	.next  = ptype_seq_next,
2827 	.stop  = ptype_seq_stop,
2828 	.show  = ptype_seq_show,
2829 };
2830 
2831 static int ptype_seq_open(struct inode *inode, struct file *file)
2832 {
2833 	return seq_open_net(inode, file, &ptype_seq_ops,
2834 			sizeof(struct seq_net_private));
2835 }
2836 
2837 static const struct file_operations ptype_seq_fops = {
2838 	.owner	 = THIS_MODULE,
2839 	.open    = ptype_seq_open,
2840 	.read    = seq_read,
2841 	.llseek  = seq_lseek,
2842 	.release = seq_release_net,
2843 };
2844 
2845 
2846 static int __net_init dev_proc_net_init(struct net *net)
2847 {
2848 	int rc = -ENOMEM;
2849 
2850 	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
2851 		goto out;
2852 	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
2853 		goto out_dev;
2854 	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
2855 		goto out_softnet;
2856 
2857 	if (wext_proc_init(net))
2858 		goto out_ptype;
2859 	rc = 0;
2860 out:
2861 	return rc;
2862 out_ptype:
2863 	proc_net_remove(net, "ptype");
2864 out_softnet:
2865 	proc_net_remove(net, "softnet_stat");
2866 out_dev:
2867 	proc_net_remove(net, "dev");
2868 	goto out;
2869 }
2870 
2871 static void __net_exit dev_proc_net_exit(struct net *net)
2872 {
2873 	wext_proc_exit(net);
2874 
2875 	proc_net_remove(net, "ptype");
2876 	proc_net_remove(net, "softnet_stat");
2877 	proc_net_remove(net, "dev");
2878 }
2879 
2880 static struct pernet_operations __net_initdata dev_proc_ops = {
2881 	.init = dev_proc_net_init,
2882 	.exit = dev_proc_net_exit,
2883 };
2884 
2885 static int __init dev_proc_init(void)
2886 {
2887 	return register_pernet_subsys(&dev_proc_ops);
2888 }
2889 #else
2890 #define dev_proc_init() 0
2891 #endif	/* CONFIG_PROC_FS */
2892 
2893 
2894 /**
2895  *	netdev_set_master	-	set up master/slave pair
2896  *	@slave: slave device
2897  *	@master: new master device
2898  *
2899  *	Changes the master device of the slave. Pass %NULL to break the
2900  *	bonding. The caller must hold the RTNL semaphore. On a failure
2901  *	a negative errno code is returned. On success the reference counts
2902  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2903  *	function returns zero.
2904  */
2905 int netdev_set_master(struct net_device *slave, struct net_device *master)
2906 {
2907 	struct net_device *old = slave->master;
2908 
2909 	ASSERT_RTNL();
2910 
2911 	if (master) {
2912 		if (old)
2913 			return -EBUSY;
2914 		dev_hold(master);
2915 	}
2916 
2917 	slave->master = master;
2918 
2919 	synchronize_net();
2920 
2921 	if (old)
2922 		dev_put(old);
2923 
2924 	if (master)
2925 		slave->flags |= IFF_SLAVE;
2926 	else
2927 		slave->flags &= ~IFF_SLAVE;
2928 
2929 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2930 	return 0;
2931 }
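
/*
 * Editor's sketch (not part of the original file): a bonding-style driver
 * attaching and detaching a slave with the helper above.  RTNL must be
 * held, as the function asserts; example_enslave()/example_release() are
 * hypothetical wrappers.
 */
static int example_enslave(struct net_device *master, struct net_device *slave)
{
	int err;

	rtnl_lock();
	err = netdev_set_master(slave, master);
	rtnl_unlock();
	return err;
}

static int example_release(struct net_device *slave)
{
	int err;

	rtnl_lock();
	err = netdev_set_master(slave, NULL);	/* NULL breaks the bonding */
	rtnl_unlock();
	return err;
}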
2932 
2933 static void dev_change_rx_flags(struct net_device *dev, int flags)
2934 {
2935 	const struct net_device_ops *ops = dev->netdev_ops;
2936 
2937 	if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
2938 		ops->ndo_change_rx_flags(dev, flags);
2939 }
2940 
2941 static int __dev_set_promiscuity(struct net_device *dev, int inc)
2942 {
2943 	unsigned short old_flags = dev->flags;
2944 
2945 	ASSERT_RTNL();
2946 
2947 	dev->flags |= IFF_PROMISC;
2948 	dev->promiscuity += inc;
2949 	if (dev->promiscuity == 0) {
2950 		/*
2951 		 * Avoid overflow.
2952 		 * If inc causes an overflow, leave promiscuity untouched and return an error.
2953 		 */
2954 		if (inc < 0)
2955 			dev->flags &= ~IFF_PROMISC;
2956 		else {
2957 			dev->promiscuity -= inc;
2958 			printk(KERN_WARNING "%s: promiscuity touches roof, "
2959 				"set promiscuity failed, promiscuity feature "
2960 				"of device might be broken.\n", dev->name);
2961 			return -EOVERFLOW;
2962 		}
2963 	}
2964 	if (dev->flags != old_flags) {
2965 		printk(KERN_INFO "device %s %s promiscuous mode\n",
2966 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2967 							       "left");
2968 		if (audit_enabled)
2969 			audit_log(current->audit_context, GFP_ATOMIC,
2970 				AUDIT_ANOM_PROMISCUOUS,
2971 				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
2972 				dev->name, (dev->flags & IFF_PROMISC),
2973 				(old_flags & IFF_PROMISC),
2974 				audit_get_loginuid(current),
2975 				current->uid, current->gid,
2976 				audit_get_sessionid(current));
2977 
2978 		dev_change_rx_flags(dev, IFF_PROMISC);
2979 	}
2980 	return 0;
2981 }
2982 
2983 /**
2984  *	dev_set_promiscuity	- update promiscuity count on a device
2985  *	@dev: device
2986  *	@inc: modifier
2987  *
2988  *	Add or remove promiscuity from a device. While the count in the device
2989  *	remains above zero the interface remains promiscuous. Once it hits zero
2990  *	the device reverts to normal filtering operation. A negative @inc
2991  *	value is used to drop promiscuity on the device.
2992  *	Return 0 if successful or a negative errno code on error.
2993  */
2994 int dev_set_promiscuity(struct net_device *dev, int inc)
2995 {
2996 	unsigned short old_flags = dev->flags;
2997 	int err;
2998 
2999 	err = __dev_set_promiscuity(dev, inc);
3000 	if (err < 0)
3001 		return err;
3002 	if (dev->flags != old_flags)
3003 		dev_set_rx_mode(dev);
3004 	return err;
3005 }
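
/*
 * Editor's sketch (not part of the original file): a packet-capture style
 * user of the counted interface above.  Every +1 must eventually be paired
 * with a -1; the device only leaves promiscuous mode when the count drops
 * back to zero.  Callers run under RTNL.  dev_set_allmulti() below follows
 * exactly the same pattern for all-multicast reception.  The example_*
 * names are hypothetical.
 */
static int example_capture_start(struct net_device *dev)
{
	ASSERT_RTNL();
	return dev_set_promiscuity(dev, 1);	/* may return -EOVERFLOW */
}

static void example_capture_stop(struct net_device *dev)
{
	ASSERT_RTNL();
	dev_set_promiscuity(dev, -1);
}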
3006 
3007 /**
3008  *	dev_set_allmulti	- update allmulti count on a device
3009  *	@dev: device
3010  *	@inc: modifier
3011  *
3012  *	Add or remove reception of all multicast frames to a device. While the
3013  *	count in the device remains above zero the interface remains listening
3014  *	to all multicast frames. Once it hits zero the device reverts to normal
3015  *	filtering operation. A negative @inc value is used to drop the counter
3016  *	when releasing a resource needing all multicasts.
3017  *	Return 0 if successful or a negative errno code on error.
3018  */
3019 
3020 int dev_set_allmulti(struct net_device *dev, int inc)
3021 {
3022 	unsigned short old_flags = dev->flags;
3023 
3024 	ASSERT_RTNL();
3025 
3026 	dev->flags |= IFF_ALLMULTI;
3027 	dev->allmulti += inc;
3028 	if (dev->allmulti == 0) {
3029 		/*
3030 		 * Avoid overflow.
3031 		 * If inc causes an overflow, leave allmulti untouched and return an error.
3032 		 */
3033 		if (inc < 0)
3034 			dev->flags &= ~IFF_ALLMULTI;
3035 		else {
3036 			dev->allmulti -= inc;
3037 			printk(KERN_WARNING "%s: allmulti touches roof, "
3038 				"set allmulti failed, allmulti feature of "
3039 				"device might be broken.\n", dev->name);
3040 			return -EOVERFLOW;
3041 		}
3042 	}
3043 	if (dev->flags ^ old_flags) {
3044 		dev_change_rx_flags(dev, IFF_ALLMULTI);
3045 		dev_set_rx_mode(dev);
3046 	}
3047 	return 0;
3048 }
3049 
3050 /*
3051  *	Upload unicast and multicast address lists to device and
3052  *	configure RX filtering. When the device doesn't support unicast
3053  *	filtering it is put in promiscuous mode while unicast addresses
3054  *	are present.
3055  */
3056 void __dev_set_rx_mode(struct net_device *dev)
3057 {
3058 	const struct net_device_ops *ops = dev->netdev_ops;
3059 
3060 	/* dev_open will call this function so the list will stay sane. */
3061 	if (!(dev->flags&IFF_UP))
3062 		return;
3063 
3064 	if (!netif_device_present(dev))
3065 		return;
3066 
3067 	if (ops->ndo_set_rx_mode)
3068 		ops->ndo_set_rx_mode(dev);
3069 	else {
3070 		/* Unicast addresses changes may only happen under the rtnl,
3071 		 * therefore calling __dev_set_promiscuity here is safe.
3072 		 */
3073 		if (dev->uc_count > 0 && !dev->uc_promisc) {
3074 			__dev_set_promiscuity(dev, 1);
3075 			dev->uc_promisc = 1;
3076 		} else if (dev->uc_count == 0 && dev->uc_promisc) {
3077 			__dev_set_promiscuity(dev, -1);
3078 			dev->uc_promisc = 0;
3079 		}
3080 
3081 		if (ops->ndo_set_multicast_list)
3082 			ops->ndo_set_multicast_list(dev);
3083 	}
3084 }
3085 
3086 void dev_set_rx_mode(struct net_device *dev)
3087 {
3088 	netif_addr_lock_bh(dev);
3089 	__dev_set_rx_mode(dev);
3090 	netif_addr_unlock_bh(dev);
3091 }
3092 
3093 int __dev_addr_delete(struct dev_addr_list **list, int *count,
3094 		      void *addr, int alen, int glbl)
3095 {
3096 	struct dev_addr_list *da;
3097 
3098 	for (; (da = *list) != NULL; list = &da->next) {
3099 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3100 		    alen == da->da_addrlen) {
3101 			if (glbl) {
3102 				int old_glbl = da->da_gusers;
3103 				da->da_gusers = 0;
3104 				if (old_glbl == 0)
3105 					break;
3106 			}
3107 			if (--da->da_users)
3108 				return 0;
3109 
3110 			*list = da->next;
3111 			kfree(da);
3112 			(*count)--;
3113 			return 0;
3114 		}
3115 	}
3116 	return -ENOENT;
3117 }
3118 
3119 int __dev_addr_add(struct dev_addr_list **list, int *count,
3120 		   void *addr, int alen, int glbl)
3121 {
3122 	struct dev_addr_list *da;
3123 
3124 	for (da = *list; da != NULL; da = da->next) {
3125 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3126 		    da->da_addrlen == alen) {
3127 			if (glbl) {
3128 				int old_glbl = da->da_gusers;
3129 				da->da_gusers = 1;
3130 				if (old_glbl)
3131 					return 0;
3132 			}
3133 			da->da_users++;
3134 			return 0;
3135 		}
3136 	}
3137 
3138 	da = kzalloc(sizeof(*da), GFP_ATOMIC);
3139 	if (da == NULL)
3140 		return -ENOMEM;
3141 	memcpy(da->da_addr, addr, alen);
3142 	da->da_addrlen = alen;
3143 	da->da_users = 1;
3144 	da->da_gusers = glbl ? 1 : 0;
3145 	da->next = *list;
3146 	*list = da;
3147 	(*count)++;
3148 	return 0;
3149 }
3150 
3151 /**
3152  *	dev_unicast_delete	- Release secondary unicast address.
3153  *	@dev: device
3154  *	@addr: address to delete
3155  *	@alen: length of @addr
3156  *
3157  *	Release reference to a secondary unicast address and remove it
3158  *	from the device if the reference count drops to zero.
3159  *
3160  * 	The caller must hold the rtnl_mutex.
3161  */
3162 int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
3163 {
3164 	int err;
3165 
3166 	ASSERT_RTNL();
3167 
3168 	netif_addr_lock_bh(dev);
3169 	err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3170 	if (!err)
3171 		__dev_set_rx_mode(dev);
3172 	netif_addr_unlock_bh(dev);
3173 	return err;
3174 }
3175 EXPORT_SYMBOL(dev_unicast_delete);
3176 
3177 /**
3178  *	dev_unicast_add		- add a secondary unicast address
3179  *	@dev: device
3180  *	@addr: address to add
3181  *	@alen: length of @addr
3182  *
3183  *	Add a secondary unicast address to the device or increase
3184  *	the reference count if it already exists.
3185  *
3186  *	The caller must hold the rtnl_mutex.
3187  */
3188 int dev_unicast_add(struct net_device *dev, void *addr, int alen)
3189 {
3190 	int err;
3191 
3192 	ASSERT_RTNL();
3193 
3194 	netif_addr_lock_bh(dev);
3195 	err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3196 	if (!err)
3197 		__dev_set_rx_mode(dev);
3198 	netif_addr_unlock_bh(dev);
3199 	return err;
3200 }
3201 EXPORT_SYMBOL(dev_unicast_add);
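
/*
 * Editor's sketch (not part of the original file): a stacked device asking
 * its lower device to accept one extra unicast MAC address with the
 * helpers above, under RTNL.  example_listen_on()/example_listen_off() and
 * the notion of a "lower" device are assumptions for illustration;
 * ETH_ALEN comes from <linux/if_ether.h>.
 */
static int example_listen_on(struct net_device *lower, u8 *mac)
{
	ASSERT_RTNL();
	return dev_unicast_add(lower, mac, ETH_ALEN);
}

static void example_listen_off(struct net_device *lower, u8 *mac)
{
	ASSERT_RTNL();
	dev_unicast_delete(lower, mac, ETH_ALEN);
}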
3202 
3203 int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3204 		    struct dev_addr_list **from, int *from_count)
3205 {
3206 	struct dev_addr_list *da, *next;
3207 	int err = 0;
3208 
3209 	da = *from;
3210 	while (da != NULL) {
3211 		next = da->next;
3212 		if (!da->da_synced) {
3213 			err = __dev_addr_add(to, to_count,
3214 					     da->da_addr, da->da_addrlen, 0);
3215 			if (err < 0)
3216 				break;
3217 			da->da_synced = 1;
3218 			da->da_users++;
3219 		} else if (da->da_users == 1) {
3220 			__dev_addr_delete(to, to_count,
3221 					  da->da_addr, da->da_addrlen, 0);
3222 			__dev_addr_delete(from, from_count,
3223 					  da->da_addr, da->da_addrlen, 0);
3224 		}
3225 		da = next;
3226 	}
3227 	return err;
3228 }
3229 
3230 void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3231 		       struct dev_addr_list **from, int *from_count)
3232 {
3233 	struct dev_addr_list *da, *next;
3234 
3235 	da = *from;
3236 	while (da != NULL) {
3237 		next = da->next;
3238 		if (da->da_synced) {
3239 			__dev_addr_delete(to, to_count,
3240 					  da->da_addr, da->da_addrlen, 0);
3241 			da->da_synced = 0;
3242 			__dev_addr_delete(from, from_count,
3243 					  da->da_addr, da->da_addrlen, 0);
3244 		}
3245 		da = next;
3246 	}
3247 }
3248 
3249 /**
3250  *	dev_unicast_sync - Synchronize device's unicast list to another device
3251  *	@to: destination device
3252  *	@from: source device
3253  *
3254  *	Add newly added addresses to the destination device and release
3255  *	addresses that have no users left. The source device must be
3256  *	locked by netif_addr_lock_bh.
3257  *
3258  *	This function is intended to be called from the dev->set_rx_mode
3259  *	function of layered software devices.
3260  */
3261 int dev_unicast_sync(struct net_device *to, struct net_device *from)
3262 {
3263 	int err = 0;
3264 
3265 	netif_addr_lock_bh(to);
3266 	err = __dev_addr_sync(&to->uc_list, &to->uc_count,
3267 			      &from->uc_list, &from->uc_count);
3268 	if (!err)
3269 		__dev_set_rx_mode(to);
3270 	netif_addr_unlock_bh(to);
3271 	return err;
3272 }
3273 EXPORT_SYMBOL(dev_unicast_sync);
3274 
3275 /**
3276  *	dev_unicast_unsync - Remove synchronized addresses from the destination device
3277  *	@to: destination device
3278  *	@from: source device
3279  *
3280  *	Remove all addresses that were added to the destination device by
3281  *	dev_unicast_sync(). This function is intended to be called from the
3282  *	dev->stop function of layered software devices.
3283  */
3284 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3285 {
3286 	netif_addr_lock_bh(from);
3287 	netif_addr_lock(to);
3288 
3289 	__dev_addr_unsync(&to->uc_list, &to->uc_count,
3290 			  &from->uc_list, &from->uc_count);
3291 	__dev_set_rx_mode(to);
3292 
3293 	netif_addr_unlock(to);
3294 	netif_addr_unlock_bh(from);
3295 }
3296 EXPORT_SYMBOL(dev_unicast_unsync);
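
/*
 * Editor's sketch (not part of the original file): how a layered software
 * device keeps its lower device's unicast filter in sync with the two
 * helpers above, mirroring what VLAN/macvlan-style drivers do.  The
 * private structure, example_upper_set_rx_mode() (an ndo_set_rx_mode
 * implementation) and example_upper_stop() (an ndo_stop implementation)
 * are hypothetical.
 */
struct example_upper_priv {
	struct net_device *lowerdev;
};

static void example_upper_set_rx_mode(struct net_device *upper)
{
	struct example_upper_priv *p = netdev_priv(upper);

	dev_unicast_sync(p->lowerdev, upper);	/* push new addresses down */
}

static int example_upper_stop(struct net_device *upper)
{
	struct example_upper_priv *p = netdev_priv(upper);

	dev_unicast_unsync(p->lowerdev, upper);	/* pull our addresses back */
	return 0;
}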
3297 
3298 static void __dev_addr_discard(struct dev_addr_list **list)
3299 {
3300 	struct dev_addr_list *tmp;
3301 
3302 	while (*list != NULL) {
3303 		tmp = *list;
3304 		*list = tmp->next;
3305 		if (tmp->da_users > tmp->da_gusers)
3306 			printk("__dev_addr_discard: address leakage! "
3307 			       "da_users=%d\n", tmp->da_users);
3308 		kfree(tmp);
3309 	}
3310 }
3311 
3312 static void dev_addr_discard(struct net_device *dev)
3313 {
3314 	netif_addr_lock_bh(dev);
3315 
3316 	__dev_addr_discard(&dev->uc_list);
3317 	dev->uc_count = 0;
3318 
3319 	__dev_addr_discard(&dev->mc_list);
3320 	dev->mc_count = 0;
3321 
3322 	netif_addr_unlock_bh(dev);
3323 }
3324 
3325 /**
3326  *	dev_get_flags - get flags reported to userspace
3327  *	@dev: device
3328  *
3329  *	Get the combination of flag bits exported through APIs to userspace.
3330  */
3331 unsigned dev_get_flags(const struct net_device *dev)
3332 {
3333 	unsigned flags;
3334 
3335 	flags = (dev->flags & ~(IFF_PROMISC |
3336 				IFF_ALLMULTI |
3337 				IFF_RUNNING |
3338 				IFF_LOWER_UP |
3339 				IFF_DORMANT)) |
3340 		(dev->gflags & (IFF_PROMISC |
3341 				IFF_ALLMULTI));
3342 
3343 	if (netif_running(dev)) {
3344 		if (netif_oper_up(dev))
3345 			flags |= IFF_RUNNING;
3346 		if (netif_carrier_ok(dev))
3347 			flags |= IFF_LOWER_UP;
3348 		if (netif_dormant(dev))
3349 			flags |= IFF_DORMANT;
3350 	}
3351 
3352 	return flags;
3353 }
3354 
3355 /**
3356  *	dev_change_flags - change device settings
3357  *	@dev: device
3358  *	@flags: device state flags
3359  *
3360  *	Change settings on device based state flags. The flags are
3361  *	in the userspace exported format.
3362  */
3363 int dev_change_flags(struct net_device *dev, unsigned flags)
3364 {
3365 	int ret, changes;
3366 	int old_flags = dev->flags;
3367 
3368 	ASSERT_RTNL();
3369 
3370 	/*
3371 	 *	Set the flags on our device.
3372 	 */
3373 
3374 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3375 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3376 			       IFF_AUTOMEDIA)) |
3377 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3378 				    IFF_ALLMULTI));
3379 
3380 	/*
3381 	 *	Load in the correct multicast list now the flags have changed.
3382 	 */
3383 
3384 	if ((old_flags ^ flags) & IFF_MULTICAST)
3385 		dev_change_rx_flags(dev, IFF_MULTICAST);
3386 
3387 	dev_set_rx_mode(dev);
3388 
3389 	/*
3390 	 *	Have we downed the interface? We handle IFF_UP ourselves
3391 	 *	according to user attempts to set it, rather than blindly
3392 	 *	setting it.
3393 	 */
3394 
3395 	ret = 0;
3396 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
3397 		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3398 
3399 		if (!ret)
3400 			dev_set_rx_mode(dev);
3401 	}
3402 
3403 	if (dev->flags & IFF_UP &&
3404 	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3405 					  IFF_VOLATILE)))
3406 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
3407 
3408 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
3409 		int inc = (flags & IFF_PROMISC) ? +1 : -1;
3410 		dev->gflags ^= IFF_PROMISC;
3411 		dev_set_promiscuity(dev, inc);
3412 	}
3413 
3414 	/* NOTE: the order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3415 	   is important. Some (broken) drivers set IFF_PROMISC when
3416 	   IFF_ALLMULTI is requested, without asking us and without reporting it.
3417 	 */
3418 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3419 		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3420 		dev->gflags ^= IFF_ALLMULTI;
3421 		dev_set_allmulti(dev, inc);
3422 	}
3423 
3424 	/* Exclude state transition flags, already notified */
3425 	changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3426 	if (changes)
3427 		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
3428 
3429 	return ret;
3430 }
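
/*
 * Editor's sketch (not part of the original file): bringing an interface
 * administratively up from kernel code by setting IFF_UP through
 * dev_change_flags() above.  RTNL must be held around the call;
 * example_bring_up() is hypothetical.
 */
static int example_bring_up(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_change_flags(dev, dev->flags | IFF_UP);
	rtnl_unlock();
	return err;
}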
3431 
3432 /**
3433  *	dev_set_mtu - Change maximum transfer unit
3434  *	@dev: device
3435  *	@new_mtu: new transfer unit
3436  *
3437  *	Change the maximum transfer size of the network device.
3438  */
3439 int dev_set_mtu(struct net_device *dev, int new_mtu)
3440 {
3441 	const struct net_device_ops *ops = dev->netdev_ops;
3442 	int err;
3443 
3444 	if (new_mtu == dev->mtu)
3445 		return 0;
3446 
3447 	/*	MTU must be positive.	 */
3448 	if (new_mtu < 0)
3449 		return -EINVAL;
3450 
3451 	if (!netif_device_present(dev))
3452 		return -ENODEV;
3453 
3454 	err = 0;
3455 	if (ops->ndo_change_mtu)
3456 		err = ops->ndo_change_mtu(dev, new_mtu);
3457 	else
3458 		dev->mtu = new_mtu;
3459 
3460 	if (!err && dev->flags & IFF_UP)
3461 		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
3462 	return err;
3463 }
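
/*
 * Editor's sketch (not part of the original file): a tunnel-style user
 * clamping the MTU of an underlying device with dev_set_mtu() above.  The
 * call is normally made under RTNL (the ioctl path below holds it);
 * example_clamp_mtu() is hypothetical.
 */
static int example_clamp_mtu(struct net_device *dev, int max_mtu)
{
	int err = 0;

	rtnl_lock();
	if (dev->mtu > max_mtu)
		err = dev_set_mtu(dev, max_mtu);
	rtnl_unlock();
	return err;
}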
3464 
3465 /**
3466  *	dev_set_mac_address - Change Media Access Control Address
3467  *	@dev: device
3468  *	@sa: new address
3469  *
3470  *	Change the hardware (MAC) address of the device
3471  */
3472 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3473 {
3474 	const struct net_device_ops *ops = dev->netdev_ops;
3475 	int err;
3476 
3477 	if (!ops->ndo_set_mac_address)
3478 		return -EOPNOTSUPP;
3479 	if (sa->sa_family != dev->type)
3480 		return -EINVAL;
3481 	if (!netif_device_present(dev))
3482 		return -ENODEV;
3483 	err = ops->ndo_set_mac_address(dev, sa);
3484 	if (!err)
3485 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3486 	return err;
3487 }
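
/*
 * Editor's sketch (not part of the original file): setting a device's MAC
 * address from kernel code with the helper above.  The sockaddr family
 * must match dev->type (ARPHRD_ETHER for Ethernet) and RTNL must be held;
 * example_set_mac() is hypothetical and assumes an Ethernet-sized address.
 */
static int example_set_mac(struct net_device *dev, const u8 *mac)
{
	struct sockaddr sa;

	ASSERT_RTNL();
	sa.sa_family = dev->type;
	memcpy(sa.sa_data, mac, ETH_ALEN);
	return dev_set_mac_address(dev, &sa);
}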
3488 
3489 /*
3490  *	Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
3491  */
3492 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
3493 {
3494 	int err;
3495 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3496 
3497 	if (!dev)
3498 		return -ENODEV;
3499 
3500 	switch (cmd) {
3501 		case SIOCGIFFLAGS:	/* Get interface flags */
3502 			ifr->ifr_flags = dev_get_flags(dev);
3503 			return 0;
3504 
3505 		case SIOCGIFMETRIC:	/* Get the metric on the interface
3506 					   (currently unused) */
3507 			ifr->ifr_metric = 0;
3508 			return 0;
3509 
3510 		case SIOCGIFMTU:	/* Get the MTU of a device */
3511 			ifr->ifr_mtu = dev->mtu;
3512 			return 0;
3513 
3514 		case SIOCGIFHWADDR:
3515 			if (!dev->addr_len)
3516 				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3517 			else
3518 				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3519 				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3520 			ifr->ifr_hwaddr.sa_family = dev->type;
3521 			return 0;
3522 
3523 		case SIOCGIFSLAVE:
3524 			err = -EINVAL;
3525 			break;
3526 
3527 		case SIOCGIFMAP:
3528 			ifr->ifr_map.mem_start = dev->mem_start;
3529 			ifr->ifr_map.mem_end   = dev->mem_end;
3530 			ifr->ifr_map.base_addr = dev->base_addr;
3531 			ifr->ifr_map.irq       = dev->irq;
3532 			ifr->ifr_map.dma       = dev->dma;
3533 			ifr->ifr_map.port      = dev->if_port;
3534 			return 0;
3535 
3536 		case SIOCGIFINDEX:
3537 			ifr->ifr_ifindex = dev->ifindex;
3538 			return 0;
3539 
3540 		case SIOCGIFTXQLEN:
3541 			ifr->ifr_qlen = dev->tx_queue_len;
3542 			return 0;
3543 
3544 		default:
3545 			/* dev_ioctl() should ensure this case
3546 			 * is never reached
3547 			 */
3548 			WARN_ON(1);
3549 			err = -EINVAL;
3550 			break;
3551 
3552 	}
3553 	return err;
3554 }
3555 
3556 /*
3557  *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
3558  */
3559 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3560 {
3561 	int err;
3562 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3563 	const struct net_device_ops *ops;
3564 
3565 	if (!dev)
3566 		return -ENODEV;
3567 	ops = dev->netdev_ops;	/* only dereference dev after the NULL check */
3568 	switch (cmd) {
3569 		case SIOCSIFFLAGS:	/* Set interface flags */
3570 			return dev_change_flags(dev, ifr->ifr_flags);
3571 
3572 		case SIOCSIFMETRIC:	/* Set the metric on the interface
3573 					   (currently unused) */
3574 			return -EOPNOTSUPP;
3575 
3576 		case SIOCSIFMTU:	/* Set the MTU of a device */
3577 			return dev_set_mtu(dev, ifr->ifr_mtu);
3578 
3579 		case SIOCSIFHWADDR:
3580 			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3581 
3582 		case SIOCSIFHWBROADCAST:
3583 			if (ifr->ifr_hwaddr.sa_family != dev->type)
3584 				return -EINVAL;
3585 			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3586 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3587 			call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3588 			return 0;
3589 
3590 		case SIOCSIFMAP:
3591 			if (ops->ndo_set_config) {
3592 				if (!netif_device_present(dev))
3593 					return -ENODEV;
3594 				return ops->ndo_set_config(dev, &ifr->ifr_map);
3595 			}
3596 			return -EOPNOTSUPP;
3597 
3598 		case SIOCADDMULTI:
3599 			if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
3600 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3601 				return -EINVAL;
3602 			if (!netif_device_present(dev))
3603 				return -ENODEV;
3604 			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3605 					  dev->addr_len, 1);
3606 
3607 		case SIOCDELMULTI:
3608 			if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
3609 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3610 				return -EINVAL;
3611 			if (!netif_device_present(dev))
3612 				return -ENODEV;
3613 			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3614 					     dev->addr_len, 1);
3615 
3616 		case SIOCSIFTXQLEN:
3617 			if (ifr->ifr_qlen < 0)
3618 				return -EINVAL;
3619 			dev->tx_queue_len = ifr->ifr_qlen;
3620 			return 0;
3621 
3622 		case SIOCSIFNAME:
3623 			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3624 			return dev_change_name(dev, ifr->ifr_newname);
3625 
3626 		/*
3627 		 *	Unknown or private ioctl
3628 		 */
3629 
3630 		default:
3631 			if ((cmd >= SIOCDEVPRIVATE &&
3632 			    cmd <= SIOCDEVPRIVATE + 15) ||
3633 			    cmd == SIOCBONDENSLAVE ||
3634 			    cmd == SIOCBONDRELEASE ||
3635 			    cmd == SIOCBONDSETHWADDR ||
3636 			    cmd == SIOCBONDSLAVEINFOQUERY ||
3637 			    cmd == SIOCBONDINFOQUERY ||
3638 			    cmd == SIOCBONDCHANGEACTIVE ||
3639 			    cmd == SIOCGMIIPHY ||
3640 			    cmd == SIOCGMIIREG ||
3641 			    cmd == SIOCSMIIREG ||
3642 			    cmd == SIOCBRADDIF ||
3643 			    cmd == SIOCBRDELIF ||
3644 			    cmd == SIOCWANDEV) {
3645 				err = -EOPNOTSUPP;
3646 				if (ops->ndo_do_ioctl) {
3647 					if (netif_device_present(dev))
3648 						err = ops->ndo_do_ioctl(dev, ifr, cmd);
3649 					else
3650 						err = -ENODEV;
3651 				}
3652 			} else
3653 				err = -EINVAL;
3654 
3655 	}
3656 	return err;
3657 }
3658 
3659 /*
3660  *	This function handles all "interface"-type I/O control requests. The actual
3661  *	'doing' part of this is dev_ifsioc above.
3662  */
3663 
3664 /**
3665  *	dev_ioctl	-	network device ioctl
3666  *	@net: the applicable net namespace
3667  *	@cmd: command to issue
3668  *	@arg: pointer to a struct ifreq in user space
3669  *
3670  *	Issue ioctl functions to devices. This is normally called by the
3671  *	user space syscall interfaces but can sometimes be useful for
3672  *	other purposes. The return value is the return from the syscall if
3673  *	positive or a negative errno code on error.
3674  */
3675 
3676 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3677 {
3678 	struct ifreq ifr;
3679 	int ret;
3680 	char *colon;
3681 
3682 	/* One special case: SIOCGIFCONF takes ifconf argument
3683 	   and requires shared lock, because it sleeps writing
3684 	   to user space.
3685 	 */
3686 
3687 	if (cmd == SIOCGIFCONF) {
3688 		rtnl_lock();
3689 		ret = dev_ifconf(net, (char __user *) arg);
3690 		rtnl_unlock();
3691 		return ret;
3692 	}
3693 	if (cmd == SIOCGIFNAME)
3694 		return dev_ifname(net, (struct ifreq __user *)arg);
3695 
3696 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3697 		return -EFAULT;
3698 
3699 	ifr.ifr_name[IFNAMSIZ-1] = 0;
3700 
3701 	colon = strchr(ifr.ifr_name, ':');
3702 	if (colon)
3703 		*colon = 0;
3704 
3705 	/*
3706 	 *	See which interface the caller is talking about.
3707 	 */
3708 
3709 	switch (cmd) {
3710 		/*
3711 		 *	These ioctl calls:
3712 		 *	- can be done by all.
3713 		 *	- atomic and do not require locking.
3714 		 *	- return a value
3715 		 */
3716 		case SIOCGIFFLAGS:
3717 		case SIOCGIFMETRIC:
3718 		case SIOCGIFMTU:
3719 		case SIOCGIFHWADDR:
3720 		case SIOCGIFSLAVE:
3721 		case SIOCGIFMAP:
3722 		case SIOCGIFINDEX:
3723 		case SIOCGIFTXQLEN:
3724 			dev_load(net, ifr.ifr_name);
3725 			read_lock(&dev_base_lock);
3726 			ret = dev_ifsioc_locked(net, &ifr, cmd);
3727 			read_unlock(&dev_base_lock);
3728 			if (!ret) {
3729 				if (colon)
3730 					*colon = ':';
3731 				if (copy_to_user(arg, &ifr,
3732 						 sizeof(struct ifreq)))
3733 					ret = -EFAULT;
3734 			}
3735 			return ret;
3736 
3737 		case SIOCETHTOOL:
3738 			dev_load(net, ifr.ifr_name);
3739 			rtnl_lock();
3740 			ret = dev_ethtool(net, &ifr);
3741 			rtnl_unlock();
3742 			if (!ret) {
3743 				if (colon)
3744 					*colon = ':';
3745 				if (copy_to_user(arg, &ifr,
3746 						 sizeof(struct ifreq)))
3747 					ret = -EFAULT;
3748 			}
3749 			return ret;
3750 
3751 		/*
3752 		 *	These ioctl calls:
3753 		 *	- require superuser power.
3754 		 *	- require strict serialization.
3755 		 *	- return a value
3756 		 */
3757 		case SIOCGMIIPHY:
3758 		case SIOCGMIIREG:
3759 		case SIOCSIFNAME:
3760 			if (!capable(CAP_NET_ADMIN))
3761 				return -EPERM;
3762 			dev_load(net, ifr.ifr_name);
3763 			rtnl_lock();
3764 			ret = dev_ifsioc(net, &ifr, cmd);
3765 			rtnl_unlock();
3766 			if (!ret) {
3767 				if (colon)
3768 					*colon = ':';
3769 				if (copy_to_user(arg, &ifr,
3770 						 sizeof(struct ifreq)))
3771 					ret = -EFAULT;
3772 			}
3773 			return ret;
3774 
3775 		/*
3776 		 *	These ioctl calls:
3777 		 *	- require superuser power.
3778 		 *	- require strict serialization.
3779 		 *	- do not return a value
3780 		 */
3781 		case SIOCSIFFLAGS:
3782 		case SIOCSIFMETRIC:
3783 		case SIOCSIFMTU:
3784 		case SIOCSIFMAP:
3785 		case SIOCSIFHWADDR:
3786 		case SIOCSIFSLAVE:
3787 		case SIOCADDMULTI:
3788 		case SIOCDELMULTI:
3789 		case SIOCSIFHWBROADCAST:
3790 		case SIOCSIFTXQLEN:
3791 		case SIOCSMIIREG:
3792 		case SIOCBONDENSLAVE:
3793 		case SIOCBONDRELEASE:
3794 		case SIOCBONDSETHWADDR:
3795 		case SIOCBONDCHANGEACTIVE:
3796 		case SIOCBRADDIF:
3797 		case SIOCBRDELIF:
3798 			if (!capable(CAP_NET_ADMIN))
3799 				return -EPERM;
3800 			/* fall through */
3801 		case SIOCBONDSLAVEINFOQUERY:
3802 		case SIOCBONDINFOQUERY:
3803 			dev_load(net, ifr.ifr_name);
3804 			rtnl_lock();
3805 			ret = dev_ifsioc(net, &ifr, cmd);
3806 			rtnl_unlock();
3807 			return ret;
3808 
3809 		case SIOCGIFMEM:
3810 			/* Get the per device memory space. We can add this but
3811 			 * currently do not support it */
3812 		case SIOCSIFMEM:
3813 			/* Set the per device memory buffer space.
3814 			 * Not applicable in our case */
3815 		case SIOCSIFLINK:
3816 			return -EINVAL;
3817 
3818 		/*
3819 		 *	Unknown or private ioctl.
3820 		 */
3821 		default:
3822 			if (cmd == SIOCWANDEV ||
3823 			    (cmd >= SIOCDEVPRIVATE &&
3824 			     cmd <= SIOCDEVPRIVATE + 15)) {
3825 				dev_load(net, ifr.ifr_name);
3826 				rtnl_lock();
3827 				ret = dev_ifsioc(net, &ifr, cmd);
3828 				rtnl_unlock();
3829 				if (!ret && copy_to_user(arg, &ifr,
3830 							 sizeof(struct ifreq)))
3831 					ret = -EFAULT;
3832 				return ret;
3833 			}
3834 			/* Take care of Wireless Extensions */
3835 			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
3836 				return wext_handle_ioctl(net, &ifr, cmd, arg);
3837 			return -EINVAL;
3838 	}
3839 }
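
/*
 * Editor's sketch (not part of the original file): the kind of user space
 * caller that ends up in dev_ioctl() above -- an application reading a
 * device's MTU with SIOCGIFMTU on any socket fd.  Shown entirely as a
 * comment because it is user space, not kernel, code:
 *
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *	#include <net/if.h>
 *	#include <unistd.h>
 *
 *	int get_mtu(const char *ifname)
 *	{
 *		struct ifreq ifr;
 *		int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *		int mtu = -1;
 *
 *		if (fd < 0)
 *			return -1;
 *		memset(&ifr, 0, sizeof(ifr));
 *		strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
 *		if (ioctl(fd, SIOCGIFMTU, &ifr) == 0)
 *			mtu = ifr.ifr_mtu;
 *		close(fd);
 *		return mtu;
 *	}
 */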
3840 
3841 
3842 /**
3843  *	dev_new_index	-	allocate an ifindex
3844  *	@net: the applicable net namespace
3845  *
3846  *	Returns a suitable unique value for a new device interface
3847  *	number.  The caller must hold the rtnl semaphore or the
3848  *	dev_base_lock to be sure it remains unique.
3849  */
3850 static int dev_new_index(struct net *net)
3851 {
3852 	static int ifindex;
3853 	for (;;) {
3854 		if (++ifindex <= 0)
3855 			ifindex = 1;
3856 		if (!__dev_get_by_index(net, ifindex))
3857 			return ifindex;
3858 	}
3859 }
3860 
3861 /* Delayed registration/unregistration */
3862 static LIST_HEAD(net_todo_list);
3863 
3864 static void net_set_todo(struct net_device *dev)
3865 {
3866 	list_add_tail(&dev->todo_list, &net_todo_list);
3867 }
3868 
3869 static void rollback_registered(struct net_device *dev)
3870 {
3871 	BUG_ON(dev_boot_phase);
3872 	ASSERT_RTNL();
3873 
3874 	/* Some devices call this without ever registering, to unwind failed initialization. */
3875 	if (dev->reg_state == NETREG_UNINITIALIZED) {
3876 		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3877 				  "was registered\n", dev->name, dev);
3878 
3879 		WARN_ON(1);
3880 		return;
3881 	}
3882 
3883 	BUG_ON(dev->reg_state != NETREG_REGISTERED);
3884 
3885 	/* If device is running, close it first. */
3886 	dev_close(dev);
3887 
3888 	/* And unlink it from device chain. */
3889 	unlist_netdevice(dev);
3890 
3891 	dev->reg_state = NETREG_UNREGISTERING;
3892 
3893 	synchronize_net();
3894 
3895 	/* Shutdown queueing discipline. */
3896 	dev_shutdown(dev);
3897 
3898 
3899 	/* Notify protocols that we are about to destroy
3900 	   this device. They should clean up all of their state.
3901 	*/
3902 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
3903 
3904 	/*
3905 	 *	Flush the unicast and multicast chains
3906 	 */
3907 	dev_addr_discard(dev);
3908 
3909 	if (dev->netdev_ops->ndo_uninit)
3910 		dev->netdev_ops->ndo_uninit(dev);
3911 
3912 	/* Notifier chain MUST detach us from master device. */
3913 	WARN_ON(dev->master);
3914 
3915 	/* Remove entries from kobject tree */
3916 	netdev_unregister_kobject(dev);
3917 
3918 	synchronize_net();
3919 
3920 	dev_put(dev);
3921 }
3922 
3923 static void __netdev_init_queue_locks_one(struct net_device *dev,
3924 					  struct netdev_queue *dev_queue,
3925 					  void *_unused)
3926 {
3927 	spin_lock_init(&dev_queue->_xmit_lock);
3928 	netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
3929 	dev_queue->xmit_lock_owner = -1;
3930 }
3931 
3932 static void netdev_init_queue_locks(struct net_device *dev)
3933 {
3934 	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
3935 	__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
3936 }
3937 
3938 unsigned long netdev_fix_features(unsigned long features, const char *name)
3939 {
3940 	/* Fix illegal SG+CSUM combinations. */
3941 	if ((features & NETIF_F_SG) &&
3942 	    !(features & NETIF_F_ALL_CSUM)) {
3943 		if (name)
3944 			printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
3945 			       "checksum feature.\n", name);
3946 		features &= ~NETIF_F_SG;
3947 	}
3948 
3949 	/* TSO requires that SG is present as well. */
3950 	if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
3951 		if (name)
3952 			printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
3953 			       "SG feature.\n", name);
3954 		features &= ~NETIF_F_TSO;
3955 	}
3956 
3957 	if (features & NETIF_F_UFO) {
3958 		if (!(features & NETIF_F_GEN_CSUM)) {
3959 			if (name)
3960 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
3961 				       "since no NETIF_F_HW_CSUM feature.\n",
3962 				       name);
3963 			features &= ~NETIF_F_UFO;
3964 		}
3965 
3966 		if (!(features & NETIF_F_SG)) {
3967 			if (name)
3968 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
3969 				       "since no NETIF_F_SG feature.\n", name);
3970 			features &= ~NETIF_F_UFO;
3971 		}
3972 	}
3973 
3974 	return features;
3975 }
3976 EXPORT_SYMBOL(netdev_fix_features);
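
/*
 * Hedged usage sketch: a caller changing features at runtime could
 * sanitize the requested set before applying it (hypothetical driver
 * code, not part of this file):
 *
 *	unsigned long wanted = dev->features | NETIF_F_TSO;
 *
 *	dev->features = netdev_fix_features(wanted, dev->name);
 *
 * With the rules above, the NETIF_F_TSO bit is dropped again unless
 * NETIF_F_SG (and a checksum feature) is also set.
 */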
3977 
3978 /**
3979  *	register_netdevice	- register a network device
3980  *	@dev: device to register
3981  *
3982  *	Take a completed network device structure and add it to the kernel
3983  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3984  *	chain. 0 is returned on success. A negative errno code is returned
3985  *	on a failure to set up the device, or if the name is a duplicate.
3986  *
3987  *	Callers must hold the rtnl semaphore. You may want
3988  *	register_netdev() instead of this.
3989  *
3990  *	BUGS:
3991  *	The locking appears insufficient to guarantee two parallel registers
3992  *	will not get the same name.
3993  */
3994 
3995 int register_netdevice(struct net_device *dev)
3996 {
3997 	struct hlist_head *head;
3998 	struct hlist_node *p;
3999 	int ret;
4000 	struct net *net = dev_net(dev);
4001 
4002 	BUG_ON(dev_boot_phase);
4003 	ASSERT_RTNL();
4004 
4005 	might_sleep();
4006 
4007 	/* When net_device's are persistent, this will be fatal. */
4008 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
4009 	BUG_ON(!net);
4010 
4011 	spin_lock_init(&dev->addr_list_lock);
4012 	netdev_set_addr_lockdep_class(dev);
4013 	netdev_init_queue_locks(dev);
4014 
4015 	dev->iflink = -1;
4016 
4017 #ifdef CONFIG_COMPAT_NET_DEV_OPS
4018 	/* Netdevice_ops API compatibility support.
4019 	 * This is temporary until all network devices are converted.
4020 	 */
4021 	if (dev->netdev_ops) {
4022 		const struct net_device_ops *ops = dev->netdev_ops;
4023 
4024 		dev->init = ops->ndo_init;
4025 		dev->uninit = ops->ndo_uninit;
4026 		dev->open = ops->ndo_open;
4027 		dev->change_rx_flags = ops->ndo_change_rx_flags;
4028 		dev->set_rx_mode = ops->ndo_set_rx_mode;
4029 		dev->set_multicast_list = ops->ndo_set_multicast_list;
4030 		dev->set_mac_address = ops->ndo_set_mac_address;
4031 		dev->validate_addr = ops->ndo_validate_addr;
4032 		dev->do_ioctl = ops->ndo_do_ioctl;
4033 		dev->set_config = ops->ndo_set_config;
4034 		dev->change_mtu = ops->ndo_change_mtu;
4035 		dev->tx_timeout = ops->ndo_tx_timeout;
4036 		dev->get_stats = ops->ndo_get_stats;
4037 		dev->vlan_rx_register = ops->ndo_vlan_rx_register;
4038 		dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
4039 		dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
4040 #ifdef CONFIG_NET_POLL_CONTROLLER
4041 		dev->poll_controller = ops->ndo_poll_controller;
4042 #endif
4043 	} else {
4044 		char drivername[64];
4045 		pr_info("%s (%s): not using net_device_ops yet\n",
4046 			dev->name, netdev_drivername(dev, drivername, 64));
4047 
4048 		/* This works only because net_device_ops and the
4049 		   compatibility structure are the same. */
4050 		dev->netdev_ops = (void *) &(dev->init);
4051 	}
4052 #endif
4053 
4054 	/* Init, if this function is available */
4055 	if (dev->netdev_ops->ndo_init) {
4056 		ret = dev->netdev_ops->ndo_init(dev);
4057 		if (ret) {
4058 			if (ret > 0)
4059 				ret = -EIO;
4060 			goto out;
4061 		}
4062 	}
4063 
4064 	if (!dev_valid_name(dev->name)) {
4065 		ret = -EINVAL;
4066 		goto err_uninit;
4067 	}
4068 
4069 	dev->ifindex = dev_new_index(net);
4070 	if (dev->iflink == -1)
4071 		dev->iflink = dev->ifindex;
4072 
4073 	/* Check for existence of name */
4074 	head = dev_name_hash(net, dev->name);
4075 	hlist_for_each(p, head) {
4076 		struct net_device *d
4077 			= hlist_entry(p, struct net_device, name_hlist);
4078 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
4079 			ret = -EEXIST;
4080 			goto err_uninit;
4081 		}
4082 	}
4083 
4084 	/* Fix illegal checksum combinations */
4085 	if ((dev->features & NETIF_F_HW_CSUM) &&
4086 	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4087 		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
4088 		       dev->name);
4089 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
4090 	}
4091 
4092 	if ((dev->features & NETIF_F_NO_CSUM) &&
4093 	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4094 		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
4095 		       dev->name);
4096 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
4097 	}
4098 
4099 	dev->features = netdev_fix_features(dev->features, dev->name);
4100 
4101 	/* Enable software GSO if SG is supported. */
4102 	if (dev->features & NETIF_F_SG)
4103 		dev->features |= NETIF_F_GSO;
4104 
4105 	netdev_initialize_kobject(dev);
4106 	ret = netdev_register_kobject(dev);
4107 	if (ret)
4108 		goto err_uninit;
4109 	dev->reg_state = NETREG_REGISTERED;
4110 
4111 	/*
4112 	 *	Default initial state at registry is that the
4113 	 *	device is present.
4114 	 */
4115 
4116 	set_bit(__LINK_STATE_PRESENT, &dev->state);
4117 
4118 	dev_init_scheduler(dev);
4119 	dev_hold(dev);
4120 	list_netdevice(dev);
4121 
4122 	/* Notify protocols that a new device appeared. */
4123 	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
4124 	ret = notifier_to_errno(ret);
4125 	if (ret) {
4126 		rollback_registered(dev);
4127 		dev->reg_state = NETREG_UNREGISTERED;
4128 	}
4129 
4130 out:
4131 	return ret;
4132 
4133 err_uninit:
4134 	if (dev->netdev_ops->ndo_uninit)
4135 		dev->netdev_ops->ndo_uninit(dev);
4136 	goto out;
4137 }
4138 
4139 /**
4140  *	register_netdev	- register a network device
4141  *	@dev: device to register
4142  *
4143  *	Take a completed network device structure and add it to the kernel
4144  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4145  *	chain. 0 is returned on success. A negative errno code is returned
4146  *	on a failure to set up the device, or if the name is a duplicate.
4147  *
4148  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
4149  *	and expands the device name if you passed a format string to
4150  *	alloc_netdev.
4151  */
4152 int register_netdev(struct net_device *dev)
4153 {
4154 	int err;
4155 
4156 	rtnl_lock();
4157 
4158 	/*
4159 	 * If the name is a format string the caller wants us to do a
4160 	 * name allocation.
4161 	 */
4162 	if (strchr(dev->name, '%')) {
4163 		err = dev_alloc_name(dev, dev->name);
4164 		if (err < 0)
4165 			goto out;
4166 	}
4167 
4168 	err = register_netdevice(dev);
4169 out:
4170 	rtnl_unlock();
4171 	return err;
4172 }
4173 EXPORT_SYMBOL(register_netdev);
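
/*
 * Hedged illustration of the usual driver pattern built on the two
 * registration helpers above (struct foo_priv and foo_setup are
 * hypothetical; ether_setup() from <linux/etherdevice.h> is a common
 * real setup callback):
 *
 *	dev = alloc_netdev(sizeof(struct foo_priv), "foo%d", foo_setup);
 *	if (!dev)
 *		return -ENOMEM;
 *	err = register_netdev(dev);
 *	if (err) {
 *		free_netdev(dev);
 *		return err;
 *	}
 *
 * The "%d" in the name is expanded by dev_alloc_name() as described
 * above; register_netdev() takes and drops the RTNL lock itself.
 */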
4174 
4175 /*
4176  * netdev_wait_allrefs - wait until all references are gone.
4177  *
4178  * This is called when unregistering network devices.
4179  *
4180  * Any protocol or device that holds a reference should register
4181  * for netdevice notification, and cleanup and put back the
4182  * reference if they receive an UNREGISTER event.
4183  * We can get stuck here if buggy protocols don't correctly
4184  * call dev_put.
4185  */
4186 static void netdev_wait_allrefs(struct net_device *dev)
4187 {
4188 	unsigned long rebroadcast_time, warning_time;
4189 
4190 	rebroadcast_time = warning_time = jiffies;
4191 	while (atomic_read(&dev->refcnt) != 0) {
4192 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
4193 			rtnl_lock();
4194 
4195 			/* Rebroadcast unregister notification */
4196 			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4197 
4198 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
4199 				     &dev->state)) {
4200 				/* We must not have linkwatch events
4201 				 * pending on unregister. If this
4202 				 * happens, we simply run the queue
4203 				 * unscheduled, resulting in a noop
4204 				 * for this device.
4205 				 */
4206 				linkwatch_run_queue();
4207 			}
4208 
4209 			__rtnl_unlock();
4210 
4211 			rebroadcast_time = jiffies;
4212 		}
4213 
4214 		msleep(250);
4215 
4216 		if (time_after(jiffies, warning_time + 10 * HZ)) {
4217 			printk(KERN_EMERG "unregister_netdevice: "
4218 			       "waiting for %s to become free. Usage "
4219 			       "count = %d\n",
4220 			       dev->name, atomic_read(&dev->refcnt));
4221 			warning_time = jiffies;
4222 		}
4223 	}
4224 }
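
/*
 * Hedged illustration: a protocol that takes a long-lived reference
 * is expected to drop it from its netdevice notifier when it sees
 * NETDEV_UNREGISTER (the proto_* names below are hypothetical):
 *
 *	static int proto_netdev_event(struct notifier_block *nb,
 *				      unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UNREGISTER && proto_detach(dev))
 *			dev_put(dev);
 *		return NOTIFY_DONE;
 *	}
 *
 * A protocol that fails to do so keeps netdev_wait_allrefs() looping
 * and triggers the KERN_EMERG message above.
 */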
4225 
4226 /* The sequence is:
4227  *
4228  *	rtnl_lock();
4229  *	...
4230  *	register_netdevice(x1);
4231  *	register_netdevice(x2);
4232  *	...
4233  *	unregister_netdevice(y1);
4234  *	unregister_netdevice(y2);
4235  *      ...
4236  *	rtnl_unlock();
4237  *	free_netdev(y1);
4238  *	free_netdev(y2);
4239  *
4240  * We are invoked by rtnl_unlock().
4241  * This allows us to deal with problems:
4242  * 1) We can delete sysfs objects which invoke hotplug
4243  *    without deadlocking with linkwatch via keventd.
4244  * 2) Since we run with the RTNL semaphore not held, we can sleep
4245  *    safely in order to wait for the netdev refcnt to drop to zero.
4246  *
4247  * We must not return until all unregister events added during
4248  * the interval the lock was held have been completed.
4249  */
4250 void netdev_run_todo(void)
4251 {
4252 	struct list_head list;
4253 
4254 	/* Snapshot list, allow later requests */
4255 	list_replace_init(&net_todo_list, &list);
4256 
4257 	__rtnl_unlock();
4258 
4259 	while (!list_empty(&list)) {
4260 		struct net_device *dev
4261 			= list_entry(list.next, struct net_device, todo_list);
4262 		list_del(&dev->todo_list);
4263 
4264 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
4265 			printk(KERN_ERR "network todo '%s' but state %d\n",
4266 			       dev->name, dev->reg_state);
4267 			dump_stack();
4268 			continue;
4269 		}
4270 
4271 		dev->reg_state = NETREG_UNREGISTERED;
4272 
4273 		on_each_cpu(flush_backlog, dev, 1);
4274 
4275 		netdev_wait_allrefs(dev);
4276 
4277 		/* paranoia */
4278 		BUG_ON(atomic_read(&dev->refcnt));
4279 		WARN_ON(dev->ip_ptr);
4280 		WARN_ON(dev->ip6_ptr);
4281 		WARN_ON(dev->dn_ptr);
4282 
4283 		if (dev->destructor)
4284 			dev->destructor(dev);
4285 
4286 		/* Free network device */
4287 		kobject_put(&dev->dev.kobj);
4288 	}
4289 }
4290 
4291 /**
4292  *	dev_get_stats	- get network device statistics
4293  *	@dev: device to get statistics from
4294  *
4295  *	Get network statistics from device. The device driver may provide
4296  *	its own method by setting dev->netdev_ops->get_stats; otherwise
4297  *	the internal statistics structure is used.
4298  */
4299 const struct net_device_stats *dev_get_stats(struct net_device *dev)
4300 {
4301 	const struct net_device_ops *ops = dev->netdev_ops;
4302 
4303 	if (ops->ndo_get_stats)
4304 		return ops->ndo_get_stats(dev);
4305 	else
4306 		return &dev->stats;
4307 }
4308 EXPORT_SYMBOL(dev_get_stats);
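
/*
 * Hedged sketch: a driver with its own counters can provide them via
 * net_device_ops (the foo_* names are hypothetical):
 *
 *	static struct net_device_stats *foo_get_stats(struct net_device *dev)
 *	{
 *		struct foo_priv *priv = netdev_priv(dev);
 *
 *		return &priv->stats;
 *	}
 *
 *	static const struct net_device_ops foo_netdev_ops = {
 *		.ndo_get_stats	= foo_get_stats,
 *	};
 *
 * Devices without an ndo_get_stats hook fall back to &dev->stats.
 */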
4309 
4310 static void netdev_init_one_queue(struct net_device *dev,
4311 				  struct netdev_queue *queue,
4312 				  void *_unused)
4313 {
4314 	queue->dev = dev;
4315 }
4316 
4317 static void netdev_init_queues(struct net_device *dev)
4318 {
4319 	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
4320 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
4321 	spin_lock_init(&dev->tx_global_lock);
4322 }
4323 
4324 /**
4325  *	alloc_netdev_mq - allocate network device
4326  *	@sizeof_priv:	size of private data to allocate space for
4327  *	@name:		device name format string
4328  *	@setup:		callback to initialize device
4329  *	@queue_count:	the number of subqueues to allocate
4330  *
4331  *	Allocates a struct net_device with private data area for driver use
4332  *	and performs basic initialization.  Also allocates subquue structs
4333  *	for each queue on the device at the end of the netdevice.
4334  */
4335 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
4336 		void (*setup)(struct net_device *), unsigned int queue_count)
4337 {
4338 	struct netdev_queue *tx;
4339 	struct net_device *dev;
4340 	size_t alloc_size;
4341 	void *p;
4342 
4343 	BUG_ON(strlen(name) >= sizeof(dev->name));
4344 
4345 	alloc_size = sizeof(struct net_device);
4346 	if (sizeof_priv) {
4347 		/* ensure 32-byte alignment of private area */
4348 		alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
4349 		alloc_size += sizeof_priv;
4350 	}
4351 	/* ensure 32-byte alignment of whole construct */
4352 	alloc_size += NETDEV_ALIGN_CONST;
4353 
4354 	p = kzalloc(alloc_size, GFP_KERNEL);
4355 	if (!p) {
4356 		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
4357 		return NULL;
4358 	}
4359 
4360 	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
4361 	if (!tx) {
4362 		printk(KERN_ERR "alloc_netdev: Unable to allocate "
4363 		       "tx qdiscs.\n");
4364 		kfree(p);
4365 		return NULL;
4366 	}
4367 
4368 	dev = (struct net_device *)
4369 		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
4370 	dev->padded = (char *)dev - (char *)p;
4371 	dev_net_set(dev, &init_net);
4372 
4373 	dev->_tx = tx;
4374 	dev->num_tx_queues = queue_count;
4375 	dev->real_num_tx_queues = queue_count;
4376 
4377 	if (sizeof_priv) {
4378 		dev->priv = ((char *)dev +
4379 			     ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
4380 			      & ~NETDEV_ALIGN_CONST));
4381 	}
4382 
4383 	dev->gso_max_size = GSO_MAX_SIZE;
4384 
4385 	netdev_init_queues(dev);
4386 
4387 	netpoll_netdev_init(dev);
4388 	setup(dev);
4389 	strcpy(dev->name, name);
4390 	return dev;
4391 }
4392 EXPORT_SYMBOL(alloc_netdev_mq);
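
/*
 * Hedged usage sketch for the multiqueue allocator above (struct
 * foo_priv is hypothetical; ether_setup() is the usual Ethernet setup
 * callback):
 *
 *	struct foo_priv *priv;
 *	struct net_device *dev;
 *
 *	dev = alloc_netdev_mq(sizeof(struct foo_priv), "foo%d",
 *			      ether_setup, 4);
 *	if (!dev)
 *		return -ENOMEM;
 *	priv = netdev_priv(dev);
 *
 * This reserves room for struct foo_priv behind the net_device and
 * four struct netdev_queue entries for transmit.
 */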
4393 
4394 /**
4395  *	free_netdev - free network device
4396  *	@dev: device
4397  *
4398  *	This function does the last stage of destroying an allocated device
4399  * 	interface. The reference to the device object is released.
4400  *	If this is the last reference then it will be freed.
4401  */
4402 void free_netdev(struct net_device *dev)
4403 {
4404 	release_net(dev_net(dev));
4405 
4406 	kfree(dev->_tx);
4407 
4408 	/*  Compatibility with error handling in drivers */
4409 	if (dev->reg_state == NETREG_UNINITIALIZED) {
4410 		kfree((char *)dev - dev->padded);
4411 		return;
4412 	}
4413 
4414 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
4415 	dev->reg_state = NETREG_RELEASED;
4416 
4417 	/* will free via device release */
4418 	put_device(&dev->dev);
4419 }
4420 
4421 /**
4422  *	synchronize_net -  Synchronize with packet receive processing
4423  *
4424  *	Wait for packets currently being received to be done.
4425  *	Does not block later packets from starting.
4426  */
4427 void synchronize_net(void)
4428 {
4429 	might_sleep();
4430 	synchronize_rcu();
4431 }
4432 
4433 /**
4434  *	unregister_netdevice - remove device from the kernel
4435  *	@dev: device
4436  *
4437  *	This function shuts down a device interface and removes it
4438  *	from the kernel tables.
4439  *
4440  *	Callers must hold the rtnl semaphore.  You may want
4441  *	unregister_netdev() instead of this.
4442  */
4443 
4444 void unregister_netdevice(struct net_device *dev)
4445 {
4446 	ASSERT_RTNL();
4447 
4448 	rollback_registered(dev);
4449 	/* Finish processing unregister after unlock */
4450 	net_set_todo(dev);
4451 }
4452 
4453 /**
4454  *	unregister_netdev - remove device from the kernel
4455  *	@dev: device
4456  *
4457  *	This function shuts down a device interface and removes it
4458  *	from the kernel tables.
4459  *
4460  *	This is just a wrapper for unregister_netdevice that takes
4461  *	the rtnl semaphore.  In general you want to use this and not
4462  *	unregister_netdevice.
4463  */
4464 void unregister_netdev(struct net_device *dev)
4465 {
4466 	rtnl_lock();
4467 	unregister_netdevice(dev);
4468 	rtnl_unlock();
4469 }
4470 
4471 EXPORT_SYMBOL(unregister_netdev);
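
/*
 * Typical (hedged) teardown in a driver's module exit path, mirroring
 * the registration sketch earlier (foo_dev is hypothetical):
 *
 *	static void __exit foo_exit(void)
 *	{
 *		unregister_netdev(foo_dev);
 *		free_netdev(foo_dev);
 *	}
 *
 * unregister_netdev() takes the RTNL lock itself; free_netdev() is
 * safe afterwards because rtnl_unlock() runs netdev_run_todo(), which
 * finishes the unregistration before returning.
 */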
4472 
4473 /**
4474  *	dev_change_net_namespace - move device to a different network namespace
4475  *	@dev: device
4476  *	@net: network namespace
4477  *	@pat: If not NULL name pattern to try if the current device name
4478  *	      is already taken in the destination network namespace.
4479  *
4480  *	This function shuts down a device interface and moves it
4481  *	to a new network namespace. On success 0 is returned, on
4482  *	a failure a negative errno code is returned.
4483  *
4484  *	Callers must hold the rtnl semaphore.
4485  */
4486 
4487 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
4488 {
4489 	char buf[IFNAMSIZ];
4490 	const char *destname;
4491 	int err;
4492 
4493 	ASSERT_RTNL();
4494 
4495 	/* Don't allow namespace local devices to be moved. */
4496 	err = -EINVAL;
4497 	if (dev->features & NETIF_F_NETNS_LOCAL)
4498 		goto out;
4499 
4500 #ifdef CONFIG_SYSFS
4501 	/* Don't allow real devices to be moved when sysfs
4502 	 * is enabled.
4503 	 */
4504 	err = -EINVAL;
4505 	if (dev->dev.parent)
4506 		goto out;
4507 #endif
4508 
4509 	/* Ensure the device has been registered */
4510 	err = -EINVAL;
4511 	if (dev->reg_state != NETREG_REGISTERED)
4512 		goto out;
4513 
4514 	/* Get out if there is nothing to do */
4515 	err = 0;
4516 	if (net_eq(dev_net(dev), net))
4517 		goto out;
4518 
4519 	/* Pick the destination device name, and ensure
4520 	 * we can use it in the destination network namespace.
4521 	 */
4522 	err = -EEXIST;
4523 	destname = dev->name;
4524 	if (__dev_get_by_name(net, destname)) {
4525 		/* We get here if we can't use the current device name */
4526 		if (!pat)
4527 			goto out;
4528 		if (!dev_valid_name(pat))
4529 			goto out;
4530 		if (strchr(pat, '%')) {
4531 			if (__dev_alloc_name(net, pat, buf) < 0)
4532 				goto out;
4533 			destname = buf;
4534 		} else
4535 			destname = pat;
4536 		if (__dev_get_by_name(net, destname))
4537 			goto out;
4538 	}
4539 
4540 	/*
4541 	 * And now a mini version of register_netdevice and unregister_netdevice.
4542 	 */
4543 
4544 	/* If device is running close it first. */
4545 	dev_close(dev);
4546 
4547 	/* And unlink it from device chain */
4548 	err = -ENODEV;
4549 	unlist_netdevice(dev);
4550 
4551 	synchronize_net();
4552 
4553 	/* Shutdown queueing discipline. */
4554 	dev_shutdown(dev);
4555 
4556 	/* Notify protocols that we are about to destroy
4557 	   this device. They should clean up all of their state.
4558 	*/
4559 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4560 
4561 	/*
4562 	 *	Flush the unicast and multicast chains
4563 	 */
4564 	dev_addr_discard(dev);
4565 
4566 	netdev_unregister_kobject(dev);
4567 
4568 	/* Actually switch the network namespace */
4569 	dev_net_set(dev, net);
4570 
4571 	/* Assign the new device name */
4572 	if (destname != dev->name)
4573 		strcpy(dev->name, destname);
4574 
4575 	/* If there is an ifindex conflict assign a new one */
4576 	if (__dev_get_by_index(net, dev->ifindex)) {
4577 		int iflink = (dev->iflink == dev->ifindex);
4578 		dev->ifindex = dev_new_index(net);
4579 		if (iflink)
4580 			dev->iflink = dev->ifindex;
4581 	}
4582 
4583 	/* Fixup kobjects */
4584 	err = netdev_register_kobject(dev);
4585 	WARN_ON(err);
4586 
4587 	/* Add the device back in the hashes */
4588 	list_netdevice(dev);
4589 
4590 	/* Notify protocols that a new device appeared. */
4591 	call_netdevice_notifiers(NETDEV_REGISTER, dev);
4592 
4593 	synchronize_net();
4594 	err = 0;
4595 out:
4596 	return err;
4597 }
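
/*
 * Hedged usage sketch (caller holds the RTNL lock): move a device to
 * another namespace, falling back to a templated name on a conflict.
 *
 *	err = dev_change_net_namespace(dev, newnet, "eth%d");
 *
 * default_device_exit() below uses exactly this call with &init_net
 * and a "dev%d" fallback pattern when a namespace is torn down.
 */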
4598 
4599 static int dev_cpu_callback(struct notifier_block *nfb,
4600 			    unsigned long action,
4601 			    void *ocpu)
4602 {
4603 	struct sk_buff **list_skb;
4604 	struct Qdisc **list_net;
4605 	struct sk_buff *skb;
4606 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
4607 	struct softnet_data *sd, *oldsd;
4608 
4609 	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
4610 		return NOTIFY_OK;
4611 
4612 	local_irq_disable();
4613 	cpu = smp_processor_id();
4614 	sd = &per_cpu(softnet_data, cpu);
4615 	oldsd = &per_cpu(softnet_data, oldcpu);
4616 
4617 	/* Find end of our completion_queue. */
4618 	list_skb = &sd->completion_queue;
4619 	while (*list_skb)
4620 		list_skb = &(*list_skb)->next;
4621 	/* Append completion queue from offline CPU. */
4622 	*list_skb = oldsd->completion_queue;
4623 	oldsd->completion_queue = NULL;
4624 
4625 	/* Find end of our output_queue. */
4626 	list_net = &sd->output_queue;
4627 	while (*list_net)
4628 		list_net = &(*list_net)->next_sched;
4629 	/* Append output queue from offline CPU. */
4630 	*list_net = oldsd->output_queue;
4631 	oldsd->output_queue = NULL;
4632 
4633 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
4634 	local_irq_enable();
4635 
4636 	/* Process offline CPU's input_pkt_queue */
4637 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
4638 		netif_rx(skb);
4639 
4640 	return NOTIFY_OK;
4641 }
4642 
4643 #ifdef CONFIG_NET_DMA
4644 /**
4645  * net_dma_rebalance - try to maintain one DMA channel per CPU
4646  * @net_dma: DMA client and associated data (lock, channels, channel_mask)
4647  *
4648  * This is called when the number of channels allocated to the net_dma client
4649  * changes.  The net_dma client tries to have one DMA channel per CPU.
4650  */
4651 
4652 static void net_dma_rebalance(struct net_dma *net_dma)
4653 {
4654 	unsigned int cpu, i, n, chan_idx;
4655 	struct dma_chan *chan;
4656 
4657 	if (cpus_empty(net_dma->channel_mask)) {
4658 		for_each_online_cpu(cpu)
4659 			rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
4660 		return;
4661 	}
4662 
4663 	i = 0;
4664 	cpu = first_cpu(cpu_online_map);
4665 
4666 	for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
4667 		chan = net_dma->channels[chan_idx];
4668 
4669 		n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
4670 		   + (i < (num_online_cpus() %
4671 			cpus_weight(net_dma->channel_mask)) ? 1 : 0));
4672 
4673 		while (n) {
4674 			per_cpu(softnet_data, cpu).net_dma = chan;
4675 			cpu = next_cpu(cpu, cpu_online_map);
4676 			n--;
4677 		}
4678 		i++;
4679 	}
4680 }
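
/*
 * Worked example for the distribution above: with 8 online CPUs and
 * 3 channels, num_online_cpus() / cpus_weight() is 2 and the
 * remainder is 2, so the first two channels each serve 3 CPUs and the
 * last channel serves 2.
 */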
4681 
4682 /**
4683  * netdev_dma_event - event callback for the net_dma_client
4684  * @client: should always be net_dma_client
4685  * @chan: DMA channel for the event
4686  * @state: DMA state to be handled
4687  */
4688 static enum dma_state_client
4689 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
4690 	enum dma_state state)
4691 {
4692 	int i, found = 0, pos = -1;
4693 	struct net_dma *net_dma =
4694 		container_of(client, struct net_dma, client);
4695 	enum dma_state_client ack = DMA_DUP; /* default: take no action */
4696 
4697 	spin_lock(&net_dma->lock);
4698 	switch (state) {
4699 	case DMA_RESOURCE_AVAILABLE:
4700 		for (i = 0; i < nr_cpu_ids; i++)
4701 			if (net_dma->channels[i] == chan) {
4702 				found = 1;
4703 				break;
4704 			} else if (net_dma->channels[i] == NULL && pos < 0)
4705 				pos = i;
4706 
4707 		if (!found && pos >= 0) {
4708 			ack = DMA_ACK;
4709 			net_dma->channels[pos] = chan;
4710 			cpu_set(pos, net_dma->channel_mask);
4711 			net_dma_rebalance(net_dma);
4712 		}
4713 		break;
4714 	case DMA_RESOURCE_REMOVED:
4715 		for (i = 0; i < nr_cpu_ids; i++)
4716 			if (net_dma->channels[i] == chan) {
4717 				found = 1;
4718 				pos = i;
4719 				break;
4720 			}
4721 
4722 		if (found) {
4723 			ack = DMA_ACK;
4724 			cpu_clear(pos, net_dma->channel_mask);
4725 			net_dma->channels[i] = NULL;
4726 			net_dma_rebalance(net_dma);
4727 		}
4728 		break;
4729 	default:
4730 		break;
4731 	}
4732 	spin_unlock(&net_dma->lock);
4733 
4734 	return ack;
4735 }
4736 
4737 /**
4738  * netdev_dma_register - register the networking subsystem as a DMA client
4739  */
4740 static int __init netdev_dma_register(void)
4741 {
4742 	net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct net_dma),
4743 								GFP_KERNEL);
4744 	if (unlikely(!net_dma.channels)) {
4745 		printk(KERN_NOTICE
4746 				"netdev_dma: no memory for net_dma.channels\n");
4747 		return -ENOMEM;
4748 	}
4749 	spin_lock_init(&net_dma.lock);
4750 	dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
4751 	dma_async_client_register(&net_dma.client);
4752 	dma_async_client_chan_request(&net_dma.client);
4753 	return 0;
4754 }
4755 
4756 #else
4757 static int __init netdev_dma_register(void) { return -ENODEV; }
4758 #endif /* CONFIG_NET_DMA */
4759 
4760 /**
4761  *	netdev_increment_features - increment feature set by one
4762  *	@all: current feature set
4763  *	@one: new feature set
4764  *	@mask: mask feature set
4765  *
4766  *	Computes a new feature set after adding a device with feature set
4767  *	@one to the master device with current feature set @all.  Will not
4768  *	enable anything that is off in @mask. Returns the new feature set.
4769  */
4770 unsigned long netdev_increment_features(unsigned long all, unsigned long one,
4771 					unsigned long mask)
4772 {
4773 	/* If device needs checksumming, downgrade to it. */
4774 	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4775 		all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
4776 	else if (mask & NETIF_F_ALL_CSUM) {
4777 		/* If one device supports v4/v6 checksumming, set for all. */
4778 		if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
4779 		    !(all & NETIF_F_GEN_CSUM)) {
4780 			all &= ~NETIF_F_ALL_CSUM;
4781 			all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
4782 		}
4783 
4784 		/* If one device supports hw checksumming, set for all. */
4785 		if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
4786 			all &= ~NETIF_F_ALL_CSUM;
4787 			all |= NETIF_F_HW_CSUM;
4788 		}
4789 	}
4790 
4791 	one |= NETIF_F_ALL_CSUM;
4792 
4793 	one |= all & NETIF_F_ONE_FOR_ALL;
4794 	all &= one | NETIF_F_LLTX | NETIF_F_GSO;
4795 	all |= one & mask & NETIF_F_ONE_FOR_ALL;
4796 
4797 	return all;
4798 }
4799 EXPORT_SYMBOL(netdev_increment_features);
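
/*
 * Worked example (illustrative): starting from a master with
 * all = NETIF_F_NO_CSUM | NETIF_F_SG, adding a slave with
 * one = NETIF_F_IP_CSUM | NETIF_F_SG takes the first branch above and
 * yields NETIF_F_IP_CSUM | NETIF_F_SG: checksumming is downgraded to
 * what the new slave can do, while NETIF_F_SG survives because the
 * slave advertises it too.
 */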
4800 
4801 static struct hlist_head *netdev_create_hash(void)
4802 {
4803 	int i;
4804 	struct hlist_head *hash;
4805 
4806 	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
4807 	if (hash != NULL)
4808 		for (i = 0; i < NETDEV_HASHENTRIES; i++)
4809 			INIT_HLIST_HEAD(&hash[i]);
4810 
4811 	return hash;
4812 }
4813 
4814 /* Initialize per network namespace state */
4815 static int __net_init netdev_init(struct net *net)
4816 {
4817 	INIT_LIST_HEAD(&net->dev_base_head);
4818 
4819 	net->dev_name_head = netdev_create_hash();
4820 	if (net->dev_name_head == NULL)
4821 		goto err_name;
4822 
4823 	net->dev_index_head = netdev_create_hash();
4824 	if (net->dev_index_head == NULL)
4825 		goto err_idx;
4826 
4827 	return 0;
4828 
4829 err_idx:
4830 	kfree(net->dev_name_head);
4831 err_name:
4832 	return -ENOMEM;
4833 }
4834 
4835 /**
4836  *	netdev_drivername - network driver for the device
4837  *	@dev: network device
4838  *	@buffer: buffer for resulting name
4839  *	@len: size of buffer
4840  *
4841  *	Determine network driver for device.
4842  */
4843 char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
4844 {
4845 	const struct device_driver *driver;
4846 	const struct device *parent;
4847 
4848 	if (len <= 0 || !buffer)
4849 		return buffer;
4850 	buffer[0] = 0;
4851 
4852 	parent = dev->dev.parent;
4853 
4854 	if (!parent)
4855 		return buffer;
4856 
4857 	driver = parent->driver;
4858 	if (driver && driver->name)
4859 		strlcpy(buffer, driver->name, len);
4860 	return buffer;
4861 }
4862 
4863 static void __net_exit netdev_exit(struct net *net)
4864 {
4865 	kfree(net->dev_name_head);
4866 	kfree(net->dev_index_head);
4867 }
4868 
4869 static struct pernet_operations __net_initdata netdev_net_ops = {
4870 	.init = netdev_init,
4871 	.exit = netdev_exit,
4872 };
4873 
4874 static void __net_exit default_device_exit(struct net *net)
4875 {
4876 	struct net_device *dev, *next;
4877 	/*
4878 	 * Push all migratable network devices back to the
4879 	 * initial network namespace
4880 	 */
4881 	rtnl_lock();
4882 	for_each_netdev_safe(net, dev, next) {
4883 		int err;
4884 		char fb_name[IFNAMSIZ];
4885 
4886 		/* Ignore unmovable devices (e.g. loopback) */
4887 		if (dev->features & NETIF_F_NETNS_LOCAL)
4888 			continue;
4889 
4890 		/* Delete virtual devices */
4891 		if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
4892 			dev->rtnl_link_ops->dellink(dev);
4893 			continue;
4894 		}
4895 
4896 		/* Push remaining network devices to init_net */
4897 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
4898 		err = dev_change_net_namespace(dev, &init_net, fb_name);
4899 		if (err) {
4900 			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
4901 				__func__, dev->name, err);
4902 			BUG();
4903 		}
4904 	}
4905 	rtnl_unlock();
4906 }
4907 
4908 static struct pernet_operations __net_initdata default_device_ops = {
4909 	.exit = default_device_exit,
4910 };
4911 
4912 /*
4913  *	Initialize the DEV module. At boot time this walks the device list and
4914  *	unhooks any devices that fail to initialise (normally hardware not
4915  *	present) and leaves us with a valid list of present and active devices.
4916  *
4917  */
4918 
4919 /*
4920  *       This is called single threaded during boot, so no need
4921  *       to take the rtnl semaphore.
4922  */
4923 static int __init net_dev_init(void)
4924 {
4925 	int i, rc = -ENOMEM;
4926 
4927 	BUG_ON(!dev_boot_phase);
4928 
4929 	if (dev_proc_init())
4930 		goto out;
4931 
4932 	if (netdev_kobject_init())
4933 		goto out;
4934 
4935 	INIT_LIST_HEAD(&ptype_all);
4936 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
4937 		INIT_LIST_HEAD(&ptype_base[i]);
4938 
4939 	if (register_pernet_subsys(&netdev_net_ops))
4940 		goto out;
4941 
4942 	/*
4943 	 *	Initialise the packet receive queues.
4944 	 */
4945 
4946 	for_each_possible_cpu(i) {
4947 		struct softnet_data *queue;
4948 
4949 		queue = &per_cpu(softnet_data, i);
4950 		skb_queue_head_init(&queue->input_pkt_queue);
4951 		queue->completion_queue = NULL;
4952 		INIT_LIST_HEAD(&queue->poll_list);
4953 
4954 		queue->backlog.poll = process_backlog;
4955 		queue->backlog.weight = weight_p;
4956 	}
4957 
4958 	dev_boot_phase = 0;
4959 
4960 	/* The loopback device is special: if any other network device
4961 	 * is present in a network namespace, the loopback device must
4962 	 * be present too. Since we now dynamically allocate and free
4963 	 * the loopback device, ensure this invariant is maintained by
4964 	 * keeping the loopback device as the first device on the
4965 	 * list of network devices, so that the loopback device
4966 	 * is the first device that appears and the last network device
4967 	 * that disappears.
4968 	 */
4969 	if (register_pernet_device(&loopback_net_ops))
4970 		goto out;
4971 
4972 	if (register_pernet_device(&default_device_ops))
4973 		goto out;
4974 
4975 	netdev_dma_register();
4976 
4977 	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
4978 	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
4979 
4980 	hotcpu_notifier(dev_cpu_callback, 0);
4981 	dst_init();
4982 	dev_mcast_init();
4983 	rc = 0;
4984 out:
4985 	return rc;
4986 }
4987 
4988 subsys_initcall(net_dev_init);
4989 
4990 EXPORT_SYMBOL(__dev_get_by_index);
4991 EXPORT_SYMBOL(__dev_get_by_name);
4992 EXPORT_SYMBOL(__dev_remove_pack);
4993 EXPORT_SYMBOL(dev_valid_name);
4994 EXPORT_SYMBOL(dev_add_pack);
4995 EXPORT_SYMBOL(dev_alloc_name);
4996 EXPORT_SYMBOL(dev_close);
4997 EXPORT_SYMBOL(dev_get_by_flags);
4998 EXPORT_SYMBOL(dev_get_by_index);
4999 EXPORT_SYMBOL(dev_get_by_name);
5000 EXPORT_SYMBOL(dev_open);
5001 EXPORT_SYMBOL(dev_queue_xmit);
5002 EXPORT_SYMBOL(dev_remove_pack);
5003 EXPORT_SYMBOL(dev_set_allmulti);
5004 EXPORT_SYMBOL(dev_set_promiscuity);
5005 EXPORT_SYMBOL(dev_change_flags);
5006 EXPORT_SYMBOL(dev_set_mtu);
5007 EXPORT_SYMBOL(dev_set_mac_address);
5008 EXPORT_SYMBOL(free_netdev);
5009 EXPORT_SYMBOL(netdev_boot_setup_check);
5010 EXPORT_SYMBOL(netdev_set_master);
5011 EXPORT_SYMBOL(netdev_state_change);
5012 EXPORT_SYMBOL(netif_receive_skb);
5013 EXPORT_SYMBOL(netif_rx);
5014 EXPORT_SYMBOL(register_gifconf);
5015 EXPORT_SYMBOL(register_netdevice);
5016 EXPORT_SYMBOL(register_netdevice_notifier);
5017 EXPORT_SYMBOL(skb_checksum_help);
5018 EXPORT_SYMBOL(synchronize_net);
5019 EXPORT_SYMBOL(unregister_netdevice);
5020 EXPORT_SYMBOL(unregister_netdevice_notifier);
5021 EXPORT_SYMBOL(net_enable_timestamp);
5022 EXPORT_SYMBOL(net_disable_timestamp);
5023 EXPORT_SYMBOL(dev_get_flags);
5024 
5025 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
5026 EXPORT_SYMBOL(br_handle_frame_hook);
5027 EXPORT_SYMBOL(br_fdb_get_hook);
5028 EXPORT_SYMBOL(br_fdb_put_hook);
5029 #endif
5030 
5031 EXPORT_SYMBOL(dev_load);
5032 
5033 EXPORT_PER_CPU_SYMBOL(softnet_data);
5034