1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <[email protected]>
12  *				Mark Evans, <[email protected]>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <[email protected]>
16  *		Alan Cox <[email protected]>
17  *		David Hinds <[email protected]>
18  *		Alexey Kuznetsov <[email protected]>
19  *		Adam Sulmicki <[email protected]>
20  *              Pekka Riikonen <[email protected]>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/sched.h>
83 #include <linux/mutex.h>
84 #include <linux/string.h>
85 #include <linux/mm.h>
86 #include <linux/socket.h>
87 #include <linux/sockios.h>
88 #include <linux/errno.h>
89 #include <linux/interrupt.h>
90 #include <linux/if_ether.h>
91 #include <linux/netdevice.h>
92 #include <linux/etherdevice.h>
93 #include <linux/ethtool.h>
94 #include <linux/notifier.h>
95 #include <linux/skbuff.h>
96 #include <net/net_namespace.h>
97 #include <net/sock.h>
98 #include <linux/rtnetlink.h>
99 #include <linux/proc_fs.h>
100 #include <linux/seq_file.h>
101 #include <linux/stat.h>
102 #include <linux/if_bridge.h>
103 #include <linux/if_macvlan.h>
104 #include <net/dst.h>
105 #include <net/pkt_sched.h>
106 #include <net/checksum.h>
107 #include <linux/highmem.h>
108 #include <linux/init.h>
109 #include <linux/kmod.h>
110 #include <linux/module.h>
111 #include <linux/netpoll.h>
112 #include <linux/rcupdate.h>
113 #include <linux/delay.h>
114 #include <net/wext.h>
115 #include <net/iw_handler.h>
116 #include <asm/current.h>
117 #include <linux/audit.h>
118 #include <linux/dmaengine.h>
119 #include <linux/err.h>
120 #include <linux/ctype.h>
121 #include <linux/if_arp.h>
122 #include <linux/if_vlan.h>
123 #include <linux/ip.h>
124 #include <net/ip.h>
125 #include <linux/ipv6.h>
126 #include <linux/in.h>
127 #include <linux/jhash.h>
128 #include <linux/random.h>
129 
130 #include "net-sysfs.h"
131 
132 /*
133  *	The list of packet types we will receive (as opposed to discard)
134  *	and the routines to invoke.
135  *
136  *	Why 16?  Because with 16 the only overlap we get on a hash of the
137  *	low nibble of the protocol value is RARP/SNAP/X.25.
138  *
139  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
140  *             sure which should go first, but I bet it won't make much
141  *             difference if we are running VLANs.  The good news is that
142  *             this protocol won't be in the list unless compiled in, so
143  *             the average user (w/out VLANs) will not be adversely affected.
144  *             --BLG
145  *
146  *		0800	IP
147  *		8100    802.1Q VLAN
148  *		0001	802.3
149  *		0002	AX.25
150  *		0004	802.2
151  *		8035	RARP
152  *		0005	SNAP
153  *		0805	X.25
154  *		0806	ARP
155  *		8137	IPX
156  *		0009	Localtalk
157  *		86DD	IPv6
158  */
159 
160 #define PTYPE_HASH_SIZE	(16)
161 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
162 
163 static DEFINE_SPINLOCK(ptype_lock);
164 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
165 static struct list_head ptype_all __read_mostly;	/* Taps */
166 
167 #ifdef CONFIG_NET_DMA
168 struct net_dma {
169 	struct dma_client client;
170 	spinlock_t lock;
171 	cpumask_t channel_mask;
172 	struct dma_chan **channels;
173 };
174 
175 static enum dma_state_client
176 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
177 	enum dma_state state);
178 
179 static struct net_dma net_dma = {
180 	.client = {
181 		.event_callback = netdev_dma_event,
182 	},
183 };
184 #endif
185 
186 /*
187  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
188  * semaphore.
189  *
190  * Pure readers hold dev_base_lock for reading.
191  *
192  * Writers must hold the rtnl semaphore while they loop through the
193  * dev_base_head list, and hold dev_base_lock for writing when they do the
194  * actual updates.  This allows pure readers to access the list even
195  * while a writer is preparing to update it.
196  *
197  * To put it another way, dev_base_lock is held for writing only to
198  * protect against pure readers; the rtnl semaphore provides the
199  * protection against other writers.
200  *
201  * See, for example usages, register_netdevice() and
202  * unregister_netdevice(), which must be called with the rtnl
203  * semaphore held.
204  */
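/*
 * A minimal reader-side sketch of the scheme described above (illustrative
 * only; dev_get_by_name() below is a real example of this pattern):
 *
 *	read_lock(&dev_base_lock);
 *	dev = __dev_get_by_name(net, "eth0");
 *	if (dev)
 *		dev_hold(dev);
 *	read_unlock(&dev_base_lock);
 */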
205 DEFINE_RWLOCK(dev_base_lock);
206 
207 EXPORT_SYMBOL(dev_base_lock);
208 
209 #define NETDEV_HASHBITS	8
210 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
211 
212 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
213 {
214 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
215 	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
216 }
217 
218 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
219 {
220 	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
221 }
222 
223 /* Device list insertion */
224 static int list_netdevice(struct net_device *dev)
225 {
226 	struct net *net = dev_net(dev);
227 
228 	ASSERT_RTNL();
229 
230 	write_lock_bh(&dev_base_lock);
231 	list_add_tail(&dev->dev_list, &net->dev_base_head);
232 	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
233 	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
234 	write_unlock_bh(&dev_base_lock);
235 	return 0;
236 }
237 
238 /* Device list removal */
239 static void unlist_netdevice(struct net_device *dev)
240 {
241 	ASSERT_RTNL();
242 
243 	/* Unlink dev from the device chain */
244 	write_lock_bh(&dev_base_lock);
245 	list_del(&dev->dev_list);
246 	hlist_del(&dev->name_hlist);
247 	hlist_del(&dev->index_hlist);
248 	write_unlock_bh(&dev_base_lock);
249 }
250 
251 /*
252  *	Our notifier list
253  */
254 
255 static RAW_NOTIFIER_HEAD(netdev_chain);
256 
257 /*
258  *	Device drivers call our routines to queue packets here. We empty the
259  *	queue in the local softnet handler.
260  */
261 
262 DEFINE_PER_CPU(struct softnet_data, softnet_data);
263 
264 #ifdef CONFIG_LOCKDEP
265 /*
266  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
267  * according to dev->type
268  */
269 static const unsigned short netdev_lock_type[] =
270 	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
271 	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
272 	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
273 	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
274 	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
275 	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
276 	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
277 	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
278 	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
279 	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
280 	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
281 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
282 	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
283 	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
284 	 ARPHRD_NONE};
285 
286 static const char *netdev_lock_name[] =
287 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
288 	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
289 	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
290 	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
291 	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
292 	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
293 	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
294 	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
295 	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
296 	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
297 	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
298 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
299 	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
300 	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
301 	 "_xmit_NONE"};
302 
303 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
304 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
305 
306 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
307 {
308 	int i;
309 
310 	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
311 		if (netdev_lock_type[i] == dev_type)
312 			return i;
313 	/* the last key is used by default */
314 	return ARRAY_SIZE(netdev_lock_type) - 1;
315 }
316 
317 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
318 						 unsigned short dev_type)
319 {
320 	int i;
321 
322 	i = netdev_lock_pos(dev_type);
323 	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
324 				   netdev_lock_name[i]);
325 }
326 
327 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
328 {
329 	int i;
330 
331 	i = netdev_lock_pos(dev->type);
332 	lockdep_set_class_and_name(&dev->addr_list_lock,
333 				   &netdev_addr_lock_key[i],
334 				   netdev_lock_name[i]);
335 }
336 #else
337 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
338 						 unsigned short dev_type)
339 {
340 }
341 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
342 {
343 }
344 #endif
345 
346 /*******************************************************************************
347 
348 		Protocol management and registration routines
349 
350 *******************************************************************************/
351 
352 /*
353  *	Add a protocol ID to the list. Now that the input handler is
354  *	smarter we can dispense with all the messy stuff that used to be
355  *	here.
356  *
357  *	BEWARE!!! Protocol handlers, mangling input packets,
358  *	MUST BE last in hash buckets and checking protocol handlers
359  *	MUST start from promiscuous ptype_all chain in net_bh.
360  *	It is true now, do not change it.
361  *	Explanation: if a protocol handler that mangles packets were
362  *	first on the list, it would not be able to sense that the packet
363  *	is cloned and should be copied-on-write, so it would modify it
364  *	and subsequent readers would get a broken packet.
365  *							--ANK (980803)
366  */
367 
368 /**
369  *	dev_add_pack - add packet handler
370  *	@pt: packet type declaration
371  *
372  *	Add a protocol handler to the networking stack. The passed &packet_type
373  *	is linked into kernel lists and may not be freed until it has been
374  *	removed from the kernel lists.
375  *
376  *	This call does not sleep, therefore it cannot guarantee that all
377  *	CPUs that are in the middle of receiving packets will see the new
378  *	packet type (until the next received packet).
379  */
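/*
 * Usage sketch (my_rcv and my_ptype are hypothetical names, for illustration
 * only; the handler is later removed with dev_remove_pack(&my_ptype)):
 *
 *	static int my_rcv(struct sk_buff *skb, struct net_device *dev,
 *			  struct packet_type *pt, struct net_device *orig_dev);
 *
 *	static struct packet_type my_ptype = {
 *		.type	= htons(ETH_P_IP),
 *		.func	= my_rcv,
 *	};
 *
 *	dev_add_pack(&my_ptype);
 */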
380 
381 void dev_add_pack(struct packet_type *pt)
382 {
383 	int hash;
384 
385 	spin_lock_bh(&ptype_lock);
386 	if (pt->type == htons(ETH_P_ALL))
387 		list_add_rcu(&pt->list, &ptype_all);
388 	else {
389 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
390 		list_add_rcu(&pt->list, &ptype_base[hash]);
391 	}
392 	spin_unlock_bh(&ptype_lock);
393 }
394 
395 /**
396  *	__dev_remove_pack	 - remove packet handler
397  *	@pt: packet type declaration
398  *
399  *	Remove a protocol handler that was previously added to the kernel
400  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
401  *	from the kernel lists and can be freed or reused once this function
402  *	returns.
403  *
404  *      The packet type might still be in use by receivers
405  *	and must not be freed until after all the CPUs have gone
406  *	through a quiescent state.
407  */
408 void __dev_remove_pack(struct packet_type *pt)
409 {
410 	struct list_head *head;
411 	struct packet_type *pt1;
412 
413 	spin_lock_bh(&ptype_lock);
414 
415 	if (pt->type == htons(ETH_P_ALL))
416 		head = &ptype_all;
417 	else
418 		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
419 
420 	list_for_each_entry(pt1, head, list) {
421 		if (pt == pt1) {
422 			list_del_rcu(&pt->list);
423 			goto out;
424 		}
425 	}
426 
427 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
428 out:
429 	spin_unlock_bh(&ptype_lock);
430 }
431 /**
432  *	dev_remove_pack	 - remove packet handler
433  *	@pt: packet type declaration
434  *
435  *	Remove a protocol handler that was previously added to the kernel
436  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
437  *	from the kernel lists and can be freed or reused once this function
438  *	returns.
439  *
440  *	This call sleeps to guarantee that no CPU is looking at the packet
441  *	type after return.
442  */
443 void dev_remove_pack(struct packet_type *pt)
444 {
445 	__dev_remove_pack(pt);
446 
447 	synchronize_net();
448 }
449 
450 /******************************************************************************
451 
452 		      Device Boot-time Settings Routines
453 
454 *******************************************************************************/
455 
456 /* Boot time configuration table */
457 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
458 
459 /**
460  *	netdev_boot_setup_add	- add new setup entry
461  *	@name: name of the device
462  *	@map: configured settings for the device
463  *
464  *	Adds a new setup entry to the dev_boot_setup list.  The function
465  *	returns 0 on error and 1 on success.  This is a generic routine
466  *	for all netdevices.
467  */
468 static int netdev_boot_setup_add(char *name, struct ifmap *map)
469 {
470 	struct netdev_boot_setup *s;
471 	int i;
472 
473 	s = dev_boot_setup;
474 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
475 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
476 			memset(s[i].name, 0, sizeof(s[i].name));
477 			strlcpy(s[i].name, name, IFNAMSIZ);
478 			memcpy(&s[i].map, map, sizeof(s[i].map));
479 			break;
480 		}
481 	}
482 
483 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
484 }
485 
486 /**
487  *	netdev_boot_setup_check	- check boot time settings
488  *	@dev: the netdevice
489  *
490  * 	Check boot time settings for the device.  Any settings found
491  *	are copied into the device so that they can be used later
492  *	during device probing.
493  *	Returns 1 if settings were found, 0 otherwise.
494  */
495 int netdev_boot_setup_check(struct net_device *dev)
496 {
497 	struct netdev_boot_setup *s = dev_boot_setup;
498 	int i;
499 
500 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
501 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
502 		    !strcmp(dev->name, s[i].name)) {
503 			dev->irq 	= s[i].map.irq;
504 			dev->base_addr 	= s[i].map.base_addr;
505 			dev->mem_start 	= s[i].map.mem_start;
506 			dev->mem_end 	= s[i].map.mem_end;
507 			return 1;
508 		}
509 	}
510 	return 0;
511 }
512 
513 
514 /**
515  *	netdev_boot_base	- get address from boot time settings
516  *	@prefix: prefix for network device
517  *	@unit: id for network device
518  *
519  * 	Check boot time settings for the base address of the device
520  *	named <prefix><unit>.  Returns the configured base address, 1 if
521  *	the device is already registered (so it should not be probed),
522  *	or 0 if no settings were found.
523  */
524 unsigned long netdev_boot_base(const char *prefix, int unit)
525 {
526 	const struct netdev_boot_setup *s = dev_boot_setup;
527 	char name[IFNAMSIZ];
528 	int i;
529 
530 	sprintf(name, "%s%d", prefix, unit);
531 
532 	/*
533 	 * If device already registered then return base of 1
534 	 * to indicate not to probe for this interface
535 	 */
536 	if (__dev_get_by_name(&init_net, name))
537 		return 1;
538 
539 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
540 		if (!strcmp(name, s[i].name))
541 			return s[i].map.base_addr;
542 	return 0;
543 }
544 
545 /*
546  * Saves at boot time configured settings for any netdevice.
547  */
548 int __init netdev_boot_setup(char *str)
549 {
550 	int ints[5];
551 	struct ifmap map;
552 
553 	str = get_options(str, ARRAY_SIZE(ints), ints);
554 	if (!str || !*str)
555 		return 0;
556 
557 	/* Save settings */
558 	memset(&map, 0, sizeof(map));
559 	if (ints[0] > 0)
560 		map.irq = ints[1];
561 	if (ints[0] > 1)
562 		map.base_addr = ints[2];
563 	if (ints[0] > 2)
564 		map.mem_start = ints[3];
565 	if (ints[0] > 3)
566 		map.mem_end = ints[4];
567 
568 	/* Add new entry to the list */
569 	return netdev_boot_setup_add(str, &map);
570 }
571 
572 __setup("netdev=", netdev_boot_setup);
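/*
 * Example (follows the parsing above; the device name is an assumption):
 * booting with
 *
 *	netdev=9,0x300,0,0,eth0
 *
 * records irq=9 and base_addr=0x300 for the device that later registers as
 * "eth0"; netdev_boot_setup_check() applies the values during probing.
 */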
573 
574 /*******************************************************************************
575 
576 			    Device Interface Subroutines
577 
578 *******************************************************************************/
579 
580 /**
581  *	__dev_get_by_name	- find a device by its name
582  *	@net: the applicable net namespace
583  *	@name: name to find
584  *
585  *	Find an interface by name. Must be called under RTNL semaphore
586  *	or @dev_base_lock. If the name is found a pointer to the device
587  *	is returned. If the name is not found then %NULL is returned. The
588  *	reference counters are not incremented so the caller must be
589  *	careful with locks.
590  */
591 
592 struct net_device *__dev_get_by_name(struct net *net, const char *name)
593 {
594 	struct hlist_node *p;
595 
596 	hlist_for_each(p, dev_name_hash(net, name)) {
597 		struct net_device *dev
598 			= hlist_entry(p, struct net_device, name_hlist);
599 		if (!strncmp(dev->name, name, IFNAMSIZ))
600 			return dev;
601 	}
602 	return NULL;
603 }
604 
605 /**
606  *	dev_get_by_name		- find a device by its name
607  *	@net: the applicable net namespace
608  *	@name: name to find
609  *
610  *	Find an interface by name. This can be called from any
611  *	context and does its own locking. The returned handle has
612  *	the usage count incremented and the caller must use dev_put() to
613  *	release it when it is no longer needed. %NULL is returned if no
614  *	matching device is found.
615  */
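/*
 * Typical call pattern (sketch; "eth0" is just an example name):
 *
 *	struct net_device *dev = dev_get_by_name(net, "eth0");
 *	if (dev) {
 *		... use dev ...
 *		dev_put(dev);
 *	}
 */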
616 
617 struct net_device *dev_get_by_name(struct net *net, const char *name)
618 {
619 	struct net_device *dev;
620 
621 	read_lock(&dev_base_lock);
622 	dev = __dev_get_by_name(net, name);
623 	if (dev)
624 		dev_hold(dev);
625 	read_unlock(&dev_base_lock);
626 	return dev;
627 }
628 
629 /**
630  *	__dev_get_by_index - find a device by its ifindex
631  *	@net: the applicable net namespace
632  *	@ifindex: index of device
633  *
634  *	Search for an interface by index. Returns %NULL if the device
635  *	is not found or a pointer to the device. The device has not
636  *	had its reference counter increased so the caller must be careful
637  *	about locking. The caller must hold either the RTNL semaphore
638  *	or @dev_base_lock.
639  */
640 
641 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
642 {
643 	struct hlist_node *p;
644 
645 	hlist_for_each(p, dev_index_hash(net, ifindex)) {
646 		struct net_device *dev
647 			= hlist_entry(p, struct net_device, index_hlist);
648 		if (dev->ifindex == ifindex)
649 			return dev;
650 	}
651 	return NULL;
652 }
653 
654 
655 /**
656  *	dev_get_by_index - find a device by its ifindex
657  *	@net: the applicable net namespace
658  *	@ifindex: index of device
659  *
660  *	Search for an interface by index. Returns NULL if the device
661  *	is not found or a pointer to the device. The device returned has
662  *	had a reference added and the pointer is safe until the user calls
663  *	dev_put to indicate they have finished with it.
664  */
665 
666 struct net_device *dev_get_by_index(struct net *net, int ifindex)
667 {
668 	struct net_device *dev;
669 
670 	read_lock(&dev_base_lock);
671 	dev = __dev_get_by_index(net, ifindex);
672 	if (dev)
673 		dev_hold(dev);
674 	read_unlock(&dev_base_lock);
675 	return dev;
676 }
677 
678 /**
679  *	dev_getbyhwaddr - find a device by its hardware address
680  *	@net: the applicable net namespace
681  *	@type: media type of device
682  *	@ha: hardware address
683  *
684  *	Search for an interface by MAC address. Returns NULL if the device
685  *	is not found, or a pointer to the device. The caller must hold the
686  *	rtnl semaphore. The returned device has not had its ref count
687  *	increased, so the caller must be careful about locking.
688  *
689  *	BUGS:
690  *	If the API was consistent this would be __dev_get_by_hwaddr
691  */
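/*
 * Sketch (caller holds the RTNL lock; the address is made up):
 *
 *	char ha[ETH_ALEN] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
 *
 *	dev = dev_getbyhwaddr(net, ARPHRD_ETHER, ha);
 */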
692 
693 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
694 {
695 	struct net_device *dev;
696 
697 	ASSERT_RTNL();
698 
699 	for_each_netdev(net, dev)
700 		if (dev->type == type &&
701 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
702 			return dev;
703 
704 	return NULL;
705 }
706 
707 EXPORT_SYMBOL(dev_getbyhwaddr);
708 
709 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
710 {
711 	struct net_device *dev;
712 
713 	ASSERT_RTNL();
714 	for_each_netdev(net, dev)
715 		if (dev->type == type)
716 			return dev;
717 
718 	return NULL;
719 }
720 
721 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
722 
723 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
724 {
725 	struct net_device *dev;
726 
727 	rtnl_lock();
728 	dev = __dev_getfirstbyhwtype(net, type);
729 	if (dev)
730 		dev_hold(dev);
731 	rtnl_unlock();
732 	return dev;
733 }
734 
735 EXPORT_SYMBOL(dev_getfirstbyhwtype);
736 
737 /**
738  *	dev_get_by_flags - find any device with given flags
739  *	@net: the applicable net namespace
740  *	@if_flags: IFF_* values
741  *	@mask: bitmask of bits in if_flags to check
742  *
743  *	Search for any interface with the given flags. Returns NULL if a device
744  *	is not found or a pointer to the device. The device returned has
745  *	had a reference added and the pointer is safe until the user calls
746  *	dev_put to indicate they have finished with it.
747  */
748 
749 struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
750 {
751 	struct net_device *dev, *ret;
752 
753 	ret = NULL;
754 	read_lock(&dev_base_lock);
755 	for_each_netdev(net, dev) {
756 		if (((dev->flags ^ if_flags) & mask) == 0) {
757 			dev_hold(dev);
758 			ret = dev;
759 			break;
760 		}
761 	}
762 	read_unlock(&dev_base_lock);
763 	return ret;
764 }
765 
766 /**
767  *	dev_valid_name - check if name is okay for network device
768  *	@name: name string
769  *
770  *	Network device names need to be valid file names to
771  *	allow sysfs to work.  We also disallow any kind of
772  *	whitespace.
773  */
774 int dev_valid_name(const char *name)
775 {
776 	if (*name == '\0')
777 		return 0;
778 	if (strlen(name) >= IFNAMSIZ)
779 		return 0;
780 	if (!strcmp(name, ".") || !strcmp(name, ".."))
781 		return 0;
782 
783 	while (*name) {
784 		if (*name == '/' || isspace(*name))
785 			return 0;
786 		name++;
787 	}
788 	return 1;
789 }
790 
791 /**
792  *	__dev_alloc_name - allocate a name for a device
793  *	@net: network namespace to allocate the device name in
794  *	@name: name format string
795  *	@buf:  scratch buffer and result name string
796  *
797  *	Passed a format string - eg "lt%d" - it will try to find a suitable
798  *	id. It scans list of devices to build up a free map, then chooses
799  *	the first empty slot. The caller must hold the dev_base or rtnl lock
800  *	while allocating the name and adding the device in order to avoid
801  *	duplicates.
802  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
803  *	Returns the number of the unit assigned or a negative errno code.
804  */
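/*
 * For example (assuming "eth0" and "eth1" are already registered): a format
 * string of "eth%d" fills @buf with "eth2" and returns 2.
 */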
805 
806 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
807 {
808 	int i = 0;
809 	const char *p;
810 	const int max_netdevices = 8*PAGE_SIZE;
811 	unsigned long *inuse;
812 	struct net_device *d;
813 
814 	p = strnchr(name, IFNAMSIZ-1, '%');
815 	if (p) {
816 		/*
817 		 * Verify the string as this thing may have come from
818 		 * the user.  There must be either one "%d" and no other "%"
819 		 * characters.
820 		 */
821 		if (p[1] != 'd' || strchr(p + 2, '%'))
822 			return -EINVAL;
823 
824 		/* Use one page as a bit array of possible slots */
825 		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
826 		if (!inuse)
827 			return -ENOMEM;
828 
829 		for_each_netdev(net, d) {
830 			if (!sscanf(d->name, name, &i))
831 				continue;
832 			if (i < 0 || i >= max_netdevices)
833 				continue;
834 
835 			/*  avoid cases where sscanf is not exact inverse of printf */
836 			snprintf(buf, IFNAMSIZ, name, i);
837 			if (!strncmp(buf, d->name, IFNAMSIZ))
838 				set_bit(i, inuse);
839 		}
840 
841 		i = find_first_zero_bit(inuse, max_netdevices);
842 		free_page((unsigned long) inuse);
843 	}
844 
845 	snprintf(buf, IFNAMSIZ, name, i);
846 	if (!__dev_get_by_name(net, buf))
847 		return i;
848 
849 	/* It is possible to run out of possible slots
850 	 * when the name is long and there isn't enough space left
851 	 * for the digits, or if all bits are used.
852 	 */
853 	return -ENFILE;
854 }
855 
856 /**
857  *	dev_alloc_name - allocate a name for a device
858  *	@dev: device
859  *	@name: name format string
860  *
861  *	Passed a format string - eg "lt%d" - it will try to find a suitable
862  *	id. It scans list of devices to build up a free map, then chooses
863  *	the first empty slot. The caller must hold the dev_base or rtnl lock
864  *	while allocating the name and adding the device in order to avoid
865  *	duplicates.
866  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
867  *	Returns the number of the unit assigned or a negative errno code.
868  */
869 
870 int dev_alloc_name(struct net_device *dev, const char *name)
871 {
872 	char buf[IFNAMSIZ];
873 	struct net *net;
874 	int ret;
875 
876 	BUG_ON(!dev_net(dev));
877 	net = dev_net(dev);
878 	ret = __dev_alloc_name(net, name, buf);
879 	if (ret >= 0)
880 		strlcpy(dev->name, buf, IFNAMSIZ);
881 	return ret;
882 }
883 
884 
885 /**
886  *	dev_change_name - change name of a device
887  *	@dev: device
888  *	@newname: name (or format string) must be at least IFNAMSIZ
889  *
890  *	Change the name of a device; a format string such as "eth%d"
891  *	can be passed for wildcarding.
892  */
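/*
 * Sketch: dev_change_name(dev, "lan%d") picks the first free "lan<N>" name
 * via dev_alloc_name(); a literal name such as "uplink0" (made-up example)
 * is used as-is and fails with -EEXIST if it is already taken.
 */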
893 int dev_change_name(struct net_device *dev, const char *newname)
894 {
895 	char oldname[IFNAMSIZ];
896 	int err = 0;
897 	int ret;
898 	struct net *net;
899 
900 	ASSERT_RTNL();
901 	BUG_ON(!dev_net(dev));
902 
903 	net = dev_net(dev);
904 	if (dev->flags & IFF_UP)
905 		return -EBUSY;
906 
907 	if (!dev_valid_name(newname))
908 		return -EINVAL;
909 
910 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
911 		return 0;
912 
913 	memcpy(oldname, dev->name, IFNAMSIZ);
914 
915 	if (strchr(newname, '%')) {
916 		err = dev_alloc_name(dev, newname);
917 		if (err < 0)
918 			return err;
919 	}
920 	else if (__dev_get_by_name(net, newname))
921 		return -EEXIST;
922 	else
923 		strlcpy(dev->name, newname, IFNAMSIZ);
924 
925 rollback:
926 	/* For now only devices in the initial network namespace
927 	 * are in sysfs.
928 	 */
929 	if (net == &init_net) {
930 		ret = device_rename(&dev->dev, dev->name);
931 		if (ret) {
932 			memcpy(dev->name, oldname, IFNAMSIZ);
933 			return ret;
934 		}
935 	}
936 
937 	write_lock_bh(&dev_base_lock);
938 	hlist_del(&dev->name_hlist);
939 	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
940 	write_unlock_bh(&dev_base_lock);
941 
942 	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
943 	ret = notifier_to_errno(ret);
944 
945 	if (ret) {
946 		if (err) {
947 			printk(KERN_ERR
948 			       "%s: name change rollback failed: %d.\n",
949 			       dev->name, ret);
950 		} else {
951 			err = ret;
952 			memcpy(dev->name, oldname, IFNAMSIZ);
953 			goto rollback;
954 		}
955 	}
956 
957 	return err;
958 }
959 
960 /**
961  *	dev_set_alias - change ifalias of a device
962  *	@dev: device
963  *	@alias: name up to IFALIASZ
964  *	@len: limit of bytes to copy from @alias
965  *
966  *	Set ifalias for a device.
967  */
968 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
969 {
970 	ASSERT_RTNL();
971 
972 	if (len >= IFALIASZ)
973 		return -EINVAL;
974 
975 	if (!len) {
976 		if (dev->ifalias) {
977 			kfree(dev->ifalias);
978 			dev->ifalias = NULL;
979 		}
980 		return 0;
981 	}
982 
983 	dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
984 	if (!dev->ifalias)
985 		return -ENOMEM;
986 
987 	strlcpy(dev->ifalias, alias, len+1);
988 	return len;
989 }
990 
991 
992 /**
993  *	netdev_features_change - device changes features
994  *	@dev: device to cause notification
995  *
996  *	Called to indicate a device has changed features.
997  */
998 void netdev_features_change(struct net_device *dev)
999 {
1000 	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1001 }
1002 EXPORT_SYMBOL(netdev_features_change);
1003 
1004 /**
1005  *	netdev_state_change - device changes state
1006  *	@dev: device to cause notification
1007  *
1008  *	Called to indicate a device has changed state. This function calls
1009  *	the notifier chains for netdev_chain and sends a NEWLINK message
1010  *	to the routing socket.
1011  */
1012 void netdev_state_change(struct net_device *dev)
1013 {
1014 	if (dev->flags & IFF_UP) {
1015 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
1016 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1017 	}
1018 }
1019 
1020 void netdev_bonding_change(struct net_device *dev)
1021 {
1022 	call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
1023 }
1024 EXPORT_SYMBOL(netdev_bonding_change);
1025 
1026 /**
1027  *	dev_load 	- load a network module
1028  *	@net: the applicable net namespace
1029  *	@name: name of interface
1030  *
1031  *	If a network interface is not present and the process has suitable
1032  *	privileges this function loads the module. If module loading is not
1033  *	available in this kernel then it becomes a nop.
1034  */
1035 
1036 void dev_load(struct net *net, const char *name)
1037 {
1038 	struct net_device *dev;
1039 
1040 	read_lock(&dev_base_lock);
1041 	dev = __dev_get_by_name(net, name);
1042 	read_unlock(&dev_base_lock);
1043 
1044 	if (!dev && capable(CAP_SYS_MODULE))
1045 		request_module("%s", name);
1046 }
1047 
1048 /**
1049  *	dev_open	- prepare an interface for use.
1050  *	@dev:	device to open
1051  *
1052  *	Takes a device from down to up state. The device's private open
1053  *	function is invoked and then the multicast lists are loaded. Finally
1054  *	the device is moved into the up state and a %NETDEV_UP message is
1055  *	sent to the netdev notifier chain.
1056  *
1057  *	Calling this function on an active interface is a nop. On a failure
1058  *	a negative errno code is returned.
1059  */
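/*
 * Callers must hold the RTNL lock, e.g. (sketch):
 *
 *	rtnl_lock();
 *	err = dev_open(dev);
 *	rtnl_unlock();
 */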
1060 int dev_open(struct net_device *dev)
1061 {
1062 	const struct net_device_ops *ops = dev->netdev_ops;
1063 	int ret = 0;
1064 
1065 	ASSERT_RTNL();
1066 
1067 	/*
1068 	 *	Is it already up?
1069 	 */
1070 
1071 	if (dev->flags & IFF_UP)
1072 		return 0;
1073 
1074 	/*
1075 	 *	Is it even present?
1076 	 */
1077 	if (!netif_device_present(dev))
1078 		return -ENODEV;
1079 
1080 	/*
1081 	 *	Call device private open method
1082 	 */
1083 	set_bit(__LINK_STATE_START, &dev->state);
1084 
1085 	if (ops->ndo_validate_addr)
1086 		ret = ops->ndo_validate_addr(dev);
1087 
1088 	if (!ret && ops->ndo_open)
1089 		ret = ops->ndo_open(dev);
1090 
1091 	/*
1092 	 *	If it went open OK then:
1093 	 */
1094 
1095 	if (ret)
1096 		clear_bit(__LINK_STATE_START, &dev->state);
1097 	else {
1098 		/*
1099 		 *	Set the flags.
1100 		 */
1101 		dev->flags |= IFF_UP;
1102 
1103 		/*
1104 		 *	Initialize multicasting status
1105 		 */
1106 		dev_set_rx_mode(dev);
1107 
1108 		/*
1109 		 *	Wakeup transmit queue engine
1110 		 */
1111 		dev_activate(dev);
1112 
1113 		/*
1114 		 *	... and announce new interface.
1115 		 */
1116 		call_netdevice_notifiers(NETDEV_UP, dev);
1117 	}
1118 
1119 	return ret;
1120 }
1121 
1122 /**
1123  *	dev_close - shutdown an interface.
1124  *	@dev: device to shutdown
1125  *
1126  *	This function moves an active device into down state. A
1127  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1128  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1129  *	chain.
1130  */
1131 int dev_close(struct net_device *dev)
1132 {
1133 	const struct net_device_ops *ops = dev->netdev_ops;
1134 	ASSERT_RTNL();
1135 
1136 	might_sleep();
1137 
1138 	if (!(dev->flags & IFF_UP))
1139 		return 0;
1140 
1141 	/*
1142 	 *	Tell people we are going down, so that they can
1143 	 *	prepare for it while the device is still operating.
1144 	 */
1145 	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1146 
1147 	clear_bit(__LINK_STATE_START, &dev->state);
1148 
1149 	/* Synchronize with the scheduled poll. We cannot touch the poll list;
1150 	 * it may even be running on a different CPU, so just clear netif_running().
1151 	 *
1152 	 * ops->ndo_stop() will invoke napi_disable() on all of its
1153 	 * napi_struct instances on this device.
1154 	 */
1155 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
1156 
1157 	dev_deactivate(dev);
1158 
1159 	/*
1160 	 *	Call the device specific close. This cannot fail.
1161 	 *	Only if device is UP
1162 	 *
1163 	 *	We allow it to be called even after a DETACH hot-plug
1164 	 *	event.
1165 	 */
1166 	if (ops->ndo_stop)
1167 		ops->ndo_stop(dev);
1168 
1169 	/*
1170 	 *	Device is now down.
1171 	 */
1172 
1173 	dev->flags &= ~IFF_UP;
1174 
1175 	/*
1176 	 * Tell people we are down
1177 	 */
1178 	call_netdevice_notifiers(NETDEV_DOWN, dev);
1179 
1180 	return 0;
1181 }
1182 
1183 
1184 /**
1185  *	dev_disable_lro - disable Large Receive Offload on a device
1186  *	@dev: device
1187  *
1188  *	Disable Large Receive Offload (LRO) on a net device.  Must be
1189  *	called under RTNL.  This is needed if received packets may be
1190  *	forwarded to another interface.
1191  */
1192 void dev_disable_lro(struct net_device *dev)
1193 {
1194 	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1195 	    dev->ethtool_ops->set_flags) {
1196 		u32 flags = dev->ethtool_ops->get_flags(dev);
1197 		if (flags & ETH_FLAG_LRO) {
1198 			flags &= ~ETH_FLAG_LRO;
1199 			dev->ethtool_ops->set_flags(dev, flags);
1200 		}
1201 	}
1202 	WARN_ON(dev->features & NETIF_F_LRO);
1203 }
1204 EXPORT_SYMBOL(dev_disable_lro);
1205 
1206 
1207 static int dev_boot_phase = 1;
1208 
1209 /*
1210  *	Device change register/unregister. These are not inline or static
1211  *	as we export them to the world.
1212  */
1213 
1214 /**
1215  *	register_netdevice_notifier - register a network notifier block
1216  *	@nb: notifier
1217  *
1218  *	Register a notifier to be called when network device events occur.
1219  *	The notifier passed is linked into the kernel structures and must
1220  *	not be reused until it has been unregistered. A negative errno code
1221  *	is returned on a failure.
1222  *
1223  * 	When registered, all registration and up events are replayed
1224  *	to the new notifier to allow it a race-free view of the
1225  *	network device list.
1226  */
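/*
 * Registration sketch (my_netdev_event and my_nb are hypothetical names):
 *
 *	static int my_netdev_event(struct notifier_block *nb,
 *				   unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UP)
 *			printk(KERN_INFO "%s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_nb = {
 *		.notifier_call = my_netdev_event,
 *	};
 *
 *	register_netdevice_notifier(&my_nb);
 */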
1227 
1228 int register_netdevice_notifier(struct notifier_block *nb)
1229 {
1230 	struct net_device *dev;
1231 	struct net_device *last;
1232 	struct net *net;
1233 	int err;
1234 
1235 	rtnl_lock();
1236 	err = raw_notifier_chain_register(&netdev_chain, nb);
1237 	if (err)
1238 		goto unlock;
1239 	if (dev_boot_phase)
1240 		goto unlock;
1241 	for_each_net(net) {
1242 		for_each_netdev(net, dev) {
1243 			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1244 			err = notifier_to_errno(err);
1245 			if (err)
1246 				goto rollback;
1247 
1248 			if (!(dev->flags & IFF_UP))
1249 				continue;
1250 
1251 			nb->notifier_call(nb, NETDEV_UP, dev);
1252 		}
1253 	}
1254 
1255 unlock:
1256 	rtnl_unlock();
1257 	return err;
1258 
1259 rollback:
1260 	last = dev;
1261 	for_each_net(net) {
1262 		for_each_netdev(net, dev) {
1263 			if (dev == last)
1264 				break;
1265 
1266 			if (dev->flags & IFF_UP) {
1267 				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1268 				nb->notifier_call(nb, NETDEV_DOWN, dev);
1269 			}
1270 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1271 		}
1272 	}
1273 
1274 	raw_notifier_chain_unregister(&netdev_chain, nb);
1275 	goto unlock;
1276 }
1277 
1278 /**
1279  *	unregister_netdevice_notifier - unregister a network notifier block
1280  *	@nb: notifier
1281  *
1282  *	Unregister a notifier previously registered by
1283  *	register_netdevice_notifier(). The notifier is unlinked from the
1284  *	kernel structures and may then be reused. A negative errno code
1285  *	is returned on a failure.
1286  */
1287 
1288 int unregister_netdevice_notifier(struct notifier_block *nb)
1289 {
1290 	int err;
1291 
1292 	rtnl_lock();
1293 	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1294 	rtnl_unlock();
1295 	return err;
1296 }
1297 
1298 /**
1299  *	call_netdevice_notifiers - call all network notifier blocks
1300  *      @val: value passed unmodified to notifier function
1301  *      @dev: net_device pointer passed unmodified to notifier function
1302  *
1303  *	Call all network notifier blocks.  Parameters and return value
1304  *	are as for raw_notifier_call_chain().
1305  */
1306 
1307 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1308 {
1309 	return raw_notifier_call_chain(&netdev_chain, val, dev);
1310 }
1311 
1312 /* When > 0 there are consumers of rx skb time stamps */
1313 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1314 
1315 void net_enable_timestamp(void)
1316 {
1317 	atomic_inc(&netstamp_needed);
1318 }
1319 
1320 void net_disable_timestamp(void)
1321 {
1322 	atomic_dec(&netstamp_needed);
1323 }
1324 
1325 static inline void net_timestamp(struct sk_buff *skb)
1326 {
1327 	if (atomic_read(&netstamp_needed))
1328 		__net_timestamp(skb);
1329 	else
1330 		skb->tstamp.tv64 = 0;
1331 }
1332 
1333 /*
1334  *	Support routine. Sends outgoing frames to any network
1335  *	taps currently in use.
1336  */
1337 
1338 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1339 {
1340 	struct packet_type *ptype;
1341 
1342 	net_timestamp(skb);
1343 
1344 	rcu_read_lock();
1345 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1346 		/* Never send packets back to the socket
1347 		 * they originated from - MvS ([email protected])
1348 		 */
1349 		if ((ptype->dev == dev || !ptype->dev) &&
1350 		    (ptype->af_packet_priv == NULL ||
1351 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1352 			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1353 			if (!skb2)
1354 				break;
1355 
1356 			/* The network header should be correctly
1357 			   set by the sender, so the check below is
1358 			   just protection against buggy protocols.
1359 			 */
1360 			skb_reset_mac_header(skb2);
1361 
1362 			if (skb_network_header(skb2) < skb2->data ||
1363 			    skb2->network_header > skb2->tail) {
1364 				if (net_ratelimit())
1365 					printk(KERN_CRIT "protocol %04x is "
1366 					       "buggy, dev %s\n",
1367 					       skb2->protocol, dev->name);
1368 				skb_reset_network_header(skb2);
1369 			}
1370 
1371 			skb2->transport_header = skb2->network_header;
1372 			skb2->pkt_type = PACKET_OUTGOING;
1373 			ptype->func(skb2, skb->dev, ptype, skb->dev);
1374 		}
1375 	}
1376 	rcu_read_unlock();
1377 }
1378 
1379 
1380 static inline void __netif_reschedule(struct Qdisc *q)
1381 {
1382 	struct softnet_data *sd;
1383 	unsigned long flags;
1384 
1385 	local_irq_save(flags);
1386 	sd = &__get_cpu_var(softnet_data);
1387 	q->next_sched = sd->output_queue;
1388 	sd->output_queue = q;
1389 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
1390 	local_irq_restore(flags);
1391 }
1392 
1393 void __netif_schedule(struct Qdisc *q)
1394 {
1395 	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1396 		__netif_reschedule(q);
1397 }
1398 EXPORT_SYMBOL(__netif_schedule);
1399 
1400 void dev_kfree_skb_irq(struct sk_buff *skb)
1401 {
1402 	if (atomic_dec_and_test(&skb->users)) {
1403 		struct softnet_data *sd;
1404 		unsigned long flags;
1405 
1406 		local_irq_save(flags);
1407 		sd = &__get_cpu_var(softnet_data);
1408 		skb->next = sd->completion_queue;
1409 		sd->completion_queue = skb;
1410 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1411 		local_irq_restore(flags);
1412 	}
1413 }
1414 EXPORT_SYMBOL(dev_kfree_skb_irq);
1415 
1416 void dev_kfree_skb_any(struct sk_buff *skb)
1417 {
1418 	if (in_irq() || irqs_disabled())
1419 		dev_kfree_skb_irq(skb);
1420 	else
1421 		dev_kfree_skb(skb);
1422 }
1423 EXPORT_SYMBOL(dev_kfree_skb_any);
1424 
1425 
1426 /**
1427  * netif_device_detach - mark device as removed
1428  * @dev: network device
1429  *
1430  * Mark device as removed from the system and therefore no longer available.
1431  */
1432 void netif_device_detach(struct net_device *dev)
1433 {
1434 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1435 	    netif_running(dev)) {
1436 		netif_stop_queue(dev);
1437 	}
1438 }
1439 EXPORT_SYMBOL(netif_device_detach);
1440 
1441 /**
1442  * netif_device_attach - mark device as attached
1443  * @dev: network device
1444  *
1445  * Mark device as attached to the system and restart its queues if needed.
1446  */
1447 void netif_device_attach(struct net_device *dev)
1448 {
1449 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1450 	    netif_running(dev)) {
1451 		netif_wake_queue(dev);
1452 		__netdev_watchdog_up(dev);
1453 	}
1454 }
1455 EXPORT_SYMBOL(netif_device_attach);
1456 
1457 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1458 {
1459 	return ((features & NETIF_F_GEN_CSUM) ||
1460 		((features & NETIF_F_IP_CSUM) &&
1461 		 protocol == htons(ETH_P_IP)) ||
1462 		((features & NETIF_F_IPV6_CSUM) &&
1463 		 protocol == htons(ETH_P_IPV6)));
1464 }
1465 
1466 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1467 {
1468 	if (can_checksum_protocol(dev->features, skb->protocol))
1469 		return true;
1470 
1471 	if (skb->protocol == htons(ETH_P_8021Q)) {
1472 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1473 		if (can_checksum_protocol(dev->features & dev->vlan_features,
1474 					  veh->h_vlan_encapsulated_proto))
1475 			return true;
1476 	}
1477 
1478 	return false;
1479 }
1480 
1481 /*
1482  * Invalidate hardware checksum when packet is to be mangled, and
1483  * complete checksum manually on outgoing path.
1484  */
1485 int skb_checksum_help(struct sk_buff *skb)
1486 {
1487 	__wsum csum;
1488 	int ret = 0, offset;
1489 
1490 	if (skb->ip_summed == CHECKSUM_COMPLETE)
1491 		goto out_set_summed;
1492 
1493 	if (unlikely(skb_shinfo(skb)->gso_size)) {
1494 		/* Let GSO fix up the checksum. */
1495 		goto out_set_summed;
1496 	}
1497 
1498 	offset = skb->csum_start - skb_headroom(skb);
1499 	BUG_ON(offset >= skb_headlen(skb));
1500 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
1501 
1502 	offset += skb->csum_offset;
1503 	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1504 
1505 	if (skb_cloned(skb) &&
1506 	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1507 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1508 		if (ret)
1509 			goto out;
1510 	}
1511 
1512 	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
1513 out_set_summed:
1514 	skb->ip_summed = CHECKSUM_NONE;
1515 out:
1516 	return ret;
1517 }
1518 
1519 /**
1520  *	skb_gso_segment - Perform segmentation on skb.
1521  *	@skb: buffer to segment
1522  *	@features: features for the output path (see dev->features)
1523  *
1524  *	This function segments the given skb and returns a list of segments.
1525  *
1526  *	It may return NULL if the skb requires no segmentation.  This is
1527  *	only possible when GSO is used for verifying header integrity.
1528  */
1529 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1530 {
1531 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1532 	struct packet_type *ptype;
1533 	__be16 type = skb->protocol;
1534 	int err;
1535 
1536 	BUG_ON(skb_shinfo(skb)->frag_list);
1537 
1538 	skb_reset_mac_header(skb);
1539 	skb->mac_len = skb->network_header - skb->mac_header;
1540 	__skb_pull(skb, skb->mac_len);
1541 
1542 	if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
1543 		if (skb_header_cloned(skb) &&
1544 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1545 			return ERR_PTR(err);
1546 	}
1547 
1548 	rcu_read_lock();
1549 	list_for_each_entry_rcu(ptype,
1550 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
1551 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1552 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1553 				err = ptype->gso_send_check(skb);
1554 				segs = ERR_PTR(err);
1555 				if (err || skb_gso_ok(skb, features))
1556 					break;
1557 				__skb_push(skb, (skb->data -
1558 						 skb_network_header(skb)));
1559 			}
1560 			segs = ptype->gso_segment(skb, features);
1561 			break;
1562 		}
1563 	}
1564 	rcu_read_unlock();
1565 
1566 	__skb_push(skb, skb->data - skb_mac_header(skb));
1567 
1568 	return segs;
1569 }
1570 
1571 EXPORT_SYMBOL(skb_gso_segment);
1572 
1573 /* Take action when hardware reception checksum errors are detected. */
1574 #ifdef CONFIG_BUG
1575 void netdev_rx_csum_fault(struct net_device *dev)
1576 {
1577 	if (net_ratelimit()) {
1578 		printk(KERN_ERR "%s: hw csum failure.\n",
1579 			dev ? dev->name : "<unknown>");
1580 		dump_stack();
1581 	}
1582 }
1583 EXPORT_SYMBOL(netdev_rx_csum_fault);
1584 #endif
1585 
1586 /* Actually, we should eliminate this check as soon as we know that:
1587  * 1. An IOMMU is present and can map all the memory.
1588  * 2. No high memory really exists on this machine.
1589  */
1590 
1591 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1592 {
1593 #ifdef CONFIG_HIGHMEM
1594 	int i;
1595 
1596 	if (dev->features & NETIF_F_HIGHDMA)
1597 		return 0;
1598 
1599 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1600 		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1601 			return 1;
1602 
1603 #endif
1604 	return 0;
1605 }
1606 
1607 struct dev_gso_cb {
1608 	void (*destructor)(struct sk_buff *skb);
1609 };
1610 
1611 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1612 
1613 static void dev_gso_skb_destructor(struct sk_buff *skb)
1614 {
1615 	struct dev_gso_cb *cb;
1616 
1617 	do {
1618 		struct sk_buff *nskb = skb->next;
1619 
1620 		skb->next = nskb->next;
1621 		nskb->next = NULL;
1622 		kfree_skb(nskb);
1623 	} while (skb->next);
1624 
1625 	cb = DEV_GSO_CB(skb);
1626 	if (cb->destructor)
1627 		cb->destructor(skb);
1628 }
1629 
1630 /**
1631  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
1632  *	@skb: buffer to segment
1633  *
1634  *	This function segments the given skb and stores the list of segments
1635  *	in skb->next.
1636  */
1637 static int dev_gso_segment(struct sk_buff *skb)
1638 {
1639 	struct net_device *dev = skb->dev;
1640 	struct sk_buff *segs;
1641 	int features = dev->features & ~(illegal_highdma(dev, skb) ?
1642 					 NETIF_F_SG : 0);
1643 
1644 	segs = skb_gso_segment(skb, features);
1645 
1646 	/* Verifying header integrity only. */
1647 	if (!segs)
1648 		return 0;
1649 
1650 	if (IS_ERR(segs))
1651 		return PTR_ERR(segs);
1652 
1653 	skb->next = segs;
1654 	DEV_GSO_CB(skb)->destructor = skb->destructor;
1655 	skb->destructor = dev_gso_skb_destructor;
1656 
1657 	return 0;
1658 }
1659 
1660 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1661 			struct netdev_queue *txq)
1662 {
1663 	const struct net_device_ops *ops = dev->netdev_ops;
1664 
1665 	prefetch(&dev->netdev_ops->ndo_start_xmit);
1666 	if (likely(!skb->next)) {
1667 		if (!list_empty(&ptype_all))
1668 			dev_queue_xmit_nit(skb, dev);
1669 
1670 		if (netif_needs_gso(dev, skb)) {
1671 			if (unlikely(dev_gso_segment(skb)))
1672 				goto out_kfree_skb;
1673 			if (skb->next)
1674 				goto gso;
1675 		}
1676 
1677 		return ops->ndo_start_xmit(skb, dev);
1678 	}
1679 
1680 gso:
1681 	do {
1682 		struct sk_buff *nskb = skb->next;
1683 		int rc;
1684 
1685 		skb->next = nskb->next;
1686 		nskb->next = NULL;
1687 		rc = ops->ndo_start_xmit(nskb, dev);
1688 		if (unlikely(rc)) {
1689 			nskb->next = skb->next;
1690 			skb->next = nskb;
1691 			return rc;
1692 		}
1693 		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
1694 			return NETDEV_TX_BUSY;
1695 	} while (skb->next);
1696 
1697 	skb->destructor = DEV_GSO_CB(skb)->destructor;
1698 
1699 out_kfree_skb:
1700 	kfree_skb(skb);
1701 	return 0;
1702 }
1703 
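/*
 * simple_tx_hash() below spreads flows across dev->real_num_tx_queues by
 * jhashing the IP addresses and, for the listed transport protocols, the
 * first 32 bits after the IP header (i.e. the port pair), mixed with a
 * boot-time random seed.
 */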
1704 static u32 simple_tx_hashrnd;
1705 static int simple_tx_hashrnd_initialized = 0;
1706 
1707 static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
1708 {
1709 	u32 addr1, addr2, ports;
1710 	u32 hash, ihl;
1711 	u8 ip_proto = 0;
1712 
1713 	if (unlikely(!simple_tx_hashrnd_initialized)) {
1714 		get_random_bytes(&simple_tx_hashrnd, 4);
1715 		simple_tx_hashrnd_initialized = 1;
1716 	}
1717 
1718 	switch (skb->protocol) {
1719 	case htons(ETH_P_IP):
1720 		if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
1721 			ip_proto = ip_hdr(skb)->protocol;
1722 		addr1 = ip_hdr(skb)->saddr;
1723 		addr2 = ip_hdr(skb)->daddr;
1724 		ihl = ip_hdr(skb)->ihl;
1725 		break;
1726 	case htons(ETH_P_IPV6):
1727 		ip_proto = ipv6_hdr(skb)->nexthdr;
1728 		addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
1729 		addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
1730 		ihl = (40 >> 2);
1731 		break;
1732 	default:
1733 		return 0;
1734 	}
1735 
1736 
1737 	switch (ip_proto) {
1738 	case IPPROTO_TCP:
1739 	case IPPROTO_UDP:
1740 	case IPPROTO_DCCP:
1741 	case IPPROTO_ESP:
1742 	case IPPROTO_AH:
1743 	case IPPROTO_SCTP:
1744 	case IPPROTO_UDPLITE:
1745 		ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
1746 		break;
1747 
1748 	default:
1749 		ports = 0;
1750 		break;
1751 	}
1752 
1753 	hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
1754 
1755 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1756 }
1757 
1758 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1759 					struct sk_buff *skb)
1760 {
1761 	const struct net_device_ops *ops = dev->netdev_ops;
1762 	u16 queue_index = 0;
1763 
1764 	if (ops->ndo_select_queue)
1765 		queue_index = ops->ndo_select_queue(dev, skb);
1766 	else if (dev->real_num_tx_queues > 1)
1767 		queue_index = simple_tx_hash(dev, skb);
1768 
1769 	skb_set_queue_mapping(skb, queue_index);
1770 	return netdev_get_tx_queue(dev, queue_index);
1771 }
1772 
1773 /**
1774  *	dev_queue_xmit - transmit a buffer
1775  *	@skb: buffer to transmit
1776  *
1777  *	Queue a buffer for transmission to a network device. The caller must
1778  *	have set the device and priority and built the buffer before calling
1779  *	this function. The function can be called from an interrupt.
1780  *
1781  *	A negative errno code is returned on a failure. A success does not
1782  *	guarantee the frame will be transmitted as it may be dropped due
1783  *	to congestion or traffic shaping.
1784  *
1785  * -----------------------------------------------------------------------------------
1786  *      I notice this method can also return errors from the queue disciplines,
1787  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1788  *      be positive.
1789  *
1790  *      Regardless of the return value, the skb is consumed, so it is currently
1791  *      difficult to retry a send to this method.  (You can bump the ref count
1792  *      before sending to hold a reference for retry if you are careful.)
1793  *
1794  *      When calling this method, interrupts MUST be enabled.  This is because
1795  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1796  *          --BLG
1797  */
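/*
 * Caller sketch (assumes the skb has already been built; TC_PRIO_CONTROL is
 * just an example priority):
 *
 *	skb->dev = dev;
 *	skb->priority = TC_PRIO_CONTROL;
 *	rc = dev_queue_xmit(skb);	(consumes the skb, even on error)
 */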
1798 int dev_queue_xmit(struct sk_buff *skb)
1799 {
1800 	struct net_device *dev = skb->dev;
1801 	struct netdev_queue *txq;
1802 	struct Qdisc *q;
1803 	int rc = -ENOMEM;
1804 
1805 	/* GSO will handle the following emulations directly. */
1806 	if (netif_needs_gso(dev, skb))
1807 		goto gso;
1808 
1809 	if (skb_shinfo(skb)->frag_list &&
1810 	    !(dev->features & NETIF_F_FRAGLIST) &&
1811 	    __skb_linearize(skb))
1812 		goto out_kfree_skb;
1813 
1814 	/* Fragmented skb is linearized if device does not support SG,
1815 	 * or if at least one of fragments is in highmem and device
1816 	 * does not support DMA from it.
1817 	 */
1818 	if (skb_shinfo(skb)->nr_frags &&
1819 	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1820 	    __skb_linearize(skb))
1821 		goto out_kfree_skb;
1822 
1823 	/* If packet is not checksummed and device does not support
1824 	 * checksumming for this protocol, complete checksumming here.
1825 	 */
1826 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
1827 		skb_set_transport_header(skb, skb->csum_start -
1828 					      skb_headroom(skb));
1829 		if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
1830 			goto out_kfree_skb;
1831 	}
1832 
1833 gso:
1834 	/* Disable soft irqs for various locks below. Also
1835 	 * stops preemption for RCU.
1836 	 */
1837 	rcu_read_lock_bh();
1838 
1839 	txq = dev_pick_tx(dev, skb);
1840 	q = rcu_dereference(txq->qdisc);
1841 
1842 #ifdef CONFIG_NET_CLS_ACT
1843 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1844 #endif
1845 	if (q->enqueue) {
1846 		spinlock_t *root_lock = qdisc_lock(q);
1847 
1848 		spin_lock(root_lock);
1849 
1850 		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
1851 			kfree_skb(skb);
1852 			rc = NET_XMIT_DROP;
1853 		} else {
1854 			rc = qdisc_enqueue_root(skb, q);
1855 			qdisc_run(q);
1856 		}
1857 		spin_unlock(root_lock);
1858 
1859 		goto out;
1860 	}
1861 
1862 	/* The device has no queue. This is the common case for software
1863 	   devices: loopback, all sorts of tunnels...
1864 
1865 	   Really, it is unlikely that netif_tx_lock protection is necessary
1866 	   here.  (E.g. loopback and IP tunnels are clean, ignoring statistics
1867 	   counters.)
1868 	   However, it is possible that they rely on the protection
1869 	   made by us here.
1870 
1871 	   Check this and shoot the lock. It is not prone to deadlocks.
1872 	   Or shoot the noqueue qdisc as well, which is even simpler 8)
1873 	 */
1874 	if (dev->flags & IFF_UP) {
1875 		int cpu = smp_processor_id(); /* ok because BHs are off */
1876 
1877 		if (txq->xmit_lock_owner != cpu) {
1878 
1879 			HARD_TX_LOCK(dev, txq, cpu);
1880 
1881 			if (!netif_tx_queue_stopped(txq)) {
1882 				rc = 0;
1883 				if (!dev_hard_start_xmit(skb, dev, txq)) {
1884 					HARD_TX_UNLOCK(dev, txq);
1885 					goto out;
1886 				}
1887 			}
1888 			HARD_TX_UNLOCK(dev, txq);
1889 			if (net_ratelimit())
1890 				printk(KERN_CRIT "Virtual device %s asks to "
1891 				       "queue packet!\n", dev->name);
1892 		} else {
1893 			/* Recursion is detected! It is possible,
1894 			 * unfortunately */
1895 			if (net_ratelimit())
1896 				printk(KERN_CRIT "Dead loop on virtual device "
1897 				       "%s, fix it urgently!\n", dev->name);
1898 		}
1899 	}
1900 
1901 	rc = -ENETDOWN;
1902 	rcu_read_unlock_bh();
1903 
1904 out_kfree_skb:
1905 	kfree_skb(skb);
1906 	return rc;
1907 out:
1908 	rcu_read_unlock_bh();
1909 	return rc;
1910 }
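/*
 * Illustrative sketch, not part of this file: one way a caller might hand a
 * fully built frame (link-layer header included) to dev_queue_xmit() under
 * the rules documented above: skb->dev must be set, IRQs must be enabled,
 * and the skb is consumed whatever the return value is.  The function name
 * and the "caller supplies a complete frame" assumption are hypothetical.
 */
static int example_xmit_frame(struct net_device *dev,
			      const void *frame, unsigned int len)
{
	struct sk_buff *skb = alloc_skb(LL_RESERVED_SPACE(dev) + len, GFP_ATOMIC);

	if (!skb)
		return -ENOMEM;

	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	memcpy(skb_put(skb, len), frame, len);
	skb->dev = dev;		/* mandatory before dev_queue_xmit() */
	skb->priority = 0;

	/* Negative errno or a positive NET_XMIT_* code; the skb is gone either way. */
	return dev_queue_xmit(skb);
}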
1911 
1912 
1913 /*=======================================================================
1914 			Receiver routines
1915   =======================================================================*/
1916 
1917 int netdev_max_backlog __read_mostly = 1000;
1918 int netdev_budget __read_mostly = 300;
1919 int weight_p __read_mostly = 64;            /* old backlog weight */
1920 
1921 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1922 
1923 
1924 /**
1925  *	netif_rx	-	post buffer to the network code
1926  *	@skb: buffer to post
1927  *
1928  *	This function receives a packet from a device driver and queues it for
1929  *	the upper (protocol) levels to process.  It always succeeds. The buffer
1930  *	may be dropped during processing for congestion control or by the
1931  *	protocol layers.
1932  *
1933  *	return values:
1934  *	NET_RX_SUCCESS	(no congestion)
1935  *	NET_RX_DROP     (packet was dropped)
1936  *
1937  */
1938 
1939 int netif_rx(struct sk_buff *skb)
1940 {
1941 	struct softnet_data *queue;
1942 	unsigned long flags;
1943 
1944 	/* if netpoll wants it, pretend we never saw it */
1945 	if (netpoll_rx(skb))
1946 		return NET_RX_DROP;
1947 
1948 	if (!skb->tstamp.tv64)
1949 		net_timestamp(skb);
1950 
1951 	/*
1952 	 * The code is arranged so that the path is shortest
1953 	 * when the CPU is congested but still operating.
1954 	 */
1955 	local_irq_save(flags);
1956 	queue = &__get_cpu_var(softnet_data);
1957 
1958 	__get_cpu_var(netdev_rx_stat).total++;
1959 	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1960 		if (queue->input_pkt_queue.qlen) {
1961 enqueue:
1962 			__skb_queue_tail(&queue->input_pkt_queue, skb);
1963 			local_irq_restore(flags);
1964 			return NET_RX_SUCCESS;
1965 		}
1966 
1967 		napi_schedule(&queue->backlog);
1968 		goto enqueue;
1969 	}
1970 
1971 	__get_cpu_var(netdev_rx_stat).dropped++;
1972 	local_irq_restore(flags);
1973 
1974 	kfree_skb(skb);
1975 	return NET_RX_DROP;
1976 }
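/*
 * Illustrative sketch, not part of this file: the classic non-NAPI receive
 * path described above.  A driver's interrupt handler copies the frame out
 * of the hardware, stamps dev/protocol and hands it to netif_rx(); the
 * return value only tells the driver whether the backlog dropped the packet.
 * example_rx_irq() and its "frame already copied out of hardware" parameters
 * are hypothetical driver code.
 */
static void example_rx_irq(struct net_device *dev,
			   const void *frame, unsigned int len)
{
	struct sk_buff *skb = netdev_alloc_skb(dev, len + NET_IP_ALIGN);

	if (!skb) {
		dev->stats.rx_dropped++;
		return;
	}

	skb_reserve(skb, NET_IP_ALIGN);
	memcpy(skb_put(skb, len), frame, len);
	skb->protocol = eth_type_trans(skb, dev);

	if (netif_rx(skb) == NET_RX_DROP)
		dev->stats.rx_dropped++;
}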
1977 
1978 int netif_rx_ni(struct sk_buff *skb)
1979 {
1980 	int err;
1981 
1982 	preempt_disable();
1983 	err = netif_rx(skb);
1984 	if (local_softirq_pending())
1985 		do_softirq();
1986 	preempt_enable();
1987 
1988 	return err;
1989 }
1990 
1991 EXPORT_SYMBOL(netif_rx_ni);
1992 
1993 static void net_tx_action(struct softirq_action *h)
1994 {
1995 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
1996 
1997 	if (sd->completion_queue) {
1998 		struct sk_buff *clist;
1999 
2000 		local_irq_disable();
2001 		clist = sd->completion_queue;
2002 		sd->completion_queue = NULL;
2003 		local_irq_enable();
2004 
2005 		while (clist) {
2006 			struct sk_buff *skb = clist;
2007 			clist = clist->next;
2008 
2009 			WARN_ON(atomic_read(&skb->users));
2010 			__kfree_skb(skb);
2011 		}
2012 	}
2013 
2014 	if (sd->output_queue) {
2015 		struct Qdisc *head;
2016 
2017 		local_irq_disable();
2018 		head = sd->output_queue;
2019 		sd->output_queue = NULL;
2020 		local_irq_enable();
2021 
2022 		while (head) {
2023 			struct Qdisc *q = head;
2024 			spinlock_t *root_lock;
2025 
2026 			head = head->next_sched;
2027 
2028 			root_lock = qdisc_lock(q);
2029 			if (spin_trylock(root_lock)) {
2030 				smp_mb__before_clear_bit();
2031 				clear_bit(__QDISC_STATE_SCHED,
2032 					  &q->state);
2033 				qdisc_run(q);
2034 				spin_unlock(root_lock);
2035 			} else {
2036 				if (!test_bit(__QDISC_STATE_DEACTIVATED,
2037 					      &q->state)) {
2038 					__netif_reschedule(q);
2039 				} else {
2040 					smp_mb__before_clear_bit();
2041 					clear_bit(__QDISC_STATE_SCHED,
2042 						  &q->state);
2043 				}
2044 			}
2045 		}
2046 	}
2047 }
2048 
2049 static inline int deliver_skb(struct sk_buff *skb,
2050 			      struct packet_type *pt_prev,
2051 			      struct net_device *orig_dev)
2052 {
2053 	atomic_inc(&skb->users);
2054 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2055 }
2056 
2057 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
2058 /* These hooks are defined here for ATM */
2059 struct net_bridge;
2060 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
2061 						unsigned char *addr);
2062 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
2063 
2064 /*
2065  * If the bridge module is loaded, call the bridging hook.
2066  * Returns NULL if the packet was consumed.
2067  */
2068 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2069 					struct sk_buff *skb) __read_mostly;
2070 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2071 					    struct packet_type **pt_prev, int *ret,
2072 					    struct net_device *orig_dev)
2073 {
2074 	struct net_bridge_port *port;
2075 
2076 	if (skb->pkt_type == PACKET_LOOPBACK ||
2077 	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
2078 		return skb;
2079 
2080 	if (*pt_prev) {
2081 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2082 		*pt_prev = NULL;
2083 	}
2084 
2085 	return br_handle_frame_hook(port, skb);
2086 }
2087 #else
2088 #define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
2089 #endif
2090 
2091 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2092 struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2093 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2094 
2095 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2096 					     struct packet_type **pt_prev,
2097 					     int *ret,
2098 					     struct net_device *orig_dev)
2099 {
2100 	if (skb->dev->macvlan_port == NULL)
2101 		return skb;
2102 
2103 	if (*pt_prev) {
2104 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2105 		*pt_prev = NULL;
2106 	}
2107 	return macvlan_handle_frame_hook(skb);
2108 }
2109 #else
2110 #define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
2111 #endif
2112 
2113 #ifdef CONFIG_NET_CLS_ACT
2114 /* TODO: Maybe we should just force sch_ingress to be compiled in
2115  * when CONFIG_NET_CLS_ACT is?  Otherwise we execute a few useless
2116  * instructions (a compare and two extra stores) when CONFIG_NET_CLS_ACT
2117  * is on but the ingress scheduler is not.
2118  * NOTE: This doesn't remove any functionality; if you don't have
2119  * the ingress scheduler, you just can't add policies on ingress.
2120  *
2121  */
2122 static int ing_filter(struct sk_buff *skb)
2123 {
2124 	struct net_device *dev = skb->dev;
2125 	u32 ttl = G_TC_RTTL(skb->tc_verd);
2126 	struct netdev_queue *rxq;
2127 	int result = TC_ACT_OK;
2128 	struct Qdisc *q;
2129 
2130 	if (MAX_RED_LOOP < ttl++) {
2131 		printk(KERN_WARNING
2132 		       "Redir loop detected Dropping packet (%d->%d)\n",
2133 		       skb->iif, dev->ifindex);
2134 		return TC_ACT_SHOT;
2135 	}
2136 
2137 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2138 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2139 
2140 	rxq = &dev->rx_queue;
2141 
2142 	q = rxq->qdisc;
2143 	if (q != &noop_qdisc) {
2144 		spin_lock(qdisc_lock(q));
2145 		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2146 			result = qdisc_enqueue_root(skb, q);
2147 		spin_unlock(qdisc_lock(q));
2148 	}
2149 
2150 	return result;
2151 }
2152 
2153 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2154 					 struct packet_type **pt_prev,
2155 					 int *ret, struct net_device *orig_dev)
2156 {
2157 	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
2158 		goto out;
2159 
2160 	if (*pt_prev) {
2161 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2162 		*pt_prev = NULL;
2163 	} else {
2164 		/* Huh? Why does turning on AF_PACKET affect this? */
2165 		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2166 	}
2167 
2168 	switch (ing_filter(skb)) {
2169 	case TC_ACT_SHOT:
2170 	case TC_ACT_STOLEN:
2171 		kfree_skb(skb);
2172 		return NULL;
2173 	}
2174 
2175 out:
2176 	skb->tc_verd = 0;
2177 	return skb;
2178 }
2179 #endif
2180 
2181 /*
2182  * 	netif_nit_deliver - deliver received packets to network taps
2183  * 	@skb: buffer
2184  *
2185  * 	This function is used to deliver incoming packets to network
2186  * 	taps. It should be used when the normal netif_receive_skb path
2187  * 	is bypassed, for example because of VLAN acceleration.
2188  */
2189 void netif_nit_deliver(struct sk_buff *skb)
2190 {
2191 	struct packet_type *ptype;
2192 
2193 	if (list_empty(&ptype_all))
2194 		return;
2195 
2196 	skb_reset_network_header(skb);
2197 	skb_reset_transport_header(skb);
2198 	skb->mac_len = skb->network_header - skb->mac_header;
2199 
2200 	rcu_read_lock();
2201 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2202 		if (!ptype->dev || ptype->dev == skb->dev)
2203 			deliver_skb(skb, ptype, skb->dev);
2204 	}
2205 	rcu_read_unlock();
2206 }
2207 
2208 /**
2209  *	netif_receive_skb - process receive buffer from network
2210  *	@skb: buffer to process
2211  *
2212  *	netif_receive_skb() is the main receive data processing function.
2213  *	It always succeeds. The buffer may be dropped during processing
2214  *	for congestion control or by the protocol layers.
2215  *
2216  *	This function may only be called from softirq context and interrupts
2217  *	should be enabled.
2218  *
2219  *	Return values (usually ignored):
2220  *	NET_RX_SUCCESS: no congestion
2221  *	NET_RX_DROP: packet was dropped
2222  */
2223 int netif_receive_skb(struct sk_buff *skb)
2224 {
2225 	struct packet_type *ptype, *pt_prev;
2226 	struct net_device *orig_dev;
2227 	struct net_device *null_or_orig;
2228 	int ret = NET_RX_DROP;
2229 	__be16 type;
2230 
2231 	if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
2232 		return NET_RX_SUCCESS;
2233 
2234 	/* if we've gotten here through NAPI, check netpoll */
2235 	if (netpoll_receive_skb(skb))
2236 		return NET_RX_DROP;
2237 
2238 	if (!skb->tstamp.tv64)
2239 		net_timestamp(skb);
2240 
2241 	if (!skb->iif)
2242 		skb->iif = skb->dev->ifindex;
2243 
2244 	null_or_orig = NULL;
2245 	orig_dev = skb->dev;
2246 	if (orig_dev->master) {
2247 		if (skb_bond_should_drop(skb))
2248 			null_or_orig = orig_dev; /* deliver only exact match */
2249 		else
2250 			skb->dev = orig_dev->master;
2251 	}
2252 
2253 	__get_cpu_var(netdev_rx_stat).total++;
2254 
2255 	skb_reset_network_header(skb);
2256 	skb_reset_transport_header(skb);
2257 	skb->mac_len = skb->network_header - skb->mac_header;
2258 
2259 	pt_prev = NULL;
2260 
2261 	rcu_read_lock();
2262 
2263 	/* Don't receive packets in an exiting network namespace */
2264 	if (!net_alive(dev_net(skb->dev))) {
2265 		kfree_skb(skb);
2266 		goto out;
2267 	}
2268 
2269 #ifdef CONFIG_NET_CLS_ACT
2270 	if (skb->tc_verd & TC_NCLS) {
2271 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2272 		goto ncls;
2273 	}
2274 #endif
2275 
2276 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2277 		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2278 		    ptype->dev == orig_dev) {
2279 			if (pt_prev)
2280 				ret = deliver_skb(skb, pt_prev, orig_dev);
2281 			pt_prev = ptype;
2282 		}
2283 	}
2284 
2285 #ifdef CONFIG_NET_CLS_ACT
2286 	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2287 	if (!skb)
2288 		goto out;
2289 ncls:
2290 #endif
2291 
2292 	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2293 	if (!skb)
2294 		goto out;
2295 	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2296 	if (!skb)
2297 		goto out;
2298 
2299 	type = skb->protocol;
2300 	list_for_each_entry_rcu(ptype,
2301 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2302 		if (ptype->type == type &&
2303 		    (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2304 		     ptype->dev == orig_dev)) {
2305 			if (pt_prev)
2306 				ret = deliver_skb(skb, pt_prev, orig_dev);
2307 			pt_prev = ptype;
2308 		}
2309 	}
2310 
2311 	if (pt_prev) {
2312 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2313 	} else {
2314 		kfree_skb(skb);
2315 		/* Jamal, now you will not be able to escape explaining
2316 		 * to me how you were going to use this. :-)
2317 		 */
2318 		ret = NET_RX_DROP;
2319 	}
2320 
2321 out:
2322 	rcu_read_unlock();
2323 	return ret;
2324 }
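/*
 * Illustrative sketch, not part of this file: a NAPI driver's ->poll()
 * handler feeding netif_receive_skb() from softirq context, as required by
 * the comment above.  struct example_priv, example_poll() and the
 * example_rx_frame() helper (which would pull the next completed frame off
 * the hardware RX ring) are all hypothetical driver code.
 */
struct example_priv {
	struct napi_struct napi;
	struct net_device *dev;
};

static struct sk_buff *example_rx_frame(struct net_device *dev);	/* hypothetical */

static int example_poll(struct napi_struct *napi, int budget)
{
	struct example_priv *priv = container_of(napi, struct example_priv, napi);
	struct sk_buff *skb;
	int work = 0;

	while (work < budget && (skb = example_rx_frame(priv->dev)) != NULL) {
		skb->protocol = eth_type_trans(skb, priv->dev);
		netif_receive_skb(skb);		/* return value usually ignored */
		work++;
	}

	if (work < budget)
		napi_complete(napi);		/* then re-enable RX interrupts */

	return work;
}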
2325 
2326 /* Network device is going away, flush any packets still pending  */
2327 static void flush_backlog(void *arg)
2328 {
2329 	struct net_device *dev = arg;
2330 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2331 	struct sk_buff *skb, *tmp;
2332 
2333 	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2334 		if (skb->dev == dev) {
2335 			__skb_unlink(skb, &queue->input_pkt_queue);
2336 			kfree_skb(skb);
2337 		}
2338 }
2339 
2340 static int process_backlog(struct napi_struct *napi, int quota)
2341 {
2342 	int work = 0;
2343 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2344 	unsigned long start_time = jiffies;
2345 
2346 	napi->weight = weight_p;
2347 	do {
2348 		struct sk_buff *skb;
2349 
2350 		local_irq_disable();
2351 		skb = __skb_dequeue(&queue->input_pkt_queue);
2352 		if (!skb) {
2353 			__napi_complete(napi);
2354 			local_irq_enable();
2355 			break;
2356 		}
2357 		local_irq_enable();
2358 
2359 		netif_receive_skb(skb);
2360 	} while (++work < quota && jiffies == start_time);
2361 
2362 	return work;
2363 }
2364 
2365 /**
2366  * __napi_schedule - schedule for receive
2367  * @n: entry to schedule
2368  *
2369  * The entry's receive function will be scheduled to run
2370  */
2371 void __napi_schedule(struct napi_struct *n)
2372 {
2373 	unsigned long flags;
2374 
2375 	local_irq_save(flags);
2376 	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2377 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2378 	local_irq_restore(flags);
2379 }
2380 EXPORT_SYMBOL(__napi_schedule);
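/*
 * Illustrative sketch, not part of this file: the interrupt-side half of the
 * NAPI pattern built on __napi_schedule().  Only the caller that wins the
 * NAPI_STATE_SCHED bit via napi_schedule_prep() puts the instance on the
 * poll list.  struct example_priv is the hypothetical driver private from
 * the poll sketch earlier; example_mask_rx_irq() is likewise made up, and
 * <linux/interrupt.h> is assumed for irqreturn_t.
 */
static void example_mask_rx_irq(struct example_priv *priv);	/* hypothetical */

static irqreturn_t example_rx_isr(int irq, void *dev_id)
{
	struct example_priv *priv = dev_id;

	if (napi_schedule_prep(&priv->napi)) {
		example_mask_rx_irq(priv);	/* stop further RX interrupts */
		__napi_schedule(&priv->napi);
	}

	return IRQ_HANDLED;
}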
2381 
2382 
2383 static void net_rx_action(struct softirq_action *h)
2384 {
2385 	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2386 	unsigned long time_limit = jiffies + 2;
2387 	int budget = netdev_budget;
2388 	void *have;
2389 
2390 	local_irq_disable();
2391 
2392 	while (!list_empty(list)) {
2393 		struct napi_struct *n;
2394 		int work, weight;
2395 
2396 		/* If the softirq window is exhausted then punt.
2397 		 * Allow this to run for 2 jiffies, which allows
2398 		 * an average latency of 1.5/HZ.
2399 		 */
2400 		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
2401 			goto softnet_break;
2402 
2403 		local_irq_enable();
2404 
2405 		/* Even though interrupts have been re-enabled, this
2406 		 * access is safe because interrupts can only add new
2407 		 * entries to the tail of this list, and only ->poll()
2408 		 * calls can remove this head entry from the list.
2409 		 */
2410 		n = list_entry(list->next, struct napi_struct, poll_list);
2411 
2412 		have = netpoll_poll_lock(n);
2413 
2414 		weight = n->weight;
2415 
2416 		/* This NAPI_STATE_SCHED test is for avoiding a race
2417 		 * with netpoll's poll_napi().  Only the entity which
2418 		 * obtains the lock and sees NAPI_STATE_SCHED set will
2419 		 * actually make the ->poll() call.  Therefore we avoid
2420 		 * accidentally calling ->poll() when NAPI is not scheduled.
2421 		 */
2422 		work = 0;
2423 		if (test_bit(NAPI_STATE_SCHED, &n->state))
2424 			work = n->poll(n, weight);
2425 
2426 		WARN_ON_ONCE(work > weight);
2427 
2428 		budget -= work;
2429 
2430 		local_irq_disable();
2431 
2432 		/* Drivers must not modify the NAPI state if they
2433 		 * consume the entire weight.  In such cases this code
2434 		 * still "owns" the NAPI instance and therefore can
2435 		 * move the instance around on the list at-will.
2436 		 */
2437 		if (unlikely(work == weight)) {
2438 			if (unlikely(napi_disable_pending(n)))
2439 				__napi_complete(n);
2440 			else
2441 				list_move_tail(&n->poll_list, list);
2442 		}
2443 
2444 		netpoll_poll_unlock(have);
2445 	}
2446 out:
2447 	local_irq_enable();
2448 
2449 #ifdef CONFIG_NET_DMA
2450 	/*
2451 	 * There may not be any more sk_buffs coming right now, so push
2452 	 * any pending DMA copies to hardware
2453 	 */
2454 	if (!cpus_empty(net_dma.channel_mask)) {
2455 		int chan_idx;
2456 		for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) {
2457 			struct dma_chan *chan = net_dma.channels[chan_idx];
2458 			if (chan)
2459 				dma_async_memcpy_issue_pending(chan);
2460 		}
2461 	}
2462 #endif
2463 
2464 	return;
2465 
2466 softnet_break:
2467 	__get_cpu_var(netdev_rx_stat).time_squeeze++;
2468 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2469 	goto out;
2470 }
2471 
2472 static gifconf_func_t * gifconf_list [NPROTO];
2473 
2474 /**
2475  *	register_gifconf	-	register a SIOCGIF handler
2476  *	@family: Address family
2477  *	@gifconf: Function handler
2478  *
2479  *	Register protocol dependent address dumping routines. The handler
2480  *	that is passed must not be freed or reused until it has been replaced
2481  *	by another handler.
2482  */
2483 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2484 {
2485 	if (family >= NPROTO)
2486 		return -EINVAL;
2487 	gifconf_list[family] = gifconf;
2488 	return 0;
2489 }
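/*
 * Illustrative sketch, not part of this file: registering a per-family
 * SIOCGIFCONF dumper.  The handler is invoked once per device from
 * dev_ifconf() below; with a NULL buffer it only reports how many bytes it
 * would need.  example_gifconf() and its init hook are hypothetical; the
 * real in-tree user of this interface is IPv4's inet_gifconf().
 */
static int example_gifconf(struct net_device *dev, char __user *buf, int len)
{
	/* Write up to len bytes of struct ifreq records describing @dev into
	 * buf and return the number of bytes used; when buf is NULL, return
	 * the space that would have been needed. */
	return 0;
}

static int __init example_gifconf_init(void)
{
	return register_gifconf(PF_INET, example_gifconf);	/* family must be < NPROTO */
}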
2490 
2491 
2492 /*
2493  *	Map an interface index to its name (SIOCGIFNAME)
2494  */
2495 
2496 /*
2497  *	We need this ioctl for efficient implementation of the
2498  *	if_indextoname() function required by the IPv6 API.  Without
2499  *	it, we would have to search all the interfaces to find a
2500  *	match.  --pb
2501  */
2502 
2503 static int dev_ifname(struct net *net, struct ifreq __user *arg)
2504 {
2505 	struct net_device *dev;
2506 	struct ifreq ifr;
2507 
2508 	/*
2509 	 *	Fetch the caller's info block.
2510 	 */
2511 
2512 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2513 		return -EFAULT;
2514 
2515 	read_lock(&dev_base_lock);
2516 	dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2517 	if (!dev) {
2518 		read_unlock(&dev_base_lock);
2519 		return -ENODEV;
2520 	}
2521 
2522 	strcpy(ifr.ifr_name, dev->name);
2523 	read_unlock(&dev_base_lock);
2524 
2525 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2526 		return -EFAULT;
2527 	return 0;
2528 }
2529 
2530 /*
2531  *	Perform a SIOCGIFCONF call. This structure will change
2532  *	size eventually, and there is nothing I can do about it.
2533  *	Thus we will need a 'compatibility mode'.
2534  */
2535 
2536 static int dev_ifconf(struct net *net, char __user *arg)
2537 {
2538 	struct ifconf ifc;
2539 	struct net_device *dev;
2540 	char __user *pos;
2541 	int len;
2542 	int total;
2543 	int i;
2544 
2545 	/*
2546 	 *	Fetch the caller's info block.
2547 	 */
2548 
2549 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2550 		return -EFAULT;
2551 
2552 	pos = ifc.ifc_buf;
2553 	len = ifc.ifc_len;
2554 
2555 	/*
2556 	 *	Loop over the interfaces, and write an info block for each.
2557 	 */
2558 
2559 	total = 0;
2560 	for_each_netdev(net, dev) {
2561 		for (i = 0; i < NPROTO; i++) {
2562 			if (gifconf_list[i]) {
2563 				int done;
2564 				if (!pos)
2565 					done = gifconf_list[i](dev, NULL, 0);
2566 				else
2567 					done = gifconf_list[i](dev, pos + total,
2568 							       len - total);
2569 				if (done < 0)
2570 					return -EFAULT;
2571 				total += done;
2572 			}
2573 		}
2574 	}
2575 
2576 	/*
2577 	 *	All done.  Write the updated control block back to the caller.
2578 	 */
2579 	ifc.ifc_len = total;
2580 
2581 	/*
2582 	 * 	Both BSD and Solaris return 0 here, so we do too.
2583 	 */
2584 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2585 }
2586 
2587 #ifdef CONFIG_PROC_FS
2588 /*
2589  *	This is invoked by the /proc filesystem handler to display a device
2590  *	in detail.
2591  */
2592 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2593 	__acquires(dev_base_lock)
2594 {
2595 	struct net *net = seq_file_net(seq);
2596 	loff_t off;
2597 	struct net_device *dev;
2598 
2599 	read_lock(&dev_base_lock);
2600 	if (!*pos)
2601 		return SEQ_START_TOKEN;
2602 
2603 	off = 1;
2604 	for_each_netdev(net, dev)
2605 		if (off++ == *pos)
2606 			return dev;
2607 
2608 	return NULL;
2609 }
2610 
2611 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2612 {
2613 	struct net *net = seq_file_net(seq);
2614 	++*pos;
2615 	return v == SEQ_START_TOKEN ?
2616 		first_net_device(net) : next_net_device((struct net_device *)v);
2617 }
2618 
2619 void dev_seq_stop(struct seq_file *seq, void *v)
2620 	__releases(dev_base_lock)
2621 {
2622 	read_unlock(&dev_base_lock);
2623 }
2624 
2625 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2626 {
2627 	const struct net_device_stats *stats = dev_get_stats(dev);
2628 
2629 	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2630 		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2631 		   dev->name, stats->rx_bytes, stats->rx_packets,
2632 		   stats->rx_errors,
2633 		   stats->rx_dropped + stats->rx_missed_errors,
2634 		   stats->rx_fifo_errors,
2635 		   stats->rx_length_errors + stats->rx_over_errors +
2636 		    stats->rx_crc_errors + stats->rx_frame_errors,
2637 		   stats->rx_compressed, stats->multicast,
2638 		   stats->tx_bytes, stats->tx_packets,
2639 		   stats->tx_errors, stats->tx_dropped,
2640 		   stats->tx_fifo_errors, stats->collisions,
2641 		   stats->tx_carrier_errors +
2642 		    stats->tx_aborted_errors +
2643 		    stats->tx_window_errors +
2644 		    stats->tx_heartbeat_errors,
2645 		   stats->tx_compressed);
2646 }
2647 
2648 /*
2649  *	Called from the PROCfs module. This now uses the new arbitrary sized
2650  *	/proc/net interface to create /proc/net/dev
2651  */
2652 static int dev_seq_show(struct seq_file *seq, void *v)
2653 {
2654 	if (v == SEQ_START_TOKEN)
2655 		seq_puts(seq, "Inter-|   Receive                            "
2656 			      "                    |  Transmit\n"
2657 			      " face |bytes    packets errs drop fifo frame "
2658 			      "compressed multicast|bytes    packets errs "
2659 			      "drop fifo colls carrier compressed\n");
2660 	else
2661 		dev_seq_printf_stats(seq, v);
2662 	return 0;
2663 }
2664 
2665 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2666 {
2667 	struct netif_rx_stats *rc = NULL;
2668 
2669 	while (*pos < nr_cpu_ids)
2670 		if (cpu_online(*pos)) {
2671 			rc = &per_cpu(netdev_rx_stat, *pos);
2672 			break;
2673 		} else
2674 			++*pos;
2675 	return rc;
2676 }
2677 
2678 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2679 {
2680 	return softnet_get_online(pos);
2681 }
2682 
2683 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2684 {
2685 	++*pos;
2686 	return softnet_get_online(pos);
2687 }
2688 
2689 static void softnet_seq_stop(struct seq_file *seq, void *v)
2690 {
2691 }
2692 
2693 static int softnet_seq_show(struct seq_file *seq, void *v)
2694 {
2695 	struct netif_rx_stats *s = v;
2696 
2697 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2698 		   s->total, s->dropped, s->time_squeeze, 0,
2699 		   0, 0, 0, 0, /* was fastroute */
2700 		   s->cpu_collision );
2701 	return 0;
2702 }
2703 
2704 static const struct seq_operations dev_seq_ops = {
2705 	.start = dev_seq_start,
2706 	.next  = dev_seq_next,
2707 	.stop  = dev_seq_stop,
2708 	.show  = dev_seq_show,
2709 };
2710 
2711 static int dev_seq_open(struct inode *inode, struct file *file)
2712 {
2713 	return seq_open_net(inode, file, &dev_seq_ops,
2714 			    sizeof(struct seq_net_private));
2715 }
2716 
2717 static const struct file_operations dev_seq_fops = {
2718 	.owner	 = THIS_MODULE,
2719 	.open    = dev_seq_open,
2720 	.read    = seq_read,
2721 	.llseek  = seq_lseek,
2722 	.release = seq_release_net,
2723 };
2724 
2725 static const struct seq_operations softnet_seq_ops = {
2726 	.start = softnet_seq_start,
2727 	.next  = softnet_seq_next,
2728 	.stop  = softnet_seq_stop,
2729 	.show  = softnet_seq_show,
2730 };
2731 
2732 static int softnet_seq_open(struct inode *inode, struct file *file)
2733 {
2734 	return seq_open(file, &softnet_seq_ops);
2735 }
2736 
2737 static const struct file_operations softnet_seq_fops = {
2738 	.owner	 = THIS_MODULE,
2739 	.open    = softnet_seq_open,
2740 	.read    = seq_read,
2741 	.llseek  = seq_lseek,
2742 	.release = seq_release,
2743 };
2744 
2745 static void *ptype_get_idx(loff_t pos)
2746 {
2747 	struct packet_type *pt = NULL;
2748 	loff_t i = 0;
2749 	int t;
2750 
2751 	list_for_each_entry_rcu(pt, &ptype_all, list) {
2752 		if (i == pos)
2753 			return pt;
2754 		++i;
2755 	}
2756 
2757 	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
2758 		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2759 			if (i == pos)
2760 				return pt;
2761 			++i;
2762 		}
2763 	}
2764 	return NULL;
2765 }
2766 
2767 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2768 	__acquires(RCU)
2769 {
2770 	rcu_read_lock();
2771 	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2772 }
2773 
2774 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2775 {
2776 	struct packet_type *pt;
2777 	struct list_head *nxt;
2778 	int hash;
2779 
2780 	++*pos;
2781 	if (v == SEQ_START_TOKEN)
2782 		return ptype_get_idx(0);
2783 
2784 	pt = v;
2785 	nxt = pt->list.next;
2786 	if (pt->type == htons(ETH_P_ALL)) {
2787 		if (nxt != &ptype_all)
2788 			goto found;
2789 		hash = 0;
2790 		nxt = ptype_base[0].next;
2791 	} else
2792 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
2793 
2794 	while (nxt == &ptype_base[hash]) {
2795 		if (++hash >= PTYPE_HASH_SIZE)
2796 			return NULL;
2797 		nxt = ptype_base[hash].next;
2798 	}
2799 found:
2800 	return list_entry(nxt, struct packet_type, list);
2801 }
2802 
2803 static void ptype_seq_stop(struct seq_file *seq, void *v)
2804 	__releases(RCU)
2805 {
2806 	rcu_read_unlock();
2807 }
2808 
2809 static int ptype_seq_show(struct seq_file *seq, void *v)
2810 {
2811 	struct packet_type *pt = v;
2812 
2813 	if (v == SEQ_START_TOKEN)
2814 		seq_puts(seq, "Type Device      Function\n");
2815 	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
2816 		if (pt->type == htons(ETH_P_ALL))
2817 			seq_puts(seq, "ALL ");
2818 		else
2819 			seq_printf(seq, "%04x", ntohs(pt->type));
2820 
2821 		seq_printf(seq, " %-8s %pF\n",
2822 			   pt->dev ? pt->dev->name : "", pt->func);
2823 	}
2824 
2825 	return 0;
2826 }
2827 
2828 static const struct seq_operations ptype_seq_ops = {
2829 	.start = ptype_seq_start,
2830 	.next  = ptype_seq_next,
2831 	.stop  = ptype_seq_stop,
2832 	.show  = ptype_seq_show,
2833 };
2834 
2835 static int ptype_seq_open(struct inode *inode, struct file *file)
2836 {
2837 	return seq_open_net(inode, file, &ptype_seq_ops,
2838 			sizeof(struct seq_net_private));
2839 }
2840 
2841 static const struct file_operations ptype_seq_fops = {
2842 	.owner	 = THIS_MODULE,
2843 	.open    = ptype_seq_open,
2844 	.read    = seq_read,
2845 	.llseek  = seq_lseek,
2846 	.release = seq_release_net,
2847 };
2848 
2849 
2850 static int __net_init dev_proc_net_init(struct net *net)
2851 {
2852 	int rc = -ENOMEM;
2853 
2854 	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
2855 		goto out;
2856 	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
2857 		goto out_dev;
2858 	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
2859 		goto out_softnet;
2860 
2861 	if (wext_proc_init(net))
2862 		goto out_ptype;
2863 	rc = 0;
2864 out:
2865 	return rc;
2866 out_ptype:
2867 	proc_net_remove(net, "ptype");
2868 out_softnet:
2869 	proc_net_remove(net, "softnet_stat");
2870 out_dev:
2871 	proc_net_remove(net, "dev");
2872 	goto out;
2873 }
2874 
2875 static void __net_exit dev_proc_net_exit(struct net *net)
2876 {
2877 	wext_proc_exit(net);
2878 
2879 	proc_net_remove(net, "ptype");
2880 	proc_net_remove(net, "softnet_stat");
2881 	proc_net_remove(net, "dev");
2882 }
2883 
2884 static struct pernet_operations __net_initdata dev_proc_ops = {
2885 	.init = dev_proc_net_init,
2886 	.exit = dev_proc_net_exit,
2887 };
2888 
2889 static int __init dev_proc_init(void)
2890 {
2891 	return register_pernet_subsys(&dev_proc_ops);
2892 }
2893 #else
2894 #define dev_proc_init() 0
2895 #endif	/* CONFIG_PROC_FS */
2896 
2897 
2898 /**
2899  *	netdev_set_master	-	set up master/slave pair
2900  *	@slave: slave device
2901  *	@master: new master device
2902  *
2903  *	Changes the master device of the slave. Pass %NULL to break the
2904  *	bonding. The caller must hold the RTNL semaphore. On a failure
2905  *	a negative errno code is returned. On success the reference counts
2906  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2907  *	function returns zero.
2908  */
2909 int netdev_set_master(struct net_device *slave, struct net_device *master)
2910 {
2911 	struct net_device *old = slave->master;
2912 
2913 	ASSERT_RTNL();
2914 
2915 	if (master) {
2916 		if (old)
2917 			return -EBUSY;
2918 		dev_hold(master);
2919 	}
2920 
2921 	slave->master = master;
2922 
2923 	synchronize_net();
2924 
2925 	if (old)
2926 		dev_put(old);
2927 
2928 	if (master)
2929 		slave->flags |= IFF_SLAVE;
2930 	else
2931 		slave->flags &= ~IFF_SLAVE;
2932 
2933 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2934 	return 0;
2935 }
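/*
 * Illustrative sketch, not part of this file: a bonding-style driver pairing
 * and unpairing devices with netdev_set_master() while holding the RTNL
 * semaphore, as the comment above requires.  example_enslave() and
 * example_release() are hypothetical.
 */
static int example_enslave(struct net_device *master, struct net_device *slave)
{
	int err;

	rtnl_lock();
	err = netdev_set_master(slave, master);
	rtnl_unlock();

	return err;		/* -EBUSY if the slave already had a master */
}

static void example_release(struct net_device *slave)
{
	rtnl_lock();
	netdev_set_master(slave, NULL);		/* break the pairing */
	rtnl_unlock();
}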
2936 
2937 static void dev_change_rx_flags(struct net_device *dev, int flags)
2938 {
2939 	const struct net_device_ops *ops = dev->netdev_ops;
2940 
2941 	if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
2942 		ops->ndo_change_rx_flags(dev, flags);
2943 }
2944 
2945 static int __dev_set_promiscuity(struct net_device *dev, int inc)
2946 {
2947 	unsigned short old_flags = dev->flags;
2948 
2949 	ASSERT_RTNL();
2950 
2951 	dev->flags |= IFF_PROMISC;
2952 	dev->promiscuity += inc;
2953 	if (dev->promiscuity == 0) {
2954 		/*
2955 		 * Avoid overflow.
2956 		 * If inc causes an overflow, leave promiscuity untouched and return an error.
2957 		 */
2958 		if (inc < 0)
2959 			dev->flags &= ~IFF_PROMISC;
2960 		else {
2961 			dev->promiscuity -= inc;
2962 			printk(KERN_WARNING "%s: promiscuity touches roof, "
2963 				"set promiscuity failed, promiscuity feature "
2964 				"of device might be broken.\n", dev->name);
2965 			return -EOVERFLOW;
2966 		}
2967 	}
2968 	if (dev->flags != old_flags) {
2969 		printk(KERN_INFO "device %s %s promiscuous mode\n",
2970 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2971 							       "left");
2972 		if (audit_enabled)
2973 			audit_log(current->audit_context, GFP_ATOMIC,
2974 				AUDIT_ANOM_PROMISCUOUS,
2975 				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
2976 				dev->name, (dev->flags & IFF_PROMISC),
2977 				(old_flags & IFF_PROMISC),
2978 				audit_get_loginuid(current),
2979 				current->uid, current->gid,
2980 				audit_get_sessionid(current));
2981 
2982 		dev_change_rx_flags(dev, IFF_PROMISC);
2983 	}
2984 	return 0;
2985 }
2986 
2987 /**
2988  *	dev_set_promiscuity	- update promiscuity count on a device
2989  *	@dev: device
2990  *	@inc: modifier
2991  *
2992  *	Add or remove promiscuity from a device. While the count in the device
2993  *	remains above zero the interface remains promiscuous. Once it hits zero
2994  *	the device reverts back to normal filtering operation. A negative inc
2995  *	value is used to drop promiscuity on the device.
2996  *	Return 0 if successful or a negative errno code on error.
2997  */
2998 int dev_set_promiscuity(struct net_device *dev, int inc)
2999 {
3000 	unsigned short old_flags = dev->flags;
3001 	int err;
3002 
3003 	err = __dev_set_promiscuity(dev, inc);
3004 	if (err < 0)
3005 		return err;
3006 	if (dev->flags != old_flags)
3007 		dev_set_rx_mode(dev);
3008 	return err;
3009 }
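/*
 * Illustrative sketch, not part of this file: a capture-style user taking and
 * later dropping one promiscuity reference.  Because the count is per caller,
 * several independent users can do this without stepping on each other;
 * dev_set_allmulti() below follows the same counted pattern for all-multicast
 * reception.  Callers must hold the RTNL semaphore (see the ASSERT_RTNL in
 * __dev_set_promiscuity); the function names here are hypothetical.
 */
static int example_capture_start(struct net_device *dev)
{
	return dev_set_promiscuity(dev, 1);	/* may fail with -EOVERFLOW */
}

static void example_capture_stop(struct net_device *dev)
{
	dev_set_promiscuity(dev, -1);		/* drop our reference */
}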
3010 
3011 /**
3012  *	dev_set_allmulti	- update allmulti count on a device
3013  *	@dev: device
3014  *	@inc: modifier
3015  *
3016  *	Add or remove reception of all multicast frames to a device. While the
3017  *	count in the device remains above zero the interface remains listening
3018  *	to all multicast frames. Once it hits zero the device reverts back to normal
3019  *	filtering operation. A negative @inc value is used to drop the counter
3020  *	when releasing a resource needing all multicasts.
3021  *	Return 0 if successful or a negative errno code on error.
3022  */
3023 
3024 int dev_set_allmulti(struct net_device *dev, int inc)
3025 {
3026 	unsigned short old_flags = dev->flags;
3027 
3028 	ASSERT_RTNL();
3029 
3030 	dev->flags |= IFF_ALLMULTI;
3031 	dev->allmulti += inc;
3032 	if (dev->allmulti == 0) {
3033 		/*
3034 		 * Avoid overflow.
3035 		 * If inc causes an overflow, leave allmulti untouched and return an error.
3036 		 */
3037 		if (inc < 0)
3038 			dev->flags &= ~IFF_ALLMULTI;
3039 		else {
3040 			dev->allmulti -= inc;
3041 			printk(KERN_WARNING "%s: allmulti touches roof, "
3042 				"set allmulti failed, allmulti feature of "
3043 				"device might be broken.\n", dev->name);
3044 			return -EOVERFLOW;
3045 		}
3046 	}
3047 	if (dev->flags ^ old_flags) {
3048 		dev_change_rx_flags(dev, IFF_ALLMULTI);
3049 		dev_set_rx_mode(dev);
3050 	}
3051 	return 0;
3052 }
3053 
3054 /*
3055  *	Upload unicast and multicast address lists to device and
3056  *	configure RX filtering. When the device doesn't support unicast
3057  *	filtering it is put in promiscuous mode while unicast addresses
3058  *	are present.
3059  */
3060 void __dev_set_rx_mode(struct net_device *dev)
3061 {
3062 	const struct net_device_ops *ops = dev->netdev_ops;
3063 
3064 	/* dev_open will call this function so the list will stay sane. */
3065 	if (!(dev->flags&IFF_UP))
3066 		return;
3067 
3068 	if (!netif_device_present(dev))
3069 		return;
3070 
3071 	if (ops->ndo_set_rx_mode)
3072 		ops->ndo_set_rx_mode(dev);
3073 	else {
3074 		/* Unicast address changes may only happen under the rtnl,
3075 		 * therefore calling __dev_set_promiscuity here is safe.
3076 		 */
3077 		if (dev->uc_count > 0 && !dev->uc_promisc) {
3078 			__dev_set_promiscuity(dev, 1);
3079 			dev->uc_promisc = 1;
3080 		} else if (dev->uc_count == 0 && dev->uc_promisc) {
3081 			__dev_set_promiscuity(dev, -1);
3082 			dev->uc_promisc = 0;
3083 		}
3084 
3085 		if (ops->ndo_set_multicast_list)
3086 			ops->ndo_set_multicast_list(dev);
3087 	}
3088 }
3089 
3090 void dev_set_rx_mode(struct net_device *dev)
3091 {
3092 	netif_addr_lock_bh(dev);
3093 	__dev_set_rx_mode(dev);
3094 	netif_addr_unlock_bh(dev);
3095 }
3096 
3097 int __dev_addr_delete(struct dev_addr_list **list, int *count,
3098 		      void *addr, int alen, int glbl)
3099 {
3100 	struct dev_addr_list *da;
3101 
3102 	for (; (da = *list) != NULL; list = &da->next) {
3103 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3104 		    alen == da->da_addrlen) {
3105 			if (glbl) {
3106 				int old_glbl = da->da_gusers;
3107 				da->da_gusers = 0;
3108 				if (old_glbl == 0)
3109 					break;
3110 			}
3111 			if (--da->da_users)
3112 				return 0;
3113 
3114 			*list = da->next;
3115 			kfree(da);
3116 			(*count)--;
3117 			return 0;
3118 		}
3119 	}
3120 	return -ENOENT;
3121 }
3122 
3123 int __dev_addr_add(struct dev_addr_list **list, int *count,
3124 		   void *addr, int alen, int glbl)
3125 {
3126 	struct dev_addr_list *da;
3127 
3128 	for (da = *list; da != NULL; da = da->next) {
3129 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3130 		    da->da_addrlen == alen) {
3131 			if (glbl) {
3132 				int old_glbl = da->da_gusers;
3133 				da->da_gusers = 1;
3134 				if (old_glbl)
3135 					return 0;
3136 			}
3137 			da->da_users++;
3138 			return 0;
3139 		}
3140 	}
3141 
3142 	da = kzalloc(sizeof(*da), GFP_ATOMIC);
3143 	if (da == NULL)
3144 		return -ENOMEM;
3145 	memcpy(da->da_addr, addr, alen);
3146 	da->da_addrlen = alen;
3147 	da->da_users = 1;
3148 	da->da_gusers = glbl ? 1 : 0;
3149 	da->next = *list;
3150 	*list = da;
3151 	(*count)++;
3152 	return 0;
3153 }
3154 
3155 /**
3156  *	dev_unicast_delete	- Release secondary unicast address.
3157  *	@dev: device
3158  *	@addr: address to delete
3159  *	@alen: length of @addr
3160  *
3161  *	Release reference to a secondary unicast address and remove it
3162  *	from the device if the reference count drops to zero.
3163  *
3164  * 	The caller must hold the rtnl_mutex.
3165  */
3166 int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
3167 {
3168 	int err;
3169 
3170 	ASSERT_RTNL();
3171 
3172 	netif_addr_lock_bh(dev);
3173 	err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3174 	if (!err)
3175 		__dev_set_rx_mode(dev);
3176 	netif_addr_unlock_bh(dev);
3177 	return err;
3178 }
3179 EXPORT_SYMBOL(dev_unicast_delete);
3180 
3181 /**
3182  *	dev_unicast_add		- add a secondary unicast address
3183  *	@dev: device
3184  *	@addr: address to add
3185  *	@alen: length of @addr
3186  *
3187  *	Add a secondary unicast address to the device or increase
3188  *	the reference count if it already exists.
3189  *
3190  *	The caller must hold the rtnl_mutex.
3191  */
3192 int dev_unicast_add(struct net_device *dev, void *addr, int alen)
3193 {
3194 	int err;
3195 
3196 	ASSERT_RTNL();
3197 
3198 	netif_addr_lock_bh(dev);
3199 	err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3200 	if (!err)
3201 		__dev_set_rx_mode(dev);
3202 	netif_addr_unlock_bh(dev);
3203 	return err;
3204 }
3205 EXPORT_SYMBOL(dev_unicast_add);
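/*
 * Illustrative sketch, not part of this file: claiming and releasing a
 * secondary unicast address (for instance a virtual MAC) on a device.  Both
 * calls require the rtnl_mutex, as documented above.  The function names and
 * the caller-supplied Ethernet address are hypothetical.
 */
static int example_claim_mac(struct net_device *dev, const u8 *mac)
{
	int err;

	rtnl_lock();
	err = dev_unicast_add(dev, (void *)mac, ETH_ALEN);
	rtnl_unlock();

	return err;
}

static void example_release_mac(struct net_device *dev, const u8 *mac)
{
	rtnl_lock();
	dev_unicast_delete(dev, (void *)mac, ETH_ALEN);
	rtnl_unlock();
}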
3206 
3207 int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3208 		    struct dev_addr_list **from, int *from_count)
3209 {
3210 	struct dev_addr_list *da, *next;
3211 	int err = 0;
3212 
3213 	da = *from;
3214 	while (da != NULL) {
3215 		next = da->next;
3216 		if (!da->da_synced) {
3217 			err = __dev_addr_add(to, to_count,
3218 					     da->da_addr, da->da_addrlen, 0);
3219 			if (err < 0)
3220 				break;
3221 			da->da_synced = 1;
3222 			da->da_users++;
3223 		} else if (da->da_users == 1) {
3224 			__dev_addr_delete(to, to_count,
3225 					  da->da_addr, da->da_addrlen, 0);
3226 			__dev_addr_delete(from, from_count,
3227 					  da->da_addr, da->da_addrlen, 0);
3228 		}
3229 		da = next;
3230 	}
3231 	return err;
3232 }
3233 
3234 void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3235 		       struct dev_addr_list **from, int *from_count)
3236 {
3237 	struct dev_addr_list *da, *next;
3238 
3239 	da = *from;
3240 	while (da != NULL) {
3241 		next = da->next;
3242 		if (da->da_synced) {
3243 			__dev_addr_delete(to, to_count,
3244 					  da->da_addr, da->da_addrlen, 0);
3245 			da->da_synced = 0;
3246 			__dev_addr_delete(from, from_count,
3247 					  da->da_addr, da->da_addrlen, 0);
3248 		}
3249 		da = next;
3250 	}
3251 }
3252 
3253 /**
3254  *	dev_unicast_sync - Synchronize device's unicast list to another device
3255  *	@to: destination device
3256  *	@from: source device
3257  *
3258  *	Add newly added addresses to the destination device and release
3259  *	addresses that have no users left. The source device must be
3260  *	locked by netif_tx_lock_bh.
3261  *
3262  *	This function is intended to be called from the dev->set_rx_mode
3263  *	function of layered software devices.
3264  */
3265 int dev_unicast_sync(struct net_device *to, struct net_device *from)
3266 {
3267 	int err = 0;
3268 
3269 	netif_addr_lock_bh(to);
3270 	err = __dev_addr_sync(&to->uc_list, &to->uc_count,
3271 			      &from->uc_list, &from->uc_count);
3272 	if (!err)
3273 		__dev_set_rx_mode(to);
3274 	netif_addr_unlock_bh(to);
3275 	return err;
3276 }
3277 EXPORT_SYMBOL(dev_unicast_sync);
3278 
3279 /**
3280  *	dev_unicast_unsync - Remove synchronized addresses from the destination device
3281  *	@to: destination device
3282  *	@from: source device
3283  *
3284  *	Remove all addresses that were added to the destination device by
3285  *	dev_unicast_sync(). This function is intended to be called from the
3286  *	dev->stop function of layered software devices.
3287  */
3288 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3289 {
3290 	netif_addr_lock_bh(from);
3291 	netif_addr_lock(to);
3292 
3293 	__dev_addr_unsync(&to->uc_list, &to->uc_count,
3294 			  &from->uc_list, &from->uc_count);
3295 	__dev_set_rx_mode(to);
3296 
3297 	netif_addr_unlock(to);
3298 	netif_addr_unlock_bh(from);
3299 }
3300 EXPORT_SYMBOL(dev_unicast_unsync);
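/*
 * Illustrative sketch, not part of this file: a stacked device (VLAN, macvlan
 * or bonding style) pushing its unicast list down to the real device, in the
 * way the comments above describe.  The rx_mode callback runs with the upper
 * device's address list already locked, so dev_unicast_sync() only has to
 * lock the lower device.  struct example_upper_priv and its lowerdev pointer
 * are hypothetical.
 */
struct example_upper_priv {
	struct net_device *lowerdev;
};

static void example_upper_set_rx_mode(struct net_device *upper)
{
	struct example_upper_priv *p = netdev_priv(upper);

	dev_unicast_sync(p->lowerdev, upper);
}

static int example_upper_stop(struct net_device *upper)
{
	struct example_upper_priv *p = netdev_priv(upper);

	dev_unicast_unsync(p->lowerdev, upper);
	return 0;
}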
3301 
3302 static void __dev_addr_discard(struct dev_addr_list **list)
3303 {
3304 	struct dev_addr_list *tmp;
3305 
3306 	while (*list != NULL) {
3307 		tmp = *list;
3308 		*list = tmp->next;
3309 		if (tmp->da_users > tmp->da_gusers)
3310 			printk("__dev_addr_discard: address leakage! "
3311 			       "da_users=%d\n", tmp->da_users);
3312 		kfree(tmp);
3313 	}
3314 }
3315 
3316 static void dev_addr_discard(struct net_device *dev)
3317 {
3318 	netif_addr_lock_bh(dev);
3319 
3320 	__dev_addr_discard(&dev->uc_list);
3321 	dev->uc_count = 0;
3322 
3323 	__dev_addr_discard(&dev->mc_list);
3324 	dev->mc_count = 0;
3325 
3326 	netif_addr_unlock_bh(dev);
3327 }
3328 
3329 /**
3330  *	dev_get_flags - get flags reported to userspace
3331  *	@dev: device
3332  *
3333  *	Get the combination of flag bits exported through APIs to userspace.
3334  */
3335 unsigned dev_get_flags(const struct net_device *dev)
3336 {
3337 	unsigned flags;
3338 
3339 	flags = (dev->flags & ~(IFF_PROMISC |
3340 				IFF_ALLMULTI |
3341 				IFF_RUNNING |
3342 				IFF_LOWER_UP |
3343 				IFF_DORMANT)) |
3344 		(dev->gflags & (IFF_PROMISC |
3345 				IFF_ALLMULTI));
3346 
3347 	if (netif_running(dev)) {
3348 		if (netif_oper_up(dev))
3349 			flags |= IFF_RUNNING;
3350 		if (netif_carrier_ok(dev))
3351 			flags |= IFF_LOWER_UP;
3352 		if (netif_dormant(dev))
3353 			flags |= IFF_DORMANT;
3354 	}
3355 
3356 	return flags;
3357 }
3358 
3359 /**
3360  *	dev_change_flags - change device settings
3361  *	@dev: device
3362  *	@flags: device state flags
3363  *
3364  *	Change settings on device based state flags. The flags are
3365  *	in the userspace exported format.
3366  */
3367 int dev_change_flags(struct net_device *dev, unsigned flags)
3368 {
3369 	int ret, changes;
3370 	int old_flags = dev->flags;
3371 
3372 	ASSERT_RTNL();
3373 
3374 	/*
3375 	 *	Set the flags on our device.
3376 	 */
3377 
3378 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3379 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3380 			       IFF_AUTOMEDIA)) |
3381 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3382 				    IFF_ALLMULTI));
3383 
3384 	/*
3385 	 *	Load in the correct multicast list now the flags have changed.
3386 	 */
3387 
3388 	if ((old_flags ^ flags) & IFF_MULTICAST)
3389 		dev_change_rx_flags(dev, IFF_MULTICAST);
3390 
3391 	dev_set_rx_mode(dev);
3392 
3393 	/*
3394 	 *	Have we downed the interface?  We handle IFF_UP ourselves
3395 	 *	according to user attempts to set it, rather than blindly
3396 	 *	setting it.
3397 	 */
3398 
3399 	ret = 0;
3400 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
3401 		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3402 
3403 		if (!ret)
3404 			dev_set_rx_mode(dev);
3405 	}
3406 
3407 	if (dev->flags & IFF_UP &&
3408 	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3409 					  IFF_VOLATILE)))
3410 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
3411 
3412 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
3413 		int inc = (flags & IFF_PROMISC) ? +1 : -1;
3414 		dev->gflags ^= IFF_PROMISC;
3415 		dev_set_promiscuity(dev, inc);
3416 	}
3417 
3418 	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3419 	   is important. Some (broken) drivers set IFF_PROMISC when
3420 	   IFF_ALLMULTI is requested, without asking us and without reporting it.
3421 	 */
3422 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3423 		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3424 		dev->gflags ^= IFF_ALLMULTI;
3425 		dev_set_allmulti(dev, inc);
3426 	}
3427 
3428 	/* Exclude state transition flags, already notified */
3429 	changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3430 	if (changes)
3431 		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
3432 
3433 	return ret;
3434 }
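/*
 * Illustrative sketch, not part of this file: bringing an interface up from
 * kernel code by toggling IFF_UP through dev_change_flags(), much as the
 * SIOCSIFFLAGS ioctl path further down does.  Must run under rtnl_lock();
 * example_bring_up() is a hypothetical name.
 */
static int example_bring_up(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_change_flags(dev, dev->flags | IFF_UP);
	rtnl_unlock();

	return err;
}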
3435 
3436 /**
3437  *	dev_set_mtu - Change maximum transfer unit
3438  *	@dev: device
3439  *	@new_mtu: new transfer unit
3440  *
3441  *	Change the maximum transfer size of the network device.
3442  */
3443 int dev_set_mtu(struct net_device *dev, int new_mtu)
3444 {
3445 	const struct net_device_ops *ops = dev->netdev_ops;
3446 	int err;
3447 
3448 	if (new_mtu == dev->mtu)
3449 		return 0;
3450 
3451 	/*	MTU must be positive.	 */
3452 	if (new_mtu < 0)
3453 		return -EINVAL;
3454 
3455 	if (!netif_device_present(dev))
3456 		return -ENODEV;
3457 
3458 	err = 0;
3459 	if (ops->ndo_change_mtu)
3460 		err = ops->ndo_change_mtu(dev, new_mtu);
3461 	else
3462 		dev->mtu = new_mtu;
3463 
3464 	if (!err && dev->flags & IFF_UP)
3465 		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
3466 	return err;
3467 }
3468 
3469 /**
3470  *	dev_set_mac_address - Change Media Access Control Address
3471  *	@dev: device
3472  *	@sa: new address
3473  *
3474  *	Change the hardware (MAC) address of the device
3475  */
3476 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3477 {
3478 	const struct net_device_ops *ops = dev->netdev_ops;
3479 	int err;
3480 
3481 	if (!ops->ndo_set_mac_address)
3482 		return -EOPNOTSUPP;
3483 	if (sa->sa_family != dev->type)
3484 		return -EINVAL;
3485 	if (!netif_device_present(dev))
3486 		return -ENODEV;
3487 	err = ops->ndo_set_mac_address(dev, sa);
3488 	if (!err)
3489 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3490 	return err;
3491 }
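/*
 * Illustrative sketch, not part of this file: reconfiguring a device with the
 * two setters above.  The ioctl paths below call both under rtnl_lock(), so
 * this helper assumes its caller does the same.  The MAC address travels in a
 * struct sockaddr whose sa_family must match dev->type.  The function name
 * and parameters are hypothetical.
 */
static int example_reconfigure(struct net_device *dev, int new_mtu, const u8 *mac)
{
	struct sockaddr sa;
	int err;

	err = dev_set_mtu(dev, new_mtu);
	if (err)
		return err;

	sa.sa_family = dev->type;
	/* assumes dev->addr_len <= sizeof(sa.sa_data), true for Ethernet */
	memcpy(sa.sa_data, mac, dev->addr_len);

	return dev_set_mac_address(dev, &sa);
}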
3492 
3493 /*
3494  *	Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
3495  */
3496 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
3497 {
3498 	int err;
3499 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3500 
3501 	if (!dev)
3502 		return -ENODEV;
3503 
3504 	switch (cmd) {
3505 		case SIOCGIFFLAGS:	/* Get interface flags */
3506 			ifr->ifr_flags = dev_get_flags(dev);
3507 			return 0;
3508 
3509 		case SIOCGIFMETRIC:	/* Get the metric on the interface
3510 					   (currently unused) */
3511 			ifr->ifr_metric = 0;
3512 			return 0;
3513 
3514 		case SIOCGIFMTU:	/* Get the MTU of a device */
3515 			ifr->ifr_mtu = dev->mtu;
3516 			return 0;
3517 
3518 		case SIOCGIFHWADDR:
3519 			if (!dev->addr_len)
3520 				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3521 			else
3522 				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3523 				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3524 			ifr->ifr_hwaddr.sa_family = dev->type;
3525 			return 0;
3526 
3527 		case SIOCGIFSLAVE:
3528 			err = -EINVAL;
3529 			break;
3530 
3531 		case SIOCGIFMAP:
3532 			ifr->ifr_map.mem_start = dev->mem_start;
3533 			ifr->ifr_map.mem_end   = dev->mem_end;
3534 			ifr->ifr_map.base_addr = dev->base_addr;
3535 			ifr->ifr_map.irq       = dev->irq;
3536 			ifr->ifr_map.dma       = dev->dma;
3537 			ifr->ifr_map.port      = dev->if_port;
3538 			return 0;
3539 
3540 		case SIOCGIFINDEX:
3541 			ifr->ifr_ifindex = dev->ifindex;
3542 			return 0;
3543 
3544 		case SIOCGIFTXQLEN:
3545 			ifr->ifr_qlen = dev->tx_queue_len;
3546 			return 0;
3547 
3548 		default:
3549 			/* dev_ioctl() should ensure this case
3550 			 * is never reached
3551 			 */
3552 			WARN_ON(1);
3553 			err = -EINVAL;
3554 			break;
3555 
3556 	}
3557 	return err;
3558 }
3559 
3560 /*
3561  *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
3562  */
3563 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3564 {
3565 	int err;
3566 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3567 	const struct net_device_ops *ops = dev->netdev_ops;
3568 
3569 	if (!dev)
3570 		return -ENODEV;
3571 
3572 	switch (cmd) {
3573 		case SIOCSIFFLAGS:	/* Set interface flags */
3574 			return dev_change_flags(dev, ifr->ifr_flags);
3575 
3576 		case SIOCSIFMETRIC:	/* Set the metric on the interface
3577 					   (currently unused) */
3578 			return -EOPNOTSUPP;
3579 
3580 		case SIOCSIFMTU:	/* Set the MTU of a device */
3581 			return dev_set_mtu(dev, ifr->ifr_mtu);
3582 
3583 		case SIOCSIFHWADDR:
3584 			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3585 
3586 		case SIOCSIFHWBROADCAST:
3587 			if (ifr->ifr_hwaddr.sa_family != dev->type)
3588 				return -EINVAL;
3589 			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3590 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3591 			call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3592 			return 0;
3593 
3594 		case SIOCSIFMAP:
3595 			if (ops->ndo_set_config) {
3596 				if (!netif_device_present(dev))
3597 					return -ENODEV;
3598 				return ops->ndo_set_config(dev, &ifr->ifr_map);
3599 			}
3600 			return -EOPNOTSUPP;
3601 
3602 		case SIOCADDMULTI:
3603 			if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
3604 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3605 				return -EINVAL;
3606 			if (!netif_device_present(dev))
3607 				return -ENODEV;
3608 			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3609 					  dev->addr_len, 1);
3610 
3611 		case SIOCDELMULTI:
3612 			if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
3613 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3614 				return -EINVAL;
3615 			if (!netif_device_present(dev))
3616 				return -ENODEV;
3617 			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3618 					     dev->addr_len, 1);
3619 
3620 		case SIOCSIFTXQLEN:
3621 			if (ifr->ifr_qlen < 0)
3622 				return -EINVAL;
3623 			dev->tx_queue_len = ifr->ifr_qlen;
3624 			return 0;
3625 
3626 		case SIOCSIFNAME:
3627 			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3628 			return dev_change_name(dev, ifr->ifr_newname);
3629 
3630 		/*
3631 		 *	Unknown or private ioctl
3632 		 */
3633 
3634 		default:
3635 			if ((cmd >= SIOCDEVPRIVATE &&
3636 			    cmd <= SIOCDEVPRIVATE + 15) ||
3637 			    cmd == SIOCBONDENSLAVE ||
3638 			    cmd == SIOCBONDRELEASE ||
3639 			    cmd == SIOCBONDSETHWADDR ||
3640 			    cmd == SIOCBONDSLAVEINFOQUERY ||
3641 			    cmd == SIOCBONDINFOQUERY ||
3642 			    cmd == SIOCBONDCHANGEACTIVE ||
3643 			    cmd == SIOCGMIIPHY ||
3644 			    cmd == SIOCGMIIREG ||
3645 			    cmd == SIOCSMIIREG ||
3646 			    cmd == SIOCBRADDIF ||
3647 			    cmd == SIOCBRDELIF ||
3648 			    cmd == SIOCWANDEV) {
3649 				err = -EOPNOTSUPP;
3650 				if (ops->ndo_do_ioctl) {
3651 					if (netif_device_present(dev))
3652 						err = ops->ndo_do_ioctl(dev, ifr, cmd);
3653 					else
3654 						err = -ENODEV;
3655 				}
3656 			} else
3657 				err = -EINVAL;
3658 
3659 	}
3660 	return err;
3661 }
3662 
3663 /*
3664  *	This function handles all "interface"-type I/O control requests. The actual
3665  *	'doing' part of this is dev_ifsioc above.
3666  */
3667 
3668 /**
3669  *	dev_ioctl	-	network device ioctl
3670  *	@net: the applicable net namespace
3671  *	@cmd: command to issue
3672  *	@arg: pointer to a struct ifreq in user space
3673  *
3674  *	Issue ioctl functions to devices. This is normally called by the
3675  *	user space syscall interfaces but can sometimes be useful for
3676  *	other purposes. The return value is the return from the syscall if
3677  *	positive or a negative errno code on error.
3678  */
3679 
3680 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3681 {
3682 	struct ifreq ifr;
3683 	int ret;
3684 	char *colon;
3685 
3686 	/* One special case: SIOCGIFCONF takes ifconf argument
3687 	   and takes the rtnl lock, because it sleeps writing
3688 	   to user space.
3689 	 */
3690 
3691 	if (cmd == SIOCGIFCONF) {
3692 		rtnl_lock();
3693 		ret = dev_ifconf(net, (char __user *) arg);
3694 		rtnl_unlock();
3695 		return ret;
3696 	}
3697 	if (cmd == SIOCGIFNAME)
3698 		return dev_ifname(net, (struct ifreq __user *)arg);
3699 
3700 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3701 		return -EFAULT;
3702 
3703 	ifr.ifr_name[IFNAMSIZ-1] = 0;
3704 
3705 	colon = strchr(ifr.ifr_name, ':');
3706 	if (colon)
3707 		*colon = 0;
3708 
3709 	/*
3710 	 *	See which interface the caller is talking about.
3711 	 */
3712 
3713 	switch (cmd) {
3714 		/*
3715 		 *	These ioctl calls:
3716 		 *	- can be done by all.
3717 		 *	- atomic and do not require locking.
3718 		 *	- return a value
3719 		 */
3720 		case SIOCGIFFLAGS:
3721 		case SIOCGIFMETRIC:
3722 		case SIOCGIFMTU:
3723 		case SIOCGIFHWADDR:
3724 		case SIOCGIFSLAVE:
3725 		case SIOCGIFMAP:
3726 		case SIOCGIFINDEX:
3727 		case SIOCGIFTXQLEN:
3728 			dev_load(net, ifr.ifr_name);
3729 			read_lock(&dev_base_lock);
3730 			ret = dev_ifsioc_locked(net, &ifr, cmd);
3731 			read_unlock(&dev_base_lock);
3732 			if (!ret) {
3733 				if (colon)
3734 					*colon = ':';
3735 				if (copy_to_user(arg, &ifr,
3736 						 sizeof(struct ifreq)))
3737 					ret = -EFAULT;
3738 			}
3739 			return ret;
3740 
3741 		case SIOCETHTOOL:
3742 			dev_load(net, ifr.ifr_name);
3743 			rtnl_lock();
3744 			ret = dev_ethtool(net, &ifr);
3745 			rtnl_unlock();
3746 			if (!ret) {
3747 				if (colon)
3748 					*colon = ':';
3749 				if (copy_to_user(arg, &ifr,
3750 						 sizeof(struct ifreq)))
3751 					ret = -EFAULT;
3752 			}
3753 			return ret;
3754 
3755 		/*
3756 		 *	These ioctl calls:
3757 		 *	- require superuser power.
3758 		 *	- require strict serialization.
3759 		 *	- return a value
3760 		 */
3761 		case SIOCGMIIPHY:
3762 		case SIOCGMIIREG:
3763 		case SIOCSIFNAME:
3764 			if (!capable(CAP_NET_ADMIN))
3765 				return -EPERM;
3766 			dev_load(net, ifr.ifr_name);
3767 			rtnl_lock();
3768 			ret = dev_ifsioc(net, &ifr, cmd);
3769 			rtnl_unlock();
3770 			if (!ret) {
3771 				if (colon)
3772 					*colon = ':';
3773 				if (copy_to_user(arg, &ifr,
3774 						 sizeof(struct ifreq)))
3775 					ret = -EFAULT;
3776 			}
3777 			return ret;
3778 
3779 		/*
3780 		 *	These ioctl calls:
3781 		 *	- require superuser power.
3782 		 *	- require strict serialization.
3783 		 *	- do not return a value
3784 		 */
3785 		case SIOCSIFFLAGS:
3786 		case SIOCSIFMETRIC:
3787 		case SIOCSIFMTU:
3788 		case SIOCSIFMAP:
3789 		case SIOCSIFHWADDR:
3790 		case SIOCSIFSLAVE:
3791 		case SIOCADDMULTI:
3792 		case SIOCDELMULTI:
3793 		case SIOCSIFHWBROADCAST:
3794 		case SIOCSIFTXQLEN:
3795 		case SIOCSMIIREG:
3796 		case SIOCBONDENSLAVE:
3797 		case SIOCBONDRELEASE:
3798 		case SIOCBONDSETHWADDR:
3799 		case SIOCBONDCHANGEACTIVE:
3800 		case SIOCBRADDIF:
3801 		case SIOCBRDELIF:
3802 			if (!capable(CAP_NET_ADMIN))
3803 				return -EPERM;
3804 			/* fall through */
3805 		case SIOCBONDSLAVEINFOQUERY:
3806 		case SIOCBONDINFOQUERY:
3807 			dev_load(net, ifr.ifr_name);
3808 			rtnl_lock();
3809 			ret = dev_ifsioc(net, &ifr, cmd);
3810 			rtnl_unlock();
3811 			return ret;
3812 
3813 		case SIOCGIFMEM:
3814 			/* Get the per device memory space. We can add this but
3815 			 * currently do not support it */
3816 		case SIOCSIFMEM:
3817 			/* Set the per device memory buffer space.
3818 			 * Not applicable in our case */
3819 		case SIOCSIFLINK:
3820 			return -EINVAL;
3821 
3822 		/*
3823 		 *	Unknown or private ioctl.
3824 		 */
3825 		default:
3826 			if (cmd == SIOCWANDEV ||
3827 			    (cmd >= SIOCDEVPRIVATE &&
3828 			     cmd <= SIOCDEVPRIVATE + 15)) {
3829 				dev_load(net, ifr.ifr_name);
3830 				rtnl_lock();
3831 				ret = dev_ifsioc(net, &ifr, cmd);
3832 				rtnl_unlock();
3833 				if (!ret && copy_to_user(arg, &ifr,
3834 							 sizeof(struct ifreq)))
3835 					ret = -EFAULT;
3836 				return ret;
3837 			}
3838 			/* Take care of Wireless Extensions */
3839 			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
3840 				return wext_handle_ioctl(net, &ifr, cmd, arg);
3841 			return -EINVAL;
3842 	}
3843 }
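/*
 * Illustrative sketch, not part of this file: the user-space side of the
 * ioctl plumbing above.  SIOCGIFMTU is one of the "can be done by all"
 * requests handled in dev_ifsioc_locked(); any AF_INET datagram socket will
 * do as the ioctl handle.  Shown inside #if 0 because it is user-space code,
 * with error handling trimmed; the interface name is a placeholder.
 */
#if 0
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>

int main(void)
{
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);

	if (ioctl(fd, SIOCGIFMTU, &ifr) == 0)
		printf("%s mtu %d\n", ifr.ifr_name, ifr.ifr_mtu);

	close(fd);
	return 0;
}
#endif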
3844 
3845 
3846 /**
3847  *	dev_new_index	-	allocate an ifindex
3848  *	@net: the applicable net namespace
3849  *
3850  *	Returns a suitable unique value for a new device interface
3851  *	number.  The caller must hold the rtnl semaphore or the
3852  *	dev_base_lock to be sure it remains unique.
3853  */
3854 static int dev_new_index(struct net *net)
3855 {
3856 	static int ifindex;
3857 	for (;;) {
3858 		if (++ifindex <= 0)
3859 			ifindex = 1;
3860 		if (!__dev_get_by_index(net, ifindex))
3861 			return ifindex;
3862 	}
3863 }
3864 
3865 /* Delayed registration/unregistration */
3866 static LIST_HEAD(net_todo_list);
3867 
3868 static void net_set_todo(struct net_device *dev)
3869 {
3870 	list_add_tail(&dev->todo_list, &net_todo_list);
3871 }
3872 
3873 static void rollback_registered(struct net_device *dev)
3874 {
3875 	BUG_ON(dev_boot_phase);
3876 	ASSERT_RTNL();
3877 
3878 	/* Some devices call this without having registered, to unwind a failed initialization. */
3879 	if (dev->reg_state == NETREG_UNINITIALIZED) {
3880 		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3881 				  "was registered\n", dev->name, dev);
3882 
3883 		WARN_ON(1);
3884 		return;
3885 	}
3886 
3887 	BUG_ON(dev->reg_state != NETREG_REGISTERED);
3888 
3889 	/* If device is running, close it first. */
3890 	dev_close(dev);
3891 
3892 	/* And unlink it from device chain. */
3893 	unlist_netdevice(dev);
3894 
3895 	dev->reg_state = NETREG_UNREGISTERING;
3896 
3897 	synchronize_net();
3898 
3899 	/* Shutdown queueing discipline. */
3900 	dev_shutdown(dev);
3901 
3902 
3903 	/* Notify protocols that we are about to destroy
3904 	   this device. They should clean up all their state.
3905 	*/
3906 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
3907 
3908 	/*
3909 	 *	Flush the unicast and multicast chains
3910 	 */
3911 	dev_addr_discard(dev);
3912 
3913 	if (dev->netdev_ops->ndo_uninit)
3914 		dev->netdev_ops->ndo_uninit(dev);
3915 
3916 	/* Notifier chain MUST detach us from master device. */
3917 	WARN_ON(dev->master);
3918 
3919 	/* Remove entries from kobject tree */
3920 	netdev_unregister_kobject(dev);
3921 
3922 	synchronize_net();
3923 
3924 	dev_put(dev);
3925 }
3926 
3927 static void __netdev_init_queue_locks_one(struct net_device *dev,
3928 					  struct netdev_queue *dev_queue,
3929 					  void *_unused)
3930 {
3931 	spin_lock_init(&dev_queue->_xmit_lock);
3932 	netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
3933 	dev_queue->xmit_lock_owner = -1;
3934 }
3935 
3936 static void netdev_init_queue_locks(struct net_device *dev)
3937 {
3938 	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
3939 	__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
3940 }
3941 
3942 unsigned long netdev_fix_features(unsigned long features, const char *name)
3943 {
3944 	/* Fix illegal SG+CSUM combinations. */
3945 	if ((features & NETIF_F_SG) &&
3946 	    !(features & NETIF_F_ALL_CSUM)) {
3947 		if (name)
3948 			printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
3949 			       "checksum feature.\n", name);
3950 		features &= ~NETIF_F_SG;
3951 	}
3952 
3953 	/* TSO requires that SG is present as well. */
3954 	if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
3955 		if (name)
3956 			printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
3957 			       "SG feature.\n", name);
3958 		features &= ~NETIF_F_TSO;
3959 	}
3960 
3961 	if (features & NETIF_F_UFO) {
3962 		if (!(features & NETIF_F_GEN_CSUM)) {
3963 			if (name)
3964 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
3965 				       "since no NETIF_F_HW_CSUM feature.\n",
3966 				       name);
3967 			features &= ~NETIF_F_UFO;
3968 		}
3969 
3970 		if (!(features & NETIF_F_SG)) {
3971 			if (name)
3972 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
3973 				       "since no NETIF_F_SG feature.\n", name);
3974 			features &= ~NETIF_F_UFO;
3975 		}
3976 	}
3977 
3978 	return features;
3979 }
3980 EXPORT_SYMBOL(netdev_fix_features);
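
/*
 * Example (illustrative sketch, not from the original source): a driver
 * that lets users toggle offloads could sanitise the requested feature
 * set with netdev_fix_features() before applying it.  The function name
 * exfeat_apply_features() is hypothetical.
 */
static void exfeat_apply_features(struct net_device *dev,
				  unsigned long wanted)
{
	/* drop combinations the stack cannot support, e.g. SG without
	 * any checksum offload, or TSO without SG */
	dev->features = netdev_fix_features(wanted, dev->name);
}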
3981 
3982 /**
3983  *	register_netdevice	- register a network device
3984  *	@dev: device to register
3985  *
3986  *	Take a completed network device structure and add it to the kernel
3987  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3988  *	chain. 0 is returned on success. A negative errno code is returned
3989  *	on a failure to set up the device, or if the name is a duplicate.
3990  *
3991  *	Callers must hold the rtnl semaphore. You may want
3992  *	register_netdev() instead of this.
3993  *
3994  *	BUGS:
3995  *	The locking appears insufficient to guarantee two parallel registers
3996  *	will not get the same name.
3997  */
3998 
3999 int register_netdevice(struct net_device *dev)
4000 {
4001 	struct hlist_head *head;
4002 	struct hlist_node *p;
4003 	int ret;
4004 	struct net *net = dev_net(dev);
4005 
4006 	BUG_ON(dev_boot_phase);
4007 	ASSERT_RTNL();
4008 
4009 	might_sleep();
4010 
4011 	/* When net_devices are persistent, this will be fatal. */
4012 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
4013 	BUG_ON(!net);
4014 
4015 	spin_lock_init(&dev->addr_list_lock);
4016 	netdev_set_addr_lockdep_class(dev);
4017 	netdev_init_queue_locks(dev);
4018 
4019 	dev->iflink = -1;
4020 
4021 #ifdef CONFIG_COMPAT_NET_DEV_OPS
4022 	/* Netdevice_ops API compatibility support.
4023 	 * This is temporary until all network devices are converted.
4024 	 */
4025 	if (dev->netdev_ops) {
4026 		const struct net_device_ops *ops = dev->netdev_ops;
4027 
4028 		dev->init = ops->ndo_init;
4029 		dev->uninit = ops->ndo_uninit;
4030 		dev->open = ops->ndo_open;
4031 		dev->change_rx_flags = ops->ndo_change_rx_flags;
4032 		dev->set_rx_mode = ops->ndo_set_rx_mode;
4033 		dev->set_multicast_list = ops->ndo_set_multicast_list;
4034 		dev->set_mac_address = ops->ndo_set_mac_address;
4035 		dev->validate_addr = ops->ndo_validate_addr;
4036 		dev->do_ioctl = ops->ndo_do_ioctl;
4037 		dev->set_config = ops->ndo_set_config;
4038 		dev->change_mtu = ops->ndo_change_mtu;
4039 		dev->tx_timeout = ops->ndo_tx_timeout;
4040 		dev->get_stats = ops->ndo_get_stats;
4041 		dev->vlan_rx_register = ops->ndo_vlan_rx_register;
4042 		dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
4043 		dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
4044 #ifdef CONFIG_NET_POLL_CONTROLLER
4045 		dev->poll_controller = ops->ndo_poll_controller;
4046 #endif
4047 	} else {
4048 		char drivername[64];
4049 		pr_info("%s (%s): not using net_device_ops yet\n",
4050 			dev->name, netdev_drivername(dev, drivername, 64));
4051 
4052 		/* This works only because net_device_ops and the
4053 		   compatibility structure are the same. */
4054 		dev->netdev_ops = (void *) &(dev->init);
4055 	}
4056 #endif
4057 
4058 	/* Init, if this function is available */
4059 	if (dev->netdev_ops->ndo_init) {
4060 		ret = dev->netdev_ops->ndo_init(dev);
4061 		if (ret) {
4062 			if (ret > 0)
4063 				ret = -EIO;
4064 			goto out;
4065 		}
4066 	}
4067 
4068 	if (!dev_valid_name(dev->name)) {
4069 		ret = -EINVAL;
4070 		goto err_uninit;
4071 	}
4072 
4073 	dev->ifindex = dev_new_index(net);
4074 	if (dev->iflink == -1)
4075 		dev->iflink = dev->ifindex;
4076 
4077 	/* Check for existence of name */
4078 	head = dev_name_hash(net, dev->name);
4079 	hlist_for_each(p, head) {
4080 		struct net_device *d
4081 			= hlist_entry(p, struct net_device, name_hlist);
4082 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
4083 			ret = -EEXIST;
4084 			goto err_uninit;
4085 		}
4086 	}
4087 
4088 	/* Fix illegal checksum combinations */
4089 	if ((dev->features & NETIF_F_HW_CSUM) &&
4090 	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4091 		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
4092 		       dev->name);
4093 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
4094 	}
4095 
4096 	if ((dev->features & NETIF_F_NO_CSUM) &&
4097 	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4098 		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
4099 		       dev->name);
4100 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
4101 	}
4102 
4103 	dev->features = netdev_fix_features(dev->features, dev->name);
4104 
4105 	/* Enable software GSO if SG is supported. */
4106 	if (dev->features & NETIF_F_SG)
4107 		dev->features |= NETIF_F_GSO;
4108 
4109 	netdev_initialize_kobject(dev);
4110 	ret = netdev_register_kobject(dev);
4111 	if (ret)
4112 		goto err_uninit;
4113 	dev->reg_state = NETREG_REGISTERED;
4114 
4115 	/*
4116 	 *	Default initial state at registration is that the
4117 	 *	device is present.
4118 	 */
4119 
4120 	set_bit(__LINK_STATE_PRESENT, &dev->state);
4121 
4122 	dev_init_scheduler(dev);
4123 	dev_hold(dev);
4124 	list_netdevice(dev);
4125 
4126 	/* Notify protocols, that a new device appeared. */
4127 	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
4128 	ret = notifier_to_errno(ret);
4129 	if (ret) {
4130 		rollback_registered(dev);
4131 		dev->reg_state = NETREG_UNREGISTERED;
4132 	}
4133 
4134 out:
4135 	return ret;
4136 
4137 err_uninit:
4138 	if (dev->netdev_ops->ndo_uninit)
4139 		dev->netdev_ops->ndo_uninit(dev);
4140 	goto out;
4141 }
4142 
4143 /**
4144  *	register_netdev	- register a network device
4145  *	@dev: device to register
4146  *
4147  *	Take a completed network device structure and add it to the kernel
4148  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4149  *	chain. 0 is returned on success. A negative errno code is returned
4150  *	on a failure to set up the device, or if the name is a duplicate.
4151  *
4152  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
4153  *	and expands the device name if you passed a format string to
4154  *	alloc_netdev.
4155  */
4156 int register_netdev(struct net_device *dev)
4157 {
4158 	int err;
4159 
4160 	rtnl_lock();
4161 
4162 	/*
4163 	 * If the name is a format string the caller wants us to do a
4164 	 * name allocation.
4165 	 */
4166 	if (strchr(dev->name, '%')) {
4167 		err = dev_alloc_name(dev, dev->name);
4168 		if (err < 0)
4169 			goto out;
4170 	}
4171 
4172 	err = register_netdevice(dev);
4173 out:
4174 	rtnl_unlock();
4175 	return err;
4176 }
4177 EXPORT_SYMBOL(register_netdev);
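
/*
 * Example (illustrative sketch, not from the original source): a minimal
 * driver-side registration path.  The private structure, setup routine
 * and "ex%d" name template are hypothetical; ether_setup() assumes an
 * Ethernet-like device.
 */
struct exreg_priv {
	int unit;			/* hypothetical per-device state */
};

static void exreg_setup(struct net_device *dev)
{
	ether_setup(dev);
}

static struct net_device *exreg_create(void)
{
	struct net_device *dev;

	/* "%d" makes register_netdev() pick a free unit number */
	dev = alloc_netdev(sizeof(struct exreg_priv), "ex%d", exreg_setup);
	if (!dev)
		return NULL;

	if (register_netdev(dev)) {
		/* registration failed before the device became usable */
		free_netdev(dev);
		return NULL;
	}
	return dev;
}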
4178 
4179 /*
4180  * netdev_wait_allrefs - wait until all references are gone.
4181  *
4182  * This is called when unregistering network devices.
4183  *
4184  * Any protocol or device that holds a reference should register
4185  * for netdevice notification, and clean up and put back the
4186  * reference if they receive an UNREGISTER event.
4187  * We can get stuck here if buggy protocols don't correctly
4188  * call dev_put.
4189  */
4190 static void netdev_wait_allrefs(struct net_device *dev)
4191 {
4192 	unsigned long rebroadcast_time, warning_time;
4193 
4194 	rebroadcast_time = warning_time = jiffies;
4195 	while (atomic_read(&dev->refcnt) != 0) {
4196 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
4197 			rtnl_lock();
4198 
4199 			/* Rebroadcast unregister notification */
4200 			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4201 
4202 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
4203 				     &dev->state)) {
4204 				/* We must not have linkwatch events
4205 				 * pending on unregister. If this
4206 				 * happens, we simply run the queue
4207 				 * unscheduled, resulting in a noop
4208 				 * for this device.
4209 				 */
4210 				linkwatch_run_queue();
4211 			}
4212 
4213 			__rtnl_unlock();
4214 
4215 			rebroadcast_time = jiffies;
4216 		}
4217 
4218 		msleep(250);
4219 
4220 		if (time_after(jiffies, warning_time + 10 * HZ)) {
4221 			printk(KERN_EMERG "unregister_netdevice: "
4222 			       "waiting for %s to become free. Usage "
4223 			       "count = %d\n",
4224 			       dev->name, atomic_read(&dev->refcnt));
4225 			warning_time = jiffies;
4226 		}
4227 	}
4228 }
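
/*
 * Example (illustrative sketch, not from the original source): a subsystem
 * that caches a device pointer taken with dev_hold() should drop it from
 * its netdevice notifier on NETDEV_UNREGISTER; otherwise the loop above
 * spins and periodically logs "waiting for %s to become free".  Names
 * below are hypothetical.
 */
static int exref_netdev_event(struct notifier_block *nb,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	if (event == NETDEV_UNREGISTER) {
		/* release the reference taken earlier with dev_hold() */
		dev_put(dev);
	}
	return NOTIFY_DONE;
}

static struct notifier_block exref_notifier = {
	.notifier_call = exref_netdev_event,
};
/* registered once with register_netdevice_notifier(&exref_notifier) */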
4229 
4230 /* The sequence is:
4231  *
4232  *	rtnl_lock();
4233  *	...
4234  *	register_netdevice(x1);
4235  *	register_netdevice(x2);
4236  *	...
4237  *	unregister_netdevice(y1);
4238  *	unregister_netdevice(y2);
4239  *      ...
4240  *	rtnl_unlock();
4241  *	free_netdev(y1);
4242  *	free_netdev(y2);
4243  *
4244  * We are invoked by rtnl_unlock().
4245  * This allows us to deal with problems:
4246  * 1) We can delete sysfs objects which invoke hotplug
4247  *    without deadlocking with linkwatch via keventd.
4248  * 2) Since we run with the RTNL semaphore not held, we can sleep
4249  *    safely in order to wait for the netdev refcnt to drop to zero.
4250  *
4251  * We must not return until all unregister events added during
4252  * the interval the lock was held have been completed.
4253  */
4254 void netdev_run_todo(void)
4255 {
4256 	struct list_head list;
4257 
4258 	/* Snapshot list, allow later requests */
4259 	list_replace_init(&net_todo_list, &list);
4260 
4261 	__rtnl_unlock();
4262 
4263 	while (!list_empty(&list)) {
4264 		struct net_device *dev
4265 			= list_entry(list.next, struct net_device, todo_list);
4266 		list_del(&dev->todo_list);
4267 
4268 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
4269 			printk(KERN_ERR "network todo '%s' but state %d\n",
4270 			       dev->name, dev->reg_state);
4271 			dump_stack();
4272 			continue;
4273 		}
4274 
4275 		dev->reg_state = NETREG_UNREGISTERED;
4276 
4277 		on_each_cpu(flush_backlog, dev, 1);
4278 
4279 		netdev_wait_allrefs(dev);
4280 
4281 		/* paranoia */
4282 		BUG_ON(atomic_read(&dev->refcnt));
4283 		WARN_ON(dev->ip_ptr);
4284 		WARN_ON(dev->ip6_ptr);
4285 		WARN_ON(dev->dn_ptr);
4286 
4287 		if (dev->destructor)
4288 			dev->destructor(dev);
4289 
4290 		/* Free network device */
4291 		kobject_put(&dev->dev.kobj);
4292 	}
4293 }
4294 
4295 /**
4296  *	dev_get_stats	- get network device statistics
4297  *	@dev: device to get statistics from
4298  *
4299  *	Get network statistics from device. The device driver may provide
4300  *	its own method by setting dev->netdev_ops->ndo_get_stats; otherwise
4301  *	the internal statistics structure is used.
4302  */
4303 const struct net_device_stats *dev_get_stats(struct net_device *dev)
4304  {
4305 	const struct net_device_ops *ops = dev->netdev_ops;
4306 
4307 	if (ops->ndo_get_stats)
4308 		return ops->ndo_get_stats(dev);
4309 	else
4310 		return &dev->stats;
4311 }
4312 EXPORT_SYMBOL(dev_get_stats);
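
/*
 * Example (illustrative sketch, not from the original source): a driver
 * with hardware counters can provide ndo_get_stats and fold them into
 * dev->stats; without it, dev_get_stats() simply returns &dev->stats.
 * The private structure and field names are hypothetical.
 */
struct exstats_priv {
	unsigned long hw_rx_packets;
	unsigned long hw_tx_packets;
};

static struct net_device_stats *exstats_ndo_get_stats(struct net_device *dev)
{
	struct exstats_priv *priv = netdev_priv(dev);

	/* refresh the generic counters from driver-private state */
	dev->stats.rx_packets = priv->hw_rx_packets;
	dev->stats.tx_packets = priv->hw_tx_packets;
	return &dev->stats;
}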
4313 
4314 static void netdev_init_one_queue(struct net_device *dev,
4315 				  struct netdev_queue *queue,
4316 				  void *_unused)
4317 {
4318 	queue->dev = dev;
4319 }
4320 
4321 static void netdev_init_queues(struct net_device *dev)
4322 {
4323 	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
4324 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
4325 	spin_lock_init(&dev->tx_global_lock);
4326 }
4327 
4328 /**
4329  *	alloc_netdev_mq - allocate network device
4330  *	@sizeof_priv:	size of private data to allocate space for
4331  *	@name:		device name format string
4332  *	@setup:		callback to initialize device
4333  *	@queue_count:	the number of subqueues to allocate
4334  *
4335  *	Allocates a struct net_device with private data area for driver use
4336  *	and performs basic initialization.  Also allocates subqueue structs
4337  *	for each queue on the device at the end of the netdevice.
4338  */
4339 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
4340 		void (*setup)(struct net_device *), unsigned int queue_count)
4341 {
4342 	struct netdev_queue *tx;
4343 	struct net_device *dev;
4344 	size_t alloc_size;
4345 	void *p;
4346 
4347 	BUG_ON(strlen(name) >= sizeof(dev->name));
4348 
4349 	alloc_size = sizeof(struct net_device);
4350 	if (sizeof_priv) {
4351 		/* ensure 32-byte alignment of private area */
4352 		alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
4353 		alloc_size += sizeof_priv;
4354 	}
4355 	/* ensure 32-byte alignment of whole construct */
4356 	alloc_size += NETDEV_ALIGN_CONST;
4357 
4358 	p = kzalloc(alloc_size, GFP_KERNEL);
4359 	if (!p) {
4360 		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
4361 		return NULL;
4362 	}
4363 
4364 	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
4365 	if (!tx) {
4366 		printk(KERN_ERR "alloc_netdev: Unable to allocate "
4367 		       "tx qdiscs.\n");
4368 		kfree(p);
4369 		return NULL;
4370 	}
4371 
4372 	dev = (struct net_device *)
4373 		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
4374 	dev->padded = (char *)dev - (char *)p;
4375 	dev_net_set(dev, &init_net);
4376 
4377 	dev->_tx = tx;
4378 	dev->num_tx_queues = queue_count;
4379 	dev->real_num_tx_queues = queue_count;
4380 
4381 	if (sizeof_priv) {
4382 		dev->priv = ((char *)dev +
4383 			     ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
4384 			      & ~NETDEV_ALIGN_CONST));
4385 	}
4386 
4387 	dev->gso_max_size = GSO_MAX_SIZE;
4388 
4389 	netdev_init_queues(dev);
4390 
4391 	netpoll_netdev_init(dev);
4392 	setup(dev);
4393 	strcpy(dev->name, name);
4394 	return dev;
4395 }
4396 EXPORT_SYMBOL(alloc_netdev_mq);
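
/*
 * Example (illustrative sketch, not from the original source): allocating
 * a device with four TX queues and reaching the private area through
 * netdev_priv().  The setup routine, name template and private struct are
 * hypothetical; the number of TX queues is fixed at allocation time.
 */
struct exmq_priv {
	int unit;
};

static void exmq_setup(struct net_device *dev)
{
	ether_setup(dev);		/* assumes an Ethernet-like device */
}

static struct net_device *exmq_alloc(void)
{
	struct net_device *dev;
	struct exmq_priv *priv;

	dev = alloc_netdev_mq(sizeof(struct exmq_priv), "exmq%d",
			      exmq_setup, 4);
	if (!dev)
		return NULL;

	priv = netdev_priv(dev);
	priv->unit = 0;
	return dev;
}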
4397 
4398 /**
4399  *	free_netdev - free network device
4400  *	@dev: device
4401  *
4402  *	This function does the last stage of destroying an allocated device
4403  * 	interface. The reference to the device object is released.
4404  *	If this is the last reference then it will be freed.
4405  */
4406 void free_netdev(struct net_device *dev)
4407 {
4408 	release_net(dev_net(dev));
4409 
4410 	kfree(dev->_tx);
4411 
4412 	/*  Compatibility with error handling in drivers */
4413 	if (dev->reg_state == NETREG_UNINITIALIZED) {
4414 		kfree((char *)dev - dev->padded);
4415 		return;
4416 	}
4417 
4418 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
4419 	dev->reg_state = NETREG_RELEASED;
4420 
4421 	/* will free via device release */
4422 	put_device(&dev->dev);
4423 }
4424 
4425 /**
4426  *	synchronize_net -  Synchronize with packet receive processing
4427  *
4428  *	Wait for packets currently being received to be done.
4429  *	Does not block later packets from starting.
4430  */
4431 void synchronize_net(void)
4432 {
4433 	might_sleep();
4434 	synchronize_rcu();
4435 }
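
/*
 * Example (illustrative sketch, not from the original source): a protocol
 * tap that unhooks its packet_type and then uses synchronize_net() to make
 * sure no CPU is still executing its receive handler before any handler
 * state is freed.  The ETH_P_ALL tap and all names are assumptions; note
 * that dev_remove_pack() already performs this synchronize_net() for you.
 */
static int extap_rcv(struct sk_buff *skb, struct net_device *dev,
		     struct packet_type *pt, struct net_device *orig_dev)
{
	kfree_skb(skb);
	return 0;
}

static struct packet_type extap_pt = {
	.type = __constant_htons(ETH_P_ALL),
	.func = extap_rcv,
};

static void extap_cleanup(void)
{
	/* assumes extap_pt was previously added with dev_add_pack() */
	__dev_remove_pack(&extap_pt);
	synchronize_net();	/* wait out in-flight extap_rcv() callers */
	/* state referenced by the handler may be freed from here on */
}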
4436 
4437 /**
4438  *	unregister_netdevice - remove device from the kernel
4439  *	@dev: device
4440  *
4441  *	This function shuts down a device interface and removes it
4442  *	from the kernel tables.
4443  *
4444  *	Callers must hold the rtnl semaphore.  You may want
4445  *	unregister_netdev() instead of this.
4446  */
4447 
4448 void unregister_netdevice(struct net_device *dev)
4449 {
4450 	ASSERT_RTNL();
4451 
4452 	rollback_registered(dev);
4453 	/* Finish processing unregister after unlock */
4454 	net_set_todo(dev);
4455 }
4456 
4457 /**
4458  *	unregister_netdev - remove device from the kernel
4459  *	@dev: device
4460  *
4461  *	This function shuts down a device interface and removes it
4462  *	from the kernel tables.
4463  *
4464  *	This is just a wrapper for unregister_netdevice that takes
4465  *	the rtnl semaphore.  In general you want to use this and not
4466  *	unregister_netdevice.
4467  */
4468 void unregister_netdev(struct net_device *dev)
4469 {
4470 	rtnl_lock();
4471 	unregister_netdevice(dev);
4472 	rtnl_unlock();
4473 }
4474 
4475 EXPORT_SYMBOL(unregister_netdev);
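
/*
 * Example (illustrative sketch, not from the original source): the usual
 * single-device teardown pairs unregister_netdev() with free_netdev().
 * unregister_netdev() takes the RTNL itself; netdev_run_todo(), invoked
 * from rtnl_unlock(), waits for the refcount to drop before the device
 * memory is finally released.
 */
static void exdrv_remove(struct net_device *dev)
{
	unregister_netdev(dev);		/* schedules and runs the todo work */
	free_netdev(dev);		/* drops the last device reference */
}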
4476 
4477 /**
4478  *	dev_change_net_namespace - move device to a different network namespace
4479  *	@dev: device
4480  *	@net: network namespace
4481  *	@pat: If not NULL name pattern to try if the current device name
4482  *	      is already taken in the destination network namespace.
4483  *
4484  *	This function shuts down a device interface and moves it
4485  *	to a new network namespace. On success 0 is returned, on
4486  *	a failure a negative errno code is returned.
4487  *
4488  *	Callers must hold the rtnl semaphore.
4489  */
4490 
4491 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
4492 {
4493 	char buf[IFNAMSIZ];
4494 	const char *destname;
4495 	int err;
4496 
4497 	ASSERT_RTNL();
4498 
4499 	/* Don't allow namespace local devices to be moved. */
4500 	err = -EINVAL;
4501 	if (dev->features & NETIF_F_NETNS_LOCAL)
4502 		goto out;
4503 
4504 #ifdef CONFIG_SYSFS
4505 	/* Don't allow real devices to be moved when sysfs
4506 	 * is enabled.
4507 	 */
4508 	err = -EINVAL;
4509 	if (dev->dev.parent)
4510 		goto out;
4511 #endif
4512 
4513 	/* Ensure the device has been registered */
4514 	err = -EINVAL;
4515 	if (dev->reg_state != NETREG_REGISTERED)
4516 		goto out;
4517 
4518 	/* Get out if there is nothing to do */
4519 	err = 0;
4520 	if (net_eq(dev_net(dev), net))
4521 		goto out;
4522 
4523 	/* Pick the destination device name, and ensure
4524 	 * we can use it in the destination network namespace.
4525 	 */
4526 	err = -EEXIST;
4527 	destname = dev->name;
4528 	if (__dev_get_by_name(net, destname)) {
4529 		/* We get here if we can't use the current device name */
4530 		if (!pat)
4531 			goto out;
4532 		if (!dev_valid_name(pat))
4533 			goto out;
4534 		if (strchr(pat, '%')) {
4535 			if (__dev_alloc_name(net, pat, buf) < 0)
4536 				goto out;
4537 			destname = buf;
4538 		} else
4539 			destname = pat;
4540 		if (__dev_get_by_name(net, destname))
4541 			goto out;
4542 	}
4543 
4544 	/*
4545 	 * And now a mini version of register_netdevice and unregister_netdevice.
4546 	 */
4547 
4548 	/* If device is running, close it first. */
4549 	dev_close(dev);
4550 
4551 	/* And unlink it from device chain */
4552 	err = -ENODEV;
4553 	unlist_netdevice(dev);
4554 
4555 	synchronize_net();
4556 
4557 	/* Shutdown queueing discipline. */
4558 	dev_shutdown(dev);
4559 
4560 	/* Notify protocols that we are about to destroy
4561 	   this device. They should clean up all their state.
4562 	*/
4563 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4564 
4565 	/*
4566 	 *	Flush the unicast and multicast chains
4567 	 */
4568 	dev_addr_discard(dev);
4569 
4570 	netdev_unregister_kobject(dev);
4571 
4572 	/* Actually switch the network namespace */
4573 	dev_net_set(dev, net);
4574 
4575 	/* Assign the new device name */
4576 	if (destname != dev->name)
4577 		strcpy(dev->name, destname);
4578 
4579 	/* If there is an ifindex conflict assign a new one */
4580 	if (__dev_get_by_index(net, dev->ifindex)) {
4581 		int iflink = (dev->iflink == dev->ifindex);
4582 		dev->ifindex = dev_new_index(net);
4583 		if (iflink)
4584 			dev->iflink = dev->ifindex;
4585 	}
4586 
4587 	/* Fixup kobjects */
4588 	err = netdev_register_kobject(dev);
4589 	WARN_ON(err);
4590 
4591 	/* Add the device back in the hashes */
4592 	list_netdevice(dev);
4593 
4594 	/* Notify protocols, that a new device appeared. */
4595 	call_netdevice_notifiers(NETDEV_REGISTER, dev);
4596 
4597 	synchronize_net();
4598 	err = 0;
4599 out:
4600 	return err;
4601 }
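
/*
 * Example (illustrative sketch, not from the original source): moving a
 * device into another namespace must be done under the RTNL, and a
 * fallback name pattern keeps the move from failing when the current name
 * is already taken there.  "dest_net" is assumed to be valid and held by
 * the caller.
 */
static int exns_move(struct net_device *dev, struct net *dest_net)
{
	int err;

	rtnl_lock();
	err = dev_change_net_namespace(dev, dest_net, "dev%d");
	rtnl_unlock();
	return err;
}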
4602 
4603 static int dev_cpu_callback(struct notifier_block *nfb,
4604 			    unsigned long action,
4605 			    void *ocpu)
4606 {
4607 	struct sk_buff **list_skb;
4608 	struct Qdisc **list_net;
4609 	struct sk_buff *skb;
4610 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
4611 	struct softnet_data *sd, *oldsd;
4612 
4613 	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
4614 		return NOTIFY_OK;
4615 
4616 	local_irq_disable();
4617 	cpu = smp_processor_id();
4618 	sd = &per_cpu(softnet_data, cpu);
4619 	oldsd = &per_cpu(softnet_data, oldcpu);
4620 
4621 	/* Find end of our completion_queue. */
4622 	list_skb = &sd->completion_queue;
4623 	while (*list_skb)
4624 		list_skb = &(*list_skb)->next;
4625 	/* Append completion queue from offline CPU. */
4626 	*list_skb = oldsd->completion_queue;
4627 	oldsd->completion_queue = NULL;
4628 
4629 	/* Find end of our output_queue. */
4630 	list_net = &sd->output_queue;
4631 	while (*list_net)
4632 		list_net = &(*list_net)->next_sched;
4633 	/* Append output queue from offline CPU. */
4634 	*list_net = oldsd->output_queue;
4635 	oldsd->output_queue = NULL;
4636 
4637 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
4638 	local_irq_enable();
4639 
4640 	/* Process offline CPU's input_pkt_queue */
4641 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
4642 		netif_rx(skb);
4643 
4644 	return NOTIFY_OK;
4645 }
4646 
4647 #ifdef CONFIG_NET_DMA
4648 /**
4649  * net_dma_rebalance - try to maintain one DMA channel per CPU
4650  * @net_dma: DMA client and associated data (lock, channels, channel_mask)
4651  *
4652  * This is called when the number of channels allocated to the net_dma client
4653  * changes.  The net_dma client tries to have one DMA channel per CPU.
4654  */
4655 
4656 static void net_dma_rebalance(struct net_dma *net_dma)
4657 {
4658 	unsigned int cpu, i, n, chan_idx;
4659 	struct dma_chan *chan;
4660 
4661 	if (cpus_empty(net_dma->channel_mask)) {
4662 		for_each_online_cpu(cpu)
4663 			rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
4664 		return;
4665 	}
4666 
4667 	i = 0;
4668 	cpu = first_cpu(cpu_online_map);
4669 
4670 	for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
4671 		chan = net_dma->channels[chan_idx];
4672 
4673 		n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
4674 		   + (i < (num_online_cpus() %
4675 			cpus_weight(net_dma->channel_mask)) ? 1 : 0));
4676 
4677 		while (n) {
4678 			per_cpu(softnet_data, cpu).net_dma = chan;
4679 			cpu = next_cpu(cpu, cpu_online_map);
4680 			n--;
4681 		}
4682 		i++;
4683 	}
4684 }
4685 
4686 /**
4687  * netdev_dma_event - event callback for the net_dma_client
4688  * @client: should always be net_dma_client
4689  * @chan: DMA channel for the event
4690  * @state: DMA state to be handled
4691  */
4692 static enum dma_state_client
4693 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
4694 	enum dma_state state)
4695 {
4696 	int i, found = 0, pos = -1;
4697 	struct net_dma *net_dma =
4698 		container_of(client, struct net_dma, client);
4699 	enum dma_state_client ack = DMA_DUP; /* default: take no action */
4700 
4701 	spin_lock(&net_dma->lock);
4702 	switch (state) {
4703 	case DMA_RESOURCE_AVAILABLE:
4704 		for (i = 0; i < nr_cpu_ids; i++)
4705 			if (net_dma->channels[i] == chan) {
4706 				found = 1;
4707 				break;
4708 			} else if (net_dma->channels[i] == NULL && pos < 0)
4709 				pos = i;
4710 
4711 		if (!found && pos >= 0) {
4712 			ack = DMA_ACK;
4713 			net_dma->channels[pos] = chan;
4714 			cpu_set(pos, net_dma->channel_mask);
4715 			net_dma_rebalance(net_dma);
4716 		}
4717 		break;
4718 	case DMA_RESOURCE_REMOVED:
4719 		for (i = 0; i < nr_cpu_ids; i++)
4720 			if (net_dma->channels[i] == chan) {
4721 				found = 1;
4722 				pos = i;
4723 				break;
4724 			}
4725 
4726 		if (found) {
4727 			ack = DMA_ACK;
4728 			cpu_clear(pos, net_dma->channel_mask);
4729 			net_dma->channels[i] = NULL;
4730 			net_dma_rebalance(net_dma);
4731 		}
4732 		break;
4733 	default:
4734 		break;
4735 	}
4736 	spin_unlock(&net_dma->lock);
4737 
4738 	return ack;
4739 }
4740 
4741 /**
4742  * netdev_dma_register - register the networking subsystem as a DMA client
4743  */
4744 static int __init netdev_dma_register(void)
4745 {
4746 	net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct dma_chan *),
4747 								GFP_KERNEL);
4748 	if (unlikely(!net_dma.channels)) {
4749 		printk(KERN_NOTICE
4750 				"netdev_dma: no memory for net_dma.channels\n");
4751 		return -ENOMEM;
4752 	}
4753 	spin_lock_init(&net_dma.lock);
4754 	dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
4755 	dma_async_client_register(&net_dma.client);
4756 	dma_async_client_chan_request(&net_dma.client);
4757 	return 0;
4758 }
4759 
4760 #else
4761 static int __init netdev_dma_register(void) { return -ENODEV; }
4762 #endif /* CONFIG_NET_DMA */
4763 
4764 /**
4765  *	netdev_increment_features - increment feature set by one
4766  *	@all: current feature set
4767  *	@one: new feature set
4768  *	@mask: mask feature set
4769  *
4770  *	Computes a new feature set after adding a device with feature set
4771  *	@one to the master device with current feature set @all.  Will not
4772  *	enable anything that is off in @mask. Returns the new feature set.
4773  */
4774 unsigned long netdev_increment_features(unsigned long all, unsigned long one,
4775 					unsigned long mask)
4776 {
4777 	/* If device needs checksumming, downgrade to it. */
4778 	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4779 		all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
4780 	else if (mask & NETIF_F_ALL_CSUM) {
4781 		/* If one device supports v4/v6 checksumming, set for all. */
4782 		if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
4783 		    !(all & NETIF_F_GEN_CSUM)) {
4784 			all &= ~NETIF_F_ALL_CSUM;
4785 			all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
4786 		}
4787 
4788 		/* If one device supports hw checksumming, set for all. */
4789 		if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
4790 			all &= ~NETIF_F_ALL_CSUM;
4791 			all |= NETIF_F_HW_CSUM;
4792 		}
4793 	}
4794 
4795 	one |= NETIF_F_ALL_CSUM;
4796 
4797 	one |= all & NETIF_F_ONE_FOR_ALL;
4798 	all &= one | NETIF_F_LLTX | NETIF_F_GSO;
4799 	all |= one & mask & NETIF_F_ONE_FOR_ALL;
4800 
4801 	return all;
4802 }
4803 EXPORT_SYMBOL(netdev_increment_features);
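
/*
 * Example (illustrative sketch, not from the original source): a master
 * device recomputing its feature set from its slaves, in the style of the
 * bridge and bonding drivers.  The slave array and function name are
 * hypothetical.
 */
static unsigned long exmaster_recompute_features(struct net_device **slave,
						 int nr_slaves,
						 unsigned long mask)
{
	unsigned long features = mask & ~NETIF_F_ONE_FOR_ALL;
	int i;

	if (!nr_slaves)
		return netdev_fix_features(mask, NULL);

	for (i = 0; i < nr_slaves; i++)
		features = netdev_increment_features(features,
						     slave[i]->features, mask);

	/* drop any combination the stack cannot actually support */
	return netdev_fix_features(features, NULL);
}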
4804 
4805 static struct hlist_head *netdev_create_hash(void)
4806 {
4807 	int i;
4808 	struct hlist_head *hash;
4809 
4810 	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
4811 	if (hash != NULL)
4812 		for (i = 0; i < NETDEV_HASHENTRIES; i++)
4813 			INIT_HLIST_HEAD(&hash[i]);
4814 
4815 	return hash;
4816 }
4817 
4818 /* Initialize per network namespace state */
4819 static int __net_init netdev_init(struct net *net)
4820 {
4821 	INIT_LIST_HEAD(&net->dev_base_head);
4822 
4823 	net->dev_name_head = netdev_create_hash();
4824 	if (net->dev_name_head == NULL)
4825 		goto err_name;
4826 
4827 	net->dev_index_head = netdev_create_hash();
4828 	if (net->dev_index_head == NULL)
4829 		goto err_idx;
4830 
4831 	return 0;
4832 
4833 err_idx:
4834 	kfree(net->dev_name_head);
4835 err_name:
4836 	return -ENOMEM;
4837 }
4838 
4839 /**
4840  *	netdev_drivername - network driver for the device
4841  *	@dev: network device
4842  *	@buffer: buffer for resulting name
4843  *	@len: size of buffer
4844  *
4845  *	Determine network driver for device.
4846  */
4847 char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
4848 {
4849 	const struct device_driver *driver;
4850 	const struct device *parent;
4851 
4852 	if (len <= 0 || !buffer)
4853 		return buffer;
4854 	buffer[0] = 0;
4855 
4856 	parent = dev->dev.parent;
4857 
4858 	if (!parent)
4859 		return buffer;
4860 
4861 	driver = parent->driver;
4862 	if (driver && driver->name)
4863 		strlcpy(buffer, driver->name, len);
4864 	return buffer;
4865 }
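
/*
 * Example (illustrative sketch, not from the original source): callers such
 * as the transmit watchdog use netdev_drivername() to name the responsible
 * driver in log messages.  The wrapper below is hypothetical.
 */
static void exlog_report_tx_timeout(struct net_device *dev)
{
	char drivername[64];

	printk(KERN_WARNING "%s (%s): transmit timed out\n",
	       dev->name, netdev_drivername(dev, drivername, 64));
}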
4866 
4867 static void __net_exit netdev_exit(struct net *net)
4868 {
4869 	kfree(net->dev_name_head);
4870 	kfree(net->dev_index_head);
4871 }
4872 
4873 static struct pernet_operations __net_initdata netdev_net_ops = {
4874 	.init = netdev_init,
4875 	.exit = netdev_exit,
4876 };
4877 
4878 static void __net_exit default_device_exit(struct net *net)
4879 {
4880 	struct net_device *dev, *next;
4881 	/*
4882 	 * Push all migratable network devices back to the
4883 	 * initial network namespace
4884 	 */
4885 	rtnl_lock();
4886 	for_each_netdev_safe(net, dev, next) {
4887 		int err;
4888 		char fb_name[IFNAMSIZ];
4889 
4890 		/* Ignore unmovable devices (e.g. loopback) */
4891 		if (dev->features & NETIF_F_NETNS_LOCAL)
4892 			continue;
4893 
4894 		/* Delete virtual devices */
4895 		if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
4896 			dev->rtnl_link_ops->dellink(dev);
4897 			continue;
4898 		}
4899 
4900 		/* Push remaining network devices to init_net */
4901 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
4902 		err = dev_change_net_namespace(dev, &init_net, fb_name);
4903 		if (err) {
4904 			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
4905 				__func__, dev->name, err);
4906 			BUG();
4907 		}
4908 	}
4909 	rtnl_unlock();
4910 }
4911 
4912 static struct pernet_operations __net_initdata default_device_ops = {
4913 	.exit = default_device_exit,
4914 };
4915 
4916 /*
4917  *	Initialize the DEV module. At boot time this walks the device list and
4918  *	unhooks any devices that fail to initialise (normally hardware not
4919  *	present) and leaves us with a valid list of present and active devices.
4920  *
4921  */
4922 
4923 /*
4924  *       This is called single-threaded during boot, so there is no need
4925  *       to take the rtnl semaphore.
4926  */
4927 static int __init net_dev_init(void)
4928 {
4929 	int i, rc = -ENOMEM;
4930 
4931 	BUG_ON(!dev_boot_phase);
4932 
4933 	if (dev_proc_init())
4934 		goto out;
4935 
4936 	if (netdev_kobject_init())
4937 		goto out;
4938 
4939 	INIT_LIST_HEAD(&ptype_all);
4940 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
4941 		INIT_LIST_HEAD(&ptype_base[i]);
4942 
4943 	if (register_pernet_subsys(&netdev_net_ops))
4944 		goto out;
4945 
4946 	/*
4947 	 *	Initialise the packet receive queues.
4948 	 */
4949 
4950 	for_each_possible_cpu(i) {
4951 		struct softnet_data *queue;
4952 
4953 		queue = &per_cpu(softnet_data, i);
4954 		skb_queue_head_init(&queue->input_pkt_queue);
4955 		queue->completion_queue = NULL;
4956 		INIT_LIST_HEAD(&queue->poll_list);
4957 
4958 		queue->backlog.poll = process_backlog;
4959 		queue->backlog.weight = weight_p;
4960 	}
4961 
4962 	dev_boot_phase = 0;
4963 
4964 	/* The loopback device is special: if any other network device
4965 	 * is present in a network namespace, the loopback device must
4966 	 * be present too. Since we now dynamically allocate and free
4967 	 * the loopback device, ensure this invariant is maintained by
4968 	 * keeping the loopback device as the first device on the
4969 	 * list of network devices, so that it is the first device
4970 	 * that appears and the last network device that
4971 	 * disappears.
4972 	 */
4973 	if (register_pernet_device(&loopback_net_ops))
4974 		goto out;
4975 
4976 	if (register_pernet_device(&default_device_ops))
4977 		goto out;
4978 
4979 	netdev_dma_register();
4980 
4981 	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
4982 	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
4983 
4984 	hotcpu_notifier(dev_cpu_callback, 0);
4985 	dst_init();
4986 	dev_mcast_init();
4987 	rc = 0;
4988 out:
4989 	return rc;
4990 }
4991 
4992 subsys_initcall(net_dev_init);
4993 
4994 EXPORT_SYMBOL(__dev_get_by_index);
4995 EXPORT_SYMBOL(__dev_get_by_name);
4996 EXPORT_SYMBOL(__dev_remove_pack);
4997 EXPORT_SYMBOL(dev_valid_name);
4998 EXPORT_SYMBOL(dev_add_pack);
4999 EXPORT_SYMBOL(dev_alloc_name);
5000 EXPORT_SYMBOL(dev_close);
5001 EXPORT_SYMBOL(dev_get_by_flags);
5002 EXPORT_SYMBOL(dev_get_by_index);
5003 EXPORT_SYMBOL(dev_get_by_name);
5004 EXPORT_SYMBOL(dev_open);
5005 EXPORT_SYMBOL(dev_queue_xmit);
5006 EXPORT_SYMBOL(dev_remove_pack);
5007 EXPORT_SYMBOL(dev_set_allmulti);
5008 EXPORT_SYMBOL(dev_set_promiscuity);
5009 EXPORT_SYMBOL(dev_change_flags);
5010 EXPORT_SYMBOL(dev_set_mtu);
5011 EXPORT_SYMBOL(dev_set_mac_address);
5012 EXPORT_SYMBOL(free_netdev);
5013 EXPORT_SYMBOL(netdev_boot_setup_check);
5014 EXPORT_SYMBOL(netdev_set_master);
5015 EXPORT_SYMBOL(netdev_state_change);
5016 EXPORT_SYMBOL(netif_receive_skb);
5017 EXPORT_SYMBOL(netif_rx);
5018 EXPORT_SYMBOL(register_gifconf);
5019 EXPORT_SYMBOL(register_netdevice);
5020 EXPORT_SYMBOL(register_netdevice_notifier);
5021 EXPORT_SYMBOL(skb_checksum_help);
5022 EXPORT_SYMBOL(synchronize_net);
5023 EXPORT_SYMBOL(unregister_netdevice);
5024 EXPORT_SYMBOL(unregister_netdevice_notifier);
5025 EXPORT_SYMBOL(net_enable_timestamp);
5026 EXPORT_SYMBOL(net_disable_timestamp);
5027 EXPORT_SYMBOL(dev_get_flags);
5028 
5029 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
5030 EXPORT_SYMBOL(br_handle_frame_hook);
5031 EXPORT_SYMBOL(br_fdb_get_hook);
5032 EXPORT_SYMBOL(br_fdb_put_hook);
5033 #endif
5034 
5035 EXPORT_SYMBOL(dev_load);
5036 
5037 EXPORT_PER_CPU_SYMBOL(softnet_data);
5038