xref: /linux-6.15/net/core/dev.c (revision 652afc27)
1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <[email protected]>
12  *				Mark Evans, <[email protected]>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <[email protected]>
16  *		Alan Cox <[email protected]>
17  *		David Hinds <[email protected]>
18  *		Alexey Kuznetsov <[email protected]>
19  *		Adam Sulmicki <[email protected]>
20  *              Pekka Riikonen <[email protected]>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/sched.h>
83 #include <linux/mutex.h>
84 #include <linux/string.h>
85 #include <linux/mm.h>
86 #include <linux/socket.h>
87 #include <linux/sockios.h>
88 #include <linux/errno.h>
89 #include <linux/interrupt.h>
90 #include <linux/if_ether.h>
91 #include <linux/netdevice.h>
92 #include <linux/etherdevice.h>
93 #include <linux/ethtool.h>
94 #include <linux/notifier.h>
95 #include <linux/skbuff.h>
96 #include <net/net_namespace.h>
97 #include <net/sock.h>
98 #include <linux/rtnetlink.h>
99 #include <linux/proc_fs.h>
100 #include <linux/seq_file.h>
101 #include <linux/stat.h>
102 #include <linux/if_bridge.h>
103 #include <linux/if_macvlan.h>
104 #include <net/dst.h>
105 #include <net/pkt_sched.h>
106 #include <net/checksum.h>
107 #include <linux/highmem.h>
108 #include <linux/init.h>
109 #include <linux/kmod.h>
110 #include <linux/module.h>
111 #include <linux/netpoll.h>
112 #include <linux/rcupdate.h>
113 #include <linux/delay.h>
114 #include <net/wext.h>
115 #include <net/iw_handler.h>
116 #include <asm/current.h>
117 #include <linux/audit.h>
118 #include <linux/dmaengine.h>
119 #include <linux/err.h>
120 #include <linux/ctype.h>
121 #include <linux/if_arp.h>
122 #include <linux/if_vlan.h>
123 #include <linux/ip.h>
124 #include <net/ip.h>
125 #include <linux/ipv6.h>
126 #include <linux/in.h>
127 #include <linux/jhash.h>
128 #include <linux/random.h>
129 
130 #include "net-sysfs.h"
131 
132 /* Instead of increasing this, you should create a hash table. */
133 #define MAX_GRO_SKBS 8
134 
135 /*
136  *	The list of packet types we will receive (as opposed to discard)
137  *	and the routines to invoke.
138  *
 139  *	Why 16? Because with 16 the only overlap we get on a hash of the
140  *	low nibble of the protocol value is RARP/SNAP/X.25.
141  *
142  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
143  *             sure which should go first, but I bet it won't make much
144  *             difference if we are running VLANs.  The good news is that
145  *             this protocol won't be in the list unless compiled in, so
146  *             the average user (w/out VLANs) will not be adversely affected.
147  *             --BLG
148  *
149  *		0800	IP
150  *		8100    802.1Q VLAN
151  *		0001	802.3
152  *		0002	AX.25
153  *		0004	802.2
154  *		8035	RARP
155  *		0005	SNAP
156  *		0805	X.25
157  *		0806	ARP
158  *		8137	IPX
159  *		0009	Localtalk
160  *		86DD	IPv6
161  */
162 
163 #define PTYPE_HASH_SIZE	(16)
164 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
165 
166 static DEFINE_SPINLOCK(ptype_lock);
167 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
168 static struct list_head ptype_all __read_mostly;	/* Taps */
169 
170 /*
171  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
172  * semaphore.
173  *
174  * Pure readers hold dev_base_lock for reading.
175  *
176  * Writers must hold the rtnl semaphore while they loop through the
177  * dev_base_head list, and hold dev_base_lock for writing when they do the
178  * actual updates.  This allows pure readers to access the list even
179  * while a writer is preparing to update it.
180  *
181  * To put it another way, dev_base_lock is held for writing only to
182  * protect against pure readers; the rtnl semaphore provides the
183  * protection against other writers.
184  *
185  * See, for example usages, register_netdevice() and
186  * unregister_netdevice(), which must be called with the rtnl
187  * semaphore held.
188  */
189 DEFINE_RWLOCK(dev_base_lock);
190 
191 EXPORT_SYMBOL(dev_base_lock);
192 
193 #define NETDEV_HASHBITS	8
194 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
195 
196 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
197 {
198 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
199 	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
200 }
201 
202 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
203 {
204 	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
205 }
206 
207 /* Device list insertion */
208 static int list_netdevice(struct net_device *dev)
209 {
210 	struct net *net = dev_net(dev);
211 
212 	ASSERT_RTNL();
213 
214 	write_lock_bh(&dev_base_lock);
215 	list_add_tail(&dev->dev_list, &net->dev_base_head);
216 	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
217 	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
218 	write_unlock_bh(&dev_base_lock);
219 	return 0;
220 }
221 
222 /* Device list removal */
223 static void unlist_netdevice(struct net_device *dev)
224 {
225 	ASSERT_RTNL();
226 
227 	/* Unlink dev from the device chain */
228 	write_lock_bh(&dev_base_lock);
229 	list_del(&dev->dev_list);
230 	hlist_del(&dev->name_hlist);
231 	hlist_del(&dev->index_hlist);
232 	write_unlock_bh(&dev_base_lock);
233 }
234 
235 /*
236  *	Our notifier list
237  */
238 
239 static RAW_NOTIFIER_HEAD(netdev_chain);
240 
241 /*
242  *	Device drivers call our routines to queue packets here. We empty the
243  *	queue in the local softnet handler.
244  */
245 
246 DEFINE_PER_CPU(struct softnet_data, softnet_data);
247 
248 #ifdef CONFIG_LOCKDEP
249 /*
250  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
251  * according to dev->type
252  */
253 static const unsigned short netdev_lock_type[] =
254 	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
255 	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
256 	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
257 	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
258 	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
259 	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
260 	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
261 	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
262 	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
263 	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
264 	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
265 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
266 	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
267 	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
268 	 ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE};
269 
270 static const char *netdev_lock_name[] =
271 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
272 	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
273 	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
274 	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
275 	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
276 	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
277 	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
278 	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
279 	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
280 	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
281 	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
282 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
283 	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
284 	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
285 	 "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"};
286 
287 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
288 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
289 
290 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
291 {
292 	int i;
293 
294 	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
295 		if (netdev_lock_type[i] == dev_type)
296 			return i;
297 	/* the last key is used by default */
298 	return ARRAY_SIZE(netdev_lock_type) - 1;
299 }
300 
301 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
302 						 unsigned short dev_type)
303 {
304 	int i;
305 
306 	i = netdev_lock_pos(dev_type);
307 	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
308 				   netdev_lock_name[i]);
309 }
310 
311 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
312 {
313 	int i;
314 
315 	i = netdev_lock_pos(dev->type);
316 	lockdep_set_class_and_name(&dev->addr_list_lock,
317 				   &netdev_addr_lock_key[i],
318 				   netdev_lock_name[i]);
319 }
320 #else
321 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
322 						 unsigned short dev_type)
323 {
324 }
325 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
326 {
327 }
328 #endif
329 
330 /*******************************************************************************
331 
332 		Protocol management and registration routines
333 
334 *******************************************************************************/
335 
336 /*
337  *	Add a protocol ID to the list. Now that the input handler is
338  *	smarter we can dispense with all the messy stuff that used to be
339  *	here.
340  *
341  *	BEWARE!!! Protocol handlers, mangling input packets,
342  *	MUST BE last in hash buckets and checking protocol handlers
343  *	MUST start from promiscuous ptype_all chain in net_bh.
344  *	It is true now, do not change it.
345  *	Explanation follows: if protocol handler, mangling packet, will
346  *	be the first on list, it is not able to sense, that packet
347  *	is cloned and should be copied-on-write, so that it will
348  *	change it and subsequent readers will get broken packet.
349  *							--ANK (980803)
350  */
351 
352 /**
353  *	dev_add_pack - add packet handler
354  *	@pt: packet type declaration
355  *
356  *	Add a protocol handler to the networking stack. The passed &packet_type
357  *	is linked into kernel lists and may not be freed until it has been
358  *	removed from the kernel lists.
359  *
 360  *	This call does not sleep and therefore cannot
 361  *	guarantee that all CPUs in the middle of receiving packets
 362  *	will see the new packet type (until the next received packet).
363  */
364 
365 void dev_add_pack(struct packet_type *pt)
366 {
367 	int hash;
368 
369 	spin_lock_bh(&ptype_lock);
370 	if (pt->type == htons(ETH_P_ALL))
371 		list_add_rcu(&pt->list, &ptype_all);
372 	else {
373 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
374 		list_add_rcu(&pt->list, &ptype_base[hash]);
375 	}
376 	spin_unlock_bh(&ptype_lock);
377 }
378 
379 /**
380  *	__dev_remove_pack	 - remove packet handler
381  *	@pt: packet type declaration
382  *
383  *	Remove a protocol handler that was previously added to the kernel
384  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
385  *	from the kernel lists and can be freed or reused once this function
386  *	returns.
387  *
388  *      The packet type might still be in use by receivers
 389  *	and must not be freed until after all the CPUs have gone
390  *	through a quiescent state.
391  */
392 void __dev_remove_pack(struct packet_type *pt)
393 {
394 	struct list_head *head;
395 	struct packet_type *pt1;
396 
397 	spin_lock_bh(&ptype_lock);
398 
399 	if (pt->type == htons(ETH_P_ALL))
400 		head = &ptype_all;
401 	else
402 		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
403 
404 	list_for_each_entry(pt1, head, list) {
405 		if (pt == pt1) {
406 			list_del_rcu(&pt->list);
407 			goto out;
408 		}
409 	}
410 
411 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
412 out:
413 	spin_unlock_bh(&ptype_lock);
414 }
415 /**
416  *	dev_remove_pack	 - remove packet handler
417  *	@pt: packet type declaration
418  *
419  *	Remove a protocol handler that was previously added to the kernel
420  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
421  *	from the kernel lists and can be freed or reused once this function
422  *	returns.
423  *
424  *	This call sleeps to guarantee that no CPU is looking at the packet
425  *	type after return.
426  */
427 void dev_remove_pack(struct packet_type *pt)
428 {
429 	__dev_remove_pack(pt);
430 
431 	synchronize_net();
432 }
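/*
 * Editorial example - a minimal sketch (not part of this file) of how a
 * protocol module might use dev_add_pack()/dev_remove_pack().  The handler
 * and variable names, and the ETH_P_ALL tap, are illustrative only.
 *
 *	static int my_pkt_rcv(struct sk_buff *skb, struct net_device *dev,
 *			      struct packet_type *pt, struct net_device *orig_dev)
 *	{
 *		// inspect skb here; the handler owns this reference
 *		kfree_skb(skb);
 *		return NET_RX_SUCCESS;
 *	}
 *
 *	static struct packet_type my_ptype = {
 *		.type = htons(ETH_P_ALL),	// tap: sees all protocols
 *		.func = my_pkt_rcv,
 *	};
 *
 *	dev_add_pack(&my_ptype);	// e.g. at module init
 *	dev_remove_pack(&my_ptype);	// at module exit; may sleep
 */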
433 
434 /******************************************************************************
435 
436 		      Device Boot-time Settings Routines
437 
438 *******************************************************************************/
439 
440 /* Boot time configuration table */
441 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
442 
443 /**
444  *	netdev_boot_setup_add	- add new setup entry
445  *	@name: name of the device
446  *	@map: configured settings for the device
447  *
 448  *	Adds a new setup entry to the dev_boot_setup list.  The function
 449  *	returns 0 on error and 1 on success.  This is a generic routine for
450  *	all netdevices.
451  */
452 static int netdev_boot_setup_add(char *name, struct ifmap *map)
453 {
454 	struct netdev_boot_setup *s;
455 	int i;
456 
457 	s = dev_boot_setup;
458 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
459 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
460 			memset(s[i].name, 0, sizeof(s[i].name));
461 			strlcpy(s[i].name, name, IFNAMSIZ);
462 			memcpy(&s[i].map, map, sizeof(s[i].map));
463 			break;
464 		}
465 	}
466 
467 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
468 }
469 
470 /**
471  *	netdev_boot_setup_check	- check boot time settings
472  *	@dev: the netdevice
473  *
474  * 	Check boot time settings for the device.
 475  *	Any settings found are applied to the device, to be used
 476  *	later during device probing.
 477  *	Returns 0 if no settings are found, 1 if they are.
478  */
479 int netdev_boot_setup_check(struct net_device *dev)
480 {
481 	struct netdev_boot_setup *s = dev_boot_setup;
482 	int i;
483 
484 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
485 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
486 		    !strcmp(dev->name, s[i].name)) {
487 			dev->irq 	= s[i].map.irq;
488 			dev->base_addr 	= s[i].map.base_addr;
489 			dev->mem_start 	= s[i].map.mem_start;
490 			dev->mem_end 	= s[i].map.mem_end;
491 			return 1;
492 		}
493 	}
494 	return 0;
495 }
496 
497 
498 /**
499  *	netdev_boot_base	- get address from boot time settings
500  *	@prefix: prefix for network device
501  *	@unit: id for network device
502  *
 503  * 	Check boot time settings for the base address of the device,
 504  *	to be used later during device probing.
 505  *	Returns 1 if the device is already registered,
 506  *	0 if no settings are found.
507  */
508 unsigned long netdev_boot_base(const char *prefix, int unit)
509 {
510 	const struct netdev_boot_setup *s = dev_boot_setup;
511 	char name[IFNAMSIZ];
512 	int i;
513 
514 	sprintf(name, "%s%d", prefix, unit);
515 
516 	/*
 517 	 * If the device is already registered then return a base of 1
 518 	 * to indicate that this interface should not be probed
519 	 */
520 	if (__dev_get_by_name(&init_net, name))
521 		return 1;
522 
523 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
524 		if (!strcmp(name, s[i].name))
525 			return s[i].map.base_addr;
526 	return 0;
527 }
528 
529 /*
530  * Saves at boot time configured settings for any netdevice.
531  */
532 int __init netdev_boot_setup(char *str)
533 {
534 	int ints[5];
535 	struct ifmap map;
536 
537 	str = get_options(str, ARRAY_SIZE(ints), ints);
538 	if (!str || !*str)
539 		return 0;
540 
541 	/* Save settings */
542 	memset(&map, 0, sizeof(map));
543 	if (ints[0] > 0)
544 		map.irq = ints[1];
545 	if (ints[0] > 1)
546 		map.base_addr = ints[2];
547 	if (ints[0] > 2)
548 		map.mem_start = ints[3];
549 	if (ints[0] > 3)
550 		map.mem_end = ints[4];
551 
552 	/* Add new entry to the list */
553 	return netdev_boot_setup_add(str, &map);
554 }
555 
556 __setup("netdev=", netdev_boot_setup);
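/*
 * Editorial example - the "netdev=" boot option parsed above takes up to
 * four integers (irq, base_addr, mem_start, mem_end) followed by the
 * device name, e.g. on the kernel command line (values illustrative):
 *
 *	netdev=9,0x300,0xd0000,0xd4000,eth0
 */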
557 
558 /*******************************************************************************
559 
560 			    Device Interface Subroutines
561 
562 *******************************************************************************/
563 
564 /**
565  *	__dev_get_by_name	- find a device by its name
566  *	@net: the applicable net namespace
567  *	@name: name to find
568  *
569  *	Find an interface by name. Must be called under RTNL semaphore
570  *	or @dev_base_lock. If the name is found a pointer to the device
571  *	is returned. If the name is not found then %NULL is returned. The
572  *	reference counters are not incremented so the caller must be
573  *	careful with locks.
574  */
575 
576 struct net_device *__dev_get_by_name(struct net *net, const char *name)
577 {
578 	struct hlist_node *p;
579 
580 	hlist_for_each(p, dev_name_hash(net, name)) {
581 		struct net_device *dev
582 			= hlist_entry(p, struct net_device, name_hlist);
583 		if (!strncmp(dev->name, name, IFNAMSIZ))
584 			return dev;
585 	}
586 	return NULL;
587 }
588 
589 /**
590  *	dev_get_by_name		- find a device by its name
591  *	@net: the applicable net namespace
592  *	@name: name to find
593  *
594  *	Find an interface by name. This can be called from any
595  *	context and does its own locking. The returned handle has
596  *	the usage count incremented and the caller must use dev_put() to
597  *	release it when it is no longer needed. %NULL is returned if no
598  *	matching device is found.
599  */
600 
601 struct net_device *dev_get_by_name(struct net *net, const char *name)
602 {
603 	struct net_device *dev;
604 
605 	read_lock(&dev_base_lock);
606 	dev = __dev_get_by_name(net, name);
607 	if (dev)
608 		dev_hold(dev);
609 	read_unlock(&dev_base_lock);
610 	return dev;
611 }
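/*
 * Editorial example - a minimal sketch of the hold/put pattern that
 * dev_get_by_name() requires of callers; the interface name is illustrative.
 *
 *	struct net_device *dev = dev_get_by_name(&init_net, "eth0");
 *	if (dev) {
 *		// use dev; the held reference keeps it from being freed
 *		dev_put(dev);
 *	}
 */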
612 
613 /**
614  *	__dev_get_by_index - find a device by its ifindex
615  *	@net: the applicable net namespace
616  *	@ifindex: index of device
617  *
 618  *	Search for an interface by index. Returns a pointer to the device,
 619  *	or %NULL if the device is not found. The device has not
620  *	had its reference counter increased so the caller must be careful
621  *	about locking. The caller must hold either the RTNL semaphore
622  *	or @dev_base_lock.
623  */
624 
625 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
626 {
627 	struct hlist_node *p;
628 
629 	hlist_for_each(p, dev_index_hash(net, ifindex)) {
630 		struct net_device *dev
631 			= hlist_entry(p, struct net_device, index_hlist);
632 		if (dev->ifindex == ifindex)
633 			return dev;
634 	}
635 	return NULL;
636 }
637 
638 
639 /**
640  *	dev_get_by_index - find a device by its ifindex
641  *	@net: the applicable net namespace
642  *	@ifindex: index of device
643  *
 644  *	Search for an interface by index. Returns a pointer to the device,
 645  *	or NULL if the device is not found. The device returned has
646  *	had a reference added and the pointer is safe until the user calls
647  *	dev_put to indicate they have finished with it.
648  */
649 
650 struct net_device *dev_get_by_index(struct net *net, int ifindex)
651 {
652 	struct net_device *dev;
653 
654 	read_lock(&dev_base_lock);
655 	dev = __dev_get_by_index(net, ifindex);
656 	if (dev)
657 		dev_hold(dev);
658 	read_unlock(&dev_base_lock);
659 	return dev;
660 }
661 
662 /**
663  *	dev_getbyhwaddr - find a device by its hardware address
664  *	@net: the applicable net namespace
665  *	@type: media type of device
666  *	@ha: hardware address
667  *
 668  *	Search for an interface by MAC address. Returns a pointer to the
 669  *	device, or NULL if the device is not found. The caller must hold the
 670  *	rtnl semaphore. The returned device has not had its ref count increased
 671  *	and the caller must therefore be careful about locking.
672  *
673  *	BUGS:
674  *	If the API was consistent this would be __dev_get_by_hwaddr
675  */
676 
677 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
678 {
679 	struct net_device *dev;
680 
681 	ASSERT_RTNL();
682 
683 	for_each_netdev(net, dev)
684 		if (dev->type == type &&
685 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
686 			return dev;
687 
688 	return NULL;
689 }
690 
691 EXPORT_SYMBOL(dev_getbyhwaddr);
692 
693 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
694 {
695 	struct net_device *dev;
696 
697 	ASSERT_RTNL();
698 	for_each_netdev(net, dev)
699 		if (dev->type == type)
700 			return dev;
701 
702 	return NULL;
703 }
704 
705 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
706 
707 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
708 {
709 	struct net_device *dev;
710 
711 	rtnl_lock();
712 	dev = __dev_getfirstbyhwtype(net, type);
713 	if (dev)
714 		dev_hold(dev);
715 	rtnl_unlock();
716 	return dev;
717 }
718 
719 EXPORT_SYMBOL(dev_getfirstbyhwtype);
720 
721 /**
722  *	dev_get_by_flags - find any device with given flags
723  *	@net: the applicable net namespace
724  *	@if_flags: IFF_* values
725  *	@mask: bitmask of bits in if_flags to check
726  *
 727  *	Search for any interface with the given flags. Returns a pointer
 728  *	to the device, or NULL if no matching device is found. The device returned has
729  *	had a reference added and the pointer is safe until the user calls
730  *	dev_put to indicate they have finished with it.
731  */
732 
733 struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
734 {
735 	struct net_device *dev, *ret;
736 
737 	ret = NULL;
738 	read_lock(&dev_base_lock);
739 	for_each_netdev(net, dev) {
740 		if (((dev->flags ^ if_flags) & mask) == 0) {
741 			dev_hold(dev);
742 			ret = dev;
743 			break;
744 		}
745 	}
746 	read_unlock(&dev_base_lock);
747 	return ret;
748 }
749 
750 /**
751  *	dev_valid_name - check if name is okay for network device
752  *	@name: name string
753  *
 754  *	Network device names need to be valid file names
 755  *	to allow sysfs to work.  We also disallow any kind of
756  *	whitespace.
757  */
758 int dev_valid_name(const char *name)
759 {
760 	if (*name == '\0')
761 		return 0;
762 	if (strlen(name) >= IFNAMSIZ)
763 		return 0;
764 	if (!strcmp(name, ".") || !strcmp(name, ".."))
765 		return 0;
766 
767 	while (*name) {
768 		if (*name == '/' || isspace(*name))
769 			return 0;
770 		name++;
771 	}
772 	return 1;
773 }
774 
775 /**
776  *	__dev_alloc_name - allocate a name for a device
777  *	@net: network namespace to allocate the device name in
778  *	@name: name format string
779  *	@buf:  scratch buffer and result name string
780  *
 781  *	Passed a format string - eg "lt%d" - it will try to find a suitable
 782  *	id. It scans the list of devices to build up a free map, then chooses
783  *	the first empty slot. The caller must hold the dev_base or rtnl lock
784  *	while allocating the name and adding the device in order to avoid
785  *	duplicates.
786  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
787  *	Returns the number of the unit assigned or a negative errno code.
788  */
789 
790 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
791 {
792 	int i = 0;
793 	const char *p;
794 	const int max_netdevices = 8*PAGE_SIZE;
795 	unsigned long *inuse;
796 	struct net_device *d;
797 
798 	p = strnchr(name, IFNAMSIZ-1, '%');
799 	if (p) {
800 		/*
801 		 * Verify the string as this thing may have come from
802 		 * the user.  There must be either one "%d" and no other "%"
803 		 * characters.
804 		 */
805 		if (p[1] != 'd' || strchr(p + 2, '%'))
806 			return -EINVAL;
807 
808 		/* Use one page as a bit array of possible slots */
809 		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
810 		if (!inuse)
811 			return -ENOMEM;
812 
813 		for_each_netdev(net, d) {
814 			if (!sscanf(d->name, name, &i))
815 				continue;
816 			if (i < 0 || i >= max_netdevices)
817 				continue;
818 
819 			/*  avoid cases where sscanf is not exact inverse of printf */
820 			snprintf(buf, IFNAMSIZ, name, i);
821 			if (!strncmp(buf, d->name, IFNAMSIZ))
822 				set_bit(i, inuse);
823 		}
824 
825 		i = find_first_zero_bit(inuse, max_netdevices);
826 		free_page((unsigned long) inuse);
827 	}
828 
829 	snprintf(buf, IFNAMSIZ, name, i);
830 	if (!__dev_get_by_name(net, buf))
831 		return i;
832 
833 	/* It is possible to run out of possible slots
834 	 * when the name is long and there isn't enough space left
835 	 * for the digits, or if all bits are used.
836 	 */
837 	return -ENFILE;
838 }
839 
840 /**
841  *	dev_alloc_name - allocate a name for a device
842  *	@dev: device
843  *	@name: name format string
844  *
 845  *	Passed a format string - eg "lt%d" - it will try to find a suitable
 846  *	id. It scans the list of devices to build up a free map, then chooses
847  *	the first empty slot. The caller must hold the dev_base or rtnl lock
848  *	while allocating the name and adding the device in order to avoid
849  *	duplicates.
850  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
851  *	Returns the number of the unit assigned or a negative errno code.
852  */
853 
854 int dev_alloc_name(struct net_device *dev, const char *name)
855 {
856 	char buf[IFNAMSIZ];
857 	struct net *net;
858 	int ret;
859 
860 	BUG_ON(!dev_net(dev));
861 	net = dev_net(dev);
862 	ret = __dev_alloc_name(net, name, buf);
863 	if (ret >= 0)
864 		strlcpy(dev->name, buf, IFNAMSIZ);
865 	return ret;
866 }
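/*
 * Editorial example - typical use by a driver before registering a device;
 * the format string and label are illustrative.
 *
 *	if (dev_alloc_name(dev, "eth%d") < 0)
 *		goto fail;	// no free unit, or invalid format string
 *	// dev->name is now e.g. "eth0", the first free unit
 */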
867 
868 
869 /**
870  *	dev_change_name - change name of a device
871  *	@dev: device
872  *	@newname: name (or format string) must be at least IFNAMSIZ
873  *
 874  *	Change the name of a device. Format strings such as "eth%d"
 875  *	can be passed for wildcarding.
876  */
877 int dev_change_name(struct net_device *dev, const char *newname)
878 {
879 	char oldname[IFNAMSIZ];
880 	int err = 0;
881 	int ret;
882 	struct net *net;
883 
884 	ASSERT_RTNL();
885 	BUG_ON(!dev_net(dev));
886 
887 	net = dev_net(dev);
888 	if (dev->flags & IFF_UP)
889 		return -EBUSY;
890 
891 	if (!dev_valid_name(newname))
892 		return -EINVAL;
893 
894 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
895 		return 0;
896 
897 	memcpy(oldname, dev->name, IFNAMSIZ);
898 
899 	if (strchr(newname, '%')) {
900 		err = dev_alloc_name(dev, newname);
901 		if (err < 0)
902 			return err;
903 	}
904 	else if (__dev_get_by_name(net, newname))
905 		return -EEXIST;
906 	else
907 		strlcpy(dev->name, newname, IFNAMSIZ);
908 
909 rollback:
910 	/* For now only devices in the initial network namespace
911 	 * are in sysfs.
912 	 */
913 	if (net == &init_net) {
914 		ret = device_rename(&dev->dev, dev->name);
915 		if (ret) {
916 			memcpy(dev->name, oldname, IFNAMSIZ);
917 			return ret;
918 		}
919 	}
920 
921 	write_lock_bh(&dev_base_lock);
922 	hlist_del(&dev->name_hlist);
923 	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
924 	write_unlock_bh(&dev_base_lock);
925 
926 	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
927 	ret = notifier_to_errno(ret);
928 
929 	if (ret) {
930 		if (err) {
931 			printk(KERN_ERR
932 			       "%s: name change rollback failed: %d.\n",
933 			       dev->name, ret);
934 		} else {
935 			err = ret;
936 			memcpy(dev->name, oldname, IFNAMSIZ);
937 			goto rollback;
938 		}
939 	}
940 
941 	return err;
942 }
943 
944 /**
945  *	dev_set_alias - change ifalias of a device
946  *	@dev: device
947  *	@alias: name up to IFALIASZ
948  *	@len: limit of bytes to copy from info
949  *
 950  *	Set the ifalias for a device.
951  */
952 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
953 {
954 	ASSERT_RTNL();
955 
956 	if (len >= IFALIASZ)
957 		return -EINVAL;
958 
959 	if (!len) {
960 		if (dev->ifalias) {
961 			kfree(dev->ifalias);
962 			dev->ifalias = NULL;
963 		}
964 		return 0;
965 	}
966 
967 	dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
968 	if (!dev->ifalias)
969 		return -ENOMEM;
970 
971 	strlcpy(dev->ifalias, alias, len+1);
972 	return len;
973 }
974 
975 
976 /**
977  *	netdev_features_change - device changes features
978  *	@dev: device to cause notification
979  *
980  *	Called to indicate a device has changed features.
981  */
982 void netdev_features_change(struct net_device *dev)
983 {
984 	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
985 }
986 EXPORT_SYMBOL(netdev_features_change);
987 
988 /**
989  *	netdev_state_change - device changes state
990  *	@dev: device to cause notification
991  *
992  *	Called to indicate a device has changed state. This function calls
993  *	the notifier chains for netdev_chain and sends a NEWLINK message
994  *	to the routing socket.
995  */
996 void netdev_state_change(struct net_device *dev)
997 {
998 	if (dev->flags & IFF_UP) {
999 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
1000 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1001 	}
1002 }
1003 
1004 void netdev_bonding_change(struct net_device *dev)
1005 {
1006 	call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
1007 }
1008 EXPORT_SYMBOL(netdev_bonding_change);
1009 
1010 /**
1011  *	dev_load 	- load a network module
1012  *	@net: the applicable net namespace
1013  *	@name: name of interface
1014  *
1015  *	If a network interface is not present and the process has suitable
1016  *	privileges this function loads the module. If module loading is not
1017  *	available in this kernel then it becomes a nop.
1018  */
1019 
1020 void dev_load(struct net *net, const char *name)
1021 {
1022 	struct net_device *dev;
1023 
1024 	read_lock(&dev_base_lock);
1025 	dev = __dev_get_by_name(net, name);
1026 	read_unlock(&dev_base_lock);
1027 
1028 	if (!dev && capable(CAP_SYS_MODULE))
1029 		request_module("%s", name);
1030 }
1031 
1032 /**
1033  *	dev_open	- prepare an interface for use.
1034  *	@dev:	device to open
1035  *
1036  *	Takes a device from down to up state. The device's private open
1037  *	function is invoked and then the multicast lists are loaded. Finally
1038  *	the device is moved into the up state and a %NETDEV_UP message is
1039  *	sent to the netdev notifier chain.
1040  *
1041  *	Calling this function on an active interface is a nop. On a failure
1042  *	a negative errno code is returned.
1043  */
1044 int dev_open(struct net_device *dev)
1045 {
1046 	const struct net_device_ops *ops = dev->netdev_ops;
1047 	int ret = 0;
1048 
1049 	ASSERT_RTNL();
1050 
1051 	/*
1052 	 *	Is it already up?
1053 	 */
1054 
1055 	if (dev->flags & IFF_UP)
1056 		return 0;
1057 
1058 	/*
1059 	 *	Is it even present?
1060 	 */
1061 	if (!netif_device_present(dev))
1062 		return -ENODEV;
1063 
1064 	/*
1065 	 *	Call device private open method
1066 	 */
1067 	set_bit(__LINK_STATE_START, &dev->state);
1068 
1069 	if (ops->ndo_validate_addr)
1070 		ret = ops->ndo_validate_addr(dev);
1071 
1072 	if (!ret && ops->ndo_open)
1073 		ret = ops->ndo_open(dev);
1074 
1075 	/*
1076 	 *	If it went open OK then:
1077 	 */
1078 
1079 	if (ret)
1080 		clear_bit(__LINK_STATE_START, &dev->state);
1081 	else {
1082 		/*
1083 		 *	Set the flags.
1084 		 */
1085 		dev->flags |= IFF_UP;
1086 
1087 		/*
1088 		 *	Initialize multicasting status
1089 		 */
1090 		dev_set_rx_mode(dev);
1091 
1092 		/*
1093 		 *	Wakeup transmit queue engine
1094 		 */
1095 		dev_activate(dev);
1096 
1097 		/*
1098 		 *	... and announce new interface.
1099 		 */
1100 		call_netdevice_notifiers(NETDEV_UP, dev);
1101 	}
1102 
1103 	return ret;
1104 }
1105 
1106 /**
1107  *	dev_close - shutdown an interface.
1108  *	@dev: device to shutdown
1109  *
1110  *	This function moves an active device into down state. A
1111  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1112  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1113  *	chain.
1114  */
1115 int dev_close(struct net_device *dev)
1116 {
1117 	const struct net_device_ops *ops = dev->netdev_ops;
1118 	ASSERT_RTNL();
1119 
1120 	might_sleep();
1121 
1122 	if (!(dev->flags & IFF_UP))
1123 		return 0;
1124 
1125 	/*
1126 	 *	Tell people we are going down, so that they can
 1127 	 *	prepare for death while the device is still operating.
1128 	 */
1129 	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1130 
1131 	clear_bit(__LINK_STATE_START, &dev->state);
1132 
 1133 	/* Synchronize to the scheduled poll. We cannot touch the poll list;
 1134 	 * it may even be on a different cpu. So just clear netif_running().
 1135 	 *
 1136 	 * dev->stop() will invoke napi_disable() on all of its
 1137 	 * napi_struct instances on this device.
1138 	 */
1139 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
1140 
1141 	dev_deactivate(dev);
1142 
1143 	/*
 1144 	 *	Call the device-specific close. This cannot fail and is
 1145 	 *	only done if the device is UP.
 1146 	 *
 1147 	 *	We allow it to be called even after a DETACH hot-plug
 1148 	 *	event.
1149 	 */
1150 	if (ops->ndo_stop)
1151 		ops->ndo_stop(dev);
1152 
1153 	/*
1154 	 *	Device is now down.
1155 	 */
1156 
1157 	dev->flags &= ~IFF_UP;
1158 
1159 	/*
1160 	 * Tell people we are down
1161 	 */
1162 	call_netdevice_notifiers(NETDEV_DOWN, dev);
1163 
1164 	return 0;
1165 }
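/*
 * Editorial example - dev_open()/dev_close() must run under the RTNL lock,
 * as the ASSERT_RTNL() calls above enforce.  A minimal sketch:
 *
 *	rtnl_lock();
 *	err = dev_open(dev);	// 0 if already up or on success
 *	...
 *	dev_close(dev);		// always returns 0
 *	rtnl_unlock();
 */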
1166 
1167 
1168 /**
1169  *	dev_disable_lro - disable Large Receive Offload on a device
1170  *	@dev: device
1171  *
1172  *	Disable Large Receive Offload (LRO) on a net device.  Must be
1173  *	called under RTNL.  This is needed if received packets may be
1174  *	forwarded to another interface.
1175  */
1176 void dev_disable_lro(struct net_device *dev)
1177 {
1178 	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1179 	    dev->ethtool_ops->set_flags) {
1180 		u32 flags = dev->ethtool_ops->get_flags(dev);
1181 		if (flags & ETH_FLAG_LRO) {
1182 			flags &= ~ETH_FLAG_LRO;
1183 			dev->ethtool_ops->set_flags(dev, flags);
1184 		}
1185 	}
1186 	WARN_ON(dev->features & NETIF_F_LRO);
1187 }
1188 EXPORT_SYMBOL(dev_disable_lro);
1189 
1190 
1191 static int dev_boot_phase = 1;
1192 
1193 /*
1194  *	Device change register/unregister. These are not inline or static
1195  *	as we export them to the world.
1196  */
1197 
1198 /**
1199  *	register_netdevice_notifier - register a network notifier block
1200  *	@nb: notifier
1201  *
1202  *	Register a notifier to be called when network device events occur.
1203  *	The notifier passed is linked into the kernel structures and must
1204  *	not be reused until it has been unregistered. A negative errno code
1205  *	is returned on a failure.
1206  *
 1207  * 	When registered, all registration and up events are replayed
 1208  *	to the new notifier to give it a race-free
 1209  *	view of the network device list.
1210  */
1211 
1212 int register_netdevice_notifier(struct notifier_block *nb)
1213 {
1214 	struct net_device *dev;
1215 	struct net_device *last;
1216 	struct net *net;
1217 	int err;
1218 
1219 	rtnl_lock();
1220 	err = raw_notifier_chain_register(&netdev_chain, nb);
1221 	if (err)
1222 		goto unlock;
1223 	if (dev_boot_phase)
1224 		goto unlock;
1225 	for_each_net(net) {
1226 		for_each_netdev(net, dev) {
1227 			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1228 			err = notifier_to_errno(err);
1229 			if (err)
1230 				goto rollback;
1231 
1232 			if (!(dev->flags & IFF_UP))
1233 				continue;
1234 
1235 			nb->notifier_call(nb, NETDEV_UP, dev);
1236 		}
1237 	}
1238 
1239 unlock:
1240 	rtnl_unlock();
1241 	return err;
1242 
1243 rollback:
1244 	last = dev;
1245 	for_each_net(net) {
1246 		for_each_netdev(net, dev) {
1247 			if (dev == last)
1248 				break;
1249 
1250 			if (dev->flags & IFF_UP) {
1251 				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1252 				nb->notifier_call(nb, NETDEV_DOWN, dev);
1253 			}
1254 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1255 		}
1256 	}
1257 
1258 	raw_notifier_chain_unregister(&netdev_chain, nb);
1259 	goto unlock;
1260 }
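/*
 * Editorial example - a minimal sketch of a netdevice notifier; the function
 * and block names are illustrative.  In this kernel the notifier's data
 * pointer is the struct net_device itself.
 *
 *	static int my_netdev_event(struct notifier_block *nb,
 *				   unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UP)
 *			printk(KERN_INFO "%s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_nb = {
 *		.notifier_call = my_netdev_event,
 *	};
 *
 *	register_netdevice_notifier(&my_nb);
 */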
1261 
1262 /**
1263  *	unregister_netdevice_notifier - unregister a network notifier block
1264  *	@nb: notifier
1265  *
1266  *	Unregister a notifier previously registered by
 1267  *	register_netdevice_notifier(). The notifier is unlinked from the
1268  *	kernel structures and may then be reused. A negative errno code
1269  *	is returned on a failure.
1270  */
1271 
1272 int unregister_netdevice_notifier(struct notifier_block *nb)
1273 {
1274 	int err;
1275 
1276 	rtnl_lock();
1277 	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1278 	rtnl_unlock();
1279 	return err;
1280 }
1281 
1282 /**
1283  *	call_netdevice_notifiers - call all network notifier blocks
1284  *      @val: value passed unmodified to notifier function
1285  *      @dev: net_device pointer passed unmodified to notifier function
1286  *
1287  *	Call all network notifier blocks.  Parameters and return value
1288  *	are as for raw_notifier_call_chain().
1289  */
1290 
1291 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1292 {
1293 	return raw_notifier_call_chain(&netdev_chain, val, dev);
1294 }
1295 
1296 /* When > 0 there are consumers of rx skb time stamps */
1297 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1298 
1299 void net_enable_timestamp(void)
1300 {
1301 	atomic_inc(&netstamp_needed);
1302 }
1303 
1304 void net_disable_timestamp(void)
1305 {
1306 	atomic_dec(&netstamp_needed);
1307 }
1308 
1309 static inline void net_timestamp(struct sk_buff *skb)
1310 {
1311 	if (atomic_read(&netstamp_needed))
1312 		__net_timestamp(skb);
1313 	else
1314 		skb->tstamp.tv64 = 0;
1315 }
1316 
1317 /*
1318  *	Support routine. Sends outgoing frames to any network
1319  *	taps currently in use.
1320  */
1321 
1322 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1323 {
1324 	struct packet_type *ptype;
1325 
1326 	net_timestamp(skb);
1327 
1328 	rcu_read_lock();
1329 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1330 		/* Never send packets back to the socket
1331 		 * they originated from - MvS ([email protected])
1332 		 */
1333 		if ((ptype->dev == dev || !ptype->dev) &&
1334 		    (ptype->af_packet_priv == NULL ||
1335 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1336 			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1337 			if (!skb2)
1338 				break;
1339 
 1340 			/* skb->nh (the network header) should be correctly
 1341 			   set by the sender, so the check below is
 1342 			   just protection against buggy protocols.
 1343 			 */
1344 			skb_reset_mac_header(skb2);
1345 
1346 			if (skb_network_header(skb2) < skb2->data ||
1347 			    skb2->network_header > skb2->tail) {
1348 				if (net_ratelimit())
1349 					printk(KERN_CRIT "protocol %04x is "
1350 					       "buggy, dev %s\n",
1351 					       skb2->protocol, dev->name);
1352 				skb_reset_network_header(skb2);
1353 			}
1354 
1355 			skb2->transport_header = skb2->network_header;
1356 			skb2->pkt_type = PACKET_OUTGOING;
1357 			ptype->func(skb2, skb->dev, ptype, skb->dev);
1358 		}
1359 	}
1360 	rcu_read_unlock();
1361 }
1362 
1363 
1364 static inline void __netif_reschedule(struct Qdisc *q)
1365 {
1366 	struct softnet_data *sd;
1367 	unsigned long flags;
1368 
1369 	local_irq_save(flags);
1370 	sd = &__get_cpu_var(softnet_data);
1371 	q->next_sched = sd->output_queue;
1372 	sd->output_queue = q;
1373 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
1374 	local_irq_restore(flags);
1375 }
1376 
1377 void __netif_schedule(struct Qdisc *q)
1378 {
1379 	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1380 		__netif_reschedule(q);
1381 }
1382 EXPORT_SYMBOL(__netif_schedule);
1383 
1384 void dev_kfree_skb_irq(struct sk_buff *skb)
1385 {
1386 	if (atomic_dec_and_test(&skb->users)) {
1387 		struct softnet_data *sd;
1388 		unsigned long flags;
1389 
1390 		local_irq_save(flags);
1391 		sd = &__get_cpu_var(softnet_data);
1392 		skb->next = sd->completion_queue;
1393 		sd->completion_queue = skb;
1394 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1395 		local_irq_restore(flags);
1396 	}
1397 }
1398 EXPORT_SYMBOL(dev_kfree_skb_irq);
1399 
1400 void dev_kfree_skb_any(struct sk_buff *skb)
1401 {
1402 	if (in_irq() || irqs_disabled())
1403 		dev_kfree_skb_irq(skb);
1404 	else
1405 		dev_kfree_skb(skb);
1406 }
1407 EXPORT_SYMBOL(dev_kfree_skb_any);
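/*
 * Editorial example - dev_kfree_skb() must not be called from hardware
 * interrupt context, so a driver's TX-completion interrupt frees buffers
 * as sketched below (names illustrative):
 *
 *	static irqreturn_t my_tx_done_irq(int irq, void *data)
 *	{
 *		struct sk_buff *skb = ...;	// completed TX buffer
 *
 *		dev_kfree_skb_irq(skb);	// defers the free to the NET_TX softirq
 *		return IRQ_HANDLED;
 *	}
 */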
1408 
1409 
1410 /**
1411  * netif_device_detach - mark device as removed
1412  * @dev: network device
1413  *
1414  * Mark device as removed from system and therefore no longer available.
1415  */
1416 void netif_device_detach(struct net_device *dev)
1417 {
1418 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1419 	    netif_running(dev)) {
1420 		netif_stop_queue(dev);
1421 	}
1422 }
1423 EXPORT_SYMBOL(netif_device_detach);
1424 
1425 /**
1426  * netif_device_attach - mark device as attached
1427  * @dev: network device
1428  *
 1429  * Mark device as attached to the system and restart it if needed.
1430  */
1431 void netif_device_attach(struct net_device *dev)
1432 {
1433 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1434 	    netif_running(dev)) {
1435 		netif_wake_queue(dev);
1436 		__netdev_watchdog_up(dev);
1437 	}
1438 }
1439 EXPORT_SYMBOL(netif_device_attach);
1440 
1441 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1442 {
1443 	return ((features & NETIF_F_GEN_CSUM) ||
1444 		((features & NETIF_F_IP_CSUM) &&
1445 		 protocol == htons(ETH_P_IP)) ||
1446 		((features & NETIF_F_IPV6_CSUM) &&
1447 		 protocol == htons(ETH_P_IPV6)));
1448 }
1449 
1450 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1451 {
1452 	if (can_checksum_protocol(dev->features, skb->protocol))
1453 		return true;
1454 
1455 	if (skb->protocol == htons(ETH_P_8021Q)) {
1456 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1457 		if (can_checksum_protocol(dev->features & dev->vlan_features,
1458 					  veh->h_vlan_encapsulated_proto))
1459 			return true;
1460 	}
1461 
1462 	return false;
1463 }
1464 
1465 /*
1466  * Invalidate hardware checksum when packet is to be mangled, and
1467  * complete checksum manually on outgoing path.
1468  */
1469 int skb_checksum_help(struct sk_buff *skb)
1470 {
1471 	__wsum csum;
1472 	int ret = 0, offset;
1473 
1474 	if (skb->ip_summed == CHECKSUM_COMPLETE)
1475 		goto out_set_summed;
1476 
1477 	if (unlikely(skb_shinfo(skb)->gso_size)) {
1478 		/* Let GSO fix up the checksum. */
1479 		goto out_set_summed;
1480 	}
1481 
1482 	offset = skb->csum_start - skb_headroom(skb);
1483 	BUG_ON(offset >= skb_headlen(skb));
1484 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
1485 
1486 	offset += skb->csum_offset;
1487 	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1488 
1489 	if (skb_cloned(skb) &&
1490 	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1491 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1492 		if (ret)
1493 			goto out;
1494 	}
1495 
1496 	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
1497 out_set_summed:
1498 	skb->ip_summed = CHECKSUM_NONE;
1499 out:
1500 	return ret;
1501 }
1502 
1503 /**
1504  *	skb_gso_segment - Perform segmentation on skb.
1505  *	@skb: buffer to segment
1506  *	@features: features for the output path (see dev->features)
1507  *
1508  *	This function segments the given skb and returns a list of segments.
1509  *
1510  *	It may return NULL if the skb requires no segmentation.  This is
1511  *	only possible when GSO is used for verifying header integrity.
1512  */
1513 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1514 {
1515 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1516 	struct packet_type *ptype;
1517 	__be16 type = skb->protocol;
1518 	int err;
1519 
1520 	skb_reset_mac_header(skb);
1521 	skb->mac_len = skb->network_header - skb->mac_header;
1522 	__skb_pull(skb, skb->mac_len);
1523 
1524 	if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
1525 		if (skb_header_cloned(skb) &&
1526 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1527 			return ERR_PTR(err);
1528 	}
1529 
1530 	rcu_read_lock();
1531 	list_for_each_entry_rcu(ptype,
1532 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
1533 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1534 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1535 				err = ptype->gso_send_check(skb);
1536 				segs = ERR_PTR(err);
1537 				if (err || skb_gso_ok(skb, features))
1538 					break;
1539 				__skb_push(skb, (skb->data -
1540 						 skb_network_header(skb)));
1541 			}
1542 			segs = ptype->gso_segment(skb, features);
1543 			break;
1544 		}
1545 	}
1546 	rcu_read_unlock();
1547 
1548 	__skb_push(skb, skb->data - skb_mac_header(skb));
1549 
1550 	return segs;
1551 }
1552 
1553 EXPORT_SYMBOL(skb_gso_segment);
1554 
1555 /* Take action when hardware reception checksum errors are detected. */
1556 #ifdef CONFIG_BUG
1557 void netdev_rx_csum_fault(struct net_device *dev)
1558 {
1559 	if (net_ratelimit()) {
1560 		printk(KERN_ERR "%s: hw csum failure.\n",
1561 			dev ? dev->name : "<unknown>");
1562 		dump_stack();
1563 	}
1564 }
1565 EXPORT_SYMBOL(netdev_rx_csum_fault);
1566 #endif
1567 
 1568 /* Actually, we should eliminate this check as soon as we know that:
 1569  * 1. An IOMMU is present and allows all of memory to be mapped.
1570  * 2. No high memory really exists on this machine.
1571  */
1572 
1573 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1574 {
1575 #ifdef CONFIG_HIGHMEM
1576 	int i;
1577 
1578 	if (dev->features & NETIF_F_HIGHDMA)
1579 		return 0;
1580 
1581 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1582 		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1583 			return 1;
1584 
1585 #endif
1586 	return 0;
1587 }
1588 
1589 struct dev_gso_cb {
1590 	void (*destructor)(struct sk_buff *skb);
1591 };
1592 
1593 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1594 
1595 static void dev_gso_skb_destructor(struct sk_buff *skb)
1596 {
1597 	struct dev_gso_cb *cb;
1598 
1599 	do {
1600 		struct sk_buff *nskb = skb->next;
1601 
1602 		skb->next = nskb->next;
1603 		nskb->next = NULL;
1604 		kfree_skb(nskb);
1605 	} while (skb->next);
1606 
1607 	cb = DEV_GSO_CB(skb);
1608 	if (cb->destructor)
1609 		cb->destructor(skb);
1610 }
1611 
1612 /**
1613  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
1614  *	@skb: buffer to segment
1615  *
1616  *	This function segments the given skb and stores the list of segments
1617  *	in skb->next.
1618  */
1619 static int dev_gso_segment(struct sk_buff *skb)
1620 {
1621 	struct net_device *dev = skb->dev;
1622 	struct sk_buff *segs;
1623 	int features = dev->features & ~(illegal_highdma(dev, skb) ?
1624 					 NETIF_F_SG : 0);
1625 
1626 	segs = skb_gso_segment(skb, features);
1627 
1628 	/* Verifying header integrity only. */
1629 	if (!segs)
1630 		return 0;
1631 
1632 	if (IS_ERR(segs))
1633 		return PTR_ERR(segs);
1634 
1635 	skb->next = segs;
1636 	DEV_GSO_CB(skb)->destructor = skb->destructor;
1637 	skb->destructor = dev_gso_skb_destructor;
1638 
1639 	return 0;
1640 }
1641 
1642 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1643 			struct netdev_queue *txq)
1644 {
1645 	const struct net_device_ops *ops = dev->netdev_ops;
1646 
1647 	prefetch(&dev->netdev_ops->ndo_start_xmit);
1648 	if (likely(!skb->next)) {
1649 		if (!list_empty(&ptype_all))
1650 			dev_queue_xmit_nit(skb, dev);
1651 
1652 		if (netif_needs_gso(dev, skb)) {
1653 			if (unlikely(dev_gso_segment(skb)))
1654 				goto out_kfree_skb;
1655 			if (skb->next)
1656 				goto gso;
1657 		}
1658 
1659 		return ops->ndo_start_xmit(skb, dev);
1660 	}
1661 
1662 gso:
1663 	do {
1664 		struct sk_buff *nskb = skb->next;
1665 		int rc;
1666 
1667 		skb->next = nskb->next;
1668 		nskb->next = NULL;
1669 		rc = ops->ndo_start_xmit(nskb, dev);
1670 		if (unlikely(rc)) {
1671 			nskb->next = skb->next;
1672 			skb->next = nskb;
1673 			return rc;
1674 		}
1675 		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
1676 			return NETDEV_TX_BUSY;
1677 	} while (skb->next);
1678 
1679 	skb->destructor = DEV_GSO_CB(skb)->destructor;
1680 
1681 out_kfree_skb:
1682 	kfree_skb(skb);
1683 	return 0;
1684 }
1685 
1686 static u32 simple_tx_hashrnd;
1687 static int simple_tx_hashrnd_initialized = 0;
1688 
1689 static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
1690 {
1691 	u32 addr1, addr2, ports;
1692 	u32 hash, ihl;
1693 	u8 ip_proto = 0;
1694 
1695 	if (unlikely(!simple_tx_hashrnd_initialized)) {
1696 		get_random_bytes(&simple_tx_hashrnd, 4);
1697 		simple_tx_hashrnd_initialized = 1;
1698 	}
1699 
1700 	switch (skb->protocol) {
1701 	case htons(ETH_P_IP):
1702 		if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
1703 			ip_proto = ip_hdr(skb)->protocol;
1704 		addr1 = ip_hdr(skb)->saddr;
1705 		addr2 = ip_hdr(skb)->daddr;
1706 		ihl = ip_hdr(skb)->ihl;
1707 		break;
1708 	case htons(ETH_P_IPV6):
1709 		ip_proto = ipv6_hdr(skb)->nexthdr;
1710 		addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
1711 		addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
1712 		ihl = (40 >> 2);
1713 		break;
1714 	default:
1715 		return 0;
1716 	}
1717 
1718 
1719 	switch (ip_proto) {
1720 	case IPPROTO_TCP:
1721 	case IPPROTO_UDP:
1722 	case IPPROTO_DCCP:
1723 	case IPPROTO_ESP:
1724 	case IPPROTO_AH:
1725 	case IPPROTO_SCTP:
1726 	case IPPROTO_UDPLITE:
1727 		ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
1728 		break;
1729 
1730 	default:
1731 		ports = 0;
1732 		break;
1733 	}
1734 
1735 	hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
1736 
1737 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1738 }
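/*
 * Editorial note - the final line above maps the 32-bit jhash value
 * uniformly onto [0, real_num_tx_queues) without a modulo:
 * ((u64)hash * n) >> 32 scales hash/2^32 into n buckets.  For example,
 * hash = 0x80000000 with 4 queues gives (0x80000000 * 4) >> 32 = 2.
 */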
1739 
1740 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1741 					struct sk_buff *skb)
1742 {
1743 	const struct net_device_ops *ops = dev->netdev_ops;
1744 	u16 queue_index = 0;
1745 
1746 	if (ops->ndo_select_queue)
1747 		queue_index = ops->ndo_select_queue(dev, skb);
1748 	else if (dev->real_num_tx_queues > 1)
1749 		queue_index = simple_tx_hash(dev, skb);
1750 
1751 	skb_set_queue_mapping(skb, queue_index);
1752 	return netdev_get_tx_queue(dev, queue_index);
1753 }
1754 
1755 /**
1756  *	dev_queue_xmit - transmit a buffer
1757  *	@skb: buffer to transmit
1758  *
1759  *	Queue a buffer for transmission to a network device. The caller must
1760  *	have set the device and priority and built the buffer before calling
1761  *	this function. The function can be called from an interrupt.
1762  *
1763  *	A negative errno code is returned on a failure. A success does not
1764  *	guarantee the frame will be transmitted as it may be dropped due
1765  *	to congestion or traffic shaping.
1766  *
1767  * -----------------------------------------------------------------------------------
1768  *      I notice this method can also return errors from the queue disciplines,
1769  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1770  *      be positive.
1771  *
1772  *      Regardless of the return value, the skb is consumed, so it is currently
1773  *      difficult to retry a send to this method.  (You can bump the ref count
1774  *      before sending to hold a reference for retry if you are careful.)
1775  *
1776  *      When calling this method, interrupts MUST be enabled.  This is because
1777  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1778  *          --BLG
1779  */
1780 int dev_queue_xmit(struct sk_buff *skb)
1781 {
1782 	struct net_device *dev = skb->dev;
1783 	struct netdev_queue *txq;
1784 	struct Qdisc *q;
1785 	int rc = -ENOMEM;
1786 
1787 	/* GSO will handle the following emulations directly. */
1788 	if (netif_needs_gso(dev, skb))
1789 		goto gso;
1790 
1791 	if (skb_shinfo(skb)->frag_list &&
1792 	    !(dev->features & NETIF_F_FRAGLIST) &&
1793 	    __skb_linearize(skb))
1794 		goto out_kfree_skb;
1795 
 1796 	/* A fragmented skb is linearized if the device does not support SG,
 1797 	 * or if at least one of the fragments is in highmem and the device
 1798 	 * does not support DMA from it.
1799 	 */
1800 	if (skb_shinfo(skb)->nr_frags &&
1801 	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1802 	    __skb_linearize(skb))
1803 		goto out_kfree_skb;
1804 
1805 	/* If packet is not checksummed and device does not support
1806 	 * checksumming for this protocol, complete checksumming here.
1807 	 */
1808 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
1809 		skb_set_transport_header(skb, skb->csum_start -
1810 					      skb_headroom(skb));
1811 		if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
1812 			goto out_kfree_skb;
1813 	}
1814 
1815 gso:
1816 	/* Disable soft irqs for various locks below. Also
1817 	 * stops preemption for RCU.
1818 	 */
1819 	rcu_read_lock_bh();
1820 
1821 	txq = dev_pick_tx(dev, skb);
1822 	q = rcu_dereference(txq->qdisc);
1823 
1824 #ifdef CONFIG_NET_CLS_ACT
1825 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1826 #endif
1827 	if (q->enqueue) {
1828 		spinlock_t *root_lock = qdisc_lock(q);
1829 
1830 		spin_lock(root_lock);
1831 
1832 		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
1833 			kfree_skb(skb);
1834 			rc = NET_XMIT_DROP;
1835 		} else {
1836 			rc = qdisc_enqueue_root(skb, q);
1837 			qdisc_run(q);
1838 		}
1839 		spin_unlock(root_lock);
1840 
1841 		goto out;
1842 	}
1843 
1844 	/* The device has no queue. Common case for software devices:
 1845 	   loopback, all sorts of tunnels...
 1846 
 1847 	   Really, it is unlikely that netif_tx_lock protection is necessary
 1848 	   here.  (e.g. loopback and IP tunnels are clean, ignoring statistics
 1849 	   counters.)
 1850 	   However, it is possible that they rely on the protection
 1851 	   made by us here.
 1852 
 1853 	   Check this and shoot the lock. It is not prone to deadlocks.
 1854 	   Or shoot the noqueue qdisc, which is even simpler 8)
 1855 	 */
1856 	if (dev->flags & IFF_UP) {
1857 		int cpu = smp_processor_id(); /* ok because BHs are off */
1858 
1859 		if (txq->xmit_lock_owner != cpu) {
1860 
1861 			HARD_TX_LOCK(dev, txq, cpu);
1862 
1863 			if (!netif_tx_queue_stopped(txq)) {
1864 				rc = 0;
1865 				if (!dev_hard_start_xmit(skb, dev, txq)) {
1866 					HARD_TX_UNLOCK(dev, txq);
1867 					goto out;
1868 				}
1869 			}
1870 			HARD_TX_UNLOCK(dev, txq);
1871 			if (net_ratelimit())
1872 				printk(KERN_CRIT "Virtual device %s asks to "
1873 				       "queue packet!\n", dev->name);
1874 		} else {
1875 			/* Recursion is detected! It is possible,
1876 			 * unfortunately */
1877 			if (net_ratelimit())
1878 				printk(KERN_CRIT "Dead loop on virtual device "
1879 				       "%s, fix it urgently!\n", dev->name);
1880 		}
1881 	}
1882 
1883 	rc = -ENETDOWN;
1884 	rcu_read_unlock_bh();
1885 
1886 out_kfree_skb:
1887 	kfree_skb(skb);
1888 	return rc;
1889 out:
1890 	rcu_read_unlock_bh();
1891 	return rc;
1892 }
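
/*
 * Usage sketch (hedged): a tunnel-style sender handing a fully built
 * frame to dev_queue_xmit().  my_tunnel_xmit() and its drop accounting
 * are hypothetical; the point is that the skb is consumed whatever the
 * return value is, so a caller that wants to retry must take its own
 * reference first (e.g. with skb_get()).
 *
 *	static int my_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 *	{
 *		int rc;
 *
 *		skb->dev = dev;			// steer the frame at 'dev'
 *		rc = dev_queue_xmit(skb);	// consumes skb, even on error
 *		if (rc)				// <0 errno or >0 NET_XMIT_*
 *			dev->stats.tx_dropped++;	// hypothetical accounting
 *		return rc;
 *	}
 */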
1893 
1894 
1895 /*=======================================================================
1896 			Receiver routines
1897   =======================================================================*/
1898 
1899 int netdev_max_backlog __read_mostly = 1000;
1900 int netdev_budget __read_mostly = 300;
1901 int weight_p __read_mostly = 64;            /* old backlog weight */
1902 
1903 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1904 
1905 
1906 /**
1907  *	netif_rx	-	post buffer to the network code
1908  *	@skb: buffer to post
1909  *
1910  *	This function receives a packet from a device driver and queues it for
1911  *	the upper (protocol) levels to process.  It always succeeds. The buffer
1912  *	may be dropped during processing for congestion control or by the
1913  *	protocol layers.
1914  *
1915  *	Return values:
1916  *	NET_RX_SUCCESS	(no congestion)
1917  *	NET_RX_DROP     (packet was dropped)
1918  *
1919  */
1920 
1921 int netif_rx(struct sk_buff *skb)
1922 {
1923 	struct softnet_data *queue;
1924 	unsigned long flags;
1925 
1926 	/* if netpoll wants it, pretend we never saw it */
1927 	if (netpoll_rx(skb))
1928 		return NET_RX_DROP;
1929 
1930 	if (!skb->tstamp.tv64)
1931 		net_timestamp(skb);
1932 
1933 	/*
1934 	 * The code is rearranged so that the path is shortest
1935 	 * when the CPU is congested but still operating.
1936 	 */
1937 	local_irq_save(flags);
1938 	queue = &__get_cpu_var(softnet_data);
1939 
1940 	__get_cpu_var(netdev_rx_stat).total++;
1941 	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1942 		if (queue->input_pkt_queue.qlen) {
1943 enqueue:
1944 			__skb_queue_tail(&queue->input_pkt_queue, skb);
1945 			local_irq_restore(flags);
1946 			return NET_RX_SUCCESS;
1947 		}
1948 
1949 		napi_schedule(&queue->backlog);
1950 		goto enqueue;
1951 	}
1952 
1953 	__get_cpu_var(netdev_rx_stat).dropped++;
1954 	local_irq_restore(flags);
1955 
1956 	kfree_skb(skb);
1957 	return NET_RX_DROP;
1958 }
1959 
1960 int netif_rx_ni(struct sk_buff *skb)
1961 {
1962 	int err;
1963 
1964 	preempt_disable();
1965 	err = netif_rx(skb);
1966 	if (local_softirq_pending())
1967 		do_softirq();
1968 	preempt_enable();
1969 
1970 	return err;
1971 }
1972 
1973 EXPORT_SYMBOL(netif_rx_ni);
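
/*
 * Usage sketch (hedged): the classical non-NAPI receive path.  A
 * hypothetical driver's hard-IRQ handler fills an skb and posts it with
 * netif_rx(); code running in process context uses netif_rx_ni() so the
 * raised softirq gets a chance to run.
 *
 *	// from the interrupt handler:
 *	skb->protocol = eth_type_trans(skb, dev);
 *	netif_rx(skb);				// always "succeeds"; may drop
 *
 *	// from process context:
 *	netif_rx_ni(skb);
 */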
1974 
1975 static void net_tx_action(struct softirq_action *h)
1976 {
1977 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
1978 
1979 	if (sd->completion_queue) {
1980 		struct sk_buff *clist;
1981 
1982 		local_irq_disable();
1983 		clist = sd->completion_queue;
1984 		sd->completion_queue = NULL;
1985 		local_irq_enable();
1986 
1987 		while (clist) {
1988 			struct sk_buff *skb = clist;
1989 			clist = clist->next;
1990 
1991 			WARN_ON(atomic_read(&skb->users));
1992 			__kfree_skb(skb);
1993 		}
1994 	}
1995 
1996 	if (sd->output_queue) {
1997 		struct Qdisc *head;
1998 
1999 		local_irq_disable();
2000 		head = sd->output_queue;
2001 		sd->output_queue = NULL;
2002 		local_irq_enable();
2003 
2004 		while (head) {
2005 			struct Qdisc *q = head;
2006 			spinlock_t *root_lock;
2007 
2008 			head = head->next_sched;
2009 
2010 			root_lock = qdisc_lock(q);
2011 			if (spin_trylock(root_lock)) {
2012 				smp_mb__before_clear_bit();
2013 				clear_bit(__QDISC_STATE_SCHED,
2014 					  &q->state);
2015 				qdisc_run(q);
2016 				spin_unlock(root_lock);
2017 			} else {
2018 				if (!test_bit(__QDISC_STATE_DEACTIVATED,
2019 					      &q->state)) {
2020 					__netif_reschedule(q);
2021 				} else {
2022 					smp_mb__before_clear_bit();
2023 					clear_bit(__QDISC_STATE_SCHED,
2024 						  &q->state);
2025 				}
2026 			}
2027 		}
2028 	}
2029 }
2030 
2031 static inline int deliver_skb(struct sk_buff *skb,
2032 			      struct packet_type *pt_prev,
2033 			      struct net_device *orig_dev)
2034 {
2035 	atomic_inc(&skb->users);
2036 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2037 }
2038 
2039 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
2040 /* These hooks are defined here for ATM. */
2041 struct net_bridge;
2042 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
2043 						unsigned char *addr);
2044 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
2045 
2046 /*
2047  * If the bridge module is loaded, call the bridging hook.
2048  * Returns NULL if the packet was consumed.
2049  */
2050 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2051 					struct sk_buff *skb) __read_mostly;
2052 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2053 					    struct packet_type **pt_prev, int *ret,
2054 					    struct net_device *orig_dev)
2055 {
2056 	struct net_bridge_port *port;
2057 
2058 	if (skb->pkt_type == PACKET_LOOPBACK ||
2059 	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
2060 		return skb;
2061 
2062 	if (*pt_prev) {
2063 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2064 		*pt_prev = NULL;
2065 	}
2066 
2067 	return br_handle_frame_hook(port, skb);
2068 }
2069 #else
2070 #define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
2071 #endif
2072 
2073 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2074 struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2075 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2076 
2077 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2078 					     struct packet_type **pt_prev,
2079 					     int *ret,
2080 					     struct net_device *orig_dev)
2081 {
2082 	if (skb->dev->macvlan_port == NULL)
2083 		return skb;
2084 
2085 	if (*pt_prev) {
2086 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2087 		*pt_prev = NULL;
2088 	}
2089 	return macvlan_handle_frame_hook(skb);
2090 }
2091 #else
2092 #define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
2093 #endif
2094 
2095 #ifdef CONFIG_NET_CLS_ACT
2096 /* TODO: Maybe we should just force sch_ingress to be compiled in
2097  * whenever CONFIG_NET_CLS_ACT is?  Otherwise we currently execute some
2098  * useless instructions (a compare and two extra stores) when it is not
2099  * built but CONFIG_NET_CLS_ACT is.
2100  * NOTE: This doesn't remove any functionality; if you don't have
2101  * the ingress scheduler, you just can't add policies on ingress.
2102  *
2103  */
2104 static int ing_filter(struct sk_buff *skb)
2105 {
2106 	struct net_device *dev = skb->dev;
2107 	u32 ttl = G_TC_RTTL(skb->tc_verd);
2108 	struct netdev_queue *rxq;
2109 	int result = TC_ACT_OK;
2110 	struct Qdisc *q;
2111 
2112 	if (MAX_RED_LOOP < ttl++) {
2113 		printk(KERN_WARNING
2114 		       "Redir loop detected, dropping packet (%d->%d)\n",
2115 		       skb->iif, dev->ifindex);
2116 		return TC_ACT_SHOT;
2117 	}
2118 
2119 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2120 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2121 
2122 	rxq = &dev->rx_queue;
2123 
2124 	q = rxq->qdisc;
2125 	if (q != &noop_qdisc) {
2126 		spin_lock(qdisc_lock(q));
2127 		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2128 			result = qdisc_enqueue_root(skb, q);
2129 		spin_unlock(qdisc_lock(q));
2130 	}
2131 
2132 	return result;
2133 }
2134 
2135 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2136 					 struct packet_type **pt_prev,
2137 					 int *ret, struct net_device *orig_dev)
2138 {
2139 	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
2140 		goto out;
2141 
2142 	if (*pt_prev) {
2143 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2144 		*pt_prev = NULL;
2145 	} else {
2146 		/* Huh? Why does turning on AF_PACKET affect this? */
2147 		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2148 	}
2149 
2150 	switch (ing_filter(skb)) {
2151 	case TC_ACT_SHOT:
2152 	case TC_ACT_STOLEN:
2153 		kfree_skb(skb);
2154 		return NULL;
2155 	}
2156 
2157 out:
2158 	skb->tc_verd = 0;
2159 	return skb;
2160 }
2161 #endif
2162 
2163 /*
2164  * 	netif_nit_deliver - deliver received packets to network taps
2165  * 	@skb: buffer
2166  *
2167  * 	This function is used to deliver incoming packets to network
2168  * 	taps. It should be used when the normal netif_receive_skb path
2169  * 	is bypassed, for example because of VLAN acceleration.
2170  */
2171 void netif_nit_deliver(struct sk_buff *skb)
2172 {
2173 	struct packet_type *ptype;
2174 
2175 	if (list_empty(&ptype_all))
2176 		return;
2177 
2178 	skb_reset_network_header(skb);
2179 	skb_reset_transport_header(skb);
2180 	skb->mac_len = skb->network_header - skb->mac_header;
2181 
2182 	rcu_read_lock();
2183 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2184 		if (!ptype->dev || ptype->dev == skb->dev)
2185 			deliver_skb(skb, ptype, skb->dev);
2186 	}
2187 	rcu_read_unlock();
2188 }
2189 
2190 /**
2191  *	netif_receive_skb - process receive buffer from network
2192  *	@skb: buffer to process
2193  *
2194  *	netif_receive_skb() is the main receive data processing function.
2195  *	It always succeeds. The buffer may be dropped during processing
2196  *	for congestion control or by the protocol layers.
2197  *
2198  *	This function may only be called from softirq context and interrupts
2199  *	should be enabled.
2200  *
2201  *	Return values (usually ignored):
2202  *	NET_RX_SUCCESS: no congestion
2203  *	NET_RX_DROP: packet was dropped
2204  */
2205 int netif_receive_skb(struct sk_buff *skb)
2206 {
2207 	struct packet_type *ptype, *pt_prev;
2208 	struct net_device *orig_dev;
2209 	struct net_device *null_or_orig;
2210 	int ret = NET_RX_DROP;
2211 	__be16 type;
2212 
2213 	if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
2214 		return NET_RX_SUCCESS;
2215 
2216 	/* if we've gotten here through NAPI, check netpoll */
2217 	if (netpoll_receive_skb(skb))
2218 		return NET_RX_DROP;
2219 
2220 	if (!skb->tstamp.tv64)
2221 		net_timestamp(skb);
2222 
2223 	if (!skb->iif)
2224 		skb->iif = skb->dev->ifindex;
2225 
2226 	null_or_orig = NULL;
2227 	orig_dev = skb->dev;
2228 	if (orig_dev->master) {
2229 		if (skb_bond_should_drop(skb))
2230 			null_or_orig = orig_dev; /* deliver only exact match */
2231 		else
2232 			skb->dev = orig_dev->master;
2233 	}
2234 
2235 	__get_cpu_var(netdev_rx_stat).total++;
2236 
2237 	skb_reset_network_header(skb);
2238 	skb_reset_transport_header(skb);
2239 	skb->mac_len = skb->network_header - skb->mac_header;
2240 
2241 	pt_prev = NULL;
2242 
2243 	rcu_read_lock();
2244 
2245 	/* Don't receive packets in an exiting network namespace */
2246 	if (!net_alive(dev_net(skb->dev))) {
2247 		kfree_skb(skb);
2248 		goto out;
2249 	}
2250 
2251 #ifdef CONFIG_NET_CLS_ACT
2252 	if (skb->tc_verd & TC_NCLS) {
2253 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2254 		goto ncls;
2255 	}
2256 #endif
2257 
2258 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2259 		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2260 		    ptype->dev == orig_dev) {
2261 			if (pt_prev)
2262 				ret = deliver_skb(skb, pt_prev, orig_dev);
2263 			pt_prev = ptype;
2264 		}
2265 	}
2266 
2267 #ifdef CONFIG_NET_CLS_ACT
2268 	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2269 	if (!skb)
2270 		goto out;
2271 ncls:
2272 #endif
2273 
2274 	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2275 	if (!skb)
2276 		goto out;
2277 	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2278 	if (!skb)
2279 		goto out;
2280 
2281 	type = skb->protocol;
2282 	list_for_each_entry_rcu(ptype,
2283 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2284 		if (ptype->type == type &&
2285 		    (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2286 		     ptype->dev == orig_dev)) {
2287 			if (pt_prev)
2288 				ret = deliver_skb(skb, pt_prev, orig_dev);
2289 			pt_prev = ptype;
2290 		}
2291 	}
2292 
2293 	if (pt_prev) {
2294 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2295 	} else {
2296 		kfree_skb(skb);
2297 		/* Jamal, now you will not be able to escape explaining
2298 		 * to me how you were going to use this. :-)
2299 		 */
2300 		ret = NET_RX_DROP;
2301 	}
2302 
2303 out:
2304 	rcu_read_unlock();
2305 	return ret;
2306 }
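
/*
 * Usage sketch (hedged): a minimal NAPI ->poll() routine delivering
 * frames through netif_receive_skb().  my_rx_frame() stands in for the
 * hardware-specific dequeue and is an assumption.
 *
 *	static int my_poll(struct napi_struct *napi, int budget)
 *	{
 *		int work = 0;
 *		struct sk_buff *skb;
 *
 *		while (work < budget && (skb = my_rx_frame(napi)) != NULL) {
 *			skb->protocol = eth_type_trans(skb, skb->dev);
 *			netif_receive_skb(skb);	// softirq context, IRQs on
 *			work++;
 *		}
 *		if (work < budget)		// ring drained; stop polling
 *			napi_complete(napi);
 *		return work;
 *	}
 */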
2307 
2308 /* Network device is going away, flush any packets still pending  */
2309 static void flush_backlog(void *arg)
2310 {
2311 	struct net_device *dev = arg;
2312 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2313 	struct sk_buff *skb, *tmp;
2314 
2315 	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2316 		if (skb->dev == dev) {
2317 			__skb_unlink(skb, &queue->input_pkt_queue);
2318 			kfree_skb(skb);
2319 		}
2320 }
2321 
2322 static int napi_gro_complete(struct sk_buff *skb)
2323 {
2324 	struct packet_type *ptype;
2325 	__be16 type = skb->protocol;
2326 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2327 	int err = -ENOENT;
2328 
2329 	if (!skb_shinfo(skb)->frag_list)
2330 		goto out;
2331 
2332 	rcu_read_lock();
2333 	list_for_each_entry_rcu(ptype, head, list) {
2334 		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
2335 			continue;
2336 
2337 		err = ptype->gro_complete(skb);
2338 		break;
2339 	}
2340 	rcu_read_unlock();
2341 
2342 	if (err) {
2343 		WARN_ON(&ptype->list == head);
2344 		kfree_skb(skb);
2345 		return NET_RX_SUCCESS;
2346 	}
2347 
2348 out:
2349 	__skb_push(skb, -skb_network_offset(skb));
2350 	return netif_receive_skb(skb);
2351 }
2352 
2353 void napi_gro_flush(struct napi_struct *napi)
2354 {
2355 	struct sk_buff *skb, *next;
2356 
2357 	for (skb = napi->gro_list; skb; skb = next) {
2358 		next = skb->next;
2359 		skb->next = NULL;
2360 		napi_gro_complete(skb);
2361 	}
2362 
2363 	napi->gro_list = NULL;
2364 }
2365 EXPORT_SYMBOL(napi_gro_flush);
2366 
2367 int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2368 {
2369 	struct sk_buff **pp = NULL;
2370 	struct packet_type *ptype;
2371 	__be16 type = skb->protocol;
2372 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2373 	int count = 0;
2374 	int same_flow;
2375 	int mac_len;
2376 
2377 	if (!(skb->dev->features & NETIF_F_GRO))
2378 		goto normal;
2379 
2380 	rcu_read_lock();
2381 	list_for_each_entry_rcu(ptype, head, list) {
2382 		struct sk_buff *p;
2383 
2384 		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
2385 			continue;
2386 
2387 		skb_reset_network_header(skb);
2388 		mac_len = skb->network_header - skb->mac_header;
2389 		skb->mac_len = mac_len;
2390 		NAPI_GRO_CB(skb)->same_flow = 0;
2391 		NAPI_GRO_CB(skb)->flush = 0;
2392 
2393 		for (p = napi->gro_list; p; p = p->next) {
2394 			count++;
2395 			NAPI_GRO_CB(p)->same_flow =
2396 				p->mac_len == mac_len &&
2397 				!memcmp(skb_mac_header(p), skb_mac_header(skb),
2398 					mac_len);
2399 			NAPI_GRO_CB(p)->flush = 0;
2400 		}
2401 
2402 		pp = ptype->gro_receive(&napi->gro_list, skb);
2403 		break;
2404 	}
2405 	rcu_read_unlock();
2406 
2407 	if (&ptype->list == head)
2408 		goto normal;
2409 
2410 	same_flow = NAPI_GRO_CB(skb)->same_flow;
2411 
2412 	if (pp) {
2413 		struct sk_buff *nskb = *pp;
2414 
2415 		*pp = nskb->next;
2416 		nskb->next = NULL;
2417 		napi_gro_complete(nskb);
2418 		count--;
2419 	}
2420 
2421 	if (same_flow)
2422 		goto ok;
2423 
2424 	if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
2425 		__skb_push(skb, -skb_network_offset(skb));
2426 		goto normal;
2427 	}
2428 
2429 	NAPI_GRO_CB(skb)->count = 1;
2430 	skb->next = napi->gro_list;
2431 	napi->gro_list = skb;
2432 
2433 ok:
2434 	return NET_RX_SUCCESS;
2435 
2436 normal:
2437 	return netif_receive_skb(skb);
2438 }
2439 EXPORT_SYMBOL(napi_gro_receive);
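
/*
 * Usage sketch (hedged): a GRO-capable driver substitutes
 * napi_gro_receive() for netif_receive_skb() in its poll loop; packets
 * held back for merging are flushed when napi_complete() calls
 * napi_gro_flush().
 *
 *	napi_gro_receive(napi, skb);	// instead of netif_receive_skb(skb)
 */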
2440 
2441 static int process_backlog(struct napi_struct *napi, int quota)
2442 {
2443 	int work = 0;
2444 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2445 	unsigned long start_time = jiffies;
2446 
2447 	napi->weight = weight_p;
2448 	do {
2449 		struct sk_buff *skb;
2450 
2451 		local_irq_disable();
2452 		skb = __skb_dequeue(&queue->input_pkt_queue);
2453 		if (!skb) {
2454 			__napi_complete(napi);
2455 			local_irq_enable();
2456 			break;
2457 		}
2458 		local_irq_enable();
2459 
2460 		napi_gro_receive(napi, skb);
2461 	} while (++work < quota && jiffies == start_time);
2462 
2463 	napi_gro_flush(napi);
2464 
2465 	return work;
2466 }
2467 
2468 /**
2469  * __napi_schedule - schedule for receive
2470  * @n: entry to schedule
2471  *
2472  * The entry's receive function will be scheduled to run
2473  */
2474 void __napi_schedule(struct napi_struct *n)
2475 {
2476 	unsigned long flags;
2477 
2478 	local_irq_save(flags);
2479 	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2480 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2481 	local_irq_restore(flags);
2482 }
2483 EXPORT_SYMBOL(__napi_schedule);
2484 
2485 void __napi_complete(struct napi_struct *n)
2486 {
2487 	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
2488 	BUG_ON(n->gro_list);
2489 
2490 	list_del(&n->poll_list);
2491 	smp_mb__before_clear_bit();
2492 	clear_bit(NAPI_STATE_SCHED, &n->state);
2493 }
2494 EXPORT_SYMBOL(__napi_complete);
2495 
2496 void napi_complete(struct napi_struct *n)
2497 {
2498 	unsigned long flags;
2499 
2500 	/*
2501 	 * don't let napi dequeue from the cpu poll list
2502 	 * just in case it's running on a different cpu
2503 	 */
2504 	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
2505 		return;
2506 
2507 	napi_gro_flush(n);
2508 	local_irq_save(flags);
2509 	__napi_complete(n);
2510 	local_irq_restore(flags);
2511 }
2512 EXPORT_SYMBOL(napi_complete);
2513 
2514 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
2515 		    int (*poll)(struct napi_struct *, int), int weight)
2516 {
2517 	INIT_LIST_HEAD(&napi->poll_list);
2518 	napi->gro_list = NULL;
2519 	napi->poll = poll;
2520 	napi->weight = weight;
2521 	list_add(&napi->dev_list, &dev->napi_list);
2522 #ifdef CONFIG_NETPOLL
2523 	napi->dev = dev;
2524 	spin_lock_init(&napi->poll_lock);
2525 	napi->poll_owner = -1;
2526 #endif
2527 	set_bit(NAPI_STATE_SCHED, &napi->state);
2528 }
2529 EXPORT_SYMBOL(netif_napi_add);
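
/*
 * Usage sketch (hedged): wiring NAPI into a hypothetical driver.  The
 * priv structure, the weight of 64 and my_disable_rx_irq() are all
 * assumptions.
 *
 *	// at probe time:
 *	netif_napi_add(dev, &priv->napi, my_poll, 64);
 *
 *	// in the interrupt handler:
 *	if (napi_schedule_prep(&priv->napi)) {
 *		my_disable_rx_irq(priv);	// quiesce the source first
 *		__napi_schedule(&priv->napi);
 *	}
 */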
2530 
2531 void netif_napi_del(struct napi_struct *napi)
2532 {
2533 	struct sk_buff *skb, *next;
2534 
2535 	list_del_init(&napi->dev_list);
2536 
2537 	for (skb = napi->gro_list; skb; skb = next) {
2538 		next = skb->next;
2539 		skb->next = NULL;
2540 		kfree_skb(skb);
2541 	}
2542 
2543 	napi->gro_list = NULL;
2544 }
2545 EXPORT_SYMBOL(netif_napi_del);
2546 
2547 
2548 static void net_rx_action(struct softirq_action *h)
2549 {
2550 	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2551 	unsigned long time_limit = jiffies + 2;
2552 	int budget = netdev_budget;
2553 	void *have;
2554 
2555 	local_irq_disable();
2556 
2557 	while (!list_empty(list)) {
2558 		struct napi_struct *n;
2559 		int work, weight;
2560 
2561 		/* If the softirq window is exhausted then punt.
2562 		 * Allow this to run for up to 2 jiffies, which allows
2563 		 * an average latency of 1.5/HZ.
2564 		 */
2565 		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
2566 			goto softnet_break;
2567 
2568 		local_irq_enable();
2569 
2570 		/* Even though interrupts have been re-enabled, this
2571 		 * access is safe because interrupts can only add new
2572 		 * entries to the tail of this list, and only ->poll()
2573 		 * calls can remove this head entry from the list.
2574 		 */
2575 		n = list_entry(list->next, struct napi_struct, poll_list);
2576 
2577 		have = netpoll_poll_lock(n);
2578 
2579 		weight = n->weight;
2580 
2581 		/* This NAPI_STATE_SCHED test is for avoiding a race
2582 		 * with netpoll's poll_napi().  Only the entity which
2583 		 * obtains the lock and sees NAPI_STATE_SCHED set will
2584 		 * actually make the ->poll() call.  Therefore we avoid
2585 		 * accidentally calling ->poll() when NAPI is not scheduled.
2586 		 */
2587 		work = 0;
2588 		if (test_bit(NAPI_STATE_SCHED, &n->state))
2589 			work = n->poll(n, weight);
2590 
2591 		WARN_ON_ONCE(work > weight);
2592 
2593 		budget -= work;
2594 
2595 		local_irq_disable();
2596 
2597 		/* Drivers must not modify the NAPI state if they
2598 		 * consume the entire weight.  In such cases this code
2599 		 * still "owns" the NAPI instance and therefore can
2600 		 * move the instance around on the list at-will.
2601 		 */
2602 		if (unlikely(work == weight)) {
2603 			if (unlikely(napi_disable_pending(n)))
2604 				__napi_complete(n);
2605 			else
2606 				list_move_tail(&n->poll_list, list);
2607 		}
2608 
2609 		netpoll_poll_unlock(have);
2610 	}
2611 out:
2612 	local_irq_enable();
2613 
2614 #ifdef CONFIG_NET_DMA
2615 	/*
2616 	 * There may not be any more sk_buffs coming right now, so push
2617 	 * any pending DMA copies to hardware
2618 	 */
2619 	dma_issue_pending_all();
2620 #endif
2621 
2622 	return;
2623 
2624 softnet_break:
2625 	__get_cpu_var(netdev_rx_stat).time_squeeze++;
2626 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2627 	goto out;
2628 }
2629 
2630 static gifconf_func_t *gifconf_list[NPROTO];
2631 
2632 /**
2633  *	register_gifconf	-	register a SIOCGIF handler
2634  *	@family: Address family
2635  *	@gifconf: Function handler
2636  *
2637  *	Register protocol dependent address dumping routines. The handler
2638  *	that is passed must not be freed or reused until it has been replaced
2639  *	by another handler.
2640  */
2641 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2642 {
2643 	if (family >= NPROTO)
2644 		return -EINVAL;
2645 	gifconf_list[family] = gifconf;
2646 	return 0;
2647 }
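
/*
 * Usage sketch (hedged): how an address family registers its SIOCGIFCONF
 * handler at protocol init time; my_gifconf() and PF_MYPROTO are
 * hypothetical, mirroring what IPv4 does with inet_gifconf.
 *
 *	static int my_gifconf(struct net_device *dev, char __user *buf, int len);
 *
 *	register_gifconf(PF_MYPROTO, my_gifconf);
 */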
2648 
2649 
2650 /*
2651  *	Map an interface index to its name (SIOCGIFNAME)
2652  */
2653 
2654 /*
2655  *	We need this ioctl for efficient implementation of the
2656  *	if_indextoname() function required by the IPv6 API.  Without
2657  *	it, we would have to search all the interfaces to find a
2658  *	match.  --pb
2659  */
2660 
2661 static int dev_ifname(struct net *net, struct ifreq __user *arg)
2662 {
2663 	struct net_device *dev;
2664 	struct ifreq ifr;
2665 
2666 	/*
2667 	 *	Fetch the caller's info block.
2668 	 */
2669 
2670 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2671 		return -EFAULT;
2672 
2673 	read_lock(&dev_base_lock);
2674 	dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2675 	if (!dev) {
2676 		read_unlock(&dev_base_lock);
2677 		return -ENODEV;
2678 	}
2679 
2680 	strcpy(ifr.ifr_name, dev->name);
2681 	read_unlock(&dev_base_lock);
2682 
2683 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2684 		return -EFAULT;
2685 	return 0;
2686 }
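
/*
 * Usage sketch (hedged): the matching user-space side of SIOCGIFNAME, as
 * used by if_indextoname(); sockfd is an open socket and the index value
 * is an assumption.
 *
 *	struct ifreq ifr;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	ifr.ifr_ifindex = 2;
 *	if (ioctl(sockfd, SIOCGIFNAME, &ifr) == 0)
 *		printf("index 2 is %s\n", ifr.ifr_name);
 */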
2687 
2688 /*
2689  *	Perform a SIOCGIFCONF call. This structure will change
2690  *	size eventually, and there is nothing I can do about it.
2691  *	Thus we will need a 'compatibility mode'.
2692  */
2693 
2694 static int dev_ifconf(struct net *net, char __user *arg)
2695 {
2696 	struct ifconf ifc;
2697 	struct net_device *dev;
2698 	char __user *pos;
2699 	int len;
2700 	int total;
2701 	int i;
2702 
2703 	/*
2704 	 *	Fetch the caller's info block.
2705 	 */
2706 
2707 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2708 		return -EFAULT;
2709 
2710 	pos = ifc.ifc_buf;
2711 	len = ifc.ifc_len;
2712 
2713 	/*
2714 	 *	Loop over the interfaces, and write an info block for each.
2715 	 */
2716 
2717 	total = 0;
2718 	for_each_netdev(net, dev) {
2719 		for (i = 0; i < NPROTO; i++) {
2720 			if (gifconf_list[i]) {
2721 				int done;
2722 				if (!pos)
2723 					done = gifconf_list[i](dev, NULL, 0);
2724 				else
2725 					done = gifconf_list[i](dev, pos + total,
2726 							       len - total);
2727 				if (done < 0)
2728 					return -EFAULT;
2729 				total += done;
2730 			}
2731 		}
2732 	}
2733 
2734 	/*
2735 	 *	All done.  Write the updated control block back to the caller.
2736 	 */
2737 	ifc.ifc_len = total;
2738 
2739 	/*
2740 	 * 	Both BSD and Solaris return 0 here, so we do too.
2741 	 */
2742 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2743 }
2744 
2745 #ifdef CONFIG_PROC_FS
2746 /*
2747  *	This is invoked by the /proc filesystem handler to display a device
2748  *	in detail.
2749  */
2750 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2751 	__acquires(dev_base_lock)
2752 {
2753 	struct net *net = seq_file_net(seq);
2754 	loff_t off;
2755 	struct net_device *dev;
2756 
2757 	read_lock(&dev_base_lock);
2758 	if (!*pos)
2759 		return SEQ_START_TOKEN;
2760 
2761 	off = 1;
2762 	for_each_netdev(net, dev)
2763 		if (off++ == *pos)
2764 			return dev;
2765 
2766 	return NULL;
2767 }
2768 
2769 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2770 {
2771 	struct net *net = seq_file_net(seq);
2772 	++*pos;
2773 	return v == SEQ_START_TOKEN ?
2774 		first_net_device(net) : next_net_device((struct net_device *)v);
2775 }
2776 
2777 void dev_seq_stop(struct seq_file *seq, void *v)
2778 	__releases(dev_base_lock)
2779 {
2780 	read_unlock(&dev_base_lock);
2781 }
2782 
2783 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2784 {
2785 	const struct net_device_stats *stats = dev_get_stats(dev);
2786 
2787 	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2788 		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2789 		   dev->name, stats->rx_bytes, stats->rx_packets,
2790 		   stats->rx_errors,
2791 		   stats->rx_dropped + stats->rx_missed_errors,
2792 		   stats->rx_fifo_errors,
2793 		   stats->rx_length_errors + stats->rx_over_errors +
2794 		    stats->rx_crc_errors + stats->rx_frame_errors,
2795 		   stats->rx_compressed, stats->multicast,
2796 		   stats->tx_bytes, stats->tx_packets,
2797 		   stats->tx_errors, stats->tx_dropped,
2798 		   stats->tx_fifo_errors, stats->collisions,
2799 		   stats->tx_carrier_errors +
2800 		    stats->tx_aborted_errors +
2801 		    stats->tx_window_errors +
2802 		    stats->tx_heartbeat_errors,
2803 		   stats->tx_compressed);
2804 }
2805 
2806 /*
2807  *	Called from the PROCfs module. This now uses the new arbitrary sized
2808  *	/proc/net interface to create /proc/net/dev
2809  */
2810 static int dev_seq_show(struct seq_file *seq, void *v)
2811 {
2812 	if (v == SEQ_START_TOKEN)
2813 		seq_puts(seq, "Inter-|   Receive                            "
2814 			      "                    |  Transmit\n"
2815 			      " face |bytes    packets errs drop fifo frame "
2816 			      "compressed multicast|bytes    packets errs "
2817 			      "drop fifo colls carrier compressed\n");
2818 	else
2819 		dev_seq_printf_stats(seq, v);
2820 	return 0;
2821 }
2822 
2823 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2824 {
2825 	struct netif_rx_stats *rc = NULL;
2826 
2827 	while (*pos < nr_cpu_ids)
2828 		if (cpu_online(*pos)) {
2829 			rc = &per_cpu(netdev_rx_stat, *pos);
2830 			break;
2831 		} else
2832 			++*pos;
2833 	return rc;
2834 }
2835 
2836 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2837 {
2838 	return softnet_get_online(pos);
2839 }
2840 
2841 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2842 {
2843 	++*pos;
2844 	return softnet_get_online(pos);
2845 }
2846 
2847 static void softnet_seq_stop(struct seq_file *seq, void *v)
2848 {
2849 }
2850 
2851 static int softnet_seq_show(struct seq_file *seq, void *v)
2852 {
2853 	struct netif_rx_stats *s = v;
2854 
2855 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2856 		   s->total, s->dropped, s->time_squeeze, 0,
2857 		   0, 0, 0, 0, /* was fastroute */
2858 		   s->cpu_collision);
2859 	return 0;
2860 }
2861 
2862 static const struct seq_operations dev_seq_ops = {
2863 	.start = dev_seq_start,
2864 	.next  = dev_seq_next,
2865 	.stop  = dev_seq_stop,
2866 	.show  = dev_seq_show,
2867 };
2868 
2869 static int dev_seq_open(struct inode *inode, struct file *file)
2870 {
2871 	return seq_open_net(inode, file, &dev_seq_ops,
2872 			    sizeof(struct seq_net_private));
2873 }
2874 
2875 static const struct file_operations dev_seq_fops = {
2876 	.owner	 = THIS_MODULE,
2877 	.open    = dev_seq_open,
2878 	.read    = seq_read,
2879 	.llseek  = seq_lseek,
2880 	.release = seq_release_net,
2881 };
2882 
2883 static const struct seq_operations softnet_seq_ops = {
2884 	.start = softnet_seq_start,
2885 	.next  = softnet_seq_next,
2886 	.stop  = softnet_seq_stop,
2887 	.show  = softnet_seq_show,
2888 };
2889 
2890 static int softnet_seq_open(struct inode *inode, struct file *file)
2891 {
2892 	return seq_open(file, &softnet_seq_ops);
2893 }
2894 
2895 static const struct file_operations softnet_seq_fops = {
2896 	.owner	 = THIS_MODULE,
2897 	.open    = softnet_seq_open,
2898 	.read    = seq_read,
2899 	.llseek  = seq_lseek,
2900 	.release = seq_release,
2901 };
2902 
2903 static void *ptype_get_idx(loff_t pos)
2904 {
2905 	struct packet_type *pt = NULL;
2906 	loff_t i = 0;
2907 	int t;
2908 
2909 	list_for_each_entry_rcu(pt, &ptype_all, list) {
2910 		if (i == pos)
2911 			return pt;
2912 		++i;
2913 	}
2914 
2915 	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
2916 		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2917 			if (i == pos)
2918 				return pt;
2919 			++i;
2920 		}
2921 	}
2922 	return NULL;
2923 }
2924 
2925 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2926 	__acquires(RCU)
2927 {
2928 	rcu_read_lock();
2929 	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2930 }
2931 
2932 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2933 {
2934 	struct packet_type *pt;
2935 	struct list_head *nxt;
2936 	int hash;
2937 
2938 	++*pos;
2939 	if (v == SEQ_START_TOKEN)
2940 		return ptype_get_idx(0);
2941 
2942 	pt = v;
2943 	nxt = pt->list.next;
2944 	if (pt->type == htons(ETH_P_ALL)) {
2945 		if (nxt != &ptype_all)
2946 			goto found;
2947 		hash = 0;
2948 		nxt = ptype_base[0].next;
2949 	} else
2950 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
2951 
2952 	while (nxt == &ptype_base[hash]) {
2953 		if (++hash >= PTYPE_HASH_SIZE)
2954 			return NULL;
2955 		nxt = ptype_base[hash].next;
2956 	}
2957 found:
2958 	return list_entry(nxt, struct packet_type, list);
2959 }
2960 
2961 static void ptype_seq_stop(struct seq_file *seq, void *v)
2962 	__releases(RCU)
2963 {
2964 	rcu_read_unlock();
2965 }
2966 
2967 static int ptype_seq_show(struct seq_file *seq, void *v)
2968 {
2969 	struct packet_type *pt = v;
2970 
2971 	if (v == SEQ_START_TOKEN)
2972 		seq_puts(seq, "Type Device      Function\n");
2973 	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
2974 		if (pt->type == htons(ETH_P_ALL))
2975 			seq_puts(seq, "ALL ");
2976 		else
2977 			seq_printf(seq, "%04x", ntohs(pt->type));
2978 
2979 		seq_printf(seq, " %-8s %pF\n",
2980 			   pt->dev ? pt->dev->name : "", pt->func);
2981 	}
2982 
2983 	return 0;
2984 }
2985 
2986 static const struct seq_operations ptype_seq_ops = {
2987 	.start = ptype_seq_start,
2988 	.next  = ptype_seq_next,
2989 	.stop  = ptype_seq_stop,
2990 	.show  = ptype_seq_show,
2991 };
2992 
2993 static int ptype_seq_open(struct inode *inode, struct file *file)
2994 {
2995 	return seq_open_net(inode, file, &ptype_seq_ops,
2996 			sizeof(struct seq_net_private));
2997 }
2998 
2999 static const struct file_operations ptype_seq_fops = {
3000 	.owner	 = THIS_MODULE,
3001 	.open    = ptype_seq_open,
3002 	.read    = seq_read,
3003 	.llseek  = seq_lseek,
3004 	.release = seq_release_net,
3005 };
3006 
3007 
3008 static int __net_init dev_proc_net_init(struct net *net)
3009 {
3010 	int rc = -ENOMEM;
3011 
3012 	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
3013 		goto out;
3014 	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
3015 		goto out_dev;
3016 	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
3017 		goto out_softnet;
3018 
3019 	if (wext_proc_init(net))
3020 		goto out_ptype;
3021 	rc = 0;
3022 out:
3023 	return rc;
3024 out_ptype:
3025 	proc_net_remove(net, "ptype");
3026 out_softnet:
3027 	proc_net_remove(net, "softnet_stat");
3028 out_dev:
3029 	proc_net_remove(net, "dev");
3030 	goto out;
3031 }
3032 
3033 static void __net_exit dev_proc_net_exit(struct net *net)
3034 {
3035 	wext_proc_exit(net);
3036 
3037 	proc_net_remove(net, "ptype");
3038 	proc_net_remove(net, "softnet_stat");
3039 	proc_net_remove(net, "dev");
3040 }
3041 
3042 static struct pernet_operations __net_initdata dev_proc_ops = {
3043 	.init = dev_proc_net_init,
3044 	.exit = dev_proc_net_exit,
3045 };
3046 
3047 static int __init dev_proc_init(void)
3048 {
3049 	return register_pernet_subsys(&dev_proc_ops);
3050 }
3051 #else
3052 #define dev_proc_init() 0
3053 #endif	/* CONFIG_PROC_FS */
3054 
3055 
3056 /**
3057  *	netdev_set_master	-	set up master/slave pair
3058  *	@slave: slave device
3059  *	@master: new master device
3060  *
3061  *	Changes the master device of the slave. Pass %NULL to break the
3062  *	bonding. The caller must hold the RTNL semaphore. On a failure
3063  *	a negative errno code is returned. On success the reference counts
3064  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
3065  *	function returns zero.
3066  */
3067 int netdev_set_master(struct net_device *slave, struct net_device *master)
3068 {
3069 	struct net_device *old = slave->master;
3070 
3071 	ASSERT_RTNL();
3072 
3073 	if (master) {
3074 		if (old)
3075 			return -EBUSY;
3076 		dev_hold(master);
3077 	}
3078 
3079 	slave->master = master;
3080 
3081 	synchronize_net();
3082 
3083 	if (old)
3084 		dev_put(old);
3085 
3086 	if (master)
3087 		slave->flags |= IFF_SLAVE;
3088 	else
3089 		slave->flags &= ~IFF_SLAVE;
3090 
3091 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
3092 	return 0;
3093 }
3094 
3095 static void dev_change_rx_flags(struct net_device *dev, int flags)
3096 {
3097 	const struct net_device_ops *ops = dev->netdev_ops;
3098 
3099 	if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
3100 		ops->ndo_change_rx_flags(dev, flags);
3101 }
3102 
3103 static int __dev_set_promiscuity(struct net_device *dev, int inc)
3104 {
3105 	unsigned short old_flags = dev->flags;
3106 	uid_t uid;
3107 	gid_t gid;
3108 
3109 	ASSERT_RTNL();
3110 
3111 	dev->flags |= IFF_PROMISC;
3112 	dev->promiscuity += inc;
3113 	if (dev->promiscuity == 0) {
3114 		/*
3115 		 * Avoid overflow.
3116 		 * If inc causes overflow, untouch promisc and return error.
3117 		 */
3118 		if (inc < 0)
3119 			dev->flags &= ~IFF_PROMISC;
3120 		else {
3121 			dev->promiscuity -= inc;
3122 			printk(KERN_WARNING "%s: promiscuity counter overflowed; "
3123 				"setting promiscuity failed. The promiscuity feature "
3124 				"of the device might be broken.\n", dev->name);
3125 			return -EOVERFLOW;
3126 		}
3127 	}
3128 	if (dev->flags != old_flags) {
3129 		printk(KERN_INFO "device %s %s promiscuous mode\n",
3130 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
3131 							       "left");
3132 		if (audit_enabled) {
3133 			current_uid_gid(&uid, &gid);
3134 			audit_log(current->audit_context, GFP_ATOMIC,
3135 				AUDIT_ANOM_PROMISCUOUS,
3136 				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
3137 				dev->name, (dev->flags & IFF_PROMISC),
3138 				(old_flags & IFF_PROMISC),
3139 				audit_get_loginuid(current),
3140 				uid, gid,
3141 				audit_get_sessionid(current));
3142 		}
3143 
3144 		dev_change_rx_flags(dev, IFF_PROMISC);
3145 	}
3146 	return 0;
3147 }
3148 
3149 /**
3150  *	dev_set_promiscuity	- update promiscuity count on a device
3151  *	@dev: device
3152  *	@inc: modifier
3153  *
3154  *	Add or remove promiscuity from a device. While the count in the device
3155  *	remains above zero the interface remains promiscuous. Once it hits zero
3156  *	the device reverts back to normal filtering operation. A negative inc
3157  *	value is used to drop promiscuity on the device.
3158  *	Return 0 if successful or a negative errno code on error.
3159  */
3160 int dev_set_promiscuity(struct net_device *dev, int inc)
3161 {
3162 	unsigned short old_flags = dev->flags;
3163 	int err;
3164 
3165 	err = __dev_set_promiscuity(dev, inc);
3166 	if (err < 0)
3167 		return err;
3168 	if (dev->flags != old_flags)
3169 		dev_set_rx_mode(dev);
3170 	return err;
3171 }
3172 
3173 /**
3174  *	dev_set_allmulti	- update allmulti count on a device
3175  *	@dev: device
3176  *	@inc: modifier
3177  *
3178  *	Add or remove reception of all multicast frames to a device. While the
3179  *	count in the device remains above zero the interface remains listening
3180  *	to all multicast frames. Once it hits zero the device reverts back to normal
3181  *	filtering operation. A negative @inc value is used to drop the counter
3182  *	when releasing a resource needing all multicasts.
3183  *	Return 0 if successful or a negative errno code on error.
3184  */
3185 
3186 int dev_set_allmulti(struct net_device *dev, int inc)
3187 {
3188 	unsigned short old_flags = dev->flags;
3189 
3190 	ASSERT_RTNL();
3191 
3192 	dev->flags |= IFF_ALLMULTI;
3193 	dev->allmulti += inc;
3194 	if (dev->allmulti == 0) {
3195 		/*
3196 		 * Avoid overflow.
3197 		 * If inc causes overflow, untouch allmulti and return error.
3198 		 */
3199 		if (inc < 0)
3200 			dev->flags &= ~IFF_ALLMULTI;
3201 		else {
3202 			dev->allmulti -= inc;
3203 			printk(KERN_WARNING "%s: allmulti counter overflowed; "
3204 				"setting allmulti failed. The allmulti feature of "
3205 				"the device might be broken.\n", dev->name);
3206 			return -EOVERFLOW;
3207 		}
3208 	}
3209 	if (dev->flags ^ old_flags) {
3210 		dev_change_rx_flags(dev, IFF_ALLMULTI);
3211 		dev_set_rx_mode(dev);
3212 	}
3213 	return 0;
3214 }
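
/*
 * Usage sketch (hedged): a packet-tap style user takes promiscuous mode
 * with matched +1/-1 increments; dev_set_allmulti() follows the same
 * counting discipline.  Both must run under the RTNL.
 *
 *	rtnl_lock();
 *	dev_set_promiscuity(dev, 1);	// start capturing everything
 *	...
 *	dev_set_promiscuity(dev, -1);	// done; counter may return to 0
 *	rtnl_unlock();
 */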
3215 
3216 /*
3217  *	Upload unicast and multicast address lists to device and
3218  *	configure RX filtering. When the device doesn't support unicast
3219  *	filtering it is put in promiscuous mode while unicast addresses
3220  *	are present.
3221  */
3222 void __dev_set_rx_mode(struct net_device *dev)
3223 {
3224 	const struct net_device_ops *ops = dev->netdev_ops;
3225 
3226 	/* dev_open will call this function so the list will stay sane. */
3227 	if (!(dev->flags&IFF_UP))
3228 		return;
3229 
3230 	if (!netif_device_present(dev))
3231 		return;
3232 
3233 	if (ops->ndo_set_rx_mode)
3234 		ops->ndo_set_rx_mode(dev);
3235 	else {
3236 		/* Unicast address changes may only happen under the rtnl,
3237 		 * therefore calling __dev_set_promiscuity here is safe.
3238 		 */
3239 		if (dev->uc_count > 0 && !dev->uc_promisc) {
3240 			__dev_set_promiscuity(dev, 1);
3241 			dev->uc_promisc = 1;
3242 		} else if (dev->uc_count == 0 && dev->uc_promisc) {
3243 			__dev_set_promiscuity(dev, -1);
3244 			dev->uc_promisc = 0;
3245 		}
3246 
3247 		if (ops->ndo_set_multicast_list)
3248 			ops->ndo_set_multicast_list(dev);
3249 	}
3250 }
3251 
3252 void dev_set_rx_mode(struct net_device *dev)
3253 {
3254 	netif_addr_lock_bh(dev);
3255 	__dev_set_rx_mode(dev);
3256 	netif_addr_unlock_bh(dev);
3257 }
3258 
3259 int __dev_addr_delete(struct dev_addr_list **list, int *count,
3260 		      void *addr, int alen, int glbl)
3261 {
3262 	struct dev_addr_list *da;
3263 
3264 	for (; (da = *list) != NULL; list = &da->next) {
3265 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3266 		    alen == da->da_addrlen) {
3267 			if (glbl) {
3268 				int old_glbl = da->da_gusers;
3269 				da->da_gusers = 0;
3270 				if (old_glbl == 0)
3271 					break;
3272 			}
3273 			if (--da->da_users)
3274 				return 0;
3275 
3276 			*list = da->next;
3277 			kfree(da);
3278 			(*count)--;
3279 			return 0;
3280 		}
3281 	}
3282 	return -ENOENT;
3283 }
3284 
3285 int __dev_addr_add(struct dev_addr_list **list, int *count,
3286 		   void *addr, int alen, int glbl)
3287 {
3288 	struct dev_addr_list *da;
3289 
3290 	for (da = *list; da != NULL; da = da->next) {
3291 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3292 		    da->da_addrlen == alen) {
3293 			if (glbl) {
3294 				int old_glbl = da->da_gusers;
3295 				da->da_gusers = 1;
3296 				if (old_glbl)
3297 					return 0;
3298 			}
3299 			da->da_users++;
3300 			return 0;
3301 		}
3302 	}
3303 
3304 	da = kzalloc(sizeof(*da), GFP_ATOMIC);
3305 	if (da == NULL)
3306 		return -ENOMEM;
3307 	memcpy(da->da_addr, addr, alen);
3308 	da->da_addrlen = alen;
3309 	da->da_users = 1;
3310 	da->da_gusers = glbl ? 1 : 0;
3311 	da->next = *list;
3312 	*list = da;
3313 	(*count)++;
3314 	return 0;
3315 }
3316 
3317 /**
3318  *	dev_unicast_delete	- Release secondary unicast address.
3319  *	@dev: device
3320  *	@addr: address to delete
3321  *	@alen: length of @addr
3322  *
3323  *	Release reference to a secondary unicast address and remove it
3324  *	from the device if the reference count drops to zero.
3325  *
3326  * 	The caller must hold the rtnl_mutex.
3327  */
3328 int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
3329 {
3330 	int err;
3331 
3332 	ASSERT_RTNL();
3333 
3334 	netif_addr_lock_bh(dev);
3335 	err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3336 	if (!err)
3337 		__dev_set_rx_mode(dev);
3338 	netif_addr_unlock_bh(dev);
3339 	return err;
3340 }
3341 EXPORT_SYMBOL(dev_unicast_delete);
3342 
3343 /**
3344  *	dev_unicast_add		- add a secondary unicast address
3345  *	@dev: device
3346  *	@addr: address to add
3347  *	@alen: length of @addr
3348  *
3349  *	Add a secondary unicast address to the device or increase
3350  *	the reference count if it already exists.
3351  *
3352  *	The caller must hold the rtnl_mutex.
3353  */
3354 int dev_unicast_add(struct net_device *dev, void *addr, int alen)
3355 {
3356 	int err;
3357 
3358 	ASSERT_RTNL();
3359 
3360 	netif_addr_lock_bh(dev);
3361 	err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3362 	if (!err)
3363 		__dev_set_rx_mode(dev);
3364 	netif_addr_unlock_bh(dev);
3365 	return err;
3366 }
3367 EXPORT_SYMBOL(dev_unicast_add);
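
/*
 * Usage sketch (hedged): a driver listening on one extra unicast MAC
 * address; the address below is made up.  Both calls require the RTNL.
 *
 *	static const u8 extra_mac[ETH_ALEN] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
 *
 *	rtnl_lock();
 *	dev_unicast_add(dev, (void *)extra_mac, ETH_ALEN);
 *	...
 *	dev_unicast_delete(dev, (void *)extra_mac, ETH_ALEN);
 *	rtnl_unlock();
 */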
3368 
3369 int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3370 		    struct dev_addr_list **from, int *from_count)
3371 {
3372 	struct dev_addr_list *da, *next;
3373 	int err = 0;
3374 
3375 	da = *from;
3376 	while (da != NULL) {
3377 		next = da->next;
3378 		if (!da->da_synced) {
3379 			err = __dev_addr_add(to, to_count,
3380 					     da->da_addr, da->da_addrlen, 0);
3381 			if (err < 0)
3382 				break;
3383 			da->da_synced = 1;
3384 			da->da_users++;
3385 		} else if (da->da_users == 1) {
3386 			__dev_addr_delete(to, to_count,
3387 					  da->da_addr, da->da_addrlen, 0);
3388 			__dev_addr_delete(from, from_count,
3389 					  da->da_addr, da->da_addrlen, 0);
3390 		}
3391 		da = next;
3392 	}
3393 	return err;
3394 }
3395 
3396 void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3397 		       struct dev_addr_list **from, int *from_count)
3398 {
3399 	struct dev_addr_list *da, *next;
3400 
3401 	da = *from;
3402 	while (da != NULL) {
3403 		next = da->next;
3404 		if (da->da_synced) {
3405 			__dev_addr_delete(to, to_count,
3406 					  da->da_addr, da->da_addrlen, 0);
3407 			da->da_synced = 0;
3408 			__dev_addr_delete(from, from_count,
3409 					  da->da_addr, da->da_addrlen, 0);
3410 		}
3411 		da = next;
3412 	}
3413 }
3414 
3415 /**
3416  *	dev_unicast_sync - Synchronize device's unicast list to another device
3417  *	@to: destination device
3418  *	@from: source device
3419  *
3420  *	Add newly added addresses to the destination device and release
3421  *	addresses that have no users left. The source device must be
3422  *	locked by netif_tx_lock_bh.
3423  *
3424  *	This function is intended to be called from the dev->set_rx_mode
3425  *	function of layered software devices.
3426  */
3427 int dev_unicast_sync(struct net_device *to, struct net_device *from)
3428 {
3429 	int err = 0;
3430 
3431 	netif_addr_lock_bh(to);
3432 	err = __dev_addr_sync(&to->uc_list, &to->uc_count,
3433 			      &from->uc_list, &from->uc_count);
3434 	if (!err)
3435 		__dev_set_rx_mode(to);
3436 	netif_addr_unlock_bh(to);
3437 	return err;
3438 }
3439 EXPORT_SYMBOL(dev_unicast_sync);
3440 
3441 /**
3442  *	dev_unicast_unsync - Remove synchronized addresses from the destination device
3443  *	@to: destination device
3444  *	@from: source device
3445  *
3446  *	Remove all addresses that were added to the destination device by
3447  *	dev_unicast_sync(). This function is intended to be called from the
3448  *	dev->stop function of layered software devices.
3449  */
3450 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3451 {
3452 	netif_addr_lock_bh(from);
3453 	netif_addr_lock(to);
3454 
3455 	__dev_addr_unsync(&to->uc_list, &to->uc_count,
3456 			  &from->uc_list, &from->uc_count);
3457 	__dev_set_rx_mode(to);
3458 
3459 	netif_addr_unlock(to);
3460 	netif_addr_unlock_bh(from);
3461 }
3462 EXPORT_SYMBOL(dev_unicast_unsync);
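
/*
 * Usage sketch (hedged): how a layered device such as a VLAN might use
 * the sync helpers; my_priv() and real_dev are assumptions standing for
 * the lower device lookup.
 *
 *	static void my_set_rx_mode(struct net_device *dev)	// ndo_set_rx_mode
 *	{
 *		struct net_device *real_dev = my_priv(dev)->real_dev;
 *
 *		dev_unicast_sync(real_dev, dev);	// push new addresses down
 *	}
 *
 *	// and from the device's stop path:
 *	dev_unicast_unsync(real_dev, dev);
 */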
3463 
3464 static void __dev_addr_discard(struct dev_addr_list **list)
3465 {
3466 	struct dev_addr_list *tmp;
3467 
3468 	while (*list != NULL) {
3469 		tmp = *list;
3470 		*list = tmp->next;
3471 		if (tmp->da_users > tmp->da_gusers)
3472 			printk(KERN_ERR "__dev_addr_discard: address leakage! "
3473 			       "da_users=%d\n", tmp->da_users);
3474 		kfree(tmp);
3475 	}
3476 }
3477 
3478 static void dev_addr_discard(struct net_device *dev)
3479 {
3480 	netif_addr_lock_bh(dev);
3481 
3482 	__dev_addr_discard(&dev->uc_list);
3483 	dev->uc_count = 0;
3484 
3485 	__dev_addr_discard(&dev->mc_list);
3486 	dev->mc_count = 0;
3487 
3488 	netif_addr_unlock_bh(dev);
3489 }
3490 
3491 /**
3492  *	dev_get_flags - get flags reported to userspace
3493  *	@dev: device
3494  *
3495  *	Get the combination of flag bits exported through APIs to userspace.
3496  */
3497 unsigned dev_get_flags(const struct net_device *dev)
3498 {
3499 	unsigned flags;
3500 
3501 	flags = (dev->flags & ~(IFF_PROMISC |
3502 				IFF_ALLMULTI |
3503 				IFF_RUNNING |
3504 				IFF_LOWER_UP |
3505 				IFF_DORMANT)) |
3506 		(dev->gflags & (IFF_PROMISC |
3507 				IFF_ALLMULTI));
3508 
3509 	if (netif_running(dev)) {
3510 		if (netif_oper_up(dev))
3511 			flags |= IFF_RUNNING;
3512 		if (netif_carrier_ok(dev))
3513 			flags |= IFF_LOWER_UP;
3514 		if (netif_dormant(dev))
3515 			flags |= IFF_DORMANT;
3516 	}
3517 
3518 	return flags;
3519 }
3520 
3521 /**
3522  *	dev_change_flags - change device settings
3523  *	@dev: device
3524  *	@flags: device state flags
3525  *
3526  *	Change settings on device based state flags. The flags are
3527  *	in the userspace exported format.
3528  */
3529 int dev_change_flags(struct net_device *dev, unsigned flags)
3530 {
3531 	int ret, changes;
3532 	int old_flags = dev->flags;
3533 
3534 	ASSERT_RTNL();
3535 
3536 	/*
3537 	 *	Set the flags on our device.
3538 	 */
3539 
3540 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3541 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3542 			       IFF_AUTOMEDIA)) |
3543 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3544 				    IFF_ALLMULTI));
3545 
3546 	/*
3547 	 *	Load in the correct multicast list now the flags have changed.
3548 	 */
3549 
3550 	if ((old_flags ^ flags) & IFF_MULTICAST)
3551 		dev_change_rx_flags(dev, IFF_MULTICAST);
3552 
3553 	dev_set_rx_mode(dev);
3554 
3555 	/*
3556 	 *	Have we downed the interface? We handle IFF_UP ourselves
3557 	 *	according to user attempts to set it, rather than blindly
3558 	 *	setting it.
3559 	 */
3560 
3561 	ret = 0;
3562 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different? */
3563 		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3564 
3565 		if (!ret)
3566 			dev_set_rx_mode(dev);
3567 	}
3568 
3569 	if (dev->flags & IFF_UP &&
3570 	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3571 					  IFF_VOLATILE)))
3572 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
3573 
3574 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
3575 		int inc = (flags & IFF_PROMISC) ? +1 : -1;
3576 		dev->gflags ^= IFF_PROMISC;
3577 		dev_set_promiscuity(dev, inc);
3578 	}
3579 
3580 	/* NOTE: the order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3581 	   is important. Some (broken) drivers set IFF_PROMISC when
3582 	   IFF_ALLMULTI is requested, without asking us and without reporting.
3583 	 */
3584 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3585 		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3586 		dev->gflags ^= IFF_ALLMULTI;
3587 		dev_set_allmulti(dev, inc);
3588 	}
3589 
3590 	/* Exclude state transition flags, already notified */
3591 	changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3592 	if (changes)
3593 		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
3594 
3595 	return ret;
3596 }
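
/*
 * Usage sketch (hedged): bringing an interface up from kernel code, the
 * moral equivalent of what SIOCSIFFLAGS does for "ifconfig eth0 up".
 *
 *	rtnl_lock();
 *	err = dev_change_flags(dev, dev->flags | IFF_UP);
 *	rtnl_unlock();
 */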
3597 
3598 /**
3599  *	dev_set_mtu - Change maximum transfer unit
3600  *	@dev: device
3601  *	@new_mtu: new transfer unit
3602  *
3603  *	Change the maximum transfer size of the network device.
3604  */
3605 int dev_set_mtu(struct net_device *dev, int new_mtu)
3606 {
3607 	const struct net_device_ops *ops = dev->netdev_ops;
3608 	int err;
3609 
3610 	if (new_mtu == dev->mtu)
3611 		return 0;
3612 
3613 	/*	MTU must not be negative.	 */
3614 	if (new_mtu < 0)
3615 		return -EINVAL;
3616 
3617 	if (!netif_device_present(dev))
3618 		return -ENODEV;
3619 
3620 	err = 0;
3621 	if (ops->ndo_change_mtu)
3622 		err = ops->ndo_change_mtu(dev, new_mtu);
3623 	else
3624 		dev->mtu = new_mtu;
3625 
3626 	if (!err && dev->flags & IFF_UP)
3627 		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
3628 	return err;
3629 }
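
/*
 * Usage sketch (hedged): raising the MTU from kernel code; the jumbo
 * value is an assumption.  Callers hold the RTNL, as the ioctl path does.
 *
 *	rtnl_lock();
 *	err = dev_set_mtu(dev, 9000);
 *	rtnl_unlock();
 */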
3630 
3631 /**
3632  *	dev_set_mac_address - Change Media Access Control Address
3633  *	@dev: device
3634  *	@sa: new address
3635  *
3636  *	Change the hardware (MAC) address of the device
3637  */
3638 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3639 {
3640 	const struct net_device_ops *ops = dev->netdev_ops;
3641 	int err;
3642 
3643 	if (!ops->ndo_set_mac_address)
3644 		return -EOPNOTSUPP;
3645 	if (sa->sa_family != dev->type)
3646 		return -EINVAL;
3647 	if (!netif_device_present(dev))
3648 		return -ENODEV;
3649 	err = ops->ndo_set_mac_address(dev, sa);
3650 	if (!err)
3651 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3652 	return err;
3653 }
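
/*
 * Usage sketch (hedged): installing a new hardware address; new_mac is a
 * hypothetical u8[ETH_ALEN].  sa_family must match dev->type.
 *
 *	struct sockaddr sa;
 *
 *	sa.sa_family = dev->type;
 *	memcpy(sa.sa_data, new_mac, ETH_ALEN);
 *
 *	rtnl_lock();
 *	err = dev_set_mac_address(dev, &sa);
 *	rtnl_unlock();
 */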
3654 
3655 /*
3656  *	Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
3657  */
3658 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
3659 {
3660 	int err;
3661 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3662 
3663 	if (!dev)
3664 		return -ENODEV;
3665 
3666 	switch (cmd) {
3667 		case SIOCGIFFLAGS:	/* Get interface flags */
3668 			ifr->ifr_flags = dev_get_flags(dev);
3669 			return 0;
3670 
3671 		case SIOCGIFMETRIC:	/* Get the metric on the interface
3672 					   (currently unused) */
3673 			ifr->ifr_metric = 0;
3674 			return 0;
3675 
3676 		case SIOCGIFMTU:	/* Get the MTU of a device */
3677 			ifr->ifr_mtu = dev->mtu;
3678 			return 0;
3679 
3680 		case SIOCGIFHWADDR:
3681 			if (!dev->addr_len)
3682 				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3683 			else
3684 				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3685 				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3686 			ifr->ifr_hwaddr.sa_family = dev->type;
3687 			return 0;
3688 
3689 		case SIOCGIFSLAVE:
3690 			err = -EINVAL;
3691 			break;
3692 
3693 		case SIOCGIFMAP:
3694 			ifr->ifr_map.mem_start = dev->mem_start;
3695 			ifr->ifr_map.mem_end   = dev->mem_end;
3696 			ifr->ifr_map.base_addr = dev->base_addr;
3697 			ifr->ifr_map.irq       = dev->irq;
3698 			ifr->ifr_map.dma       = dev->dma;
3699 			ifr->ifr_map.port      = dev->if_port;
3700 			return 0;
3701 
3702 		case SIOCGIFINDEX:
3703 			ifr->ifr_ifindex = dev->ifindex;
3704 			return 0;
3705 
3706 		case SIOCGIFTXQLEN:
3707 			ifr->ifr_qlen = dev->tx_queue_len;
3708 			return 0;
3709 
3710 		default:
3711 			/* dev_ioctl() should ensure this case
3712 			 * is never reached
3713 			 */
3714 			WARN_ON(1);
3715 			err = -EINVAL;
3716 			break;
3717 
3718 	}
3719 	return err;
3720 }
3721 
3722 /*
3723  *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
3724  */
3725 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3726 {
3727 	int err;
3728 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3729 	const struct net_device_ops *ops;
3730 
3731 	if (!dev)
3732 		return -ENODEV;
3733 
3734 	ops = dev->netdev_ops;
3735 
3736 	switch (cmd) {
3737 		case SIOCSIFFLAGS:	/* Set interface flags */
3738 			return dev_change_flags(dev, ifr->ifr_flags);
3739 
3740 		case SIOCSIFMETRIC:	/* Set the metric on the interface
3741 					   (currently unused) */
3742 			return -EOPNOTSUPP;
3743 
3744 		case SIOCSIFMTU:	/* Set the MTU of a device */
3745 			return dev_set_mtu(dev, ifr->ifr_mtu);
3746 
3747 		case SIOCSIFHWADDR:
3748 			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3749 
3750 		case SIOCSIFHWBROADCAST:
3751 			if (ifr->ifr_hwaddr.sa_family != dev->type)
3752 				return -EINVAL;
3753 			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3754 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3755 			call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3756 			return 0;
3757 
3758 		case SIOCSIFMAP:
3759 			if (ops->ndo_set_config) {
3760 				if (!netif_device_present(dev))
3761 					return -ENODEV;
3762 				return ops->ndo_set_config(dev, &ifr->ifr_map);
3763 			}
3764 			return -EOPNOTSUPP;
3765 
3766 		case SIOCADDMULTI:
3767 			if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
3768 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3769 				return -EINVAL;
3770 			if (!netif_device_present(dev))
3771 				return -ENODEV;
3772 			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3773 					  dev->addr_len, 1);
3774 
3775 		case SIOCDELMULTI:
3776 			if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
3777 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3778 				return -EINVAL;
3779 			if (!netif_device_present(dev))
3780 				return -ENODEV;
3781 			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3782 					     dev->addr_len, 1);
3783 
3784 		case SIOCSIFTXQLEN:
3785 			if (ifr->ifr_qlen < 0)
3786 				return -EINVAL;
3787 			dev->tx_queue_len = ifr->ifr_qlen;
3788 			return 0;
3789 
3790 		case SIOCSIFNAME:
3791 			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3792 			return dev_change_name(dev, ifr->ifr_newname);
3793 
3794 		/*
3795 		 *	Unknown or private ioctl
3796 		 */
3797 
3798 		default:
3799 			if ((cmd >= SIOCDEVPRIVATE &&
3800 			    cmd <= SIOCDEVPRIVATE + 15) ||
3801 			    cmd == SIOCBONDENSLAVE ||
3802 			    cmd == SIOCBONDRELEASE ||
3803 			    cmd == SIOCBONDSETHWADDR ||
3804 			    cmd == SIOCBONDSLAVEINFOQUERY ||
3805 			    cmd == SIOCBONDINFOQUERY ||
3806 			    cmd == SIOCBONDCHANGEACTIVE ||
3807 			    cmd == SIOCGMIIPHY ||
3808 			    cmd == SIOCGMIIREG ||
3809 			    cmd == SIOCSMIIREG ||
3810 			    cmd == SIOCBRADDIF ||
3811 			    cmd == SIOCBRDELIF ||
3812 			    cmd == SIOCWANDEV) {
3813 				err = -EOPNOTSUPP;
3814 				if (ops->ndo_do_ioctl) {
3815 					if (netif_device_present(dev))
3816 						err = ops->ndo_do_ioctl(dev, ifr, cmd);
3817 					else
3818 						err = -ENODEV;
3819 				}
3820 			} else
3821 				err = -EINVAL;
3822 
3823 	}
3824 	return err;
3825 }
3826 
3827 /*
3828  *	This function handles all "interface"-type I/O control requests. The actual
3829  *	'doing' part of this is dev_ifsioc above.
3830  */
3831 
3832 /**
3833  *	dev_ioctl	-	network device ioctl
3834  *	@net: the applicable net namespace
3835  *	@cmd: command to issue
3836  *	@arg: pointer to a struct ifreq in user space
3837  *
3838  *	Issue ioctl functions to devices. This is normally called by the
3839  *	user space syscall interfaces but can sometimes be useful for
3840  *	other purposes. The return value is the return from the syscall if
3841  *	positive or a negative errno code on error.
3842  */
3843 
3844 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3845 {
3846 	struct ifreq ifr;
3847 	int ret;
3848 	char *colon;
3849 
3850 	/* One special case: SIOCGIFCONF takes an ifconf argument
3851 	   and requires a shared lock, because it sleeps while writing
3852 	   to user space.
3853 	 */
3854 
3855 	if (cmd == SIOCGIFCONF) {
3856 		rtnl_lock();
3857 		ret = dev_ifconf(net, (char __user *) arg);
3858 		rtnl_unlock();
3859 		return ret;
3860 	}
3861 	if (cmd == SIOCGIFNAME)
3862 		return dev_ifname(net, (struct ifreq __user *)arg);
3863 
3864 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3865 		return -EFAULT;
3866 
3867 	ifr.ifr_name[IFNAMSIZ-1] = 0;
3868 
3869 	colon = strchr(ifr.ifr_name, ':');
3870 	if (colon)
3871 		*colon = 0;
3872 
3873 	/*
3874 	 *	See which interface the caller is talking about.
3875 	 */
3876 
3877 	switch (cmd) {
3878 		/*
3879 		 *	These ioctl calls:
3880 		 *	- can be done by all.
3881 		 *	- atomic and do not require locking.
3882 		 *	- return a value
3883 		 */
3884 		case SIOCGIFFLAGS:
3885 		case SIOCGIFMETRIC:
3886 		case SIOCGIFMTU:
3887 		case SIOCGIFHWADDR:
3888 		case SIOCGIFSLAVE:
3889 		case SIOCGIFMAP:
3890 		case SIOCGIFINDEX:
3891 		case SIOCGIFTXQLEN:
3892 			dev_load(net, ifr.ifr_name);
3893 			read_lock(&dev_base_lock);
3894 			ret = dev_ifsioc_locked(net, &ifr, cmd);
3895 			read_unlock(&dev_base_lock);
3896 			if (!ret) {
3897 				if (colon)
3898 					*colon = ':';
3899 				if (copy_to_user(arg, &ifr,
3900 						 sizeof(struct ifreq)))
3901 					ret = -EFAULT;
3902 			}
3903 			return ret;
3904 
3905 		case SIOCETHTOOL:
3906 			dev_load(net, ifr.ifr_name);
3907 			rtnl_lock();
3908 			ret = dev_ethtool(net, &ifr);
3909 			rtnl_unlock();
3910 			if (!ret) {
3911 				if (colon)
3912 					*colon = ':';
3913 				if (copy_to_user(arg, &ifr,
3914 						 sizeof(struct ifreq)))
3915 					ret = -EFAULT;
3916 			}
3917 			return ret;
3918 
3919 		/*
3920 		 *	These ioctl calls:
3921 		 *	- require superuser power.
3922 		 *	- require strict serialization.
3923 		 *	- return a value
3924 		 */
3925 		case SIOCGMIIPHY:
3926 		case SIOCGMIIREG:
3927 		case SIOCSIFNAME:
3928 			if (!capable(CAP_NET_ADMIN))
3929 				return -EPERM;
3930 			dev_load(net, ifr.ifr_name);
3931 			rtnl_lock();
3932 			ret = dev_ifsioc(net, &ifr, cmd);
3933 			rtnl_unlock();
3934 			if (!ret) {
3935 				if (colon)
3936 					*colon = ':';
3937 				if (copy_to_user(arg, &ifr,
3938 						 sizeof(struct ifreq)))
3939 					ret = -EFAULT;
3940 			}
3941 			return ret;
3942 
3943 		/*
3944 		 *	These ioctl calls:
3945 		 *	- require superuser power.
3946 		 *	- require strict serialization.
3947 		 *	- do not return a value
3948 		 */
3949 		case SIOCSIFFLAGS:
3950 		case SIOCSIFMETRIC:
3951 		case SIOCSIFMTU:
3952 		case SIOCSIFMAP:
3953 		case SIOCSIFHWADDR:
3954 		case SIOCSIFSLAVE:
3955 		case SIOCADDMULTI:
3956 		case SIOCDELMULTI:
3957 		case SIOCSIFHWBROADCAST:
3958 		case SIOCSIFTXQLEN:
3959 		case SIOCSMIIREG:
3960 		case SIOCBONDENSLAVE:
3961 		case SIOCBONDRELEASE:
3962 		case SIOCBONDSETHWADDR:
3963 		case SIOCBONDCHANGEACTIVE:
3964 		case SIOCBRADDIF:
3965 		case SIOCBRDELIF:
3966 			if (!capable(CAP_NET_ADMIN))
3967 				return -EPERM;
3968 			/* fall through */
3969 		case SIOCBONDSLAVEINFOQUERY:
3970 		case SIOCBONDINFOQUERY:
3971 			dev_load(net, ifr.ifr_name);
3972 			rtnl_lock();
3973 			ret = dev_ifsioc(net, &ifr, cmd);
3974 			rtnl_unlock();
3975 			return ret;
3976 
3977 		case SIOCGIFMEM:
3978 			/* Get the per device memory space. We can add this but
3979 			 * currently do not support it */
3980 		case SIOCSIFMEM:
3981 			/* Set the per device memory buffer space.
3982 			 * Not applicable in our case */
3983 		case SIOCSIFLINK:
3984 			return -EINVAL;
3985 
3986 		/*
3987 		 *	Unknown or private ioctl.
3988 		 */
3989 		default:
3990 			if (cmd == SIOCWANDEV ||
3991 			    (cmd >= SIOCDEVPRIVATE &&
3992 			     cmd <= SIOCDEVPRIVATE + 15)) {
3993 				dev_load(net, ifr.ifr_name);
3994 				rtnl_lock();
3995 				ret = dev_ifsioc(net, &ifr, cmd);
3996 				rtnl_unlock();
3997 				if (!ret && copy_to_user(arg, &ifr,
3998 							 sizeof(struct ifreq)))
3999 					ret = -EFAULT;
4000 				return ret;
4001 			}
4002 			/* Take care of Wireless Extensions */
4003 			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
4004 				return wext_handle_ioctl(net, &ifr, cmd, arg);
4005 			return -EINVAL;
4006 	}
4007 }
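
/*
 * Example (hypothetical user-space sketch): the handlers above are
 * normally reached through ioctl() on any socket. Reading an interface
 * MTU via SIOCGIFMTU, for instance:
 *
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <net/if.h>
 *
 *	int get_mtu(int sock, const char *name)
 *	{
 *		struct ifreq ifr;
 *
 *		memset(&ifr, 0, sizeof(ifr));
 *		strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
 *		if (ioctl(sock, SIOCGIFMTU, &ifr) < 0)
 *			return -1;
 *		return ifr.ifr_mtu;
 *	}
 */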
4008 
4009 
4010 /**
4011  *	dev_new_index	-	allocate an ifindex
4012  *	@net: the applicable net namespace
4013  *
4014  *	Returns a suitable unique value for a new device interface
4015  *	number.  The caller must hold the rtnl semaphore or the
4016  *	dev_base_lock to be sure it remains unique.
4017  */
4018 static int dev_new_index(struct net *net)
4019 {
4020 	static int ifindex;
4021 	for (;;) {
4022 		if (++ifindex <= 0)
4023 			ifindex = 1;
4024 		if (!__dev_get_by_index(net, ifindex))
4025 			return ifindex;
4026 	}
4027 }
4028 
4029 /* Delayed registration/unregistration */
4030 static LIST_HEAD(net_todo_list);
4031 
4032 static void net_set_todo(struct net_device *dev)
4033 {
4034 	list_add_tail(&dev->todo_list, &net_todo_list);
4035 }
4036 
4037 static void rollback_registered(struct net_device *dev)
4038 {
4039 	BUG_ON(dev_boot_phase);
4040 	ASSERT_RTNL();
4041 
4042 	/* Some devices call this without ever registering, to unwind a failed initialization. */
4043 	if (dev->reg_state == NETREG_UNINITIALIZED) {
4044 		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
4045 				  "was registered\n", dev->name, dev);
4046 
4047 		WARN_ON(1);
4048 		return;
4049 	}
4050 
4051 	BUG_ON(dev->reg_state != NETREG_REGISTERED);
4052 
4053 	/* If device is running, close it first. */
4054 	dev_close(dev);
4055 
4056 	/* And unlink it from device chain. */
4057 	unlist_netdevice(dev);
4058 
4059 	dev->reg_state = NETREG_UNREGISTERING;
4060 
4061 	synchronize_net();
4062 
4063 	/* Shutdown queueing discipline. */
4064 	dev_shutdown(dev);
4065 
4066 
4067 	/* Notify protocols that we are about to destroy
4068 	   this device. They should clean up all their state.
4069 	*/
4070 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4071 
4072 	/*
4073 	 *	Flush the unicast and multicast chains
4074 	 */
4075 	dev_addr_discard(dev);
4076 
4077 	if (dev->netdev_ops->ndo_uninit)
4078 		dev->netdev_ops->ndo_uninit(dev);
4079 
4080 	/* Notifier chain MUST detach us from master device. */
4081 	WARN_ON(dev->master);
4082 
4083 	/* Remove entries from kobject tree */
4084 	netdev_unregister_kobject(dev);
4085 
4086 	synchronize_net();
4087 
4088 	dev_put(dev);
4089 }
4090 
4091 static void __netdev_init_queue_locks_one(struct net_device *dev,
4092 					  struct netdev_queue *dev_queue,
4093 					  void *_unused)
4094 {
4095 	spin_lock_init(&dev_queue->_xmit_lock);
4096 	netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
4097 	dev_queue->xmit_lock_owner = -1;
4098 }
4099 
4100 static void netdev_init_queue_locks(struct net_device *dev)
4101 {
4102 	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4103 	__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
4104 }
4105 
4106 unsigned long netdev_fix_features(unsigned long features, const char *name)
4107 {
4108 	/* Fix illegal SG+CSUM combinations. */
4109 	if ((features & NETIF_F_SG) &&
4110 	    !(features & NETIF_F_ALL_CSUM)) {
4111 		if (name)
4112 			printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
4113 			       "checksum feature.\n", name);
4114 		features &= ~NETIF_F_SG;
4115 	}
4116 
4117 	/* TSO requires that SG is present as well. */
4118 	if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
4119 		if (name)
4120 			printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
4121 			       "SG feature.\n", name);
4122 		features &= ~NETIF_F_TSO;
4123 	}
4124 
4125 	if (features & NETIF_F_UFO) {
4126 		if (!(features & NETIF_F_GEN_CSUM)) {
4127 			if (name)
4128 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4129 				       "since no NETIF_F_HW_CSUM feature.\n",
4130 				       name);
4131 			features &= ~NETIF_F_UFO;
4132 		}
4133 
4134 		if (!(features & NETIF_F_SG)) {
4135 			if (name)
4136 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4137 				       "since no NETIF_F_SG feature.\n", name);
4138 			features &= ~NETIF_F_UFO;
4139 		}
4140 	}
4141 
4142 	return features;
4143 }
4144 EXPORT_SYMBOL(netdev_fix_features);
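
/*
 * Worked example (hypothetical values): a driver advertising SG and TSO
 * but no checksum feature sees both stripped by the cascade above,
 * since SG requires checksumming and TSO in turn requires SG:
 *
 *	unsigned long features = NETIF_F_SG | NETIF_F_TSO;
 *
 *	features = netdev_fix_features(features, "dummy0");
 *
 * features is now 0, and two KERN_NOTICE messages explain why.
 */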
4145 
4146 /**
4147  *	register_netdevice	- register a network device
4148  *	@dev: device to register
4149  *
4150  *	Take a completed network device structure and add it to the kernel
4151  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4152  *	chain. 0 is returned on success. A negative errno code is returned
4153  *	on a failure to set up the device, or if the name is a duplicate.
4154  *
4155  *	Callers must hold the rtnl semaphore. You may want
4156  *	register_netdev() instead of this.
4157  *
4158  *	BUGS:
4159  *	The locking appears insufficient to guarantee two parallel registers
4160  *	will not get the same name.
4161  */
4162 
4163 int register_netdevice(struct net_device *dev)
4164 {
4165 	struct hlist_head *head;
4166 	struct hlist_node *p;
4167 	int ret;
4168 	struct net *net = dev_net(dev);
4169 
4170 	BUG_ON(dev_boot_phase);
4171 	ASSERT_RTNL();
4172 
4173 	might_sleep();
4174 
4175 	/* When net_devices are persistent, this will be fatal. */
4176 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
4177 	BUG_ON(!net);
4178 
4179 	spin_lock_init(&dev->addr_list_lock);
4180 	netdev_set_addr_lockdep_class(dev);
4181 	netdev_init_queue_locks(dev);
4182 
4183 	dev->iflink = -1;
4184 
4185 #ifdef CONFIG_COMPAT_NET_DEV_OPS
4186 	/* Netdevice_ops API compatibility support.
4187 	 * This is temporary until all network devices are converted.
4188 	 */
4189 	if (dev->netdev_ops) {
4190 		const struct net_device_ops *ops = dev->netdev_ops;
4191 
4192 		dev->init = ops->ndo_init;
4193 		dev->uninit = ops->ndo_uninit;
4194 		dev->open = ops->ndo_open;
4195 		dev->change_rx_flags = ops->ndo_change_rx_flags;
4196 		dev->set_rx_mode = ops->ndo_set_rx_mode;
4197 		dev->set_multicast_list = ops->ndo_set_multicast_list;
4198 		dev->set_mac_address = ops->ndo_set_mac_address;
4199 		dev->validate_addr = ops->ndo_validate_addr;
4200 		dev->do_ioctl = ops->ndo_do_ioctl;
4201 		dev->set_config = ops->ndo_set_config;
4202 		dev->change_mtu = ops->ndo_change_mtu;
4203 		dev->tx_timeout = ops->ndo_tx_timeout;
4204 		dev->get_stats = ops->ndo_get_stats;
4205 		dev->vlan_rx_register = ops->ndo_vlan_rx_register;
4206 		dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
4207 		dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
4208 #ifdef CONFIG_NET_POLL_CONTROLLER
4209 		dev->poll_controller = ops->ndo_poll_controller;
4210 #endif
4211 	} else {
4212 		char drivername[64];
4213 		pr_info("%s (%s): not using net_device_ops yet\n",
4214 			dev->name, netdev_drivername(dev, drivername, 64));
4215 
4216 		/* This works only because net_device_ops and the
4217 		   compatibility structure share the same layout. */
4218 		dev->netdev_ops = (void *) &(dev->init);
4219 	}
4220 #endif
4221 
4222 	/* Init, if this function is available */
4223 	if (dev->netdev_ops->ndo_init) {
4224 		ret = dev->netdev_ops->ndo_init(dev);
4225 		if (ret) {
4226 			if (ret > 0)
4227 				ret = -EIO;
4228 			goto out;
4229 		}
4230 	}
4231 
4232 	if (!dev_valid_name(dev->name)) {
4233 		ret = -EINVAL;
4234 		goto err_uninit;
4235 	}
4236 
4237 	dev->ifindex = dev_new_index(net);
4238 	if (dev->iflink == -1)
4239 		dev->iflink = dev->ifindex;
4240 
4241 	/* Check for existence of name */
4242 	head = dev_name_hash(net, dev->name);
4243 	hlist_for_each(p, head) {
4244 		struct net_device *d
4245 			= hlist_entry(p, struct net_device, name_hlist);
4246 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
4247 			ret = -EEXIST;
4248 			goto err_uninit;
4249 		}
4250 	}
4251 
4252 	/* Fix illegal checksum combinations */
4253 	if ((dev->features & NETIF_F_HW_CSUM) &&
4254 	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4255 		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
4256 		       dev->name);
4257 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
4258 	}
4259 
4260 	if ((dev->features & NETIF_F_NO_CSUM) &&
4261 	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4262 		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
4263 		       dev->name);
4264 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
4265 	}
4266 
4267 	dev->features = netdev_fix_features(dev->features, dev->name);
4268 
4269 	/* Enable software GSO if SG is supported. */
4270 	if (dev->features & NETIF_F_SG)
4271 		dev->features |= NETIF_F_GSO;
4272 
4273 	netdev_initialize_kobject(dev);
4274 	ret = netdev_register_kobject(dev);
4275 	if (ret)
4276 		goto err_uninit;
4277 	dev->reg_state = NETREG_REGISTERED;
4278 
4279 	/*
4280 	 *	Default initial state at registration is that the
4281 	 *	device is present.
4282 	 */
4283 
4284 	set_bit(__LINK_STATE_PRESENT, &dev->state);
4285 
4286 	dev_init_scheduler(dev);
4287 	dev_hold(dev);
4288 	list_netdevice(dev);
4289 
4290 	/* Notify protocols that a new device appeared. */
4291 	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
4292 	ret = notifier_to_errno(ret);
4293 	if (ret) {
4294 		rollback_registered(dev);
4295 		dev->reg_state = NETREG_UNREGISTERED;
4296 	}
4297 
4298 out:
4299 	return ret;
4300 
4301 err_uninit:
4302 	if (dev->netdev_ops->ndo_uninit)
4303 		dev->netdev_ops->ndo_uninit(dev);
4304 	goto out;
4305 }
4306 
4307 /**
4308  *	register_netdev	- register a network device
4309  *	@dev: device to register
4310  *
4311  *	Take a completed network device structure and add it to the kernel
4312  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4313  *	chain. 0 is returned on success. A negative errno code is returned
4314  *	on a failure to set up the device, or if the name is a duplicate.
4315  *
4316  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
4317  *	and expands the device name if you passed a format string to
4318  *	alloc_netdev.
4319  */
4320 int register_netdev(struct net_device *dev)
4321 {
4322 	int err;
4323 
4324 	rtnl_lock();
4325 
4326 	/*
4327 	 * If the name is a format string the caller wants us to do a
4328 	 * name allocation.
4329 	 */
4330 	if (strchr(dev->name, '%')) {
4331 		err = dev_alloc_name(dev, dev->name);
4332 		if (err < 0)
4333 			goto out;
4334 	}
4335 
4336 	err = register_netdevice(dev);
4337 out:
4338 	rtnl_unlock();
4339 	return err;
4340 }
4341 EXPORT_SYMBOL(register_netdev);
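
/*
 * Example (hypothetical driver probe sketch; struct mydrv_priv and
 * mydrv_setup() are assumed, not part of this file):
 *
 *	struct net_device *dev;
 *	int err;
 *
 *	dev = alloc_netdev(sizeof(struct mydrv_priv), "myeth%d",
 *			   mydrv_setup);
 *	if (!dev)
 *		return -ENOMEM;
 *	err = register_netdev(dev);
 *	if (err) {
 *		free_netdev(dev);
 *		return err;
 *	}
 *
 * register_netdev() takes the rtnl semaphore itself and expands the
 * "myeth%d" format, so the caller needs no extra locking here.
 */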
4342 
4343 /*
4344  * netdev_wait_allrefs - wait until all references are gone.
4345  *
4346  * This is called when unregistering network devices.
4347  *
4348  * Any protocol or device that holds a reference should register
4349  * for netdevice notification, and cleanup and put back the
4350  * reference if they receive an UNREGISTER event.
4351  * We can get stuck here if buggy protocols don't correctly
4352  * call dev_put.
4353  */
4354 static void netdev_wait_allrefs(struct net_device *dev)
4355 {
4356 	unsigned long rebroadcast_time, warning_time;
4357 
4358 	rebroadcast_time = warning_time = jiffies;
4359 	while (atomic_read(&dev->refcnt) != 0) {
4360 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
4361 			rtnl_lock();
4362 
4363 			/* Rebroadcast unregister notification */
4364 			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4365 
4366 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
4367 				     &dev->state)) {
4368 				/* We must not have linkwatch events
4369 				 * pending on unregister. If this
4370 				 * happens, we simply run the queue
4371 				 * unscheduled, resulting in a noop
4372 				 * for this device.
4373 				 */
4374 				linkwatch_run_queue();
4375 			}
4376 
4377 			__rtnl_unlock();
4378 
4379 			rebroadcast_time = jiffies;
4380 		}
4381 
4382 		msleep(250);
4383 
4384 		if (time_after(jiffies, warning_time + 10 * HZ)) {
4385 			printk(KERN_EMERG "unregister_netdevice: "
4386 			       "waiting for %s to become free. Usage "
4387 			       "count = %d\n",
4388 			       dev->name, atomic_read(&dev->refcnt));
4389 			warning_time = jiffies;
4390 		}
4391 	}
4392 }
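
/*
 * Sketch (hypothetical protocol code): a subsystem caching a
 * dev_hold()'d pointer would release it from its notifier so the wait
 * loop above can terminate; myproto_cached_dev is an assumed global:
 *
 *	static int myproto_event(struct notifier_block *nb,
 *				 unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UNREGISTER && dev == myproto_cached_dev) {
 *			myproto_cached_dev = NULL;
 *			dev_put(dev);
 *		}
 *		return NOTIFY_DONE;
 *	}
 */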
4393 
4394 /* The sequence is:
4395  *
4396  *	rtnl_lock();
4397  *	...
4398  *	register_netdevice(x1);
4399  *	register_netdevice(x2);
4400  *	...
4401  *	unregister_netdevice(y1);
4402  *	unregister_netdevice(y2);
4403  *      ...
4404  *	rtnl_unlock();
4405  *	free_netdev(y1);
4406  *	free_netdev(y2);
4407  *
4408  * We are invoked by rtnl_unlock().
4409  * This allows us to deal with problems:
4410  * 1) We can delete sysfs objects which invoke hotplug
4411  *    without deadlocking with linkwatch via keventd.
4412  * 2) Since we run with the RTNL semaphore not held, we can sleep
4413  *    safely in order to wait for the netdev refcnt to drop to zero.
4414  *
4415  * We must not return until all unregister events added during
4416  * the interval the lock was held have been completed.
4417  */
4418 void netdev_run_todo(void)
4419 {
4420 	struct list_head list;
4421 
4422 	/* Snapshot list, allow later requests */
4423 	list_replace_init(&net_todo_list, &list);
4424 
4425 	__rtnl_unlock();
4426 
4427 	while (!list_empty(&list)) {
4428 		struct net_device *dev
4429 			= list_entry(list.next, struct net_device, todo_list);
4430 		list_del(&dev->todo_list);
4431 
4432 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
4433 			printk(KERN_ERR "network todo '%s' but state %d\n",
4434 			       dev->name, dev->reg_state);
4435 			dump_stack();
4436 			continue;
4437 		}
4438 
4439 		dev->reg_state = NETREG_UNREGISTERED;
4440 
4441 		on_each_cpu(flush_backlog, dev, 1);
4442 
4443 		netdev_wait_allrefs(dev);
4444 
4445 		/* paranoia */
4446 		BUG_ON(atomic_read(&dev->refcnt));
4447 		WARN_ON(dev->ip_ptr);
4448 		WARN_ON(dev->ip6_ptr);
4449 		WARN_ON(dev->dn_ptr);
4450 
4451 		if (dev->destructor)
4452 			dev->destructor(dev);
4453 
4454 		/* Free network device */
4455 		kobject_put(&dev->dev.kobj);
4456 	}
4457 }
4458 
4459 /**
4460  *	dev_get_stats	- get network device statistics
4461  *	@dev: device to get statistics from
4462  *
4463  *	Get network statistics from device. The device driver may provide
4464  *	its own method by setting dev->netdev_ops->ndo_get_stats; otherwise
4465  *	the internal statistics structure is used.
4466  */
4467 const struct net_device_stats *dev_get_stats(struct net_device *dev)
4468 {
4469 	const struct net_device_ops *ops = dev->netdev_ops;
4470 
4471 	if (ops->ndo_get_stats)
4472 		return ops->ndo_get_stats(dev);
4473 	else
4474 		return &dev->stats;
4475 }
4476 EXPORT_SYMBOL(dev_get_stats);
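
/*
 * Example (hypothetical driver; mydrv_read_rx_dropped() is an assumed
 * helper): a driver keeping counters in hardware can fold them into the
 * shared structure from its hook:
 *
 *	static struct net_device_stats *mydrv_get_stats(struct net_device *dev)
 *	{
 *		dev->stats.rx_dropped = mydrv_read_rx_dropped(dev);
 *		return &dev->stats;
 *	}
 *
 * wired up via .ndo_get_stats = mydrv_get_stats in the driver's
 * net_device_ops.
 */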
4477 
4478 static void netdev_init_one_queue(struct net_device *dev,
4479 				  struct netdev_queue *queue,
4480 				  void *_unused)
4481 {
4482 	queue->dev = dev;
4483 }
4484 
4485 static void netdev_init_queues(struct net_device *dev)
4486 {
4487 	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
4488 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
4489 	spin_lock_init(&dev->tx_global_lock);
4490 }
4491 
4492 /**
4493  *	alloc_netdev_mq - allocate network device
4494  *	@sizeof_priv:	size of private data to allocate space for
4495  *	@name:		device name format string
4496  *	@setup:		callback to initialize device
4497  *	@queue_count:	the number of subqueues to allocate
4498  *
4499  *	Allocates a struct net_device with private data area for driver use
4500  *	and performs basic initialization.  Also allocates subqueue structs
4501  *	for each queue on the device at the end of the netdevice.
4502  */
4503 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
4504 		void (*setup)(struct net_device *), unsigned int queue_count)
4505 {
4506 	struct netdev_queue *tx;
4507 	struct net_device *dev;
4508 	size_t alloc_size;
4509 	void *p;
4510 
4511 	BUG_ON(strlen(name) >= sizeof(dev->name));
4512 
4513 	alloc_size = sizeof(struct net_device);
4514 	if (sizeof_priv) {
4515 		/* ensure 32-byte alignment of private area */
4516 		alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
4517 		alloc_size += sizeof_priv;
4518 	}
4519 	/* ensure 32-byte alignment of whole construct */
4520 	alloc_size += NETDEV_ALIGN_CONST;
4521 
4522 	p = kzalloc(alloc_size, GFP_KERNEL);
4523 	if (!p) {
4524 		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
4525 		return NULL;
4526 	}
4527 
4528 	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
4529 	if (!tx) {
4530 		printk(KERN_ERR "alloc_netdev: Unable to allocate "
4531 		       "tx qdiscs.\n");
4532 		kfree(p);
4533 		return NULL;
4534 	}
4535 
4536 	dev = (struct net_device *)
4537 		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
4538 	dev->padded = (char *)dev - (char *)p;
4539 	dev_net_set(dev, &init_net);
4540 
4541 	dev->_tx = tx;
4542 	dev->num_tx_queues = queue_count;
4543 	dev->real_num_tx_queues = queue_count;
4544 
4545 	dev->gso_max_size = GSO_MAX_SIZE;
4546 
4547 	netdev_init_queues(dev);
4548 
4549 	INIT_LIST_HEAD(&dev->napi_list);
4550 	setup(dev);
4551 	strcpy(dev->name, name);
4552 	return dev;
4553 }
4554 EXPORT_SYMBOL(alloc_netdev_mq);
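
/*
 * Example (hypothetical driver; struct mydrv_priv is an assumed type):
 * allocate an Ethernet device with four transmit queues and a private
 * area, then reach the aligned private tail with netdev_priv():
 *
 *	struct net_device *dev;
 *	struct mydrv_priv *priv;
 *
 *	dev = alloc_netdev_mq(sizeof(struct mydrv_priv), "myeth%d",
 *			      ether_setup, 4);
 *	if (!dev)
 *		return -ENOMEM;
 *	priv = netdev_priv(dev);
 */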
4555 
4556 /**
4557  *	free_netdev - free network device
4558  *	@dev: device
4559  *
4560  *	This function does the last stage of destroying an allocated device
4561  * 	interface. The reference to the device object is released.
4562  *	If this is the last reference then it will be freed.
4563  */
4564 void free_netdev(struct net_device *dev)
4565 {
4566 	struct napi_struct *p, *n;
4567 
4568 	release_net(dev_net(dev));
4569 
4570 	kfree(dev->_tx);
4571 
4572 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
4573 		netif_napi_del(p);
4574 
4575 	/*  Compatibility with error handling in drivers */
4576 	if (dev->reg_state == NETREG_UNINITIALIZED) {
4577 		kfree((char *)dev - dev->padded);
4578 		return;
4579 	}
4580 
4581 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
4582 	dev->reg_state = NETREG_RELEASED;
4583 
4584 	/* will free via device release */
4585 	put_device(&dev->dev);
4586 }
4587 
4588 /**
4589  *	synchronize_net -  Synchronize with packet receive processing
4590  *
4591  *	Wait for packets currently being received to be done.
4592  *	Does not block later packets from starting.
4593  */
4594 void synchronize_net(void)
4595 {
4596 	might_sleep();
4597 	synchronize_rcu();
4598 }
4599 
4600 /**
4601  *	unregister_netdevice - remove device from the kernel
4602  *	@dev: device
4603  *
4604  *	This function shuts down a device interface and removes it
4605  *	from the kernel tables.
4606  *
4607  *	Callers must hold the rtnl semaphore.  You may want
4608  *	unregister_netdev() instead of this.
4609  */
4610 
4611 void unregister_netdevice(struct net_device *dev)
4612 {
4613 	ASSERT_RTNL();
4614 
4615 	rollback_registered(dev);
4616 	/* Finish processing unregister after unlock */
4617 	net_set_todo(dev);
4618 }
4619 
4620 /**
4621  *	unregister_netdev - remove device from the kernel
4622  *	@dev: device
4623  *
4624  *	This function shuts down a device interface and removes it
4625  *	from the kernel tables.
4626  *
4627  *	This is just a wrapper for unregister_netdevice that takes
4628  *	the rtnl semaphore.  In general you want to use this and not
4629  *	unregister_netdevice.
4630  */
4631 void unregister_netdev(struct net_device *dev)
4632 {
4633 	rtnl_lock();
4634 	unregister_netdevice(dev);
4635 	rtnl_unlock();
4636 }
4637 
4638 EXPORT_SYMBOL(unregister_netdev);
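
/*
 * Example (hypothetical driver remove path, pairing with the probe
 * sketch after register_netdev() above): unregistration and the final
 * free are separate steps. unregister_netdev() broadcasts
 * NETDEV_UNREGISTER and, via netdev_run_todo(), waits for outstanding
 * references; free_netdev() then drops the device object, assuming the
 * driver did not already free it from a destructor:
 *
 *	unregister_netdev(dev);
 *	free_netdev(dev);
 */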
4639 
4640 /**
4641  *	dev_change_net_namespace - move device to a different network namespace
4642  *	@dev: device
4643  *	@net: network namespace
4644  *	@pat: If not NULL name pattern to try if the current device name
4645  *	      is already taken in the destination network namespace.
4646  *
4647  *	This function shuts down a device interface and moves it
4648  *	to a new network namespace. On success 0 is returned, on
4649  *	a failure a negative errno code is returned.
4650  *
4651  *	Callers must hold the rtnl semaphore.
4652  */
4653 
4654 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
4655 {
4656 	char buf[IFNAMSIZ];
4657 	const char *destname;
4658 	int err;
4659 
4660 	ASSERT_RTNL();
4661 
4662 	/* Don't allow namespace local devices to be moved. */
4663 	err = -EINVAL;
4664 	if (dev->features & NETIF_F_NETNS_LOCAL)
4665 		goto out;
4666 
4667 #ifdef CONFIG_SYSFS
4668 	/* Don't allow real devices to be moved when sysfs
4669 	 * is enabled.
4670 	 */
4671 	err = -EINVAL;
4672 	if (dev->dev.parent)
4673 		goto out;
4674 #endif
4675 
4676 	/* Ensure the device has been registered */
4677 	err = -EINVAL;
4678 	if (dev->reg_state != NETREG_REGISTERED)
4679 		goto out;
4680 
4681 	/* Get out if there is nothing to do */
4682 	err = 0;
4683 	if (net_eq(dev_net(dev), net))
4684 		goto out;
4685 
4686 	/* Pick the destination device name, and ensure
4687 	 * we can use it in the destination network namespace.
4688 	 */
4689 	err = -EEXIST;
4690 	destname = dev->name;
4691 	if (__dev_get_by_name(net, destname)) {
4692 		/* We get here if we can't use the current device name */
4693 		if (!pat)
4694 			goto out;
4695 		if (!dev_valid_name(pat))
4696 			goto out;
4697 		if (strchr(pat, '%')) {
4698 			if (__dev_alloc_name(net, pat, buf) < 0)
4699 				goto out;
4700 			destname = buf;
4701 		} else
4702 			destname = pat;
4703 		if (__dev_get_by_name(net, destname))
4704 			goto out;
4705 	}
4706 
4707 	/*
4708 	 * And now a mini version of register_netdevice and unregister_netdevice.
4709 	 */
4710 
4711 	/* If device is running close it first. */
4712 	dev_close(dev);
4713 
4714 	/* And unlink it from device chain */
4715 	err = -ENODEV;
4716 	unlist_netdevice(dev);
4717 
4718 	synchronize_net();
4719 
4720 	/* Shutdown queueing discipline. */
4721 	dev_shutdown(dev);
4722 
4723 	/* Notify protocols that we are about to destroy
4724 	   this device. They should clean up all their state.
4725 	*/
4726 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4727 
4728 	/*
4729 	 *	Flush the unicast and multicast chains
4730 	 */
4731 	dev_addr_discard(dev);
4732 
4733 	netdev_unregister_kobject(dev);
4734 
4735 	/* Actually switch the network namespace */
4736 	dev_net_set(dev, net);
4737 
4738 	/* Assign the new device name */
4739 	if (destname != dev->name)
4740 		strcpy(dev->name, destname);
4741 
4742 	/* If there is an ifindex conflict assign a new one */
4743 	if (__dev_get_by_index(net, dev->ifindex)) {
4744 		int iflink = (dev->iflink == dev->ifindex);
4745 		dev->ifindex = dev_new_index(net);
4746 		if (iflink)
4747 			dev->iflink = dev->ifindex;
4748 	}
4749 
4750 	/* Fixup kobjects */
4751 	err = netdev_register_kobject(dev);
4752 	WARN_ON(err);
4753 
4754 	/* Add the device back in the hashes */
4755 	list_netdevice(dev);
4756 
4757 	/* Notify protocols, that a new device appeared. */
4758 	call_netdevice_notifiers(NETDEV_REGISTER, dev);
4759 
4760 	synchronize_net();
4761 	err = 0;
4762 out:
4763 	return err;
4764 }
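
/*
 * Example: default_device_exit() below passes a literal fallback name
 * ("dev<ifindex>") as @pat. A '%' pattern also works, in which case a
 * free name is picked with __dev_alloc_name(); a hypothetical call:
 *
 *	err = dev_change_net_namespace(dev, &init_net, "eth%d");
 */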
4765 
4766 static int dev_cpu_callback(struct notifier_block *nfb,
4767 			    unsigned long action,
4768 			    void *ocpu)
4769 {
4770 	struct sk_buff **list_skb;
4771 	struct Qdisc **list_net;
4772 	struct sk_buff *skb;
4773 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
4774 	struct softnet_data *sd, *oldsd;
4775 
4776 	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
4777 		return NOTIFY_OK;
4778 
4779 	local_irq_disable();
4780 	cpu = smp_processor_id();
4781 	sd = &per_cpu(softnet_data, cpu);
4782 	oldsd = &per_cpu(softnet_data, oldcpu);
4783 
4784 	/* Find end of our completion_queue. */
4785 	list_skb = &sd->completion_queue;
4786 	while (*list_skb)
4787 		list_skb = &(*list_skb)->next;
4788 	/* Append completion queue from offline CPU. */
4789 	*list_skb = oldsd->completion_queue;
4790 	oldsd->completion_queue = NULL;
4791 
4792 	/* Find end of our output_queue. */
4793 	list_net = &sd->output_queue;
4794 	while (*list_net)
4795 		list_net = &(*list_net)->next_sched;
4796 	/* Append output queue from offline CPU. */
4797 	*list_net = oldsd->output_queue;
4798 	oldsd->output_queue = NULL;
4799 
4800 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
4801 	local_irq_enable();
4802 
4803 	/* Process offline CPU's input_pkt_queue */
4804 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
4805 		netif_rx(skb);
4806 
4807 	return NOTIFY_OK;
4808 }
4809 
4810 
4811 /**
4812  *	netdev_increment_features - increment feature set by one
4813  *	@all: current feature set
4814  *	@one: new feature set
4815  *	@mask: mask feature set
4816  *
4817  *	Computes a new feature set after adding a device with feature set
4818  *	@one to the master device with current feature set @all.  Will not
4819  *	enable anything that is off in @mask. Returns the new feature set.
4820  */
4821 unsigned long netdev_increment_features(unsigned long all, unsigned long one,
4822 					unsigned long mask)
4823 {
4824 	/* If device needs checksumming, downgrade to it. */
4825 	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4826 		all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
4827 	else if (mask & NETIF_F_ALL_CSUM) {
4828 		/* If one device supports v4/v6 checksumming, set for all. */
4829 		if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
4830 		    !(all & NETIF_F_GEN_CSUM)) {
4831 			all &= ~NETIF_F_ALL_CSUM;
4832 			all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
4833 		}
4834 
4835 		/* If one device supports hw checksumming, set for all. */
4836 		if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
4837 			all &= ~NETIF_F_ALL_CSUM;
4838 			all |= NETIF_F_HW_CSUM;
4839 		}
4840 	}
4841 
4842 	one |= NETIF_F_ALL_CSUM;
4843 
4844 	one |= all & NETIF_F_ONE_FOR_ALL;
4845 	all &= one | NETIF_F_LLTX | NETIF_F_GSO;
4846 	all |= one & mask & NETIF_F_ONE_FOR_ALL;
4847 
4848 	return all;
4849 }
4850 EXPORT_SYMBOL(netdev_increment_features);
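
/*
 * Sketch (hypothetical master/slave aggregation, not the exact bonding
 * code; slave[] and nslaves are assumed): a master that must advertise
 * only what every slave supports folds the slaves in one at a time:
 *
 *	unsigned long features = master->features & ~NETIF_F_ONE_FOR_ALL;
 *	int i;
 *
 *	for (i = 0; i < nslaves; i++)
 *		features = netdev_increment_features(features,
 *						     slave[i]->features,
 *						     NETIF_F_ONE_FOR_ALL);
 *	master->features = features;
 */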
4851 
4852 static struct hlist_head *netdev_create_hash(void)
4853 {
4854 	int i;
4855 	struct hlist_head *hash;
4856 
4857 	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
4858 	if (hash != NULL)
4859 		for (i = 0; i < NETDEV_HASHENTRIES; i++)
4860 			INIT_HLIST_HEAD(&hash[i]);
4861 
4862 	return hash;
4863 }
4864 
4865 /* Initialize per network namespace state */
4866 static int __net_init netdev_init(struct net *net)
4867 {
4868 	INIT_LIST_HEAD(&net->dev_base_head);
4869 
4870 	net->dev_name_head = netdev_create_hash();
4871 	if (net->dev_name_head == NULL)
4872 		goto err_name;
4873 
4874 	net->dev_index_head = netdev_create_hash();
4875 	if (net->dev_index_head == NULL)
4876 		goto err_idx;
4877 
4878 	return 0;
4879 
4880 err_idx:
4881 	kfree(net->dev_name_head);
4882 err_name:
4883 	return -ENOMEM;
4884 }
4885 
4886 /**
4887  *	netdev_drivername - network driver for the device
4888  *	@dev: network device
4889  *	@buffer: buffer for resulting name
4890  *	@len: size of buffer
4891  *
4892  *	Determine network driver for device.
4893  */
4894 char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
4895 {
4896 	const struct device_driver *driver;
4897 	const struct device *parent;
4898 
4899 	if (len <= 0 || !buffer)
4900 		return buffer;
4901 	buffer[0] = 0;
4902 
4903 	parent = dev->dev.parent;
4904 
4905 	if (!parent)
4906 		return buffer;
4907 
4908 	driver = parent->driver;
4909 	if (driver && driver->name)
4910 		strlcpy(buffer, driver->name, len);
4911 	return buffer;
4912 }
4913 
4914 static void __net_exit netdev_exit(struct net *net)
4915 {
4916 	kfree(net->dev_name_head);
4917 	kfree(net->dev_index_head);
4918 }
4919 
4920 static struct pernet_operations __net_initdata netdev_net_ops = {
4921 	.init = netdev_init,
4922 	.exit = netdev_exit,
4923 };
4924 
4925 static void __net_exit default_device_exit(struct net *net)
4926 {
4927 	struct net_device *dev;
4928 	/*
4929 	 * Push all migratable network devices back to the
4930 	 * initial network namespace.
4931 	 */
4932 	rtnl_lock();
4933 restart:
4934 	for_each_netdev(net, dev) {
4935 		int err;
4936 		char fb_name[IFNAMSIZ];
4937 
4938 		/* Ignore unmovable devices (e.g. loopback) */
4939 		if (dev->features & NETIF_F_NETNS_LOCAL)
4940 			continue;
4941 
4942 		/* Delete virtual devices */
4943 		if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
4944 			dev->rtnl_link_ops->dellink(dev);
4945 			goto restart;
4946 		}
4947 
4948 		/* Push remaining network devices to init_net */
4949 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
4950 		err = dev_change_net_namespace(dev, &init_net, fb_name);
4951 		if (err) {
4952 			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
4953 				__func__, dev->name, err);
4954 			BUG();
4955 		}
4956 		goto restart;
4957 	}
4958 	rtnl_unlock();
4959 }
4960 
4961 static struct pernet_operations __net_initdata default_device_ops = {
4962 	.exit = default_device_exit,
4963 };
4964 
4965 /*
4966  *	Initialize the DEV module. At boot time this walks the device list and
4967  *	unhooks any devices that fail to initialise (normally hardware not
4968  *	present) and leaves us with a valid list of present and active devices.
4969  *
4970  */
4971 
4972 /*
4973  *       This is called single threaded during boot, so no need
4974  *       to take the rtnl semaphore.
4975  */
4976 static int __init net_dev_init(void)
4977 {
4978 	int i, rc = -ENOMEM;
4979 
4980 	BUG_ON(!dev_boot_phase);
4981 
4982 	if (dev_proc_init())
4983 		goto out;
4984 
4985 	if (netdev_kobject_init())
4986 		goto out;
4987 
4988 	INIT_LIST_HEAD(&ptype_all);
4989 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
4990 		INIT_LIST_HEAD(&ptype_base[i]);
4991 
4992 	if (register_pernet_subsys(&netdev_net_ops))
4993 		goto out;
4994 
4995 	/*
4996 	 *	Initialise the packet receive queues.
4997 	 */
4998 
4999 	for_each_possible_cpu(i) {
5000 		struct softnet_data *queue;
5001 
5002 		queue = &per_cpu(softnet_data, i);
5003 		skb_queue_head_init(&queue->input_pkt_queue);
5004 		queue->completion_queue = NULL;
5005 		INIT_LIST_HEAD(&queue->poll_list);
5006 
5007 		queue->backlog.poll = process_backlog;
5008 		queue->backlog.weight = weight_p;
5009 		queue->backlog.gro_list = NULL;
5010 	}
5011 
5012 	dev_boot_phase = 0;
5013 
5014 	/* The loopback device is special: if any other network device
5015 	 * is present in a network namespace, the loopback device must
5016 	 * be present too. Since we now dynamically allocate and free the
5017 	 * loopback device, ensure this invariant is maintained by
5018 	 * keeping the loopback device as the first device on the
5019 	 * list of network devices.  This ensures the loopback device
5020 	 * is the first device that appears and the last network device
5021 	 * that disappears.
5022 	 */
5023 	if (register_pernet_device(&loopback_net_ops))
5024 		goto out;
5025 
5026 	if (register_pernet_device(&default_device_ops))
5027 		goto out;
5028 
5029 	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
5030 	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
5031 
5032 	hotcpu_notifier(dev_cpu_callback, 0);
5033 	dst_init();
5034 	dev_mcast_init();
5035 	#ifdef CONFIG_NET_DMA
5036 	dmaengine_get();
5037 	#endif
5038 	rc = 0;
5039 out:
5040 	return rc;
5041 }
5042 
5043 subsys_initcall(net_dev_init);
5044 
5045 EXPORT_SYMBOL(__dev_get_by_index);
5046 EXPORT_SYMBOL(__dev_get_by_name);
5047 EXPORT_SYMBOL(__dev_remove_pack);
5048 EXPORT_SYMBOL(dev_valid_name);
5049 EXPORT_SYMBOL(dev_add_pack);
5050 EXPORT_SYMBOL(dev_alloc_name);
5051 EXPORT_SYMBOL(dev_close);
5052 EXPORT_SYMBOL(dev_get_by_flags);
5053 EXPORT_SYMBOL(dev_get_by_index);
5054 EXPORT_SYMBOL(dev_get_by_name);
5055 EXPORT_SYMBOL(dev_open);
5056 EXPORT_SYMBOL(dev_queue_xmit);
5057 EXPORT_SYMBOL(dev_remove_pack);
5058 EXPORT_SYMBOL(dev_set_allmulti);
5059 EXPORT_SYMBOL(dev_set_promiscuity);
5060 EXPORT_SYMBOL(dev_change_flags);
5061 EXPORT_SYMBOL(dev_set_mtu);
5062 EXPORT_SYMBOL(dev_set_mac_address);
5063 EXPORT_SYMBOL(free_netdev);
5064 EXPORT_SYMBOL(netdev_boot_setup_check);
5065 EXPORT_SYMBOL(netdev_set_master);
5066 EXPORT_SYMBOL(netdev_state_change);
5067 EXPORT_SYMBOL(netif_receive_skb);
5068 EXPORT_SYMBOL(netif_rx);
5069 EXPORT_SYMBOL(register_gifconf);
5070 EXPORT_SYMBOL(register_netdevice);
5071 EXPORT_SYMBOL(register_netdevice_notifier);
5072 EXPORT_SYMBOL(skb_checksum_help);
5073 EXPORT_SYMBOL(synchronize_net);
5074 EXPORT_SYMBOL(unregister_netdevice);
5075 EXPORT_SYMBOL(unregister_netdevice_notifier);
5076 EXPORT_SYMBOL(net_enable_timestamp);
5077 EXPORT_SYMBOL(net_disable_timestamp);
5078 EXPORT_SYMBOL(dev_get_flags);
5079 
5080 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
5081 EXPORT_SYMBOL(br_handle_frame_hook);
5082 EXPORT_SYMBOL(br_fdb_get_hook);
5083 EXPORT_SYMBOL(br_fdb_put_hook);
5084 #endif
5085 
5086 EXPORT_SYMBOL(dev_load);
5087 
5088 EXPORT_PER_CPU_SYMBOL(softnet_data);
5089