xref: /linux-6.15/net/core/dev.c (revision a115bc07)
1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <[email protected]>
12  *				Mark Evans, <[email protected]>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <[email protected]>
16  *		Alan Cox <[email protected]>
17  *		David Hinds <[email protected]>
18  *		Alexey Kuznetsov <[email protected]>
19  *		Adam Sulmicki <[email protected]>
20  *              Pekka Riikonen <[email protected]>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/hash.h>
83 #include <linux/sched.h>
84 #include <linux/mutex.h>
85 #include <linux/string.h>
86 #include <linux/mm.h>
87 #include <linux/socket.h>
88 #include <linux/sockios.h>
89 #include <linux/errno.h>
90 #include <linux/interrupt.h>
91 #include <linux/if_ether.h>
92 #include <linux/netdevice.h>
93 #include <linux/etherdevice.h>
94 #include <linux/ethtool.h>
95 #include <linux/notifier.h>
96 #include <linux/skbuff.h>
97 #include <net/net_namespace.h>
98 #include <net/sock.h>
99 #include <linux/rtnetlink.h>
100 #include <linux/proc_fs.h>
101 #include <linux/seq_file.h>
102 #include <linux/stat.h>
103 #include <linux/if_bridge.h>
104 #include <linux/if_macvlan.h>
105 #include <net/dst.h>
106 #include <net/pkt_sched.h>
107 #include <net/checksum.h>
108 #include <net/xfrm.h>
109 #include <linux/highmem.h>
110 #include <linux/init.h>
111 #include <linux/kmod.h>
112 #include <linux/module.h>
113 #include <linux/netpoll.h>
114 #include <linux/rcupdate.h>
115 #include <linux/delay.h>
116 #include <net/wext.h>
117 #include <net/iw_handler.h>
118 #include <asm/current.h>
119 #include <linux/audit.h>
120 #include <linux/dmaengine.h>
121 #include <linux/err.h>
122 #include <linux/ctype.h>
123 #include <linux/if_arp.h>
124 #include <linux/if_vlan.h>
125 #include <linux/ip.h>
126 #include <net/ip.h>
127 #include <linux/ipv6.h>
128 #include <linux/in.h>
129 #include <linux/jhash.h>
130 #include <linux/random.h>
131 #include <trace/events/napi.h>
132 
133 #include "net-sysfs.h"
134 
135 /* Instead of increasing this, you should create a hash table. */
136 #define MAX_GRO_SKBS 8
137 
138 /* This should be increased if a protocol with a bigger head is added. */
139 #define GRO_MAX_HEAD (MAX_HEADER + 128)
140 
141 /*
142  *	The list of packet types we will receive (as opposed to discard)
143  *	and the routines to invoke.
144  *
145  *	Why 16. Because with 16 the only overlap we get on a hash of the
146  *	low nibble of the protocol value is RARP/SNAP/X.25.
147  *
148  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
149  *             sure which should go first, but I bet it won't make much
150  *             difference if we are running VLANs.  The good news is that
151  *             this protocol won't be in the list unless compiled in, so
152  *             the average user (w/out VLANs) will not be adversely affected.
153  *             --BLG
154  *
155  *		0800	IP
156  *		8100    802.1Q VLAN
157  *		0001	802.3
158  *		0002	AX.25
159  *		0004	802.2
160  *		8035	RARP
161  *		0005	SNAP
162  *		0805	X.25
163  *		0806	ARP
164  *		8137	IPX
165  *		0009	Localtalk
166  *		86DD	IPv6
167  */
168 
169 #define PTYPE_HASH_SIZE	(16)
170 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
171 
172 static DEFINE_SPINLOCK(ptype_lock);
173 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
174 static struct list_head ptype_all __read_mostly;	/* Taps */
175 
176 /*
177  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
178  * semaphore.
179  *
180  * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
181  *
182  * Writers must hold the rtnl semaphore while they loop through the
183  * dev_base_head list, and hold dev_base_lock for writing when they do the
184  * actual updates.  This allows pure readers to access the list even
185  * while a writer is preparing to update it.
186  *
187  * To put it another way, dev_base_lock is held for writing only to
188  * protect against pure readers; the rtnl semaphore provides the
189  * protection against other writers.
190  *
191  * See, for example usages, register_netdevice() and
192  * unregister_netdevice(), which must be called with the rtnl
193  * semaphore held.
194  */
195 DEFINE_RWLOCK(dev_base_lock);
196 EXPORT_SYMBOL(dev_base_lock);
197 
198 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
199 {
200 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
201 	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
202 }
203 
204 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
205 {
206 	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
207 }
208 
209 /* Device list insertion */
210 static int list_netdevice(struct net_device *dev)
211 {
212 	struct net *net = dev_net(dev);
213 
214 	ASSERT_RTNL();
215 
216 	write_lock_bh(&dev_base_lock);
217 	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
218 	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
219 	hlist_add_head_rcu(&dev->index_hlist,
220 			   dev_index_hash(net, dev->ifindex));
221 	write_unlock_bh(&dev_base_lock);
222 	return 0;
223 }
224 
225 /* Device list removal
226  * caller must respect a RCU grace period before freeing/reusing dev
227  */
228 static void unlist_netdevice(struct net_device *dev)
229 {
230 	ASSERT_RTNL();
231 
232 	/* Unlink dev from the device chain */
233 	write_lock_bh(&dev_base_lock);
234 	list_del_rcu(&dev->dev_list);
235 	hlist_del_rcu(&dev->name_hlist);
236 	hlist_del_rcu(&dev->index_hlist);
237 	write_unlock_bh(&dev_base_lock);
238 }
239 
240 /*
241  *	Our notifier list
242  */
243 
244 static RAW_NOTIFIER_HEAD(netdev_chain);
245 
246 /*
247  *	Device drivers call our routines to queue packets here. We empty the
248  *	queue in the local softnet handler.
249  */
250 
251 DEFINE_PER_CPU(struct softnet_data, softnet_data);
252 EXPORT_PER_CPU_SYMBOL(softnet_data);
253 
254 #ifdef CONFIG_LOCKDEP
255 /*
256  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
257  * according to dev->type
258  */
259 static const unsigned short netdev_lock_type[] =
260 	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
261 	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
262 	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
263 	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
264 	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
265 	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
266 	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
267 	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
268 	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
269 	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
270 	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
271 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
272 	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
273 	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
274 	 ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154,
275 	 ARPHRD_VOID, ARPHRD_NONE};
276 
277 static const char *const netdev_lock_name[] =
278 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
279 	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
280 	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
281 	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
282 	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
283 	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
284 	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
285 	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
286 	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
287 	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
288 	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
289 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
290 	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
291 	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
292 	 "_xmit_PHONET_PIPE", "_xmit_IEEE802154",
293 	 "_xmit_VOID", "_xmit_NONE"};
294 
295 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
296 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
297 
298 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
299 {
300 	int i;
301 
302 	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
303 		if (netdev_lock_type[i] == dev_type)
304 			return i;
305 	/* the last key is used by default */
306 	return ARRAY_SIZE(netdev_lock_type) - 1;
307 }
308 
309 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
310 						 unsigned short dev_type)
311 {
312 	int i;
313 
314 	i = netdev_lock_pos(dev_type);
315 	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
316 				   netdev_lock_name[i]);
317 }
318 
319 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
320 {
321 	int i;
322 
323 	i = netdev_lock_pos(dev->type);
324 	lockdep_set_class_and_name(&dev->addr_list_lock,
325 				   &netdev_addr_lock_key[i],
326 				   netdev_lock_name[i]);
327 }
328 #else
329 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
330 						 unsigned short dev_type)
331 {
332 }
333 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
334 {
335 }
336 #endif
337 
338 /*******************************************************************************
339 
340 		Protocol management and registration routines
341 
342 *******************************************************************************/
343 
344 /*
345  *	Add a protocol ID to the list. Now that the input handler is
346  *	smarter we can dispense with all the messy stuff that used to be
347  *	here.
348  *
349  *	BEWARE!!! Protocol handlers, mangling input packets,
350  *	MUST BE last in hash buckets and checking protocol handlers
351  *	MUST start from promiscuous ptype_all chain in net_bh.
352  *	It is true now, do not change it.
353  *	Explanation follows: if protocol handler, mangling packet, will
354  *	be the first on list, it is not able to sense, that packet
355  *	is cloned and should be copied-on-write, so that it will
356  *	change it and subsequent readers will get broken packet.
357  *							--ANK (980803)
358  */
359 
360 /**
361  *	dev_add_pack - add packet handler
362  *	@pt: packet type declaration
363  *
364  *	Add a protocol handler to the networking stack. The passed &packet_type
365  *	is linked into kernel lists and may not be freed until it has been
366  *	removed from the kernel lists.
367  *
368  *	This call does not sleep therefore it can not
369  *	guarantee all CPU's that are in middle of receiving packets
370  *	will see the new packet type (until the next received packet).
371  */
372 
373 void dev_add_pack(struct packet_type *pt)
374 {
375 	int hash;
376 
377 	spin_lock_bh(&ptype_lock);
378 	if (pt->type == htons(ETH_P_ALL))
379 		list_add_rcu(&pt->list, &ptype_all);
380 	else {
381 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
382 		list_add_rcu(&pt->list, &ptype_base[hash]);
383 	}
384 	spin_unlock_bh(&ptype_lock);
385 }
386 EXPORT_SYMBOL(dev_add_pack);
387 
388 /**
389  *	__dev_remove_pack	 - remove packet handler
390  *	@pt: packet type declaration
391  *
392  *	Remove a protocol handler that was previously added to the kernel
393  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
394  *	from the kernel lists and can be freed or reused once this function
395  *	returns.
396  *
397  *      The packet type might still be in use by receivers
398  *	and must not be freed until after all the CPU's have gone
399  *	through a quiescent state.
400  */
401 void __dev_remove_pack(struct packet_type *pt)
402 {
403 	struct list_head *head;
404 	struct packet_type *pt1;
405 
406 	spin_lock_bh(&ptype_lock);
407 
408 	if (pt->type == htons(ETH_P_ALL))
409 		head = &ptype_all;
410 	else
411 		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
412 
413 	list_for_each_entry(pt1, head, list) {
414 		if (pt == pt1) {
415 			list_del_rcu(&pt->list);
416 			goto out;
417 		}
418 	}
419 
420 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
421 out:
422 	spin_unlock_bh(&ptype_lock);
423 }
424 EXPORT_SYMBOL(__dev_remove_pack);
425 
426 /**
427  *	dev_remove_pack	 - remove packet handler
428  *	@pt: packet type declaration
429  *
430  *	Remove a protocol handler that was previously added to the kernel
431  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
432  *	from the kernel lists and can be freed or reused once this function
433  *	returns.
434  *
435  *	This call sleeps to guarantee that no CPU is looking at the packet
436  *	type after return.
437  */
438 void dev_remove_pack(struct packet_type *pt)
439 {
440 	__dev_remove_pack(pt);
441 
442 	synchronize_net();
443 }
444 EXPORT_SYMBOL(dev_remove_pack);
445 
446 /******************************************************************************
447 
448 		      Device Boot-time Settings Routines
449 
450 *******************************************************************************/
451 
452 /* Boot time configuration table */
453 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
454 
455 /**
456  *	netdev_boot_setup_add	- add new setup entry
457  *	@name: name of the device
458  *	@map: configured settings for the device
459  *
460  *	Adds new setup entry to the dev_boot_setup list.  The function
461  *	returns 0 on error and 1 on success.  This is a generic routine to
462  *	all netdevices.
463  */
464 static int netdev_boot_setup_add(char *name, struct ifmap *map)
465 {
466 	struct netdev_boot_setup *s;
467 	int i;
468 
469 	s = dev_boot_setup;
470 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
471 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
472 			memset(s[i].name, 0, sizeof(s[i].name));
473 			strlcpy(s[i].name, name, IFNAMSIZ);
474 			memcpy(&s[i].map, map, sizeof(s[i].map));
475 			break;
476 		}
477 	}
478 
479 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
480 }
481 
482 /**
483  *	netdev_boot_setup_check	- check boot time settings
484  *	@dev: the netdevice
485  *
486  * 	Check boot time settings for the device.
487  *	The found settings are set for the device to be used
488  *	later in the device probing.
489  *	Returns 0 if no settings found, 1 if they are.
490  */
491 int netdev_boot_setup_check(struct net_device *dev)
492 {
493 	struct netdev_boot_setup *s = dev_boot_setup;
494 	int i;
495 
496 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
497 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
498 		    !strcmp(dev->name, s[i].name)) {
499 			dev->irq 	= s[i].map.irq;
500 			dev->base_addr 	= s[i].map.base_addr;
501 			dev->mem_start 	= s[i].map.mem_start;
502 			dev->mem_end 	= s[i].map.mem_end;
503 			return 1;
504 		}
505 	}
506 	return 0;
507 }
508 EXPORT_SYMBOL(netdev_boot_setup_check);
509 
510 
511 /**
512  *	netdev_boot_base	- get address from boot time settings
513  *	@prefix: prefix for network device
514  *	@unit: id for network device
515  *
516  * 	Check boot time settings for the base address of device.
517  *	The found settings are set for the device to be used
518  *	later in the device probing.
519  *	Returns 0 if no settings found.
520  */
521 unsigned long netdev_boot_base(const char *prefix, int unit)
522 {
523 	const struct netdev_boot_setup *s = dev_boot_setup;
524 	char name[IFNAMSIZ];
525 	int i;
526 
527 	sprintf(name, "%s%d", prefix, unit);
528 
529 	/*
530 	 * If device already registered then return base of 1
531 	 * to indicate not to probe for this interface
532 	 */
533 	if (__dev_get_by_name(&init_net, name))
534 		return 1;
535 
536 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
537 		if (!strcmp(name, s[i].name))
538 			return s[i].map.base_addr;
539 	return 0;
540 }
541 
542 /*
543  * Saves at boot time configured settings for any netdevice.
544  */
545 int __init netdev_boot_setup(char *str)
546 {
547 	int ints[5];
548 	struct ifmap map;
549 
550 	str = get_options(str, ARRAY_SIZE(ints), ints);
551 	if (!str || !*str)
552 		return 0;
553 
554 	/* Save settings */
555 	memset(&map, 0, sizeof(map));
556 	if (ints[0] > 0)
557 		map.irq = ints[1];
558 	if (ints[0] > 1)
559 		map.base_addr = ints[2];
560 	if (ints[0] > 2)
561 		map.mem_start = ints[3];
562 	if (ints[0] > 3)
563 		map.mem_end = ints[4];
564 
565 	/* Add new entry to the list */
566 	return netdev_boot_setup_add(str, &map);
567 }
568 
569 __setup("netdev=", netdev_boot_setup);
570 
571 /*******************************************************************************
572 
573 			    Device Interface Subroutines
574 
575 *******************************************************************************/
576 
577 /**
578  *	__dev_get_by_name	- find a device by its name
579  *	@net: the applicable net namespace
580  *	@name: name to find
581  *
582  *	Find an interface by name. Must be called under RTNL semaphore
583  *	or @dev_base_lock. If the name is found a pointer to the device
584  *	is returned. If the name is not found then %NULL is returned. The
585  *	reference counters are not incremented so the caller must be
586  *	careful with locks.
587  */
588 
589 struct net_device *__dev_get_by_name(struct net *net, const char *name)
590 {
591 	struct hlist_node *p;
592 	struct net_device *dev;
593 	struct hlist_head *head = dev_name_hash(net, name);
594 
595 	hlist_for_each_entry(dev, p, head, name_hlist)
596 		if (!strncmp(dev->name, name, IFNAMSIZ))
597 			return dev;
598 
599 	return NULL;
600 }
601 EXPORT_SYMBOL(__dev_get_by_name);
602 
603 /**
604  *	dev_get_by_name_rcu	- find a device by its name
605  *	@net: the applicable net namespace
606  *	@name: name to find
607  *
608  *	Find an interface by name.
609  *	If the name is found a pointer to the device is returned.
610  * 	If the name is not found then %NULL is returned.
611  *	The reference counters are not incremented so the caller must be
612  *	careful with locks. The caller must hold RCU lock.
613  */
614 
615 struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
616 {
617 	struct hlist_node *p;
618 	struct net_device *dev;
619 	struct hlist_head *head = dev_name_hash(net, name);
620 
621 	hlist_for_each_entry_rcu(dev, p, head, name_hlist)
622 		if (!strncmp(dev->name, name, IFNAMSIZ))
623 			return dev;
624 
625 	return NULL;
626 }
627 EXPORT_SYMBOL(dev_get_by_name_rcu);
628 
629 /**
630  *	dev_get_by_name		- find a device by its name
631  *	@net: the applicable net namespace
632  *	@name: name to find
633  *
634  *	Find an interface by name. This can be called from any
635  *	context and does its own locking. The returned handle has
636  *	the usage count incremented and the caller must use dev_put() to
637  *	release it when it is no longer needed. %NULL is returned if no
638  *	matching device is found.
639  */
640 
641 struct net_device *dev_get_by_name(struct net *net, const char *name)
642 {
643 	struct net_device *dev;
644 
645 	rcu_read_lock();
646 	dev = dev_get_by_name_rcu(net, name);
647 	if (dev)
648 		dev_hold(dev);
649 	rcu_read_unlock();
650 	return dev;
651 }
652 EXPORT_SYMBOL(dev_get_by_name);
653 
654 /**
655  *	__dev_get_by_index - find a device by its ifindex
656  *	@net: the applicable net namespace
657  *	@ifindex: index of device
658  *
659  *	Search for an interface by index. Returns %NULL if the device
660  *	is not found or a pointer to the device. The device has not
661  *	had its reference counter increased so the caller must be careful
662  *	about locking. The caller must hold either the RTNL semaphore
663  *	or @dev_base_lock.
664  */
665 
666 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
667 {
668 	struct hlist_node *p;
669 	struct net_device *dev;
670 	struct hlist_head *head = dev_index_hash(net, ifindex);
671 
672 	hlist_for_each_entry(dev, p, head, index_hlist)
673 		if (dev->ifindex == ifindex)
674 			return dev;
675 
676 	return NULL;
677 }
678 EXPORT_SYMBOL(__dev_get_by_index);
679 
680 /**
681  *	dev_get_by_index_rcu - find a device by its ifindex
682  *	@net: the applicable net namespace
683  *	@ifindex: index of device
684  *
685  *	Search for an interface by index. Returns %NULL if the device
686  *	is not found or a pointer to the device. The device has not
687  *	had its reference counter increased so the caller must be careful
688  *	about locking. The caller must hold RCU lock.
689  */
690 
691 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
692 {
693 	struct hlist_node *p;
694 	struct net_device *dev;
695 	struct hlist_head *head = dev_index_hash(net, ifindex);
696 
697 	hlist_for_each_entry_rcu(dev, p, head, index_hlist)
698 		if (dev->ifindex == ifindex)
699 			return dev;
700 
701 	return NULL;
702 }
703 EXPORT_SYMBOL(dev_get_by_index_rcu);
704 
705 
706 /**
707  *	dev_get_by_index - find a device by its ifindex
708  *	@net: the applicable net namespace
709  *	@ifindex: index of device
710  *
711  *	Search for an interface by index. Returns NULL if the device
712  *	is not found or a pointer to the device. The device returned has
713  *	had a reference added and the pointer is safe until the user calls
714  *	dev_put to indicate they have finished with it.
715  */
716 
717 struct net_device *dev_get_by_index(struct net *net, int ifindex)
718 {
719 	struct net_device *dev;
720 
721 	rcu_read_lock();
722 	dev = dev_get_by_index_rcu(net, ifindex);
723 	if (dev)
724 		dev_hold(dev);
725 	rcu_read_unlock();
726 	return dev;
727 }
728 EXPORT_SYMBOL(dev_get_by_index);
729 
730 /**
731  *	dev_getbyhwaddr - find a device by its hardware address
732  *	@net: the applicable net namespace
733  *	@type: media type of device
734  *	@ha: hardware address
735  *
736  *	Search for an interface by MAC address. Returns NULL if the device
737  *	is not found or a pointer to the device. The caller must hold the
738  *	rtnl semaphore. The returned device has not had its ref count increased
739  *	and the caller must therefore be careful about locking
740  *
741  *	BUGS:
742  *	If the API was consistent this would be __dev_get_by_hwaddr
743  */
744 
745 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
746 {
747 	struct net_device *dev;
748 
749 	ASSERT_RTNL();
750 
751 	for_each_netdev(net, dev)
752 		if (dev->type == type &&
753 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
754 			return dev;
755 
756 	return NULL;
757 }
758 EXPORT_SYMBOL(dev_getbyhwaddr);
759 
760 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
761 {
762 	struct net_device *dev;
763 
764 	ASSERT_RTNL();
765 	for_each_netdev(net, dev)
766 		if (dev->type == type)
767 			return dev;
768 
769 	return NULL;
770 }
771 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
772 
773 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
774 {
775 	struct net_device *dev;
776 
777 	rtnl_lock();
778 	dev = __dev_getfirstbyhwtype(net, type);
779 	if (dev)
780 		dev_hold(dev);
781 	rtnl_unlock();
782 	return dev;
783 }
784 EXPORT_SYMBOL(dev_getfirstbyhwtype);
785 
786 /**
787  *	dev_get_by_flags - find any device with given flags
788  *	@net: the applicable net namespace
789  *	@if_flags: IFF_* values
790  *	@mask: bitmask of bits in if_flags to check
791  *
792  *	Search for any interface with the given flags. Returns NULL if a device
793  *	is not found or a pointer to the device. The device returned has
794  *	had a reference added and the pointer is safe until the user calls
795  *	dev_put to indicate they have finished with it.
796  */
797 
798 struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags,
799 				    unsigned short mask)
800 {
801 	struct net_device *dev, *ret;
802 
803 	ret = NULL;
804 	rcu_read_lock();
805 	for_each_netdev_rcu(net, dev) {
806 		if (((dev->flags ^ if_flags) & mask) == 0) {
807 			dev_hold(dev);
808 			ret = dev;
809 			break;
810 		}
811 	}
812 	rcu_read_unlock();
813 	return ret;
814 }
815 EXPORT_SYMBOL(dev_get_by_flags);
816 
817 /**
818  *	dev_valid_name - check if name is okay for network device
819  *	@name: name string
820  *
821  *	Network device names need to be valid file names to
822  *	to allow sysfs to work.  We also disallow any kind of
823  *	whitespace.
824  */
825 int dev_valid_name(const char *name)
826 {
827 	if (*name == '\0')
828 		return 0;
829 	if (strlen(name) >= IFNAMSIZ)
830 		return 0;
831 	if (!strcmp(name, ".") || !strcmp(name, ".."))
832 		return 0;
833 
834 	while (*name) {
835 		if (*name == '/' || isspace(*name))
836 			return 0;
837 		name++;
838 	}
839 	return 1;
840 }
841 EXPORT_SYMBOL(dev_valid_name);
842 
843 /**
844  *	__dev_alloc_name - allocate a name for a device
845  *	@net: network namespace to allocate the device name in
846  *	@name: name format string
847  *	@buf:  scratch buffer and result name string
848  *
849  *	Passed a format string - eg "lt%d" it will try and find a suitable
850  *	id. It scans list of devices to build up a free map, then chooses
851  *	the first empty slot. The caller must hold the dev_base or rtnl lock
852  *	while allocating the name and adding the device in order to avoid
853  *	duplicates.
854  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
855  *	Returns the number of the unit assigned or a negative errno code.
856  */
857 
858 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
859 {
860 	int i = 0;
861 	const char *p;
862 	const int max_netdevices = 8*PAGE_SIZE;
863 	unsigned long *inuse;
864 	struct net_device *d;
865 
866 	p = strnchr(name, IFNAMSIZ-1, '%');
867 	if (p) {
868 		/*
869 		 * Verify the string as this thing may have come from
870 		 * the user.  There must be either one "%d" and no other "%"
871 		 * characters.
872 		 */
873 		if (p[1] != 'd' || strchr(p + 2, '%'))
874 			return -EINVAL;
875 
876 		/* Use one page as a bit array of possible slots */
877 		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
878 		if (!inuse)
879 			return -ENOMEM;
880 
881 		for_each_netdev(net, d) {
882 			if (!sscanf(d->name, name, &i))
883 				continue;
884 			if (i < 0 || i >= max_netdevices)
885 				continue;
886 
887 			/*  avoid cases where sscanf is not exact inverse of printf */
888 			snprintf(buf, IFNAMSIZ, name, i);
889 			if (!strncmp(buf, d->name, IFNAMSIZ))
890 				set_bit(i, inuse);
891 		}
892 
893 		i = find_first_zero_bit(inuse, max_netdevices);
894 		free_page((unsigned long) inuse);
895 	}
896 
897 	if (buf != name)
898 		snprintf(buf, IFNAMSIZ, name, i);
899 	if (!__dev_get_by_name(net, buf))
900 		return i;
901 
902 	/* It is possible to run out of possible slots
903 	 * when the name is long and there isn't enough space left
904 	 * for the digits, or if all bits are used.
905 	 */
906 	return -ENFILE;
907 }
908 
909 /**
910  *	dev_alloc_name - allocate a name for a device
911  *	@dev: device
912  *	@name: name format string
913  *
914  *	Passed a format string - eg "lt%d" it will try and find a suitable
915  *	id. It scans list of devices to build up a free map, then chooses
916  *	the first empty slot. The caller must hold the dev_base or rtnl lock
917  *	while allocating the name and adding the device in order to avoid
918  *	duplicates.
919  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
920  *	Returns the number of the unit assigned or a negative errno code.
921  */
922 
923 int dev_alloc_name(struct net_device *dev, const char *name)
924 {
925 	char buf[IFNAMSIZ];
926 	struct net *net;
927 	int ret;
928 
929 	BUG_ON(!dev_net(dev));
930 	net = dev_net(dev);
931 	ret = __dev_alloc_name(net, name, buf);
932 	if (ret >= 0)
933 		strlcpy(dev->name, buf, IFNAMSIZ);
934 	return ret;
935 }
936 EXPORT_SYMBOL(dev_alloc_name);
937 
938 static int dev_get_valid_name(struct net *net, const char *name, char *buf,
939 			      bool fmt)
940 {
941 	if (!dev_valid_name(name))
942 		return -EINVAL;
943 
944 	if (fmt && strchr(name, '%'))
945 		return __dev_alloc_name(net, name, buf);
946 	else if (__dev_get_by_name(net, name))
947 		return -EEXIST;
948 	else if (buf != name)
949 		strlcpy(buf, name, IFNAMSIZ);
950 
951 	return 0;
952 }
953 
954 /**
955  *	dev_change_name - change name of a device
956  *	@dev: device
957  *	@newname: name (or format string) must be at least IFNAMSIZ
958  *
959  *	Change name of a device, can pass format strings "eth%d".
960  *	for wildcarding.
961  */
962 int dev_change_name(struct net_device *dev, const char *newname)
963 {
964 	char oldname[IFNAMSIZ];
965 	int err = 0;
966 	int ret;
967 	struct net *net;
968 
969 	ASSERT_RTNL();
970 	BUG_ON(!dev_net(dev));
971 
972 	net = dev_net(dev);
973 	if (dev->flags & IFF_UP)
974 		return -EBUSY;
975 
976 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
977 		return 0;
978 
979 	memcpy(oldname, dev->name, IFNAMSIZ);
980 
981 	err = dev_get_valid_name(net, newname, dev->name, 1);
982 	if (err < 0)
983 		return err;
984 
985 rollback:
986 	/* For now only devices in the initial network namespace
987 	 * are in sysfs.
988 	 */
989 	if (net_eq(net, &init_net)) {
990 		ret = device_rename(&dev->dev, dev->name);
991 		if (ret) {
992 			memcpy(dev->name, oldname, IFNAMSIZ);
993 			return ret;
994 		}
995 	}
996 
997 	write_lock_bh(&dev_base_lock);
998 	hlist_del(&dev->name_hlist);
999 	write_unlock_bh(&dev_base_lock);
1000 
1001 	synchronize_rcu();
1002 
1003 	write_lock_bh(&dev_base_lock);
1004 	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
1005 	write_unlock_bh(&dev_base_lock);
1006 
1007 	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
1008 	ret = notifier_to_errno(ret);
1009 
1010 	if (ret) {
1011 		/* err >= 0 after dev_alloc_name() or stores the first errno */
1012 		if (err >= 0) {
1013 			err = ret;
1014 			memcpy(dev->name, oldname, IFNAMSIZ);
1015 			goto rollback;
1016 		} else {
1017 			printk(KERN_ERR
1018 			       "%s: name change rollback failed: %d.\n",
1019 			       dev->name, ret);
1020 		}
1021 	}
1022 
1023 	return err;
1024 }
1025 
1026 /**
1027  *	dev_set_alias - change ifalias of a device
1028  *	@dev: device
1029  *	@alias: name up to IFALIASZ
1030  *	@len: limit of bytes to copy from info
1031  *
1032  *	Set ifalias for a device,
1033  */
1034 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
1035 {
1036 	ASSERT_RTNL();
1037 
1038 	if (len >= IFALIASZ)
1039 		return -EINVAL;
1040 
1041 	if (!len) {
1042 		if (dev->ifalias) {
1043 			kfree(dev->ifalias);
1044 			dev->ifalias = NULL;
1045 		}
1046 		return 0;
1047 	}
1048 
1049 	dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
1050 	if (!dev->ifalias)
1051 		return -ENOMEM;
1052 
1053 	strlcpy(dev->ifalias, alias, len+1);
1054 	return len;
1055 }
1056 
1057 
1058 /**
1059  *	netdev_features_change - device changes features
1060  *	@dev: device to cause notification
1061  *
1062  *	Called to indicate a device has changed features.
1063  */
1064 void netdev_features_change(struct net_device *dev)
1065 {
1066 	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1067 }
1068 EXPORT_SYMBOL(netdev_features_change);
1069 
1070 /**
1071  *	netdev_state_change - device changes state
1072  *	@dev: device to cause notification
1073  *
1074  *	Called to indicate a device has changed state. This function calls
1075  *	the notifier chains for netdev_chain and sends a NEWLINK message
1076  *	to the routing socket.
1077  */
1078 void netdev_state_change(struct net_device *dev)
1079 {
1080 	if (dev->flags & IFF_UP) {
1081 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
1082 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1083 	}
1084 }
1085 EXPORT_SYMBOL(netdev_state_change);
1086 
1087 void netdev_bonding_change(struct net_device *dev, unsigned long event)
1088 {
1089 	call_netdevice_notifiers(event, dev);
1090 }
1091 EXPORT_SYMBOL(netdev_bonding_change);
1092 
1093 /**
1094  *	dev_load 	- load a network module
1095  *	@net: the applicable net namespace
1096  *	@name: name of interface
1097  *
1098  *	If a network interface is not present and the process has suitable
1099  *	privileges this function loads the module. If module loading is not
1100  *	available in this kernel then it becomes a nop.
1101  */
1102 
1103 void dev_load(struct net *net, const char *name)
1104 {
1105 	struct net_device *dev;
1106 
1107 	rcu_read_lock();
1108 	dev = dev_get_by_name_rcu(net, name);
1109 	rcu_read_unlock();
1110 
1111 	if (!dev && capable(CAP_NET_ADMIN))
1112 		request_module("%s", name);
1113 }
1114 EXPORT_SYMBOL(dev_load);
1115 
1116 static int __dev_open(struct net_device *dev)
1117 {
1118 	const struct net_device_ops *ops = dev->netdev_ops;
1119 	int ret;
1120 
1121 	ASSERT_RTNL();
1122 
1123 	/*
1124 	 *	Is it even present?
1125 	 */
1126 	if (!netif_device_present(dev))
1127 		return -ENODEV;
1128 
1129 	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1130 	ret = notifier_to_errno(ret);
1131 	if (ret)
1132 		return ret;
1133 
1134 	/*
1135 	 *	Call device private open method
1136 	 */
1137 	set_bit(__LINK_STATE_START, &dev->state);
1138 
1139 	if (ops->ndo_validate_addr)
1140 		ret = ops->ndo_validate_addr(dev);
1141 
1142 	if (!ret && ops->ndo_open)
1143 		ret = ops->ndo_open(dev);
1144 
1145 	/*
1146 	 *	If it went open OK then:
1147 	 */
1148 
1149 	if (ret)
1150 		clear_bit(__LINK_STATE_START, &dev->state);
1151 	else {
1152 		/*
1153 		 *	Set the flags.
1154 		 */
1155 		dev->flags |= IFF_UP;
1156 
1157 		/*
1158 		 *	Enable NET_DMA
1159 		 */
1160 		net_dmaengine_get();
1161 
1162 		/*
1163 		 *	Initialize multicasting status
1164 		 */
1165 		dev_set_rx_mode(dev);
1166 
1167 		/*
1168 		 *	Wakeup transmit queue engine
1169 		 */
1170 		dev_activate(dev);
1171 	}
1172 
1173 	return ret;
1174 }
1175 
1176 /**
1177  *	dev_open	- prepare an interface for use.
1178  *	@dev:	device to open
1179  *
1180  *	Takes a device from down to up state. The device's private open
1181  *	function is invoked and then the multicast lists are loaded. Finally
1182  *	the device is moved into the up state and a %NETDEV_UP message is
1183  *	sent to the netdev notifier chain.
1184  *
1185  *	Calling this function on an active interface is a nop. On a failure
1186  *	a negative errno code is returned.
1187  */
1188 int dev_open(struct net_device *dev)
1189 {
1190 	int ret;
1191 
1192 	/*
1193 	 *	Is it already up?
1194 	 */
1195 	if (dev->flags & IFF_UP)
1196 		return 0;
1197 
1198 	/*
1199 	 *	Open device
1200 	 */
1201 	ret = __dev_open(dev);
1202 	if (ret < 0)
1203 		return ret;
1204 
1205 	/*
1206 	 *	... and announce new interface.
1207 	 */
1208 	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1209 	call_netdevice_notifiers(NETDEV_UP, dev);
1210 
1211 	return ret;
1212 }
1213 EXPORT_SYMBOL(dev_open);
1214 
1215 static int __dev_close(struct net_device *dev)
1216 {
1217 	const struct net_device_ops *ops = dev->netdev_ops;
1218 
1219 	ASSERT_RTNL();
1220 	might_sleep();
1221 
1222 	/*
1223 	 *	Tell people we are going down, so that they can
1224 	 *	prepare to death, when device is still operating.
1225 	 */
1226 	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1227 
1228 	clear_bit(__LINK_STATE_START, &dev->state);
1229 
1230 	/* Synchronize to scheduled poll. We cannot touch poll list,
1231 	 * it can be even on different cpu. So just clear netif_running().
1232 	 *
1233 	 * dev->stop() will invoke napi_disable() on all of it's
1234 	 * napi_struct instances on this device.
1235 	 */
1236 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
1237 
1238 	dev_deactivate(dev);
1239 
1240 	/*
1241 	 *	Call the device specific close. This cannot fail.
1242 	 *	Only if device is UP
1243 	 *
1244 	 *	We allow it to be called even after a DETACH hot-plug
1245 	 *	event.
1246 	 */
1247 	if (ops->ndo_stop)
1248 		ops->ndo_stop(dev);
1249 
1250 	/*
1251 	 *	Device is now down.
1252 	 */
1253 
1254 	dev->flags &= ~IFF_UP;
1255 
1256 	/*
1257 	 *	Shutdown NET_DMA
1258 	 */
1259 	net_dmaengine_put();
1260 
1261 	return 0;
1262 }
1263 
1264 /**
1265  *	dev_close - shutdown an interface.
1266  *	@dev: device to shutdown
1267  *
1268  *	This function moves an active device into down state. A
1269  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1270  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1271  *	chain.
1272  */
1273 int dev_close(struct net_device *dev)
1274 {
1275 	if (!(dev->flags & IFF_UP))
1276 		return 0;
1277 
1278 	__dev_close(dev);
1279 
1280 	/*
1281 	 * Tell people we are down
1282 	 */
1283 	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1284 	call_netdevice_notifiers(NETDEV_DOWN, dev);
1285 
1286 	return 0;
1287 }
1288 EXPORT_SYMBOL(dev_close);
1289 
1290 
1291 /**
1292  *	dev_disable_lro - disable Large Receive Offload on a device
1293  *	@dev: device
1294  *
1295  *	Disable Large Receive Offload (LRO) on a net device.  Must be
1296  *	called under RTNL.  This is needed if received packets may be
1297  *	forwarded to another interface.
1298  */
1299 void dev_disable_lro(struct net_device *dev)
1300 {
1301 	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1302 	    dev->ethtool_ops->set_flags) {
1303 		u32 flags = dev->ethtool_ops->get_flags(dev);
1304 		if (flags & ETH_FLAG_LRO) {
1305 			flags &= ~ETH_FLAG_LRO;
1306 			dev->ethtool_ops->set_flags(dev, flags);
1307 		}
1308 	}
1309 	WARN_ON(dev->features & NETIF_F_LRO);
1310 }
1311 EXPORT_SYMBOL(dev_disable_lro);
1312 
1313 
1314 static int dev_boot_phase = 1;
1315 
1316 /*
1317  *	Device change register/unregister. These are not inline or static
1318  *	as we export them to the world.
1319  */
1320 
1321 /**
1322  *	register_netdevice_notifier - register a network notifier block
1323  *	@nb: notifier
1324  *
1325  *	Register a notifier to be called when network device events occur.
1326  *	The notifier passed is linked into the kernel structures and must
1327  *	not be reused until it has been unregistered. A negative errno code
1328  *	is returned on a failure.
1329  *
1330  * 	When registered all registration and up events are replayed
1331  *	to the new notifier to allow device to have a race free
1332  *	view of the network device list.
1333  */
1334 
1335 int register_netdevice_notifier(struct notifier_block *nb)
1336 {
1337 	struct net_device *dev;
1338 	struct net_device *last;
1339 	struct net *net;
1340 	int err;
1341 
1342 	rtnl_lock();
1343 	err = raw_notifier_chain_register(&netdev_chain, nb);
1344 	if (err)
1345 		goto unlock;
1346 	if (dev_boot_phase)
1347 		goto unlock;
1348 	for_each_net(net) {
1349 		for_each_netdev(net, dev) {
1350 			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1351 			err = notifier_to_errno(err);
1352 			if (err)
1353 				goto rollback;
1354 
1355 			if (!(dev->flags & IFF_UP))
1356 				continue;
1357 
1358 			nb->notifier_call(nb, NETDEV_UP, dev);
1359 		}
1360 	}
1361 
1362 unlock:
1363 	rtnl_unlock();
1364 	return err;
1365 
1366 rollback:
1367 	last = dev;
1368 	for_each_net(net) {
1369 		for_each_netdev(net, dev) {
1370 			if (dev == last)
1371 				break;
1372 
1373 			if (dev->flags & IFF_UP) {
1374 				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1375 				nb->notifier_call(nb, NETDEV_DOWN, dev);
1376 			}
1377 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1378 			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
1379 		}
1380 	}
1381 
1382 	raw_notifier_chain_unregister(&netdev_chain, nb);
1383 	goto unlock;
1384 }
1385 EXPORT_SYMBOL(register_netdevice_notifier);
1386 
1387 /**
1388  *	unregister_netdevice_notifier - unregister a network notifier block
1389  *	@nb: notifier
1390  *
1391  *	Unregister a notifier previously registered by
1392  *	register_netdevice_notifier(). The notifier is unlinked into the
1393  *	kernel structures and may then be reused. A negative errno code
1394  *	is returned on a failure.
1395  */
1396 
1397 int unregister_netdevice_notifier(struct notifier_block *nb)
1398 {
1399 	int err;
1400 
1401 	rtnl_lock();
1402 	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1403 	rtnl_unlock();
1404 	return err;
1405 }
1406 EXPORT_SYMBOL(unregister_netdevice_notifier);
1407 
1408 /**
1409  *	call_netdevice_notifiers - call all network notifier blocks
1410  *      @val: value passed unmodified to notifier function
1411  *      @dev: net_device pointer passed unmodified to notifier function
1412  *
1413  *	Call all network notifier blocks.  Parameters and return value
1414  *	are as for raw_notifier_call_chain().
1415  */
1416 
1417 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1418 {
1419 	return raw_notifier_call_chain(&netdev_chain, val, dev);
1420 }
1421 
1422 /* When > 0 there are consumers of rx skb time stamps */
1423 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1424 
1425 void net_enable_timestamp(void)
1426 {
1427 	atomic_inc(&netstamp_needed);
1428 }
1429 EXPORT_SYMBOL(net_enable_timestamp);
1430 
1431 void net_disable_timestamp(void)
1432 {
1433 	atomic_dec(&netstamp_needed);
1434 }
1435 EXPORT_SYMBOL(net_disable_timestamp);
1436 
1437 static inline void net_timestamp(struct sk_buff *skb)
1438 {
1439 	if (atomic_read(&netstamp_needed))
1440 		__net_timestamp(skb);
1441 	else
1442 		skb->tstamp.tv64 = 0;
1443 }
1444 
1445 /**
1446  * dev_forward_skb - loopback an skb to another netif
1447  *
1448  * @dev: destination network device
1449  * @skb: buffer to forward
1450  *
1451  * return values:
1452  *	NET_RX_SUCCESS	(no congestion)
1453  *	NET_RX_DROP     (packet was dropped)
1454  *
1455  * dev_forward_skb can be used for injecting an skb from the
1456  * start_xmit function of one device into the receive queue
1457  * of another device.
1458  *
1459  * The receiving device may be in another namespace, so
1460  * we have to clear all information in the skb that could
1461  * impact namespace isolation.
1462  */
1463 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1464 {
1465 	skb_orphan(skb);
1466 
1467 	if (!(dev->flags & IFF_UP))
1468 		return NET_RX_DROP;
1469 
1470 	if (skb->len > (dev->mtu + dev->hard_header_len))
1471 		return NET_RX_DROP;
1472 
1473 	skb_set_dev(skb, dev);
1474 	skb->tstamp.tv64 = 0;
1475 	skb->pkt_type = PACKET_HOST;
1476 	skb->protocol = eth_type_trans(skb, dev);
1477 	return netif_rx(skb);
1478 }
1479 EXPORT_SYMBOL_GPL(dev_forward_skb);
1480 
1481 /*
1482  *	Support routine. Sends outgoing frames to any network
1483  *	taps currently in use.
1484  */
1485 
1486 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1487 {
1488 	struct packet_type *ptype;
1489 
1490 #ifdef CONFIG_NET_CLS_ACT
1491 	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
1492 		net_timestamp(skb);
1493 #else
1494 	net_timestamp(skb);
1495 #endif
1496 
1497 	rcu_read_lock();
1498 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1499 		/* Never send packets back to the socket
1500 		 * they originated from - MvS ([email protected])
1501 		 */
1502 		if ((ptype->dev == dev || !ptype->dev) &&
1503 		    (ptype->af_packet_priv == NULL ||
1504 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1505 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1506 			if (!skb2)
1507 				break;
1508 
1509 			/* skb->nh should be correctly
1510 			   set by sender, so that the second statement is
1511 			   just protection against buggy protocols.
1512 			 */
1513 			skb_reset_mac_header(skb2);
1514 
1515 			if (skb_network_header(skb2) < skb2->data ||
1516 			    skb2->network_header > skb2->tail) {
1517 				if (net_ratelimit())
1518 					printk(KERN_CRIT "protocol %04x is "
1519 					       "buggy, dev %s\n",
1520 					       skb2->protocol, dev->name);
1521 				skb_reset_network_header(skb2);
1522 			}
1523 
1524 			skb2->transport_header = skb2->network_header;
1525 			skb2->pkt_type = PACKET_OUTGOING;
1526 			ptype->func(skb2, skb->dev, ptype, skb->dev);
1527 		}
1528 	}
1529 	rcu_read_unlock();
1530 }
1531 
1532 
1533 static inline void __netif_reschedule(struct Qdisc *q)
1534 {
1535 	struct softnet_data *sd;
1536 	unsigned long flags;
1537 
1538 	local_irq_save(flags);
1539 	sd = &__get_cpu_var(softnet_data);
1540 	q->next_sched = sd->output_queue;
1541 	sd->output_queue = q;
1542 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
1543 	local_irq_restore(flags);
1544 }
1545 
1546 void __netif_schedule(struct Qdisc *q)
1547 {
1548 	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1549 		__netif_reschedule(q);
1550 }
1551 EXPORT_SYMBOL(__netif_schedule);
1552 
1553 void dev_kfree_skb_irq(struct sk_buff *skb)
1554 {
1555 	if (atomic_dec_and_test(&skb->users)) {
1556 		struct softnet_data *sd;
1557 		unsigned long flags;
1558 
1559 		local_irq_save(flags);
1560 		sd = &__get_cpu_var(softnet_data);
1561 		skb->next = sd->completion_queue;
1562 		sd->completion_queue = skb;
1563 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1564 		local_irq_restore(flags);
1565 	}
1566 }
1567 EXPORT_SYMBOL(dev_kfree_skb_irq);
1568 
1569 void dev_kfree_skb_any(struct sk_buff *skb)
1570 {
1571 	if (in_irq() || irqs_disabled())
1572 		dev_kfree_skb_irq(skb);
1573 	else
1574 		dev_kfree_skb(skb);
1575 }
1576 EXPORT_SYMBOL(dev_kfree_skb_any);
1577 
1578 
1579 /**
1580  * netif_device_detach - mark device as removed
1581  * @dev: network device
1582  *
1583  * Mark device as removed from system and therefore no longer available.
1584  */
1585 void netif_device_detach(struct net_device *dev)
1586 {
1587 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1588 	    netif_running(dev)) {
1589 		netif_tx_stop_all_queues(dev);
1590 	}
1591 }
1592 EXPORT_SYMBOL(netif_device_detach);
1593 
1594 /**
1595  * netif_device_attach - mark device as attached
1596  * @dev: network device
1597  *
1598  * Mark device as attached from system and restart if needed.
1599  */
1600 void netif_device_attach(struct net_device *dev)
1601 {
1602 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1603 	    netif_running(dev)) {
1604 		netif_tx_wake_all_queues(dev);
1605 		__netdev_watchdog_up(dev);
1606 	}
1607 }
1608 EXPORT_SYMBOL(netif_device_attach);
1609 
1610 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1611 {
1612 	return ((features & NETIF_F_GEN_CSUM) ||
1613 		((features & NETIF_F_IP_CSUM) &&
1614 		 protocol == htons(ETH_P_IP)) ||
1615 		((features & NETIF_F_IPV6_CSUM) &&
1616 		 protocol == htons(ETH_P_IPV6)) ||
1617 		((features & NETIF_F_FCOE_CRC) &&
1618 		 protocol == htons(ETH_P_FCOE)));
1619 }
1620 
1621 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1622 {
1623 	if (can_checksum_protocol(dev->features, skb->protocol))
1624 		return true;
1625 
1626 	if (skb->protocol == htons(ETH_P_8021Q)) {
1627 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1628 		if (can_checksum_protocol(dev->features & dev->vlan_features,
1629 					  veh->h_vlan_encapsulated_proto))
1630 			return true;
1631 	}
1632 
1633 	return false;
1634 }
1635 
1636 /**
1637  * skb_dev_set -- assign a new device to a buffer
1638  * @skb: buffer for the new device
1639  * @dev: network device
1640  *
1641  * If an skb is owned by a device already, we have to reset
1642  * all data private to the namespace a device belongs to
1643  * before assigning it a new device.
1644  */
1645 #ifdef CONFIG_NET_NS
1646 void skb_set_dev(struct sk_buff *skb, struct net_device *dev)
1647 {
1648 	skb_dst_drop(skb);
1649 	if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) {
1650 		secpath_reset(skb);
1651 		nf_reset(skb);
1652 		skb_init_secmark(skb);
1653 		skb->mark = 0;
1654 		skb->priority = 0;
1655 		skb->nf_trace = 0;
1656 		skb->ipvs_property = 0;
1657 #ifdef CONFIG_NET_SCHED
1658 		skb->tc_index = 0;
1659 #endif
1660 	}
1661 	skb->dev = dev;
1662 }
1663 EXPORT_SYMBOL(skb_set_dev);
1664 #endif /* CONFIG_NET_NS */
1665 
1666 /*
1667  * Invalidate hardware checksum when packet is to be mangled, and
1668  * complete checksum manually on outgoing path.
1669  */
1670 int skb_checksum_help(struct sk_buff *skb)
1671 {
1672 	__wsum csum;
1673 	int ret = 0, offset;
1674 
1675 	if (skb->ip_summed == CHECKSUM_COMPLETE)
1676 		goto out_set_summed;
1677 
1678 	if (unlikely(skb_shinfo(skb)->gso_size)) {
1679 		/* Let GSO fix up the checksum. */
1680 		goto out_set_summed;
1681 	}
1682 
1683 	offset = skb->csum_start - skb_headroom(skb);
1684 	BUG_ON(offset >= skb_headlen(skb));
1685 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
1686 
1687 	offset += skb->csum_offset;
1688 	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1689 
1690 	if (skb_cloned(skb) &&
1691 	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1692 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1693 		if (ret)
1694 			goto out;
1695 	}
1696 
1697 	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
1698 out_set_summed:
1699 	skb->ip_summed = CHECKSUM_NONE;
1700 out:
1701 	return ret;
1702 }
1703 EXPORT_SYMBOL(skb_checksum_help);
1704 
1705 /**
1706  *	skb_gso_segment - Perform segmentation on skb.
1707  *	@skb: buffer to segment
1708  *	@features: features for the output path (see dev->features)
1709  *
1710  *	This function segments the given skb and returns a list of segments.
1711  *
1712  *	It may return NULL if the skb requires no segmentation.  This is
1713  *	only possible when GSO is used for verifying header integrity.
1714  */
1715 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1716 {
1717 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1718 	struct packet_type *ptype;
1719 	__be16 type = skb->protocol;
1720 	int err;
1721 
1722 	skb_reset_mac_header(skb);
1723 	skb->mac_len = skb->network_header - skb->mac_header;
1724 	__skb_pull(skb, skb->mac_len);
1725 
1726 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1727 		struct net_device *dev = skb->dev;
1728 		struct ethtool_drvinfo info = {};
1729 
1730 		if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
1731 			dev->ethtool_ops->get_drvinfo(dev, &info);
1732 
1733 		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
1734 			"ip_summed=%d",
1735 		     info.driver, dev ? dev->features : 0L,
1736 		     skb->sk ? skb->sk->sk_route_caps : 0L,
1737 		     skb->len, skb->data_len, skb->ip_summed);
1738 
1739 		if (skb_header_cloned(skb) &&
1740 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1741 			return ERR_PTR(err);
1742 	}
1743 
1744 	rcu_read_lock();
1745 	list_for_each_entry_rcu(ptype,
1746 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
1747 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1748 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1749 				err = ptype->gso_send_check(skb);
1750 				segs = ERR_PTR(err);
1751 				if (err || skb_gso_ok(skb, features))
1752 					break;
1753 				__skb_push(skb, (skb->data -
1754 						 skb_network_header(skb)));
1755 			}
1756 			segs = ptype->gso_segment(skb, features);
1757 			break;
1758 		}
1759 	}
1760 	rcu_read_unlock();
1761 
1762 	__skb_push(skb, skb->data - skb_mac_header(skb));
1763 
1764 	return segs;
1765 }
1766 EXPORT_SYMBOL(skb_gso_segment);
1767 
1768 /* Take action when hardware reception checksum errors are detected. */
1769 #ifdef CONFIG_BUG
1770 void netdev_rx_csum_fault(struct net_device *dev)
1771 {
1772 	if (net_ratelimit()) {
1773 		printk(KERN_ERR "%s: hw csum failure.\n",
1774 			dev ? dev->name : "<unknown>");
1775 		dump_stack();
1776 	}
1777 }
1778 EXPORT_SYMBOL(netdev_rx_csum_fault);
1779 #endif
1780 
1781 /* Actually, we should eliminate this check as soon as we know, that:
1782  * 1. IOMMU is present and allows to map all the memory.
1783  * 2. No high memory really exists on this machine.
1784  */
1785 
1786 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1787 {
1788 #ifdef CONFIG_HIGHMEM
1789 	int i;
1790 
1791 	if (dev->features & NETIF_F_HIGHDMA)
1792 		return 0;
1793 
1794 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1795 		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1796 			return 1;
1797 
1798 #endif
1799 	return 0;
1800 }
1801 
1802 struct dev_gso_cb {
1803 	void (*destructor)(struct sk_buff *skb);
1804 };
1805 
1806 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1807 
1808 static void dev_gso_skb_destructor(struct sk_buff *skb)
1809 {
1810 	struct dev_gso_cb *cb;
1811 
1812 	do {
1813 		struct sk_buff *nskb = skb->next;
1814 
1815 		skb->next = nskb->next;
1816 		nskb->next = NULL;
1817 		kfree_skb(nskb);
1818 	} while (skb->next);
1819 
1820 	cb = DEV_GSO_CB(skb);
1821 	if (cb->destructor)
1822 		cb->destructor(skb);
1823 }
1824 
1825 /**
1826  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
1827  *	@skb: buffer to segment
1828  *
1829  *	This function segments the given skb and stores the list of segments
1830  *	in skb->next.
1831  */
1832 static int dev_gso_segment(struct sk_buff *skb)
1833 {
1834 	struct net_device *dev = skb->dev;
1835 	struct sk_buff *segs;
1836 	int features = dev->features & ~(illegal_highdma(dev, skb) ?
1837 					 NETIF_F_SG : 0);
1838 
1839 	segs = skb_gso_segment(skb, features);
1840 
1841 	/* Verifying header integrity only. */
1842 	if (!segs)
1843 		return 0;
1844 
1845 	if (IS_ERR(segs))
1846 		return PTR_ERR(segs);
1847 
1848 	skb->next = segs;
1849 	DEV_GSO_CB(skb)->destructor = skb->destructor;
1850 	skb->destructor = dev_gso_skb_destructor;
1851 
1852 	return 0;
1853 }
1854 
1855 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1856 			struct netdev_queue *txq)
1857 {
1858 	const struct net_device_ops *ops = dev->netdev_ops;
1859 	int rc = NETDEV_TX_OK;
1860 
1861 	if (likely(!skb->next)) {
1862 		if (!list_empty(&ptype_all))
1863 			dev_queue_xmit_nit(skb, dev);
1864 
1865 		if (netif_needs_gso(dev, skb)) {
1866 			if (unlikely(dev_gso_segment(skb)))
1867 				goto out_kfree_skb;
1868 			if (skb->next)
1869 				goto gso;
1870 		}
1871 
1872 		/*
1873 		 * If device doesnt need skb->dst, release it right now while
1874 		 * its hot in this cpu cache
1875 		 */
1876 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1877 			skb_dst_drop(skb);
1878 
1879 		rc = ops->ndo_start_xmit(skb, dev);
1880 		if (rc == NETDEV_TX_OK)
1881 			txq_trans_update(txq);
1882 		/*
1883 		 * TODO: if skb_orphan() was called by
1884 		 * dev->hard_start_xmit() (for example, the unmodified
1885 		 * igb driver does that; bnx2 doesn't), then
1886 		 * skb_tx_software_timestamp() will be unable to send
1887 		 * back the time stamp.
1888 		 *
1889 		 * How can this be prevented? Always create another
1890 		 * reference to the socket before calling
1891 		 * dev->hard_start_xmit()? Prevent that skb_orphan()
1892 		 * does anything in dev->hard_start_xmit() by clearing
1893 		 * the skb destructor before the call and restoring it
1894 		 * afterwards, then doing the skb_orphan() ourselves?
1895 		 */
1896 		return rc;
1897 	}
1898 
1899 gso:
1900 	do {
1901 		struct sk_buff *nskb = skb->next;
1902 
1903 		skb->next = nskb->next;
1904 		nskb->next = NULL;
1905 
1906 		/*
1907 		 * If device doesnt need nskb->dst, release it right now while
1908 		 * its hot in this cpu cache
1909 		 */
1910 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1911 			skb_dst_drop(nskb);
1912 
1913 		rc = ops->ndo_start_xmit(nskb, dev);
1914 		if (unlikely(rc != NETDEV_TX_OK)) {
1915 			if (rc & ~NETDEV_TX_MASK)
1916 				goto out_kfree_gso_skb;
1917 			nskb->next = skb->next;
1918 			skb->next = nskb;
1919 			return rc;
1920 		}
1921 		txq_trans_update(txq);
1922 		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
1923 			return NETDEV_TX_BUSY;
1924 	} while (skb->next);
1925 
1926 out_kfree_gso_skb:
1927 	if (likely(skb->next == NULL))
1928 		skb->destructor = DEV_GSO_CB(skb)->destructor;
1929 out_kfree_skb:
1930 	kfree_skb(skb);
1931 	return rc;
1932 }
1933 
1934 static u32 skb_tx_hashrnd;
1935 
1936 u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
1937 {
1938 	u32 hash;
1939 
1940 	if (skb_rx_queue_recorded(skb)) {
1941 		hash = skb_get_rx_queue(skb);
1942 		while (unlikely(hash >= dev->real_num_tx_queues))
1943 			hash -= dev->real_num_tx_queues;
1944 		return hash;
1945 	}
1946 
1947 	if (skb->sk && skb->sk->sk_hash)
1948 		hash = skb->sk->sk_hash;
1949 	else
1950 		hash = skb->protocol;
1951 
1952 	hash = jhash_1word(hash, skb_tx_hashrnd);
1953 
1954 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1955 }
1956 EXPORT_SYMBOL(skb_tx_hash);
1957 
1958 static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
1959 {
1960 	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
1961 		if (net_ratelimit()) {
1962 			WARN(1, "%s selects TX queue %d, but "
1963 			     "real number of TX queues is %d\n",
1964 			     dev->name, queue_index,
1965 			     dev->real_num_tx_queues);
1966 		}
1967 		return 0;
1968 	}
1969 	return queue_index;
1970 }
1971 
1972 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1973 					struct sk_buff *skb)
1974 {
1975 	u16 queue_index;
1976 	struct sock *sk = skb->sk;
1977 
1978 	if (sk_tx_queue_recorded(sk)) {
1979 		queue_index = sk_tx_queue_get(sk);
1980 	} else {
1981 		const struct net_device_ops *ops = dev->netdev_ops;
1982 
1983 		if (ops->ndo_select_queue) {
1984 			queue_index = ops->ndo_select_queue(dev, skb);
1985 			queue_index = dev_cap_txqueue(dev, queue_index);
1986 		} else {
1987 			queue_index = 0;
1988 			if (dev->real_num_tx_queues > 1)
1989 				queue_index = skb_tx_hash(dev, skb);
1990 
1991 			if (sk && sk->sk_dst_cache)
1992 				sk_tx_queue_set(sk, queue_index);
1993 		}
1994 	}
1995 
1996 	skb_set_queue_mapping(skb, queue_index);
1997 	return netdev_get_tx_queue(dev, queue_index);
1998 }
1999 
2000 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2001 				 struct net_device *dev,
2002 				 struct netdev_queue *txq)
2003 {
2004 	spinlock_t *root_lock = qdisc_lock(q);
2005 	int rc;
2006 
2007 	spin_lock(root_lock);
2008 	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
2009 		kfree_skb(skb);
2010 		rc = NET_XMIT_DROP;
2011 	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
2012 		   !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {
2013 		/*
2014 		 * This is a work-conserving queue; there are no old skbs
2015 		 * waiting to be sent out; and the qdisc is not running -
2016 		 * xmit the skb directly.
2017 		 */
2018 		__qdisc_update_bstats(q, skb->len);
2019 		if (sch_direct_xmit(skb, q, dev, txq, root_lock))
2020 			__qdisc_run(q);
2021 		else
2022 			clear_bit(__QDISC_STATE_RUNNING, &q->state);
2023 
2024 		rc = NET_XMIT_SUCCESS;
2025 	} else {
2026 		rc = qdisc_enqueue_root(skb, q);
2027 		qdisc_run(q);
2028 	}
2029 	spin_unlock(root_lock);
2030 
2031 	return rc;
2032 }
2033 
2034 /*
2035  * Returns true if either:
2036  *	1. skb has frag_list and the device doesn't support FRAGLIST, or
2037  *	2. skb is fragmented and the device does not support SG, or if
2038  *	   at least one of fragments is in highmem and device does not
2039  *	   support DMA from it.
2040  */
2041 static inline int skb_needs_linearize(struct sk_buff *skb,
2042 				      struct net_device *dev)
2043 {
2044 	return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
2045 	       (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
2046 					      illegal_highdma(dev, skb)));
2047 }
2048 
2049 /**
2050  *	dev_queue_xmit - transmit a buffer
2051  *	@skb: buffer to transmit
2052  *
2053  *	Queue a buffer for transmission to a network device. The caller must
2054  *	have set the device and priority and built the buffer before calling
2055  *	this function. The function can be called from an interrupt.
2056  *
2057  *	A negative errno code is returned on a failure. A success does not
2058  *	guarantee the frame will be transmitted as it may be dropped due
2059  *	to congestion or traffic shaping.
2060  *
2061  * -----------------------------------------------------------------------------------
2062  *      I notice this method can also return errors from the queue disciplines,
2063  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
2064  *      be positive.
2065  *
2066  *      Regardless of the return value, the skb is consumed, so it is currently
2067  *      difficult to retry a send to this method.  (You can bump the ref count
2068  *      before sending to hold a reference for retry if you are careful.)
2069  *
2070  *      When calling this method, interrupts MUST be enabled.  This is because
2071  *      the BH enable code must have IRQs enabled so that it will not deadlock.
2072  *          --BLG
2073  */
2074 int dev_queue_xmit(struct sk_buff *skb)
2075 {
2076 	struct net_device *dev = skb->dev;
2077 	struct netdev_queue *txq;
2078 	struct Qdisc *q;
2079 	int rc = -ENOMEM;
2080 
2081 	/* GSO will handle the following emulations directly. */
2082 	if (netif_needs_gso(dev, skb))
2083 		goto gso;
2084 
2085 	/* Convert a paged skb to linear, if required */
2086 	if (skb_needs_linearize(skb, dev) && __skb_linearize(skb))
2087 		goto out_kfree_skb;
2088 
2089 	/* If packet is not checksummed and device does not support
2090 	 * checksumming for this protocol, complete checksumming here.
2091 	 */
2092 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
2093 		skb_set_transport_header(skb, skb->csum_start -
2094 					      skb_headroom(skb));
2095 		if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
2096 			goto out_kfree_skb;
2097 	}
2098 
2099 gso:
2100 	/* Disable soft irqs for various locks below. Also
2101 	 * stops preemption for RCU.
2102 	 */
2103 	rcu_read_lock_bh();
2104 
2105 	txq = dev_pick_tx(dev, skb);
2106 	q = rcu_dereference_bh(txq->qdisc);
2107 
2108 #ifdef CONFIG_NET_CLS_ACT
2109 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
2110 #endif
2111 	if (q->enqueue) {
2112 		rc = __dev_xmit_skb(skb, q, dev, txq);
2113 		goto out;
2114 	}
2115 
2116 	/* The device has no queue. Common case for software devices:
2117 	   loopback, all the sorts of tunnels...
2118 
2119 	   Really, it is unlikely that netif_tx_lock protection is necessary
2120 	   here.  (f.e. loopback and IP tunnels are clean ignoring statistics
2121 	   counters.)
2122 	   However, it is possible, that they rely on protection
2123 	   made by us here.
2124 
2125 	   Check this and shot the lock. It is not prone from deadlocks.
2126 	   Either shot noqueue qdisc, it is even simpler 8)
2127 	 */
2128 	if (dev->flags & IFF_UP) {
2129 		int cpu = smp_processor_id(); /* ok because BHs are off */
2130 
2131 		if (txq->xmit_lock_owner != cpu) {
2132 
2133 			HARD_TX_LOCK(dev, txq, cpu);
2134 
2135 			if (!netif_tx_queue_stopped(txq)) {
2136 				rc = dev_hard_start_xmit(skb, dev, txq);
2137 				if (dev_xmit_complete(rc)) {
2138 					HARD_TX_UNLOCK(dev, txq);
2139 					goto out;
2140 				}
2141 			}
2142 			HARD_TX_UNLOCK(dev, txq);
2143 			if (net_ratelimit())
2144 				printk(KERN_CRIT "Virtual device %s asks to "
2145 				       "queue packet!\n", dev->name);
2146 		} else {
2147 			/* Recursion is detected! It is possible,
2148 			 * unfortunately */
2149 			if (net_ratelimit())
2150 				printk(KERN_CRIT "Dead loop on virtual device "
2151 				       "%s, fix it urgently!\n", dev->name);
2152 		}
2153 	}
2154 
2155 	rc = -ENETDOWN;
2156 	rcu_read_unlock_bh();
2157 
2158 out_kfree_skb:
2159 	kfree_skb(skb);
2160 	return rc;
2161 out:
2162 	rcu_read_unlock_bh();
2163 	return rc;
2164 }
2165 EXPORT_SYMBOL(dev_queue_xmit);
2166 
2167 
2168 /*=======================================================================
2169 			Receiver routines
2170   =======================================================================*/
2171 
2172 int netdev_max_backlog __read_mostly = 1000;
2173 int netdev_budget __read_mostly = 300;
2174 int weight_p __read_mostly = 64;            /* old backlog weight */
2175 
2176 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
2177 
2178 
2179 /**
2180  *	netif_rx	-	post buffer to the network code
2181  *	@skb: buffer to post
2182  *
2183  *	This function receives a packet from a device driver and queues it for
2184  *	the upper (protocol) levels to process.  It always succeeds. The buffer
2185  *	may be dropped during processing for congestion control or by the
2186  *	protocol layers.
2187  *
2188  *	return values:
2189  *	NET_RX_SUCCESS	(no congestion)
2190  *	NET_RX_DROP     (packet was dropped)
2191  *
2192  */
2193 
2194 int netif_rx(struct sk_buff *skb)
2195 {
2196 	struct softnet_data *queue;
2197 	unsigned long flags;
2198 
2199 	/* if netpoll wants it, pretend we never saw it */
2200 	if (netpoll_rx(skb))
2201 		return NET_RX_DROP;
2202 
2203 	if (!skb->tstamp.tv64)
2204 		net_timestamp(skb);
2205 
2206 	/*
2207 	 * The code is rearranged so that the path is the most
2208 	 * short when CPU is congested, but is still operating.
2209 	 */
2210 	local_irq_save(flags);
2211 	queue = &__get_cpu_var(softnet_data);
2212 
2213 	__get_cpu_var(netdev_rx_stat).total++;
2214 	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
2215 		if (queue->input_pkt_queue.qlen) {
2216 enqueue:
2217 			__skb_queue_tail(&queue->input_pkt_queue, skb);
2218 			local_irq_restore(flags);
2219 			return NET_RX_SUCCESS;
2220 		}
2221 
2222 		napi_schedule(&queue->backlog);
2223 		goto enqueue;
2224 	}
2225 
2226 	__get_cpu_var(netdev_rx_stat).dropped++;
2227 	local_irq_restore(flags);
2228 
2229 	kfree_skb(skb);
2230 	return NET_RX_DROP;
2231 }
2232 EXPORT_SYMBOL(netif_rx);
2233 
2234 int netif_rx_ni(struct sk_buff *skb)
2235 {
2236 	int err;
2237 
2238 	preempt_disable();
2239 	err = netif_rx(skb);
2240 	if (local_softirq_pending())
2241 		do_softirq();
2242 	preempt_enable();
2243 
2244 	return err;
2245 }
2246 EXPORT_SYMBOL(netif_rx_ni);
2247 
2248 static void net_tx_action(struct softirq_action *h)
2249 {
2250 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
2251 
2252 	if (sd->completion_queue) {
2253 		struct sk_buff *clist;
2254 
2255 		local_irq_disable();
2256 		clist = sd->completion_queue;
2257 		sd->completion_queue = NULL;
2258 		local_irq_enable();
2259 
2260 		while (clist) {
2261 			struct sk_buff *skb = clist;
2262 			clist = clist->next;
2263 
2264 			WARN_ON(atomic_read(&skb->users));
2265 			__kfree_skb(skb);
2266 		}
2267 	}
2268 
2269 	if (sd->output_queue) {
2270 		struct Qdisc *head;
2271 
2272 		local_irq_disable();
2273 		head = sd->output_queue;
2274 		sd->output_queue = NULL;
2275 		local_irq_enable();
2276 
2277 		while (head) {
2278 			struct Qdisc *q = head;
2279 			spinlock_t *root_lock;
2280 
2281 			head = head->next_sched;
2282 
2283 			root_lock = qdisc_lock(q);
2284 			if (spin_trylock(root_lock)) {
2285 				smp_mb__before_clear_bit();
2286 				clear_bit(__QDISC_STATE_SCHED,
2287 					  &q->state);
2288 				qdisc_run(q);
2289 				spin_unlock(root_lock);
2290 			} else {
2291 				if (!test_bit(__QDISC_STATE_DEACTIVATED,
2292 					      &q->state)) {
2293 					__netif_reschedule(q);
2294 				} else {
2295 					smp_mb__before_clear_bit();
2296 					clear_bit(__QDISC_STATE_SCHED,
2297 						  &q->state);
2298 				}
2299 			}
2300 		}
2301 	}
2302 }
2303 
2304 static inline int deliver_skb(struct sk_buff *skb,
2305 			      struct packet_type *pt_prev,
2306 			      struct net_device *orig_dev)
2307 {
2308 	atomic_inc(&skb->users);
2309 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2310 }
2311 
2312 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
2313 
2314 #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
2315 /* This hook is defined here for ATM LANE */
2316 int (*br_fdb_test_addr_hook)(struct net_device *dev,
2317 			     unsigned char *addr) __read_mostly;
2318 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
2319 #endif
2320 
2321 /*
2322  * If bridge module is loaded call bridging hook.
2323  *  returns NULL if packet was consumed.
2324  */
2325 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2326 					struct sk_buff *skb) __read_mostly;
2327 EXPORT_SYMBOL_GPL(br_handle_frame_hook);
2328 
2329 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2330 					    struct packet_type **pt_prev, int *ret,
2331 					    struct net_device *orig_dev)
2332 {
2333 	struct net_bridge_port *port;
2334 
2335 	if (skb->pkt_type == PACKET_LOOPBACK ||
2336 	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
2337 		return skb;
2338 
2339 	if (*pt_prev) {
2340 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2341 		*pt_prev = NULL;
2342 	}
2343 
2344 	return br_handle_frame_hook(port, skb);
2345 }
2346 #else
2347 #define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
2348 #endif
2349 
2350 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2351 struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2352 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2353 
2354 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2355 					     struct packet_type **pt_prev,
2356 					     int *ret,
2357 					     struct net_device *orig_dev)
2358 {
2359 	if (skb->dev->macvlan_port == NULL)
2360 		return skb;
2361 
2362 	if (*pt_prev) {
2363 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2364 		*pt_prev = NULL;
2365 	}
2366 	return macvlan_handle_frame_hook(skb);
2367 }
2368 #else
2369 #define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
2370 #endif
2371 
2372 #ifdef CONFIG_NET_CLS_ACT
2373 /* TODO: Maybe we should just force sch_ingress to be compiled in
2374  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
2375  * a compare and 2 stores extra right now if we dont have it on
2376  * but have CONFIG_NET_CLS_ACT
2377  * NOTE: This doesnt stop any functionality; if you dont have
2378  * the ingress scheduler, you just cant add policies on ingress.
2379  *
2380  */
2381 static int ing_filter(struct sk_buff *skb)
2382 {
2383 	struct net_device *dev = skb->dev;
2384 	u32 ttl = G_TC_RTTL(skb->tc_verd);
2385 	struct netdev_queue *rxq;
2386 	int result = TC_ACT_OK;
2387 	struct Qdisc *q;
2388 
2389 	if (MAX_RED_LOOP < ttl++) {
2390 		printk(KERN_WARNING
2391 		       "Redir loop detected Dropping packet (%d->%d)\n",
2392 		       skb->skb_iif, dev->ifindex);
2393 		return TC_ACT_SHOT;
2394 	}
2395 
2396 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2397 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2398 
2399 	rxq = &dev->rx_queue;
2400 
2401 	q = rxq->qdisc;
2402 	if (q != &noop_qdisc) {
2403 		spin_lock(qdisc_lock(q));
2404 		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2405 			result = qdisc_enqueue_root(skb, q);
2406 		spin_unlock(qdisc_lock(q));
2407 	}
2408 
2409 	return result;
2410 }
2411 
2412 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2413 					 struct packet_type **pt_prev,
2414 					 int *ret, struct net_device *orig_dev)
2415 {
2416 	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
2417 		goto out;
2418 
2419 	if (*pt_prev) {
2420 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2421 		*pt_prev = NULL;
2422 	} else {
2423 		/* Huh? Why does turning on AF_PACKET affect this? */
2424 		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2425 	}
2426 
2427 	switch (ing_filter(skb)) {
2428 	case TC_ACT_SHOT:
2429 	case TC_ACT_STOLEN:
2430 		kfree_skb(skb);
2431 		return NULL;
2432 	}
2433 
2434 out:
2435 	skb->tc_verd = 0;
2436 	return skb;
2437 }
2438 #endif
2439 
2440 /*
2441  * 	netif_nit_deliver - deliver received packets to network taps
2442  * 	@skb: buffer
2443  *
2444  * 	This function is used to deliver incoming packets to network
2445  * 	taps. It should be used when the normal netif_receive_skb path
2446  * 	is bypassed, for example because of VLAN acceleration.
2447  */
2448 void netif_nit_deliver(struct sk_buff *skb)
2449 {
2450 	struct packet_type *ptype;
2451 
2452 	if (list_empty(&ptype_all))
2453 		return;
2454 
2455 	skb_reset_network_header(skb);
2456 	skb_reset_transport_header(skb);
2457 	skb->mac_len = skb->network_header - skb->mac_header;
2458 
2459 	rcu_read_lock();
2460 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2461 		if (!ptype->dev || ptype->dev == skb->dev)
2462 			deliver_skb(skb, ptype, skb->dev);
2463 	}
2464 	rcu_read_unlock();
2465 }
2466 
2467 /**
2468  *	netif_receive_skb - process receive buffer from network
2469  *	@skb: buffer to process
2470  *
2471  *	netif_receive_skb() is the main receive data processing function.
2472  *	It always succeeds. The buffer may be dropped during processing
2473  *	for congestion control or by the protocol layers.
2474  *
2475  *	This function may only be called from softirq context and interrupts
2476  *	should be enabled.
2477  *
2478  *	Return values (usually ignored):
2479  *	NET_RX_SUCCESS: no congestion
2480  *	NET_RX_DROP: packet was dropped
2481  */
2482 int netif_receive_skb(struct sk_buff *skb)
2483 {
2484 	struct packet_type *ptype, *pt_prev;
2485 	struct net_device *orig_dev;
2486 	struct net_device *null_or_orig;
2487 	struct net_device *null_or_bond;
2488 	int ret = NET_RX_DROP;
2489 	__be16 type;
2490 
2491 	if (!skb->tstamp.tv64)
2492 		net_timestamp(skb);
2493 
2494 	if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
2495 		return NET_RX_SUCCESS;
2496 
2497 	/* if we've gotten here through NAPI, check netpoll */
2498 	if (netpoll_receive_skb(skb))
2499 		return NET_RX_DROP;
2500 
2501 	if (!skb->skb_iif)
2502 		skb->skb_iif = skb->dev->ifindex;
2503 
2504 	null_or_orig = NULL;
2505 	orig_dev = skb->dev;
2506 	if (orig_dev->master) {
2507 		if (skb_bond_should_drop(skb))
2508 			null_or_orig = orig_dev; /* deliver only exact match */
2509 		else
2510 			skb->dev = orig_dev->master;
2511 	}
2512 
2513 	__get_cpu_var(netdev_rx_stat).total++;
2514 
2515 	skb_reset_network_header(skb);
2516 	skb_reset_transport_header(skb);
2517 	skb->mac_len = skb->network_header - skb->mac_header;
2518 
2519 	pt_prev = NULL;
2520 
2521 	rcu_read_lock();
2522 
2523 #ifdef CONFIG_NET_CLS_ACT
2524 	if (skb->tc_verd & TC_NCLS) {
2525 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2526 		goto ncls;
2527 	}
2528 #endif
2529 
2530 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2531 		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2532 		    ptype->dev == orig_dev) {
2533 			if (pt_prev)
2534 				ret = deliver_skb(skb, pt_prev, orig_dev);
2535 			pt_prev = ptype;
2536 		}
2537 	}
2538 
2539 #ifdef CONFIG_NET_CLS_ACT
2540 	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2541 	if (!skb)
2542 		goto out;
2543 ncls:
2544 #endif
2545 
2546 	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2547 	if (!skb)
2548 		goto out;
2549 	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2550 	if (!skb)
2551 		goto out;
2552 
2553 	/*
2554 	 * Make sure frames received on VLAN interfaces stacked on
2555 	 * bonding interfaces still make their way to any base bonding
2556 	 * device that may have registered for a specific ptype.  The
2557 	 * handler may have to adjust skb->dev and orig_dev.
2558 	 */
2559 	null_or_bond = NULL;
2560 	if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
2561 	    (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
2562 		null_or_bond = vlan_dev_real_dev(skb->dev);
2563 	}
2564 
2565 	type = skb->protocol;
2566 	list_for_each_entry_rcu(ptype,
2567 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2568 		if (ptype->type == type && (ptype->dev == null_or_orig ||
2569 		     ptype->dev == skb->dev || ptype->dev == orig_dev ||
2570 		     ptype->dev == null_or_bond)) {
2571 			if (pt_prev)
2572 				ret = deliver_skb(skb, pt_prev, orig_dev);
2573 			pt_prev = ptype;
2574 		}
2575 	}
2576 
2577 	if (pt_prev) {
2578 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2579 	} else {
2580 		kfree_skb(skb);
2581 		/* Jamal, now you will not able to escape explaining
2582 		 * me how you were going to use this. :-)
2583 		 */
2584 		ret = NET_RX_DROP;
2585 	}
2586 
2587 out:
2588 	rcu_read_unlock();
2589 	return ret;
2590 }
2591 EXPORT_SYMBOL(netif_receive_skb);
2592 
2593 /* Network device is going away, flush any packets still pending  */
2594 static void flush_backlog(void *arg)
2595 {
2596 	struct net_device *dev = arg;
2597 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2598 	struct sk_buff *skb, *tmp;
2599 
2600 	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2601 		if (skb->dev == dev) {
2602 			__skb_unlink(skb, &queue->input_pkt_queue);
2603 			kfree_skb(skb);
2604 		}
2605 }
2606 
2607 static int napi_gro_complete(struct sk_buff *skb)
2608 {
2609 	struct packet_type *ptype;
2610 	__be16 type = skb->protocol;
2611 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2612 	int err = -ENOENT;
2613 
2614 	if (NAPI_GRO_CB(skb)->count == 1) {
2615 		skb_shinfo(skb)->gso_size = 0;
2616 		goto out;
2617 	}
2618 
2619 	rcu_read_lock();
2620 	list_for_each_entry_rcu(ptype, head, list) {
2621 		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
2622 			continue;
2623 
2624 		err = ptype->gro_complete(skb);
2625 		break;
2626 	}
2627 	rcu_read_unlock();
2628 
2629 	if (err) {
2630 		WARN_ON(&ptype->list == head);
2631 		kfree_skb(skb);
2632 		return NET_RX_SUCCESS;
2633 	}
2634 
2635 out:
2636 	return netif_receive_skb(skb);
2637 }
2638 
2639 static void napi_gro_flush(struct napi_struct *napi)
2640 {
2641 	struct sk_buff *skb, *next;
2642 
2643 	for (skb = napi->gro_list; skb; skb = next) {
2644 		next = skb->next;
2645 		skb->next = NULL;
2646 		napi_gro_complete(skb);
2647 	}
2648 
2649 	napi->gro_count = 0;
2650 	napi->gro_list = NULL;
2651 }
2652 
2653 enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2654 {
2655 	struct sk_buff **pp = NULL;
2656 	struct packet_type *ptype;
2657 	__be16 type = skb->protocol;
2658 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2659 	int same_flow;
2660 	int mac_len;
2661 	enum gro_result ret;
2662 
2663 	if (!(skb->dev->features & NETIF_F_GRO))
2664 		goto normal;
2665 
2666 	if (skb_is_gso(skb) || skb_has_frags(skb))
2667 		goto normal;
2668 
2669 	rcu_read_lock();
2670 	list_for_each_entry_rcu(ptype, head, list) {
2671 		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
2672 			continue;
2673 
2674 		skb_set_network_header(skb, skb_gro_offset(skb));
2675 		mac_len = skb->network_header - skb->mac_header;
2676 		skb->mac_len = mac_len;
2677 		NAPI_GRO_CB(skb)->same_flow = 0;
2678 		NAPI_GRO_CB(skb)->flush = 0;
2679 		NAPI_GRO_CB(skb)->free = 0;
2680 
2681 		pp = ptype->gro_receive(&napi->gro_list, skb);
2682 		break;
2683 	}
2684 	rcu_read_unlock();
2685 
2686 	if (&ptype->list == head)
2687 		goto normal;
2688 
2689 	same_flow = NAPI_GRO_CB(skb)->same_flow;
2690 	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
2691 
2692 	if (pp) {
2693 		struct sk_buff *nskb = *pp;
2694 
2695 		*pp = nskb->next;
2696 		nskb->next = NULL;
2697 		napi_gro_complete(nskb);
2698 		napi->gro_count--;
2699 	}
2700 
2701 	if (same_flow)
2702 		goto ok;
2703 
2704 	if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
2705 		goto normal;
2706 
2707 	napi->gro_count++;
2708 	NAPI_GRO_CB(skb)->count = 1;
2709 	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
2710 	skb->next = napi->gro_list;
2711 	napi->gro_list = skb;
2712 	ret = GRO_HELD;
2713 
2714 pull:
2715 	if (skb_headlen(skb) < skb_gro_offset(skb)) {
2716 		int grow = skb_gro_offset(skb) - skb_headlen(skb);
2717 
2718 		BUG_ON(skb->end - skb->tail < grow);
2719 
2720 		memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
2721 
2722 		skb->tail += grow;
2723 		skb->data_len -= grow;
2724 
2725 		skb_shinfo(skb)->frags[0].page_offset += grow;
2726 		skb_shinfo(skb)->frags[0].size -= grow;
2727 
2728 		if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
2729 			put_page(skb_shinfo(skb)->frags[0].page);
2730 			memmove(skb_shinfo(skb)->frags,
2731 				skb_shinfo(skb)->frags + 1,
2732 				--skb_shinfo(skb)->nr_frags);
2733 		}
2734 	}
2735 
2736 ok:
2737 	return ret;
2738 
2739 normal:
2740 	ret = GRO_NORMAL;
2741 	goto pull;
2742 }
2743 EXPORT_SYMBOL(dev_gro_receive);
2744 
2745 static gro_result_t
2746 __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2747 {
2748 	struct sk_buff *p;
2749 
2750 	if (netpoll_rx_on(skb))
2751 		return GRO_NORMAL;
2752 
2753 	for (p = napi->gro_list; p; p = p->next) {
2754 		NAPI_GRO_CB(p)->same_flow =
2755 			(p->dev == skb->dev) &&
2756 			!compare_ether_header(skb_mac_header(p),
2757 					      skb_gro_mac_header(skb));
2758 		NAPI_GRO_CB(p)->flush = 0;
2759 	}
2760 
2761 	return dev_gro_receive(napi, skb);
2762 }
2763 
2764 gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
2765 {
2766 	switch (ret) {
2767 	case GRO_NORMAL:
2768 		if (netif_receive_skb(skb))
2769 			ret = GRO_DROP;
2770 		break;
2771 
2772 	case GRO_DROP:
2773 	case GRO_MERGED_FREE:
2774 		kfree_skb(skb);
2775 		break;
2776 
2777 	case GRO_HELD:
2778 	case GRO_MERGED:
2779 		break;
2780 	}
2781 
2782 	return ret;
2783 }
2784 EXPORT_SYMBOL(napi_skb_finish);
2785 
2786 void skb_gro_reset_offset(struct sk_buff *skb)
2787 {
2788 	NAPI_GRO_CB(skb)->data_offset = 0;
2789 	NAPI_GRO_CB(skb)->frag0 = NULL;
2790 	NAPI_GRO_CB(skb)->frag0_len = 0;
2791 
2792 	if (skb->mac_header == skb->tail &&
2793 	    !PageHighMem(skb_shinfo(skb)->frags[0].page)) {
2794 		NAPI_GRO_CB(skb)->frag0 =
2795 			page_address(skb_shinfo(skb)->frags[0].page) +
2796 			skb_shinfo(skb)->frags[0].page_offset;
2797 		NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
2798 	}
2799 }
2800 EXPORT_SYMBOL(skb_gro_reset_offset);
2801 
2802 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2803 {
2804 	skb_gro_reset_offset(skb);
2805 
2806 	return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
2807 }
2808 EXPORT_SYMBOL(napi_gro_receive);
2809 
2810 void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
2811 {
2812 	__skb_pull(skb, skb_headlen(skb));
2813 	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
2814 
2815 	napi->skb = skb;
2816 }
2817 EXPORT_SYMBOL(napi_reuse_skb);
2818 
2819 struct sk_buff *napi_get_frags(struct napi_struct *napi)
2820 {
2821 	struct sk_buff *skb = napi->skb;
2822 
2823 	if (!skb) {
2824 		skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
2825 		if (skb)
2826 			napi->skb = skb;
2827 	}
2828 	return skb;
2829 }
2830 EXPORT_SYMBOL(napi_get_frags);
2831 
2832 gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
2833 			       gro_result_t ret)
2834 {
2835 	switch (ret) {
2836 	case GRO_NORMAL:
2837 	case GRO_HELD:
2838 		skb->protocol = eth_type_trans(skb, skb->dev);
2839 
2840 		if (ret == GRO_HELD)
2841 			skb_gro_pull(skb, -ETH_HLEN);
2842 		else if (netif_receive_skb(skb))
2843 			ret = GRO_DROP;
2844 		break;
2845 
2846 	case GRO_DROP:
2847 	case GRO_MERGED_FREE:
2848 		napi_reuse_skb(napi, skb);
2849 		break;
2850 
2851 	case GRO_MERGED:
2852 		break;
2853 	}
2854 
2855 	return ret;
2856 }
2857 EXPORT_SYMBOL(napi_frags_finish);
2858 
2859 struct sk_buff *napi_frags_skb(struct napi_struct *napi)
2860 {
2861 	struct sk_buff *skb = napi->skb;
2862 	struct ethhdr *eth;
2863 	unsigned int hlen;
2864 	unsigned int off;
2865 
2866 	napi->skb = NULL;
2867 
2868 	skb_reset_mac_header(skb);
2869 	skb_gro_reset_offset(skb);
2870 
2871 	off = skb_gro_offset(skb);
2872 	hlen = off + sizeof(*eth);
2873 	eth = skb_gro_header_fast(skb, off);
2874 	if (skb_gro_header_hard(skb, hlen)) {
2875 		eth = skb_gro_header_slow(skb, hlen, off);
2876 		if (unlikely(!eth)) {
2877 			napi_reuse_skb(napi, skb);
2878 			skb = NULL;
2879 			goto out;
2880 		}
2881 	}
2882 
2883 	skb_gro_pull(skb, sizeof(*eth));
2884 
2885 	/*
2886 	 * This works because the only protocols we care about don't require
2887 	 * special handling.  We'll fix it up properly at the end.
2888 	 */
2889 	skb->protocol = eth->h_proto;
2890 
2891 out:
2892 	return skb;
2893 }
2894 EXPORT_SYMBOL(napi_frags_skb);
2895 
2896 gro_result_t napi_gro_frags(struct napi_struct *napi)
2897 {
2898 	struct sk_buff *skb = napi_frags_skb(napi);
2899 
2900 	if (!skb)
2901 		return GRO_DROP;
2902 
2903 	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
2904 }
2905 EXPORT_SYMBOL(napi_gro_frags);
2906 
2907 static int process_backlog(struct napi_struct *napi, int quota)
2908 {
2909 	int work = 0;
2910 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2911 	unsigned long start_time = jiffies;
2912 
2913 	napi->weight = weight_p;
2914 	do {
2915 		struct sk_buff *skb;
2916 
2917 		local_irq_disable();
2918 		skb = __skb_dequeue(&queue->input_pkt_queue);
2919 		if (!skb) {
2920 			__napi_complete(napi);
2921 			local_irq_enable();
2922 			break;
2923 		}
2924 		local_irq_enable();
2925 
2926 		netif_receive_skb(skb);
2927 	} while (++work < quota && jiffies == start_time);
2928 
2929 	return work;
2930 }
2931 
2932 /**
2933  * __napi_schedule - schedule for receive
2934  * @n: entry to schedule
2935  *
2936  * The entry's receive function will be scheduled to run
2937  */
2938 void __napi_schedule(struct napi_struct *n)
2939 {
2940 	unsigned long flags;
2941 
2942 	local_irq_save(flags);
2943 	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2944 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2945 	local_irq_restore(flags);
2946 }
2947 EXPORT_SYMBOL(__napi_schedule);
2948 
2949 void __napi_complete(struct napi_struct *n)
2950 {
2951 	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
2952 	BUG_ON(n->gro_list);
2953 
2954 	list_del(&n->poll_list);
2955 	smp_mb__before_clear_bit();
2956 	clear_bit(NAPI_STATE_SCHED, &n->state);
2957 }
2958 EXPORT_SYMBOL(__napi_complete);
2959 
2960 void napi_complete(struct napi_struct *n)
2961 {
2962 	unsigned long flags;
2963 
2964 	/*
2965 	 * don't let napi dequeue from the cpu poll list
2966 	 * just in case its running on a different cpu
2967 	 */
2968 	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
2969 		return;
2970 
2971 	napi_gro_flush(n);
2972 	local_irq_save(flags);
2973 	__napi_complete(n);
2974 	local_irq_restore(flags);
2975 }
2976 EXPORT_SYMBOL(napi_complete);
2977 
2978 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
2979 		    int (*poll)(struct napi_struct *, int), int weight)
2980 {
2981 	INIT_LIST_HEAD(&napi->poll_list);
2982 	napi->gro_count = 0;
2983 	napi->gro_list = NULL;
2984 	napi->skb = NULL;
2985 	napi->poll = poll;
2986 	napi->weight = weight;
2987 	list_add(&napi->dev_list, &dev->napi_list);
2988 	napi->dev = dev;
2989 #ifdef CONFIG_NETPOLL
2990 	spin_lock_init(&napi->poll_lock);
2991 	napi->poll_owner = -1;
2992 #endif
2993 	set_bit(NAPI_STATE_SCHED, &napi->state);
2994 }
2995 EXPORT_SYMBOL(netif_napi_add);
2996 
2997 void netif_napi_del(struct napi_struct *napi)
2998 {
2999 	struct sk_buff *skb, *next;
3000 
3001 	list_del_init(&napi->dev_list);
3002 	napi_free_frags(napi);
3003 
3004 	for (skb = napi->gro_list; skb; skb = next) {
3005 		next = skb->next;
3006 		skb->next = NULL;
3007 		kfree_skb(skb);
3008 	}
3009 
3010 	napi->gro_list = NULL;
3011 	napi->gro_count = 0;
3012 }
3013 EXPORT_SYMBOL(netif_napi_del);
3014 
3015 
3016 static void net_rx_action(struct softirq_action *h)
3017 {
3018 	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
3019 	unsigned long time_limit = jiffies + 2;
3020 	int budget = netdev_budget;
3021 	void *have;
3022 
3023 	local_irq_disable();
3024 
3025 	while (!list_empty(list)) {
3026 		struct napi_struct *n;
3027 		int work, weight;
3028 
3029 		/* If softirq window is exhuasted then punt.
3030 		 * Allow this to run for 2 jiffies since which will allow
3031 		 * an average latency of 1.5/HZ.
3032 		 */
3033 		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
3034 			goto softnet_break;
3035 
3036 		local_irq_enable();
3037 
3038 		/* Even though interrupts have been re-enabled, this
3039 		 * access is safe because interrupts can only add new
3040 		 * entries to the tail of this list, and only ->poll()
3041 		 * calls can remove this head entry from the list.
3042 		 */
3043 		n = list_first_entry(list, struct napi_struct, poll_list);
3044 
3045 		have = netpoll_poll_lock(n);
3046 
3047 		weight = n->weight;
3048 
3049 		/* This NAPI_STATE_SCHED test is for avoiding a race
3050 		 * with netpoll's poll_napi().  Only the entity which
3051 		 * obtains the lock and sees NAPI_STATE_SCHED set will
3052 		 * actually make the ->poll() call.  Therefore we avoid
3053 		 * accidently calling ->poll() when NAPI is not scheduled.
3054 		 */
3055 		work = 0;
3056 		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
3057 			work = n->poll(n, weight);
3058 			trace_napi_poll(n);
3059 		}
3060 
3061 		WARN_ON_ONCE(work > weight);
3062 
3063 		budget -= work;
3064 
3065 		local_irq_disable();
3066 
3067 		/* Drivers must not modify the NAPI state if they
3068 		 * consume the entire weight.  In such cases this code
3069 		 * still "owns" the NAPI instance and therefore can
3070 		 * move the instance around on the list at-will.
3071 		 */
3072 		if (unlikely(work == weight)) {
3073 			if (unlikely(napi_disable_pending(n))) {
3074 				local_irq_enable();
3075 				napi_complete(n);
3076 				local_irq_disable();
3077 			} else
3078 				list_move_tail(&n->poll_list, list);
3079 		}
3080 
3081 		netpoll_poll_unlock(have);
3082 	}
3083 out:
3084 	local_irq_enable();
3085 
3086 #ifdef CONFIG_NET_DMA
3087 	/*
3088 	 * There may not be any more sk_buffs coming right now, so push
3089 	 * any pending DMA copies to hardware
3090 	 */
3091 	dma_issue_pending_all();
3092 #endif
3093 
3094 	return;
3095 
3096 softnet_break:
3097 	__get_cpu_var(netdev_rx_stat).time_squeeze++;
3098 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
3099 	goto out;
3100 }
3101 
3102 static gifconf_func_t *gifconf_list[NPROTO];
3103 
3104 /**
3105  *	register_gifconf	-	register a SIOCGIF handler
3106  *	@family: Address family
3107  *	@gifconf: Function handler
3108  *
3109  *	Register protocol dependent address dumping routines. The handler
3110  *	that is passed must not be freed or reused until it has been replaced
3111  *	by another handler.
3112  */
3113 int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
3114 {
3115 	if (family >= NPROTO)
3116 		return -EINVAL;
3117 	gifconf_list[family] = gifconf;
3118 	return 0;
3119 }
3120 EXPORT_SYMBOL(register_gifconf);
3121 
3122 
3123 /*
3124  *	Map an interface index to its name (SIOCGIFNAME)
3125  */
3126 
3127 /*
3128  *	We need this ioctl for efficient implementation of the
3129  *	if_indextoname() function required by the IPv6 API.  Without
3130  *	it, we would have to search all the interfaces to find a
3131  *	match.  --pb
3132  */
3133 
3134 static int dev_ifname(struct net *net, struct ifreq __user *arg)
3135 {
3136 	struct net_device *dev;
3137 	struct ifreq ifr;
3138 
3139 	/*
3140 	 *	Fetch the caller's info block.
3141 	 */
3142 
3143 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3144 		return -EFAULT;
3145 
3146 	rcu_read_lock();
3147 	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
3148 	if (!dev) {
3149 		rcu_read_unlock();
3150 		return -ENODEV;
3151 	}
3152 
3153 	strcpy(ifr.ifr_name, dev->name);
3154 	rcu_read_unlock();
3155 
3156 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
3157 		return -EFAULT;
3158 	return 0;
3159 }
3160 
3161 /*
3162  *	Perform a SIOCGIFCONF call. This structure will change
3163  *	size eventually, and there is nothing I can do about it.
3164  *	Thus we will need a 'compatibility mode'.
3165  */
3166 
3167 static int dev_ifconf(struct net *net, char __user *arg)
3168 {
3169 	struct ifconf ifc;
3170 	struct net_device *dev;
3171 	char __user *pos;
3172 	int len;
3173 	int total;
3174 	int i;
3175 
3176 	/*
3177 	 *	Fetch the caller's info block.
3178 	 */
3179 
3180 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
3181 		return -EFAULT;
3182 
3183 	pos = ifc.ifc_buf;
3184 	len = ifc.ifc_len;
3185 
3186 	/*
3187 	 *	Loop over the interfaces, and write an info block for each.
3188 	 */
3189 
3190 	total = 0;
3191 	for_each_netdev(net, dev) {
3192 		for (i = 0; i < NPROTO; i++) {
3193 			if (gifconf_list[i]) {
3194 				int done;
3195 				if (!pos)
3196 					done = gifconf_list[i](dev, NULL, 0);
3197 				else
3198 					done = gifconf_list[i](dev, pos + total,
3199 							       len - total);
3200 				if (done < 0)
3201 					return -EFAULT;
3202 				total += done;
3203 			}
3204 		}
3205 	}
3206 
3207 	/*
3208 	 *	All done.  Write the updated control block back to the caller.
3209 	 */
3210 	ifc.ifc_len = total;
3211 
3212 	/*
3213 	 * 	Both BSD and Solaris return 0 here, so we do too.
3214 	 */
3215 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
3216 }
3217 
3218 #ifdef CONFIG_PROC_FS
3219 /*
3220  *	This is invoked by the /proc filesystem handler to display a device
3221  *	in detail.
3222  */
3223 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
3224 	__acquires(RCU)
3225 {
3226 	struct net *net = seq_file_net(seq);
3227 	loff_t off;
3228 	struct net_device *dev;
3229 
3230 	rcu_read_lock();
3231 	if (!*pos)
3232 		return SEQ_START_TOKEN;
3233 
3234 	off = 1;
3235 	for_each_netdev_rcu(net, dev)
3236 		if (off++ == *pos)
3237 			return dev;
3238 
3239 	return NULL;
3240 }
3241 
3242 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3243 {
3244 	struct net_device *dev = (v == SEQ_START_TOKEN) ?
3245 				  first_net_device(seq_file_net(seq)) :
3246 				  next_net_device((struct net_device *)v);
3247 
3248 	++*pos;
3249 	return rcu_dereference(dev);
3250 }
3251 
3252 void dev_seq_stop(struct seq_file *seq, void *v)
3253 	__releases(RCU)
3254 {
3255 	rcu_read_unlock();
3256 }
3257 
3258 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
3259 {
3260 	const struct net_device_stats *stats = dev_get_stats(dev);
3261 
3262 	seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
3263 		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
3264 		   dev->name, stats->rx_bytes, stats->rx_packets,
3265 		   stats->rx_errors,
3266 		   stats->rx_dropped + stats->rx_missed_errors,
3267 		   stats->rx_fifo_errors,
3268 		   stats->rx_length_errors + stats->rx_over_errors +
3269 		    stats->rx_crc_errors + stats->rx_frame_errors,
3270 		   stats->rx_compressed, stats->multicast,
3271 		   stats->tx_bytes, stats->tx_packets,
3272 		   stats->tx_errors, stats->tx_dropped,
3273 		   stats->tx_fifo_errors, stats->collisions,
3274 		   stats->tx_carrier_errors +
3275 		    stats->tx_aborted_errors +
3276 		    stats->tx_window_errors +
3277 		    stats->tx_heartbeat_errors,
3278 		   stats->tx_compressed);
3279 }
3280 
3281 /*
3282  *	Called from the PROCfs module. This now uses the new arbitrary sized
3283  *	/proc/net interface to create /proc/net/dev
3284  */
3285 static int dev_seq_show(struct seq_file *seq, void *v)
3286 {
3287 	if (v == SEQ_START_TOKEN)
3288 		seq_puts(seq, "Inter-|   Receive                            "
3289 			      "                    |  Transmit\n"
3290 			      " face |bytes    packets errs drop fifo frame "
3291 			      "compressed multicast|bytes    packets errs "
3292 			      "drop fifo colls carrier compressed\n");
3293 	else
3294 		dev_seq_printf_stats(seq, v);
3295 	return 0;
3296 }
3297 
3298 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
3299 {
3300 	struct netif_rx_stats *rc = NULL;
3301 
3302 	while (*pos < nr_cpu_ids)
3303 		if (cpu_online(*pos)) {
3304 			rc = &per_cpu(netdev_rx_stat, *pos);
3305 			break;
3306 		} else
3307 			++*pos;
3308 	return rc;
3309 }
3310 
3311 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
3312 {
3313 	return softnet_get_online(pos);
3314 }
3315 
3316 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3317 {
3318 	++*pos;
3319 	return softnet_get_online(pos);
3320 }
3321 
3322 static void softnet_seq_stop(struct seq_file *seq, void *v)
3323 {
3324 }
3325 
3326 static int softnet_seq_show(struct seq_file *seq, void *v)
3327 {
3328 	struct netif_rx_stats *s = v;
3329 
3330 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
3331 		   s->total, s->dropped, s->time_squeeze, 0,
3332 		   0, 0, 0, 0, /* was fastroute */
3333 		   s->cpu_collision);
3334 	return 0;
3335 }
3336 
3337 static const struct seq_operations dev_seq_ops = {
3338 	.start = dev_seq_start,
3339 	.next  = dev_seq_next,
3340 	.stop  = dev_seq_stop,
3341 	.show  = dev_seq_show,
3342 };
3343 
3344 static int dev_seq_open(struct inode *inode, struct file *file)
3345 {
3346 	return seq_open_net(inode, file, &dev_seq_ops,
3347 			    sizeof(struct seq_net_private));
3348 }
3349 
3350 static const struct file_operations dev_seq_fops = {
3351 	.owner	 = THIS_MODULE,
3352 	.open    = dev_seq_open,
3353 	.read    = seq_read,
3354 	.llseek  = seq_lseek,
3355 	.release = seq_release_net,
3356 };
3357 
3358 static const struct seq_operations softnet_seq_ops = {
3359 	.start = softnet_seq_start,
3360 	.next  = softnet_seq_next,
3361 	.stop  = softnet_seq_stop,
3362 	.show  = softnet_seq_show,
3363 };
3364 
3365 static int softnet_seq_open(struct inode *inode, struct file *file)
3366 {
3367 	return seq_open(file, &softnet_seq_ops);
3368 }
3369 
3370 static const struct file_operations softnet_seq_fops = {
3371 	.owner	 = THIS_MODULE,
3372 	.open    = softnet_seq_open,
3373 	.read    = seq_read,
3374 	.llseek  = seq_lseek,
3375 	.release = seq_release,
3376 };
3377 
3378 static void *ptype_get_idx(loff_t pos)
3379 {
3380 	struct packet_type *pt = NULL;
3381 	loff_t i = 0;
3382 	int t;
3383 
3384 	list_for_each_entry_rcu(pt, &ptype_all, list) {
3385 		if (i == pos)
3386 			return pt;
3387 		++i;
3388 	}
3389 
3390 	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
3391 		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
3392 			if (i == pos)
3393 				return pt;
3394 			++i;
3395 		}
3396 	}
3397 	return NULL;
3398 }
3399 
3400 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
3401 	__acquires(RCU)
3402 {
3403 	rcu_read_lock();
3404 	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
3405 }
3406 
3407 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3408 {
3409 	struct packet_type *pt;
3410 	struct list_head *nxt;
3411 	int hash;
3412 
3413 	++*pos;
3414 	if (v == SEQ_START_TOKEN)
3415 		return ptype_get_idx(0);
3416 
3417 	pt = v;
3418 	nxt = pt->list.next;
3419 	if (pt->type == htons(ETH_P_ALL)) {
3420 		if (nxt != &ptype_all)
3421 			goto found;
3422 		hash = 0;
3423 		nxt = ptype_base[0].next;
3424 	} else
3425 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
3426 
3427 	while (nxt == &ptype_base[hash]) {
3428 		if (++hash >= PTYPE_HASH_SIZE)
3429 			return NULL;
3430 		nxt = ptype_base[hash].next;
3431 	}
3432 found:
3433 	return list_entry(nxt, struct packet_type, list);
3434 }
3435 
3436 static void ptype_seq_stop(struct seq_file *seq, void *v)
3437 	__releases(RCU)
3438 {
3439 	rcu_read_unlock();
3440 }
3441 
3442 static int ptype_seq_show(struct seq_file *seq, void *v)
3443 {
3444 	struct packet_type *pt = v;
3445 
3446 	if (v == SEQ_START_TOKEN)
3447 		seq_puts(seq, "Type Device      Function\n");
3448 	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
3449 		if (pt->type == htons(ETH_P_ALL))
3450 			seq_puts(seq, "ALL ");
3451 		else
3452 			seq_printf(seq, "%04x", ntohs(pt->type));
3453 
3454 		seq_printf(seq, " %-8s %pF\n",
3455 			   pt->dev ? pt->dev->name : "", pt->func);
3456 	}
3457 
3458 	return 0;
3459 }
3460 
3461 static const struct seq_operations ptype_seq_ops = {
3462 	.start = ptype_seq_start,
3463 	.next  = ptype_seq_next,
3464 	.stop  = ptype_seq_stop,
3465 	.show  = ptype_seq_show,
3466 };
3467 
3468 static int ptype_seq_open(struct inode *inode, struct file *file)
3469 {
3470 	return seq_open_net(inode, file, &ptype_seq_ops,
3471 			sizeof(struct seq_net_private));
3472 }
3473 
3474 static const struct file_operations ptype_seq_fops = {
3475 	.owner	 = THIS_MODULE,
3476 	.open    = ptype_seq_open,
3477 	.read    = seq_read,
3478 	.llseek  = seq_lseek,
3479 	.release = seq_release_net,
3480 };
3481 
3482 
3483 static int __net_init dev_proc_net_init(struct net *net)
3484 {
3485 	int rc = -ENOMEM;
3486 
3487 	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
3488 		goto out;
3489 	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
3490 		goto out_dev;
3491 	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
3492 		goto out_softnet;
3493 
3494 	if (wext_proc_init(net))
3495 		goto out_ptype;
3496 	rc = 0;
3497 out:
3498 	return rc;
3499 out_ptype:
3500 	proc_net_remove(net, "ptype");
3501 out_softnet:
3502 	proc_net_remove(net, "softnet_stat");
3503 out_dev:
3504 	proc_net_remove(net, "dev");
3505 	goto out;
3506 }
3507 
3508 static void __net_exit dev_proc_net_exit(struct net *net)
3509 {
3510 	wext_proc_exit(net);
3511 
3512 	proc_net_remove(net, "ptype");
3513 	proc_net_remove(net, "softnet_stat");
3514 	proc_net_remove(net, "dev");
3515 }
3516 
3517 static struct pernet_operations __net_initdata dev_proc_ops = {
3518 	.init = dev_proc_net_init,
3519 	.exit = dev_proc_net_exit,
3520 };
3521 
3522 static int __init dev_proc_init(void)
3523 {
3524 	return register_pernet_subsys(&dev_proc_ops);
3525 }
3526 #else
3527 #define dev_proc_init() 0
3528 #endif	/* CONFIG_PROC_FS */
3529 
3530 
3531 /**
3532  *	netdev_set_master	-	set up master/slave pair
3533  *	@slave: slave device
3534  *	@master: new master device
3535  *
3536  *	Changes the master device of the slave. Pass %NULL to break the
3537  *	bonding. The caller must hold the RTNL semaphore. On a failure
3538  *	a negative errno code is returned. On success the reference counts
3539  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
3540  *	function returns zero.
3541  */
3542 int netdev_set_master(struct net_device *slave, struct net_device *master)
3543 {
3544 	struct net_device *old = slave->master;
3545 
3546 	ASSERT_RTNL();
3547 
3548 	if (master) {
3549 		if (old)
3550 			return -EBUSY;
3551 		dev_hold(master);
3552 	}
3553 
3554 	slave->master = master;
3555 
3556 	synchronize_net();
3557 
3558 	if (old)
3559 		dev_put(old);
3560 
3561 	if (master)
3562 		slave->flags |= IFF_SLAVE;
3563 	else
3564 		slave->flags &= ~IFF_SLAVE;
3565 
3566 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
3567 	return 0;
3568 }
3569 EXPORT_SYMBOL(netdev_set_master);
3570 
3571 static void dev_change_rx_flags(struct net_device *dev, int flags)
3572 {
3573 	const struct net_device_ops *ops = dev->netdev_ops;
3574 
3575 	if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
3576 		ops->ndo_change_rx_flags(dev, flags);
3577 }
3578 
3579 static int __dev_set_promiscuity(struct net_device *dev, int inc)
3580 {
3581 	unsigned short old_flags = dev->flags;
3582 	uid_t uid;
3583 	gid_t gid;
3584 
3585 	ASSERT_RTNL();
3586 
3587 	dev->flags |= IFF_PROMISC;
3588 	dev->promiscuity += inc;
3589 	if (dev->promiscuity == 0) {
3590 		/*
3591 		 * Avoid overflow.
3592 		 * If inc causes overflow, untouch promisc and return error.
3593 		 */
3594 		if (inc < 0)
3595 			dev->flags &= ~IFF_PROMISC;
3596 		else {
3597 			dev->promiscuity -= inc;
3598 			printk(KERN_WARNING "%s: promiscuity touches roof, "
3599 				"set promiscuity failed, promiscuity feature "
3600 				"of device might be broken.\n", dev->name);
3601 			return -EOVERFLOW;
3602 		}
3603 	}
3604 	if (dev->flags != old_flags) {
3605 		printk(KERN_INFO "device %s %s promiscuous mode\n",
3606 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
3607 							       "left");
3608 		if (audit_enabled) {
3609 			current_uid_gid(&uid, &gid);
3610 			audit_log(current->audit_context, GFP_ATOMIC,
3611 				AUDIT_ANOM_PROMISCUOUS,
3612 				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
3613 				dev->name, (dev->flags & IFF_PROMISC),
3614 				(old_flags & IFF_PROMISC),
3615 				audit_get_loginuid(current),
3616 				uid, gid,
3617 				audit_get_sessionid(current));
3618 		}
3619 
3620 		dev_change_rx_flags(dev, IFF_PROMISC);
3621 	}
3622 	return 0;
3623 }
3624 
3625 /**
3626  *	dev_set_promiscuity	- update promiscuity count on a device
3627  *	@dev: device
3628  *	@inc: modifier
3629  *
3630  *	Add or remove promiscuity from a device. While the count in the device
3631  *	remains above zero the interface remains promiscuous. Once it hits zero
3632  *	the device reverts back to normal filtering operation. A negative inc
3633  *	value is used to drop promiscuity on the device.
3634  *	Return 0 if successful or a negative errno code on error.
3635  */
3636 int dev_set_promiscuity(struct net_device *dev, int inc)
3637 {
3638 	unsigned short old_flags = dev->flags;
3639 	int err;
3640 
3641 	err = __dev_set_promiscuity(dev, inc);
3642 	if (err < 0)
3643 		return err;
3644 	if (dev->flags != old_flags)
3645 		dev_set_rx_mode(dev);
3646 	return err;
3647 }
3648 EXPORT_SYMBOL(dev_set_promiscuity);
3649 
3650 /**
3651  *	dev_set_allmulti	- update allmulti count on a device
3652  *	@dev: device
3653  *	@inc: modifier
3654  *
3655  *	Add or remove reception of all multicast frames to a device. While the
3656  *	count in the device remains above zero the interface remains listening
3657  *	to all interfaces. Once it hits zero the device reverts back to normal
3658  *	filtering operation. A negative @inc value is used to drop the counter
3659  *	when releasing a resource needing all multicasts.
3660  *	Return 0 if successful or a negative errno code on error.
3661  */
3662 
3663 int dev_set_allmulti(struct net_device *dev, int inc)
3664 {
3665 	unsigned short old_flags = dev->flags;
3666 
3667 	ASSERT_RTNL();
3668 
3669 	dev->flags |= IFF_ALLMULTI;
3670 	dev->allmulti += inc;
3671 	if (dev->allmulti == 0) {
3672 		/*
3673 		 * Avoid overflow.
3674 		 * If inc causes overflow, untouch allmulti and return error.
3675 		 */
3676 		if (inc < 0)
3677 			dev->flags &= ~IFF_ALLMULTI;
3678 		else {
3679 			dev->allmulti -= inc;
3680 			printk(KERN_WARNING "%s: allmulti touches roof, "
3681 				"set allmulti failed, allmulti feature of "
3682 				"device might be broken.\n", dev->name);
3683 			return -EOVERFLOW;
3684 		}
3685 	}
3686 	if (dev->flags ^ old_flags) {
3687 		dev_change_rx_flags(dev, IFF_ALLMULTI);
3688 		dev_set_rx_mode(dev);
3689 	}
3690 	return 0;
3691 }
3692 EXPORT_SYMBOL(dev_set_allmulti);
3693 
3694 /*
3695  *	Upload unicast and multicast address lists to device and
3696  *	configure RX filtering. When the device doesn't support unicast
3697  *	filtering it is put in promiscuous mode while unicast addresses
3698  *	are present.
3699  */
3700 void __dev_set_rx_mode(struct net_device *dev)
3701 {
3702 	const struct net_device_ops *ops = dev->netdev_ops;
3703 
3704 	/* dev_open will call this function so the list will stay sane. */
3705 	if (!(dev->flags&IFF_UP))
3706 		return;
3707 
3708 	if (!netif_device_present(dev))
3709 		return;
3710 
3711 	if (ops->ndo_set_rx_mode)
3712 		ops->ndo_set_rx_mode(dev);
3713 	else {
3714 		/* Unicast addresses changes may only happen under the rtnl,
3715 		 * therefore calling __dev_set_promiscuity here is safe.
3716 		 */
3717 		if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
3718 			__dev_set_promiscuity(dev, 1);
3719 			dev->uc_promisc = 1;
3720 		} else if (netdev_uc_empty(dev) && dev->uc_promisc) {
3721 			__dev_set_promiscuity(dev, -1);
3722 			dev->uc_promisc = 0;
3723 		}
3724 
3725 		if (ops->ndo_set_multicast_list)
3726 			ops->ndo_set_multicast_list(dev);
3727 	}
3728 }
3729 
3730 void dev_set_rx_mode(struct net_device *dev)
3731 {
3732 	netif_addr_lock_bh(dev);
3733 	__dev_set_rx_mode(dev);
3734 	netif_addr_unlock_bh(dev);
3735 }
3736 
3737 /* hw addresses list handling functions */
3738 
3739 static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
3740 			 int addr_len, unsigned char addr_type)
3741 {
3742 	struct netdev_hw_addr *ha;
3743 	int alloc_size;
3744 
3745 	if (addr_len > MAX_ADDR_LEN)
3746 		return -EINVAL;
3747 
3748 	list_for_each_entry(ha, &list->list, list) {
3749 		if (!memcmp(ha->addr, addr, addr_len) &&
3750 		    ha->type == addr_type) {
3751 			ha->refcount++;
3752 			return 0;
3753 		}
3754 	}
3755 
3756 
3757 	alloc_size = sizeof(*ha);
3758 	if (alloc_size < L1_CACHE_BYTES)
3759 		alloc_size = L1_CACHE_BYTES;
3760 	ha = kmalloc(alloc_size, GFP_ATOMIC);
3761 	if (!ha)
3762 		return -ENOMEM;
3763 	memcpy(ha->addr, addr, addr_len);
3764 	ha->type = addr_type;
3765 	ha->refcount = 1;
3766 	ha->synced = false;
3767 	list_add_tail_rcu(&ha->list, &list->list);
3768 	list->count++;
3769 	return 0;
3770 }
3771 
3772 static void ha_rcu_free(struct rcu_head *head)
3773 {
3774 	struct netdev_hw_addr *ha;
3775 
3776 	ha = container_of(head, struct netdev_hw_addr, rcu_head);
3777 	kfree(ha);
3778 }
3779 
3780 static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
3781 			 int addr_len, unsigned char addr_type)
3782 {
3783 	struct netdev_hw_addr *ha;
3784 
3785 	list_for_each_entry(ha, &list->list, list) {
3786 		if (!memcmp(ha->addr, addr, addr_len) &&
3787 		    (ha->type == addr_type || !addr_type)) {
3788 			if (--ha->refcount)
3789 				return 0;
3790 			list_del_rcu(&ha->list);
3791 			call_rcu(&ha->rcu_head, ha_rcu_free);
3792 			list->count--;
3793 			return 0;
3794 		}
3795 	}
3796 	return -ENOENT;
3797 }
3798 
3799 static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
3800 				  struct netdev_hw_addr_list *from_list,
3801 				  int addr_len,
3802 				  unsigned char addr_type)
3803 {
3804 	int err;
3805 	struct netdev_hw_addr *ha, *ha2;
3806 	unsigned char type;
3807 
3808 	list_for_each_entry(ha, &from_list->list, list) {
3809 		type = addr_type ? addr_type : ha->type;
3810 		err = __hw_addr_add(to_list, ha->addr, addr_len, type);
3811 		if (err)
3812 			goto unroll;
3813 	}
3814 	return 0;
3815 
3816 unroll:
3817 	list_for_each_entry(ha2, &from_list->list, list) {
3818 		if (ha2 == ha)
3819 			break;
3820 		type = addr_type ? addr_type : ha2->type;
3821 		__hw_addr_del(to_list, ha2->addr, addr_len, type);
3822 	}
3823 	return err;
3824 }
3825 
3826 static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
3827 				   struct netdev_hw_addr_list *from_list,
3828 				   int addr_len,
3829 				   unsigned char addr_type)
3830 {
3831 	struct netdev_hw_addr *ha;
3832 	unsigned char type;
3833 
3834 	list_for_each_entry(ha, &from_list->list, list) {
3835 		type = addr_type ? addr_type : ha->type;
3836 		__hw_addr_del(to_list, ha->addr, addr_len, addr_type);
3837 	}
3838 }
3839 
3840 static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
3841 			  struct netdev_hw_addr_list *from_list,
3842 			  int addr_len)
3843 {
3844 	int err = 0;
3845 	struct netdev_hw_addr *ha, *tmp;
3846 
3847 	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3848 		if (!ha->synced) {
3849 			err = __hw_addr_add(to_list, ha->addr,
3850 					    addr_len, ha->type);
3851 			if (err)
3852 				break;
3853 			ha->synced = true;
3854 			ha->refcount++;
3855 		} else if (ha->refcount == 1) {
3856 			__hw_addr_del(to_list, ha->addr, addr_len, ha->type);
3857 			__hw_addr_del(from_list, ha->addr, addr_len, ha->type);
3858 		}
3859 	}
3860 	return err;
3861 }
3862 
3863 static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
3864 			     struct netdev_hw_addr_list *from_list,
3865 			     int addr_len)
3866 {
3867 	struct netdev_hw_addr *ha, *tmp;
3868 
3869 	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3870 		if (ha->synced) {
3871 			__hw_addr_del(to_list, ha->addr,
3872 				      addr_len, ha->type);
3873 			ha->synced = false;
3874 			__hw_addr_del(from_list, ha->addr,
3875 				      addr_len, ha->type);
3876 		}
3877 	}
3878 }
3879 
3880 static void __hw_addr_flush(struct netdev_hw_addr_list *list)
3881 {
3882 	struct netdev_hw_addr *ha, *tmp;
3883 
3884 	list_for_each_entry_safe(ha, tmp, &list->list, list) {
3885 		list_del_rcu(&ha->list);
3886 		call_rcu(&ha->rcu_head, ha_rcu_free);
3887 	}
3888 	list->count = 0;
3889 }
3890 
3891 static void __hw_addr_init(struct netdev_hw_addr_list *list)
3892 {
3893 	INIT_LIST_HEAD(&list->list);
3894 	list->count = 0;
3895 }
3896 
3897 /* Device addresses handling functions */
3898 
3899 static void dev_addr_flush(struct net_device *dev)
3900 {
3901 	/* rtnl_mutex must be held here */
3902 
3903 	__hw_addr_flush(&dev->dev_addrs);
3904 	dev->dev_addr = NULL;
3905 }
3906 
3907 static int dev_addr_init(struct net_device *dev)
3908 {
3909 	unsigned char addr[MAX_ADDR_LEN];
3910 	struct netdev_hw_addr *ha;
3911 	int err;
3912 
3913 	/* rtnl_mutex must be held here */
3914 
3915 	__hw_addr_init(&dev->dev_addrs);
3916 	memset(addr, 0, sizeof(addr));
3917 	err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
3918 			    NETDEV_HW_ADDR_T_LAN);
3919 	if (!err) {
3920 		/*
3921 		 * Get the first (previously created) address from the list
3922 		 * and set dev_addr pointer to this location.
3923 		 */
3924 		ha = list_first_entry(&dev->dev_addrs.list,
3925 				      struct netdev_hw_addr, list);
3926 		dev->dev_addr = ha->addr;
3927 	}
3928 	return err;
3929 }
3930 
3931 /**
3932  *	dev_addr_add	- Add a device address
3933  *	@dev: device
3934  *	@addr: address to add
3935  *	@addr_type: address type
3936  *
3937  *	Add a device address to the device or increase the reference count if
3938  *	it already exists.
3939  *
3940  *	The caller must hold the rtnl_mutex.
3941  */
3942 int dev_addr_add(struct net_device *dev, unsigned char *addr,
3943 		 unsigned char addr_type)
3944 {
3945 	int err;
3946 
3947 	ASSERT_RTNL();
3948 
3949 	err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
3950 	if (!err)
3951 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3952 	return err;
3953 }
3954 EXPORT_SYMBOL(dev_addr_add);
3955 
3956 /**
3957  *	dev_addr_del	- Release a device address.
3958  *	@dev: device
3959  *	@addr: address to delete
3960  *	@addr_type: address type
3961  *
3962  *	Release reference to a device address and remove it from the device
3963  *	if the reference count drops to zero.
3964  *
3965  *	The caller must hold the rtnl_mutex.
3966  */
3967 int dev_addr_del(struct net_device *dev, unsigned char *addr,
3968 		 unsigned char addr_type)
3969 {
3970 	int err;
3971 	struct netdev_hw_addr *ha;
3972 
3973 	ASSERT_RTNL();
3974 
3975 	/*
3976 	 * We can not remove the first address from the list because
3977 	 * dev->dev_addr points to that.
3978 	 */
3979 	ha = list_first_entry(&dev->dev_addrs.list,
3980 			      struct netdev_hw_addr, list);
3981 	if (ha->addr == dev->dev_addr && ha->refcount == 1)
3982 		return -ENOENT;
3983 
3984 	err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
3985 			    addr_type);
3986 	if (!err)
3987 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3988 	return err;
3989 }
3990 EXPORT_SYMBOL(dev_addr_del);
3991 
3992 /**
3993  *	dev_addr_add_multiple	- Add device addresses from another device
3994  *	@to_dev: device to which addresses will be added
3995  *	@from_dev: device from which addresses will be added
3996  *	@addr_type: address type - 0 means type will be used from from_dev
3997  *
3998  *	Add device addresses of the one device to another.
3999  **
4000  *	The caller must hold the rtnl_mutex.
4001  */
4002 int dev_addr_add_multiple(struct net_device *to_dev,
4003 			  struct net_device *from_dev,
4004 			  unsigned char addr_type)
4005 {
4006 	int err;
4007 
4008 	ASSERT_RTNL();
4009 
4010 	if (from_dev->addr_len != to_dev->addr_len)
4011 		return -EINVAL;
4012 	err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
4013 				     to_dev->addr_len, addr_type);
4014 	if (!err)
4015 		call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
4016 	return err;
4017 }
4018 EXPORT_SYMBOL(dev_addr_add_multiple);
4019 
4020 /**
4021  *	dev_addr_del_multiple	- Delete device addresses by another device
4022  *	@to_dev: device where the addresses will be deleted
4023  *	@from_dev: device by which addresses the addresses will be deleted
4024  *	@addr_type: address type - 0 means type will used from from_dev
4025  *
4026  *	Deletes addresses in to device by the list of addresses in from device.
4027  *
4028  *	The caller must hold the rtnl_mutex.
4029  */
4030 int dev_addr_del_multiple(struct net_device *to_dev,
4031 			  struct net_device *from_dev,
4032 			  unsigned char addr_type)
4033 {
4034 	ASSERT_RTNL();
4035 
4036 	if (from_dev->addr_len != to_dev->addr_len)
4037 		return -EINVAL;
4038 	__hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
4039 			       to_dev->addr_len, addr_type);
4040 	call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
4041 	return 0;
4042 }
4043 EXPORT_SYMBOL(dev_addr_del_multiple);
4044 
4045 /* multicast addresses handling functions */
4046 
4047 int __dev_addr_delete(struct dev_addr_list **list, int *count,
4048 		      void *addr, int alen, int glbl)
4049 {
4050 	struct dev_addr_list *da;
4051 
4052 	for (; (da = *list) != NULL; list = &da->next) {
4053 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
4054 		    alen == da->da_addrlen) {
4055 			if (glbl) {
4056 				int old_glbl = da->da_gusers;
4057 				da->da_gusers = 0;
4058 				if (old_glbl == 0)
4059 					break;
4060 			}
4061 			if (--da->da_users)
4062 				return 0;
4063 
4064 			*list = da->next;
4065 			kfree(da);
4066 			(*count)--;
4067 			return 0;
4068 		}
4069 	}
4070 	return -ENOENT;
4071 }
4072 
4073 int __dev_addr_add(struct dev_addr_list **list, int *count,
4074 		   void *addr, int alen, int glbl)
4075 {
4076 	struct dev_addr_list *da;
4077 
4078 	for (da = *list; da != NULL; da = da->next) {
4079 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
4080 		    da->da_addrlen == alen) {
4081 			if (glbl) {
4082 				int old_glbl = da->da_gusers;
4083 				da->da_gusers = 1;
4084 				if (old_glbl)
4085 					return 0;
4086 			}
4087 			da->da_users++;
4088 			return 0;
4089 		}
4090 	}
4091 
4092 	da = kzalloc(sizeof(*da), GFP_ATOMIC);
4093 	if (da == NULL)
4094 		return -ENOMEM;
4095 	memcpy(da->da_addr, addr, alen);
4096 	da->da_addrlen = alen;
4097 	da->da_users = 1;
4098 	da->da_gusers = glbl ? 1 : 0;
4099 	da->next = *list;
4100 	*list = da;
4101 	(*count)++;
4102 	return 0;
4103 }
4104 
4105 /**
4106  *	dev_unicast_delete	- Release secondary unicast address.
4107  *	@dev: device
4108  *	@addr: address to delete
4109  *
4110  *	Release reference to a secondary unicast address and remove it
4111  *	from the device if the reference count drops to zero.
4112  *
4113  * 	The caller must hold the rtnl_mutex.
4114  */
4115 int dev_unicast_delete(struct net_device *dev, void *addr)
4116 {
4117 	int err;
4118 
4119 	ASSERT_RTNL();
4120 
4121 	netif_addr_lock_bh(dev);
4122 	err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
4123 			    NETDEV_HW_ADDR_T_UNICAST);
4124 	if (!err)
4125 		__dev_set_rx_mode(dev);
4126 	netif_addr_unlock_bh(dev);
4127 	return err;
4128 }
4129 EXPORT_SYMBOL(dev_unicast_delete);
4130 
4131 /**
4132  *	dev_unicast_add		- add a secondary unicast address
4133  *	@dev: device
4134  *	@addr: address to add
4135  *
4136  *	Add a secondary unicast address to the device or increase
4137  *	the reference count if it already exists.
4138  *
4139  *	The caller must hold the rtnl_mutex.
4140  */
4141 int dev_unicast_add(struct net_device *dev, void *addr)
4142 {
4143 	int err;
4144 
4145 	ASSERT_RTNL();
4146 
4147 	netif_addr_lock_bh(dev);
4148 	err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
4149 			    NETDEV_HW_ADDR_T_UNICAST);
4150 	if (!err)
4151 		__dev_set_rx_mode(dev);
4152 	netif_addr_unlock_bh(dev);
4153 	return err;
4154 }
4155 EXPORT_SYMBOL(dev_unicast_add);
4156 
4157 int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
4158 		    struct dev_addr_list **from, int *from_count)
4159 {
4160 	struct dev_addr_list *da, *next;
4161 	int err = 0;
4162 
4163 	da = *from;
4164 	while (da != NULL) {
4165 		next = da->next;
4166 		if (!da->da_synced) {
4167 			err = __dev_addr_add(to, to_count,
4168 					     da->da_addr, da->da_addrlen, 0);
4169 			if (err < 0)
4170 				break;
4171 			da->da_synced = 1;
4172 			da->da_users++;
4173 		} else if (da->da_users == 1) {
4174 			__dev_addr_delete(to, to_count,
4175 					  da->da_addr, da->da_addrlen, 0);
4176 			__dev_addr_delete(from, from_count,
4177 					  da->da_addr, da->da_addrlen, 0);
4178 		}
4179 		da = next;
4180 	}
4181 	return err;
4182 }
4183 EXPORT_SYMBOL_GPL(__dev_addr_sync);
4184 
4185 void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
4186 		       struct dev_addr_list **from, int *from_count)
4187 {
4188 	struct dev_addr_list *da, *next;
4189 
4190 	da = *from;
4191 	while (da != NULL) {
4192 		next = da->next;
4193 		if (da->da_synced) {
4194 			__dev_addr_delete(to, to_count,
4195 					  da->da_addr, da->da_addrlen, 0);
4196 			da->da_synced = 0;
4197 			__dev_addr_delete(from, from_count,
4198 					  da->da_addr, da->da_addrlen, 0);
4199 		}
4200 		da = next;
4201 	}
4202 }
4203 EXPORT_SYMBOL_GPL(__dev_addr_unsync);
4204 
4205 /**
4206  *	dev_unicast_sync - Synchronize device's unicast list to another device
4207  *	@to: destination device
4208  *	@from: source device
4209  *
4210  *	Add newly added addresses to the destination device and release
4211  *	addresses that have no users left. The source device must be
4212  *	locked by netif_tx_lock_bh.
4213  *
4214  *	This function is intended to be called from the dev->set_rx_mode
4215  *	function of layered software devices.
4216  */
4217 int dev_unicast_sync(struct net_device *to, struct net_device *from)
4218 {
4219 	int err = 0;
4220 
4221 	if (to->addr_len != from->addr_len)
4222 		return -EINVAL;
4223 
4224 	netif_addr_lock_bh(to);
4225 	err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
4226 	if (!err)
4227 		__dev_set_rx_mode(to);
4228 	netif_addr_unlock_bh(to);
4229 	return err;
4230 }
4231 EXPORT_SYMBOL(dev_unicast_sync);
4232 
4233 /**
4234  *	dev_unicast_unsync - Remove synchronized addresses from the destination device
4235  *	@to: destination device
4236  *	@from: source device
4237  *
4238  *	Remove all addresses that were added to the destination device by
4239  *	dev_unicast_sync(). This function is intended to be called from the
4240  *	dev->stop function of layered software devices.
4241  */
4242 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
4243 {
4244 	if (to->addr_len != from->addr_len)
4245 		return;
4246 
4247 	netif_addr_lock_bh(from);
4248 	netif_addr_lock(to);
4249 	__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
4250 	__dev_set_rx_mode(to);
4251 	netif_addr_unlock(to);
4252 	netif_addr_unlock_bh(from);
4253 }
4254 EXPORT_SYMBOL(dev_unicast_unsync);
4255 
4256 static void dev_unicast_flush(struct net_device *dev)
4257 {
4258 	netif_addr_lock_bh(dev);
4259 	__hw_addr_flush(&dev->uc);
4260 	netif_addr_unlock_bh(dev);
4261 }
4262 
4263 static void dev_unicast_init(struct net_device *dev)
4264 {
4265 	__hw_addr_init(&dev->uc);
4266 }
4267 
4268 
4269 static void __dev_addr_discard(struct dev_addr_list **list)
4270 {
4271 	struct dev_addr_list *tmp;
4272 
4273 	while (*list != NULL) {
4274 		tmp = *list;
4275 		*list = tmp->next;
4276 		if (tmp->da_users > tmp->da_gusers)
4277 			printk("__dev_addr_discard: address leakage! "
4278 			       "da_users=%d\n", tmp->da_users);
4279 		kfree(tmp);
4280 	}
4281 }
4282 
4283 static void dev_addr_discard(struct net_device *dev)
4284 {
4285 	netif_addr_lock_bh(dev);
4286 
4287 	__dev_addr_discard(&dev->mc_list);
4288 	netdev_mc_count(dev) = 0;
4289 
4290 	netif_addr_unlock_bh(dev);
4291 }
4292 
4293 /**
4294  *	dev_get_flags - get flags reported to userspace
4295  *	@dev: device
4296  *
4297  *	Get the combination of flag bits exported through APIs to userspace.
4298  */
4299 unsigned dev_get_flags(const struct net_device *dev)
4300 {
4301 	unsigned flags;
4302 
4303 	flags = (dev->flags & ~(IFF_PROMISC |
4304 				IFF_ALLMULTI |
4305 				IFF_RUNNING |
4306 				IFF_LOWER_UP |
4307 				IFF_DORMANT)) |
4308 		(dev->gflags & (IFF_PROMISC |
4309 				IFF_ALLMULTI));
4310 
4311 	if (netif_running(dev)) {
4312 		if (netif_oper_up(dev))
4313 			flags |= IFF_RUNNING;
4314 		if (netif_carrier_ok(dev))
4315 			flags |= IFF_LOWER_UP;
4316 		if (netif_dormant(dev))
4317 			flags |= IFF_DORMANT;
4318 	}
4319 
4320 	return flags;
4321 }
4322 EXPORT_SYMBOL(dev_get_flags);
4323 
4324 int __dev_change_flags(struct net_device *dev, unsigned int flags)
4325 {
4326 	int old_flags = dev->flags;
4327 	int ret;
4328 
4329 	ASSERT_RTNL();
4330 
4331 	/*
4332 	 *	Set the flags on our device.
4333 	 */
4334 
4335 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
4336 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
4337 			       IFF_AUTOMEDIA)) |
4338 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
4339 				    IFF_ALLMULTI));
4340 
4341 	/*
4342 	 *	Load in the correct multicast list now the flags have changed.
4343 	 */
4344 
4345 	if ((old_flags ^ flags) & IFF_MULTICAST)
4346 		dev_change_rx_flags(dev, IFF_MULTICAST);
4347 
4348 	dev_set_rx_mode(dev);
4349 
4350 	/*
4351 	 *	Have we downed the interface. We handle IFF_UP ourselves
4352 	 *	according to user attempts to set it, rather than blindly
4353 	 *	setting it.
4354 	 */
4355 
4356 	ret = 0;
4357 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
4358 		ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
4359 
4360 		if (!ret)
4361 			dev_set_rx_mode(dev);
4362 	}
4363 
4364 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
4365 		int inc = (flags & IFF_PROMISC) ? 1 : -1;
4366 
4367 		dev->gflags ^= IFF_PROMISC;
4368 		dev_set_promiscuity(dev, inc);
4369 	}
4370 
4371 	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
4372 	   is important. Some (broken) drivers set IFF_PROMISC, when
4373 	   IFF_ALLMULTI is requested not asking us and not reporting.
4374 	 */
4375 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
4376 		int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
4377 
4378 		dev->gflags ^= IFF_ALLMULTI;
4379 		dev_set_allmulti(dev, inc);
4380 	}
4381 
4382 	return ret;
4383 }
4384 
4385 void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
4386 {
4387 	unsigned int changes = dev->flags ^ old_flags;
4388 
4389 	if (changes & IFF_UP) {
4390 		if (dev->flags & IFF_UP)
4391 			call_netdevice_notifiers(NETDEV_UP, dev);
4392 		else
4393 			call_netdevice_notifiers(NETDEV_DOWN, dev);
4394 	}
4395 
4396 	if (dev->flags & IFF_UP &&
4397 	    (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
4398 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
4399 }
4400 
4401 /**
4402  *	dev_change_flags - change device settings
4403  *	@dev: device
4404  *	@flags: device state flags
4405  *
4406  *	Change settings on device based state flags. The flags are
4407  *	in the userspace exported format.
4408  */
4409 int dev_change_flags(struct net_device *dev, unsigned flags)
4410 {
4411 	int ret, changes;
4412 	int old_flags = dev->flags;
4413 
4414 	ret = __dev_change_flags(dev, flags);
4415 	if (ret < 0)
4416 		return ret;
4417 
4418 	changes = old_flags ^ dev->flags;
4419 	if (changes)
4420 		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
4421 
4422 	__dev_notify_flags(dev, old_flags);
4423 	return ret;
4424 }
4425 EXPORT_SYMBOL(dev_change_flags);
4426 
4427 /**
4428  *	dev_set_mtu - Change maximum transfer unit
4429  *	@dev: device
4430  *	@new_mtu: new transfer unit
4431  *
4432  *	Change the maximum transfer size of the network device.
4433  */
4434 int dev_set_mtu(struct net_device *dev, int new_mtu)
4435 {
4436 	const struct net_device_ops *ops = dev->netdev_ops;
4437 	int err;
4438 
4439 	if (new_mtu == dev->mtu)
4440 		return 0;
4441 
4442 	/*	MTU must be positive.	 */
4443 	if (new_mtu < 0)
4444 		return -EINVAL;
4445 
4446 	if (!netif_device_present(dev))
4447 		return -ENODEV;
4448 
4449 	err = 0;
4450 	if (ops->ndo_change_mtu)
4451 		err = ops->ndo_change_mtu(dev, new_mtu);
4452 	else
4453 		dev->mtu = new_mtu;
4454 
4455 	if (!err && dev->flags & IFF_UP)
4456 		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
4457 	return err;
4458 }
4459 EXPORT_SYMBOL(dev_set_mtu);
4460 
4461 /**
4462  *	dev_set_mac_address - Change Media Access Control Address
4463  *	@dev: device
4464  *	@sa: new address
4465  *
4466  *	Change the hardware (MAC) address of the device
4467  */
4468 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4469 {
4470 	const struct net_device_ops *ops = dev->netdev_ops;
4471 	int err;
4472 
4473 	if (!ops->ndo_set_mac_address)
4474 		return -EOPNOTSUPP;
4475 	if (sa->sa_family != dev->type)
4476 		return -EINVAL;
4477 	if (!netif_device_present(dev))
4478 		return -ENODEV;
4479 	err = ops->ndo_set_mac_address(dev, sa);
4480 	if (!err)
4481 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4482 	return err;
4483 }
4484 EXPORT_SYMBOL(dev_set_mac_address);
4485 
4486 /*
4487  *	Perform the SIOCxIFxxx calls, inside rcu_read_lock()
4488  */
4489 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
4490 {
4491 	int err;
4492 	struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
4493 
4494 	if (!dev)
4495 		return -ENODEV;
4496 
4497 	switch (cmd) {
4498 	case SIOCGIFFLAGS:	/* Get interface flags */
4499 		ifr->ifr_flags = (short) dev_get_flags(dev);
4500 		return 0;
4501 
4502 	case SIOCGIFMETRIC:	/* Get the metric on the interface
4503 				   (currently unused) */
4504 		ifr->ifr_metric = 0;
4505 		return 0;
4506 
4507 	case SIOCGIFMTU:	/* Get the MTU of a device */
4508 		ifr->ifr_mtu = dev->mtu;
4509 		return 0;
4510 
4511 	case SIOCGIFHWADDR:
4512 		if (!dev->addr_len)
4513 			memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
4514 		else
4515 			memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
4516 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4517 		ifr->ifr_hwaddr.sa_family = dev->type;
4518 		return 0;
4519 
4520 	case SIOCGIFSLAVE:
4521 		err = -EINVAL;
4522 		break;
4523 
4524 	case SIOCGIFMAP:
4525 		ifr->ifr_map.mem_start = dev->mem_start;
4526 		ifr->ifr_map.mem_end   = dev->mem_end;
4527 		ifr->ifr_map.base_addr = dev->base_addr;
4528 		ifr->ifr_map.irq       = dev->irq;
4529 		ifr->ifr_map.dma       = dev->dma;
4530 		ifr->ifr_map.port      = dev->if_port;
4531 		return 0;
4532 
4533 	case SIOCGIFINDEX:
4534 		ifr->ifr_ifindex = dev->ifindex;
4535 		return 0;
4536 
4537 	case SIOCGIFTXQLEN:
4538 		ifr->ifr_qlen = dev->tx_queue_len;
4539 		return 0;
4540 
4541 	default:
4542 		/* dev_ioctl() should ensure this case
4543 		 * is never reached
4544 		 */
4545 		WARN_ON(1);
4546 		err = -EINVAL;
4547 		break;
4548 
4549 	}
4550 	return err;
4551 }
4552 
4553 /*
4554  *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
4555  */
4556 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4557 {
4558 	int err;
4559 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4560 	const struct net_device_ops *ops;
4561 
4562 	if (!dev)
4563 		return -ENODEV;
4564 
4565 	ops = dev->netdev_ops;
4566 
4567 	switch (cmd) {
4568 	case SIOCSIFFLAGS:	/* Set interface flags */
4569 		return dev_change_flags(dev, ifr->ifr_flags);
4570 
4571 	case SIOCSIFMETRIC:	/* Set the metric on the interface
4572 				   (currently unused) */
4573 		return -EOPNOTSUPP;
4574 
4575 	case SIOCSIFMTU:	/* Set the MTU of a device */
4576 		return dev_set_mtu(dev, ifr->ifr_mtu);
4577 
4578 	case SIOCSIFHWADDR:
4579 		return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
4580 
4581 	case SIOCSIFHWBROADCAST:
4582 		if (ifr->ifr_hwaddr.sa_family != dev->type)
4583 			return -EINVAL;
4584 		memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
4585 		       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4586 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4587 		return 0;
4588 
4589 	case SIOCSIFMAP:
4590 		if (ops->ndo_set_config) {
4591 			if (!netif_device_present(dev))
4592 				return -ENODEV;
4593 			return ops->ndo_set_config(dev, &ifr->ifr_map);
4594 		}
4595 		return -EOPNOTSUPP;
4596 
4597 	case SIOCADDMULTI:
4598 		if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4599 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4600 			return -EINVAL;
4601 		if (!netif_device_present(dev))
4602 			return -ENODEV;
4603 		return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
4604 				  dev->addr_len, 1);
4605 
4606 	case SIOCDELMULTI:
4607 		if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4608 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4609 			return -EINVAL;
4610 		if (!netif_device_present(dev))
4611 			return -ENODEV;
4612 		return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
4613 				     dev->addr_len, 1);
4614 
4615 	case SIOCSIFTXQLEN:
4616 		if (ifr->ifr_qlen < 0)
4617 			return -EINVAL;
4618 		dev->tx_queue_len = ifr->ifr_qlen;
4619 		return 0;
4620 
4621 	case SIOCSIFNAME:
4622 		ifr->ifr_newname[IFNAMSIZ-1] = '\0';
4623 		return dev_change_name(dev, ifr->ifr_newname);
4624 
4625 	/*
4626 	 *	Unknown or private ioctl
4627 	 */
4628 	default:
4629 		if ((cmd >= SIOCDEVPRIVATE &&
4630 		    cmd <= SIOCDEVPRIVATE + 15) ||
4631 		    cmd == SIOCBONDENSLAVE ||
4632 		    cmd == SIOCBONDRELEASE ||
4633 		    cmd == SIOCBONDSETHWADDR ||
4634 		    cmd == SIOCBONDSLAVEINFOQUERY ||
4635 		    cmd == SIOCBONDINFOQUERY ||
4636 		    cmd == SIOCBONDCHANGEACTIVE ||
4637 		    cmd == SIOCGMIIPHY ||
4638 		    cmd == SIOCGMIIREG ||
4639 		    cmd == SIOCSMIIREG ||
4640 		    cmd == SIOCBRADDIF ||
4641 		    cmd == SIOCBRDELIF ||
4642 		    cmd == SIOCSHWTSTAMP ||
4643 		    cmd == SIOCWANDEV) {
4644 			err = -EOPNOTSUPP;
4645 			if (ops->ndo_do_ioctl) {
4646 				if (netif_device_present(dev))
4647 					err = ops->ndo_do_ioctl(dev, ifr, cmd);
4648 				else
4649 					err = -ENODEV;
4650 			}
4651 		} else
4652 			err = -EINVAL;
4653 
4654 	}
4655 	return err;
4656 }
4657 
4658 /*
4659  *	This function handles all "interface"-type I/O control requests. The actual
4660  *	'doing' part of this is dev_ifsioc above.
4661  */
4662 
4663 /**
4664  *	dev_ioctl	-	network device ioctl
4665  *	@net: the applicable net namespace
4666  *	@cmd: command to issue
4667  *	@arg: pointer to a struct ifreq in user space
4668  *
4669  *	Issue ioctl functions to devices. This is normally called by the
4670  *	user space syscall interfaces but can sometimes be useful for
4671  *	other purposes. The return value is the return from the syscall if
4672  *	positive or a negative errno code on error.
4673  */
4674 
4675 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
4676 {
4677 	struct ifreq ifr;
4678 	int ret;
4679 	char *colon;
4680 
4681 	/* One special case: SIOCGIFCONF takes ifconf argument
4682 	   and requires shared lock, because it sleeps writing
4683 	   to user space.
4684 	 */
4685 
4686 	if (cmd == SIOCGIFCONF) {
4687 		rtnl_lock();
4688 		ret = dev_ifconf(net, (char __user *) arg);
4689 		rtnl_unlock();
4690 		return ret;
4691 	}
4692 	if (cmd == SIOCGIFNAME)
4693 		return dev_ifname(net, (struct ifreq __user *)arg);
4694 
4695 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
4696 		return -EFAULT;
4697 
4698 	ifr.ifr_name[IFNAMSIZ-1] = 0;
4699 
4700 	colon = strchr(ifr.ifr_name, ':');
4701 	if (colon)
4702 		*colon = 0;
4703 
4704 	/*
4705 	 *	See which interface the caller is talking about.
4706 	 */
4707 
4708 	switch (cmd) {
4709 	/*
4710 	 *	These ioctl calls:
4711 	 *	- can be done by all.
4712 	 *	- atomic and do not require locking.
4713 	 *	- return a value
4714 	 */
4715 	case SIOCGIFFLAGS:
4716 	case SIOCGIFMETRIC:
4717 	case SIOCGIFMTU:
4718 	case SIOCGIFHWADDR:
4719 	case SIOCGIFSLAVE:
4720 	case SIOCGIFMAP:
4721 	case SIOCGIFINDEX:
4722 	case SIOCGIFTXQLEN:
4723 		dev_load(net, ifr.ifr_name);
4724 		rcu_read_lock();
4725 		ret = dev_ifsioc_locked(net, &ifr, cmd);
4726 		rcu_read_unlock();
4727 		if (!ret) {
4728 			if (colon)
4729 				*colon = ':';
4730 			if (copy_to_user(arg, &ifr,
4731 					 sizeof(struct ifreq)))
4732 				ret = -EFAULT;
4733 		}
4734 		return ret;
4735 
4736 	case SIOCETHTOOL:
4737 		dev_load(net, ifr.ifr_name);
4738 		rtnl_lock();
4739 		ret = dev_ethtool(net, &ifr);
4740 		rtnl_unlock();
4741 		if (!ret) {
4742 			if (colon)
4743 				*colon = ':';
4744 			if (copy_to_user(arg, &ifr,
4745 					 sizeof(struct ifreq)))
4746 				ret = -EFAULT;
4747 		}
4748 		return ret;
4749 
4750 	/*
4751 	 *	These ioctl calls:
4752 	 *	- require superuser power.
4753 	 *	- require strict serialization.
4754 	 *	- return a value
4755 	 */
4756 	case SIOCGMIIPHY:
4757 	case SIOCGMIIREG:
4758 	case SIOCSIFNAME:
4759 		if (!capable(CAP_NET_ADMIN))
4760 			return -EPERM;
4761 		dev_load(net, ifr.ifr_name);
4762 		rtnl_lock();
4763 		ret = dev_ifsioc(net, &ifr, cmd);
4764 		rtnl_unlock();
4765 		if (!ret) {
4766 			if (colon)
4767 				*colon = ':';
4768 			if (copy_to_user(arg, &ifr,
4769 					 sizeof(struct ifreq)))
4770 				ret = -EFAULT;
4771 		}
4772 		return ret;
4773 
4774 	/*
4775 	 *	These ioctl calls:
4776 	 *	- require superuser power.
4777 	 *	- require strict serialization.
4778 	 *	- do not return a value
4779 	 */
4780 	case SIOCSIFFLAGS:
4781 	case SIOCSIFMETRIC:
4782 	case SIOCSIFMTU:
4783 	case SIOCSIFMAP:
4784 	case SIOCSIFHWADDR:
4785 	case SIOCSIFSLAVE:
4786 	case SIOCADDMULTI:
4787 	case SIOCDELMULTI:
4788 	case SIOCSIFHWBROADCAST:
4789 	case SIOCSIFTXQLEN:
4790 	case SIOCSMIIREG:
4791 	case SIOCBONDENSLAVE:
4792 	case SIOCBONDRELEASE:
4793 	case SIOCBONDSETHWADDR:
4794 	case SIOCBONDCHANGEACTIVE:
4795 	case SIOCBRADDIF:
4796 	case SIOCBRDELIF:
4797 	case SIOCSHWTSTAMP:
4798 		if (!capable(CAP_NET_ADMIN))
4799 			return -EPERM;
4800 		/* fall through */
4801 	case SIOCBONDSLAVEINFOQUERY:
4802 	case SIOCBONDINFOQUERY:
4803 		dev_load(net, ifr.ifr_name);
4804 		rtnl_lock();
4805 		ret = dev_ifsioc(net, &ifr, cmd);
4806 		rtnl_unlock();
4807 		return ret;
4808 
4809 	case SIOCGIFMEM:
4810 		/* Get the per device memory space. We can add this but
4811 		 * currently do not support it */
4812 	case SIOCSIFMEM:
4813 		/* Set the per device memory buffer space.
4814 		 * Not applicable in our case */
4815 	case SIOCSIFLINK:
4816 		return -EINVAL;
4817 
4818 	/*
4819 	 *	Unknown or private ioctl.
4820 	 */
4821 	default:
4822 		if (cmd == SIOCWANDEV ||
4823 		    (cmd >= SIOCDEVPRIVATE &&
4824 		     cmd <= SIOCDEVPRIVATE + 15)) {
4825 			dev_load(net, ifr.ifr_name);
4826 			rtnl_lock();
4827 			ret = dev_ifsioc(net, &ifr, cmd);
4828 			rtnl_unlock();
4829 			if (!ret && copy_to_user(arg, &ifr,
4830 						 sizeof(struct ifreq)))
4831 				ret = -EFAULT;
4832 			return ret;
4833 		}
4834 		/* Take care of Wireless Extensions */
4835 		if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
4836 			return wext_handle_ioctl(net, &ifr, cmd, arg);
4837 		return -EINVAL;
4838 	}
4839 }
4840 
4841 
4842 /**
4843  *	dev_new_index	-	allocate an ifindex
4844  *	@net: the applicable net namespace
4845  *
4846  *	Returns a suitable unique value for a new device interface
4847  *	number.  The caller must hold the rtnl semaphore or the
4848  *	dev_base_lock to be sure it remains unique.
4849  */
4850 static int dev_new_index(struct net *net)
4851 {
4852 	static int ifindex;
4853 	for (;;) {
4854 		if (++ifindex <= 0)
4855 			ifindex = 1;
4856 		if (!__dev_get_by_index(net, ifindex))
4857 			return ifindex;
4858 	}
4859 }
4860 
4861 /* Delayed registration/unregisteration */
4862 static LIST_HEAD(net_todo_list);
4863 
4864 static void net_set_todo(struct net_device *dev)
4865 {
4866 	list_add_tail(&dev->todo_list, &net_todo_list);
4867 }
4868 
4869 static void rollback_registered_many(struct list_head *head)
4870 {
4871 	struct net_device *dev, *tmp;
4872 
4873 	BUG_ON(dev_boot_phase);
4874 	ASSERT_RTNL();
4875 
4876 	list_for_each_entry_safe(dev, tmp, head, unreg_list) {
4877 		/* Some devices call without registering
4878 		 * for initialization unwind. Remove those
4879 		 * devices and proceed with the remaining.
4880 		 */
4881 		if (dev->reg_state == NETREG_UNINITIALIZED) {
4882 			pr_debug("unregister_netdevice: device %s/%p never "
4883 				 "was registered\n", dev->name, dev);
4884 
4885 			WARN_ON(1);
4886 			list_del(&dev->unreg_list);
4887 			continue;
4888 		}
4889 
4890 		BUG_ON(dev->reg_state != NETREG_REGISTERED);
4891 
4892 		/* If device is running, close it first. */
4893 		dev_close(dev);
4894 
4895 		/* And unlink it from device chain. */
4896 		unlist_netdevice(dev);
4897 
4898 		dev->reg_state = NETREG_UNREGISTERING;
4899 	}
4900 
4901 	synchronize_net();
4902 
4903 	list_for_each_entry(dev, head, unreg_list) {
4904 		/* Shutdown queueing discipline. */
4905 		dev_shutdown(dev);
4906 
4907 
4908 		/* Notify protocols, that we are about to destroy
4909 		   this device. They should clean all the things.
4910 		*/
4911 		call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4912 
4913 		if (!dev->rtnl_link_ops ||
4914 		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
4915 			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
4916 
4917 		/*
4918 		 *	Flush the unicast and multicast chains
4919 		 */
4920 		dev_unicast_flush(dev);
4921 		dev_addr_discard(dev);
4922 
4923 		if (dev->netdev_ops->ndo_uninit)
4924 			dev->netdev_ops->ndo_uninit(dev);
4925 
4926 		/* Notifier chain MUST detach us from master device. */
4927 		WARN_ON(dev->master);
4928 
4929 		/* Remove entries from kobject tree */
4930 		netdev_unregister_kobject(dev);
4931 	}
4932 
4933 	/* Process any work delayed until the end of the batch */
4934 	dev = list_first_entry(head, struct net_device, unreg_list);
4935 	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
4936 
4937 	synchronize_net();
4938 
4939 	list_for_each_entry(dev, head, unreg_list)
4940 		dev_put(dev);
4941 }
4942 
4943 static void rollback_registered(struct net_device *dev)
4944 {
4945 	LIST_HEAD(single);
4946 
4947 	list_add(&dev->unreg_list, &single);
4948 	rollback_registered_many(&single);
4949 }
4950 
4951 static void __netdev_init_queue_locks_one(struct net_device *dev,
4952 					  struct netdev_queue *dev_queue,
4953 					  void *_unused)
4954 {
4955 	spin_lock_init(&dev_queue->_xmit_lock);
4956 	netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
4957 	dev_queue->xmit_lock_owner = -1;
4958 }
4959 
4960 static void netdev_init_queue_locks(struct net_device *dev)
4961 {
4962 	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4963 	__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
4964 }
4965 
4966 unsigned long netdev_fix_features(unsigned long features, const char *name)
4967 {
4968 	/* Fix illegal SG+CSUM combinations. */
4969 	if ((features & NETIF_F_SG) &&
4970 	    !(features & NETIF_F_ALL_CSUM)) {
4971 		if (name)
4972 			printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
4973 			       "checksum feature.\n", name);
4974 		features &= ~NETIF_F_SG;
4975 	}
4976 
4977 	/* TSO requires that SG is present as well. */
4978 	if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
4979 		if (name)
4980 			printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
4981 			       "SG feature.\n", name);
4982 		features &= ~NETIF_F_TSO;
4983 	}
4984 
4985 	if (features & NETIF_F_UFO) {
4986 		if (!(features & NETIF_F_GEN_CSUM)) {
4987 			if (name)
4988 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4989 				       "since no NETIF_F_HW_CSUM feature.\n",
4990 				       name);
4991 			features &= ~NETIF_F_UFO;
4992 		}
4993 
4994 		if (!(features & NETIF_F_SG)) {
4995 			if (name)
4996 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4997 				       "since no NETIF_F_SG feature.\n", name);
4998 			features &= ~NETIF_F_UFO;
4999 		}
5000 	}
5001 
5002 	return features;
5003 }
5004 EXPORT_SYMBOL(netdev_fix_features);
5005 
5006 /**
5007  *	netif_stacked_transfer_operstate -	transfer operstate
5008  *	@rootdev: the root or lower level device to transfer state from
5009  *	@dev: the device to transfer operstate to
5010  *
5011  *	Transfer operational state from root to device. This is normally
5012  *	called when a stacking relationship exists between the root
5013  *	device and the device(a leaf device).
5014  */
5015 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
5016 					struct net_device *dev)
5017 {
5018 	if (rootdev->operstate == IF_OPER_DORMANT)
5019 		netif_dormant_on(dev);
5020 	else
5021 		netif_dormant_off(dev);
5022 
5023 	if (netif_carrier_ok(rootdev)) {
5024 		if (!netif_carrier_ok(dev))
5025 			netif_carrier_on(dev);
5026 	} else {
5027 		if (netif_carrier_ok(dev))
5028 			netif_carrier_off(dev);
5029 	}
5030 }
5031 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
5032 
5033 /**
5034  *	register_netdevice	- register a network device
5035  *	@dev: device to register
5036  *
5037  *	Take a completed network device structure and add it to the kernel
5038  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5039  *	chain. 0 is returned on success. A negative errno code is returned
5040  *	on a failure to set up the device, or if the name is a duplicate.
5041  *
5042  *	Callers must hold the rtnl semaphore. You may want
5043  *	register_netdev() instead of this.
5044  *
5045  *	BUGS:
5046  *	The locking appears insufficient to guarantee two parallel registers
5047  *	will not get the same name.
5048  */
5049 
5050 int register_netdevice(struct net_device *dev)
5051 {
5052 	int ret;
5053 	struct net *net = dev_net(dev);
5054 
5055 	BUG_ON(dev_boot_phase);
5056 	ASSERT_RTNL();
5057 
5058 	might_sleep();
5059 
5060 	/* When net_device's are persistent, this will be fatal. */
5061 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
5062 	BUG_ON(!net);
5063 
5064 	spin_lock_init(&dev->addr_list_lock);
5065 	netdev_set_addr_lockdep_class(dev);
5066 	netdev_init_queue_locks(dev);
5067 
5068 	dev->iflink = -1;
5069 
5070 	/* Init, if this function is available */
5071 	if (dev->netdev_ops->ndo_init) {
5072 		ret = dev->netdev_ops->ndo_init(dev);
5073 		if (ret) {
5074 			if (ret > 0)
5075 				ret = -EIO;
5076 			goto out;
5077 		}
5078 	}
5079 
5080 	ret = dev_get_valid_name(net, dev->name, dev->name, 0);
5081 	if (ret)
5082 		goto err_uninit;
5083 
5084 	dev->ifindex = dev_new_index(net);
5085 	if (dev->iflink == -1)
5086 		dev->iflink = dev->ifindex;
5087 
5088 	/* Fix illegal checksum combinations */
5089 	if ((dev->features & NETIF_F_HW_CSUM) &&
5090 	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5091 		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
5092 		       dev->name);
5093 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5094 	}
5095 
5096 	if ((dev->features & NETIF_F_NO_CSUM) &&
5097 	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5098 		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
5099 		       dev->name);
5100 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
5101 	}
5102 
5103 	dev->features = netdev_fix_features(dev->features, dev->name);
5104 
5105 	/* Enable software GSO if SG is supported. */
5106 	if (dev->features & NETIF_F_SG)
5107 		dev->features |= NETIF_F_GSO;
5108 
5109 	netdev_initialize_kobject(dev);
5110 
5111 	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5112 	ret = notifier_to_errno(ret);
5113 	if (ret)
5114 		goto err_uninit;
5115 
5116 	ret = netdev_register_kobject(dev);
5117 	if (ret)
5118 		goto err_uninit;
5119 	dev->reg_state = NETREG_REGISTERED;
5120 
5121 	/*
5122 	 *	Default initial state at registry is that the
5123 	 *	device is present.
5124 	 */
5125 
5126 	set_bit(__LINK_STATE_PRESENT, &dev->state);
5127 
5128 	dev_init_scheduler(dev);
5129 	dev_hold(dev);
5130 	list_netdevice(dev);
5131 
5132 	/* Notify protocols, that a new device appeared. */
5133 	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
5134 	ret = notifier_to_errno(ret);
5135 	if (ret) {
5136 		rollback_registered(dev);
5137 		dev->reg_state = NETREG_UNREGISTERED;
5138 	}
5139 	/*
5140 	 *	Prevent userspace races by waiting until the network
5141 	 *	device is fully setup before sending notifications.
5142 	 */
5143 	if (!dev->rtnl_link_ops ||
5144 	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5145 		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
5146 
5147 out:
5148 	return ret;
5149 
5150 err_uninit:
5151 	if (dev->netdev_ops->ndo_uninit)
5152 		dev->netdev_ops->ndo_uninit(dev);
5153 	goto out;
5154 }
5155 EXPORT_SYMBOL(register_netdevice);
5156 
5157 /**
5158  *	init_dummy_netdev	- init a dummy network device for NAPI
5159  *	@dev: device to init
5160  *
5161  *	This takes a network device structure and initialize the minimum
5162  *	amount of fields so it can be used to schedule NAPI polls without
5163  *	registering a full blown interface. This is to be used by drivers
5164  *	that need to tie several hardware interfaces to a single NAPI
5165  *	poll scheduler due to HW limitations.
5166  */
5167 int init_dummy_netdev(struct net_device *dev)
5168 {
5169 	/* Clear everything. Note we don't initialize spinlocks
5170 	 * are they aren't supposed to be taken by any of the
5171 	 * NAPI code and this dummy netdev is supposed to be
5172 	 * only ever used for NAPI polls
5173 	 */
5174 	memset(dev, 0, sizeof(struct net_device));
5175 
5176 	/* make sure we BUG if trying to hit standard
5177 	 * register/unregister code path
5178 	 */
5179 	dev->reg_state = NETREG_DUMMY;
5180 
5181 	/* initialize the ref count */
5182 	atomic_set(&dev->refcnt, 1);
5183 
5184 	/* NAPI wants this */
5185 	INIT_LIST_HEAD(&dev->napi_list);
5186 
5187 	/* a dummy interface is started by default */
5188 	set_bit(__LINK_STATE_PRESENT, &dev->state);
5189 	set_bit(__LINK_STATE_START, &dev->state);
5190 
5191 	return 0;
5192 }
5193 EXPORT_SYMBOL_GPL(init_dummy_netdev);
5194 
5195 
5196 /**
5197  *	register_netdev	- register a network device
5198  *	@dev: device to register
5199  *
5200  *	Take a completed network device structure and add it to the kernel
5201  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5202  *	chain. 0 is returned on success. A negative errno code is returned
5203  *	on a failure to set up the device, or if the name is a duplicate.
5204  *
5205  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
5206  *	and expands the device name if you passed a format string to
5207  *	alloc_netdev.
5208  */
5209 int register_netdev(struct net_device *dev)
5210 {
5211 	int err;
5212 
5213 	rtnl_lock();
5214 
5215 	/*
5216 	 * If the name is a format string the caller wants us to do a
5217 	 * name allocation.
5218 	 */
5219 	if (strchr(dev->name, '%')) {
5220 		err = dev_alloc_name(dev, dev->name);
5221 		if (err < 0)
5222 			goto out;
5223 	}
5224 
5225 	err = register_netdevice(dev);
5226 out:
5227 	rtnl_unlock();
5228 	return err;
5229 }
5230 EXPORT_SYMBOL(register_netdev);
5231 
5232 /*
5233  * netdev_wait_allrefs - wait until all references are gone.
5234  *
5235  * This is called when unregistering network devices.
5236  *
5237  * Any protocol or device that holds a reference should register
5238  * for netdevice notification, and cleanup and put back the
5239  * reference if they receive an UNREGISTER event.
5240  * We can get stuck here if buggy protocols don't correctly
5241  * call dev_put.
5242  */
5243 static void netdev_wait_allrefs(struct net_device *dev)
5244 {
5245 	unsigned long rebroadcast_time, warning_time;
5246 
5247 	linkwatch_forget_dev(dev);
5248 
5249 	rebroadcast_time = warning_time = jiffies;
5250 	while (atomic_read(&dev->refcnt) != 0) {
5251 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
5252 			rtnl_lock();
5253 
5254 			/* Rebroadcast unregister notification */
5255 			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5256 			/* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
5257 			 * should have already handle it the first time */
5258 
5259 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
5260 				     &dev->state)) {
5261 				/* We must not have linkwatch events
5262 				 * pending on unregister. If this
5263 				 * happens, we simply run the queue
5264 				 * unscheduled, resulting in a noop
5265 				 * for this device.
5266 				 */
5267 				linkwatch_run_queue();
5268 			}
5269 
5270 			__rtnl_unlock();
5271 
5272 			rebroadcast_time = jiffies;
5273 		}
5274 
5275 		msleep(250);
5276 
5277 		if (time_after(jiffies, warning_time + 10 * HZ)) {
5278 			printk(KERN_EMERG "unregister_netdevice: "
5279 			       "waiting for %s to become free. Usage "
5280 			       "count = %d\n",
5281 			       dev->name, atomic_read(&dev->refcnt));
5282 			warning_time = jiffies;
5283 		}
5284 	}
5285 }
5286 
5287 /* The sequence is:
5288  *
5289  *	rtnl_lock();
5290  *	...
5291  *	register_netdevice(x1);
5292  *	register_netdevice(x2);
5293  *	...
5294  *	unregister_netdevice(y1);
5295  *	unregister_netdevice(y2);
5296  *      ...
5297  *	rtnl_unlock();
5298  *	free_netdev(y1);
5299  *	free_netdev(y2);
5300  *
5301  * We are invoked by rtnl_unlock().
5302  * This allows us to deal with problems:
5303  * 1) We can delete sysfs objects which invoke hotplug
5304  *    without deadlocking with linkwatch via keventd.
5305  * 2) Since we run with the RTNL semaphore not held, we can sleep
5306  *    safely in order to wait for the netdev refcnt to drop to zero.
5307  *
5308  * We must not return until all unregister events added during
5309  * the interval the lock was held have been completed.
5310  */
5311 void netdev_run_todo(void)
5312 {
5313 	struct list_head list;
5314 
5315 	/* Snapshot list, allow later requests */
5316 	list_replace_init(&net_todo_list, &list);
5317 
5318 	__rtnl_unlock();
5319 
5320 	while (!list_empty(&list)) {
5321 		struct net_device *dev
5322 			= list_first_entry(&list, struct net_device, todo_list);
5323 		list_del(&dev->todo_list);
5324 
5325 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
5326 			printk(KERN_ERR "network todo '%s' but state %d\n",
5327 			       dev->name, dev->reg_state);
5328 			dump_stack();
5329 			continue;
5330 		}
5331 
5332 		dev->reg_state = NETREG_UNREGISTERED;
5333 
5334 		on_each_cpu(flush_backlog, dev, 1);
5335 
5336 		netdev_wait_allrefs(dev);
5337 
5338 		/* paranoia */
5339 		BUG_ON(atomic_read(&dev->refcnt));
5340 		WARN_ON(dev->ip_ptr);
5341 		WARN_ON(dev->ip6_ptr);
5342 		WARN_ON(dev->dn_ptr);
5343 
5344 		if (dev->destructor)
5345 			dev->destructor(dev);
5346 
5347 		/* Free network device */
5348 		kobject_put(&dev->dev.kobj);
5349 	}
5350 }
5351 
5352 /**
5353  *	dev_txq_stats_fold - fold tx_queues stats
5354  *	@dev: device to get statistics from
5355  *	@stats: struct net_device_stats to hold results
5356  */
5357 void dev_txq_stats_fold(const struct net_device *dev,
5358 			struct net_device_stats *stats)
5359 {
5360 	unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
5361 	unsigned int i;
5362 	struct netdev_queue *txq;
5363 
5364 	for (i = 0; i < dev->num_tx_queues; i++) {
5365 		txq = netdev_get_tx_queue(dev, i);
5366 		tx_bytes   += txq->tx_bytes;
5367 		tx_packets += txq->tx_packets;
5368 		tx_dropped += txq->tx_dropped;
5369 	}
5370 	if (tx_bytes || tx_packets || tx_dropped) {
5371 		stats->tx_bytes   = tx_bytes;
5372 		stats->tx_packets = tx_packets;
5373 		stats->tx_dropped = tx_dropped;
5374 	}
5375 }
5376 EXPORT_SYMBOL(dev_txq_stats_fold);
5377 
5378 /**
5379  *	dev_get_stats	- get network device statistics
5380  *	@dev: device to get statistics from
5381  *
5382  *	Get network statistics from device. The device driver may provide
5383  *	its own method by setting dev->netdev_ops->get_stats; otherwise
5384  *	the internal statistics structure is used.
5385  */
5386 const struct net_device_stats *dev_get_stats(struct net_device *dev)
5387 {
5388 	const struct net_device_ops *ops = dev->netdev_ops;
5389 
5390 	if (ops->ndo_get_stats)
5391 		return ops->ndo_get_stats(dev);
5392 
5393 	dev_txq_stats_fold(dev, &dev->stats);
5394 	return &dev->stats;
5395 }
5396 EXPORT_SYMBOL(dev_get_stats);
5397 
5398 static void netdev_init_one_queue(struct net_device *dev,
5399 				  struct netdev_queue *queue,
5400 				  void *_unused)
5401 {
5402 	queue->dev = dev;
5403 }
5404 
5405 static void netdev_init_queues(struct net_device *dev)
5406 {
5407 	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
5408 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5409 	spin_lock_init(&dev->tx_global_lock);
5410 }
5411 
5412 /**
5413  *	alloc_netdev_mq - allocate network device
5414  *	@sizeof_priv:	size of private data to allocate space for
5415  *	@name:		device name format string
5416  *	@setup:		callback to initialize device
5417  *	@queue_count:	the number of subqueues to allocate
5418  *
5419  *	Allocates a struct net_device with private data area for driver use
5420  *	and performs basic initialization.  Also allocates subquue structs
5421  *	for each queue on the device at the end of the netdevice.
5422  */
5423 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5424 		void (*setup)(struct net_device *), unsigned int queue_count)
5425 {
5426 	struct netdev_queue *tx;
5427 	struct net_device *dev;
5428 	size_t alloc_size;
5429 	struct net_device *p;
5430 
5431 	BUG_ON(strlen(name) >= sizeof(dev->name));
5432 
5433 	alloc_size = sizeof(struct net_device);
5434 	if (sizeof_priv) {
5435 		/* ensure 32-byte alignment of private area */
5436 		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
5437 		alloc_size += sizeof_priv;
5438 	}
5439 	/* ensure 32-byte alignment of whole construct */
5440 	alloc_size += NETDEV_ALIGN - 1;
5441 
5442 	p = kzalloc(alloc_size, GFP_KERNEL);
5443 	if (!p) {
5444 		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
5445 		return NULL;
5446 	}
5447 
5448 	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
5449 	if (!tx) {
5450 		printk(KERN_ERR "alloc_netdev: Unable to allocate "
5451 		       "tx qdiscs.\n");
5452 		goto free_p;
5453 	}
5454 
5455 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
5456 	dev->padded = (char *)dev - (char *)p;
5457 
5458 	if (dev_addr_init(dev))
5459 		goto free_tx;
5460 
5461 	dev_unicast_init(dev);
5462 
5463 	dev_net_set(dev, &init_net);
5464 
5465 	dev->_tx = tx;
5466 	dev->num_tx_queues = queue_count;
5467 	dev->real_num_tx_queues = queue_count;
5468 
5469 	dev->gso_max_size = GSO_MAX_SIZE;
5470 
5471 	netdev_init_queues(dev);
5472 
5473 	INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
5474 	dev->ethtool_ntuple_list.count = 0;
5475 	INIT_LIST_HEAD(&dev->napi_list);
5476 	INIT_LIST_HEAD(&dev->unreg_list);
5477 	INIT_LIST_HEAD(&dev->link_watch_list);
5478 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
5479 	setup(dev);
5480 	strcpy(dev->name, name);
5481 	return dev;
5482 
5483 free_tx:
5484 	kfree(tx);
5485 
5486 free_p:
5487 	kfree(p);
5488 	return NULL;
5489 }
5490 EXPORT_SYMBOL(alloc_netdev_mq);
5491 
5492 /**
5493  *	free_netdev - free network device
5494  *	@dev: device
5495  *
5496  *	This function does the last stage of destroying an allocated device
5497  * 	interface. The reference to the device object is released.
5498  *	If this is the last reference then it will be freed.
5499  */
5500 void free_netdev(struct net_device *dev)
5501 {
5502 	struct napi_struct *p, *n;
5503 
5504 	release_net(dev_net(dev));
5505 
5506 	kfree(dev->_tx);
5507 
5508 	/* Flush device addresses */
5509 	dev_addr_flush(dev);
5510 
5511 	/* Clear ethtool n-tuple list */
5512 	ethtool_ntuple_flush(dev);
5513 
5514 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
5515 		netif_napi_del(p);
5516 
5517 	/*  Compatibility with error handling in drivers */
5518 	if (dev->reg_state == NETREG_UNINITIALIZED) {
5519 		kfree((char *)dev - dev->padded);
5520 		return;
5521 	}
5522 
5523 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
5524 	dev->reg_state = NETREG_RELEASED;
5525 
5526 	/* will free via device release */
5527 	put_device(&dev->dev);
5528 }
5529 EXPORT_SYMBOL(free_netdev);
5530 
5531 /**
5532  *	synchronize_net -  Synchronize with packet receive processing
5533  *
5534  *	Wait for packets currently being received to be done.
5535  *	Does not block later packets from starting.
5536  */
5537 void synchronize_net(void)
5538 {
5539 	might_sleep();
5540 	synchronize_rcu();
5541 }
5542 EXPORT_SYMBOL(synchronize_net);
5543 
5544 /**
5545  *	unregister_netdevice_queue - remove device from the kernel
5546  *	@dev: device
5547  *	@head: list
5548  *
5549  *	This function shuts down a device interface and removes it
5550  *	from the kernel tables.
5551  *	If head not NULL, device is queued to be unregistered later.
5552  *
5553  *	Callers must hold the rtnl semaphore.  You may want
5554  *	unregister_netdev() instead of this.
5555  */
5556 
5557 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
5558 {
5559 	ASSERT_RTNL();
5560 
5561 	if (head) {
5562 		list_move_tail(&dev->unreg_list, head);
5563 	} else {
5564 		rollback_registered(dev);
5565 		/* Finish processing unregister after unlock */
5566 		net_set_todo(dev);
5567 	}
5568 }
5569 EXPORT_SYMBOL(unregister_netdevice_queue);
5570 
5571 /**
5572  *	unregister_netdevice_many - unregister many devices
5573  *	@head: list of devices
5574  */
5575 void unregister_netdevice_many(struct list_head *head)
5576 {
5577 	struct net_device *dev;
5578 
5579 	if (!list_empty(head)) {
5580 		rollback_registered_many(head);
5581 		list_for_each_entry(dev, head, unreg_list)
5582 			net_set_todo(dev);
5583 	}
5584 }
5585 EXPORT_SYMBOL(unregister_netdevice_many);
5586 
5587 /**
5588  *	unregister_netdev - remove device from the kernel
5589  *	@dev: device
5590  *
5591  *	This function shuts down a device interface and removes it
5592  *	from the kernel tables.
5593  *
5594  *	This is just a wrapper for unregister_netdevice that takes
5595  *	the rtnl semaphore.  In general you want to use this and not
5596  *	unregister_netdevice.
5597  */
5598 void unregister_netdev(struct net_device *dev)
5599 {
5600 	rtnl_lock();
5601 	unregister_netdevice(dev);
5602 	rtnl_unlock();
5603 }
5604 EXPORT_SYMBOL(unregister_netdev);
5605 
5606 /**
5607  *	dev_change_net_namespace - move device to different nethost namespace
5608  *	@dev: device
5609  *	@net: network namespace
5610  *	@pat: If not NULL name pattern to try if the current device name
5611  *	      is already taken in the destination network namespace.
5612  *
5613  *	This function shuts down a device interface and moves it
5614  *	to a new network namespace. On success 0 is returned, on
5615  *	a failure a netagive errno code is returned.
5616  *
5617  *	Callers must hold the rtnl semaphore.
5618  */
5619 
5620 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
5621 {
5622 	int err;
5623 
5624 	ASSERT_RTNL();
5625 
5626 	/* Don't allow namespace local devices to be moved. */
5627 	err = -EINVAL;
5628 	if (dev->features & NETIF_F_NETNS_LOCAL)
5629 		goto out;
5630 
5631 #ifdef CONFIG_SYSFS
5632 	/* Don't allow real devices to be moved when sysfs
5633 	 * is enabled.
5634 	 */
5635 	err = -EINVAL;
5636 	if (dev->dev.parent)
5637 		goto out;
5638 #endif
5639 
5640 	/* Ensure the device has been registrered */
5641 	err = -EINVAL;
5642 	if (dev->reg_state != NETREG_REGISTERED)
5643 		goto out;
5644 
5645 	/* Get out if there is nothing todo */
5646 	err = 0;
5647 	if (net_eq(dev_net(dev), net))
5648 		goto out;
5649 
5650 	/* Pick the destination device name, and ensure
5651 	 * we can use it in the destination network namespace.
5652 	 */
5653 	err = -EEXIST;
5654 	if (__dev_get_by_name(net, dev->name)) {
5655 		/* We get here if we can't use the current device name */
5656 		if (!pat)
5657 			goto out;
5658 		if (dev_get_valid_name(net, pat, dev->name, 1))
5659 			goto out;
5660 	}
5661 
5662 	/*
5663 	 * And now a mini version of register_netdevice unregister_netdevice.
5664 	 */
5665 
5666 	/* If device is running close it first. */
5667 	dev_close(dev);
5668 
5669 	/* And unlink it from device chain */
5670 	err = -ENODEV;
5671 	unlist_netdevice(dev);
5672 
5673 	synchronize_net();
5674 
5675 	/* Shutdown queueing discipline. */
5676 	dev_shutdown(dev);
5677 
5678 	/* Notify protocols, that we are about to destroy
5679 	   this device. They should clean all the things.
5680 	*/
5681 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5682 	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
5683 
5684 	/*
5685 	 *	Flush the unicast and multicast chains
5686 	 */
5687 	dev_unicast_flush(dev);
5688 	dev_addr_discard(dev);
5689 
5690 	netdev_unregister_kobject(dev);
5691 
5692 	/* Actually switch the network namespace */
5693 	dev_net_set(dev, net);
5694 
5695 	/* If there is an ifindex conflict assign a new one */
5696 	if (__dev_get_by_index(net, dev->ifindex)) {
5697 		int iflink = (dev->iflink == dev->ifindex);
5698 		dev->ifindex = dev_new_index(net);
5699 		if (iflink)
5700 			dev->iflink = dev->ifindex;
5701 	}
5702 
5703 	/* Fixup kobjects */
5704 	err = netdev_register_kobject(dev);
5705 	WARN_ON(err);
5706 
5707 	/* Add the device back in the hashes */
5708 	list_netdevice(dev);
5709 
5710 	/* Notify protocols, that a new device appeared. */
5711 	call_netdevice_notifiers(NETDEV_REGISTER, dev);
5712 
5713 	/*
5714 	 *	Prevent userspace races by waiting until the network
5715 	 *	device is fully setup before sending notifications.
5716 	 */
5717 	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
5718 
5719 	synchronize_net();
5720 	err = 0;
5721 out:
5722 	return err;
5723 }
5724 EXPORT_SYMBOL_GPL(dev_change_net_namespace);
5725 
5726 static int dev_cpu_callback(struct notifier_block *nfb,
5727 			    unsigned long action,
5728 			    void *ocpu)
5729 {
5730 	struct sk_buff **list_skb;
5731 	struct Qdisc **list_net;
5732 	struct sk_buff *skb;
5733 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
5734 	struct softnet_data *sd, *oldsd;
5735 
5736 	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
5737 		return NOTIFY_OK;
5738 
5739 	local_irq_disable();
5740 	cpu = smp_processor_id();
5741 	sd = &per_cpu(softnet_data, cpu);
5742 	oldsd = &per_cpu(softnet_data, oldcpu);
5743 
5744 	/* Find end of our completion_queue. */
5745 	list_skb = &sd->completion_queue;
5746 	while (*list_skb)
5747 		list_skb = &(*list_skb)->next;
5748 	/* Append completion queue from offline CPU. */
5749 	*list_skb = oldsd->completion_queue;
5750 	oldsd->completion_queue = NULL;
5751 
5752 	/* Find end of our output_queue. */
5753 	list_net = &sd->output_queue;
5754 	while (*list_net)
5755 		list_net = &(*list_net)->next_sched;
5756 	/* Append output queue from offline CPU. */
5757 	*list_net = oldsd->output_queue;
5758 	oldsd->output_queue = NULL;
5759 
5760 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
5761 	local_irq_enable();
5762 
5763 	/* Process offline CPU's input_pkt_queue */
5764 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
5765 		netif_rx(skb);
5766 
5767 	return NOTIFY_OK;
5768 }
5769 
5770 
5771 /**
5772  *	netdev_increment_features - increment feature set by one
5773  *	@all: current feature set
5774  *	@one: new feature set
5775  *	@mask: mask feature set
5776  *
5777  *	Computes a new feature set after adding a device with feature set
5778  *	@one to the master device with current feature set @all.  Will not
5779  *	enable anything that is off in @mask. Returns the new feature set.
5780  */
5781 unsigned long netdev_increment_features(unsigned long all, unsigned long one,
5782 					unsigned long mask)
5783 {
5784 	/* If device needs checksumming, downgrade to it. */
5785 	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
5786 		all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
5787 	else if (mask & NETIF_F_ALL_CSUM) {
5788 		/* If one device supports v4/v6 checksumming, set for all. */
5789 		if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
5790 		    !(all & NETIF_F_GEN_CSUM)) {
5791 			all &= ~NETIF_F_ALL_CSUM;
5792 			all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
5793 		}
5794 
5795 		/* If one device supports hw checksumming, set for all. */
5796 		if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
5797 			all &= ~NETIF_F_ALL_CSUM;
5798 			all |= NETIF_F_HW_CSUM;
5799 		}
5800 	}
5801 
5802 	one |= NETIF_F_ALL_CSUM;
5803 
5804 	one |= all & NETIF_F_ONE_FOR_ALL;
5805 	all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO;
5806 	all |= one & mask & NETIF_F_ONE_FOR_ALL;
5807 
5808 	return all;
5809 }
5810 EXPORT_SYMBOL(netdev_increment_features);
5811 
5812 static struct hlist_head *netdev_create_hash(void)
5813 {
5814 	int i;
5815 	struct hlist_head *hash;
5816 
5817 	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
5818 	if (hash != NULL)
5819 		for (i = 0; i < NETDEV_HASHENTRIES; i++)
5820 			INIT_HLIST_HEAD(&hash[i]);
5821 
5822 	return hash;
5823 }
5824 
5825 /* Initialize per network namespace state */
5826 static int __net_init netdev_init(struct net *net)
5827 {
5828 	INIT_LIST_HEAD(&net->dev_base_head);
5829 
5830 	net->dev_name_head = netdev_create_hash();
5831 	if (net->dev_name_head == NULL)
5832 		goto err_name;
5833 
5834 	net->dev_index_head = netdev_create_hash();
5835 	if (net->dev_index_head == NULL)
5836 		goto err_idx;
5837 
5838 	return 0;
5839 
5840 err_idx:
5841 	kfree(net->dev_name_head);
5842 err_name:
5843 	return -ENOMEM;
5844 }
5845 
5846 /**
5847  *	netdev_drivername - network driver for the device
5848  *	@dev: network device
5849  *	@buffer: buffer for resulting name
5850  *	@len: size of buffer
5851  *
5852  *	Determine network driver for device.
5853  */
5854 char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
5855 {
5856 	const struct device_driver *driver;
5857 	const struct device *parent;
5858 
5859 	if (len <= 0 || !buffer)
5860 		return buffer;
5861 	buffer[0] = 0;
5862 
5863 	parent = dev->dev.parent;
5864 
5865 	if (!parent)
5866 		return buffer;
5867 
5868 	driver = parent->driver;
5869 	if (driver && driver->name)
5870 		strlcpy(buffer, driver->name, len);
5871 	return buffer;
5872 }
5873 
5874 static void __net_exit netdev_exit(struct net *net)
5875 {
5876 	kfree(net->dev_name_head);
5877 	kfree(net->dev_index_head);
5878 }
5879 
5880 static struct pernet_operations __net_initdata netdev_net_ops = {
5881 	.init = netdev_init,
5882 	.exit = netdev_exit,
5883 };
5884 
5885 static void __net_exit default_device_exit(struct net *net)
5886 {
5887 	struct net_device *dev, *aux;
5888 	/*
5889 	 * Push all migratable network devices back to the
5890 	 * initial network namespace
5891 	 */
5892 	rtnl_lock();
5893 	for_each_netdev_safe(net, dev, aux) {
5894 		int err;
5895 		char fb_name[IFNAMSIZ];
5896 
5897 		/* Ignore unmoveable devices (i.e. loopback) */
5898 		if (dev->features & NETIF_F_NETNS_LOCAL)
5899 			continue;
5900 
5901 		/* Leave virtual devices for the generic cleanup */
5902 		if (dev->rtnl_link_ops)
5903 			continue;
5904 
5905 		/* Push remaing network devices to init_net */
5906 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
5907 		err = dev_change_net_namespace(dev, &init_net, fb_name);
5908 		if (err) {
5909 			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
5910 				__func__, dev->name, err);
5911 			BUG();
5912 		}
5913 	}
5914 	rtnl_unlock();
5915 }
5916 
5917 static void __net_exit default_device_exit_batch(struct list_head *net_list)
5918 {
5919 	/* At exit all network devices most be removed from a network
5920 	 * namespace.  Do this in the reverse order of registeration.
5921 	 * Do this across as many network namespaces as possible to
5922 	 * improve batching efficiency.
5923 	 */
5924 	struct net_device *dev;
5925 	struct net *net;
5926 	LIST_HEAD(dev_kill_list);
5927 
5928 	rtnl_lock();
5929 	list_for_each_entry(net, net_list, exit_list) {
5930 		for_each_netdev_reverse(net, dev) {
5931 			if (dev->rtnl_link_ops)
5932 				dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
5933 			else
5934 				unregister_netdevice_queue(dev, &dev_kill_list);
5935 		}
5936 	}
5937 	unregister_netdevice_many(&dev_kill_list);
5938 	rtnl_unlock();
5939 }
5940 
5941 static struct pernet_operations __net_initdata default_device_ops = {
5942 	.exit = default_device_exit,
5943 	.exit_batch = default_device_exit_batch,
5944 };
5945 
5946 /*
5947  *	Initialize the DEV module. At boot time this walks the device list and
5948  *	unhooks any devices that fail to initialise (normally hardware not
5949  *	present) and leaves us with a valid list of present and active devices.
5950  *
5951  */
5952 
5953 /*
5954  *       This is called single threaded during boot, so no need
5955  *       to take the rtnl semaphore.
5956  */
5957 static int __init net_dev_init(void)
5958 {
5959 	int i, rc = -ENOMEM;
5960 
5961 	BUG_ON(!dev_boot_phase);
5962 
5963 	if (dev_proc_init())
5964 		goto out;
5965 
5966 	if (netdev_kobject_init())
5967 		goto out;
5968 
5969 	INIT_LIST_HEAD(&ptype_all);
5970 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
5971 		INIT_LIST_HEAD(&ptype_base[i]);
5972 
5973 	if (register_pernet_subsys(&netdev_net_ops))
5974 		goto out;
5975 
5976 	/*
5977 	 *	Initialise the packet receive queues.
5978 	 */
5979 
5980 	for_each_possible_cpu(i) {
5981 		struct softnet_data *queue;
5982 
5983 		queue = &per_cpu(softnet_data, i);
5984 		skb_queue_head_init(&queue->input_pkt_queue);
5985 		queue->completion_queue = NULL;
5986 		INIT_LIST_HEAD(&queue->poll_list);
5987 
5988 		queue->backlog.poll = process_backlog;
5989 		queue->backlog.weight = weight_p;
5990 		queue->backlog.gro_list = NULL;
5991 		queue->backlog.gro_count = 0;
5992 	}
5993 
5994 	dev_boot_phase = 0;
5995 
5996 	/* The loopback device is special if any other network devices
5997 	 * is present in a network namespace the loopback device must
5998 	 * be present. Since we now dynamically allocate and free the
5999 	 * loopback device ensure this invariant is maintained by
6000 	 * keeping the loopback device as the first device on the
6001 	 * list of network devices.  Ensuring the loopback devices
6002 	 * is the first device that appears and the last network device
6003 	 * that disappears.
6004 	 */
6005 	if (register_pernet_device(&loopback_net_ops))
6006 		goto out;
6007 
6008 	if (register_pernet_device(&default_device_ops))
6009 		goto out;
6010 
6011 	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
6012 	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
6013 
6014 	hotcpu_notifier(dev_cpu_callback, 0);
6015 	dst_init();
6016 	dev_mcast_init();
6017 	rc = 0;
6018 out:
6019 	return rc;
6020 }
6021 
6022 subsys_initcall(net_dev_init);
6023 
6024 static int __init initialize_hashrnd(void)
6025 {
6026 	get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
6027 	return 0;
6028 }
6029 
6030 late_initcall_sync(initialize_hashrnd);
6031 
6032