1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <[email protected]>
12  *				Mark Evans, <[email protected]>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <[email protected]>
16  *		Alan Cox <[email protected]>
17  *		David Hinds <[email protected]>
18  *		Alexey Kuznetsov <[email protected]>
19  *		Adam Sulmicki <[email protected]>
20  *              Pekka Riikonen <[email protected]>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/sched.h>
83 #include <linux/mutex.h>
84 #include <linux/string.h>
85 #include <linux/mm.h>
86 #include <linux/socket.h>
87 #include <linux/sockios.h>
88 #include <linux/errno.h>
89 #include <linux/interrupt.h>
90 #include <linux/if_ether.h>
91 #include <linux/netdevice.h>
92 #include <linux/etherdevice.h>
93 #include <linux/notifier.h>
94 #include <linux/skbuff.h>
95 #include <net/net_namespace.h>
96 #include <net/sock.h>
97 #include <linux/rtnetlink.h>
98 #include <linux/proc_fs.h>
99 #include <linux/seq_file.h>
100 #include <linux/stat.h>
101 #include <linux/if_bridge.h>
102 #include <linux/if_macvlan.h>
103 #include <net/dst.h>
104 #include <net/pkt_sched.h>
105 #include <net/checksum.h>
106 #include <linux/highmem.h>
107 #include <linux/init.h>
108 #include <linux/kmod.h>
109 #include <linux/module.h>
110 #include <linux/kallsyms.h>
111 #include <linux/netpoll.h>
112 #include <linux/rcupdate.h>
113 #include <linux/delay.h>
114 #include <net/wext.h>
115 #include <net/iw_handler.h>
116 #include <asm/current.h>
117 #include <linux/audit.h>
118 #include <linux/dmaengine.h>
119 #include <linux/err.h>
120 #include <linux/ctype.h>
121 #include <linux/if_arp.h>
122 
123 #include "net-sysfs.h"
124 
125 /*
126  *	The list of packet types we will receive (as opposed to discard)
127  *	and the routines to invoke.
128  *
129  *	Why 16. Because with 16 the only overlap we get on a hash of the
130  *	low nibble of the protocol value is RARP/SNAP/X.25.
131  *
132  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
133  *             sure which should go first, but I bet it won't make much
134  *             difference if we are running VLANs.  The good news is that
135  *             this protocol won't be in the list unless compiled in, so
136  *             the average user (w/out VLANs) will not be adversely affected.
137  *             --BLG
138  *
139  *		0800	IP
140  *		8100    802.1Q VLAN
141  *		0001	802.3
142  *		0002	AX.25
143  *		0004	802.2
144  *		8035	RARP
145  *		0005	SNAP
146  *		0805	X.25
147  *		0806	ARP
148  *		8137	IPX
149  *		0009	Localtalk
150  *		86DD	IPv6
151  */
152 
153 #define PTYPE_HASH_SIZE	(16)
154 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
155 
156 static DEFINE_SPINLOCK(ptype_lock);
157 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
158 static struct list_head ptype_all __read_mostly;	/* Taps */
159 
160 #ifdef CONFIG_NET_DMA
161 struct net_dma {
162 	struct dma_client client;
163 	spinlock_t lock;
164 	cpumask_t channel_mask;
165 	struct dma_chan **channels;
166 };
167 
168 static enum dma_state_client
169 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
170 	enum dma_state state);
171 
172 static struct net_dma net_dma = {
173 	.client = {
174 		.event_callback = netdev_dma_event,
175 	},
176 };
177 #endif
178 
179 /*
180  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
181  * semaphore.
182  *
183  * Pure readers hold dev_base_lock for reading.
184  *
185  * Writers must hold the rtnl semaphore while they loop through the
186  * dev_base_head list, and hold dev_base_lock for writing when they do the
187  * actual updates.  This allows pure readers to access the list even
188  * while a writer is preparing to update it.
189  *
190  * To put it another way, dev_base_lock is held for writing only to
191  * protect against pure readers; the rtnl semaphore provides the
192  * protection against other writers.
193  *
194  * For example usages, see register_netdevice() and
195  * unregister_netdevice(), which must be called with the rtnl
196  * semaphore held.
197  */
198 DEFINE_RWLOCK(dev_base_lock);
199 
200 EXPORT_SYMBOL(dev_base_lock);
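
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * how a pure reader walks the device list under dev_base_lock, as described
 * in the comment above.  example_count_running_devices() is a hypothetical
 * name.
 */
static int example_count_running_devices(struct net *net)
{
	struct net_device *dev;
	int count = 0;

	read_lock(&dev_base_lock);		/* pure reader: read lock only */
	for_each_netdev(net, dev)
		if (dev->flags & IFF_UP)
			count++;
	read_unlock(&dev_base_lock);
	return count;
}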
201 
202 #define NETDEV_HASHBITS	8
203 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
204 
205 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
206 {
207 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
208 	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
209 }
210 
211 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
212 {
213 	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
214 }
215 
216 /* Device list insertion */
217 static int list_netdevice(struct net_device *dev)
218 {
219 	struct net *net = dev_net(dev);
220 
221 	ASSERT_RTNL();
222 
223 	write_lock_bh(&dev_base_lock);
224 	list_add_tail(&dev->dev_list, &net->dev_base_head);
225 	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
226 	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
227 	write_unlock_bh(&dev_base_lock);
228 	return 0;
229 }
230 
231 /* Device list removal */
232 static void unlist_netdevice(struct net_device *dev)
233 {
234 	ASSERT_RTNL();
235 
236 	/* Unlink dev from the device chain */
237 	write_lock_bh(&dev_base_lock);
238 	list_del(&dev->dev_list);
239 	hlist_del(&dev->name_hlist);
240 	hlist_del(&dev->index_hlist);
241 	write_unlock_bh(&dev_base_lock);
242 }
243 
244 /*
245  *	Our notifier list
246  */
247 
248 static RAW_NOTIFIER_HEAD(netdev_chain);
249 
250 /*
251  *	Device drivers call our routines to queue packets here. We empty the
252  *	queue in the local softnet handler.
253  */
254 
255 DEFINE_PER_CPU(struct softnet_data, softnet_data);
256 
257 #ifdef CONFIG_DEBUG_LOCK_ALLOC
258 /*
259  * register_netdevice() inits dev->_xmit_lock and sets lockdep class
260  * according to dev->type
261  */
262 static const unsigned short netdev_lock_type[] =
263 	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
264 	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
265 	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
266 	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
267 	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
268 	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
269 	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
270 	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
271 	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
272 	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
273 	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
274 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
275 	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
276 	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
277 	 ARPHRD_NONE};
278 
279 static const char *netdev_lock_name[] =
280 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
281 	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
282 	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
283 	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
284 	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
285 	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
286 	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
287 	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
288 	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
289 	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
290 	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
291 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
292 	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
293 	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
294 	 "_xmit_NONE"};
295 
296 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
297 
298 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
299 {
300 	int i;
301 
302 	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
303 		if (netdev_lock_type[i] == dev_type)
304 			return i;
305 	/* the last key is used by default */
306 	return ARRAY_SIZE(netdev_lock_type) - 1;
307 }
308 
309 static inline void netdev_set_lockdep_class(spinlock_t *lock,
310 					    unsigned short dev_type)
311 {
312 	int i;
313 
314 	i = netdev_lock_pos(dev_type);
315 	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
316 				   netdev_lock_name[i]);
317 }
318 #else
319 static inline void netdev_set_lockdep_class(spinlock_t *lock,
320 					    unsigned short dev_type)
321 {
322 }
323 #endif
324 
325 /*******************************************************************************
326 
327 		Protocol management and registration routines
328 
329 *******************************************************************************/
330 
331 /*
332  *	Add a protocol ID to the list. Now that the input handler is
333  *	smarter we can dispense with all the messy stuff that used to be
334  *	here.
335  *
336  *	BEWARE!!! Protocol handlers that mangle input packets
337  *	MUST BE last in the hash buckets, and protocol handler checks
338  *	MUST start from the promiscuous ptype_all chain in net_bh.
339  *	This is true now; do not change it.
340  *	Explanation: if a protocol handler that mangles packets were
341  *	first on the list, it could not tell that the packet is cloned
342  *	and should be copied-on-write, so it would modify it in place
343  *	and subsequent readers would get a broken packet.
344  *							--ANK (980803)
345  */
346 
347 /**
348  *	dev_add_pack - add packet handler
349  *	@pt: packet type declaration
350  *
351  *	Add a protocol handler to the networking stack. The passed &packet_type
352  *	is linked into kernel lists and may not be freed until it has been
353  *	removed from the kernel lists.
354  *
355  *	This call does not sleep, therefore it cannot
356  *	guarantee that all CPUs that are in the middle of receiving packets
357  *	will see the new packet type (until the next received packet).
358  */
359 
360 void dev_add_pack(struct packet_type *pt)
361 {
362 	int hash;
363 
364 	spin_lock_bh(&ptype_lock);
365 	if (pt->type == htons(ETH_P_ALL))
366 		list_add_rcu(&pt->list, &ptype_all);
367 	else {
368 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
369 		list_add_rcu(&pt->list, &ptype_base[hash]);
370 	}
371 	spin_unlock_bh(&ptype_lock);
372 }
373 
374 /**
375  *	__dev_remove_pack	 - remove packet handler
376  *	@pt: packet type declaration
377  *
378  *	Remove a protocol handler that was previously added to the kernel
379  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
380  *	from the kernel lists and can be freed or reused once this function
381  *	returns.
382  *
383  *      The packet type might still be in use by receivers
384  *	and must not be freed until after all CPUs have gone
385  *	through a quiescent state.
386  */
387 void __dev_remove_pack(struct packet_type *pt)
388 {
389 	struct list_head *head;
390 	struct packet_type *pt1;
391 
392 	spin_lock_bh(&ptype_lock);
393 
394 	if (pt->type == htons(ETH_P_ALL))
395 		head = &ptype_all;
396 	else
397 		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
398 
399 	list_for_each_entry(pt1, head, list) {
400 		if (pt == pt1) {
401 			list_del_rcu(&pt->list);
402 			goto out;
403 		}
404 	}
405 
406 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
407 out:
408 	spin_unlock_bh(&ptype_lock);
409 }
410 /**
411  *	dev_remove_pack	 - remove packet handler
412  *	@pt: packet type declaration
413  *
414  *	Remove a protocol handler that was previously added to the kernel
415  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
416  *	from the kernel lists and can be freed or reused once this function
417  *	returns.
418  *
419  *	This call sleeps to guarantee that no CPU is looking at the packet
420  *	type after return.
421  */
422 void dev_remove_pack(struct packet_type *pt)
423 {
424 	__dev_remove_pack(pt);
425 
426 	synchronize_net();
427 }
428 
429 /******************************************************************************
430 
431 		      Device Boot-time Settings Routines
432 
433 *******************************************************************************/
434 
435 /* Boot time configuration table */
436 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
437 
438 /**
439  *	netdev_boot_setup_add	- add new setup entry
440  *	@name: name of the device
441  *	@map: configured settings for the device
442  *
443  *	Adds a new setup entry to the dev_boot_setup list.  The function
444  *	returns 0 on error and 1 on success.  This is a generic routine for
445  *	all netdevices.
446  */
447 static int netdev_boot_setup_add(char *name, struct ifmap *map)
448 {
449 	struct netdev_boot_setup *s;
450 	int i;
451 
452 	s = dev_boot_setup;
453 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
454 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
455 			memset(s[i].name, 0, sizeof(s[i].name));
456 			strcpy(s[i].name, name);
457 			memcpy(&s[i].map, map, sizeof(s[i].map));
458 			break;
459 		}
460 	}
461 
462 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
463 }
464 
465 /**
466  *	netdev_boot_setup_check	- check boot time settings
467  *	@dev: the netdevice
468  *
469  * 	Check boot time settings for the device.
470  *	Any settings found are applied to the device so they can be used
471  *	later during device probing.
472  *	Returns 1 if settings were found, 0 otherwise.
473  */
474 int netdev_boot_setup_check(struct net_device *dev)
475 {
476 	struct netdev_boot_setup *s = dev_boot_setup;
477 	int i;
478 
479 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
480 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
481 		    !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
482 			dev->irq 	= s[i].map.irq;
483 			dev->base_addr 	= s[i].map.base_addr;
484 			dev->mem_start 	= s[i].map.mem_start;
485 			dev->mem_end 	= s[i].map.mem_end;
486 			return 1;
487 		}
488 	}
489 	return 0;
490 }
491 
492 
493 /**
494  *	netdev_boot_base	- get address from boot time settings
495  *	@prefix: prefix for network device
496  *	@unit: id for network device
497  *
498  * 	Check boot time settings for the base address of the device.
499  *	The address found is returned for use
500  *	later during device probing.
501  *	Returns 0 if no settings were found.
502  */
503 unsigned long netdev_boot_base(const char *prefix, int unit)
504 {
505 	const struct netdev_boot_setup *s = dev_boot_setup;
506 	char name[IFNAMSIZ];
507 	int i;
508 
509 	sprintf(name, "%s%d", prefix, unit);
510 
511 	/*
512 	 * If device already registered then return base of 1
513 	 * to indicate not to probe for this interface
514 	 */
515 	if (__dev_get_by_name(&init_net, name))
516 		return 1;
517 
518 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
519 		if (!strcmp(name, s[i].name))
520 			return s[i].map.base_addr;
521 	return 0;
522 }
523 
524 /*
525  * Saves the settings configured at boot time for any netdevice.
526  */
527 int __init netdev_boot_setup(char *str)
528 {
529 	int ints[5];
530 	struct ifmap map;
531 
532 	str = get_options(str, ARRAY_SIZE(ints), ints);
533 	if (!str || !*str)
534 		return 0;
535 
536 	/* Save settings */
537 	memset(&map, 0, sizeof(map));
538 	if (ints[0] > 0)
539 		map.irq = ints[1];
540 	if (ints[0] > 1)
541 		map.base_addr = ints[2];
542 	if (ints[0] > 2)
543 		map.mem_start = ints[3];
544 	if (ints[0] > 3)
545 		map.mem_end = ints[4];
546 
547 	/* Add new entry to the list */
548 	return netdev_boot_setup_add(str, &map);
549 }
550 
551 __setup("netdev=", netdev_boot_setup);
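
/*
 * Example (editorial addition, not part of the original file): the "netdev="
 * boot parameter parsed above takes up to four integers followed by a name,
 * in the order irq, base_addr, mem_start, mem_end.  A hypothetical setting:
 *
 *	netdev=9,0x300,0,0,eth0
 *
 * which records IRQ 9 and I/O base 0x300 for eth0; unused fields are left 0.
 */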
552 
553 /*******************************************************************************
554 
555 			    Device Interface Subroutines
556 
557 *******************************************************************************/
558 
559 /**
560  *	__dev_get_by_name	- find a device by its name
561  *	@net: the applicable net namespace
562  *	@name: name to find
563  *
564  *	Find an interface by name. Must be called under RTNL semaphore
565  *	or @dev_base_lock. If the name is found a pointer to the device
566  *	is returned. If the name is not found then %NULL is returned. The
567  *	reference counters are not incremented so the caller must be
568  *	careful with locks.
569  */
570 
571 struct net_device *__dev_get_by_name(struct net *net, const char *name)
572 {
573 	struct hlist_node *p;
574 
575 	hlist_for_each(p, dev_name_hash(net, name)) {
576 		struct net_device *dev
577 			= hlist_entry(p, struct net_device, name_hlist);
578 		if (!strncmp(dev->name, name, IFNAMSIZ))
579 			return dev;
580 	}
581 	return NULL;
582 }
583 
584 /**
585  *	dev_get_by_name		- find a device by its name
586  *	@net: the applicable net namespace
587  *	@name: name to find
588  *
589  *	Find an interface by name. This can be called from any
590  *	context and does its own locking. The returned handle has
591  *	the usage count incremented and the caller must use dev_put() to
592  *	release it when it is no longer needed. %NULL is returned if no
593  *	matching device is found.
594  */
595 
596 struct net_device *dev_get_by_name(struct net *net, const char *name)
597 {
598 	struct net_device *dev;
599 
600 	read_lock(&dev_base_lock);
601 	dev = __dev_get_by_name(net, name);
602 	if (dev)
603 		dev_hold(dev);
604 	read_unlock(&dev_base_lock);
605 	return dev;
606 }
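
/*
 * Illustrative usage sketch (editorial addition, not part of the original
 * file): the hold/put discipline described above.  example_use_device() is
 * a hypothetical caller.
 */
static void example_use_device(struct net *net)
{
	struct net_device *dev = dev_get_by_name(net, "eth0");

	if (!dev)
		return;
	/* ... safe to use dev here, a reference is held ... */
	dev_put(dev);			/* release the reference taken above */
}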
607 
608 /**
609  *	__dev_get_by_index - find a device by its ifindex
610  *	@net: the applicable net namespace
611  *	@ifindex: index of device
612  *
613  *	Search for an interface by index. Returns a pointer to the device,
614  *	or %NULL if it is not found. The device has not
615  *	had its reference counter increased so the caller must be careful
616  *	about locking. The caller must hold either the RTNL semaphore
617  *	or @dev_base_lock.
618  */
619 
620 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
621 {
622 	struct hlist_node *p;
623 
624 	hlist_for_each(p, dev_index_hash(net, ifindex)) {
625 		struct net_device *dev
626 			= hlist_entry(p, struct net_device, index_hlist);
627 		if (dev->ifindex == ifindex)
628 			return dev;
629 	}
630 	return NULL;
631 }
632 
633 
634 /**
635  *	dev_get_by_index - find a device by its ifindex
636  *	@net: the applicable net namespace
637  *	@ifindex: index of device
638  *
639  *	Search for an interface by index. Returns a pointer to the device,
640  *	or NULL if it is not found. The device returned has
641  *	had a reference added and the pointer is safe until the user calls
642  *	dev_put to indicate they have finished with it.
643  */
644 
645 struct net_device *dev_get_by_index(struct net *net, int ifindex)
646 {
647 	struct net_device *dev;
648 
649 	read_lock(&dev_base_lock);
650 	dev = __dev_get_by_index(net, ifindex);
651 	if (dev)
652 		dev_hold(dev);
653 	read_unlock(&dev_base_lock);
654 	return dev;
655 }
656 
657 /**
658  *	dev_getbyhwaddr - find a device by its hardware address
659  *	@net: the applicable net namespace
660  *	@type: media type of device
661  *	@ha: hardware address
662  *
663  *	Search for an interface by MAC address. Returns a pointer to the
664  *	device, or NULL if it is not found. The caller must hold the
665  *	rtnl semaphore. The returned device has not had its ref count increased
666  *	and the caller must therefore be careful about locking
667  *
668  *	BUGS:
669  *	If the API was consistent this would be __dev_get_by_hwaddr
670  */
671 
672 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
673 {
674 	struct net_device *dev;
675 
676 	ASSERT_RTNL();
677 
678 	for_each_netdev(net, dev)
679 		if (dev->type == type &&
680 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
681 			return dev;
682 
683 	return NULL;
684 }
685 
686 EXPORT_SYMBOL(dev_getbyhwaddr);
687 
688 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
689 {
690 	struct net_device *dev;
691 
692 	ASSERT_RTNL();
693 	for_each_netdev(net, dev)
694 		if (dev->type == type)
695 			return dev;
696 
697 	return NULL;
698 }
699 
700 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
701 
702 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
703 {
704 	struct net_device *dev;
705 
706 	rtnl_lock();
707 	dev = __dev_getfirstbyhwtype(net, type);
708 	if (dev)
709 		dev_hold(dev);
710 	rtnl_unlock();
711 	return dev;
712 }
713 
714 EXPORT_SYMBOL(dev_getfirstbyhwtype);
715 
716 /**
717  *	dev_get_by_flags - find any device with given flags
718  *	@net: the applicable net namespace
719  *	@if_flags: IFF_* values
720  *	@mask: bitmask of bits in if_flags to check
721  *
722  *	Search for any interface with the given flags. Returns a pointer to
723  *	the device, or NULL if no matching device is found. The device returned has
724  *	had a reference added and the pointer is safe until the user calls
725  *	dev_put to indicate they have finished with it.
726  */
727 
728 struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
729 {
730 	struct net_device *dev, *ret;
731 
732 	ret = NULL;
733 	read_lock(&dev_base_lock);
734 	for_each_netdev(net, dev) {
735 		if (((dev->flags ^ if_flags) & mask) == 0) {
736 			dev_hold(dev);
737 			ret = dev;
738 			break;
739 		}
740 	}
741 	read_unlock(&dev_base_lock);
742 	return ret;
743 }
744 
745 /**
746  *	dev_valid_name - check if name is okay for network device
747  *	@name: name string
748  *
749  *	Network device names need to be valid file names to
750  *	allow sysfs to work.  We also disallow any kind of
751  *	whitespace.
752  */
753 int dev_valid_name(const char *name)
754 {
755 	if (*name == '\0')
756 		return 0;
757 	if (strlen(name) >= IFNAMSIZ)
758 		return 0;
759 	if (!strcmp(name, ".") || !strcmp(name, ".."))
760 		return 0;
761 
762 	while (*name) {
763 		if (*name == '/' || isspace(*name))
764 			return 0;
765 		name++;
766 	}
767 	return 1;
768 }
769 
770 /**
771  *	__dev_alloc_name - allocate a name for a device
772  *	@net: network namespace to allocate the device name in
773  *	@name: name format string
774  *	@buf:  scratch buffer and result name string
775  *
776  *	Passed a format string - eg "lt%d" - it will try to find a suitable
777  *	id. It scans the list of devices to build up a free map, then chooses
778  *	the first empty slot. The caller must hold the dev_base or rtnl lock
779  *	while allocating the name and adding the device in order to avoid
780  *	duplicates.
781  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
782  *	Returns the number of the unit assigned or a negative errno code.
783  */
784 
785 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
786 {
787 	int i = 0;
788 	const char *p;
789 	const int max_netdevices = 8*PAGE_SIZE;
790 	unsigned long *inuse;
791 	struct net_device *d;
792 
793 	p = strnchr(name, IFNAMSIZ-1, '%');
794 	if (p) {
795 		/*
796 		 * Verify the string as this thing may have come from
797 		 * the user.  There must be either one "%d" and no other "%"
798 		 * characters.
799 		 */
800 		if (p[1] != 'd' || strchr(p + 2, '%'))
801 			return -EINVAL;
802 
803 		/* Use one page as a bit array of possible slots */
804 		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
805 		if (!inuse)
806 			return -ENOMEM;
807 
808 		for_each_netdev(net, d) {
809 			if (!sscanf(d->name, name, &i))
810 				continue;
811 			if (i < 0 || i >= max_netdevices)
812 				continue;
813 
814 			/*  avoid cases where sscanf is not exact inverse of printf */
815 			snprintf(buf, IFNAMSIZ, name, i);
816 			if (!strncmp(buf, d->name, IFNAMSIZ))
817 				set_bit(i, inuse);
818 		}
819 
820 		i = find_first_zero_bit(inuse, max_netdevices);
821 		free_page((unsigned long) inuse);
822 	}
823 
824 	snprintf(buf, IFNAMSIZ, name, i);
825 	if (!__dev_get_by_name(net, buf))
826 		return i;
827 
828 	/* It is possible to run out of possible slots
829 	 * when the name is long and there isn't enough space left
830 	 * for the digits, or if all bits are used.
831 	 */
832 	return -ENFILE;
833 }
834 
835 /**
836  *	dev_alloc_name - allocate a name for a device
837  *	@dev: device
838  *	@name: name format string
839  *
840  *	Passed a format string - eg "lt%d" - it will try to find a suitable
841  *	id. It scans the list of devices to build up a free map, then chooses
842  *	the first empty slot. The caller must hold the dev_base or rtnl lock
843  *	while allocating the name and adding the device in order to avoid
844  *	duplicates.
845  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
846  *	Returns the number of the unit assigned or a negative errno code.
847  */
848 
849 int dev_alloc_name(struct net_device *dev, const char *name)
850 {
851 	char buf[IFNAMSIZ];
852 	struct net *net;
853 	int ret;
854 
855 	BUG_ON(!dev_net(dev));
856 	net = dev_net(dev);
857 	ret = __dev_alloc_name(net, name, buf);
858 	if (ret >= 0)
859 		strlcpy(dev->name, buf, IFNAMSIZ);
860 	return ret;
861 }
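
/*
 * Illustrative usage sketch (editorial addition, not part of the original
 * file): a driver asking for the next free "eth%d" slot before
 * registration.  example_name_device() is a hypothetical helper.
 */
static int example_name_device(struct net_device *dev)
{
	int err = dev_alloc_name(dev, "eth%d");

	if (err < 0)
		return err;		/* e.g. -EINVAL or -ENFILE */
	/* dev->name now holds something like "eth0" */
	return 0;
}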
862 
863 
864 /**
865  *	dev_change_name - change name of a device
866  *	@dev: device
867  *	@newname: name (or format string) must be at least IFNAMSIZ
868  *
869  *	Change the name of a device; a format string such as "eth%d"
870  *	may be passed for wildcarding.
871  */
872 int dev_change_name(struct net_device *dev, char *newname)
873 {
874 	char oldname[IFNAMSIZ];
875 	int err = 0;
876 	int ret;
877 	struct net *net;
878 
879 	ASSERT_RTNL();
880 	BUG_ON(!dev_net(dev));
881 
882 	net = dev_net(dev);
883 	if (dev->flags & IFF_UP)
884 		return -EBUSY;
885 
886 	if (!dev_valid_name(newname))
887 		return -EINVAL;
888 
889 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
890 		return 0;
891 
892 	memcpy(oldname, dev->name, IFNAMSIZ);
893 
894 	if (strchr(newname, '%')) {
895 		err = dev_alloc_name(dev, newname);
896 		if (err < 0)
897 			return err;
898 		strcpy(newname, dev->name);
899 	}
900 	else if (__dev_get_by_name(net, newname))
901 		return -EEXIST;
902 	else
903 		strlcpy(dev->name, newname, IFNAMSIZ);
904 
905 rollback:
906 	device_rename(&dev->dev, dev->name);
907 
908 	write_lock_bh(&dev_base_lock);
909 	hlist_del(&dev->name_hlist);
910 	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
911 	write_unlock_bh(&dev_base_lock);
912 
913 	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
914 	ret = notifier_to_errno(ret);
915 
916 	if (ret) {
917 		if (err) {
918 			printk(KERN_ERR
919 			       "%s: name change rollback failed: %d.\n",
920 			       dev->name, ret);
921 		} else {
922 			err = ret;
923 			memcpy(dev->name, oldname, IFNAMSIZ);
924 			goto rollback;
925 		}
926 	}
927 
928 	return err;
929 }
930 
931 /**
932  *	netdev_features_change - device changes features
933  *	@dev: device to cause notification
934  *
935  *	Called to indicate a device has changed features.
936  */
937 void netdev_features_change(struct net_device *dev)
938 {
939 	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
940 }
941 EXPORT_SYMBOL(netdev_features_change);
942 
943 /**
944  *	netdev_state_change - device changes state
945  *	@dev: device to cause notification
946  *
947  *	Called to indicate a device has changed state. This function calls
948  *	the notifier chains for netdev_chain and sends a NEWLINK message
949  *	to the routing socket.
950  */
951 void netdev_state_change(struct net_device *dev)
952 {
953 	if (dev->flags & IFF_UP) {
954 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
955 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
956 	}
957 }
958 
959 /**
960  *	dev_load 	- load a network module
961  *	@net: the applicable net namespace
962  *	@name: name of interface
963  *
964  *	If a network interface is not present and the process has suitable
965  *	privileges this function loads the module. If module loading is not
966  *	available in this kernel then it becomes a nop.
967  */
968 
969 void dev_load(struct net *net, const char *name)
970 {
971 	struct net_device *dev;
972 
973 	read_lock(&dev_base_lock);
974 	dev = __dev_get_by_name(net, name);
975 	read_unlock(&dev_base_lock);
976 
977 	if (!dev && capable(CAP_SYS_MODULE))
978 		request_module("%s", name);
979 }
980 
981 /**
982  *	dev_open	- prepare an interface for use.
983  *	@dev:	device to open
984  *
985  *	Takes a device from down to up state. The device's private open
986  *	function is invoked and then the multicast lists are loaded. Finally
987  *	the device is moved into the up state and a %NETDEV_UP message is
988  *	sent to the netdev notifier chain.
989  *
990  *	Calling this function on an active interface is a nop. On a failure
991  *	a negative errno code is returned.
992  */
993 int dev_open(struct net_device *dev)
994 {
995 	int ret = 0;
996 
997 	ASSERT_RTNL();
998 
999 	/*
1000 	 *	Is it already up?
1001 	 */
1002 
1003 	if (dev->flags & IFF_UP)
1004 		return 0;
1005 
1006 	/*
1007 	 *	Is it even present?
1008 	 */
1009 	if (!netif_device_present(dev))
1010 		return -ENODEV;
1011 
1012 	/*
1013 	 *	Call device private open method
1014 	 */
1015 	set_bit(__LINK_STATE_START, &dev->state);
1016 
1017 	if (dev->validate_addr)
1018 		ret = dev->validate_addr(dev);
1019 
1020 	if (!ret && dev->open)
1021 		ret = dev->open(dev);
1022 
1023 	/*
1024 	 *	If it went open OK then:
1025 	 */
1026 
1027 	if (ret)
1028 		clear_bit(__LINK_STATE_START, &dev->state);
1029 	else {
1030 		/*
1031 		 *	Set the flags.
1032 		 */
1033 		dev->flags |= IFF_UP;
1034 
1035 		/*
1036 		 *	Initialize multicasting status
1037 		 */
1038 		dev_set_rx_mode(dev);
1039 
1040 		/*
1041 		 *	Wakeup transmit queue engine
1042 		 */
1043 		dev_activate(dev);
1044 
1045 		/*
1046 		 *	... and announce new interface.
1047 		 */
1048 		call_netdevice_notifiers(NETDEV_UP, dev);
1049 	}
1050 
1051 	return ret;
1052 }
1053 
1054 /**
1055  *	dev_close - shutdown an interface.
1056  *	@dev: device to shutdown
1057  *
1058  *	This function moves an active device into down state. A
1059  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1060  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1061  *	chain.
1062  */
1063 int dev_close(struct net_device *dev)
1064 {
1065 	ASSERT_RTNL();
1066 
1067 	might_sleep();
1068 
1069 	if (!(dev->flags & IFF_UP))
1070 		return 0;
1071 
1072 	/*
1073 	 *	Tell people we are going down, so that they can
1074 	 *	prepare for it while the device is still operating.
1075 	 */
1076 	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1077 
1078 	clear_bit(__LINK_STATE_START, &dev->state);
1079 
1080 	/* Synchronize to scheduled poll. We cannot touch poll list,
1081 	 * it can be even on different cpu. So just clear netif_running().
1082 	 *
1083 	 * dev->stop() will invoke napi_disable() on all of its
1084 	 * napi_struct instances on this device.
1085 	 */
1086 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
1087 
1088 	dev_deactivate(dev);
1089 
1090 	/*
1091 	 *	Call the device specific close. This cannot fail.
1092 	 *	Only if device is UP
1093 	 *
1094 	 *	We allow it to be called even after a DETACH hot-plug
1095 	 *	event.
1096 	 */
1097 	if (dev->stop)
1098 		dev->stop(dev);
1099 
1100 	/*
1101 	 *	Device is now down.
1102 	 */
1103 
1104 	dev->flags &= ~IFF_UP;
1105 
1106 	/*
1107 	 * Tell people we are down
1108 	 */
1109 	call_netdevice_notifiers(NETDEV_DOWN, dev);
1110 
1111 	return 0;
1112 }
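
/*
 * Illustrative usage sketch (editorial addition, not part of the original
 * file): both dev_open() and dev_close() assert the RTNL semaphore, so a
 * hypothetical caller takes it first.
 */
static int example_cycle_device(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_open(dev);		/* nop if already IFF_UP */
	if (!err)
		dev_close(dev);		/* always returns 0 */
	rtnl_unlock();
	return err;
}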
1113 
1114 
1115 static int dev_boot_phase = 1;
1116 
1117 /*
1118  *	Device change register/unregister. These are not inline or static
1119  *	as we export them to the world.
1120  */
1121 
1122 /**
1123  *	register_netdevice_notifier - register a network notifier block
1124  *	@nb: notifier
1125  *
1126  *	Register a notifier to be called when network device events occur.
1127  *	The notifier passed is linked into the kernel structures and must
1128  *	not be reused until it has been unregistered. A negative errno code
1129  *	is returned on a failure.
1130  *
1131  * 	When registered, all registration and up events are replayed
1132  *	to the new notifier to give it a race-free
1133  *	view of the network device list.
1134  */
1135 
1136 int register_netdevice_notifier(struct notifier_block *nb)
1137 {
1138 	struct net_device *dev;
1139 	struct net_device *last;
1140 	struct net *net;
1141 	int err;
1142 
1143 	rtnl_lock();
1144 	err = raw_notifier_chain_register(&netdev_chain, nb);
1145 	if (err)
1146 		goto unlock;
1147 	if (dev_boot_phase)
1148 		goto unlock;
1149 	for_each_net(net) {
1150 		for_each_netdev(net, dev) {
1151 			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1152 			err = notifier_to_errno(err);
1153 			if (err)
1154 				goto rollback;
1155 
1156 			if (!(dev->flags & IFF_UP))
1157 				continue;
1158 
1159 			nb->notifier_call(nb, NETDEV_UP, dev);
1160 		}
1161 	}
1162 
1163 unlock:
1164 	rtnl_unlock();
1165 	return err;
1166 
1167 rollback:
1168 	last = dev;
1169 	for_each_net(net) {
1170 		for_each_netdev(net, dev) {
1171 			if (dev == last)
1172 				break;
1173 
1174 			if (dev->flags & IFF_UP) {
1175 				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1176 				nb->notifier_call(nb, NETDEV_DOWN, dev);
1177 			}
1178 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1179 		}
1180 	}
1181 
1182 	raw_notifier_chain_unregister(&netdev_chain, nb);
1183 	goto unlock;
1184 }
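
/*
 * Illustrative usage sketch (editorial addition, not part of the original
 * file): a minimal netdevice notifier.  example_netdev_event() and
 * example_notifier are hypothetical names.
 */
static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	switch (event) {
	case NETDEV_UP:
		printk(KERN_INFO "%s is up\n", dev->name);
		break;
	case NETDEV_DOWN:
		printk(KERN_INFO "%s is down\n", dev->name);
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block example_notifier = {
	.notifier_call = example_netdev_event,
};

/* register_netdevice_notifier(&example_notifier); */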
1185 
1186 /**
1187  *	unregister_netdevice_notifier - unregister a network notifier block
1188  *	@nb: notifier
1189  *
1190  *	Unregister a notifier previously registered by
1191  *	register_netdevice_notifier(). The notifier is unlinked from the
1192  *	kernel structures and may then be reused. A negative errno code
1193  *	is returned on a failure.
1194  */
1195 
1196 int unregister_netdevice_notifier(struct notifier_block *nb)
1197 {
1198 	int err;
1199 
1200 	rtnl_lock();
1201 	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1202 	rtnl_unlock();
1203 	return err;
1204 }
1205 
1206 /**
1207  *	call_netdevice_notifiers - call all network notifier blocks
1208  *      @val: value passed unmodified to notifier function
1209  *      @dev: net_device pointer passed unmodified to notifier function
1210  *
1211  *	Call all network notifier blocks.  Parameters and return value
1212  *	are as for raw_notifier_call_chain().
1213  */
1214 
1215 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1216 {
1217 	return raw_notifier_call_chain(&netdev_chain, val, dev);
1218 }
1219 
1220 /* When > 0 there are consumers of rx skb time stamps */
1221 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1222 
1223 void net_enable_timestamp(void)
1224 {
1225 	atomic_inc(&netstamp_needed);
1226 }
1227 
1228 void net_disable_timestamp(void)
1229 {
1230 	atomic_dec(&netstamp_needed);
1231 }
1232 
1233 static inline void net_timestamp(struct sk_buff *skb)
1234 {
1235 	if (atomic_read(&netstamp_needed))
1236 		__net_timestamp(skb);
1237 	else
1238 		skb->tstamp.tv64 = 0;
1239 }
1240 
1241 /*
1242  *	Support routine. Sends outgoing frames to any network
1243  *	taps currently in use.
1244  */
1245 
1246 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1247 {
1248 	struct packet_type *ptype;
1249 
1250 	net_timestamp(skb);
1251 
1252 	rcu_read_lock();
1253 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1254 		/* Never send packets back to the socket
1255 		 * they originated from - MvS ([email protected])
1256 		 */
1257 		if ((ptype->dev == dev || !ptype->dev) &&
1258 		    (ptype->af_packet_priv == NULL ||
1259 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1260 			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1261 			if (!skb2)
1262 				break;
1263 
1264 			/* skb->nh should have been correctly
1265 			   set by the sender, so the check below is
1266 			   just protection against buggy protocols.
1267 			 */
1268 			skb_reset_mac_header(skb2);
1269 
1270 			if (skb_network_header(skb2) < skb2->data ||
1271 			    skb2->network_header > skb2->tail) {
1272 				if (net_ratelimit())
1273 					printk(KERN_CRIT "protocol %04x is "
1274 					       "buggy, dev %s\n",
1275 					       skb2->protocol, dev->name);
1276 				skb_reset_network_header(skb2);
1277 			}
1278 
1279 			skb2->transport_header = skb2->network_header;
1280 			skb2->pkt_type = PACKET_OUTGOING;
1281 			ptype->func(skb2, skb->dev, ptype, skb->dev);
1282 		}
1283 	}
1284 	rcu_read_unlock();
1285 }
1286 
1287 
1288 void __netif_schedule(struct net_device *dev)
1289 {
1290 	if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
1291 		unsigned long flags;
1292 		struct softnet_data *sd;
1293 
1294 		local_irq_save(flags);
1295 		sd = &__get_cpu_var(softnet_data);
1296 		dev->next_sched = sd->output_queue;
1297 		sd->output_queue = dev;
1298 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1299 		local_irq_restore(flags);
1300 	}
1301 }
1302 EXPORT_SYMBOL(__netif_schedule);
1303 
1304 void dev_kfree_skb_irq(struct sk_buff *skb)
1305 {
1306 	if (atomic_dec_and_test(&skb->users)) {
1307 		struct softnet_data *sd;
1308 		unsigned long flags;
1309 
1310 		local_irq_save(flags);
1311 		sd = &__get_cpu_var(softnet_data);
1312 		skb->next = sd->completion_queue;
1313 		sd->completion_queue = skb;
1314 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1315 		local_irq_restore(flags);
1316 	}
1317 }
1318 EXPORT_SYMBOL(dev_kfree_skb_irq);
1319 
1320 void dev_kfree_skb_any(struct sk_buff *skb)
1321 {
1322 	if (in_irq() || irqs_disabled())
1323 		dev_kfree_skb_irq(skb);
1324 	else
1325 		dev_kfree_skb(skb);
1326 }
1327 EXPORT_SYMBOL(dev_kfree_skb_any);
1328 
1329 
1330 /**
1331  * netif_device_detach - mark device as removed
1332  * @dev: network device
1333  *
1334  * Mark device as removed from system and therefore no longer available.
1335  */
1336 void netif_device_detach(struct net_device *dev)
1337 {
1338 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1339 	    netif_running(dev)) {
1340 		netif_stop_queue(dev);
1341 	}
1342 }
1343 EXPORT_SYMBOL(netif_device_detach);
1344 
1345 /**
1346  * netif_device_attach - mark device as attached
1347  * @dev: network device
1348  *
1349  * Mark device as attached to the system and restart it if needed.
1350  */
1351 void netif_device_attach(struct net_device *dev)
1352 {
1353 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1354 	    netif_running(dev)) {
1355 		netif_wake_queue(dev);
1356 		__netdev_watchdog_up(dev);
1357 	}
1358 }
1359 EXPORT_SYMBOL(netif_device_attach);
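
/*
 * Illustrative usage sketch (editorial addition, not part of the original
 * file): the usual suspend/resume pairing of the two helpers above.
 * example_suspend()/example_resume() are hypothetical driver callbacks.
 */
static void example_suspend(struct net_device *dev)
{
	netif_device_detach(dev);	/* stop the queue, mark !present */
	/* ... put the hardware to sleep ... */
}

static void example_resume(struct net_device *dev)
{
	/* ... wake the hardware up ... */
	netif_device_attach(dev);	/* mark present, restart the queue */
}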
1360 
1361 
1362 /*
1363  * Invalidate hardware checksum when packet is to be mangled, and
1364  * complete checksum manually on outgoing path.
1365  */
1366 int skb_checksum_help(struct sk_buff *skb)
1367 {
1368 	__wsum csum;
1369 	int ret = 0, offset;
1370 
1371 	if (skb->ip_summed == CHECKSUM_COMPLETE)
1372 		goto out_set_summed;
1373 
1374 	if (unlikely(skb_shinfo(skb)->gso_size)) {
1375 		/* Let GSO fix up the checksum. */
1376 		goto out_set_summed;
1377 	}
1378 
1379 	offset = skb->csum_start - skb_headroom(skb);
1380 	BUG_ON(offset >= skb_headlen(skb));
1381 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
1382 
1383 	offset += skb->csum_offset;
1384 	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1385 
1386 	if (skb_cloned(skb) &&
1387 	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1388 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1389 		if (ret)
1390 			goto out;
1391 	}
1392 
1393 	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
1394 out_set_summed:
1395 	skb->ip_summed = CHECKSUM_NONE;
1396 out:
1397 	return ret;
1398 }
1399 
1400 /**
1401  *	skb_gso_segment - Perform segmentation on skb.
1402  *	@skb: buffer to segment
1403  *	@features: features for the output path (see dev->features)
1404  *
1405  *	This function segments the given skb and returns a list of segments.
1406  *
1407  *	It may return NULL if the skb requires no segmentation.  This is
1408  *	only possible when GSO is used for verifying header integrity.
1409  */
1410 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1411 {
1412 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1413 	struct packet_type *ptype;
1414 	__be16 type = skb->protocol;
1415 	int err;
1416 
1417 	BUG_ON(skb_shinfo(skb)->frag_list);
1418 
1419 	skb_reset_mac_header(skb);
1420 	skb->mac_len = skb->network_header - skb->mac_header;
1421 	__skb_pull(skb, skb->mac_len);
1422 
1423 	if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
1424 		if (skb_header_cloned(skb) &&
1425 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1426 			return ERR_PTR(err);
1427 	}
1428 
1429 	rcu_read_lock();
1430 	list_for_each_entry_rcu(ptype,
1431 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
1432 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1433 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1434 				err = ptype->gso_send_check(skb);
1435 				segs = ERR_PTR(err);
1436 				if (err || skb_gso_ok(skb, features))
1437 					break;
1438 				__skb_push(skb, (skb->data -
1439 						 skb_network_header(skb)));
1440 			}
1441 			segs = ptype->gso_segment(skb, features);
1442 			break;
1443 		}
1444 	}
1445 	rcu_read_unlock();
1446 
1447 	__skb_push(skb, skb->data - skb_mac_header(skb));
1448 
1449 	return segs;
1450 }
1451 
1452 EXPORT_SYMBOL(skb_gso_segment);
1453 
1454 /* Take action when hardware reception checksum errors are detected. */
1455 #ifdef CONFIG_BUG
1456 void netdev_rx_csum_fault(struct net_device *dev)
1457 {
1458 	if (net_ratelimit()) {
1459 		printk(KERN_ERR "%s: hw csum failure.\n",
1460 			dev ? dev->name : "<unknown>");
1461 		dump_stack();
1462 	}
1463 }
1464 EXPORT_SYMBOL(netdev_rx_csum_fault);
1465 #endif
1466 
1467 /* Actually, we should eliminate this check as soon as we know that:
1468  * 1. An IOMMU is present and can map all the memory.
1469  * 2. No high memory really exists on this machine.
1470  */
1471 
1472 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1473 {
1474 #ifdef CONFIG_HIGHMEM
1475 	int i;
1476 
1477 	if (dev->features & NETIF_F_HIGHDMA)
1478 		return 0;
1479 
1480 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1481 		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1482 			return 1;
1483 
1484 #endif
1485 	return 0;
1486 }
1487 
1488 struct dev_gso_cb {
1489 	void (*destructor)(struct sk_buff *skb);
1490 };
1491 
1492 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1493 
1494 static void dev_gso_skb_destructor(struct sk_buff *skb)
1495 {
1496 	struct dev_gso_cb *cb;
1497 
1498 	do {
1499 		struct sk_buff *nskb = skb->next;
1500 
1501 		skb->next = nskb->next;
1502 		nskb->next = NULL;
1503 		kfree_skb(nskb);
1504 	} while (skb->next);
1505 
1506 	cb = DEV_GSO_CB(skb);
1507 	if (cb->destructor)
1508 		cb->destructor(skb);
1509 }
1510 
1511 /**
1512  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
1513  *	@skb: buffer to segment
1514  *
1515  *	This function segments the given skb and stores the list of segments
1516  *	in skb->next.
1517  */
1518 static int dev_gso_segment(struct sk_buff *skb)
1519 {
1520 	struct net_device *dev = skb->dev;
1521 	struct sk_buff *segs;
1522 	int features = dev->features & ~(illegal_highdma(dev, skb) ?
1523 					 NETIF_F_SG : 0);
1524 
1525 	segs = skb_gso_segment(skb, features);
1526 
1527 	/* Verifying header integrity only. */
1528 	if (!segs)
1529 		return 0;
1530 
1531 	if (IS_ERR(segs))
1532 		return PTR_ERR(segs);
1533 
1534 	skb->next = segs;
1535 	DEV_GSO_CB(skb)->destructor = skb->destructor;
1536 	skb->destructor = dev_gso_skb_destructor;
1537 
1538 	return 0;
1539 }
1540 
1541 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
1542 {
1543 	if (likely(!skb->next)) {
1544 		if (!list_empty(&ptype_all))
1545 			dev_queue_xmit_nit(skb, dev);
1546 
1547 		if (netif_needs_gso(dev, skb)) {
1548 			if (unlikely(dev_gso_segment(skb)))
1549 				goto out_kfree_skb;
1550 			if (skb->next)
1551 				goto gso;
1552 		}
1553 
1554 		return dev->hard_start_xmit(skb, dev);
1555 	}
1556 
1557 gso:
1558 	do {
1559 		struct sk_buff *nskb = skb->next;
1560 		int rc;
1561 
1562 		skb->next = nskb->next;
1563 		nskb->next = NULL;
1564 		rc = dev->hard_start_xmit(nskb, dev);
1565 		if (unlikely(rc)) {
1566 			nskb->next = skb->next;
1567 			skb->next = nskb;
1568 			return rc;
1569 		}
1570 		if (unlikely((netif_queue_stopped(dev) ||
1571 			     netif_subqueue_stopped(dev, skb)) &&
1572 			     skb->next))
1573 			return NETDEV_TX_BUSY;
1574 	} while (skb->next);
1575 
1576 	skb->destructor = DEV_GSO_CB(skb)->destructor;
1577 
1578 out_kfree_skb:
1579 	kfree_skb(skb);
1580 	return 0;
1581 }
1582 
1583 /**
1584  *	dev_queue_xmit - transmit a buffer
1585  *	@skb: buffer to transmit
1586  *
1587  *	Queue a buffer for transmission to a network device. The caller must
1588  *	have set the device and priority and built the buffer before calling
1589  *	this function. The function can be called from an interrupt.
1590  *
1591  *	A negative errno code is returned on a failure. A success does not
1592  *	guarantee the frame will be transmitted as it may be dropped due
1593  *	to congestion or traffic shaping.
1594  *
1595  * -----------------------------------------------------------------------------------
1596  *      I notice this method can also return errors from the queue disciplines,
1597  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1598  *      be positive.
1599  *
1600  *      Regardless of the return value, the skb is consumed, so it is currently
1601  *      difficult to retry a send to this method.  (You can bump the ref count
1602  *      before sending to hold a reference for retry if you are careful.)
1603  *
1604  *      When calling this method, interrupts MUST be enabled.  This is because
1605  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1606  *          --BLG
1607  */
1608 
1609 int dev_queue_xmit(struct sk_buff *skb)
1610 {
1611 	struct net_device *dev = skb->dev;
1612 	struct Qdisc *q;
1613 	int rc = -ENOMEM;
1614 
1615 	/* GSO will handle the following emulations directly. */
1616 	if (netif_needs_gso(dev, skb))
1617 		goto gso;
1618 
1619 	if (skb_shinfo(skb)->frag_list &&
1620 	    !(dev->features & NETIF_F_FRAGLIST) &&
1621 	    __skb_linearize(skb))
1622 		goto out_kfree_skb;
1623 
1624 	/* Fragmented skb is linearized if device does not support SG,
1625 	 * or if at least one of fragments is in highmem and device
1626 	 * does not support DMA from it.
1627 	 */
1628 	if (skb_shinfo(skb)->nr_frags &&
1629 	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1630 	    __skb_linearize(skb))
1631 		goto out_kfree_skb;
1632 
1633 	/* If packet is not checksummed and device does not support
1634 	 * checksumming for this protocol, complete checksumming here.
1635 	 */
1636 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
1637 		skb_set_transport_header(skb, skb->csum_start -
1638 					      skb_headroom(skb));
1639 
1640 		if (!(dev->features & NETIF_F_GEN_CSUM) &&
1641 		    !((dev->features & NETIF_F_IP_CSUM) &&
1642 		      skb->protocol == htons(ETH_P_IP)) &&
1643 		    !((dev->features & NETIF_F_IPV6_CSUM) &&
1644 		      skb->protocol == htons(ETH_P_IPV6)))
1645 			if (skb_checksum_help(skb))
1646 				goto out_kfree_skb;
1647 	}
1648 
1649 gso:
1650 	spin_lock_prefetch(&dev->queue_lock);
1651 
1652 	/* Disable soft irqs for various locks below. Also
1653 	 * stops preemption for RCU.
1654 	 */
1655 	rcu_read_lock_bh();
1656 
1657 	/* Updates of qdisc are serialized by queue_lock.
1658 	 * The struct Qdisc which is pointed to by qdisc is now a
1659 	 * rcu structure - it may be accessed without acquiring
1660 	 * a lock (but the structure may be stale.) The freeing of the
1661 	 * qdisc will be deferred until it's known that there are no
1662 	 * more references to it.
1663 	 *
1664 	 * If the qdisc has an enqueue function, we still need to
1665 	 * hold the queue_lock before calling it, since queue_lock
1666 	 * also serializes access to the device queue.
1667 	 */
1668 
1669 	q = rcu_dereference(dev->qdisc);
1670 #ifdef CONFIG_NET_CLS_ACT
1671 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1672 #endif
1673 	if (q->enqueue) {
1674 		/* Grab device queue */
1675 		spin_lock(&dev->queue_lock);
1676 		q = dev->qdisc;
1677 		if (q->enqueue) {
1678 			/* reset queue_mapping to zero */
1679 			skb_set_queue_mapping(skb, 0);
1680 			rc = q->enqueue(skb, q);
1681 			qdisc_run(dev);
1682 			spin_unlock(&dev->queue_lock);
1683 
1684 			rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1685 			goto out;
1686 		}
1687 		spin_unlock(&dev->queue_lock);
1688 	}
1689 
1690 	/* The device has no queue. Common case for software devices:
1691 	   loopback, all the sorts of tunnels...
1692 
1693 	   Really, it is unlikely that netif_tx_lock protection is necessary
1694 	   here.  (e.g. loopback and IP tunnels are clean, ignoring statistics
1695 	   counters.)
1696 	   However, it is possible that they rely on the protection
1697 	   we provide here.
1698 
1699 	   Check this and remove the lock if it is safe. It is not prone to deadlocks.
1700 	   Or remove the noqueue qdisc instead, which is even simpler 8)
1701 	 */
1702 	if (dev->flags & IFF_UP) {
1703 		int cpu = smp_processor_id(); /* ok because BHs are off */
1704 
1705 		if (dev->xmit_lock_owner != cpu) {
1706 
1707 			HARD_TX_LOCK(dev, cpu);
1708 
1709 			if (!netif_queue_stopped(dev) &&
1710 			    !netif_subqueue_stopped(dev, skb)) {
1711 				rc = 0;
1712 				if (!dev_hard_start_xmit(skb, dev)) {
1713 					HARD_TX_UNLOCK(dev);
1714 					goto out;
1715 				}
1716 			}
1717 			HARD_TX_UNLOCK(dev);
1718 			if (net_ratelimit())
1719 				printk(KERN_CRIT "Virtual device %s asks to "
1720 				       "queue packet!\n", dev->name);
1721 		} else {
1722 			/* Recursion is detected! It is possible,
1723 			 * unfortunately */
1724 			if (net_ratelimit())
1725 				printk(KERN_CRIT "Dead loop on virtual device "
1726 				       "%s, fix it urgently!\n", dev->name);
1727 		}
1728 	}
1729 
1730 	rc = -ENETDOWN;
1731 	rcu_read_unlock_bh();
1732 
1733 out_kfree_skb:
1734 	kfree_skb(skb);
1735 	return rc;
1736 out:
1737 	rcu_read_unlock_bh();
1738 	return rc;
1739 }
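
/*
 * Illustrative usage sketch (editorial addition, not part of the original
 * file): a caller setting up an skb as described above before handing it to
 * dev_queue_xmit().  example_send() and the payload details are
 * hypothetical, and a real caller would also build the link-layer header.
 */
static int example_send(struct net_device *dev, void *data, unsigned int len)
{
	struct sk_buff *skb = alloc_skb(LL_RESERVED_SPACE(dev) + len, GFP_ATOMIC);

	if (!skb)
		return -ENOMEM;
	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	memcpy(skb_put(skb, len), data, len);
	skb->dev = dev;			/* device must be set by the caller */
	skb->priority = 0;
	return dev_queue_xmit(skb);	/* consumes the skb in all cases */
}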
1740 
1741 
1742 /*=======================================================================
1743 			Receiver routines
1744   =======================================================================*/
1745 
1746 int netdev_max_backlog __read_mostly = 1000;
1747 int netdev_budget __read_mostly = 300;
1748 int weight_p __read_mostly = 64;            /* old backlog weight */
1749 
1750 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1751 
1752 
1753 /**
1754  *	netif_rx	-	post buffer to the network code
1755  *	@skb: buffer to post
1756  *
1757  *	This function receives a packet from a device driver and queues it for
1758  *	the upper (protocol) levels to process.  It always succeeds. The buffer
1759  *	may be dropped during processing for congestion control or by the
1760  *	protocol layers.
1761  *
1762  *	return values:
1763  *	NET_RX_SUCCESS	(no congestion)
1764  *	NET_RX_DROP     (packet was dropped)
1765  *
1766  */
1767 
1768 int netif_rx(struct sk_buff *skb)
1769 {
1770 	struct softnet_data *queue;
1771 	unsigned long flags;
1772 
1773 	/* if netpoll wants it, pretend we never saw it */
1774 	if (netpoll_rx(skb))
1775 		return NET_RX_DROP;
1776 
1777 	if (!skb->tstamp.tv64)
1778 		net_timestamp(skb);
1779 
1780 	/*
1781 	 * The code is rearranged so that the path is shortest
1782 	 * when the CPU is congested but still operating.
1783 	 */
1784 	local_irq_save(flags);
1785 	queue = &__get_cpu_var(softnet_data);
1786 
1787 	__get_cpu_var(netdev_rx_stat).total++;
1788 	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1789 		if (queue->input_pkt_queue.qlen) {
1790 enqueue:
1791 			dev_hold(skb->dev);
1792 			__skb_queue_tail(&queue->input_pkt_queue, skb);
1793 			local_irq_restore(flags);
1794 			return NET_RX_SUCCESS;
1795 		}
1796 
1797 		napi_schedule(&queue->backlog);
1798 		goto enqueue;
1799 	}
1800 
1801 	__get_cpu_var(netdev_rx_stat).dropped++;
1802 	local_irq_restore(flags);
1803 
1804 	kfree_skb(skb);
1805 	return NET_RX_DROP;
1806 }
1807 
1808 int netif_rx_ni(struct sk_buff *skb)
1809 {
1810 	int err;
1811 
1812 	preempt_disable();
1813 	err = netif_rx(skb);
1814 	if (local_softirq_pending())
1815 		do_softirq();
1816 	preempt_enable();
1817 
1818 	return err;
1819 }
1820 
1821 EXPORT_SYMBOL(netif_rx_ni);
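
/*
 * Illustrative usage sketch (editorial addition, not part of the original
 * file): a non-NAPI driver feeding a received frame to netif_rx() from its
 * interrupt handler.  example_rx_frame() and the skb construction details
 * are hypothetical.
 */
static void example_rx_frame(struct net_device *dev, void *data, unsigned int len)
{
	struct sk_buff *skb = dev_alloc_skb(len + 2);

	if (!skb)
		return;			/* drop on allocation failure */
	skb_reserve(skb, 2);		/* align the IP header */
	memcpy(skb_put(skb, len), data, len);
	skb->dev = dev;
	skb->protocol = eth_type_trans(skb, dev);
	netif_rx(skb);			/* queue for the per-CPU backlog */
}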
1822 
1823 static inline struct net_device *skb_bond(struct sk_buff *skb)
1824 {
1825 	struct net_device *dev = skb->dev;
1826 
1827 	if (dev->master) {
1828 		if (skb_bond_should_drop(skb)) {
1829 			kfree_skb(skb);
1830 			return NULL;
1831 		}
1832 		skb->dev = dev->master;
1833 	}
1834 
1835 	return dev;
1836 }
1837 
1838 
1839 static void net_tx_action(struct softirq_action *h)
1840 {
1841 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
1842 
1843 	if (sd->completion_queue) {
1844 		struct sk_buff *clist;
1845 
1846 		local_irq_disable();
1847 		clist = sd->completion_queue;
1848 		sd->completion_queue = NULL;
1849 		local_irq_enable();
1850 
1851 		while (clist) {
1852 			struct sk_buff *skb = clist;
1853 			clist = clist->next;
1854 
1855 			BUG_TRAP(!atomic_read(&skb->users));
1856 			__kfree_skb(skb);
1857 		}
1858 	}
1859 
1860 	if (sd->output_queue) {
1861 		struct net_device *head;
1862 
1863 		local_irq_disable();
1864 		head = sd->output_queue;
1865 		sd->output_queue = NULL;
1866 		local_irq_enable();
1867 
1868 		while (head) {
1869 			struct net_device *dev = head;
1870 			head = head->next_sched;
1871 
1872 			smp_mb__before_clear_bit();
1873 			clear_bit(__LINK_STATE_SCHED, &dev->state);
1874 
1875 			if (spin_trylock(&dev->queue_lock)) {
1876 				qdisc_run(dev);
1877 				spin_unlock(&dev->queue_lock);
1878 			} else {
1879 				netif_schedule(dev);
1880 			}
1881 		}
1882 	}
1883 }
1884 
1885 static inline int deliver_skb(struct sk_buff *skb,
1886 			      struct packet_type *pt_prev,
1887 			      struct net_device *orig_dev)
1888 {
1889 	atomic_inc(&skb->users);
1890 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1891 }
1892 
1893 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
1894 /* These hooks are defined here for ATM */
1895 struct net_bridge;
1896 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
1897 						unsigned char *addr);
1898 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
1899 
1900 /*
1901  * If the bridge module is loaded, call the bridging hook.
1902  * Returns NULL if the packet was consumed.
1903  */
1904 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
1905 					struct sk_buff *skb) __read_mostly;
1906 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
1907 					    struct packet_type **pt_prev, int *ret,
1908 					    struct net_device *orig_dev)
1909 {
1910 	struct net_bridge_port *port;
1911 
1912 	if (skb->pkt_type == PACKET_LOOPBACK ||
1913 	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
1914 		return skb;
1915 
1916 	if (*pt_prev) {
1917 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
1918 		*pt_prev = NULL;
1919 	}
1920 
1921 	return br_handle_frame_hook(port, skb);
1922 }
1923 #else
1924 #define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
1925 #endif
1926 
1927 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
1928 struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
1929 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
1930 
1931 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
1932 					     struct packet_type **pt_prev,
1933 					     int *ret,
1934 					     struct net_device *orig_dev)
1935 {
1936 	if (skb->dev->macvlan_port == NULL)
1937 		return skb;
1938 
1939 	if (*pt_prev) {
1940 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
1941 		*pt_prev = NULL;
1942 	}
1943 	return macvlan_handle_frame_hook(skb);
1944 }
1945 #else
1946 #define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
1947 #endif
1948 
1949 #ifdef CONFIG_NET_CLS_ACT
1950 /* TODO: Maybe we should just force sch_ingress to be compiled in
1951  * whenever CONFIG_NET_CLS_ACT is?  Otherwise we pay for a few useless
1952  * instructions (a compare and two extra stores) when the ingress
1953  * scheduler is not configured but CONFIG_NET_CLS_ACT is.
1954  * NOTE: This doesn't remove any functionality; without the ingress
1955  * scheduler you simply cannot add policies on ingress.
1956  *
1957  */
1958 static int ing_filter(struct sk_buff *skb)
1959 {
1960 	struct Qdisc *q;
1961 	struct net_device *dev = skb->dev;
1962 	int result = TC_ACT_OK;
1963 	u32 ttl = G_TC_RTTL(skb->tc_verd);
1964 
1965 	if (MAX_RED_LOOP < ttl++) {
1966 		printk(KERN_WARNING
1967 		       "Redir loop detected, dropping packet (%d->%d)\n",
1968 		       skb->iif, dev->ifindex);
1969 		return TC_ACT_SHOT;
1970 	}
1971 
1972 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
1973 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
1974 
1975 	spin_lock(&dev->ingress_lock);
1976 	if ((q = dev->qdisc_ingress) != NULL)
1977 		result = q->enqueue(skb, q);
1978 	spin_unlock(&dev->ingress_lock);
1979 
1980 	return result;
1981 }
1982 
1983 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
1984 					 struct packet_type **pt_prev,
1985 					 int *ret, struct net_device *orig_dev)
1986 {
1987 	if (!skb->dev->qdisc_ingress)
1988 		goto out;
1989 
1990 	if (*pt_prev) {
1991 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
1992 		*pt_prev = NULL;
1993 	} else {
1994 		/* Huh? Why does turning on AF_PACKET affect this? */
1995 		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1996 	}
1997 
1998 	switch (ing_filter(skb)) {
1999 	case TC_ACT_SHOT:
2000 	case TC_ACT_STOLEN:
2001 		kfree_skb(skb);
2002 		return NULL;
2003 	}
2004 
2005 out:
2006 	skb->tc_verd = 0;
2007 	return skb;
2008 }
2009 #endif
2010 
2011 /**
2012  *	netif_receive_skb - process receive buffer from network
2013  *	@skb: buffer to process
2014  *
2015  *	netif_receive_skb() is the main receive data processing function.
2016  *	It always succeeds. The buffer may be dropped during processing
2017  *	for congestion control or by the protocol layers.
2018  *
2019  *	This function may only be called from softirq context and interrupts
2020  *	should be enabled.
2021  *
2022  *	Return values (usually ignored):
2023  *	NET_RX_SUCCESS: no congestion
2024  *	NET_RX_DROP: packet was dropped
2025  */
2026 int netif_receive_skb(struct sk_buff *skb)
2027 {
2028 	struct packet_type *ptype, *pt_prev;
2029 	struct net_device *orig_dev;
2030 	int ret = NET_RX_DROP;
2031 	__be16 type;
2032 
2033 	/* if we've gotten here through NAPI, check netpoll */
2034 	if (netpoll_receive_skb(skb))
2035 		return NET_RX_DROP;
2036 
2037 	if (!skb->tstamp.tv64)
2038 		net_timestamp(skb);
2039 
2040 	if (!skb->iif)
2041 		skb->iif = skb->dev->ifindex;
2042 
2043 	orig_dev = skb_bond(skb);
2044 
2045 	if (!orig_dev)
2046 		return NET_RX_DROP;
2047 
2048 	__get_cpu_var(netdev_rx_stat).total++;
2049 
2050 	skb_reset_network_header(skb);
2051 	skb_reset_transport_header(skb);
2052 	skb->mac_len = skb->network_header - skb->mac_header;
2053 
2054 	pt_prev = NULL;
2055 
2056 	rcu_read_lock();
2057 
2058 #ifdef CONFIG_NET_CLS_ACT
2059 	if (skb->tc_verd & TC_NCLS) {
2060 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2061 		goto ncls;
2062 	}
2063 #endif
2064 
2065 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2066 		if (!ptype->dev || ptype->dev == skb->dev) {
2067 			if (pt_prev)
2068 				ret = deliver_skb(skb, pt_prev, orig_dev);
2069 			pt_prev = ptype;
2070 		}
2071 	}
2072 
2073 #ifdef CONFIG_NET_CLS_ACT
2074 	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2075 	if (!skb)
2076 		goto out;
2077 ncls:
2078 #endif
2079 
2080 	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2081 	if (!skb)
2082 		goto out;
2083 	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2084 	if (!skb)
2085 		goto out;
2086 
2087 	type = skb->protocol;
2088 	list_for_each_entry_rcu(ptype,
2089 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2090 		if (ptype->type == type &&
2091 		    (!ptype->dev || ptype->dev == skb->dev)) {
2092 			if (pt_prev)
2093 				ret = deliver_skb(skb, pt_prev, orig_dev);
2094 			pt_prev = ptype;
2095 		}
2096 	}
2097 
2098 	if (pt_prev) {
2099 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2100 	} else {
2101 		kfree_skb(skb);
2102 		/* Jamal, now you will not be able to escape explaining
2103 		 * to me how you were going to use this. :-)
2104 		 */
2105 		ret = NET_RX_DROP;
2106 	}
2107 
2108 out:
2109 	rcu_read_unlock();
2110 	return ret;
2111 }
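/*
 * Example (illustrative sketch): a NAPI driver delivers frames from its
 * ->poll() callback with netif_receive_skb() rather than netif_rx().
 * my_poll() and my_clean_rx() are hypothetical driver helpers;
 * netif_receive_skb() and napi_complete() are the real API.
 *
 *	static int my_poll(struct napi_struct *napi, int budget)
 *	{
 *		struct sk_buff *skb;
 *		int work = 0;
 *
 *		while (work < budget && (skb = my_clean_rx(napi)) != NULL) {
 *			netif_receive_skb(skb);
 *			work++;
 *		}
 *		if (work < budget)
 *			napi_complete(napi);	-- re-enable device interrupts here
 *		return work;
 *	}
 */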
2112 
2113 static int process_backlog(struct napi_struct *napi, int quota)
2114 {
2115 	int work = 0;
2116 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2117 	unsigned long start_time = jiffies;
2118 
2119 	napi->weight = weight_p;
2120 	do {
2121 		struct sk_buff *skb;
2122 		struct net_device *dev;
2123 
2124 		local_irq_disable();
2125 		skb = __skb_dequeue(&queue->input_pkt_queue);
2126 		if (!skb) {
2127 			__napi_complete(napi);
2128 			local_irq_enable();
2129 			break;
2130 		}
2131 
2132 		local_irq_enable();
2133 
2134 		dev = skb->dev;
2135 
2136 		netif_receive_skb(skb);
2137 
2138 		dev_put(dev);
2139 	} while (++work < quota && jiffies == start_time);
2140 
2141 	return work;
2142 }
2143 
2144 /**
2145  * __napi_schedule - schedule for receive
2146  * @n: entry to schedule
2147  *
2148  * The entry's receive function will be scheduled to run
2149  */
2150 void __napi_schedule(struct napi_struct *n)
2151 {
2152 	unsigned long flags;
2153 
2154 	local_irq_save(flags);
2155 	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2156 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2157 	local_irq_restore(flags);
2158 }
2159 EXPORT_SYMBOL(__napi_schedule);
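/*
 * Example (illustrative sketch): a driver's interrupt handler masks further
 * RX interrupts and schedules its NAPI instance; net_rx_action() below then
 * invokes the driver's ->poll().  my_irq(), my_disable_rx_irq() and
 * struct my_priv are hypothetical; napi_schedule_prep() and __napi_schedule()
 * are the real calls.
 *
 *	static irqreturn_t my_irq(int irq, void *dev_id)
 *	{
 *		struct my_priv *priv = dev_id;
 *
 *		if (napi_schedule_prep(&priv->napi)) {
 *			my_disable_rx_irq(priv);
 *			__napi_schedule(&priv->napi);
 *		}
 *		return IRQ_HANDLED;
 *	}
 */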
2160 
2161 
2162 static void net_rx_action(struct softirq_action *h)
2163 {
2164 	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2165 	unsigned long start_time = jiffies;
2166 	int budget = netdev_budget;
2167 	void *have;
2168 
2169 	local_irq_disable();
2170 
2171 	while (!list_empty(list)) {
2172 		struct napi_struct *n;
2173 		int work, weight;
2174 
2175 		/* If the softirq window is exhausted then punt.
2176 		 *
2177 		 * Note that this is a slight policy change from the
2178 		 * previous NAPI code, which would allow up to 2
2179 		 * jiffies to pass before breaking out.  The test
2180 		 * used to be "jiffies - start_time > 1".
2181 		 */
2182 		if (unlikely(budget <= 0 || jiffies != start_time))
2183 			goto softnet_break;
2184 
2185 		local_irq_enable();
2186 
2187 		/* Even though interrupts have been re-enabled, this
2188 		 * access is safe because interrupts can only add new
2189 		 * entries to the tail of this list, and only ->poll()
2190 		 * calls can remove this head entry from the list.
2191 		 */
2192 		n = list_entry(list->next, struct napi_struct, poll_list);
2193 
2194 		have = netpoll_poll_lock(n);
2195 
2196 		weight = n->weight;
2197 
2198 		/* This NAPI_STATE_SCHED test is for avoiding a race
2199 		 * with netpoll's poll_napi().  Only the entity which
2200 		 * obtains the lock and sees NAPI_STATE_SCHED set will
2201 		 * actually make the ->poll() call.  Therefore we avoid
2202 		 * accidentally calling ->poll() when NAPI is not scheduled.
2203 		 */
2204 		work = 0;
2205 		if (test_bit(NAPI_STATE_SCHED, &n->state))
2206 			work = n->poll(n, weight);
2207 
2208 		WARN_ON_ONCE(work > weight);
2209 
2210 		budget -= work;
2211 
2212 		local_irq_disable();
2213 
2214 		/* Drivers must not modify the NAPI state if they
2215 		 * consume the entire weight.  In such cases this code
2216 		 * still "owns" the NAPI instance and therefore can
2217 		 * move the instance around on the list at-will.
2218 		 */
2219 		if (unlikely(work == weight)) {
2220 			if (unlikely(napi_disable_pending(n)))
2221 				__napi_complete(n);
2222 			else
2223 				list_move_tail(&n->poll_list, list);
2224 		}
2225 
2226 		netpoll_poll_unlock(have);
2227 	}
2228 out:
2229 	local_irq_enable();
2230 
2231 #ifdef CONFIG_NET_DMA
2232 	/*
2233 	 * There may not be any more sk_buffs coming right now, so push
2234 	 * any pending DMA copies to hardware
2235 	 */
2236 	if (!cpus_empty(net_dma.channel_mask)) {
2237 		int chan_idx;
2238 		for_each_cpu_mask(chan_idx, net_dma.channel_mask) {
2239 			struct dma_chan *chan = net_dma.channels[chan_idx];
2240 			if (chan)
2241 				dma_async_memcpy_issue_pending(chan);
2242 		}
2243 	}
2244 #endif
2245 
2246 	return;
2247 
2248 softnet_break:
2249 	__get_cpu_var(netdev_rx_stat).time_squeeze++;
2250 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2251 	goto out;
2252 }
2253 
2254 static gifconf_func_t * gifconf_list [NPROTO];
2255 
2256 /**
2257  *	register_gifconf	-	register a SIOCGIF handler
2258  *	@family: Address family
2259  *	@gifconf: Function handler
2260  *
2261  *	Register protocol dependent address dumping routines. The handler
2262  *	Register protocol-dependent address dumping routines. The handler
2263  *	by another handler.
2264  */
2265 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2266 {
2267 	if (family >= NPROTO)
2268 		return -EINVAL;
2269 	gifconf_list[family] = gifconf;
2270 	return 0;
2271 }
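/*
 * Example (illustrative sketch): an address family registers its SIOCGIFCONF
 * handler once at init time.  The handler name my_family_gifconf and the init
 * function are hypothetical; register_gifconf() and PF_INET are real.
 *
 *	static int my_family_gifconf(struct net_device *dev,
 *				     char __user *buf, int len);
 *
 *	static int __init my_family_init(void)
 *	{
 *		return register_gifconf(PF_INET, my_family_gifconf);
 *	}
 */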
2272 
2273 
2274 /*
2275  *	Map an interface index to its name (SIOCGIFNAME)
2276  */
2277 
2278 /*
2279  *	We need this ioctl for efficient implementation of the
2280  *	if_indextoname() function required by the IPv6 API.  Without
2281  *	it, we would have to search all the interfaces to find a
2282  *	match.  --pb
2283  */
2284 
2285 static int dev_ifname(struct net *net, struct ifreq __user *arg)
2286 {
2287 	struct net_device *dev;
2288 	struct ifreq ifr;
2289 
2290 	/*
2291 	 *	Fetch the caller's info block.
2292 	 */
2293 
2294 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2295 		return -EFAULT;
2296 
2297 	read_lock(&dev_base_lock);
2298 	dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2299 	if (!dev) {
2300 		read_unlock(&dev_base_lock);
2301 		return -ENODEV;
2302 	}
2303 
2304 	strcpy(ifr.ifr_name, dev->name);
2305 	read_unlock(&dev_base_lock);
2306 
2307 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2308 		return -EFAULT;
2309 	return 0;
2310 }
2311 
2312 /*
2313  *	Perform a SIOCGIFCONF call. This structure will change
2314  *	size eventually, and there is nothing I can do about it.
2315  *	Thus we will need a 'compatibility mode'.
2316  */
2317 
2318 static int dev_ifconf(struct net *net, char __user *arg)
2319 {
2320 	struct ifconf ifc;
2321 	struct net_device *dev;
2322 	char __user *pos;
2323 	int len;
2324 	int total;
2325 	int i;
2326 
2327 	/*
2328 	 *	Fetch the caller's info block.
2329 	 */
2330 
2331 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2332 		return -EFAULT;
2333 
2334 	pos = ifc.ifc_buf;
2335 	len = ifc.ifc_len;
2336 
2337 	/*
2338 	 *	Loop over the interfaces, and write an info block for each.
2339 	 */
2340 
2341 	total = 0;
2342 	for_each_netdev(net, dev) {
2343 		for (i = 0; i < NPROTO; i++) {
2344 			if (gifconf_list[i]) {
2345 				int done;
2346 				if (!pos)
2347 					done = gifconf_list[i](dev, NULL, 0);
2348 				else
2349 					done = gifconf_list[i](dev, pos + total,
2350 							       len - total);
2351 				if (done < 0)
2352 					return -EFAULT;
2353 				total += done;
2354 			}
2355 		}
2356 	}
2357 
2358 	/*
2359 	 *	All done.  Write the updated control block back to the caller.
2360 	 */
2361 	ifc.ifc_len = total;
2362 
2363 	/*
2364 	 * 	Both BSD and Solaris return 0 here, so we do too.
2365 	 */
2366 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2367 }
2368 
2369 #ifdef CONFIG_PROC_FS
2370 /*
2371  *	This is invoked by the /proc filesystem handler to display a device
2372  *	in detail.
2373  */
2374 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2375 	__acquires(dev_base_lock)
2376 {
2377 	struct net *net = seq_file_net(seq);
2378 	loff_t off;
2379 	struct net_device *dev;
2380 
2381 	read_lock(&dev_base_lock);
2382 	if (!*pos)
2383 		return SEQ_START_TOKEN;
2384 
2385 	off = 1;
2386 	for_each_netdev(net, dev)
2387 		if (off++ == *pos)
2388 			return dev;
2389 
2390 	return NULL;
2391 }
2392 
2393 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2394 {
2395 	struct net *net = seq_file_net(seq);
2396 	++*pos;
2397 	return v == SEQ_START_TOKEN ?
2398 		first_net_device(net) : next_net_device((struct net_device *)v);
2399 }
2400 
2401 void dev_seq_stop(struct seq_file *seq, void *v)
2402 	__releases(dev_base_lock)
2403 {
2404 	read_unlock(&dev_base_lock);
2405 }
2406 
2407 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2408 {
2409 	struct net_device_stats *stats = dev->get_stats(dev);
2410 
2411 	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2412 		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2413 		   dev->name, stats->rx_bytes, stats->rx_packets,
2414 		   stats->rx_errors,
2415 		   stats->rx_dropped + stats->rx_missed_errors,
2416 		   stats->rx_fifo_errors,
2417 		   stats->rx_length_errors + stats->rx_over_errors +
2418 		    stats->rx_crc_errors + stats->rx_frame_errors,
2419 		   stats->rx_compressed, stats->multicast,
2420 		   stats->tx_bytes, stats->tx_packets,
2421 		   stats->tx_errors, stats->tx_dropped,
2422 		   stats->tx_fifo_errors, stats->collisions,
2423 		   stats->tx_carrier_errors +
2424 		    stats->tx_aborted_errors +
2425 		    stats->tx_window_errors +
2426 		    stats->tx_heartbeat_errors,
2427 		   stats->tx_compressed);
2428 }
2429 
2430 /*
2431  *	Called from the PROCfs module. This now uses the new arbitrary sized
2432  *	/proc/net interface to create /proc/net/dev
2433  */
2434 static int dev_seq_show(struct seq_file *seq, void *v)
2435 {
2436 	if (v == SEQ_START_TOKEN)
2437 		seq_puts(seq, "Inter-|   Receive                            "
2438 			      "                    |  Transmit\n"
2439 			      " face |bytes    packets errs drop fifo frame "
2440 			      "compressed multicast|bytes    packets errs "
2441 			      "drop fifo colls carrier compressed\n");
2442 	else
2443 		dev_seq_printf_stats(seq, v);
2444 	return 0;
2445 }
2446 
2447 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2448 {
2449 	struct netif_rx_stats *rc = NULL;
2450 
2451 	while (*pos < nr_cpu_ids)
2452 		if (cpu_online(*pos)) {
2453 			rc = &per_cpu(netdev_rx_stat, *pos);
2454 			break;
2455 		} else
2456 			++*pos;
2457 	return rc;
2458 }
2459 
2460 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2461 {
2462 	return softnet_get_online(pos);
2463 }
2464 
2465 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2466 {
2467 	++*pos;
2468 	return softnet_get_online(pos);
2469 }
2470 
2471 static void softnet_seq_stop(struct seq_file *seq, void *v)
2472 {
2473 }
2474 
2475 static int softnet_seq_show(struct seq_file *seq, void *v)
2476 {
2477 	struct netif_rx_stats *s = v;
2478 
2479 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2480 		   s->total, s->dropped, s->time_squeeze, 0,
2481 		   0, 0, 0, 0, /* was fastroute */
2482 		   s->cpu_collision);
2483 	return 0;
2484 }
2485 
2486 static const struct seq_operations dev_seq_ops = {
2487 	.start = dev_seq_start,
2488 	.next  = dev_seq_next,
2489 	.stop  = dev_seq_stop,
2490 	.show  = dev_seq_show,
2491 };
2492 
2493 static int dev_seq_open(struct inode *inode, struct file *file)
2494 {
2495 	return seq_open_net(inode, file, &dev_seq_ops,
2496 			    sizeof(struct seq_net_private));
2497 }
2498 
2499 static const struct file_operations dev_seq_fops = {
2500 	.owner	 = THIS_MODULE,
2501 	.open    = dev_seq_open,
2502 	.read    = seq_read,
2503 	.llseek  = seq_lseek,
2504 	.release = seq_release_net,
2505 };
2506 
2507 static const struct seq_operations softnet_seq_ops = {
2508 	.start = softnet_seq_start,
2509 	.next  = softnet_seq_next,
2510 	.stop  = softnet_seq_stop,
2511 	.show  = softnet_seq_show,
2512 };
2513 
2514 static int softnet_seq_open(struct inode *inode, struct file *file)
2515 {
2516 	return seq_open(file, &softnet_seq_ops);
2517 }
2518 
2519 static const struct file_operations softnet_seq_fops = {
2520 	.owner	 = THIS_MODULE,
2521 	.open    = softnet_seq_open,
2522 	.read    = seq_read,
2523 	.llseek  = seq_lseek,
2524 	.release = seq_release,
2525 };
2526 
2527 static void *ptype_get_idx(loff_t pos)
2528 {
2529 	struct packet_type *pt = NULL;
2530 	loff_t i = 0;
2531 	int t;
2532 
2533 	list_for_each_entry_rcu(pt, &ptype_all, list) {
2534 		if (i == pos)
2535 			return pt;
2536 		++i;
2537 	}
2538 
2539 	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
2540 		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2541 			if (i == pos)
2542 				return pt;
2543 			++i;
2544 		}
2545 	}
2546 	return NULL;
2547 }
2548 
2549 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2550 	__acquires(RCU)
2551 {
2552 	rcu_read_lock();
2553 	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2554 }
2555 
2556 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2557 {
2558 	struct packet_type *pt;
2559 	struct list_head *nxt;
2560 	int hash;
2561 
2562 	++*pos;
2563 	if (v == SEQ_START_TOKEN)
2564 		return ptype_get_idx(0);
2565 
2566 	pt = v;
2567 	nxt = pt->list.next;
2568 	if (pt->type == htons(ETH_P_ALL)) {
2569 		if (nxt != &ptype_all)
2570 			goto found;
2571 		hash = 0;
2572 		nxt = ptype_base[0].next;
2573 	} else
2574 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
2575 
2576 	while (nxt == &ptype_base[hash]) {
2577 		if (++hash >= PTYPE_HASH_SIZE)
2578 			return NULL;
2579 		nxt = ptype_base[hash].next;
2580 	}
2581 found:
2582 	return list_entry(nxt, struct packet_type, list);
2583 }
2584 
2585 static void ptype_seq_stop(struct seq_file *seq, void *v)
2586 	__releases(RCU)
2587 {
2588 	rcu_read_unlock();
2589 }
2590 
2591 static void ptype_seq_decode(struct seq_file *seq, void *sym)
2592 {
2593 #ifdef CONFIG_KALLSYMS
2594 	unsigned long offset = 0, symsize;
2595 	const char *symname;
2596 	char *modname;
2597 	char namebuf[128];
2598 
2599 	symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
2600 				  &modname, namebuf);
2601 
2602 	if (symname) {
2603 		char *delim = ":";
2604 
2605 		if (!modname)
2606 			modname = delim = "";
2607 		seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
2608 			   symname, offset);
2609 		return;
2610 	}
2611 #endif
2612 
2613 	seq_printf(seq, "[%p]", sym);
2614 }
2615 
2616 static int ptype_seq_show(struct seq_file *seq, void *v)
2617 {
2618 	struct packet_type *pt = v;
2619 
2620 	if (v == SEQ_START_TOKEN)
2621 		seq_puts(seq, "Type Device      Function\n");
2622 	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
2623 		if (pt->type == htons(ETH_P_ALL))
2624 			seq_puts(seq, "ALL ");
2625 		else
2626 			seq_printf(seq, "%04x", ntohs(pt->type));
2627 
2628 		seq_printf(seq, " %-8s ",
2629 			   pt->dev ? pt->dev->name : "");
2630 		ptype_seq_decode(seq,  pt->func);
2631 		seq_putc(seq, '\n');
2632 	}
2633 
2634 	return 0;
2635 }
2636 
2637 static const struct seq_operations ptype_seq_ops = {
2638 	.start = ptype_seq_start,
2639 	.next  = ptype_seq_next,
2640 	.stop  = ptype_seq_stop,
2641 	.show  = ptype_seq_show,
2642 };
2643 
2644 static int ptype_seq_open(struct inode *inode, struct file *file)
2645 {
2646 	return seq_open_net(inode, file, &ptype_seq_ops,
2647 			sizeof(struct seq_net_private));
2648 }
2649 
2650 static const struct file_operations ptype_seq_fops = {
2651 	.owner	 = THIS_MODULE,
2652 	.open    = ptype_seq_open,
2653 	.read    = seq_read,
2654 	.llseek  = seq_lseek,
2655 	.release = seq_release_net,
2656 };
2657 
2658 
2659 static int __net_init dev_proc_net_init(struct net *net)
2660 {
2661 	int rc = -ENOMEM;
2662 
2663 	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
2664 		goto out;
2665 	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
2666 		goto out_dev;
2667 	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
2668 		goto out_softnet;
2669 
2670 	if (wext_proc_init(net))
2671 		goto out_ptype;
2672 	rc = 0;
2673 out:
2674 	return rc;
2675 out_ptype:
2676 	proc_net_remove(net, "ptype");
2677 out_softnet:
2678 	proc_net_remove(net, "softnet_stat");
2679 out_dev:
2680 	proc_net_remove(net, "dev");
2681 	goto out;
2682 }
2683 
2684 static void __net_exit dev_proc_net_exit(struct net *net)
2685 {
2686 	wext_proc_exit(net);
2687 
2688 	proc_net_remove(net, "ptype");
2689 	proc_net_remove(net, "softnet_stat");
2690 	proc_net_remove(net, "dev");
2691 }
2692 
2693 static struct pernet_operations __net_initdata dev_proc_ops = {
2694 	.init = dev_proc_net_init,
2695 	.exit = dev_proc_net_exit,
2696 };
2697 
2698 static int __init dev_proc_init(void)
2699 {
2700 	return register_pernet_subsys(&dev_proc_ops);
2701 }
2702 #else
2703 #define dev_proc_init() 0
2704 #endif	/* CONFIG_PROC_FS */
2705 
2706 
2707 /**
2708  *	netdev_set_master	-	set up master/slave pair
2709  *	@slave: slave device
2710  *	@master: new master device
2711  *
2712  *	Changes the master device of the slave. Pass %NULL to break the
2713  *	bonding. The caller must hold the RTNL semaphore. On a failure
2714  *	a negative errno code is returned. On success the reference counts
2715  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2716  *	function returns zero.
2717  */
2718 int netdev_set_master(struct net_device *slave, struct net_device *master)
2719 {
2720 	struct net_device *old = slave->master;
2721 
2722 	ASSERT_RTNL();
2723 
2724 	if (master) {
2725 		if (old)
2726 			return -EBUSY;
2727 		dev_hold(master);
2728 	}
2729 
2730 	slave->master = master;
2731 
2732 	synchronize_net();
2733 
2734 	if (old)
2735 		dev_put(old);
2736 
2737 	if (master)
2738 		slave->flags |= IFF_SLAVE;
2739 	else
2740 		slave->flags &= ~IFF_SLAVE;
2741 
2742 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2743 	return 0;
2744 }
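/*
 * Example (illustrative sketch): a bonding-style driver pairs a slave with
 * its master while holding the RTNL lock, as the kerneldoc above requires.
 * my_enslave() is a hypothetical helper; netdev_set_master(), rtnl_lock()
 * and rtnl_unlock() are real.
 *
 *	static int my_enslave(struct net_device *master, struct net_device *slave)
 *	{
 *		int err;
 *
 *		rtnl_lock();
 *		err = netdev_set_master(slave, master);
 *		rtnl_unlock();
 *		return err;
 *	}
 */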
2745 
2746 static void __dev_set_promiscuity(struct net_device *dev, int inc)
2747 {
2748 	unsigned short old_flags = dev->flags;
2749 
2750 	ASSERT_RTNL();
2751 
2752 	if ((dev->promiscuity += inc) == 0)
2753 		dev->flags &= ~IFF_PROMISC;
2754 	else
2755 		dev->flags |= IFF_PROMISC;
2756 	if (dev->flags != old_flags) {
2757 		printk(KERN_INFO "device %s %s promiscuous mode\n",
2758 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2759 							       "left");
2760 		if (audit_enabled)
2761 			audit_log(current->audit_context, GFP_ATOMIC,
2762 				AUDIT_ANOM_PROMISCUOUS,
2763 				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
2764 				dev->name, (dev->flags & IFF_PROMISC),
2765 				(old_flags & IFF_PROMISC),
2766 				audit_get_loginuid(current),
2767 				current->uid, current->gid,
2768 				audit_get_sessionid(current));
2769 
2770 		if (dev->change_rx_flags)
2771 			dev->change_rx_flags(dev, IFF_PROMISC);
2772 	}
2773 }
2774 
2775 /**
2776  *	dev_set_promiscuity	- update promiscuity count on a device
2777  *	@dev: device
2778  *	@inc: modifier
2779  *
2780  *	Add or remove promiscuity from a device. While the count in the device
2781  *	remains above zero the interface remains promiscuous. Once it hits zero
2782  *	the device reverts back to normal filtering operation. A negative inc
2783  *	value is used to drop promiscuity on the device.
2784  */
2785 void dev_set_promiscuity(struct net_device *dev, int inc)
2786 {
2787 	unsigned short old_flags = dev->flags;
2788 
2789 	__dev_set_promiscuity(dev, inc);
2790 	if (dev->flags != old_flags)
2791 		dev_set_rx_mode(dev);
2792 }
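/*
 * Example (illustrative sketch): a capture-style user takes one promiscuity
 * reference while sniffing and drops it when done, both under the RTNL lock
 * since __dev_set_promiscuity() asserts it.  start_sniffing()/stop_sniffing()
 * are hypothetical; dev_set_allmulti() below follows the same counting scheme
 * for all-multicast reception.
 *
 *	static void start_sniffing(struct net_device *dev)
 *	{
 *		rtnl_lock();
 *		dev_set_promiscuity(dev, 1);
 *		rtnl_unlock();
 *	}
 *
 *	static void stop_sniffing(struct net_device *dev)
 *	{
 *		rtnl_lock();
 *		dev_set_promiscuity(dev, -1);
 *		rtnl_unlock();
 *	}
 */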
2793 
2794 /**
2795  *	dev_set_allmulti	- update allmulti count on a device
2796  *	@dev: device
2797  *	@inc: modifier
2798  *
2799  *	Add or remove reception of all multicast frames to a device. While the
2800  *	count in the device remains above zero the interface remains listening
2801  *	to all interfaces. Once it hits zero the device reverts back to normal
2802  *	to all multicast frames. Once it hits zero the device reverts back to normal
2803  *	when releasing a resource needing all multicasts.
2804  */
2805 
2806 void dev_set_allmulti(struct net_device *dev, int inc)
2807 {
2808 	unsigned short old_flags = dev->flags;
2809 
2810 	ASSERT_RTNL();
2811 
2812 	dev->flags |= IFF_ALLMULTI;
2813 	if ((dev->allmulti += inc) == 0)
2814 		dev->flags &= ~IFF_ALLMULTI;
2815 	if (dev->flags ^ old_flags) {
2816 		if (dev->change_rx_flags)
2817 			dev->change_rx_flags(dev, IFF_ALLMULTI);
2818 		dev_set_rx_mode(dev);
2819 	}
2820 }
2821 
2822 /*
2823  *	Upload unicast and multicast address lists to device and
2824  *	configure RX filtering. When the device doesn't support unicast
2825  *	filtering it is put in promiscuous mode while unicast addresses
2826  *	are present.
2827  */
2828 void __dev_set_rx_mode(struct net_device *dev)
2829 {
2830 	/* dev_open will call this function so the list will stay sane. */
2831 	if (!(dev->flags&IFF_UP))
2832 		return;
2833 
2834 	if (!netif_device_present(dev))
2835 		return;
2836 
2837 	if (dev->set_rx_mode)
2838 		dev->set_rx_mode(dev);
2839 	else {
2840 		/* Unicast address changes may only happen under the rtnl,
2841 		 * therefore calling __dev_set_promiscuity here is safe.
2842 		 */
2843 		if (dev->uc_count > 0 && !dev->uc_promisc) {
2844 			__dev_set_promiscuity(dev, 1);
2845 			dev->uc_promisc = 1;
2846 		} else if (dev->uc_count == 0 && dev->uc_promisc) {
2847 			__dev_set_promiscuity(dev, -1);
2848 			dev->uc_promisc = 0;
2849 		}
2850 
2851 		if (dev->set_multicast_list)
2852 			dev->set_multicast_list(dev);
2853 	}
2854 }
2855 
2856 void dev_set_rx_mode(struct net_device *dev)
2857 {
2858 	netif_tx_lock_bh(dev);
2859 	__dev_set_rx_mode(dev);
2860 	netif_tx_unlock_bh(dev);
2861 }
2862 
2863 int __dev_addr_delete(struct dev_addr_list **list, int *count,
2864 		      void *addr, int alen, int glbl)
2865 {
2866 	struct dev_addr_list *da;
2867 
2868 	for (; (da = *list) != NULL; list = &da->next) {
2869 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
2870 		    alen == da->da_addrlen) {
2871 			if (glbl) {
2872 				int old_glbl = da->da_gusers;
2873 				da->da_gusers = 0;
2874 				if (old_glbl == 0)
2875 					break;
2876 			}
2877 			if (--da->da_users)
2878 				return 0;
2879 
2880 			*list = da->next;
2881 			kfree(da);
2882 			(*count)--;
2883 			return 0;
2884 		}
2885 	}
2886 	return -ENOENT;
2887 }
2888 
2889 int __dev_addr_add(struct dev_addr_list **list, int *count,
2890 		   void *addr, int alen, int glbl)
2891 {
2892 	struct dev_addr_list *da;
2893 
2894 	for (da = *list; da != NULL; da = da->next) {
2895 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
2896 		    da->da_addrlen == alen) {
2897 			if (glbl) {
2898 				int old_glbl = da->da_gusers;
2899 				da->da_gusers = 1;
2900 				if (old_glbl)
2901 					return 0;
2902 			}
2903 			da->da_users++;
2904 			return 0;
2905 		}
2906 	}
2907 
2908 	da = kzalloc(sizeof(*da), GFP_ATOMIC);
2909 	if (da == NULL)
2910 		return -ENOMEM;
2911 	memcpy(da->da_addr, addr, alen);
2912 	da->da_addrlen = alen;
2913 	da->da_users = 1;
2914 	da->da_gusers = glbl ? 1 : 0;
2915 	da->next = *list;
2916 	*list = da;
2917 	(*count)++;
2918 	return 0;
2919 }
2920 
2921 /**
2922  *	dev_unicast_delete	- Release secondary unicast address.
2923  *	@dev: device
2924  *	@addr: address to delete
2925  *	@alen: length of @addr
2926  *
2927  *	Release reference to a secondary unicast address and remove it
2928  *	from the device if the reference count drops to zero.
2929  *
2930  * 	The caller must hold the rtnl_mutex.
2931  */
2932 int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
2933 {
2934 	int err;
2935 
2936 	ASSERT_RTNL();
2937 
2938 	netif_tx_lock_bh(dev);
2939 	err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2940 	if (!err)
2941 		__dev_set_rx_mode(dev);
2942 	netif_tx_unlock_bh(dev);
2943 	return err;
2944 }
2945 EXPORT_SYMBOL(dev_unicast_delete);
2946 
2947 /**
2948  *	dev_unicast_add		- add a secondary unicast address
2949  *	@dev: device
2950  *	@addr: address to add
2951  *	@alen: length of @addr
2952  *
2953  *	Add a secondary unicast address to the device or increase
2954  *	the reference count if it already exists.
2955  *
2956  *	The caller must hold the rtnl_mutex.
2957  */
2958 int dev_unicast_add(struct net_device *dev, void *addr, int alen)
2959 {
2960 	int err;
2961 
2962 	ASSERT_RTNL();
2963 
2964 	netif_tx_lock_bh(dev);
2965 	err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2966 	if (!err)
2967 		__dev_set_rx_mode(dev);
2968 	netif_tx_unlock_bh(dev);
2969 	return err;
2970 }
2971 EXPORT_SYMBOL(dev_unicast_add);
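/*
 * Example (illustrative sketch): a virtual device adds a secondary unicast
 * (MAC) address while attached to a lower device and releases it again on
 * teardown, holding the rtnl_mutex as both kerneldocs above require.
 * my_attach()/my_detach() and the address bytes are hypothetical.
 *
 *	static const unsigned char extra_mac[ETH_ALEN] =
 *		{ 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
 *
 *	static int my_attach(struct net_device *lower)
 *	{
 *		ASSERT_RTNL();
 *		return dev_unicast_add(lower, (void *)extra_mac, ETH_ALEN);
 *	}
 *
 *	static void my_detach(struct net_device *lower)
 *	{
 *		ASSERT_RTNL();
 *		dev_unicast_delete(lower, (void *)extra_mac, ETH_ALEN);
 *	}
 */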
2972 
2973 int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
2974 		    struct dev_addr_list **from, int *from_count)
2975 {
2976 	struct dev_addr_list *da, *next;
2977 	int err = 0;
2978 
2979 	da = *from;
2980 	while (da != NULL) {
2981 		next = da->next;
2982 		if (!da->da_synced) {
2983 			err = __dev_addr_add(to, to_count,
2984 					     da->da_addr, da->da_addrlen, 0);
2985 			if (err < 0)
2986 				break;
2987 			da->da_synced = 1;
2988 			da->da_users++;
2989 		} else if (da->da_users == 1) {
2990 			__dev_addr_delete(to, to_count,
2991 					  da->da_addr, da->da_addrlen, 0);
2992 			__dev_addr_delete(from, from_count,
2993 					  da->da_addr, da->da_addrlen, 0);
2994 		}
2995 		da = next;
2996 	}
2997 	return err;
2998 }
2999 
3000 void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3001 		       struct dev_addr_list **from, int *from_count)
3002 {
3003 	struct dev_addr_list *da, *next;
3004 
3005 	da = *from;
3006 	while (da != NULL) {
3007 		next = da->next;
3008 		if (da->da_synced) {
3009 			__dev_addr_delete(to, to_count,
3010 					  da->da_addr, da->da_addrlen, 0);
3011 			da->da_synced = 0;
3012 			__dev_addr_delete(from, from_count,
3013 					  da->da_addr, da->da_addrlen, 0);
3014 		}
3015 		da = next;
3016 	}
3017 }
3018 
3019 /**
3020  *	dev_unicast_sync - Synchronize device's unicast list to another device
3021  *	@to: destination device
3022  *	@from: source device
3023  *
3024  *	Add newly added addresses to the destination device and release
3025  *	addresses that have no users left. The source device must be
3026  *	locked by netif_tx_lock_bh.
3027  *
3028  *	This function is intended to be called from the dev->set_rx_mode
3029  *	function of layered software devices.
3030  */
3031 int dev_unicast_sync(struct net_device *to, struct net_device *from)
3032 {
3033 	int err = 0;
3034 
3035 	netif_tx_lock_bh(to);
3036 	err = __dev_addr_sync(&to->uc_list, &to->uc_count,
3037 			      &from->uc_list, &from->uc_count);
3038 	if (!err)
3039 		__dev_set_rx_mode(to);
3040 	netif_tx_unlock_bh(to);
3041 	return err;
3042 }
3043 EXPORT_SYMBOL(dev_unicast_sync);
3044 
3045 /**
3046  *	dev_unicast_unsync - Remove synchronized addresses from the destination device
3047  *	@to: destination device
3048  *	@from: source device
3049  *
3050  *	Remove all addresses that were added to the destination device by
3051  *	dev_unicast_sync(). This function is intended to be called from the
3052  *	dev->stop function of layered software devices.
3053  */
3054 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3055 {
3056 	netif_tx_lock_bh(from);
3057 	netif_tx_lock_bh(to);
3058 
3059 	__dev_addr_unsync(&to->uc_list, &to->uc_count,
3060 			  &from->uc_list, &from->uc_count);
3061 	__dev_set_rx_mode(to);
3062 
3063 	netif_tx_unlock_bh(to);
3064 	netif_tx_unlock_bh(from);
3065 }
3066 EXPORT_SYMBOL(dev_unicast_unsync);
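/*
 * Example (illustrative sketch): a layered (stacked) device propagates its
 * unicast filter to the lower device from its ->set_rx_mode() and tears it
 * down from ->stop(), as the kerneldocs above describe.  my_set_rx_mode(),
 * my_stop(), struct my_priv and priv->lowerdev are hypothetical;
 * dev_unicast_sync()/dev_unicast_unsync() are the real calls.
 *
 *	static void my_set_rx_mode(struct net_device *dev)
 *	{
 *		struct my_priv *priv = netdev_priv(dev);
 *
 *		dev_unicast_sync(priv->lowerdev, dev);
 *	}
 *
 *	static int my_stop(struct net_device *dev)
 *	{
 *		struct my_priv *priv = netdev_priv(dev);
 *
 *		dev_unicast_unsync(priv->lowerdev, dev);
 *		return 0;
 *	}
 */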
3067 
3068 static void __dev_addr_discard(struct dev_addr_list **list)
3069 {
3070 	struct dev_addr_list *tmp;
3071 
3072 	while (*list != NULL) {
3073 		tmp = *list;
3074 		*list = tmp->next;
3075 		if (tmp->da_users > tmp->da_gusers)
3076 			printk(KERN_ERR "__dev_addr_discard: address leakage! "
3077 			       "da_users=%d\n", tmp->da_users);
3078 		kfree(tmp);
3079 	}
3080 }
3081 
3082 static void dev_addr_discard(struct net_device *dev)
3083 {
3084 	netif_tx_lock_bh(dev);
3085 
3086 	__dev_addr_discard(&dev->uc_list);
3087 	dev->uc_count = 0;
3088 
3089 	__dev_addr_discard(&dev->mc_list);
3090 	dev->mc_count = 0;
3091 
3092 	netif_tx_unlock_bh(dev);
3093 }
3094 
3095 unsigned dev_get_flags(const struct net_device *dev)
3096 {
3097 	unsigned flags;
3098 
3099 	flags = (dev->flags & ~(IFF_PROMISC |
3100 				IFF_ALLMULTI |
3101 				IFF_RUNNING |
3102 				IFF_LOWER_UP |
3103 				IFF_DORMANT)) |
3104 		(dev->gflags & (IFF_PROMISC |
3105 				IFF_ALLMULTI));
3106 
3107 	if (netif_running(dev)) {
3108 		if (netif_oper_up(dev))
3109 			flags |= IFF_RUNNING;
3110 		if (netif_carrier_ok(dev))
3111 			flags |= IFF_LOWER_UP;
3112 		if (netif_dormant(dev))
3113 			flags |= IFF_DORMANT;
3114 	}
3115 
3116 	return flags;
3117 }
3118 
3119 int dev_change_flags(struct net_device *dev, unsigned flags)
3120 {
3121 	int ret, changes;
3122 	int old_flags = dev->flags;
3123 
3124 	ASSERT_RTNL();
3125 
3126 	/*
3127 	 *	Set the flags on our device.
3128 	 */
3129 
3130 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3131 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3132 			       IFF_AUTOMEDIA)) |
3133 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3134 				    IFF_ALLMULTI));
3135 
3136 	/*
3137 	 *	Load in the correct multicast list now the flags have changed.
3138 	 */
3139 
3140 	if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST)
3141 		dev->change_rx_flags(dev, IFF_MULTICAST);
3142 
3143 	dev_set_rx_mode(dev);
3144 
3145 	/*
3146 	 *	Have we downed the interface?  We handle IFF_UP ourselves
3147 	 *	according to user attempts to set it, rather than blindly
3148 	 *	setting it.
3149 	 */
3150 
3151 	ret = 0;
3152 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
3153 		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3154 
3155 		if (!ret)
3156 			dev_set_rx_mode(dev);
3157 	}
3158 
3159 	if (dev->flags & IFF_UP &&
3160 	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3161 					  IFF_VOLATILE)))
3162 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
3163 
3164 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
3165 		int inc = (flags & IFF_PROMISC) ? +1 : -1;
3166 		dev->gflags ^= IFF_PROMISC;
3167 		dev_set_promiscuity(dev, inc);
3168 	}
3169 
3170 	/* NOTE: the order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3171 	   is important.  Some (broken) drivers set IFF_PROMISC when
3172 	   IFF_ALLMULTI is requested, without asking us and without reporting it.
3173 	 */
3174 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3175 		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3176 		dev->gflags ^= IFF_ALLMULTI;
3177 		dev_set_allmulti(dev, inc);
3178 	}
3179 
3180 	/* Exclude state transition flags, already notified */
3181 	changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3182 	if (changes)
3183 		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
3184 
3185 	return ret;
3186 }
3187 
3188 int dev_set_mtu(struct net_device *dev, int new_mtu)
3189 {
3190 	int err;
3191 
3192 	if (new_mtu == dev->mtu)
3193 		return 0;
3194 
3195 	/*	MTU must be positive.	 */
3196 	if (new_mtu < 0)
3197 		return -EINVAL;
3198 
3199 	if (!netif_device_present(dev))
3200 		return -ENODEV;
3201 
3202 	err = 0;
3203 	if (dev->change_mtu)
3204 		err = dev->change_mtu(dev, new_mtu);
3205 	else
3206 		dev->mtu = new_mtu;
3207 	if (!err && dev->flags & IFF_UP)
3208 		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
3209 	return err;
3210 }
3211 
3212 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3213 {
3214 	int err;
3215 
3216 	if (!dev->set_mac_address)
3217 		return -EOPNOTSUPP;
3218 	if (sa->sa_family != dev->type)
3219 		return -EINVAL;
3220 	if (!netif_device_present(dev))
3221 		return -ENODEV;
3222 	err = dev->set_mac_address(dev, sa);
3223 	if (!err)
3224 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3225 	return err;
3226 }
3227 
3228 /*
3229  *	Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
3230  */
3231 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
3232 {
3233 	int err;
3234 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3235 
3236 	if (!dev)
3237 		return -ENODEV;
3238 
3239 	switch (cmd) {
3240 		case SIOCGIFFLAGS:	/* Get interface flags */
3241 			ifr->ifr_flags = dev_get_flags(dev);
3242 			return 0;
3243 
3244 		case SIOCGIFMETRIC:	/* Get the metric on the interface
3245 					   (currently unused) */
3246 			ifr->ifr_metric = 0;
3247 			return 0;
3248 
3249 		case SIOCGIFMTU:	/* Get the MTU of a device */
3250 			ifr->ifr_mtu = dev->mtu;
3251 			return 0;
3252 
3253 		case SIOCGIFHWADDR:
3254 			if (!dev->addr_len)
3255 				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3256 			else
3257 				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3258 				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3259 			ifr->ifr_hwaddr.sa_family = dev->type;
3260 			return 0;
3261 
3262 		case SIOCGIFSLAVE:
3263 			err = -EINVAL;
3264 			break;
3265 
3266 		case SIOCGIFMAP:
3267 			ifr->ifr_map.mem_start = dev->mem_start;
3268 			ifr->ifr_map.mem_end   = dev->mem_end;
3269 			ifr->ifr_map.base_addr = dev->base_addr;
3270 			ifr->ifr_map.irq       = dev->irq;
3271 			ifr->ifr_map.dma       = dev->dma;
3272 			ifr->ifr_map.port      = dev->if_port;
3273 			return 0;
3274 
3275 		case SIOCGIFINDEX:
3276 			ifr->ifr_ifindex = dev->ifindex;
3277 			return 0;
3278 
3279 		case SIOCGIFTXQLEN:
3280 			ifr->ifr_qlen = dev->tx_queue_len;
3281 			return 0;
3282 
3283 		default:
3284 			/* dev_ioctl() should ensure this case
3285 			 * is never reached
3286 			 */
3287 			WARN_ON(1);
3288 			err = -EINVAL;
3289 			break;
3290 
3291 	}
3292 	return err;
3293 }
3294 
3295 /*
3296  *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
3297  */
3298 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3299 {
3300 	int err;
3301 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3302 
3303 	if (!dev)
3304 		return -ENODEV;
3305 
3306 	switch (cmd) {
3307 		case SIOCSIFFLAGS:	/* Set interface flags */
3308 			return dev_change_flags(dev, ifr->ifr_flags);
3309 
3310 		case SIOCSIFMETRIC:	/* Set the metric on the interface
3311 					   (currently unused) */
3312 			return -EOPNOTSUPP;
3313 
3314 		case SIOCSIFMTU:	/* Set the MTU of a device */
3315 			return dev_set_mtu(dev, ifr->ifr_mtu);
3316 
3317 		case SIOCSIFHWADDR:
3318 			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3319 
3320 		case SIOCSIFHWBROADCAST:
3321 			if (ifr->ifr_hwaddr.sa_family != dev->type)
3322 				return -EINVAL;
3323 			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3324 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3325 			call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3326 			return 0;
3327 
3328 		case SIOCSIFMAP:
3329 			if (dev->set_config) {
3330 				if (!netif_device_present(dev))
3331 					return -ENODEV;
3332 				return dev->set_config(dev, &ifr->ifr_map);
3333 			}
3334 			return -EOPNOTSUPP;
3335 
3336 		case SIOCADDMULTI:
3337 			if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
3338 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3339 				return -EINVAL;
3340 			if (!netif_device_present(dev))
3341 				return -ENODEV;
3342 			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3343 					  dev->addr_len, 1);
3344 
3345 		case SIOCDELMULTI:
3346 			if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
3347 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3348 				return -EINVAL;
3349 			if (!netif_device_present(dev))
3350 				return -ENODEV;
3351 			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3352 					     dev->addr_len, 1);
3353 
3354 		case SIOCSIFTXQLEN:
3355 			if (ifr->ifr_qlen < 0)
3356 				return -EINVAL;
3357 			dev->tx_queue_len = ifr->ifr_qlen;
3358 			return 0;
3359 
3360 		case SIOCSIFNAME:
3361 			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3362 			return dev_change_name(dev, ifr->ifr_newname);
3363 
3364 		/*
3365 		 *	Unknown or private ioctl
3366 		 */
3367 
3368 		default:
3369 			if ((cmd >= SIOCDEVPRIVATE &&
3370 			    cmd <= SIOCDEVPRIVATE + 15) ||
3371 			    cmd == SIOCBONDENSLAVE ||
3372 			    cmd == SIOCBONDRELEASE ||
3373 			    cmd == SIOCBONDSETHWADDR ||
3374 			    cmd == SIOCBONDSLAVEINFOQUERY ||
3375 			    cmd == SIOCBONDINFOQUERY ||
3376 			    cmd == SIOCBONDCHANGEACTIVE ||
3377 			    cmd == SIOCGMIIPHY ||
3378 			    cmd == SIOCGMIIREG ||
3379 			    cmd == SIOCSMIIREG ||
3380 			    cmd == SIOCBRADDIF ||
3381 			    cmd == SIOCBRDELIF ||
3382 			    cmd == SIOCWANDEV) {
3383 				err = -EOPNOTSUPP;
3384 				if (dev->do_ioctl) {
3385 					if (netif_device_present(dev))
3386 						err = dev->do_ioctl(dev, ifr,
3387 								    cmd);
3388 					else
3389 						err = -ENODEV;
3390 				}
3391 			} else
3392 				err = -EINVAL;
3393 
3394 	}
3395 	return err;
3396 }
3397 
3398 /*
3399  *	This function handles all "interface"-type I/O control requests. The actual
3400  *	'doing' part of this is dev_ifsioc above.
3401  */
3402 
3403 /**
3404  *	dev_ioctl	-	network device ioctl
3405  *	@net: the applicable net namespace
3406  *	@cmd: command to issue
3407  *	@arg: pointer to a struct ifreq in user space
3408  *
3409  *	Issue ioctl functions to devices. This is normally called by the
3410  *	user space syscall interfaces but can sometimes be useful for
3411  *	other purposes. The return value is the return from the syscall if
3412  *	positive or a negative errno code on error.
3413  */
3414 
3415 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3416 {
3417 	struct ifreq ifr;
3418 	int ret;
3419 	char *colon;
3420 
3421 	/* One special case: SIOCGIFCONF takes ifconf argument
3422 	   and requires shared lock, because it sleeps writing
3423 	   to user space.
3424 	 */
3425 
3426 	if (cmd == SIOCGIFCONF) {
3427 		rtnl_lock();
3428 		ret = dev_ifconf(net, (char __user *) arg);
3429 		rtnl_unlock();
3430 		return ret;
3431 	}
3432 	if (cmd == SIOCGIFNAME)
3433 		return dev_ifname(net, (struct ifreq __user *)arg);
3434 
3435 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3436 		return -EFAULT;
3437 
3438 	ifr.ifr_name[IFNAMSIZ-1] = 0;
3439 
3440 	colon = strchr(ifr.ifr_name, ':');
3441 	if (colon)
3442 		*colon = 0;
3443 
3444 	/*
3445 	 *	See which interface the caller is talking about.
3446 	 */
3447 
3448 	switch (cmd) {
3449 		/*
3450 		 *	These ioctl calls:
3451 		 *	- can be done by all.
3452 		 *	- atomic and do not require locking.
3453 		 *	- return a value
3454 		 */
3455 		case SIOCGIFFLAGS:
3456 		case SIOCGIFMETRIC:
3457 		case SIOCGIFMTU:
3458 		case SIOCGIFHWADDR:
3459 		case SIOCGIFSLAVE:
3460 		case SIOCGIFMAP:
3461 		case SIOCGIFINDEX:
3462 		case SIOCGIFTXQLEN:
3463 			dev_load(net, ifr.ifr_name);
3464 			read_lock(&dev_base_lock);
3465 			ret = dev_ifsioc_locked(net, &ifr, cmd);
3466 			read_unlock(&dev_base_lock);
3467 			if (!ret) {
3468 				if (colon)
3469 					*colon = ':';
3470 				if (copy_to_user(arg, &ifr,
3471 						 sizeof(struct ifreq)))
3472 					ret = -EFAULT;
3473 			}
3474 			return ret;
3475 
3476 		case SIOCETHTOOL:
3477 			dev_load(net, ifr.ifr_name);
3478 			rtnl_lock();
3479 			ret = dev_ethtool(net, &ifr);
3480 			rtnl_unlock();
3481 			if (!ret) {
3482 				if (colon)
3483 					*colon = ':';
3484 				if (copy_to_user(arg, &ifr,
3485 						 sizeof(struct ifreq)))
3486 					ret = -EFAULT;
3487 			}
3488 			return ret;
3489 
3490 		/*
3491 		 *	These ioctl calls:
3492 		 *	- require superuser power.
3493 		 *	- require strict serialization.
3494 		 *	- return a value
3495 		 */
3496 		case SIOCGMIIPHY:
3497 		case SIOCGMIIREG:
3498 		case SIOCSIFNAME:
3499 			if (!capable(CAP_NET_ADMIN))
3500 				return -EPERM;
3501 			dev_load(net, ifr.ifr_name);
3502 			rtnl_lock();
3503 			ret = dev_ifsioc(net, &ifr, cmd);
3504 			rtnl_unlock();
3505 			if (!ret) {
3506 				if (colon)
3507 					*colon = ':';
3508 				if (copy_to_user(arg, &ifr,
3509 						 sizeof(struct ifreq)))
3510 					ret = -EFAULT;
3511 			}
3512 			return ret;
3513 
3514 		/*
3515 		 *	These ioctl calls:
3516 		 *	- require superuser power.
3517 		 *	- require strict serialization.
3518 		 *	- do not return a value
3519 		 */
3520 		case SIOCSIFFLAGS:
3521 		case SIOCSIFMETRIC:
3522 		case SIOCSIFMTU:
3523 		case SIOCSIFMAP:
3524 		case SIOCSIFHWADDR:
3525 		case SIOCSIFSLAVE:
3526 		case SIOCADDMULTI:
3527 		case SIOCDELMULTI:
3528 		case SIOCSIFHWBROADCAST:
3529 		case SIOCSIFTXQLEN:
3530 		case SIOCSMIIREG:
3531 		case SIOCBONDENSLAVE:
3532 		case SIOCBONDRELEASE:
3533 		case SIOCBONDSETHWADDR:
3534 		case SIOCBONDCHANGEACTIVE:
3535 		case SIOCBRADDIF:
3536 		case SIOCBRDELIF:
3537 			if (!capable(CAP_NET_ADMIN))
3538 				return -EPERM;
3539 			/* fall through */
3540 		case SIOCBONDSLAVEINFOQUERY:
3541 		case SIOCBONDINFOQUERY:
3542 			dev_load(net, ifr.ifr_name);
3543 			rtnl_lock();
3544 			ret = dev_ifsioc(net, &ifr, cmd);
3545 			rtnl_unlock();
3546 			return ret;
3547 
3548 		case SIOCGIFMEM:
3549 			/* Get the per device memory space. We can add this but
3550 			 * currently do not support it */
3551 		case SIOCSIFMEM:
3552 			/* Set the per device memory buffer space.
3553 			 * Not applicable in our case */
3554 		case SIOCSIFLINK:
3555 			return -EINVAL;
3556 
3557 		/*
3558 		 *	Unknown or private ioctl.
3559 		 */
3560 		default:
3561 			if (cmd == SIOCWANDEV ||
3562 			    (cmd >= SIOCDEVPRIVATE &&
3563 			     cmd <= SIOCDEVPRIVATE + 15)) {
3564 				dev_load(net, ifr.ifr_name);
3565 				rtnl_lock();
3566 				ret = dev_ifsioc(net, &ifr, cmd);
3567 				rtnl_unlock();
3568 				if (!ret && copy_to_user(arg, &ifr,
3569 							 sizeof(struct ifreq)))
3570 					ret = -EFAULT;
3571 				return ret;
3572 			}
3573 			/* Take care of Wireless Extensions */
3574 			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
3575 				return wext_handle_ioctl(net, &ifr, cmd, arg);
3576 			return -EINVAL;
3577 	}
3578 }
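/*
 * Example (illustrative sketch): from user space, the interface ioctls
 * handled above are issued on any socket.  This snippet reads the flags of
 * a named interface; the helper and its error handling are only a sketch.
 *
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *	#include <net/if.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int get_flags(const char *name)
 *	{
 *		struct ifreq ifr;
 *		int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *		int flags = -1;
 *
 *		if (fd < 0)
 *			return -1;
 *		memset(&ifr, 0, sizeof(ifr));
 *		strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
 *		if (ioctl(fd, SIOCGIFFLAGS, &ifr) == 0)
 *			flags = ifr.ifr_flags;
 *		close(fd);
 *		return flags;
 *	}
 */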
3579 
3580 
3581 /**
3582  *	dev_new_index	-	allocate an ifindex
3583  *	@net: the applicable net namespace
3584  *
3585  *	Returns a suitable unique value for a new device interface
3586  *	number.  The caller must hold the rtnl semaphore or the
3587  *	dev_base_lock to be sure it remains unique.
3588  */
3589 static int dev_new_index(struct net *net)
3590 {
3591 	static int ifindex;
3592 	for (;;) {
3593 		if (++ifindex <= 0)
3594 			ifindex = 1;
3595 		if (!__dev_get_by_index(net, ifindex))
3596 			return ifindex;
3597 	}
3598 }
3599 
3600 /* Delayed registration/unregisteration */
3601 static DEFINE_SPINLOCK(net_todo_list_lock);
3602 static LIST_HEAD(net_todo_list);
3603 
3604 static void net_set_todo(struct net_device *dev)
3605 {
3606 	spin_lock(&net_todo_list_lock);
3607 	list_add_tail(&dev->todo_list, &net_todo_list);
3608 	spin_unlock(&net_todo_list_lock);
3609 }
3610 
3611 static void rollback_registered(struct net_device *dev)
3612 {
3613 	BUG_ON(dev_boot_phase);
3614 	ASSERT_RTNL();
3615 
3616 	/* Some devices call this without having registered, as part of unwinding a failed initialization. */
3617 	if (dev->reg_state == NETREG_UNINITIALIZED) {
3618 		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3619 				  "was registered\n", dev->name, dev);
3620 
3621 		WARN_ON(1);
3622 		return;
3623 	}
3624 
3625 	BUG_ON(dev->reg_state != NETREG_REGISTERED);
3626 
3627 	/* If device is running, close it first. */
3628 	dev_close(dev);
3629 
3630 	/* And unlink it from device chain. */
3631 	unlist_netdevice(dev);
3632 
3633 	dev->reg_state = NETREG_UNREGISTERING;
3634 
3635 	synchronize_net();
3636 
3637 	/* Shutdown queueing discipline. */
3638 	dev_shutdown(dev);
3639 
3640 
3641 	/* Notify protocols, that we are about to destroy
3642 	   this device. They should clean all the things.
3643 	*/
3644 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
3645 
3646 	/*
3647 	 *	Flush the unicast and multicast chains
3648 	 */
3649 	dev_addr_discard(dev);
3650 
3651 	if (dev->uninit)
3652 		dev->uninit(dev);
3653 
3654 	/* Notifier chain MUST detach us from master device. */
3655 	BUG_TRAP(!dev->master);
3656 
3657 	/* Remove entries from kobject tree */
3658 	netdev_unregister_kobject(dev);
3659 
3660 	synchronize_net();
3661 
3662 	dev_put(dev);
3663 }
3664 
3665 /**
3666  *	register_netdevice	- register a network device
3667  *	@dev: device to register
3668  *
3669  *	Take a completed network device structure and add it to the kernel
3670  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3671  *	chain. 0 is returned on success. A negative errno code is returned
3672  *	on a failure to set up the device, or if the name is a duplicate.
3673  *
3674  *	Callers must hold the rtnl semaphore. You may want
3675  *	register_netdev() instead of this.
3676  *
3677  *	BUGS:
3678  *	The locking appears insufficient to guarantee two parallel registers
3679  *	will not get the same name.
3680  */
3681 
3682 int register_netdevice(struct net_device *dev)
3683 {
3684 	struct hlist_head *head;
3685 	struct hlist_node *p;
3686 	int ret;
3687 	struct net *net;
3688 
3689 	BUG_ON(dev_boot_phase);
3690 	ASSERT_RTNL();
3691 
3692 	might_sleep();
3693 
3694 	/* When net_devices are persistent, this will be fatal. */
3695 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
3696 	BUG_ON(!dev_net(dev));
3697 	net = dev_net(dev);
3698 
3699 	spin_lock_init(&dev->queue_lock);
3700 	spin_lock_init(&dev->_xmit_lock);
3701 	netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
3702 	dev->xmit_lock_owner = -1;
3703 	spin_lock_init(&dev->ingress_lock);
3704 
3705 	dev->iflink = -1;
3706 
3707 	/* Init, if this function is available */
3708 	if (dev->init) {
3709 		ret = dev->init(dev);
3710 		if (ret) {
3711 			if (ret > 0)
3712 				ret = -EIO;
3713 			goto out;
3714 		}
3715 	}
3716 
3717 	if (!dev_valid_name(dev->name)) {
3718 		ret = -EINVAL;
3719 		goto err_uninit;
3720 	}
3721 
3722 	dev->ifindex = dev_new_index(net);
3723 	if (dev->iflink == -1)
3724 		dev->iflink = dev->ifindex;
3725 
3726 	/* Check for existence of name */
3727 	head = dev_name_hash(net, dev->name);
3728 	hlist_for_each(p, head) {
3729 		struct net_device *d
3730 			= hlist_entry(p, struct net_device, name_hlist);
3731 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
3732 			ret = -EEXIST;
3733 			goto err_uninit;
3734 		}
3735 	}
3736 
3737 	/* Fix illegal checksum combinations */
3738 	if ((dev->features & NETIF_F_HW_CSUM) &&
3739 	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
3740 		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
3741 		       dev->name);
3742 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
3743 	}
3744 
3745 	if ((dev->features & NETIF_F_NO_CSUM) &&
3746 	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
3747 		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
3748 		       dev->name);
3749 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
3750 	}
3751 
3752 
3753 	/* Fix illegal SG+CSUM combinations. */
3754 	if ((dev->features & NETIF_F_SG) &&
3755 	    !(dev->features & NETIF_F_ALL_CSUM)) {
3756 		printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
3757 		       dev->name);
3758 		dev->features &= ~NETIF_F_SG;
3759 	}
3760 
3761 	/* TSO requires that SG is present as well. */
3762 	if ((dev->features & NETIF_F_TSO) &&
3763 	    !(dev->features & NETIF_F_SG)) {
3764 		printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
3765 		       dev->name);
3766 		dev->features &= ~NETIF_F_TSO;
3767 	}
3768 	if (dev->features & NETIF_F_UFO) {
3769 		if (!(dev->features & NETIF_F_HW_CSUM)) {
3770 			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3771 					"NETIF_F_HW_CSUM feature.\n",
3772 							dev->name);
3773 			dev->features &= ~NETIF_F_UFO;
3774 		}
3775 		if (!(dev->features & NETIF_F_SG)) {
3776 			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3777 					"NETIF_F_SG feature.\n",
3778 					dev->name);
3779 			dev->features &= ~NETIF_F_UFO;
3780 		}
3781 	}
3782 
3783 	netdev_initialize_kobject(dev);
3784 	ret = netdev_register_kobject(dev);
3785 	if (ret)
3786 		goto err_uninit;
3787 	dev->reg_state = NETREG_REGISTERED;
3788 
3789 	/*
3790 	 *	Default initial state at registry is that the
3791 	 *	device is present.
3792 	 */
3793 
3794 	set_bit(__LINK_STATE_PRESENT, &dev->state);
3795 
3796 	dev_init_scheduler(dev);
3797 	dev_hold(dev);
3798 	list_netdevice(dev);
3799 
3800 	/* Notify protocols, that a new device appeared. */
3801 	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
3802 	ret = notifier_to_errno(ret);
3803 	if (ret) {
3804 		rollback_registered(dev);
3805 		dev->reg_state = NETREG_UNREGISTERED;
3806 	}
3807 
3808 out:
3809 	return ret;
3810 
3811 err_uninit:
3812 	if (dev->uninit)
3813 		dev->uninit(dev);
3814 	goto out;
3815 }
3816 
3817 /**
3818  *	register_netdev	- register a network device
3819  *	@dev: device to register
3820  *
3821  *	Take a completed network device structure and add it to the kernel
3822  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3823  *	chain. 0 is returned on success. A negative errno code is returned
3824  *	on a failure to set up the device, or if the name is a duplicate.
3825  *
3826  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
3827  *	and expands the device name if you passed a format string to
3828  *	alloc_netdev.
3829  */
3830 int register_netdev(struct net_device *dev)
3831 {
3832 	int err;
3833 
3834 	rtnl_lock();
3835 
3836 	/*
3837 	 * If the name is a format string the caller wants us to do a
3838 	 * name allocation.
3839 	 */
3840 	if (strchr(dev->name, '%')) {
3841 		err = dev_alloc_name(dev, dev->name);
3842 		if (err < 0)
3843 			goto out;
3844 	}
3845 
3846 	err = register_netdevice(dev);
3847 out:
3848 	rtnl_unlock();
3849 	return err;
3850 }
3851 EXPORT_SYMBOL(register_netdev);
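/*
 * Example (illustrative sketch): a driver's probe path allocates an Ethernet
 * device, fills in its methods, and registers it; on failure the device is
 * freed again.  my_probe(), my_open(), my_stop(), my_start_xmit() and
 * struct my_priv are hypothetical; alloc_etherdev(), register_netdev()
 * (which takes rtnl_lock internally) and free_netdev() are the real API.
 *
 *	static int my_probe(void)
 *	{
 *		struct net_device *dev;
 *		int err;
 *
 *		dev = alloc_etherdev(sizeof(struct my_priv));
 *		if (!dev)
 *			return -ENOMEM;
 *
 *		dev->open		= my_open;
 *		dev->stop		= my_stop;
 *		dev->hard_start_xmit	= my_start_xmit;
 *
 *		err = register_netdev(dev);
 *		if (err)
 *			free_netdev(dev);
 *		return err;
 *	}
 */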
3852 
3853 /*
3854  * netdev_wait_allrefs - wait until all references are gone.
3855  *
3856  * This is called when unregistering network devices.
3857  *
3858  * Any protocol or device that holds a reference should register
3859  * for netdevice notification, and cleanup and put back the
3860  * reference if they receive an UNREGISTER event.
3861  * We can get stuck here if buggy protocols don't correctly
3862  * call dev_put.
3863  */
3864 static void netdev_wait_allrefs(struct net_device *dev)
3865 {
3866 	unsigned long rebroadcast_time, warning_time;
3867 
3868 	rebroadcast_time = warning_time = jiffies;
3869 	while (atomic_read(&dev->refcnt) != 0) {
3870 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
3871 			rtnl_lock();
3872 
3873 			/* Rebroadcast unregister notification */
3874 			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
3875 
3876 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
3877 				     &dev->state)) {
3878 				/* We must not have linkwatch events
3879 				 * pending on unregister. If this
3880 				 * happens, we simply run the queue
3881 				 * unscheduled, resulting in a noop
3882 				 * for this device.
3883 				 */
3884 				linkwatch_run_queue();
3885 			}
3886 
3887 			__rtnl_unlock();
3888 
3889 			rebroadcast_time = jiffies;
3890 		}
3891 
3892 		msleep(250);
3893 
3894 		if (time_after(jiffies, warning_time + 10 * HZ)) {
3895 			printk(KERN_EMERG "unregister_netdevice: "
3896 			       "waiting for %s to become free. Usage "
3897 			       "count = %d\n",
3898 			       dev->name, atomic_read(&dev->refcnt));
3899 			warning_time = jiffies;
3900 		}
3901 	}
3902 }
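
/*
 * Editorial example (not part of the original file, compiled out below):
 * a hedged sketch of the pattern described above.  A hypothetical user
 * that caches a device pointer (taken with dev_hold()) registers a
 * netdevice notifier and returns the reference on NETDEV_UNREGISTER so
 * that netdev_wait_allrefs() can complete.
 */
#if 0
static struct net_device *example_cached_dev;	/* holds one dev_hold() */

static int example_netdev_event(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	if (event == NETDEV_UNREGISTER && dev == example_cached_dev) {
		example_cached_dev = NULL;
		dev_put(dev);			/* give the reference back */
	}
	return NOTIFY_DONE;
}

static struct notifier_block example_netdev_notifier = {
	.notifier_call = example_netdev_event,
};
/* registered elsewhere with register_netdevice_notifier() */
#endif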
3903 
3904 /* The sequence is:
3905  *
3906  *	rtnl_lock();
3907  *	...
3908  *	register_netdevice(x1);
3909  *	register_netdevice(x2);
3910  *	...
3911  *	unregister_netdevice(y1);
3912  *	unregister_netdevice(y2);
3913  *      ...
3914  *	rtnl_unlock();
3915  *	free_netdev(y1);
3916  *	free_netdev(y2);
3917  *
3918  * We are invoked by rtnl_unlock() after it drops the semaphore.
3919  * This allows us to deal with problems:
3920  * 1) We can delete sysfs objects which invoke hotplug
3921  *    without deadlocking with linkwatch via keventd.
3922  * 2) Since we run with the RTNL semaphore not held, we can sleep
3923  *    safely in order to wait for the netdev refcnt to drop to zero.
3924  */
3925 static DEFINE_MUTEX(net_todo_run_mutex);
3926 void netdev_run_todo(void)
3927 {
3928 	struct list_head list;
3929 
3930 	/* Need to guard against multiple CPUs getting out of order. */
3931 	mutex_lock(&net_todo_run_mutex);
3932 
3933 	/* Not safe to do outside the semaphore.  We must not return
3934 	 * until all unregister events invoked by the local processor
3935 	 * have been completed (either by this todo run, or one on
3936 	 * another cpu).
3937 	 */
3938 	if (list_empty(&net_todo_list))
3939 		goto out;
3940 
3941 	/* Snapshot list, allow later requests */
3942 	spin_lock(&net_todo_list_lock);
3943 	list_replace_init(&net_todo_list, &list);
3944 	spin_unlock(&net_todo_list_lock);
3945 
3946 	while (!list_empty(&list)) {
3947 		struct net_device *dev
3948 			= list_entry(list.next, struct net_device, todo_list);
3949 		list_del(&dev->todo_list);
3950 
3951 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
3952 			printk(KERN_ERR "network todo '%s' but state %d\n",
3953 			       dev->name, dev->reg_state);
3954 			dump_stack();
3955 			continue;
3956 		}
3957 
3958 		dev->reg_state = NETREG_UNREGISTERED;
3959 
3960 		netdev_wait_allrefs(dev);
3961 
3962 		/* paranoia */
3963 		BUG_ON(atomic_read(&dev->refcnt));
3964 		BUG_TRAP(!dev->ip_ptr);
3965 		BUG_TRAP(!dev->ip6_ptr);
3966 		BUG_TRAP(!dev->dn_ptr);
3967 
3968 		if (dev->destructor)
3969 			dev->destructor(dev);
3970 
3971 		/* Free network device */
3972 		kobject_put(&dev->dev.kobj);
3973 	}
3974 
3975 out:
3976 	mutex_unlock(&net_todo_run_mutex);
3977 }
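
/*
 * Editorial example (not part of the original file, compiled out below):
 * the batching sequence documented above netdev_run_todo(), sketched for
 * two hypothetical devices.  rtnl_unlock() runs the todo list, so both
 * devices have reached NETREG_UNREGISTERED before free_netdev() is called.
 */
#if 0
static void example_batch_unregister(struct net_device *y1,
				     struct net_device *y2)
{
	rtnl_lock();
	unregister_netdevice(y1);	/* queued on net_todo_list */
	unregister_netdevice(y2);
	rtnl_unlock();			/* runs netdev_run_todo() */

	free_netdev(y1);
	free_netdev(y2);
}
#endif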
3978 
3979 static struct net_device_stats *internal_stats(struct net_device *dev)
3980 {
3981 	return &dev->stats;
3982 }
3983 
3984 /**
3985  *	alloc_netdev_mq - allocate network device
3986  *	@sizeof_priv:	size of private data to allocate space for
3987  *	@name:		device name format string
3988  *	@setup:		callback to initialize device
3989  *	@queue_count:	the number of subqueues to allocate
3990  *
3991  *	Allocates a struct net_device with private data area for driver use
3992  *	and performs basic initialization.  Also allocates subqueue structs
3993  *	for each queue on the device at the end of the netdevice.
3994  */
3995 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
3996 		void (*setup)(struct net_device *), unsigned int queue_count)
3997 {
3998 	void *p;
3999 	struct net_device *dev;
4000 	int alloc_size;
4001 
4002 	BUG_ON(strlen(name) >= sizeof(dev->name));
4003 
4004 	alloc_size = sizeof(struct net_device) +
4005 		     sizeof(struct net_device_subqueue) * (queue_count - 1);
4006 	if (sizeof_priv) {
4007 		/* ensure 32-byte alignment of private area */
4008 		alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
4009 		alloc_size += sizeof_priv;
4010 	}
4011 	/* ensure 32-byte alignment of whole construct */
4012 	alloc_size += NETDEV_ALIGN_CONST;
4013 
4014 	p = kzalloc(alloc_size, GFP_KERNEL);
4015 	if (!p) {
4016 		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
4017 		return NULL;
4018 	}
4019 
4020 	dev = (struct net_device *)
4021 		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
4022 	dev->padded = (char *)dev - (char *)p;
4023 	dev_net_set(dev, &init_net);
4024 
4025 	if (sizeof_priv) {
4026 		dev->priv = ((char *)dev +
4027 			     ((sizeof(struct net_device) +
4028 			       (sizeof(struct net_device_subqueue) *
4029 				(queue_count - 1)) + NETDEV_ALIGN_CONST)
4030 			      & ~NETDEV_ALIGN_CONST));
4031 	}
4032 
4033 	dev->egress_subqueue_count = queue_count;
4034 	dev->gso_max_size = GSO_MAX_SIZE;
4035 
4036 	dev->get_stats = internal_stats;
4037 	netpoll_netdev_init(dev);
4038 	setup(dev);
4039 	strcpy(dev->name, name);
4040 	return dev;
4041 }
4042 EXPORT_SYMBOL(alloc_netdev_mq);
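
/*
 * Editorial example (not part of the original file, compiled out below):
 * a minimal sketch of alloc_netdev_mq() for a hypothetical driver that
 * wants four TX subqueues and a private area reached via netdev_priv().
 */
#if 0
struct example_mq_priv {
	spinlock_t lock;
};

static struct net_device *example_mq_alloc(void)
{
	struct net_device *dev;
	struct example_mq_priv *priv;

	dev = alloc_netdev_mq(sizeof(*priv), "mq%d", ether_setup, 4);
	if (!dev)
		return NULL;

	priv = netdev_priv(dev);
	spin_lock_init(&priv->lock);
	dev->features |= NETIF_F_MULTI_QUEUE;	/* actually use the subqueues */
	return dev;
}
#endif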
4043 
4044 /**
4045  *	free_netdev - free network device
4046  *	@dev: device
4047  *
4048  *	This function does the last stage of destroying an allocated device
4049  * 	interface. The reference to the device object is released.
4050  *	If this is the last reference then it will be freed.
4051  */
4052 void free_netdev(struct net_device *dev)
4053 {
4054 	release_net(dev_net(dev));
4055 
4056 	/*  Compatibility with error handling in drivers */
4057 	if (dev->reg_state == NETREG_UNINITIALIZED) {
4058 		kfree((char *)dev - dev->padded);
4059 		return;
4060 	}
4061 
4062 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
4063 	dev->reg_state = NETREG_RELEASED;
4064 
4065 	/* will free via device release */
4066 	put_device(&dev->dev);
4067 }
4068 
4069 /* Synchronize with packet receive processing. */
4070 void synchronize_net(void)
4071 {
4072 	might_sleep();
4073 	synchronize_rcu();
4074 }
4075 
4076 /**
4077  *	unregister_netdevice - remove device from the kernel
4078  *	@dev: device
4079  *
4080  *	This function shuts down a device interface and removes it
4081  *	from the kernel tables.
4082  *
4083  *	Callers must hold the rtnl semaphore.  You may want
4084  *	unregister_netdev() instead of this.
4085  */
4086 
4087 void unregister_netdevice(struct net_device *dev)
4088 {
4089 	ASSERT_RTNL();
4090 
4091 	rollback_registered(dev);
4092 	/* Finish processing unregister after unlock */
4093 	net_set_todo(dev);
4094 }
4095 
4096 /**
4097  *	unregister_netdev - remove device from the kernel
4098  *	@dev: device
4099  *
4100  *	This function shuts down a device interface and removes it
4101  *	from the kernel tables.
4102  *
4103  *	This is just a wrapper for unregister_netdevice that takes
4104  *	the rtnl semaphore.  In general you want to use this and not
4105  *	unregister_netdevice.
4106  */
4107 void unregister_netdev(struct net_device *dev)
4108 {
4109 	rtnl_lock();
4110 	unregister_netdevice(dev);
4111 	rtnl_unlock();
4112 }
4113 
4114 EXPORT_SYMBOL(unregister_netdev);
4115 
4116 /**
4117  *	dev_change_net_namespace - move device to a different network namespace
4118  *	@dev: device
4119  *	@net: network namespace
4120  *	@pat: If not NULL, name pattern to try if the current device name
4121  *	      is already taken in the destination network namespace.
4122  *
4123  *	This function shuts down a device interface and moves it
4124  *	to a new network namespace. On success 0 is returned; on
4125  *	failure a negative errno code is returned.
4126  *
4127  *	Callers must hold the rtnl semaphore.
4128  */
4129 
4130 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
4131 {
4132 	char buf[IFNAMSIZ];
4133 	const char *destname;
4134 	int err;
4135 
4136 	ASSERT_RTNL();
4137 
4138 	/* Don't allow namespace local devices to be moved. */
4139 	err = -EINVAL;
4140 	if (dev->features & NETIF_F_NETNS_LOCAL)
4141 		goto out;
4142 
4143 	/* Ensure the device has been registered */
4144 	err = -EINVAL;
4145 	if (dev->reg_state != NETREG_REGISTERED)
4146 		goto out;
4147 
4148 	/* Get out if there is nothing to do */
4149 	err = 0;
4150 	if (net_eq(dev_net(dev), net))
4151 		goto out;
4152 
4153 	/* Pick the destination device name, and ensure
4154 	 * we can use it in the destination network namespace.
4155 	 */
4156 	err = -EEXIST;
4157 	destname = dev->name;
4158 	if (__dev_get_by_name(net, destname)) {
4159 		/* We get here if we can't use the current device name */
4160 		if (!pat)
4161 			goto out;
4162 		if (!dev_valid_name(pat))
4163 			goto out;
4164 		if (strchr(pat, '%')) {
4165 			if (__dev_alloc_name(net, pat, buf) < 0)
4166 				goto out;
4167 			destname = buf;
4168 		} else
4169 			destname = pat;
4170 		if (__dev_get_by_name(net, destname))
4171 			goto out;
4172 	}
4173 
4174 	/*
4175 	 * And now a mini version of register_netdevice() and unregister_netdevice().
4176 	 */
4177 
4178 	/* If the device is running, close it first. */
4179 	dev_close(dev);
4180 
4181 	/* And unlink it from device chain */
4182 	err = -ENODEV;
4183 	unlist_netdevice(dev);
4184 
4185 	synchronize_net();
4186 
4187 	/* Shutdown queueing discipline. */
4188 	dev_shutdown(dev);
4189 
4190 	/* Notify protocols that we are about to destroy
4191 	   this device, so they can clean up their state.
4192 	*/
4193 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4194 
4195 	/*
4196 	 *	Flush the unicast and multicast chains
4197 	 */
4198 	dev_addr_discard(dev);
4199 
4200 	/* Actually switch the network namespace */
4201 	dev_net_set(dev, net);
4202 
4203 	/* Assign the new device name */
4204 	if (destname != dev->name)
4205 		strcpy(dev->name, destname);
4206 
4207 	/* If there is an ifindex conflict assign a new one */
4208 	if (__dev_get_by_index(net, dev->ifindex)) {
4209 		int iflink = (dev->iflink == dev->ifindex);
4210 		dev->ifindex = dev_new_index(net);
4211 		if (iflink)
4212 			dev->iflink = dev->ifindex;
4213 	}
4214 
4215 	/* Fixup kobjects */
4216 	netdev_unregister_kobject(dev);
4217 	err = netdev_register_kobject(dev);
4218 	WARN_ON(err);
4219 
4220 	/* Add the device back in the hashes */
4221 	list_netdevice(dev);
4222 
4223 	/* Notify protocols that a new device appeared. */
4224 	call_netdevice_notifiers(NETDEV_REGISTER, dev);
4225 
4226 	synchronize_net();
4227 	err = 0;
4228 out:
4229 	return err;
4230 }
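
/*
 * Editorial example (not part of the original file, compiled out below):
 * a hedged sketch of moving a device into another namespace under the
 * rtnl semaphore, with "moved%d" as the fallback pattern if the current
 * name is already taken in the destination namespace.
 */
#if 0
static int example_move_dev(struct net_device *dev, struct net *newnet)
{
	int err;

	rtnl_lock();
	err = dev_change_net_namespace(dev, newnet, "moved%d");
	rtnl_unlock();
	return err;
}
#endif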
4231 
4232 static int dev_cpu_callback(struct notifier_block *nfb,
4233 			    unsigned long action,
4234 			    void *ocpu)
4235 {
4236 	struct sk_buff **list_skb;
4237 	struct net_device **list_net;
4238 	struct sk_buff *skb;
4239 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
4240 	struct softnet_data *sd, *oldsd;
4241 
4242 	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
4243 		return NOTIFY_OK;
4244 
4245 	local_irq_disable();
4246 	cpu = smp_processor_id();
4247 	sd = &per_cpu(softnet_data, cpu);
4248 	oldsd = &per_cpu(softnet_data, oldcpu);
4249 
4250 	/* Find end of our completion_queue. */
4251 	list_skb = &sd->completion_queue;
4252 	while (*list_skb)
4253 		list_skb = &(*list_skb)->next;
4254 	/* Append completion queue from offline CPU. */
4255 	*list_skb = oldsd->completion_queue;
4256 	oldsd->completion_queue = NULL;
4257 
4258 	/* Find end of our output_queue. */
4259 	list_net = &sd->output_queue;
4260 	while (*list_net)
4261 		list_net = &(*list_net)->next_sched;
4262 	/* Append output queue from offline CPU. */
4263 	*list_net = oldsd->output_queue;
4264 	oldsd->output_queue = NULL;
4265 
4266 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
4267 	local_irq_enable();
4268 
4269 	/* Process offline CPU's input_pkt_queue */
4270 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
4271 		netif_rx(skb);
4272 
4273 	return NOTIFY_OK;
4274 }
4275 
4276 #ifdef CONFIG_NET_DMA
4277 /**
4278  * net_dma_rebalance - try to maintain one DMA channel per CPU
4279  * @net_dma: DMA client and associated data (lock, channels, channel_mask)
4280  *
4281  * This is called when the number of channels allocated to the net_dma client
4282  * changes.  The net_dma client tries to have one DMA channel per CPU.
4283  */
4284 
4285 static void net_dma_rebalance(struct net_dma *net_dma)
4286 {
4287 	unsigned int cpu, i, n, chan_idx;
4288 	struct dma_chan *chan;
4289 
4290 	if (cpus_empty(net_dma->channel_mask)) {
4291 		for_each_online_cpu(cpu)
4292 			rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
4293 		return;
4294 	}
4295 
4296 	i = 0;
4297 	cpu = first_cpu(cpu_online_map);
4298 
4299 	for_each_cpu_mask(chan_idx, net_dma->channel_mask) {
4300 		chan = net_dma->channels[chan_idx];
4301 
4302 		n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
4303 		   + (i < (num_online_cpus() %
4304 			cpus_weight(net_dma->channel_mask)) ? 1 : 0));
4305 
4306 		while (n) {
4307 			per_cpu(softnet_data, cpu).net_dma = chan;
4308 			cpu = next_cpu(cpu, cpu_online_map);
4309 			n--;
4310 		}
4311 		i++;
4312 	}
4313 }
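
/*
 * Editorial note (not part of the original file): a worked example of the
 * distribution above.  With 6 online CPUs and 4 channels in channel_mask,
 * num_online_cpus() / cpus_weight() = 1 with a remainder of 2, so the
 * first two channels each serve 2 CPUs and the last two serve 1 CPU each
 * (2 + 2 + 1 + 1 = 6).
 */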
4314 
4315 /**
4316  * netdev_dma_event - event callback for the net_dma_client
4317  * @client: should always be net_dma_client
4318  * @chan: DMA channel for the event
4319  * @state: DMA state to be handled
4320  */
4321 static enum dma_state_client
4322 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
4323 	enum dma_state state)
4324 {
4325 	int i, found = 0, pos = -1;
4326 	struct net_dma *net_dma =
4327 		container_of(client, struct net_dma, client);
4328 	enum dma_state_client ack = DMA_DUP; /* default: take no action */
4329 
4330 	spin_lock(&net_dma->lock);
4331 	switch (state) {
4332 	case DMA_RESOURCE_AVAILABLE:
4333 		for (i = 0; i < nr_cpu_ids; i++)
4334 			if (net_dma->channels[i] == chan) {
4335 				found = 1;
4336 				break;
4337 			} else if (net_dma->channels[i] == NULL && pos < 0)
4338 				pos = i;
4339 
4340 		if (!found && pos >= 0) {
4341 			ack = DMA_ACK;
4342 			net_dma->channels[pos] = chan;
4343 			cpu_set(pos, net_dma->channel_mask);
4344 			net_dma_rebalance(net_dma);
4345 		}
4346 		break;
4347 	case DMA_RESOURCE_REMOVED:
4348 		for (i = 0; i < nr_cpu_ids; i++)
4349 			if (net_dma->channels[i] == chan) {
4350 				found = 1;
4351 				pos = i;
4352 				break;
4353 			}
4354 
4355 		if (found) {
4356 			ack = DMA_ACK;
4357 			cpu_clear(pos, net_dma->channel_mask);
4358 			net_dma->channels[i] = NULL;
4359 			net_dma_rebalance(net_dma);
4360 		}
4361 		break;
4362 	default:
4363 		break;
4364 	}
4365 	spin_unlock(&net_dma->lock);
4366 
4367 	return ack;
4368 }
4369 
4370 /**
4371  * netdev_dma_register - register the networking subsystem as a DMA client
4372  */
4373 static int __init netdev_dma_register(void)
4374 {
4375 	net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct dma_chan *),
4376 								GFP_KERNEL);
4377 	if (unlikely(!net_dma.channels)) {
4378 		printk(KERN_NOTICE
4379 				"netdev_dma: no memory for net_dma.channels\n");
4380 		return -ENOMEM;
4381 	}
4382 	spin_lock_init(&net_dma.lock);
4383 	dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
4384 	dma_async_client_register(&net_dma.client);
4385 	dma_async_client_chan_request(&net_dma.client);
4386 	return 0;
4387 }
4388 
4389 #else
4390 static int __init netdev_dma_register(void) { return -ENODEV; }
4391 #endif /* CONFIG_NET_DMA */
4392 
4393 /**
4394  *	netdev_compute_features - compute conjunction of two feature sets
4395  *	@all: first feature set
4396  *	@one: second feature set
4397  *
4398  *	Computes a new feature set after adding a device with feature set
4399  *	@one to the master device with current feature set @all.  Returns
4400  *	the new feature set.
4401  */
4402 int netdev_compute_features(unsigned long all, unsigned long one)
4403 {
4404 	/* if device needs checksumming, downgrade to hw checksumming */
4405 	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4406 		all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
4407 
4408 	/* if device can't do all checksum, downgrade to ipv4/ipv6 */
4409 	if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM))
4410 		all ^= NETIF_F_HW_CSUM
4411 			| NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
4412 
4413 	if (one & NETIF_F_GSO)
4414 		one |= NETIF_F_GSO_SOFTWARE;
4415 	one |= NETIF_F_GSO;
4416 
4417 	/* If even one device supports robust GSO, enable it for all. */
4418 	if (one & NETIF_F_GSO_ROBUST)
4419 		all |= NETIF_F_GSO_ROBUST;
4420 
4421 	all &= one | NETIF_F_LLTX;
4422 
4423 	if (!(all & NETIF_F_ALL_CSUM))
4424 		all &= ~NETIF_F_SG;
4425 	if (!(all & NETIF_F_SG))
4426 		all &= ~NETIF_F_GSO_MASK;
4427 
4428 	return all;
4429 }
4430 EXPORT_SYMBOL(netdev_compute_features);
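
/*
 * Editorial example (not part of the original file, compiled out below):
 * a hedged sketch of how a master device (e.g. a bond or bridge) might
 * fold its slaves' feature sets.  "struct example_slave" and its list are
 * hypothetical; only netdev_compute_features() is from this file.
 */
#if 0
struct example_slave {
	struct list_head list;
	struct net_device *dev;
};

static unsigned long example_master_features(struct list_head *slaves)
{
	struct example_slave *s;
	/* Start from the most permissive set and narrow it per slave. */
	unsigned long features = NETIF_F_NO_CSUM | NETIF_F_SG |
				 NETIF_F_GSO_MASK | NETIF_F_LLTX;

	list_for_each_entry(s, slaves, list)
		features = netdev_compute_features(features, s->dev->features);

	return features;
}
#endif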
4431 
4432 static struct hlist_head *netdev_create_hash(void)
4433 {
4434 	int i;
4435 	struct hlist_head *hash;
4436 
4437 	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
4438 	if (hash != NULL)
4439 		for (i = 0; i < NETDEV_HASHENTRIES; i++)
4440 			INIT_HLIST_HEAD(&hash[i]);
4441 
4442 	return hash;
4443 }
4444 
4445 /* Initialize per network namespace state */
4446 static int __net_init netdev_init(struct net *net)
4447 {
4448 	INIT_LIST_HEAD(&net->dev_base_head);
4449 
4450 	net->dev_name_head = netdev_create_hash();
4451 	if (net->dev_name_head == NULL)
4452 		goto err_name;
4453 
4454 	net->dev_index_head = netdev_create_hash();
4455 	if (net->dev_index_head == NULL)
4456 		goto err_idx;
4457 
4458 	return 0;
4459 
4460 err_idx:
4461 	kfree(net->dev_name_head);
4462 err_name:
4463 	return -ENOMEM;
4464 }
4465 
4466 static void __net_exit netdev_exit(struct net *net)
4467 {
4468 	kfree(net->dev_name_head);
4469 	kfree(net->dev_index_head);
4470 }
4471 
4472 static struct pernet_operations __net_initdata netdev_net_ops = {
4473 	.init = netdev_init,
4474 	.exit = netdev_exit,
4475 };
4476 
4477 static void __net_exit default_device_exit(struct net *net)
4478 {
4479 	struct net_device *dev, *next;
4480 	/*
4481 	 * Push all migratable network devices back to the
4482 	 * initial network namespace.
4483 	 */
4484 	rtnl_lock();
4485 	for_each_netdev_safe(net, dev, next) {
4486 		int err;
4487 		char fb_name[IFNAMSIZ];
4488 
4489 		/* Ignore unmovable devices (e.g. loopback) */
4490 		if (dev->features & NETIF_F_NETNS_LOCAL)
4491 			continue;
4492 
4493 		/* Push remaining network devices to init_net */
4494 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
4495 		err = dev_change_net_namespace(dev, &init_net, fb_name);
4496 		if (err) {
4497 			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
4498 				__func__, dev->name, err);
4499 			BUG();
4500 		}
4501 	}
4502 	rtnl_unlock();
4503 }
4504 
4505 static struct pernet_operations __net_initdata default_device_ops = {
4506 	.exit = default_device_exit,
4507 };
4508 
4509 /*
4510  *	Initialize the DEV module. At boot time this walks the device list and
4511  *	unhooks any devices that fail to initialise (normally hardware not
4512  *	present) and leaves us with a valid list of present and active devices.
4513  *
4514  */
4515 
4516 /*
4517  *       This is called single threaded during boot, so no need
4518  *       to take the rtnl semaphore.
4519  */
4520 static int __init net_dev_init(void)
4521 {
4522 	int i, rc = -ENOMEM;
4523 
4524 	BUG_ON(!dev_boot_phase);
4525 
4526 	if (dev_proc_init())
4527 		goto out;
4528 
4529 	if (netdev_kobject_init())
4530 		goto out;
4531 
4532 	INIT_LIST_HEAD(&ptype_all);
4533 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
4534 		INIT_LIST_HEAD(&ptype_base[i]);
4535 
4536 	if (register_pernet_subsys(&netdev_net_ops))
4537 		goto out;
4538 
4539 	if (register_pernet_device(&default_device_ops))
4540 		goto out;
4541 
4542 	/*
4543 	 *	Initialise the packet receive queues.
4544 	 */
4545 
4546 	for_each_possible_cpu(i) {
4547 		struct softnet_data *queue;
4548 
4549 		queue = &per_cpu(softnet_data, i);
4550 		skb_queue_head_init(&queue->input_pkt_queue);
4551 		queue->completion_queue = NULL;
4552 		INIT_LIST_HEAD(&queue->poll_list);
4553 
4554 		queue->backlog.poll = process_backlog;
4555 		queue->backlog.weight = weight_p;
4556 	}
4557 
4558 	netdev_dma_register();
4559 
4560 	dev_boot_phase = 0;
4561 
4562 	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
4563 	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
4564 
4565 	hotcpu_notifier(dev_cpu_callback, 0);
4566 	dst_init();
4567 	dev_mcast_init();
4568 	rc = 0;
4569 out:
4570 	return rc;
4571 }
4572 
4573 subsys_initcall(net_dev_init);
4574 
4575 EXPORT_SYMBOL(__dev_get_by_index);
4576 EXPORT_SYMBOL(__dev_get_by_name);
4577 EXPORT_SYMBOL(__dev_remove_pack);
4578 EXPORT_SYMBOL(dev_valid_name);
4579 EXPORT_SYMBOL(dev_add_pack);
4580 EXPORT_SYMBOL(dev_alloc_name);
4581 EXPORT_SYMBOL(dev_close);
4582 EXPORT_SYMBOL(dev_get_by_flags);
4583 EXPORT_SYMBOL(dev_get_by_index);
4584 EXPORT_SYMBOL(dev_get_by_name);
4585 EXPORT_SYMBOL(dev_open);
4586 EXPORT_SYMBOL(dev_queue_xmit);
4587 EXPORT_SYMBOL(dev_remove_pack);
4588 EXPORT_SYMBOL(dev_set_allmulti);
4589 EXPORT_SYMBOL(dev_set_promiscuity);
4590 EXPORT_SYMBOL(dev_change_flags);
4591 EXPORT_SYMBOL(dev_set_mtu);
4592 EXPORT_SYMBOL(dev_set_mac_address);
4593 EXPORT_SYMBOL(free_netdev);
4594 EXPORT_SYMBOL(netdev_boot_setup_check);
4595 EXPORT_SYMBOL(netdev_set_master);
4596 EXPORT_SYMBOL(netdev_state_change);
4597 EXPORT_SYMBOL(netif_receive_skb);
4598 EXPORT_SYMBOL(netif_rx);
4599 EXPORT_SYMBOL(register_gifconf);
4600 EXPORT_SYMBOL(register_netdevice);
4601 EXPORT_SYMBOL(register_netdevice_notifier);
4602 EXPORT_SYMBOL(skb_checksum_help);
4603 EXPORT_SYMBOL(synchronize_net);
4604 EXPORT_SYMBOL(unregister_netdevice);
4605 EXPORT_SYMBOL(unregister_netdevice_notifier);
4606 EXPORT_SYMBOL(net_enable_timestamp);
4607 EXPORT_SYMBOL(net_disable_timestamp);
4608 EXPORT_SYMBOL(dev_get_flags);
4609 
4610 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
4611 EXPORT_SYMBOL(br_handle_frame_hook);
4612 EXPORT_SYMBOL(br_fdb_get_hook);
4613 EXPORT_SYMBOL(br_fdb_put_hook);
4614 #endif
4615 
4616 #ifdef CONFIG_KMOD
4617 EXPORT_SYMBOL(dev_load);
4618 #endif
4619 
4620 EXPORT_PER_CPU_SYMBOL(softnet_data);
4621