/*
 * 	NET3	Protocol independent device support routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *	Derived from the non IP parts of dev.c 1.0.19
 * 		Authors:	Ross Biro
 *				Fred N. van Kempen, <[email protected]>
 *				Mark Evans, <[email protected]>
 *
 *	Additional Authors:
 *		Florian la Roche <[email protected]>
 *		Alan Cox <[email protected]>
 *		David Hinds <[email protected]>
 *		Alexey Kuznetsov <[email protected]>
 *		Adam Sulmicki <[email protected]>
 *		Pekka Riikonen <[email protected]>
 *
 *	Changes:
 *		D.J. Barrow	:	Fixed bug where dev->refcnt gets set
 *					to 2 if register_netdev gets called
 *					before net_dev_init & also removed a
 *					few lines of code in the process.
 *		Alan Cox	:	device private ioctl copies fields back.
 *		Alan Cox	:	Transmit queue code does relevant
 *					stunts to keep the queue safe.
 *		Alan Cox	:	Fixed double lock.
 *		Alan Cox	:	Fixed promisc NULL pointer trap
 *		????????	:	Support the full private ioctl range
 *		Alan Cox	:	Moved ioctl permission check into
 *					drivers
 *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
 *		Alan Cox	:	100 backlog just doesn't cut it when
 *					you start doing multicast video 8)
 *		Alan Cox	:	Rewrote net_bh and list manager.
 *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
 *		Alan Cox	:	Took out transmit every packet pass
 *					Saved a few bytes in the ioctl handler
 *		Alan Cox	:	Network driver sets packet type before
 *					calling netif_rx. Saves a function
 *					call a packet.
 *		Alan Cox	:	Hashed net_bh()
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
 *		Alan Cox	:	Device lock protection.
 *		Alan Cox	: 	Fixed nasty side effect of device close
 *					changes.
 *		Rudi Cilibrasi	:	Pass the right thing to
 *					set_mac_address()
 *		Dave Miller	:	32bit quantity for the device lock to
 *					make it work out on a Sparc.
 *		Bjorn Ekwall	:	Added KERNELD hack.
 *		Alan Cox	:	Cleaned up the backlog initialise.
 *		Craig Metz	:	SIOCGIFCONF fix if space for under
 *					1 device.
 *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
 *					is no device open function.
 *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
 *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
 *		Cyrus Durgin	:	Cleaned for KMOD
 *		Adam Sulmicki	:	Bug Fix : Network Device Unload
 *					A network device unload needs to purge
 *					the backlog queue.
 *	Paul Rusty Russell	:	SIOCSIFNAME
 *		Pekka Riikonen	:	Netdev boot-time settings code
 *		Andrew Morton	:	Make unregister_netdevice wait
 *					indefinitely on dev->refcnt
 * 		J Hadi Salim	:	- Backlog queue sampling
 *					- netif_rx() feedback
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <linux/if_bridge.h>
#include <linux/if_macvlan.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/wext.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <trace/events/napi.h>
#include <linux/pci.h>

#include "net-sysfs.h"

/* Instead of increasing this, you should create a hash table. */
#define MAX_GRO_SKBS 8

/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)

/*
 *	The list of packet types we will receive (as opposed to discard)
 *	and the routines to invoke.
 *
 *	Why 16. Because with 16 the only overlap we get on a hash of the
 *	low nibble of the protocol value is RARP/SNAP/X.25.
 *
 *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 *             sure which should go first, but I bet it won't make much
 *             difference if we are running VLANs.  The good news is that
 *             this protocol won't be in the list unless compiled in, so
 *             the average user (w/out VLANs) will not be adversely affected.
 *             --BLG
 *
 *		0800	IP
 *		8100    802.1Q VLAN
 *		0001	802.3
 *		0002	AX.25
 *		0004	802.2
 *		8035	RARP
 *		0005	SNAP
 *		0805	X.25
 *		0806	ARP
 *		8137	IPX
 *		0009	Localtalk
 *		86DD	IPv6
 */

#define PTYPE_HASH_SIZE	(16)
#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)

static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
static struct list_head ptype_all __read_mostly;	/* Taps */

/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);

static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
}

static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
}

static inline void rps_lock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	spin_lock(&sd->input_pkt_queue.lock);
#endif
}

static inline void rps_unlock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	spin_unlock(&sd->input_pkt_queue.lock);
#endif
}

/* Device list insertion */
static int list_netdevice(struct net_device *dev)
{
	struct net *net = dev_net(dev);

	ASSERT_RTNL();

	write_lock_bh(&dev_base_lock);
	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	hlist_add_head_rcu(&dev->index_hlist,
			   dev_index_hash(net, dev->ifindex));
	write_unlock_bh(&dev_base_lock);
	return 0;
}

/* Device list removal.
 * The caller must respect an RCU grace period before freeing/reusing dev.
 */
static void unlist_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Unlink dev from the device chain */
	write_lock_bh(&dev_base_lock);
	list_del_rcu(&dev->dev_list);
	hlist_del_rcu(&dev->name_hlist);
	hlist_del_rcu(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);
}

/*
 *	Our notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */

DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);

#ifdef CONFIG_LOCKDEP
/*
 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 * according to dev->type
 */
static const unsigned short netdev_lock_type[] =
	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
	 ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154,
	 ARPHRD_VOID, ARPHRD_NONE};

static const char *const netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
	 "_xmit_PHONET_PIPE", "_xmit_IEEE802154",
	 "_xmit_VOID", "_xmit_NONE"};

static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];

static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
		if (netdev_lock_type[i] == dev_type)
			return i;
	/* the last key is used by default */
	return ARRAY_SIZE(netdev_lock_type) - 1;
}

static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
	int i;

	i = netdev_lock_pos(dev_type);
	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
				   netdev_lock_name[i]);
}

static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
	int i;

	i = netdev_lock_pos(dev->type);
	lockdep_set_class_and_name(&dev->addr_list_lock,
				   &netdev_addr_lock_key[i],
				   netdev_lock_name[i]);
}
#else
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
#endif

/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/

/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers, mangling input packets,
 *	MUST BE last in hash buckets and checking protocol handlers
 *	MUST start from promiscuous ptype_all chain in net_bh.
 *	It is true now, do not change it.
 *	Explanation follows: if a protocol handler that mangles packets
 *	is first on the list, it is not able to sense that the packet
 *	is cloned and should be copied-on-write, so it will change it
 *	and subsequent readers will get a broken packet.
 *							--ANK (980803)
 */

/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep, therefore it cannot guarantee that all
 *	CPUs that are in the middle of receiving packets will see the new
 *	packet type (until the next received packet).
 */

void dev_add_pack(struct packet_type *pt)
{
	int hash;

	spin_lock_bh(&ptype_lock);
	if (pt->type == htons(ETH_P_ALL))
		list_add_rcu(&pt->list, &ptype_all);
	else {
		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
		list_add_rcu(&pt->list, &ptype_base[hash]);
	}
	spin_unlock_bh(&ptype_lock);
}
EXPORT_SYMBOL(dev_add_pack);

/**
 *	__dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	The packet type might still be in use by receivers
 *	and must not be freed until after all the CPUs have gone
 *	through a quiescent state.
 */
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head;
	struct packet_type *pt1;

	spin_lock_bh(&ptype_lock);

	if (pt->type == htons(ETH_P_ALL))
		head = &ptype_all;
	else
		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
	spin_unlock_bh(&ptype_lock);
}
EXPORT_SYMBOL(__dev_remove_pack);

/**
 *	dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	synchronize_net();
}
EXPORT_SYMBOL(dev_remove_pack);
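
/*
 * Illustrative sketch (not part of dev.c): how a protocol module would
 * typically pair dev_add_pack() with dev_remove_pack(). The handler and
 * module hooks below are made-up; 0x88B5 is the IEEE 802 local
 * experimental ethertype, used here only as a placeholder.
 */
#if 0
static int example_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev)
{
	/* Consume the packet; a real handler would parse it first. */
	kfree_skb(skb);
	return NET_RX_SUCCESS;
}

static struct packet_type example_packet_type __read_mostly = {
	.type	= cpu_to_be16(0x88B5),
	.func	= example_rcv,
};

static int __init example_init(void)
{
	dev_add_pack(&example_packet_type);
	return 0;
}

static void __exit example_exit(void)
{
	/* dev_remove_pack() sleeps in synchronize_net(); see above. */
	dev_remove_pack(&example_packet_type);
}
#endif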

/*******************************************************************************

		      Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 *	netdev_boot_setup_add	- add new setup entry
 *	@name: name of the device
 *	@map: configured settings for the device
 *
 *	Adds new setup entry to the dev_boot_setup list.  The function
 *	returns 0 on error and 1 on success.  This is a generic routine
 *	for all netdevices.
 */
static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
	struct netdev_boot_setup *s;
	int i;

	s = dev_boot_setup;
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
			memset(s[i].name, 0, sizeof(s[i].name));
			strlcpy(s[i].name, name, IFNAMSIZ);
			memcpy(&s[i].map, map, sizeof(s[i].map));
			break;
		}
	}

	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

/**
 *	netdev_boot_setup_check	- check boot time settings
 *	@dev: the netdevice
 *
 * 	Check boot time settings for the device.
 *	The settings found are applied to the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings are found, 1 if they are.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
	struct netdev_boot_setup *s = dev_boot_setup;
	int i;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
		    !strcmp(dev->name, s[i].name)) {
			dev->irq	= s[i].map.irq;
			dev->base_addr	= s[i].map.base_addr;
			dev->mem_start	= s[i].map.mem_start;
			dev->mem_end	= s[i].map.mem_end;
			return 1;
		}
	}
	return 0;
}
EXPORT_SYMBOL(netdev_boot_setup_check);


/**
 *	netdev_boot_base	- get address from boot time settings
 *	@prefix: prefix for network device
 *	@unit: id for network device
 *
 * 	Check boot time settings for the base address of a device.
 *	Returns the configured base address if settings are found,
 *	1 if the device is already registered (to suppress probing),
 *	and 0 otherwise.
 */
unsigned long netdev_boot_base(const char *prefix, int unit)
{
	const struct netdev_boot_setup *s = dev_boot_setup;
	char name[IFNAMSIZ];
	int i;

	sprintf(name, "%s%d", prefix, unit);

	/*
	 * If device already registered then return base of 1
	 * to indicate not to probe for this interface
	 */
	if (__dev_get_by_name(&init_net, name))
		return 1;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
		if (!strcmp(name, s[i].name))
			return s[i].map.base_addr;
	return 0;
}

/*
 * Saves the settings configured at boot time for any netdevice.
 */
int __init netdev_boot_setup(char *str)
{
	int ints[5];
	struct ifmap map;

	str = get_options(str, ARRAY_SIZE(ints), ints);
	if (!str || !*str)
		return 0;

	/* Save settings */
	memset(&map, 0, sizeof(map));
	if (ints[0] > 0)
		map.irq = ints[1];
	if (ints[0] > 1)
		map.base_addr = ints[2];
	if (ints[0] > 2)
		map.mem_start = ints[3];
	if (ints[0] > 3)
		map.mem_end = ints[4];

	/* Add new entry to the list */
	return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);
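
/*
 * Worked example (assumed values, not from the source): with the parser
 * above, booting with
 *
 *	netdev=9,0x300,0xd0000,0xd4000,eth0
 *
 * yields ints[0] = 4, so map.irq = 9, map.base_addr = 0x300,
 * map.mem_start = 0xd0000 and map.mem_end = 0xd4000 are saved under the
 * name "eth0"; netdev_boot_setup_check() later copies them into the
 * matching device.
 */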

/*******************************************************************************

			    Device Interface Subroutines

*******************************************************************************/

/**
 *	__dev_get_by_name	- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. Must be called under the RTNL semaphore
 *	or @dev_base_lock. If the name is found a pointer to the device
 *	is returned. If the name is not found then %NULL is returned. The
 *	reference counters are not incremented so the caller must be
 *	careful with locks.
 */

struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);

	hlist_for_each_entry(dev, p, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_name);

/**
 *	dev_get_by_name_rcu	- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name.
 *	If the name is found a pointer to the device is returned.
 * 	If the name is not found then %NULL is returned.
 *	The reference counters are not incremented so the caller must be
 *	careful with locks. The caller must hold the RCU read lock.
 */

struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);

	hlist_for_each_entry_rcu(dev, p, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_get_by_name_rcu);

/**
 *	dev_get_by_name		- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. This can be called from any
 *	context and does its own locking. The returned handle has
 *	the usage count incremented and the caller must use dev_put() to
 *	release it when it is no longer needed. %NULL is returned if no
 *	matching device is found.
 */

struct net_device *dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	if (dev)
		dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_name);
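
/*
 * Illustrative usage sketch (not part of dev.c) contrasting the two
 * lookup styles above. "eth0" is just an example name.
 */
#if 0
static void example_lookups(struct net *net)
{
	struct net_device *dev;

	/* Refcounted lookup: any context; must be paired with dev_put(). */
	dev = dev_get_by_name(net, "eth0");
	if (dev) {
		pr_info("%s: ifindex %d\n", dev->name, dev->ifindex);
		dev_put(dev);
	}

	/* RCU lookup: no refcount taken, so the pointer is only valid
	 * inside the rcu_read_lock() section.
	 */
	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, "eth0");
	if (dev)
		pr_info("%s is %s\n", dev->name,
			netif_running(dev) ? "running" : "down");
	rcu_read_unlock();
}
#endif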

/**
 *	__dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns a pointer to the device,
 *	or %NULL if it is not found. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold either the RTNL semaphore
 *	or @dev_base_lock.
 */

struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_index_hash(net, ifindex);

	hlist_for_each_entry(dev, p, head, index_hlist)
		if (dev->ifindex == ifindex)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_index);

/**
 *	dev_get_by_index_rcu - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns a pointer to the device,
 *	or %NULL if it is not found. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold the RCU read lock.
 */

struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_index_hash(net, ifindex);

	hlist_for_each_entry_rcu(dev, p, head, index_hlist)
		if (dev->ifindex == ifindex)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_get_by_index_rcu);


/**
 *	dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns a pointer to the device,
 *	or NULL if it is not found. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_index);

/**
 *	dev_getbyhwaddr - find a device by its hardware address
 *	@net: the applicable net namespace
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address. Returns a pointer to the
 *	device, or NULL if it is not found. The caller must hold the
 *	rtnl semaphore. The returned device has not had its ref count
 *	increased and the caller must therefore be careful about locking.
 *
 *	BUGS:
 *	If the API was consistent this would be __dev_get_by_hwaddr
 */

struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
{
	struct net_device *dev;

	ASSERT_RTNL();

	for_each_netdev(net, dev)
		if (dev->type == type &&
		    !memcmp(dev->dev_addr, ha, dev->addr_len))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_getbyhwaddr);

struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	ASSERT_RTNL();
	for_each_netdev(net, dev)
		if (dev->type == type)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_getfirstbyhwtype);

struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev, *ret = NULL;

	rcu_read_lock();
	for_each_netdev_rcu(net, dev)
		if (dev->type == type) {
			dev_hold(dev);
			ret = dev;
			break;
		}
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 *	dev_get_by_flags_rcu - find any device with given flags
 *	@net: the applicable net namespace
 *	@if_flags: IFF_* values
 *	@mask: bitmask of bits in if_flags to check
 *
 *	Search for any interface with the given flags. Returns a pointer to
 *	the first matching device, or NULL if none is found. Must be called
 *	inside rcu_read_lock(), and the result's refcount is unchanged.
 */

struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
				    unsigned short mask)
{
	struct net_device *dev, *ret;

	ret = NULL;
	for_each_netdev_rcu(net, dev) {
		if (((dev->flags ^ if_flags) & mask) == 0) {
			ret = dev;
			break;
		}
	}
	return ret;
}
EXPORT_SYMBOL(dev_get_by_flags_rcu);

/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names
 *	to allow sysfs to work.  We also disallow any kind of
 *	whitespace.
 */
int dev_valid_name(const char *name)
{
	if (*name == '\0')
		return 0;
	if (strlen(name) >= IFNAMSIZ)
		return 0;
	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return 0;

	while (*name) {
		if (*name == '/' || isspace(*name))
			return 0;
		name++;
	}
	return 1;
}
EXPORT_SYMBOL(dev_valid_name);
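
/*
 * For example, per the checks above: "eth0" is valid, while "", ".",
 * "..", "a/b", names containing whitespace and names of IFNAMSIZ or
 * more characters are all rejected.
 */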

/**
 *	__dev_alloc_name - allocate a name for a device
 *	@net: network namespace to allocate the device name in
 *	@name: name format string
 *	@buf:  scratch buffer and result name string
 *
 *	Passed a format string, e.g. "lt%d", it will try to find a suitable
 *	id. It scans the list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (i.e. 32K on most
 *	platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
	int i = 0;
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;
	unsigned long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user.  There must be exactly one "%d" and no other
		 * "%" characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		for_each_netdev(net, d) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/* avoid cases where sscanf is not an exact inverse of printf */
			snprintf(buf, IFNAMSIZ, name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	if (buf != name)
		snprintf(buf, IFNAMSIZ, name, i);
	if (!__dev_get_by_name(net, buf))
		return i;

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}

/**
 *	dev_alloc_name - allocate a name for a device
 *	@dev: device
 *	@name: name format string
 *
 *	Passed a format string, e.g. "lt%d", it will try to find a suitable
 *	id. It scans the list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (i.e. 32K on most
 *	platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

int dev_alloc_name(struct net_device *dev, const char *name)
{
	char buf[IFNAMSIZ];
	struct net *net;
	int ret;

	BUG_ON(!dev_net(dev));
	net = dev_net(dev);
	ret = __dev_alloc_name(net, name, buf);
	if (ret >= 0)
		strlcpy(dev->name, buf, IFNAMSIZ);
	return ret;
}
EXPORT_SYMBOL(dev_alloc_name);
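
/*
 * Illustrative usage sketch (not part of dev.c): a driver asking for the
 * first free slot of a made-up "foo%d" pattern before registration.
 */
#if 0
static int example_pick_name(struct net_device *dev)
{
	int unit = dev_alloc_name(dev, "foo%d");

	if (unit < 0)
		return unit;		/* -EINVAL or -ENFILE */
	/* dev->name is now e.g. "foo0"; unit is the number chosen. */
	return 0;
}
#endif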

static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt)
{
	struct net *net;

	BUG_ON(!dev_net(dev));
	net = dev_net(dev);

	if (!dev_valid_name(name))
		return -EINVAL;

	if (fmt && strchr(name, '%'))
		return dev_alloc_name(dev, name);
	else if (__dev_get_by_name(net, name))
		return -EEXIST;
	else if (dev->name != name)
		strlcpy(dev->name, name, IFNAMSIZ);

	return 0;
}

/**
 *	dev_change_name - change name of a device
 *	@dev: device
 *	@newname: name (or format string) must be at least IFNAMSIZ
 *
 *	Change the name of a device. A format string such as "eth%d"
 *	can be passed for wildcarding.
 */
int dev_change_name(struct net_device *dev, const char *newname)
{
	char oldname[IFNAMSIZ];
	int err = 0;
	int ret;
	struct net *net;

	ASSERT_RTNL();
	BUG_ON(!dev_net(dev));

	net = dev_net(dev);
	if (dev->flags & IFF_UP)
		return -EBUSY;

	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
		return 0;

	memcpy(oldname, dev->name, IFNAMSIZ);

	err = dev_get_valid_name(dev, newname, 1);
	if (err < 0)
		return err;

rollback:
	ret = device_rename(&dev->dev, dev->name);
	if (ret) {
		memcpy(dev->name, oldname, IFNAMSIZ);
		return ret;
	}

	write_lock_bh(&dev_base_lock);
	hlist_del(&dev->name_hlist);
	write_unlock_bh(&dev_base_lock);

	synchronize_rcu();

	write_lock_bh(&dev_base_lock);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	write_unlock_bh(&dev_base_lock);

	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
	ret = notifier_to_errno(ret);

	if (ret) {
		/* err >= 0 after dev_alloc_name() or stores the first errno */
		if (err >= 0) {
			err = ret;
			memcpy(dev->name, oldname, IFNAMSIZ);
			goto rollback;
		} else {
			printk(KERN_ERR
			       "%s: name change rollback failed: %d.\n",
			       dev->name, ret);
		}
	}

	return err;
}

/**
 *	dev_set_alias - change ifalias of a device
 *	@dev: device
 *	@alias: name up to IFALIASZ
 *	@len: limit of bytes to copy from info
 *
 *	Set the ifalias for a device.
 */
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
	ASSERT_RTNL();

	if (len >= IFALIASZ)
		return -EINVAL;

	if (!len) {
		if (dev->ifalias) {
			kfree(dev->ifalias);
			dev->ifalias = NULL;
		}
		return 0;
	}

	dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
	if (!dev->ifalias)
		return -ENOMEM;

	strlcpy(dev->ifalias, alias, len+1);
	return len;
}


/**
 *	netdev_features_change - device changes features
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed features.
 */
void netdev_features_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);

/**
 *	netdev_state_change - device changes state
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed state. This function calls
 *	the notifier chains for netdev_chain and sends a NEWLINK message
 *	to the routing socket.
 */
void netdev_state_change(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		call_netdevice_notifiers(NETDEV_CHANGE, dev);
		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
	}
}
EXPORT_SYMBOL(netdev_state_change);

int netdev_bonding_change(struct net_device *dev, unsigned long event)
{
	return call_netdevice_notifiers(event, dev);
}
EXPORT_SYMBOL(netdev_bonding_change);

/**
 *	dev_load 	- load a network module
 *	@net: the applicable net namespace
 *	@name: name of interface
 *
 *	If a network interface is not present and the process has suitable
 *	privileges, this function loads the module. If module loading is not
 *	available in this kernel then it becomes a nop.
 */

void dev_load(struct net *net, const char *name)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	rcu_read_unlock();

	if (!dev && capable(CAP_NET_ADMIN))
		request_module("%s", name);
}
EXPORT_SYMBOL(dev_load);

static int __dev_open(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int ret;

	ASSERT_RTNL();

	/*
	 *	Is it even present?
	 */
	if (!netif_device_present(dev))
		return -ENODEV;

	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
	ret = notifier_to_errno(ret);
	if (ret)
		return ret;

	/*
	 *	Call device private open method
	 */
	set_bit(__LINK_STATE_START, &dev->state);

	if (ops->ndo_validate_addr)
		ret = ops->ndo_validate_addr(dev);

	if (!ret && ops->ndo_open)
		ret = ops->ndo_open(dev);

	/*
	 *	If it went open OK then:
	 */

	if (ret)
		clear_bit(__LINK_STATE_START, &dev->state);
	else {
		/*
		 *	Set the flags.
		 */
		dev->flags |= IFF_UP;

		/*
		 *	Enable NET_DMA
		 */
		net_dmaengine_get();

		/*
		 *	Initialize multicasting status
		 */
		dev_set_rx_mode(dev);

		/*
		 *	Wakeup transmit queue engine
		 */
		dev_activate(dev);
	}

	return ret;
}

/**
 *	dev_open	- prepare an interface for use.
 *	@dev:	device to open
 *
 *	Takes a device from down to up state. The device's private open
 *	function is invoked and then the multicast lists are loaded. Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop. On a failure
 *	a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
	int ret;

	/*
	 *	Is it already up?
	 */
	if (dev->flags & IFF_UP)
		return 0;

	/*
	 *	Open device
	 */
	ret = __dev_open(dev);
	if (ret < 0)
		return ret;

	/*
	 *	... and announce new interface.
	 */
	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
	call_netdevice_notifiers(NETDEV_UP, dev);

	return ret;
}
EXPORT_SYMBOL(dev_open);

static int __dev_close(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;

	ASSERT_RTNL();
	might_sleep();

	/*
	 *	Tell people we are going down, so that they can
	 *	prepare for it while the device is still operating.
	 */
	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

	clear_bit(__LINK_STATE_START, &dev->state);

	/* Synchronize to scheduled poll. We cannot touch poll list,
	 * it may even be on a different cpu. So just clear netif_running().
	 *
	 * dev->stop() will invoke napi_disable() on all of its
	 * napi_struct instances on this device.
	 */
	smp_mb__after_clear_bit(); /* Commit netif_running(). */

	dev_deactivate(dev);

	/*
	 *	Call the device specific close. This cannot fail.
	 *	Only if device is UP
	 *
	 *	We allow it to be called even after a DETACH hot-plug
	 *	event.
	 */
	if (ops->ndo_stop)
		ops->ndo_stop(dev);

	/*
	 *	Device is now down.
	 */

	dev->flags &= ~IFF_UP;

	/*
	 *	Shutdown NET_DMA
	 */
	net_dmaengine_put();

	return 0;
}

/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state. A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *	chain.
 */
int dev_close(struct net_device *dev)
{
	if (!(dev->flags & IFF_UP))
		return 0;

	__dev_close(dev);

	/*
	 * Tell people we are down
	 */
	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
	call_netdevice_notifiers(NETDEV_DOWN, dev);

	return 0;
}
EXPORT_SYMBOL(dev_close);
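
/*
 * Illustrative usage sketch (not part of dev.c): dev_open() and
 * dev_close() must run under the RTNL semaphore, as the ASSERT_RTNL()
 * in __dev_open()/__dev_close() above enforces.
 */
#if 0
static int example_bounce(struct net_device *dev)
{
	int err;

	rtnl_lock();
	dev_close(dev);
	err = dev_open(dev);
	rtnl_unlock();
	return err;
}
#endif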


/**
 *	dev_disable_lro - disable Large Receive Offload on a device
 *	@dev: device
 *
 *	Disable Large Receive Offload (LRO) on a net device.  Must be
 *	called under RTNL.  This is needed if received packets may be
 *	forwarded to another interface.
 */
void dev_disable_lro(struct net_device *dev)
{
	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
	    dev->ethtool_ops->set_flags) {
		u32 flags = dev->ethtool_ops->get_flags(dev);
		if (flags & ETH_FLAG_LRO) {
			flags &= ~ETH_FLAG_LRO;
			dev->ethtool_ops->set_flags(dev, flags);
		}
	}
	WARN_ON(dev->features & NETIF_F_LRO);
}
EXPORT_SYMBOL(dev_disable_lro);


static int dev_boot_phase = 1;

/*
 *	Device change register/unregister. These are not inline or static
 *	as we export them to the world.
 */

/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register a notifier to be called when network device events occur.
 *	The notifier passed is linked into the kernel structures and must
 *	not be reused until it has been unregistered. A negative errno code
 *	is returned on a failure.
 *
 * 	When registered, all registration and up events are replayed
 *	to the new notifier to allow the notifier to have a race-free
 *	view of the network device list.
 */

int register_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	struct net_device *last;
	struct net *net;
	int err;

	rtnl_lock();
	err = raw_notifier_chain_register(&netdev_chain, nb);
	if (err)
		goto unlock;
	if (dev_boot_phase)
		goto unlock;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
			err = notifier_to_errno(err);
			if (err)
				goto rollback;

			if (!(dev->flags & IFF_UP))
				continue;

			nb->notifier_call(nb, NETDEV_UP, dev);
		}
	}

unlock:
	rtnl_unlock();
	return err;

rollback:
	last = dev;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			if (dev == last)
				break;

			if (dev->flags & IFF_UP) {
				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
				nb->notifier_call(nb, NETDEV_DOWN, dev);
			}
			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
		}
	}

	raw_notifier_chain_unregister(&netdev_chain, nb);
	goto unlock;
}
EXPORT_SYMBOL(register_netdevice_notifier);
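
/*
 * Illustrative sketch (not part of dev.c): a minimal notifier block of
 * the kind the replay logic above services. In this kernel the callback's
 * third argument is the struct net_device itself; the events handled
 * here are arbitrary examples.
 */
#if 0
static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	switch (event) {
	case NETDEV_REGISTER:
		pr_info("%s: registered\n", dev->name);
		break;
	case NETDEV_UP:
		pr_info("%s: up\n", dev->name);
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block example_notifier = {
	.notifier_call = example_netdev_event,
};
/* registered with register_netdevice_notifier(&example_notifier) */
#endif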

/**
 *	unregister_netdevice_notifier - unregister a network notifier block
 *	@nb: notifier
 *
 *	Unregister a notifier previously registered by
 *	register_netdevice_notifier(). The notifier is unlinked from the
 *	kernel structures and may then be reused. A negative errno code
 *	is returned on a failure.
 */

int unregister_netdevice_notifier(struct notifier_block *nb)
{
	int err;

	rtnl_lock();
	err = raw_notifier_chain_unregister(&netdev_chain, nb);
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(unregister_netdevice_notifier);

/**
 *	call_netdevice_notifiers - call all network notifier blocks
 *	@val: value passed unmodified to notifier function
 *	@dev: net_device pointer passed unmodified to notifier function
 *
 *	Call all network notifier blocks.  Parameters and return value
 *	are as for raw_notifier_call_chain().
 */

int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
	ASSERT_RTNL();
	return raw_notifier_call_chain(&netdev_chain, val, dev);
}

/* When > 0 there are consumers of rx skb time stamps */
static atomic_t netstamp_needed = ATOMIC_INIT(0);

void net_enable_timestamp(void)
{
	atomic_inc(&netstamp_needed);
}
EXPORT_SYMBOL(net_enable_timestamp);

void net_disable_timestamp(void)
{
	atomic_dec(&netstamp_needed);
}
EXPORT_SYMBOL(net_disable_timestamp);

static inline void net_timestamp_set(struct sk_buff *skb)
{
	if (atomic_read(&netstamp_needed))
		__net_timestamp(skb);
	else
		skb->tstamp.tv64 = 0;
}

static inline void net_timestamp_check(struct sk_buff *skb)
{
	if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed))
		__net_timestamp(skb);
}

/**
 * dev_forward_skb - loopback an skb to another netif
 *
 * @dev: destination network device
 * @skb: buffer to forward
 *
 * return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_DROP     (packet was dropped, but freed)
 *
 * dev_forward_skb can be used for injecting an skb from the
 * start_xmit function of one device into the receive queue
 * of another device.
 *
 * The receiving device may be in another namespace, so
 * we have to clear all information in the skb that could
 * impact namespace isolation.
 */
int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
{
	skb_orphan(skb);

	if (!(dev->flags & IFF_UP) ||
	    (skb->len > (dev->mtu + dev->hard_header_len))) {
		kfree_skb(skb);
		return NET_RX_DROP;
	}
	skb_set_dev(skb, dev);
	skb->tstamp.tv64 = 0;
	skb->pkt_type = PACKET_HOST;
	skb->protocol = eth_type_trans(skb, dev);
	return netif_rx(skb);
}
EXPORT_SYMBOL_GPL(dev_forward_skb);
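
/*
 * Illustrative sketch (not part of dev.c): a veth-style ndo_start_xmit
 * handing each skb to its peer with dev_forward_skb(). example_get_peer()
 * is a hypothetical helper; on failure the skb has already been freed,
 * so only counters are touched.
 */
#if 0
static netdev_tx_t example_pair_xmit(struct sk_buff *skb,
				     struct net_device *dev)
{
	struct net_device *peer = example_get_peer(dev);

	if (dev_forward_skb(peer, skb) == NET_RX_SUCCESS)
		dev->stats.tx_packets++;
	else
		dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}
#endif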

/*
 *	Support routine. Sends outgoing frames to any network
 *	taps currently in use.
 */

static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
	struct packet_type *ptype;

#ifdef CONFIG_NET_CLS_ACT
	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
		net_timestamp_set(skb);
#else
	net_timestamp_set(skb);
#endif

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		/* Never send packets back to the socket
		 * they originated from - MvS ([email protected])
		 */
		if ((ptype->dev == dev || !ptype->dev) &&
		    (ptype->af_packet_priv == NULL ||
		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (!skb2)
				break;

			/* skb->nh should be correctly set by the sender,
			 * so the second statement is just protection
			 * against buggy protocols.
			 */
			skb_reset_mac_header(skb2);

			if (skb_network_header(skb2) < skb2->data ||
			    skb2->network_header > skb2->tail) {
				if (net_ratelimit())
					printk(KERN_CRIT "protocol %04x is "
					       "buggy, dev %s\n",
					       skb2->protocol, dev->name);
				skb_reset_network_header(skb2);
			}

			skb2->transport_header = skb2->network_header;
			skb2->pkt_type = PACKET_OUTGOING;
			ptype->func(skb2, skb->dev, ptype, skb->dev);
		}
	}
	rcu_read_unlock();
}


static inline void __netif_reschedule(struct Qdisc *q)
{
	struct softnet_data *sd;
	unsigned long flags;

	local_irq_save(flags);
	sd = &__get_cpu_var(softnet_data);
	q->next_sched = NULL;
	*sd->output_queue_tailp = q;
	sd->output_queue_tailp = &q->next_sched;
	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_restore(flags);
}

void __netif_schedule(struct Qdisc *q)
{
	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
		__netif_reschedule(q);
}
EXPORT_SYMBOL(__netif_schedule);

void dev_kfree_skb_irq(struct sk_buff *skb)
{
	if (!skb->destructor)
		dev_kfree_skb(skb);
	else if (atomic_dec_and_test(&skb->users)) {
		struct softnet_data *sd;
		unsigned long flags;

		local_irq_save(flags);
		sd = &__get_cpu_var(softnet_data);
		skb->next = sd->completion_queue;
		sd->completion_queue = skb;
		raise_softirq_irqoff(NET_TX_SOFTIRQ);
		local_irq_restore(flags);
	}
}
EXPORT_SYMBOL(dev_kfree_skb_irq);

void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (in_irq() || irqs_disabled())
		dev_kfree_skb_irq(skb);
	else
		dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);

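/*
 * For example, a driver completion path that may run in hard interrupt,
 * softirq or process context (e.g. via netpoll) should free skbs with
 * dev_kfree_skb_any() above rather than calling dev_kfree_skb() directly.
 */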

/**
 * netif_device_detach - mark device as removed
 * @dev: network device
 *
 * Mark device as removed from system and therefore no longer available.
 */
void netif_device_detach(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_tx_stop_all_queues(dev);
	}
}
EXPORT_SYMBOL(netif_device_detach);

/**
 * netif_device_attach - mark device as attached
 * @dev: network device
 *
 * Mark the device as attached to the system and restart it if needed.
 */
void netif_device_attach(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_tx_wake_all_queues(dev);
		__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_device_attach);
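
/*
 * Illustrative sketch (not part of dev.c): the usual suspend/resume
 * pairing of the two helpers above in a PCI network driver. The hook
 * names are made-up.
 */
#if 0
static int example_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct net_device *dev = pci_get_drvdata(pdev);

	netif_device_detach(dev);	/* mark absent, stop the queues */
	/* ... quiesce and power down the hardware ... */
	return 0;
}

static int example_resume(struct pci_dev *pdev)
{
	struct net_device *dev = pci_get_drvdata(pdev);

	/* ... re-program the hardware ... */
	netif_device_attach(dev);	/* mark present, wake the queues */
	return 0;
}
#endif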

static bool can_checksum_protocol(unsigned long features, __be16 protocol)
{
	return ((features & NETIF_F_GEN_CSUM) ||
		((features & NETIF_F_IP_CSUM) &&
		 protocol == htons(ETH_P_IP)) ||
		((features & NETIF_F_IPV6_CSUM) &&
		 protocol == htons(ETH_P_IPV6)) ||
		((features & NETIF_F_FCOE_CRC) &&
		 protocol == htons(ETH_P_FCOE)));
}

static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
{
	if (can_checksum_protocol(dev->features, skb->protocol))
		return true;

	if (skb->protocol == htons(ETH_P_8021Q)) {
		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
		if (can_checksum_protocol(dev->features & dev->vlan_features,
					  veh->h_vlan_encapsulated_proto))
			return true;
	}

	return false;
}

/**
 * skb_dev_set -- assign a new device to a buffer
 * @skb: buffer for the new device
 * @dev: network device
 *
 * If an skb is owned by a device already, we have to reset
 * all data private to the namespace the device belongs to
 * before assigning it a new device.
 */
#ifdef CONFIG_NET_NS
void skb_set_dev(struct sk_buff *skb, struct net_device *dev)
{
	skb_dst_drop(skb);
	if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) {
		secpath_reset(skb);
		nf_reset(skb);
		skb_init_secmark(skb);
		skb->mark = 0;
		skb->priority = 0;
		skb->nf_trace = 0;
		skb->ipvs_property = 0;
#ifdef CONFIG_NET_SCHED
		skb->tc_index = 0;
#endif
	}
	skb->dev = dev;
}
EXPORT_SYMBOL(skb_set_dev);
#endif /* CONFIG_NET_NS */

/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 */
int skb_checksum_help(struct sk_buff *skb)
{
	__wsum csum;
	int ret = 0, offset;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		goto out_set_summed;

	if (unlikely(skb_shinfo(skb)->gso_size)) {
		/* Let GSO fix up the checksum. */
		goto out_set_summed;
	}

	offset = skb->csum_start - skb_headroom(skb);
	BUG_ON(offset >= skb_headlen(skb));
	csum = skb_checksum(skb, offset, skb->len - offset, 0);

	offset += skb->csum_offset;
	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));

	if (skb_cloned(skb) &&
	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		if (ret)
			goto out;
	}

	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
out_set_summed:
	skb->ip_summed = CHECKSUM_NONE;
out:
	return ret;
}
EXPORT_SYMBOL(skb_checksum_help);
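
/*
 * Illustrative sketch (not part of dev.c): a transmit path falling back
 * to software checksumming when the hardware cannot handle a protocol,
 * mirroring the CHECKSUM_PARTIAL handling in dev_hard_start_xmit()
 * further below.
 */
#if 0
static int example_tx_checksum(struct net_device *dev, struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    !dev_can_checksum(dev, skb) &&
	    skb_checksum_help(skb))
		return -EIO;	/* checksum could not be completed */
	return 0;
}
#endif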

/**
 *	skb_gso_segment - Perform segmentation on skb.
 *	@skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *
 *	This function segments the given skb and returns a list of segments.
 *
 *	It may return NULL if the skb requires no segmentation.  This is
 *	only possible when GSO is used for verifying header integrity.
 */
struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
{
	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	int err;

	skb_reset_mac_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;
	__skb_pull(skb, skb->mac_len);

	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
		struct net_device *dev = skb->dev;
		struct ethtool_drvinfo info = {};

		if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
			dev->ethtool_ops->get_drvinfo(dev, &info);

		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
			"ip_summed=%d",
		     info.driver, dev ? dev->features : 0L,
		     skb->sk ? skb->sk->sk_route_caps : 0L,
		     skb->len, skb->data_len, skb->ip_summed);

		if (skb_header_cloned(skb) &&
		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
			return ERR_PTR(err);
	}

	rcu_read_lock();
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
				err = ptype->gso_send_check(skb);
				segs = ERR_PTR(err);
				if (err || skb_gso_ok(skb, features))
					break;
				__skb_push(skb, (skb->data -
						 skb_network_header(skb)));
			}
			segs = ptype->gso_segment(skb, features);
			break;
		}
	}
	rcu_read_unlock();

	__skb_push(skb, skb->data - skb_mac_header(skb));

	return segs;
}
EXPORT_SYMBOL(skb_gso_segment);

/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	if (net_ratelimit()) {
		printk(KERN_ERR "%s: hw csum failure.\n",
			dev ? dev->name : "<unknown>");
		dump_stack();
	}
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif

/* Actually, we should eliminate this check as soon as we know that:
 * 1. An IOMMU is present and can map all of the memory.
 * 2. No high memory really exists on this machine.
 */

static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int i;
	if (!(dev->features & NETIF_F_HIGHDMA)) {
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
			if (PageHighMem(skb_shinfo(skb)->frags[i].page))
				return 1;
	}

	if (PCI_DMA_BUS_IS_PHYS) {
		struct device *pdev = dev->dev.parent;

		if (!pdev)
			return 0;
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page);
			if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
				return 1;
		}
	}
#endif
	return 0;
}

struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)

static void dev_gso_skb_destructor(struct sk_buff *skb)
{
	struct dev_gso_cb *cb;

	do {
		struct sk_buff *nskb = skb->next;

		skb->next = nskb->next;
		nskb->next = NULL;
		kfree_skb(nskb);
	} while (skb->next);

	cb = DEV_GSO_CB(skb);
	if (cb->destructor)
		cb->destructor(skb);
}

/**
 *	dev_gso_segment - Perform emulated hardware segmentation on skb.
 *	@skb: buffer to segment
 *
 *	This function segments the given skb and stores the list of segments
 *	in skb->next.
 */
static int dev_gso_segment(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct sk_buff *segs;
	int features = dev->features & ~(illegal_highdma(dev, skb) ?
					 NETIF_F_SG : 0);

	segs = skb_gso_segment(skb, features);

	/* Verifying header integrity only. */
	if (!segs)
		return 0;

	if (IS_ERR(segs))
		return PTR_ERR(segs);

	skb->next = segs;
	DEV_GSO_CB(skb)->destructor = skb->destructor;
	skb->destructor = dev_gso_skb_destructor;

	return 0;
}

1887 /*
1888  * Try to orphan skb early, right before transmission by the device.
1889  * We cannot orphan skb if tx timestamp is requested, since
1890  * drivers need to call skb_tstamp_tx() to send the timestamp.
1891  */
1892 static inline void skb_orphan_try(struct sk_buff *skb)
1893 {
1894 	if (!skb_tx(skb)->flags)
1895 		skb_orphan(skb);
1896 }
1897 
1898 /*
1899  * Returns true if either:
1900  *	1. skb has frag_list and the device doesn't support FRAGLIST, or
1901  *	2. skb is fragmented and the device does not support SG, or if
1902  *	   at least one of the fragments is in highmem and the device
1903  *	   does not support DMA from it.
1904  */
1905 static inline int skb_needs_linearize(struct sk_buff *skb,
1906 				      struct net_device *dev)
1907 {
1908 	return skb_is_nonlinear(skb) &&
1909 	       ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
1910 	        (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
1911 					      illegal_highdma(dev, skb))));
1912 }
1913 
1914 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1915 			struct netdev_queue *txq)
1916 {
1917 	const struct net_device_ops *ops = dev->netdev_ops;
1918 	int rc = NETDEV_TX_OK;
1919 
1920 	if (likely(!skb->next)) {
1921 		if (!list_empty(&ptype_all))
1922 			dev_queue_xmit_nit(skb, dev);
1923 
1924 		/*
1925 		 * If the device doesn't need skb->dst, release it right now
1926 		 * while it's still hot in this CPU's cache.
1927 		 */
1928 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1929 			skb_dst_drop(skb);
1930 
1931 		skb_orphan_try(skb);
1932 
1933 		if (netif_needs_gso(dev, skb)) {
1934 			if (unlikely(dev_gso_segment(skb)))
1935 				goto out_kfree_skb;
1936 			if (skb->next)
1937 				goto gso;
1938 		} else {
1939 			if (skb_needs_linearize(skb, dev) &&
1940 			    __skb_linearize(skb))
1941 				goto out_kfree_skb;
1942 
1943 			/* If packet is not checksummed and device does not
1944 			 * support checksumming for this protocol, complete
1945 			 * checksumming here.
1946 			 */
1947 			if (skb->ip_summed == CHECKSUM_PARTIAL) {
1948 				skb_set_transport_header(skb, skb->csum_start -
1949 					      skb_headroom(skb));
1950 				if (!dev_can_checksum(dev, skb) &&
1951 				     skb_checksum_help(skb))
1952 					goto out_kfree_skb;
1953 			}
1954 		}
1955 
1956 		rc = ops->ndo_start_xmit(skb, dev);
1957 		if (rc == NETDEV_TX_OK)
1958 			txq_trans_update(txq);
1959 		return rc;
1960 	}
1961 
1962 gso:
1963 	do {
1964 		struct sk_buff *nskb = skb->next;
1965 
1966 		skb->next = nskb->next;
1967 		nskb->next = NULL;
1968 
1969 		/*
1970 		 * If the device doesn't need nskb->dst, release it right now
1971 		 * while it's still hot in this CPU's cache.
1972 		 */
1973 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1974 			skb_dst_drop(nskb);
1975 
1976 		rc = ops->ndo_start_xmit(nskb, dev);
1977 		if (unlikely(rc != NETDEV_TX_OK)) {
1978 			if (rc & ~NETDEV_TX_MASK)
1979 				goto out_kfree_gso_skb;
1980 			nskb->next = skb->next;
1981 			skb->next = nskb;
1982 			return rc;
1983 		}
1984 		txq_trans_update(txq);
1985 		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
1986 			return NETDEV_TX_BUSY;
1987 	} while (skb->next);
1988 
1989 out_kfree_gso_skb:
1990 	if (likely(skb->next == NULL))
1991 		skb->destructor = DEV_GSO_CB(skb)->destructor;
1992 out_kfree_skb:
1993 	kfree_skb(skb);
1994 	return rc;
1995 }
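
/*
 * Editorial sketch (not part of the original file): the driver-side
 * contract that dev_hard_start_xmit() relies on.  ->ndo_start_xmit()
 * must return NETDEV_TX_OK once it owns the skb, or NETDEV_TX_BUSY
 * without freeing it so the upper layer can requeue.  struct example_priv
 * and the example_hw_*() helpers are hypothetical.
 */
#if 0
static netdev_tx_t example_start_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	struct example_priv *priv = netdev_priv(dev);

	if (!example_hw_has_room(priv)) {
		netif_stop_queue(dev);
		return NETDEV_TX_BUSY;	/* skb is NOT consumed */
	}
	example_hw_queue_frame(priv, skb);
	return NETDEV_TX_OK;		/* skb now owned by the driver */
}
#endif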
1996 
1997 static u32 hashrnd __read_mostly;
1998 
1999 u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
2000 {
2001 	u32 hash;
2002 
2003 	if (skb_rx_queue_recorded(skb)) {
2004 		hash = skb_get_rx_queue(skb);
2005 		while (unlikely(hash >= dev->real_num_tx_queues))
2006 			hash -= dev->real_num_tx_queues;
2007 		return hash;
2008 	}
2009 
2010 	if (skb->sk && skb->sk->sk_hash)
2011 		hash = skb->sk->sk_hash;
2012 	else
2013 		hash = (__force u16) skb->protocol;
2014 
2015 	hash = jhash_1word(hash, hashrnd);
2016 
2017 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
2018 }
2019 EXPORT_SYMBOL(skb_tx_hash);
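
/*
 * Editorial sketch (not part of the original file): the last line of
 * skb_tx_hash() maps a 32-bit hash onto [0, real_num_tx_queues) with a
 * multiply-and-shift instead of a modulo: it treats hash/2^32 as a
 * fraction in [0, 1) and scales it by the queue count, avoiding a
 * division.  Stand-alone form, with a hypothetical name:
 */
#if 0
static u16 example_scale_hash(u32 hash, u16 nqueues)
{
	/* e.g. hash = 0x80000000, nqueues = 8  ->  (0.5 * 8) = queue 4 */
	return (u16)(((u64)hash * nqueues) >> 32);
}
#endif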
2020 
2021 static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
2022 {
2023 	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
2024 		if (net_ratelimit()) {
2025 			pr_warning("%s selects TX queue %d, but "
2026 				"real number of TX queues is %d\n",
2027 				dev->name, queue_index, dev->real_num_tx_queues);
2028 		}
2029 		return 0;
2030 	}
2031 	return queue_index;
2032 }
2033 
2034 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
2035 					struct sk_buff *skb)
2036 {
2037 	u16 queue_index;
2038 	struct sock *sk = skb->sk;
2039 
2040 	if (sk_tx_queue_recorded(sk)) {
2041 		queue_index = sk_tx_queue_get(sk);
2042 	} else {
2043 		const struct net_device_ops *ops = dev->netdev_ops;
2044 
2045 		if (ops->ndo_select_queue) {
2046 			queue_index = ops->ndo_select_queue(dev, skb);
2047 			queue_index = dev_cap_txqueue(dev, queue_index);
2048 		} else {
2049 			queue_index = 0;
2050 			if (dev->real_num_tx_queues > 1)
2051 				queue_index = skb_tx_hash(dev, skb);
2052 
2053 			if (sk) {
2054 				struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1);
2055 
2056 				if (dst && skb_dst(skb) == dst)
2057 					sk_tx_queue_set(sk, queue_index);
2058 			}
2059 		}
2060 	}
2061 
2062 	skb_set_queue_mapping(skb, queue_index);
2063 	return netdev_get_tx_queue(dev, queue_index);
2064 }
2065 
2066 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2067 				 struct net_device *dev,
2068 				 struct netdev_queue *txq)
2069 {
2070 	spinlock_t *root_lock = qdisc_lock(q);
2071 	bool contended = qdisc_is_running(q);
2072 	int rc;
2073 
2074 	/*
2075 	 * Heuristic to force contended enqueues to serialize on a
2076 	 * separate lock before trying to get the qdisc main lock.
2077 	 * This permits the __QDISC_STATE_RUNNING owner to get the lock more
2078 	 * often and dequeue packets faster.
2079 	 */
2080 	if (unlikely(contended))
2081 		spin_lock(&q->busylock);
2082 
2083 	spin_lock(root_lock);
2084 	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
2085 		kfree_skb(skb);
2086 		rc = NET_XMIT_DROP;
2087 	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
2088 		   qdisc_run_begin(q)) {
2089 		/*
2090 		 * This is a work-conserving queue; there are no old skbs
2091 		 * waiting to be sent out; and the qdisc is not running -
2092 		 * xmit the skb directly.
2093 		 */
2094 		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
2095 			skb_dst_force(skb);
2096 		__qdisc_update_bstats(q, skb->len);
2097 		if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
2098 			if (unlikely(contended)) {
2099 				spin_unlock(&q->busylock);
2100 				contended = false;
2101 			}
2102 			__qdisc_run(q);
2103 		} else
2104 			qdisc_run_end(q);
2105 
2106 		rc = NET_XMIT_SUCCESS;
2107 	} else {
2108 		skb_dst_force(skb);
2109 		rc = qdisc_enqueue_root(skb, q);
2110 		if (qdisc_run_begin(q)) {
2111 			if (unlikely(contended)) {
2112 				spin_unlock(&q->busylock);
2113 				contended = false;
2114 			}
2115 			__qdisc_run(q);
2116 		}
2117 	}
2118 	spin_unlock(root_lock);
2119 	if (unlikely(contended))
2120 		spin_unlock(&q->busylock);
2121 	return rc;
2122 }
2123 
2124 /**
2125  *	dev_queue_xmit - transmit a buffer
2126  *	@skb: buffer to transmit
2127  *
2128  *	Queue a buffer for transmission to a network device. The caller must
2129  *	have set the device and priority and built the buffer before calling
2130  *	this function. The function can be called from an interrupt.
2131  *
2132  *	A negative errno code is returned on a failure. A success does not
2133  *	guarantee the frame will be transmitted as it may be dropped due
2134  *	to congestion or traffic shaping.
2135  *
2136  * -----------------------------------------------------------------------------------
2137  *      I notice this method can also return errors from the queue disciplines,
2138  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
2139  *      be positive.
2140  *
2141  *      Regardless of the return value, the skb is consumed, so it is
2142  *      currently difficult to retry a failed send.  (You can bump the
2143  *      refcount before sending to hold a reference for a retry if you are careful.)
2144  *
2145  *      When calling this method, interrupts MUST be enabled.  This is because
2146  *      the BH enable code must have IRQs enabled so that it will not deadlock.
2147  *          --BLG
2148  */
2149 int dev_queue_xmit(struct sk_buff *skb)
2150 {
2151 	struct net_device *dev = skb->dev;
2152 	struct netdev_queue *txq;
2153 	struct Qdisc *q;
2154 	int rc = -ENOMEM;
2155 
2156 	/* Disable soft irqs for various locks below. Also
2157 	 * stops preemption for RCU.
2158 	 */
2159 	rcu_read_lock_bh();
2160 
2161 	txq = dev_pick_tx(dev, skb);
2162 	q = rcu_dereference_bh(txq->qdisc);
2163 
2164 #ifdef CONFIG_NET_CLS_ACT
2165 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
2166 #endif
2167 	if (q->enqueue) {
2168 		rc = __dev_xmit_skb(skb, q, dev, txq);
2169 		goto out;
2170 	}
2171 
2172 	/* The device has no queue. This is the common case for software
2173 	   devices: loopback and all sorts of tunnels...
2174 
2175 	   Really, it is unlikely that netif_tx_lock protection is necessary
2176 	   here.  (E.g. loopback and IP tunnels are clean, ignoring statistics
2177 	   counters.)
2178 	   However, it is possible that they rely on the protection
2179 	   we provide here.
2180 
2181 	   Check this and shoot the lock; it is not prone to deadlocks.
2182 	   Or shoot the noqueue qdisc entirely - that is even simpler 8)
2183 	 */
2184 	if (dev->flags & IFF_UP) {
2185 		int cpu = smp_processor_id(); /* ok because BHs are off */
2186 
2187 		if (txq->xmit_lock_owner != cpu) {
2188 
2189 			HARD_TX_LOCK(dev, txq, cpu);
2190 
2191 			if (!netif_tx_queue_stopped(txq)) {
2192 				rc = dev_hard_start_xmit(skb, dev, txq);
2193 				if (dev_xmit_complete(rc)) {
2194 					HARD_TX_UNLOCK(dev, txq);
2195 					goto out;
2196 				}
2197 			}
2198 			HARD_TX_UNLOCK(dev, txq);
2199 			if (net_ratelimit())
2200 				printk(KERN_CRIT "Virtual device %s asks to "
2201 				       "queue packet!\n", dev->name);
2202 		} else {
2203 			/* Recursion detected! It is possible,
2204 			 * unfortunately. */
2205 			if (net_ratelimit())
2206 				printk(KERN_CRIT "Dead loop on virtual device "
2207 				       "%s, fix it urgently!\n", dev->name);
2208 		}
2209 	}
2210 
2211 	rc = -ENETDOWN;
2212 	rcu_read_unlock_bh();
2213 
2214 	kfree_skb(skb);
2215 	return rc;
2216 out:
2217 	rcu_read_unlock_bh();
2218 	return rc;
2219 }
2220 EXPORT_SYMBOL(dev_queue_xmit);
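
/*
 * Editorial sketch (not part of the original file): the minimal caller
 * contract spelled out above.  A hypothetical protocol layer points a
 * fully built skb at its egress device and hands it off; whatever
 * dev_queue_xmit() returns, the skb must not be touched again.
 */
#if 0
static int example_xmit(struct sk_buff *skb, struct net_device *out_dev)
{
	skb->dev = out_dev;	/* caller picks the egress device */
	skb->priority = 0;	/* and the queueing priority */

	/* skb is consumed even on error, so no kfree_skb() here */
	return dev_queue_xmit(skb);
}
#endif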
2221 
2222 
2223 /*=======================================================================
2224 			Receiver routines
2225   =======================================================================*/
2226 
2227 int netdev_max_backlog __read_mostly = 1000;
2228 int netdev_tstamp_prequeue __read_mostly = 1;
2229 int netdev_budget __read_mostly = 300;
2230 int weight_p __read_mostly = 64;            /* old backlog weight */
2231 
2232 /* Called with irq disabled */
2233 static inline void ____napi_schedule(struct softnet_data *sd,
2234 				     struct napi_struct *napi)
2235 {
2236 	list_add_tail(&napi->poll_list, &sd->poll_list);
2237 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2238 }
2239 
2240 #ifdef CONFIG_RPS
2241 
2242 /* One global table that all flow-based protocols share. */
2243 struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
2244 EXPORT_SYMBOL(rps_sock_flow_table);
2245 
2246 /*
2247  * get_rps_cpu is called from netif_receive_skb and returns the target
2248  * CPU from the RPS map of the receiving queue for a given skb.
2249  * rcu_read_lock must be held on entry.
2250  */
2251 static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2252 		       struct rps_dev_flow **rflowp)
2253 {
2254 	struct ipv6hdr *ip6;
2255 	struct iphdr *ip;
2256 	struct netdev_rx_queue *rxqueue;
2257 	struct rps_map *map;
2258 	struct rps_dev_flow_table *flow_table;
2259 	struct rps_sock_flow_table *sock_flow_table;
2260 	int cpu = -1;
2261 	u8 ip_proto;
2262 	u16 tcpu;
2263 	u32 addr1, addr2, ihl;
2264 	union {
2265 		u32 v32;
2266 		u16 v16[2];
2267 	} ports;
2268 
2269 	if (skb_rx_queue_recorded(skb)) {
2270 		u16 index = skb_get_rx_queue(skb);
2271 		if (unlikely(index >= dev->num_rx_queues)) {
2272 			WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
2273 				"on queue %u, but number of RX queues is %u\n",
2274 				dev->name, index, dev->num_rx_queues);
2275 			goto done;
2276 		}
2277 		rxqueue = dev->_rx + index;
2278 	} else
2279 		rxqueue = dev->_rx;
2280 
2281 	if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
2282 		goto done;
2283 
2284 	if (skb->rxhash)
2285 		goto got_hash; /* Skip hash computation on packet header */
2286 
2287 	switch (skb->protocol) {
2288 	case __constant_htons(ETH_P_IP):
2289 		if (!pskb_may_pull(skb, sizeof(*ip)))
2290 			goto done;
2291 
2292 		ip = (struct iphdr *) skb->data;
2293 		ip_proto = ip->protocol;
2294 		addr1 = (__force u32) ip->saddr;
2295 		addr2 = (__force u32) ip->daddr;
2296 		ihl = ip->ihl;
2297 		break;
2298 	case __constant_htons(ETH_P_IPV6):
2299 		if (!pskb_may_pull(skb, sizeof(*ip6)))
2300 			goto done;
2301 
2302 		ip6 = (struct ipv6hdr *) skb->data;
2303 		ip_proto = ip6->nexthdr;
2304 		addr1 = (__force u32) ip6->saddr.s6_addr32[3];
2305 		addr2 = (__force u32) ip6->daddr.s6_addr32[3];
2306 		ihl = (40 >> 2);
2307 		break;
2308 	default:
2309 		goto done;
2310 	}
2311 	switch (ip_proto) {
2312 	case IPPROTO_TCP:
2313 	case IPPROTO_UDP:
2314 	case IPPROTO_DCCP:
2315 	case IPPROTO_ESP:
2316 	case IPPROTO_AH:
2317 	case IPPROTO_SCTP:
2318 	case IPPROTO_UDPLITE:
2319 		if (pskb_may_pull(skb, (ihl * 4) + 4)) {
2320 			ports.v32 = * (__force u32 *) (skb->data + (ihl * 4));
2321 			if (ports.v16[1] < ports.v16[0])
2322 				swap(ports.v16[0], ports.v16[1]);
2323 			break;
2324 		}
2325 	default:
2326 		ports.v32 = 0;
2327 		break;
2328 	}
2329 
2330 	/* get a consistent hash (same value on both flow directions) */
2331 	if (addr2 < addr1)
2332 		swap(addr1, addr2);
2333 	skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
2334 	if (!skb->rxhash)
2335 		skb->rxhash = 1;
2336 
2337 got_hash:
2338 	flow_table = rcu_dereference(rxqueue->rps_flow_table);
2339 	sock_flow_table = rcu_dereference(rps_sock_flow_table);
2340 	if (flow_table && sock_flow_table) {
2341 		u16 next_cpu;
2342 		struct rps_dev_flow *rflow;
2343 
2344 		rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
2345 		tcpu = rflow->cpu;
2346 
2347 		next_cpu = sock_flow_table->ents[skb->rxhash &
2348 		    sock_flow_table->mask];
2349 
2350 		/*
2351 		 * If the desired CPU (where last recvmsg was done) is
2352 		 * different from current CPU (one in the rx-queue flow
2353 		 * table entry), switch if one of the following holds:
2354 		 *   - Current CPU is unset (equal to RPS_NO_CPU).
2355 		 *   - Current CPU is offline.
2356 		 *   - The current CPU's queue tail has advanced beyond the
2357 		 *     last packet that was enqueued using this table entry.
2358 		 *     This guarantees that all previous packets for the flow
2359 		 *     have been dequeued, thus preserving in order delivery.
2360 		 */
2361 		if (unlikely(tcpu != next_cpu) &&
2362 		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
2363 		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
2364 		      rflow->last_qtail)) >= 0)) {
2365 			tcpu = rflow->cpu = next_cpu;
2366 			if (tcpu != RPS_NO_CPU)
2367 				rflow->last_qtail = per_cpu(softnet_data,
2368 				    tcpu).input_queue_head;
2369 		}
2370 		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
2371 			*rflowp = rflow;
2372 			cpu = tcpu;
2373 			goto done;
2374 		}
2375 	}
2376 
2377 	map = rcu_dereference(rxqueue->rps_map);
2378 	if (map) {
2379 		tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
2380 
2381 		if (cpu_online(tcpu)) {
2382 			cpu = tcpu;
2383 			goto done;
2384 		}
2385 	}
2386 
2387 done:
2388 	return cpu;
2389 }
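
/*
 * Editorial sketch (not part of the original file): get_rps_cpu() makes
 * its hash direction-independent by canonically ordering the addresses
 * and the two ports before hashing, so both directions of a flow produce
 * the same rxhash and land on the same CPU.  Stand-alone form of the same
 * idea, with a hypothetical name (port byte order is glossed over here):
 */
#if 0
static u32 example_flow_hash(u32 saddr, u32 daddr, u16 sport, u16 dport,
			     u32 seed)
{
	union {
		u32 v32;
		u16 v16[2];
	} ports;

	ports.v16[0] = sport;
	ports.v16[1] = dport;
	if (ports.v16[1] < ports.v16[0])
		swap(ports.v16[0], ports.v16[1]);
	if (daddr < saddr)
		swap(saddr, daddr);

	return jhash_3words(saddr, daddr, ports.v32, seed);
}
#endif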
2390 
2391 /* Called from hardirq (IPI) context */
2392 static void rps_trigger_softirq(void *data)
2393 {
2394 	struct softnet_data *sd = data;
2395 
2396 	____napi_schedule(sd, &sd->backlog);
2397 	sd->received_rps++;
2398 }
2399 
2400 #endif /* CONFIG_RPS */
2401 
2402 /*
2403  * Check whether this softnet_data structure belongs to another CPU.
2404  * If so, queue it on our IPI list and return 1;
2405  * if not, return 0.
2406  */
2407 static int rps_ipi_queued(struct softnet_data *sd)
2408 {
2409 #ifdef CONFIG_RPS
2410 	struct softnet_data *mysd = &__get_cpu_var(softnet_data);
2411 
2412 	if (sd != mysd) {
2413 		sd->rps_ipi_next = mysd->rps_ipi_list;
2414 		mysd->rps_ipi_list = sd;
2415 
2416 		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2417 		return 1;
2418 	}
2419 #endif /* CONFIG_RPS */
2420 	return 0;
2421 }
2422 
2423 /*
2424  * enqueue_to_backlog is called to queue an skb on a per-CPU backlog
2425  * queue (may be a remote CPU queue).
2426  */
2427 static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
2428 			      unsigned int *qtail)
2429 {
2430 	struct softnet_data *sd;
2431 	unsigned long flags;
2432 
2433 	sd = &per_cpu(softnet_data, cpu);
2434 
2435 	local_irq_save(flags);
2436 
2437 	rps_lock(sd);
2438 	if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
2439 		if (skb_queue_len(&sd->input_pkt_queue)) {
2440 enqueue:
2441 			__skb_queue_tail(&sd->input_pkt_queue, skb);
2442 			input_queue_tail_incr_save(sd, qtail);
2443 			rps_unlock(sd);
2444 			local_irq_restore(flags);
2445 			return NET_RX_SUCCESS;
2446 		}
2447 
2448 		/* Schedule NAPI for the backlog device.
2449 		 * We can use a non-atomic operation since we own the queue lock.
2450 		 */
2451 		if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
2452 			if (!rps_ipi_queued(sd))
2453 				____napi_schedule(sd, &sd->backlog);
2454 		}
2455 		goto enqueue;
2456 	}
2457 
2458 	sd->dropped++;
2459 	rps_unlock(sd);
2460 
2461 	local_irq_restore(flags);
2462 
2463 	kfree_skb(skb);
2464 	return NET_RX_DROP;
2465 }
2466 
2467 /**
2468  *	netif_rx	-	post buffer to the network code
2469  *	@skb: buffer to post
2470  *
2471  *	This function receives a packet from a device driver and queues it for
2472  *	the upper (protocol) levels to process.  It always succeeds. The buffer
2473  *	may be dropped during processing for congestion control or by the
2474  *	protocol layers.
2475  *
2476  *	return values:
2477  *	NET_RX_SUCCESS	(no congestion)
2478  *	NET_RX_DROP     (packet was dropped)
2479  *
2480  */
2481 
2482 int netif_rx(struct sk_buff *skb)
2483 {
2484 	int ret;
2485 
2486 	/* if netpoll wants it, pretend we never saw it */
2487 	if (netpoll_rx(skb))
2488 		return NET_RX_DROP;
2489 
2490 	if (netdev_tstamp_prequeue)
2491 		net_timestamp_check(skb);
2492 
2493 #ifdef CONFIG_RPS
2494 	{
2495 		struct rps_dev_flow voidflow, *rflow = &voidflow;
2496 		int cpu;
2497 
2498 		rcu_read_lock();
2499 
2500 		cpu = get_rps_cpu(skb->dev, skb, &rflow);
2501 		if (cpu < 0)
2502 			cpu = smp_processor_id();
2503 
2504 		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
2505 
2506 		rcu_read_unlock();
2507 	}
2508 #else
2509 	{
2510 		unsigned int qtail;
2511 		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
2512 		put_cpu();
2513 	}
2514 #endif
2515 	return ret;
2516 }
2517 EXPORT_SYMBOL(netif_rx);
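
/*
 * Editorial sketch (not part of the original file): the classic non-NAPI
 * driver pattern that feeds netif_rx() from its interrupt handler.  The
 * example_hw_*() helpers and the fixed frame length are hypothetical.
 */
#if 0
static irqreturn_t example_isr(int irq, void *dev_id)
{
	struct net_device *dev = dev_id;
	struct sk_buff *skb;
	int len = example_hw_frame_len(dev);

	skb = netdev_alloc_skb_ip_align(dev, len);
	if (!skb) {
		dev->stats.rx_dropped++;
		return IRQ_HANDLED;
	}
	example_hw_read_frame(dev, skb_put(skb, len));
	skb->protocol = eth_type_trans(skb, dev);
	netif_rx(skb);		/* queue to the per-CPU backlog */
	return IRQ_HANDLED;
}
#endif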
2518 
2519 int netif_rx_ni(struct sk_buff *skb)
2520 {
2521 	int err;
2522 
2523 	preempt_disable();
2524 	err = netif_rx(skb);
2525 	if (local_softirq_pending())
2526 		do_softirq();
2527 	preempt_enable();
2528 
2529 	return err;
2530 }
2531 EXPORT_SYMBOL(netif_rx_ni);
2532 
2533 static void net_tx_action(struct softirq_action *h)
2534 {
2535 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
2536 
2537 	if (sd->completion_queue) {
2538 		struct sk_buff *clist;
2539 
2540 		local_irq_disable();
2541 		clist = sd->completion_queue;
2542 		sd->completion_queue = NULL;
2543 		local_irq_enable();
2544 
2545 		while (clist) {
2546 			struct sk_buff *skb = clist;
2547 			clist = clist->next;
2548 
2549 			WARN_ON(atomic_read(&skb->users));
2550 			__kfree_skb(skb);
2551 		}
2552 	}
2553 
2554 	if (sd->output_queue) {
2555 		struct Qdisc *head;
2556 
2557 		local_irq_disable();
2558 		head = sd->output_queue;
2559 		sd->output_queue = NULL;
2560 		sd->output_queue_tailp = &sd->output_queue;
2561 		local_irq_enable();
2562 
2563 		while (head) {
2564 			struct Qdisc *q = head;
2565 			spinlock_t *root_lock;
2566 
2567 			head = head->next_sched;
2568 
2569 			root_lock = qdisc_lock(q);
2570 			if (spin_trylock(root_lock)) {
2571 				smp_mb__before_clear_bit();
2572 				clear_bit(__QDISC_STATE_SCHED,
2573 					  &q->state);
2574 				qdisc_run(q);
2575 				spin_unlock(root_lock);
2576 			} else {
2577 				if (!test_bit(__QDISC_STATE_DEACTIVATED,
2578 					      &q->state)) {
2579 					__netif_reschedule(q);
2580 				} else {
2581 					smp_mb__before_clear_bit();
2582 					clear_bit(__QDISC_STATE_SCHED,
2583 						  &q->state);
2584 				}
2585 			}
2586 		}
2587 	}
2588 }
2589 
2590 static inline int deliver_skb(struct sk_buff *skb,
2591 			      struct packet_type *pt_prev,
2592 			      struct net_device *orig_dev)
2593 {
2594 	atomic_inc(&skb->users);
2595 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2596 }
2597 
2598 #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
2599     (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
2600 /* This hook is defined here for ATM LANE */
2601 int (*br_fdb_test_addr_hook)(struct net_device *dev,
2602 			     unsigned char *addr) __read_mostly;
2603 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
2604 #endif
2605 
2606 #ifdef CONFIG_NET_CLS_ACT
2607 /* TODO: Maybe we should just force sch_ingress to be compiled in
2608  * whenever CONFIG_NET_CLS_ACT is? Otherwise we currently execute some
2609  * useless instructions (a compare and two extra stores) when it is not
2610  * enabled but CONFIG_NET_CLS_ACT is.
2611  * NOTE: This doesn't disable any functionality; if you don't have
2612  * the ingress scheduler, you just can't add policies on ingress.
2613  *
2614  */
2615 static int ing_filter(struct sk_buff *skb)
2616 {
2617 	struct net_device *dev = skb->dev;
2618 	u32 ttl = G_TC_RTTL(skb->tc_verd);
2619 	struct netdev_queue *rxq;
2620 	int result = TC_ACT_OK;
2621 	struct Qdisc *q;
2622 
2623 	if (MAX_RED_LOOP < ttl++) {
2624 		printk(KERN_WARNING
2625 		       "Redir loop detected, dropping packet (%d->%d)\n",
2626 		       skb->skb_iif, dev->ifindex);
2627 		return TC_ACT_SHOT;
2628 	}
2629 
2630 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2631 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2632 
2633 	rxq = &dev->rx_queue;
2634 
2635 	q = rxq->qdisc;
2636 	if (q != &noop_qdisc) {
2637 		spin_lock(qdisc_lock(q));
2638 		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2639 			result = qdisc_enqueue_root(skb, q);
2640 		spin_unlock(qdisc_lock(q));
2641 	}
2642 
2643 	return result;
2644 }
2645 
2646 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2647 					 struct packet_type **pt_prev,
2648 					 int *ret, struct net_device *orig_dev)
2649 {
2650 	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
2651 		goto out;
2652 
2653 	if (*pt_prev) {
2654 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2655 		*pt_prev = NULL;
2656 	}
2657 
2658 	switch (ing_filter(skb)) {
2659 	case TC_ACT_SHOT:
2660 	case TC_ACT_STOLEN:
2661 		kfree_skb(skb);
2662 		return NULL;
2663 	}
2664 
2665 out:
2666 	skb->tc_verd = 0;
2667 	return skb;
2668 }
2669 #endif
2670 
2671 /*
2672  * 	netif_nit_deliver - deliver received packets to network taps
2673  * 	@skb: buffer
2674  *
2675  * 	This function is used to deliver incoming packets to network
2676  * 	taps. It should be used when the normal netif_receive_skb path
2677  * 	is bypassed, for example because of VLAN acceleration.
2678  */
2679 void netif_nit_deliver(struct sk_buff *skb)
2680 {
2681 	struct packet_type *ptype;
2682 
2683 	if (list_empty(&ptype_all))
2684 		return;
2685 
2686 	skb_reset_network_header(skb);
2687 	skb_reset_transport_header(skb);
2688 	skb->mac_len = skb->network_header - skb->mac_header;
2689 
2690 	rcu_read_lock();
2691 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2692 		if (!ptype->dev || ptype->dev == skb->dev)
2693 			deliver_skb(skb, ptype, skb->dev);
2694 	}
2695 	rcu_read_unlock();
2696 }
2697 
2698 /**
2699  *	netdev_rx_handler_register - register receive handler
2700  *	@dev: device to register a handler for
2701  *	@rx_handler: receive handler to register
2702  *	@rx_handler_data: data pointer that is used by rx handler
2703  *
2704  *	Register a receive handler for a device. This handler will then be
2705  *	called from __netif_receive_skb. A negative errno code is returned
2706  *	on a failure.
2707  *
2708  *	The caller must hold the rtnl_mutex.
2709  */
2710 int netdev_rx_handler_register(struct net_device *dev,
2711 			       rx_handler_func_t *rx_handler,
2712 			       void *rx_handler_data)
2713 {
2714 	ASSERT_RTNL();
2715 
2716 	if (dev->rx_handler)
2717 		return -EBUSY;
2718 
2719 	rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
2720 	rcu_assign_pointer(dev->rx_handler, rx_handler);
2721 
2722 	return 0;
2723 }
2724 EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
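
/*
 * Editorial sketch (not part of the original file): registering an
 * rx_handler the way a bridge/macvlan-style upper device would, under
 * rtnl_lock as required above.  example_handle_frame() and struct
 * example_port are hypothetical.
 */
#if 0
static int example_attach_port(struct net_device *dev,
			       struct example_port *port)
{
	int err;

	rtnl_lock();
	err = netdev_rx_handler_register(dev, example_handle_frame, port);
	rtnl_unlock();

	return err;	/* -EBUSY if a handler is already attached */
}
#endif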
2725 
2726 /**
2727  *	netdev_rx_handler_unregister - unregister receive handler
2728  *	@dev: device to unregister a handler from
2729  *
2730  *	Unregister a receive handler from a device.
2731  *
2732  *	The caller must hold the rtnl_mutex.
2733  */
2734 void netdev_rx_handler_unregister(struct net_device *dev)
2735 {
2736 
2737 	ASSERT_RTNL();
2738 	rcu_assign_pointer(dev->rx_handler, NULL);
2739 	rcu_assign_pointer(dev->rx_handler_data, NULL);
2740 }
2741 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
2742 
2743 static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
2744 					      struct net_device *master)
2745 {
2746 	if (skb->pkt_type == PACKET_HOST) {
2747 		u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
2748 
2749 		memcpy(dest, master->dev_addr, ETH_ALEN);
2750 	}
2751 }
2752 
2753 /* On bonding slaves other than the currently active slave, suppress
2754  * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
2755  * ARP on active-backup slaves with arp_validate enabled.
2756  */
2757 int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
2758 {
2759 	struct net_device *dev = skb->dev;
2760 
2761 	if (master->priv_flags & IFF_MASTER_ARPMON)
2762 		dev->last_rx = jiffies;
2763 
2764 	if ((master->priv_flags & IFF_MASTER_ALB) &&
2765 	    (master->priv_flags & IFF_BRIDGE_PORT)) {
2766 		/* Do address unmangling. The local destination address
2767 		 * will always be the one the master has; this provides the
2768 		 * right functionality in a bridge.
2769 		 */
2770 		skb_bond_set_mac_by_master(skb, master);
2771 	}
2772 
2773 	if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
2774 		if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
2775 		    skb->protocol == __cpu_to_be16(ETH_P_ARP))
2776 			return 0;
2777 
2778 		if (master->priv_flags & IFF_MASTER_ALB) {
2779 			if (skb->pkt_type != PACKET_BROADCAST &&
2780 			    skb->pkt_type != PACKET_MULTICAST)
2781 				return 0;
2782 		}
2783 		if (master->priv_flags & IFF_MASTER_8023AD &&
2784 		    skb->protocol == __cpu_to_be16(ETH_P_SLOW))
2785 			return 0;
2786 
2787 		return 1;
2788 	}
2789 	return 0;
2790 }
2791 EXPORT_SYMBOL(__skb_bond_should_drop);
2792 
2793 static int __netif_receive_skb(struct sk_buff *skb)
2794 {
2795 	struct packet_type *ptype, *pt_prev;
2796 	rx_handler_func_t *rx_handler;
2797 	struct net_device *orig_dev;
2798 	struct net_device *master;
2799 	struct net_device *null_or_orig;
2800 	struct net_device *orig_or_bond;
2801 	int ret = NET_RX_DROP;
2802 	__be16 type;
2803 
2804 	if (!netdev_tstamp_prequeue)
2805 		net_timestamp_check(skb);
2806 
2807 	if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
2808 		return NET_RX_SUCCESS;
2809 
2810 	/* if we've gotten here through NAPI, check netpoll */
2811 	if (netpoll_receive_skb(skb))
2812 		return NET_RX_DROP;
2813 
2814 	if (!skb->skb_iif)
2815 		skb->skb_iif = skb->dev->ifindex;
2816 
2817 	/*
2818 	 * bonding note: skbs received on inactive slaves should only
2819 	 * be delivered to pkt handlers that are exact matches.  Also
2820 	 * the deliver_no_wcard flag will be set.  If packet handlers
2821 	 * are sensitive to duplicate packets these skbs will need to
2822 	 * be dropped at the handler.  The vlan accel path may have
2823 	 * already set the deliver_no_wcard flag.
2824 	 */
2825 	null_or_orig = NULL;
2826 	orig_dev = skb->dev;
2827 	master = ACCESS_ONCE(orig_dev->master);
2828 	if (skb->deliver_no_wcard)
2829 		null_or_orig = orig_dev;
2830 	else if (master) {
2831 		if (skb_bond_should_drop(skb, master)) {
2832 			skb->deliver_no_wcard = 1;
2833 			null_or_orig = orig_dev; /* deliver only exact match */
2834 		} else
2835 			skb->dev = master;
2836 	}
2837 
2838 	__this_cpu_inc(softnet_data.processed);
2839 	skb_reset_network_header(skb);
2840 	skb_reset_transport_header(skb);
2841 	skb->mac_len = skb->network_header - skb->mac_header;
2842 
2843 	pt_prev = NULL;
2844 
2845 	rcu_read_lock();
2846 
2847 #ifdef CONFIG_NET_CLS_ACT
2848 	if (skb->tc_verd & TC_NCLS) {
2849 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2850 		goto ncls;
2851 	}
2852 #endif
2853 
2854 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2855 		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2856 		    ptype->dev == orig_dev) {
2857 			if (pt_prev)
2858 				ret = deliver_skb(skb, pt_prev, orig_dev);
2859 			pt_prev = ptype;
2860 		}
2861 	}
2862 
2863 #ifdef CONFIG_NET_CLS_ACT
2864 	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2865 	if (!skb)
2866 		goto out;
2867 ncls:
2868 #endif
2869 
2870 	/* Handle the special case of bridge or macvlan */
2871 	rx_handler = rcu_dereference(skb->dev->rx_handler);
2872 	if (rx_handler) {
2873 		if (pt_prev) {
2874 			ret = deliver_skb(skb, pt_prev, orig_dev);
2875 			pt_prev = NULL;
2876 		}
2877 		skb = rx_handler(skb);
2878 		if (!skb)
2879 			goto out;
2880 	}
2881 
2882 	/*
2883 	 * Make sure frames received on VLAN interfaces stacked on
2884 	 * bonding interfaces still make their way to any base bonding
2885 	 * device that may have registered for a specific ptype.  The
2886 	 * handler may have to adjust skb->dev and orig_dev.
2887 	 */
2888 	orig_or_bond = orig_dev;
2889 	if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
2890 	    (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
2891 		orig_or_bond = vlan_dev_real_dev(skb->dev);
2892 	}
2893 
2894 	type = skb->protocol;
2895 	list_for_each_entry_rcu(ptype,
2896 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2897 		if (ptype->type == type && (ptype->dev == null_or_orig ||
2898 		     ptype->dev == skb->dev || ptype->dev == orig_dev ||
2899 		     ptype->dev == orig_or_bond)) {
2900 			if (pt_prev)
2901 				ret = deliver_skb(skb, pt_prev, orig_dev);
2902 			pt_prev = ptype;
2903 		}
2904 	}
2905 
2906 	if (pt_prev) {
2907 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2908 	} else {
2909 		kfree_skb(skb);
2910 		/* Jamal, now you will not be able to escape explaining
2911 		 * to me how you were going to use this. :-)
2912 		 */
2913 		ret = NET_RX_DROP;
2914 	}
2915 
2916 out:
2917 	rcu_read_unlock();
2918 	return ret;
2919 }
2920 
2921 /**
2922  *	netif_receive_skb - process receive buffer from network
2923  *	@skb: buffer to process
2924  *
2925  *	netif_receive_skb() is the main receive data processing function.
2926  *	It always succeeds. The buffer may be dropped during processing
2927  *	for congestion control or by the protocol layers.
2928  *
2929  *	This function may only be called from softirq context and interrupts
2930  *	should be enabled.
2931  *
2932  *	Return values (usually ignored):
2933  *	NET_RX_SUCCESS: no congestion
2934  *	NET_RX_DROP: packet was dropped
2935  */
2936 int netif_receive_skb(struct sk_buff *skb)
2937 {
2938 	if (netdev_tstamp_prequeue)
2939 		net_timestamp_check(skb);
2940 
2941 #ifdef CONFIG_RPS
2942 	{
2943 		struct rps_dev_flow voidflow, *rflow = &voidflow;
2944 		int cpu, ret;
2945 
2946 		rcu_read_lock();
2947 
2948 		cpu = get_rps_cpu(skb->dev, skb, &rflow);
2949 
2950 		if (cpu >= 0) {
2951 			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
2952 			rcu_read_unlock();
2953 		} else {
2954 			rcu_read_unlock();
2955 			ret = __netif_receive_skb(skb);
2956 		}
2957 
2958 		return ret;
2959 	}
2960 #else
2961 	return __netif_receive_skb(skb);
2962 #endif
2963 }
2964 EXPORT_SYMBOL(netif_receive_skb);
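
/*
 * Editorial sketch (not part of the original file): a NAPI driver's
 * ->poll() delivering completed frames through netif_receive_skb() from
 * softirq context, as required above.  struct example_priv and
 * example_rx_one() are hypothetical.
 */
#if 0
static int example_poll(struct napi_struct *napi, int budget)
{
	struct example_priv *priv = container_of(napi, struct example_priv,
						 napi);
	struct sk_buff *skb;
	int work = 0;

	while (work < budget && (skb = example_rx_one(priv)) != NULL) {
		skb->protocol = eth_type_trans(skb, priv->dev);
		netif_receive_skb(skb);
		work++;
	}
	if (work < budget)
		napi_complete(napi);	/* done; driver re-enables RX irq */
	return work;
}
#endif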
2965 
2966 /* Network device is going away, flush any packets still pending.
2967  * Called with irqs disabled.
2968  */
2969 static void flush_backlog(void *arg)
2970 {
2971 	struct net_device *dev = arg;
2972 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
2973 	struct sk_buff *skb, *tmp;
2974 
2975 	rps_lock(sd);
2976 	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
2977 		if (skb->dev == dev) {
2978 			__skb_unlink(skb, &sd->input_pkt_queue);
2979 			kfree_skb(skb);
2980 			input_queue_head_incr(sd);
2981 		}
2982 	}
2983 	rps_unlock(sd);
2984 
2985 	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
2986 		if (skb->dev == dev) {
2987 			__skb_unlink(skb, &sd->process_queue);
2988 			kfree_skb(skb);
2989 			input_queue_head_incr(sd);
2990 		}
2991 	}
2992 }
2993 
2994 static int napi_gro_complete(struct sk_buff *skb)
2995 {
2996 	struct packet_type *ptype;
2997 	__be16 type = skb->protocol;
2998 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2999 	int err = -ENOENT;
3000 
3001 	if (NAPI_GRO_CB(skb)->count == 1) {
3002 		skb_shinfo(skb)->gso_size = 0;
3003 		goto out;
3004 	}
3005 
3006 	rcu_read_lock();
3007 	list_for_each_entry_rcu(ptype, head, list) {
3008 		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
3009 			continue;
3010 
3011 		err = ptype->gro_complete(skb);
3012 		break;
3013 	}
3014 	rcu_read_unlock();
3015 
3016 	if (err) {
3017 		WARN_ON(&ptype->list == head);
3018 		kfree_skb(skb);
3019 		return NET_RX_SUCCESS;
3020 	}
3021 
3022 out:
3023 	return netif_receive_skb(skb);
3024 }
3025 
3026 static void napi_gro_flush(struct napi_struct *napi)
3027 {
3028 	struct sk_buff *skb, *next;
3029 
3030 	for (skb = napi->gro_list; skb; skb = next) {
3031 		next = skb->next;
3032 		skb->next = NULL;
3033 		napi_gro_complete(skb);
3034 	}
3035 
3036 	napi->gro_count = 0;
3037 	napi->gro_list = NULL;
3038 }
3039 
3040 enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3041 {
3042 	struct sk_buff **pp = NULL;
3043 	struct packet_type *ptype;
3044 	__be16 type = skb->protocol;
3045 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
3046 	int same_flow;
3047 	int mac_len;
3048 	enum gro_result ret;
3049 
3050 	if (!(skb->dev->features & NETIF_F_GRO))
3051 		goto normal;
3052 
3053 	if (skb_is_gso(skb) || skb_has_frags(skb))
3054 		goto normal;
3055 
3056 	rcu_read_lock();
3057 	list_for_each_entry_rcu(ptype, head, list) {
3058 		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
3059 			continue;
3060 
3061 		skb_set_network_header(skb, skb_gro_offset(skb));
3062 		mac_len = skb->network_header - skb->mac_header;
3063 		skb->mac_len = mac_len;
3064 		NAPI_GRO_CB(skb)->same_flow = 0;
3065 		NAPI_GRO_CB(skb)->flush = 0;
3066 		NAPI_GRO_CB(skb)->free = 0;
3067 
3068 		pp = ptype->gro_receive(&napi->gro_list, skb);
3069 		break;
3070 	}
3071 	rcu_read_unlock();
3072 
3073 	if (&ptype->list == head)
3074 		goto normal;
3075 
3076 	same_flow = NAPI_GRO_CB(skb)->same_flow;
3077 	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
3078 
3079 	if (pp) {
3080 		struct sk_buff *nskb = *pp;
3081 
3082 		*pp = nskb->next;
3083 		nskb->next = NULL;
3084 		napi_gro_complete(nskb);
3085 		napi->gro_count--;
3086 	}
3087 
3088 	if (same_flow)
3089 		goto ok;
3090 
3091 	if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
3092 		goto normal;
3093 
3094 	napi->gro_count++;
3095 	NAPI_GRO_CB(skb)->count = 1;
3096 	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
3097 	skb->next = napi->gro_list;
3098 	napi->gro_list = skb;
3099 	ret = GRO_HELD;
3100 
3101 pull:
3102 	if (skb_headlen(skb) < skb_gro_offset(skb)) {
3103 		int grow = skb_gro_offset(skb) - skb_headlen(skb);
3104 
3105 		BUG_ON(skb->end - skb->tail < grow);
3106 
3107 		memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
3108 
3109 		skb->tail += grow;
3110 		skb->data_len -= grow;
3111 
3112 		skb_shinfo(skb)->frags[0].page_offset += grow;
3113 		skb_shinfo(skb)->frags[0].size -= grow;
3114 
3115 		if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
3116 			put_page(skb_shinfo(skb)->frags[0].page);
3117 			memmove(skb_shinfo(skb)->frags,
3118 				skb_shinfo(skb)->frags + 1,
3119 				--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
3120 		}
3121 	}
3122 
3123 ok:
3124 	return ret;
3125 
3126 normal:
3127 	ret = GRO_NORMAL;
3128 	goto pull;
3129 }
3130 EXPORT_SYMBOL(dev_gro_receive);
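
/*
 * Editorial sketch (not part of the original file): the protocol side of
 * the GRO hooks that dev_gro_receive() dispatches to via packet_type.
 * ETH_P_EXAMPLE and both callbacks are hypothetical; a real protocol
 * would match flows and merge payloads instead of always flushing.
 */
#if 0
static struct sk_buff **example_gro_receive(struct sk_buff **head,
					    struct sk_buff *skb)
{
	/* Simplest legal behaviour: never merge, flush immediately. */
	NAPI_GRO_CB(skb)->flush = 1;
	return NULL;
}

static int example_gro_complete(struct sk_buff *skb)
{
	/* Fix up headers (lengths, checksums) of the merged packet. */
	return 0;
}

static struct packet_type example_ptype __read_mostly = {
	.type		= cpu_to_be16(ETH_P_EXAMPLE),
	.gro_receive	= example_gro_receive,
	.gro_complete	= example_gro_complete,
};
#endif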
3131 
3132 static gro_result_t
3133 __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3134 {
3135 	struct sk_buff *p;
3136 
3137 	if (netpoll_rx_on(skb))
3138 		return GRO_NORMAL;
3139 
3140 	for (p = napi->gro_list; p; p = p->next) {
3141 		NAPI_GRO_CB(p)->same_flow =
3142 			(p->dev == skb->dev) &&
3143 			!compare_ether_header(skb_mac_header(p),
3144 					      skb_gro_mac_header(skb));
3145 		NAPI_GRO_CB(p)->flush = 0;
3146 	}
3147 
3148 	return dev_gro_receive(napi, skb);
3149 }
3150 
3151 gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
3152 {
3153 	switch (ret) {
3154 	case GRO_NORMAL:
3155 		if (netif_receive_skb(skb))
3156 			ret = GRO_DROP;
3157 		break;
3158 
3159 	case GRO_DROP:
3160 	case GRO_MERGED_FREE:
3161 		kfree_skb(skb);
3162 		break;
3163 
3164 	case GRO_HELD:
3165 	case GRO_MERGED:
3166 		break;
3167 	}
3168 
3169 	return ret;
3170 }
3171 EXPORT_SYMBOL(napi_skb_finish);
3172 
3173 void skb_gro_reset_offset(struct sk_buff *skb)
3174 {
3175 	NAPI_GRO_CB(skb)->data_offset = 0;
3176 	NAPI_GRO_CB(skb)->frag0 = NULL;
3177 	NAPI_GRO_CB(skb)->frag0_len = 0;
3178 
3179 	if (skb->mac_header == skb->tail &&
3180 	    !PageHighMem(skb_shinfo(skb)->frags[0].page)) {
3181 		NAPI_GRO_CB(skb)->frag0 =
3182 			page_address(skb_shinfo(skb)->frags[0].page) +
3183 			skb_shinfo(skb)->frags[0].page_offset;
3184 		NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
3185 	}
3186 }
3187 EXPORT_SYMBOL(skb_gro_reset_offset);
3188 
3189 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3190 {
3191 	skb_gro_reset_offset(skb);
3192 
3193 	return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
3194 }
3195 EXPORT_SYMBOL(napi_gro_receive);
3196 
3197 void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
3198 {
3199 	__skb_pull(skb, skb_headlen(skb));
3200 	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
3201 
3202 	napi->skb = skb;
3203 }
3204 EXPORT_SYMBOL(napi_reuse_skb);
3205 
3206 struct sk_buff *napi_get_frags(struct napi_struct *napi)
3207 {
3208 	struct sk_buff *skb = napi->skb;
3209 
3210 	if (!skb) {
3211 		skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
3212 		if (skb)
3213 			napi->skb = skb;
3214 	}
3215 	return skb;
3216 }
3217 EXPORT_SYMBOL(napi_get_frags);
3218 
3219 gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
3220 			       gro_result_t ret)
3221 {
3222 	switch (ret) {
3223 	case GRO_NORMAL:
3224 	case GRO_HELD:
3225 		skb->protocol = eth_type_trans(skb, skb->dev);
3226 
3227 		if (ret == GRO_HELD)
3228 			skb_gro_pull(skb, -ETH_HLEN);
3229 		else if (netif_receive_skb(skb))
3230 			ret = GRO_DROP;
3231 		break;
3232 
3233 	case GRO_DROP:
3234 	case GRO_MERGED_FREE:
3235 		napi_reuse_skb(napi, skb);
3236 		break;
3237 
3238 	case GRO_MERGED:
3239 		break;
3240 	}
3241 
3242 	return ret;
3243 }
3244 EXPORT_SYMBOL(napi_frags_finish);
3245 
3246 struct sk_buff *napi_frags_skb(struct napi_struct *napi)
3247 {
3248 	struct sk_buff *skb = napi->skb;
3249 	struct ethhdr *eth;
3250 	unsigned int hlen;
3251 	unsigned int off;
3252 
3253 	napi->skb = NULL;
3254 
3255 	skb_reset_mac_header(skb);
3256 	skb_gro_reset_offset(skb);
3257 
3258 	off = skb_gro_offset(skb);
3259 	hlen = off + sizeof(*eth);
3260 	eth = skb_gro_header_fast(skb, off);
3261 	if (skb_gro_header_hard(skb, hlen)) {
3262 		eth = skb_gro_header_slow(skb, hlen, off);
3263 		if (unlikely(!eth)) {
3264 			napi_reuse_skb(napi, skb);
3265 			skb = NULL;
3266 			goto out;
3267 		}
3268 	}
3269 
3270 	skb_gro_pull(skb, sizeof(*eth));
3271 
3272 	/*
3273 	 * This works because the only protocols we care about don't require
3274 	 * special handling.  We'll fix it up properly at the end.
3275 	 */
3276 	skb->protocol = eth->h_proto;
3277 
3278 out:
3279 	return skb;
3280 }
3281 EXPORT_SYMBOL(napi_frags_skb);
3282 
3283 gro_result_t napi_gro_frags(struct napi_struct *napi)
3284 {
3285 	struct sk_buff *skb = napi_frags_skb(napi);
3286 
3287 	if (!skb)
3288 		return GRO_DROP;
3289 
3290 	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
3291 }
3292 EXPORT_SYMBOL(napi_gro_frags);
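
/*
 * Editorial sketch (not part of the original file): how a page-based
 * driver feeds napi_gro_frags().  It borrows the preallocated skb from
 * napi_get_frags(), attaches the received page as a fragment, and hands
 * it back; eth_type_trans() is applied later in napi_frags_finish().
 * The page/length bookkeeping shown is a hypothetical simplification.
 */
#if 0
static void example_rx_page(struct napi_struct *napi, struct page *page,
			    unsigned int len)
{
	struct sk_buff *skb = napi_get_frags(napi);

	if (!skb) {
		put_page(page);
		return;
	}

	skb_fill_page_desc(skb, 0, page, 0, len);
	skb->len += len;
	skb->data_len += len;
	skb->truesize += len;

	napi_gro_frags(napi);
}
#endif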
3293 
3294 /*
3295  * net_rps_action_and_irq_enable() sends any pending IPIs for RPS.
3296  * Note: called with local irqs disabled, but exits with local irqs enabled.
3297  */
3298 static void net_rps_action_and_irq_enable(struct softnet_data *sd)
3299 {
3300 #ifdef CONFIG_RPS
3301 	struct softnet_data *remsd = sd->rps_ipi_list;
3302 
3303 	if (remsd) {
3304 		sd->rps_ipi_list = NULL;
3305 
3306 		local_irq_enable();
3307 
3308 		/* Send pending IPI's to kick RPS processing on remote cpus. */
3309 		while (remsd) {
3310 			struct softnet_data *next = remsd->rps_ipi_next;
3311 
3312 			if (cpu_online(remsd->cpu))
3313 				__smp_call_function_single(remsd->cpu,
3314 							   &remsd->csd, 0);
3315 			remsd = next;
3316 		}
3317 	} else
3318 #endif
3319 		local_irq_enable();
3320 }
3321 
3322 static int process_backlog(struct napi_struct *napi, int quota)
3323 {
3324 	int work = 0;
3325 	struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
3326 
3327 #ifdef CONFIG_RPS
3328 	/* Check if we have pending IPIs; it is better to send them now
3329 	 * rather than waiting for net_rx_action() to end.
3330 	 */
3331 	if (sd->rps_ipi_list) {
3332 		local_irq_disable();
3333 		net_rps_action_and_irq_enable(sd);
3334 	}
3335 #endif
3336 	napi->weight = weight_p;
3337 	local_irq_disable();
3338 	while (work < quota) {
3339 		struct sk_buff *skb;
3340 		unsigned int qlen;
3341 
3342 		while ((skb = __skb_dequeue(&sd->process_queue))) {
3343 			local_irq_enable();
3344 			__netif_receive_skb(skb);
3345 			local_irq_disable();
3346 			input_queue_head_incr(sd);
3347 			if (++work >= quota) {
3348 				local_irq_enable();
3349 				return work;
3350 			}
3351 		}
3352 
3353 		rps_lock(sd);
3354 		qlen = skb_queue_len(&sd->input_pkt_queue);
3355 		if (qlen)
3356 			skb_queue_splice_tail_init(&sd->input_pkt_queue,
3357 						   &sd->process_queue);
3358 
3359 		if (qlen < quota - work) {
3360 			/*
3361 			 * Inline a custom version of __napi_complete().
3362 			 * Only the current cpu owns and manipulates this napi,
3363 			 * and NAPI_STATE_SCHED is the only possible flag set on backlog.
3364 			 * We can use a plain write instead of clear_bit(),
3365 			 * and we don't need an smp_mb() memory barrier.
3366 			 */
3367 			list_del(&napi->poll_list);
3368 			napi->state = 0;
3369 
3370 			quota = work + qlen;
3371 		}
3372 		rps_unlock(sd);
3373 	}
3374 	local_irq_enable();
3375 
3376 	return work;
3377 }
3378 
3379 /**
3380  * __napi_schedule - schedule for receive
3381  * @n: entry to schedule
3382  *
3383  * The entry's receive function will be scheduled to run
3384  */
3385 void __napi_schedule(struct napi_struct *n)
3386 {
3387 	unsigned long flags;
3388 
3389 	local_irq_save(flags);
3390 	____napi_schedule(&__get_cpu_var(softnet_data), n);
3391 	local_irq_restore(flags);
3392 }
3393 EXPORT_SYMBOL(__napi_schedule);
3394 
3395 void __napi_complete(struct napi_struct *n)
3396 {
3397 	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
3398 	BUG_ON(n->gro_list);
3399 
3400 	list_del(&n->poll_list);
3401 	smp_mb__before_clear_bit();
3402 	clear_bit(NAPI_STATE_SCHED, &n->state);
3403 }
3404 EXPORT_SYMBOL(__napi_complete);
3405 
3406 void napi_complete(struct napi_struct *n)
3407 {
3408 	unsigned long flags;
3409 
3410 	/*
3411 	 * Don't let napi dequeue from the cpu poll list
3412 	 * just in case it's running on a different cpu.
3413 	 */
3414 	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
3415 		return;
3416 
3417 	napi_gro_flush(n);
3418 	local_irq_save(flags);
3419 	__napi_complete(n);
3420 	local_irq_restore(flags);
3421 }
3422 EXPORT_SYMBOL(napi_complete);
3423 
3424 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
3425 		    int (*poll)(struct napi_struct *, int), int weight)
3426 {
3427 	INIT_LIST_HEAD(&napi->poll_list);
3428 	napi->gro_count = 0;
3429 	napi->gro_list = NULL;
3430 	napi->skb = NULL;
3431 	napi->poll = poll;
3432 	napi->weight = weight;
3433 	list_add(&napi->dev_list, &dev->napi_list);
3434 	napi->dev = dev;
3435 #ifdef CONFIG_NETPOLL
3436 	spin_lock_init(&napi->poll_lock);
3437 	napi->poll_owner = -1;
3438 #endif
3439 	set_bit(NAPI_STATE_SCHED, &napi->state);
3440 }
3441 EXPORT_SYMBOL(netif_napi_add);
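
/*
 * Editorial sketch (not part of the original file): the registration and
 * interrupt side of the NAPI contract.  The handler disables the device's
 * RX interrupt and schedules the napi instance; net_rx_action() will then
 * call the ->poll() registered here (see the example_poll() sketch after
 * netif_receive_skb() above).  struct example_priv and the hardware
 * helper are hypothetical.
 */
#if 0
static int example_open(struct net_device *dev)
{
	struct example_priv *priv = netdev_priv(dev);

	netif_napi_add(dev, &priv->napi, example_poll, 64);
	napi_enable(&priv->napi);	/* clears NAPI_STATE_SCHED */
	return 0;
}

static irqreturn_t example_rx_irq(int irq, void *dev_id)
{
	struct example_priv *priv = netdev_priv((struct net_device *)dev_id);

	example_hw_disable_rx_irq(priv);
	if (napi_schedule_prep(&priv->napi))
		__napi_schedule(&priv->napi);
	return IRQ_HANDLED;
}
#endif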
3442 
3443 void netif_napi_del(struct napi_struct *napi)
3444 {
3445 	struct sk_buff *skb, *next;
3446 
3447 	list_del_init(&napi->dev_list);
3448 	napi_free_frags(napi);
3449 
3450 	for (skb = napi->gro_list; skb; skb = next) {
3451 		next = skb->next;
3452 		skb->next = NULL;
3453 		kfree_skb(skb);
3454 	}
3455 
3456 	napi->gro_list = NULL;
3457 	napi->gro_count = 0;
3458 }
3459 EXPORT_SYMBOL(netif_napi_del);
3460 
3461 static void net_rx_action(struct softirq_action *h)
3462 {
3463 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
3464 	unsigned long time_limit = jiffies + 2;
3465 	int budget = netdev_budget;
3466 	void *have;
3467 
3468 	local_irq_disable();
3469 
3470 	while (!list_empty(&sd->poll_list)) {
3471 		struct napi_struct *n;
3472 		int work, weight;
3473 
3474 		/* If the softirq window is exhausted then punt.
3475 		 * Allow this to run for 2 jiffies, which allows
3476 		 * an average latency of 1.5/HZ.
3477 		 */
3478 		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
3479 			goto softnet_break;
3480 
3481 		local_irq_enable();
3482 
3483 		/* Even though interrupts have been re-enabled, this
3484 		 * access is safe because interrupts can only add new
3485 		 * entries to the tail of this list, and only ->poll()
3486 		 * calls can remove this head entry from the list.
3487 		 */
3488 		n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
3489 
3490 		have = netpoll_poll_lock(n);
3491 
3492 		weight = n->weight;
3493 
3494 		/* This NAPI_STATE_SCHED test is for avoiding a race
3495 		 * with netpoll's poll_napi().  Only the entity which
3496 		 * obtains the lock and sees NAPI_STATE_SCHED set will
3497 		 * actually make the ->poll() call.  Therefore we avoid
3498 		 * accidentally calling ->poll() when NAPI is not scheduled.
3499 		 */
3500 		work = 0;
3501 		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
3502 			work = n->poll(n, weight);
3503 			trace_napi_poll(n);
3504 		}
3505 
3506 		WARN_ON_ONCE(work > weight);
3507 
3508 		budget -= work;
3509 
3510 		local_irq_disable();
3511 
3512 		/* Drivers must not modify the NAPI state if they
3513 		 * consume the entire weight.  In such cases this code
3514 		 * still "owns" the NAPI instance and therefore can
3515 		 * move the instance around on the list at-will.
3516 		 */
3517 		if (unlikely(work == weight)) {
3518 			if (unlikely(napi_disable_pending(n))) {
3519 				local_irq_enable();
3520 				napi_complete(n);
3521 				local_irq_disable();
3522 			} else
3523 				list_move_tail(&n->poll_list, &sd->poll_list);
3524 		}
3525 
3526 		netpoll_poll_unlock(have);
3527 	}
3528 out:
3529 	net_rps_action_and_irq_enable(sd);
3530 
3531 #ifdef CONFIG_NET_DMA
3532 	/*
3533 	 * There may not be any more sk_buffs coming right now, so push
3534 	 * any pending DMA copies to hardware
3535 	 */
3536 	dma_issue_pending_all();
3537 #endif
3538 
3539 	return;
3540 
3541 softnet_break:
3542 	sd->time_squeeze++;
3543 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
3544 	goto out;
3545 }
3546 
3547 static gifconf_func_t *gifconf_list[NPROTO];
3548 
3549 /**
3550  *	register_gifconf	-	register a SIOCGIF handler
3551  *	@family: Address family
3552  *	@gifconf: Function handler
3553  *
3554  *	Register protocol-dependent address dumping routines. The handler
3555  *	that is passed must not be freed or reused until it has been replaced
3556  *	by another handler.
3557  */
3558 int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
3559 {
3560 	if (family >= NPROTO)
3561 		return -EINVAL;
3562 	gifconf_list[family] = gifconf;
3563 	return 0;
3564 }
3565 EXPORT_SYMBOL(register_gifconf);
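
/*
 * Editorial sketch (not part of the original file): how an address family
 * hooks into SIOCGIFCONF.  The handler is invoked once per device from
 * dev_ifconf() below; a NULL buffer means "report how much space you
 * would need".  PF_EXAMPLE and example_gifconf() are hypothetical.
 */
#if 0
static int example_gifconf(struct net_device *dev, char __user *buf,
			   int len);	/* fills ifreqs for this family */

static int __init example_family_init(void)
{
	return register_gifconf(PF_EXAMPLE, example_gifconf);
}
#endif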
3566 
3567 
3568 /*
3569  *	Map an interface index to its name (SIOCGIFNAME)
3570  */
3571 
3572 /*
3573  *	We need this ioctl for efficient implementation of the
3574  *	if_indextoname() function required by the IPv6 API.  Without
3575  *	it, we would have to search all the interfaces to find a
3576  *	match.  --pb
3577  */
3578 
3579 static int dev_ifname(struct net *net, struct ifreq __user *arg)
3580 {
3581 	struct net_device *dev;
3582 	struct ifreq ifr;
3583 
3584 	/*
3585 	 *	Fetch the caller's info block.
3586 	 */
3587 
3588 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3589 		return -EFAULT;
3590 
3591 	rcu_read_lock();
3592 	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
3593 	if (!dev) {
3594 		rcu_read_unlock();
3595 		return -ENODEV;
3596 	}
3597 
3598 	strcpy(ifr.ifr_name, dev->name);
3599 	rcu_read_unlock();
3600 
3601 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
3602 		return -EFAULT;
3603 	return 0;
3604 }
3605 
3606 /*
3607  *	Perform a SIOCGIFCONF call. This structure will change
3608  *	size eventually, and there is nothing I can do about it.
3609  *	Thus we will need a 'compatibility mode'.
3610  */
3611 
3612 static int dev_ifconf(struct net *net, char __user *arg)
3613 {
3614 	struct ifconf ifc;
3615 	struct net_device *dev;
3616 	char __user *pos;
3617 	int len;
3618 	int total;
3619 	int i;
3620 
3621 	/*
3622 	 *	Fetch the caller's info block.
3623 	 */
3624 
3625 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
3626 		return -EFAULT;
3627 
3628 	pos = ifc.ifc_buf;
3629 	len = ifc.ifc_len;
3630 
3631 	/*
3632 	 *	Loop over the interfaces, and write an info block for each.
3633 	 */
3634 
3635 	total = 0;
3636 	for_each_netdev(net, dev) {
3637 		for (i = 0; i < NPROTO; i++) {
3638 			if (gifconf_list[i]) {
3639 				int done;
3640 				if (!pos)
3641 					done = gifconf_list[i](dev, NULL, 0);
3642 				else
3643 					done = gifconf_list[i](dev, pos + total,
3644 							       len - total);
3645 				if (done < 0)
3646 					return -EFAULT;
3647 				total += done;
3648 			}
3649 		}
3650 	}
3651 
3652 	/*
3653 	 *	All done.  Write the updated control block back to the caller.
3654 	 */
3655 	ifc.ifc_len = total;
3656 
3657 	/*
3658 	 * 	Both BSD and Solaris return 0 here, so we do too.
3659 	 */
3660 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
3661 }
3662 
3663 #ifdef CONFIG_PROC_FS
3664 /*
3665  *	This is invoked by the /proc filesystem handler to display a device
3666  *	in detail.
3667  */
3668 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
3669 	__acquires(RCU)
3670 {
3671 	struct net *net = seq_file_net(seq);
3672 	loff_t off;
3673 	struct net_device *dev;
3674 
3675 	rcu_read_lock();
3676 	if (!*pos)
3677 		return SEQ_START_TOKEN;
3678 
3679 	off = 1;
3680 	for_each_netdev_rcu(net, dev)
3681 		if (off++ == *pos)
3682 			return dev;
3683 
3684 	return NULL;
3685 }
3686 
3687 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3688 {
3689 	struct net_device *dev = (v == SEQ_START_TOKEN) ?
3690 				  first_net_device(seq_file_net(seq)) :
3691 				  next_net_device((struct net_device *)v);
3692 
3693 	++*pos;
3694 	return rcu_dereference(dev);
3695 }
3696 
3697 void dev_seq_stop(struct seq_file *seq, void *v)
3698 	__releases(RCU)
3699 {
3700 	rcu_read_unlock();
3701 }
3702 
3703 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
3704 {
3705 	const struct rtnl_link_stats64 *stats = dev_get_stats(dev);
3706 
3707 	seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
3708 		   "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
3709 		   dev->name, stats->rx_bytes, stats->rx_packets,
3710 		   stats->rx_errors,
3711 		   stats->rx_dropped + stats->rx_missed_errors,
3712 		   stats->rx_fifo_errors,
3713 		   stats->rx_length_errors + stats->rx_over_errors +
3714 		    stats->rx_crc_errors + stats->rx_frame_errors,
3715 		   stats->rx_compressed, stats->multicast,
3716 		   stats->tx_bytes, stats->tx_packets,
3717 		   stats->tx_errors, stats->tx_dropped,
3718 		   stats->tx_fifo_errors, stats->collisions,
3719 		   stats->tx_carrier_errors +
3720 		    stats->tx_aborted_errors +
3721 		    stats->tx_window_errors +
3722 		    stats->tx_heartbeat_errors,
3723 		   stats->tx_compressed);
3724 }
3725 
3726 /*
3727  *	Called from the procfs module. This now uses the new arbitrarily-sized
3728  *	/proc/net interface to create /proc/net/dev.
3729  */
3730 static int dev_seq_show(struct seq_file *seq, void *v)
3731 {
3732 	if (v == SEQ_START_TOKEN)
3733 		seq_puts(seq, "Inter-|   Receive                            "
3734 			      "                    |  Transmit\n"
3735 			      " face |bytes    packets errs drop fifo frame "
3736 			      "compressed multicast|bytes    packets errs "
3737 			      "drop fifo colls carrier compressed\n");
3738 	else
3739 		dev_seq_printf_stats(seq, v);
3740 	return 0;
3741 }
3742 
3743 static struct softnet_data *softnet_get_online(loff_t *pos)
3744 {
3745 	struct softnet_data *sd = NULL;
3746 
3747 	while (*pos < nr_cpu_ids)
3748 		if (cpu_online(*pos)) {
3749 			sd = &per_cpu(softnet_data, *pos);
3750 			break;
3751 		} else
3752 			++*pos;
3753 	return sd;
3754 }
3755 
3756 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
3757 {
3758 	return softnet_get_online(pos);
3759 }
3760 
3761 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3762 {
3763 	++*pos;
3764 	return softnet_get_online(pos);
3765 }
3766 
3767 static void softnet_seq_stop(struct seq_file *seq, void *v)
3768 {
3769 }
3770 
3771 static int softnet_seq_show(struct seq_file *seq, void *v)
3772 {
3773 	struct softnet_data *sd = v;
3774 
3775 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
3776 		   sd->processed, sd->dropped, sd->time_squeeze, 0,
3777 		   0, 0, 0, 0, /* was fastroute */
3778 		   sd->cpu_collision, sd->received_rps);
3779 	return 0;
3780 }
3781 
3782 static const struct seq_operations dev_seq_ops = {
3783 	.start = dev_seq_start,
3784 	.next  = dev_seq_next,
3785 	.stop  = dev_seq_stop,
3786 	.show  = dev_seq_show,
3787 };
3788 
3789 static int dev_seq_open(struct inode *inode, struct file *file)
3790 {
3791 	return seq_open_net(inode, file, &dev_seq_ops,
3792 			    sizeof(struct seq_net_private));
3793 }
3794 
3795 static const struct file_operations dev_seq_fops = {
3796 	.owner	 = THIS_MODULE,
3797 	.open    = dev_seq_open,
3798 	.read    = seq_read,
3799 	.llseek  = seq_lseek,
3800 	.release = seq_release_net,
3801 };
3802 
3803 static const struct seq_operations softnet_seq_ops = {
3804 	.start = softnet_seq_start,
3805 	.next  = softnet_seq_next,
3806 	.stop  = softnet_seq_stop,
3807 	.show  = softnet_seq_show,
3808 };
3809 
3810 static int softnet_seq_open(struct inode *inode, struct file *file)
3811 {
3812 	return seq_open(file, &softnet_seq_ops);
3813 }
3814 
3815 static const struct file_operations softnet_seq_fops = {
3816 	.owner	 = THIS_MODULE,
3817 	.open    = softnet_seq_open,
3818 	.read    = seq_read,
3819 	.llseek  = seq_lseek,
3820 	.release = seq_release,
3821 };
3822 
3823 static void *ptype_get_idx(loff_t pos)
3824 {
3825 	struct packet_type *pt = NULL;
3826 	loff_t i = 0;
3827 	int t;
3828 
3829 	list_for_each_entry_rcu(pt, &ptype_all, list) {
3830 		if (i == pos)
3831 			return pt;
3832 		++i;
3833 	}
3834 
3835 	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
3836 		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
3837 			if (i == pos)
3838 				return pt;
3839 			++i;
3840 		}
3841 	}
3842 	return NULL;
3843 }
3844 
3845 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
3846 	__acquires(RCU)
3847 {
3848 	rcu_read_lock();
3849 	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
3850 }
3851 
3852 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3853 {
3854 	struct packet_type *pt;
3855 	struct list_head *nxt;
3856 	int hash;
3857 
3858 	++*pos;
3859 	if (v == SEQ_START_TOKEN)
3860 		return ptype_get_idx(0);
3861 
3862 	pt = v;
3863 	nxt = pt->list.next;
3864 	if (pt->type == htons(ETH_P_ALL)) {
3865 		if (nxt != &ptype_all)
3866 			goto found;
3867 		hash = 0;
3868 		nxt = ptype_base[0].next;
3869 	} else
3870 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
3871 
3872 	while (nxt == &ptype_base[hash]) {
3873 		if (++hash >= PTYPE_HASH_SIZE)
3874 			return NULL;
3875 		nxt = ptype_base[hash].next;
3876 	}
3877 found:
3878 	return list_entry(nxt, struct packet_type, list);
3879 }
3880 
3881 static void ptype_seq_stop(struct seq_file *seq, void *v)
3882 	__releases(RCU)
3883 {
3884 	rcu_read_unlock();
3885 }
3886 
3887 static int ptype_seq_show(struct seq_file *seq, void *v)
3888 {
3889 	struct packet_type *pt = v;
3890 
3891 	if (v == SEQ_START_TOKEN)
3892 		seq_puts(seq, "Type Device      Function\n");
3893 	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
3894 		if (pt->type == htons(ETH_P_ALL))
3895 			seq_puts(seq, "ALL ");
3896 		else
3897 			seq_printf(seq, "%04x", ntohs(pt->type));
3898 
3899 		seq_printf(seq, " %-8s %pF\n",
3900 			   pt->dev ? pt->dev->name : "", pt->func);
3901 	}
3902 
3903 	return 0;
3904 }
3905 
3906 static const struct seq_operations ptype_seq_ops = {
3907 	.start = ptype_seq_start,
3908 	.next  = ptype_seq_next,
3909 	.stop  = ptype_seq_stop,
3910 	.show  = ptype_seq_show,
3911 };
3912 
3913 static int ptype_seq_open(struct inode *inode, struct file *file)
3914 {
3915 	return seq_open_net(inode, file, &ptype_seq_ops,
3916 			sizeof(struct seq_net_private));
3917 }
3918 
3919 static const struct file_operations ptype_seq_fops = {
3920 	.owner	 = THIS_MODULE,
3921 	.open    = ptype_seq_open,
3922 	.read    = seq_read,
3923 	.llseek  = seq_lseek,
3924 	.release = seq_release_net,
3925 };
3926 
3927 
3928 static int __net_init dev_proc_net_init(struct net *net)
3929 {
3930 	int rc = -ENOMEM;
3931 
3932 	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
3933 		goto out;
3934 	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
3935 		goto out_dev;
3936 	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
3937 		goto out_softnet;
3938 
3939 	if (wext_proc_init(net))
3940 		goto out_ptype;
3941 	rc = 0;
3942 out:
3943 	return rc;
3944 out_ptype:
3945 	proc_net_remove(net, "ptype");
3946 out_softnet:
3947 	proc_net_remove(net, "softnet_stat");
3948 out_dev:
3949 	proc_net_remove(net, "dev");
3950 	goto out;
3951 }
3952 
3953 static void __net_exit dev_proc_net_exit(struct net *net)
3954 {
3955 	wext_proc_exit(net);
3956 
3957 	proc_net_remove(net, "ptype");
3958 	proc_net_remove(net, "softnet_stat");
3959 	proc_net_remove(net, "dev");
3960 }
3961 
3962 static struct pernet_operations __net_initdata dev_proc_ops = {
3963 	.init = dev_proc_net_init,
3964 	.exit = dev_proc_net_exit,
3965 };
3966 
3967 static int __init dev_proc_init(void)
3968 {
3969 	return register_pernet_subsys(&dev_proc_ops);
3970 }
3971 #else
3972 #define dev_proc_init() 0
3973 #endif	/* CONFIG_PROC_FS */
3974 
3975 
3976 /**
3977  *	netdev_set_master	-	set up master/slave pair
3978  *	@slave: slave device
3979  *	@master: new master device
3980  *
3981  *	Changes the master device of the slave. Pass %NULL to break the
3982  *	bonding. The caller must hold the RTNL semaphore. On a failure
3983  *	a negative errno code is returned. On success the reference counts
3984  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
3985  *	function returns zero.
3986  */
3987 int netdev_set_master(struct net_device *slave, struct net_device *master)
3988 {
3989 	struct net_device *old = slave->master;
3990 
3991 	ASSERT_RTNL();
3992 
3993 	if (master) {
3994 		if (old)
3995 			return -EBUSY;
3996 		dev_hold(master);
3997 	}
3998 
3999 	slave->master = master;
4000 
4001 	if (old) {
4002 		synchronize_net();
4003 		dev_put(old);
4004 	}
4005 	if (master)
4006 		slave->flags |= IFF_SLAVE;
4007 	else
4008 		slave->flags &= ~IFF_SLAVE;
4009 
4010 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
4011 	return 0;
4012 }
4013 EXPORT_SYMBOL(netdev_set_master);
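/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * how a bonding-style driver might pair devices with netdev_set_master().
 * The helper name is hypothetical; only netdev_set_master() and the RTNL
 * requirement come from the function above.
 *
 *	static int example_enslave(struct net_device *master,
 *				   struct net_device *slave)
 *	{
 *		int err;
 *
 *		ASSERT_RTNL();	// caller must hold the RTNL semaphore
 *		err = netdev_set_master(slave, master);
 *		if (err)
 *			return err;	// -EBUSY: slave already has a master
 *		// ... driver-specific slave setup ...
 *		return 0;
 *	}
 *
 * A matching teardown path passes NULL as the master, which breaks the
 * pairing and releases the reference held on the master device.
 */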
4014 
4015 static void dev_change_rx_flags(struct net_device *dev, int flags)
4016 {
4017 	const struct net_device_ops *ops = dev->netdev_ops;
4018 
4019 	if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
4020 		ops->ndo_change_rx_flags(dev, flags);
4021 }
4022 
4023 static int __dev_set_promiscuity(struct net_device *dev, int inc)
4024 {
4025 	unsigned short old_flags = dev->flags;
4026 	uid_t uid;
4027 	gid_t gid;
4028 
4029 	ASSERT_RTNL();
4030 
4031 	dev->flags |= IFF_PROMISC;
4032 	dev->promiscuity += inc;
4033 	if (dev->promiscuity == 0) {
4034 		/*
4035 		 * Avoid overflow.
4036 		 * If inc causes an overflow, leave promisc untouched and return an error.
4037 		 */
4038 		if (inc < 0)
4039 			dev->flags &= ~IFF_PROMISC;
4040 		else {
4041 			dev->promiscuity -= inc;
4042 			printk(KERN_WARNING "%s: promiscuity counter overflowed, "
4043 				"could not set promiscuity; the promiscuity "
4044 				"feature of the device may be broken.\n", dev->name);
4045 			return -EOVERFLOW;
4046 		}
4047 	}
4048 	if (dev->flags != old_flags) {
4049 		printk(KERN_INFO "device %s %s promiscuous mode\n",
4050 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
4051 							       "left");
4052 		if (audit_enabled) {
4053 			current_uid_gid(&uid, &gid);
4054 			audit_log(current->audit_context, GFP_ATOMIC,
4055 				AUDIT_ANOM_PROMISCUOUS,
4056 				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
4057 				dev->name, (dev->flags & IFF_PROMISC),
4058 				(old_flags & IFF_PROMISC),
4059 				audit_get_loginuid(current),
4060 				uid, gid,
4061 				audit_get_sessionid(current));
4062 		}
4063 
4064 		dev_change_rx_flags(dev, IFF_PROMISC);
4065 	}
4066 	return 0;
4067 }
4068 
4069 /**
4070  *	dev_set_promiscuity	- update promiscuity count on a device
4071  *	@dev: device
4072  *	@inc: modifier
4073  *
4074  *	Add or remove promiscuity from a device. While the count in the device
4075  *	remains above zero the interface remains promiscuous. Once it hits zero
4076  *	the device reverts to normal filtering operation. A negative @inc
4077  *	value is used to drop promiscuity from the device.
4078  *	Return 0 if successful or a negative errno code on error.
4079  */
4080 int dev_set_promiscuity(struct net_device *dev, int inc)
4081 {
4082 	unsigned short old_flags = dev->flags;
4083 	int err;
4084 
4085 	err = __dev_set_promiscuity(dev, inc);
4086 	if (err < 0)
4087 		return err;
4088 	if (dev->flags != old_flags)
4089 		dev_set_rx_mode(dev);
4090 	return err;
4091 }
4092 EXPORT_SYMBOL(dev_set_promiscuity);
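/*
 * Illustrative sketch (editor's addition): a packet-capture style user of
 * the promiscuity counter. The helper names are hypothetical; the point is
 * the balanced +1/-1 around the capture lifetime, under RTNL.
 *
 *	static int example_capture_start(struct net_device *dev)
 *	{
 *		int err;
 *
 *		rtnl_lock();
 *		err = dev_set_promiscuity(dev, 1);
 *		rtnl_unlock();
 *		return err;	// -EOVERFLOW if the counter would wrap
 *	}
 *
 *	static void example_capture_stop(struct net_device *dev)
 *	{
 *		rtnl_lock();
 *		dev_set_promiscuity(dev, -1);
 *		rtnl_unlock();
 *	}
 *
 * Because this is a counter, captures may overlap; the interface only
 * leaves promiscuous mode when the last user drops its reference.
 * dev_set_allmulti() below follows the same pattern for multicast.
 */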
4093 
4094 /**
4095  *	dev_set_allmulti	- update allmulti count on a device
4096  *	@dev: device
4097  *	@inc: modifier
4098  *
4099  *	Enable or disable reception of all multicast frames on a device. While
4100  *	the count in the device remains above zero the interface keeps listening
4101  *	to all multicast frames. Once it hits zero the device reverts to normal
4102  *	filtering operation. A negative @inc value is used to drop the counter
4103  *	when releasing a resource needing all multicasts.
4104  *	Return 0 if successful or a negative errno code on error.
4105  */
4106 
4107 int dev_set_allmulti(struct net_device *dev, int inc)
4108 {
4109 	unsigned short old_flags = dev->flags;
4110 
4111 	ASSERT_RTNL();
4112 
4113 	dev->flags |= IFF_ALLMULTI;
4114 	dev->allmulti += inc;
4115 	if (dev->allmulti == 0) {
4116 		/*
4117 		 * Avoid overflow.
4118 		 * If inc causes an overflow, leave allmulti untouched and return an error.
4119 		 */
4120 		if (inc < 0)
4121 			dev->flags &= ~IFF_ALLMULTI;
4122 		else {
4123 			dev->allmulti -= inc;
4124 			printk(KERN_WARNING "%s: allmulti counter overflowed, "
4125 				"could not set allmulti; the allmulti feature "
4126 				"of the device may be broken.\n", dev->name);
4127 			return -EOVERFLOW;
4128 		}
4129 	}
4130 	if (dev->flags ^ old_flags) {
4131 		dev_change_rx_flags(dev, IFF_ALLMULTI);
4132 		dev_set_rx_mode(dev);
4133 	}
4134 	return 0;
4135 }
4136 EXPORT_SYMBOL(dev_set_allmulti);
4137 
4138 /*
4139  *	Upload the unicast and multicast address lists to the device and
4140  *	configure RX filtering. When the device doesn't support unicast
4141  *	filtering it is put in promiscuous mode while unicast addresses
4142  *	are present.
4143  */
4144 void __dev_set_rx_mode(struct net_device *dev)
4145 {
4146 	const struct net_device_ops *ops = dev->netdev_ops;
4147 
4148 	/* dev_open will call this function so the list will stay sane. */
4149 	if (!(dev->flags&IFF_UP))
4150 		return;
4151 
4152 	if (!netif_device_present(dev))
4153 		return;
4154 
4155 	if (ops->ndo_set_rx_mode)
4156 		ops->ndo_set_rx_mode(dev);
4157 	else {
4158 		/* Unicast address changes may only happen under the rtnl,
4159 		 * therefore calling __dev_set_promiscuity here is safe.
4160 		 */
4161 		if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
4162 			__dev_set_promiscuity(dev, 1);
4163 			dev->uc_promisc = 1;
4164 		} else if (netdev_uc_empty(dev) && dev->uc_promisc) {
4165 			__dev_set_promiscuity(dev, -1);
4166 			dev->uc_promisc = 0;
4167 		}
4168 
4169 		if (ops->ndo_set_multicast_list)
4170 			ops->ndo_set_multicast_list(dev);
4171 	}
4172 }
4173 
4174 void dev_set_rx_mode(struct net_device *dev)
4175 {
4176 	netif_addr_lock_bh(dev);
4177 	__dev_set_rx_mode(dev);
4178 	netif_addr_unlock_bh(dev);
4179 }
4180 
4181 /**
4182  *	dev_get_flags - get flags reported to userspace
4183  *	@dev: device
4184  *
4185  *	Get the combination of flag bits exported through APIs to userspace.
4186  */
4187 unsigned dev_get_flags(const struct net_device *dev)
4188 {
4189 	unsigned flags;
4190 
4191 	flags = (dev->flags & ~(IFF_PROMISC |
4192 				IFF_ALLMULTI |
4193 				IFF_RUNNING |
4194 				IFF_LOWER_UP |
4195 				IFF_DORMANT)) |
4196 		(dev->gflags & (IFF_PROMISC |
4197 				IFF_ALLMULTI));
4198 
4199 	if (netif_running(dev)) {
4200 		if (netif_oper_up(dev))
4201 			flags |= IFF_RUNNING;
4202 		if (netif_carrier_ok(dev))
4203 			flags |= IFF_LOWER_UP;
4204 		if (netif_dormant(dev))
4205 			flags |= IFF_DORMANT;
4206 	}
4207 
4208 	return flags;
4209 }
4210 EXPORT_SYMBOL(dev_get_flags);
4211 
4212 int __dev_change_flags(struct net_device *dev, unsigned int flags)
4213 {
4214 	int old_flags = dev->flags;
4215 	int ret;
4216 
4217 	ASSERT_RTNL();
4218 
4219 	/*
4220 	 *	Set the flags on our device.
4221 	 */
4222 
4223 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
4224 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
4225 			       IFF_AUTOMEDIA)) |
4226 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
4227 				    IFF_ALLMULTI));
4228 
4229 	/*
4230 	 *	Load in the correct multicast list now the flags have changed.
4231 	 */
4232 
4233 	if ((old_flags ^ flags) & IFF_MULTICAST)
4234 		dev_change_rx_flags(dev, IFF_MULTICAST);
4235 
4236 	dev_set_rx_mode(dev);
4237 
4238 	/*
4239 	 *	Have we downed the interface? We handle IFF_UP ourselves
4240 	 *	according to user attempts to set it, rather than blindly
4241 	 *	setting it.
4242 	 */
4243 
4244 	ret = 0;
4245 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
4246 		ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
4247 
4248 		if (!ret)
4249 			dev_set_rx_mode(dev);
4250 	}
4251 
4252 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
4253 		int inc = (flags & IFF_PROMISC) ? 1 : -1;
4254 
4255 		dev->gflags ^= IFF_PROMISC;
4256 		dev_set_promiscuity(dev, inc);
4257 	}
4258 
4259 	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
4260 	   is important. Some (broken) drivers set IFF_PROMISC when
4261 	   IFF_ALLMULTI is requested, without asking us and without reporting it.
4262 	 */
4263 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
4264 		int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
4265 
4266 		dev->gflags ^= IFF_ALLMULTI;
4267 		dev_set_allmulti(dev, inc);
4268 	}
4269 
4270 	return ret;
4271 }
4272 
4273 void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
4274 {
4275 	unsigned int changes = dev->flags ^ old_flags;
4276 
4277 	if (changes & IFF_UP) {
4278 		if (dev->flags & IFF_UP)
4279 			call_netdevice_notifiers(NETDEV_UP, dev);
4280 		else
4281 			call_netdevice_notifiers(NETDEV_DOWN, dev);
4282 	}
4283 
4284 	if (dev->flags & IFF_UP &&
4285 	    (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
4286 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
4287 }
4288 
4289 /**
4290  *	dev_change_flags - change device settings
4291  *	@dev: device
4292  *	@flags: device state flags
4293  *
4294  *	Change the settings on a device according to the given state flags.
4295  *	in the userspace exported format.
4296  */
4297 int dev_change_flags(struct net_device *dev, unsigned flags)
4298 {
4299 	int ret, changes;
4300 	int old_flags = dev->flags;
4301 
4302 	ret = __dev_change_flags(dev, flags);
4303 	if (ret < 0)
4304 		return ret;
4305 
4306 	changes = old_flags ^ dev->flags;
4307 	if (changes)
4308 		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
4309 
4310 	__dev_notify_flags(dev, old_flags);
4311 	return ret;
4312 }
4313 EXPORT_SYMBOL(dev_change_flags);
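/*
 * Illustrative sketch (editor's addition): bringing an interface up or
 * down from inside the kernel via dev_change_flags(), which is exactly
 * how the SIOCSIFFLAGS path below uses it. The helper is hypothetical.
 *
 *	static int example_set_if_up(struct net_device *dev, bool up)
 *	{
 *		unsigned flags;
 *		int err;
 *
 *		rtnl_lock();
 *		flags = dev_get_flags(dev);	// userspace-format flags
 *		if (up)
 *			flags |= IFF_UP;
 *		else
 *			flags &= ~IFF_UP;
 *		err = dev_change_flags(dev, flags);
 *		rtnl_unlock();
 *		return err;
 *	}
 */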
4314 
4315 /**
4316  *	dev_set_mtu - Change maximum transfer unit
4317  *	@dev: device
4318  *	@new_mtu: new transfer unit
4319  *
4320  *	Change the maximum transfer size of the network device.
4321  */
4322 int dev_set_mtu(struct net_device *dev, int new_mtu)
4323 {
4324 	const struct net_device_ops *ops = dev->netdev_ops;
4325 	int err;
4326 
4327 	if (new_mtu == dev->mtu)
4328 		return 0;
4329 
4330 	/*	MTU must not be negative.	 */
4331 	if (new_mtu < 0)
4332 		return -EINVAL;
4333 
4334 	if (!netif_device_present(dev))
4335 		return -ENODEV;
4336 
4337 	err = 0;
4338 	if (ops->ndo_change_mtu)
4339 		err = ops->ndo_change_mtu(dev, new_mtu);
4340 	else
4341 		dev->mtu = new_mtu;
4342 
4343 	if (!err && dev->flags & IFF_UP)
4344 		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
4345 	return err;
4346 }
4347 EXPORT_SYMBOL(dev_set_mtu);
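/*
 * Illustrative sketch (editor's addition): changing the MTU from inside
 * the kernel, e.g. when a stacked device propagates a lower device's new
 * MTU. Locking is the caller's responsibility:
 *
 *	rtnl_lock();
 *	err = dev_set_mtu(dev, new_mtu);  // NETDEV_CHANGEMTU fires if up
 *	rtnl_unlock();
 */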
4348 
4349 /**
4350  *	dev_set_mac_address - Change Media Access Control Address
4351  *	@dev: device
4352  *	@sa: new address
4353  *
4354  *	Change the hardware (MAC) address of the device.
4355  */
4356 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4357 {
4358 	const struct net_device_ops *ops = dev->netdev_ops;
4359 	int err;
4360 
4361 	if (!ops->ndo_set_mac_address)
4362 		return -EOPNOTSUPP;
4363 	if (sa->sa_family != dev->type)
4364 		return -EINVAL;
4365 	if (!netif_device_present(dev))
4366 		return -ENODEV;
4367 	err = ops->ndo_set_mac_address(dev, sa);
4368 	if (!err)
4369 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4370 	return err;
4371 }
4372 EXPORT_SYMBOL(dev_set_mac_address);
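/*
 * Illustrative sketch (editor's addition): programming a new MAC address
 * from inside the kernel. The sockaddr family must match dev->type
 * (ARPHRD_ETHER for Ethernet), mirroring the check above. The helper
 * name is hypothetical.
 *
 *	static int example_set_mac(struct net_device *dev, const u8 *mac)
 *	{
 *		struct sockaddr sa;
 *		int err;
 *
 *		sa.sa_family = dev->type;
 *		memcpy(sa.sa_data, mac, ETH_ALEN);
 *		rtnl_lock();
 *		err = dev_set_mac_address(dev, &sa);
 *		rtnl_unlock();
 *		return err;
 *	}
 */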
4373 
4374 /*
4375  *	Perform the SIOCxIFxxx calls, inside rcu_read_lock()
4376  */
4377 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
4378 {
4379 	int err;
4380 	struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
4381 
4382 	if (!dev)
4383 		return -ENODEV;
4384 
4385 	switch (cmd) {
4386 	case SIOCGIFFLAGS:	/* Get interface flags */
4387 		ifr->ifr_flags = (short) dev_get_flags(dev);
4388 		return 0;
4389 
4390 	case SIOCGIFMETRIC:	/* Get the metric on the interface
4391 				   (currently unused) */
4392 		ifr->ifr_metric = 0;
4393 		return 0;
4394 
4395 	case SIOCGIFMTU:	/* Get the MTU of a device */
4396 		ifr->ifr_mtu = dev->mtu;
4397 		return 0;
4398 
4399 	case SIOCGIFHWADDR:
4400 		if (!dev->addr_len)
4401 			memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
4402 		else
4403 			memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
4404 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4405 		ifr->ifr_hwaddr.sa_family = dev->type;
4406 		return 0;
4407 
4408 	case SIOCGIFSLAVE:
4409 		err = -EINVAL;
4410 		break;
4411 
4412 	case SIOCGIFMAP:
4413 		ifr->ifr_map.mem_start = dev->mem_start;
4414 		ifr->ifr_map.mem_end   = dev->mem_end;
4415 		ifr->ifr_map.base_addr = dev->base_addr;
4416 		ifr->ifr_map.irq       = dev->irq;
4417 		ifr->ifr_map.dma       = dev->dma;
4418 		ifr->ifr_map.port      = dev->if_port;
4419 		return 0;
4420 
4421 	case SIOCGIFINDEX:
4422 		ifr->ifr_ifindex = dev->ifindex;
4423 		return 0;
4424 
4425 	case SIOCGIFTXQLEN:
4426 		ifr->ifr_qlen = dev->tx_queue_len;
4427 		return 0;
4428 
4429 	default:
4430 		/* dev_ioctl() should ensure this case
4431 		 * is never reached
4432 		 */
4433 		WARN_ON(1);
4434 		err = -EINVAL;
4435 		break;
4436 
4437 	}
4438 	return err;
4439 }
4440 
4441 /*
4442  *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
4443  */
4444 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4445 {
4446 	int err;
4447 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4448 	const struct net_device_ops *ops;
4449 
4450 	if (!dev)
4451 		return -ENODEV;
4452 
4453 	ops = dev->netdev_ops;
4454 
4455 	switch (cmd) {
4456 	case SIOCSIFFLAGS:	/* Set interface flags */
4457 		return dev_change_flags(dev, ifr->ifr_flags);
4458 
4459 	case SIOCSIFMETRIC:	/* Set the metric on the interface
4460 				   (currently unused) */
4461 		return -EOPNOTSUPP;
4462 
4463 	case SIOCSIFMTU:	/* Set the MTU of a device */
4464 		return dev_set_mtu(dev, ifr->ifr_mtu);
4465 
4466 	case SIOCSIFHWADDR:
4467 		return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
4468 
4469 	case SIOCSIFHWBROADCAST:
4470 		if (ifr->ifr_hwaddr.sa_family != dev->type)
4471 			return -EINVAL;
4472 		memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
4473 		       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4474 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4475 		return 0;
4476 
4477 	case SIOCSIFMAP:
4478 		if (ops->ndo_set_config) {
4479 			if (!netif_device_present(dev))
4480 				return -ENODEV;
4481 			return ops->ndo_set_config(dev, &ifr->ifr_map);
4482 		}
4483 		return -EOPNOTSUPP;
4484 
4485 	case SIOCADDMULTI:
4486 		if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4487 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4488 			return -EINVAL;
4489 		if (!netif_device_present(dev))
4490 			return -ENODEV;
4491 		return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
4492 
4493 	case SIOCDELMULTI:
4494 		if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4495 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4496 			return -EINVAL;
4497 		if (!netif_device_present(dev))
4498 			return -ENODEV;
4499 		return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
4500 
4501 	case SIOCSIFTXQLEN:
4502 		if (ifr->ifr_qlen < 0)
4503 			return -EINVAL;
4504 		dev->tx_queue_len = ifr->ifr_qlen;
4505 		return 0;
4506 
4507 	case SIOCSIFNAME:
4508 		ifr->ifr_newname[IFNAMSIZ-1] = '\0';
4509 		return dev_change_name(dev, ifr->ifr_newname);
4510 
4511 	/*
4512 	 *	Unknown or private ioctl
4513 	 */
4514 	default:
4515 		if ((cmd >= SIOCDEVPRIVATE &&
4516 		    cmd <= SIOCDEVPRIVATE + 15) ||
4517 		    cmd == SIOCBONDENSLAVE ||
4518 		    cmd == SIOCBONDRELEASE ||
4519 		    cmd == SIOCBONDSETHWADDR ||
4520 		    cmd == SIOCBONDSLAVEINFOQUERY ||
4521 		    cmd == SIOCBONDINFOQUERY ||
4522 		    cmd == SIOCBONDCHANGEACTIVE ||
4523 		    cmd == SIOCGMIIPHY ||
4524 		    cmd == SIOCGMIIREG ||
4525 		    cmd == SIOCSMIIREG ||
4526 		    cmd == SIOCBRADDIF ||
4527 		    cmd == SIOCBRDELIF ||
4528 		    cmd == SIOCSHWTSTAMP ||
4529 		    cmd == SIOCWANDEV) {
4530 			err = -EOPNOTSUPP;
4531 			if (ops->ndo_do_ioctl) {
4532 				if (netif_device_present(dev))
4533 					err = ops->ndo_do_ioctl(dev, ifr, cmd);
4534 				else
4535 					err = -ENODEV;
4536 			}
4537 		} else
4538 			err = -EINVAL;
4539 
4540 	}
4541 	return err;
4542 }
4543 
4544 /*
4545  *	This function handles all "interface"-type I/O control requests. The actual
4546  *	'doing' part of this is dev_ifsioc above.
4547  */
4548 
4549 /**
4550  *	dev_ioctl	-	network device ioctl
4551  *	@net: the applicable net namespace
4552  *	@cmd: command to issue
4553  *	@arg: pointer to a struct ifreq in user space
4554  *
4555  *	Issue ioctl functions to devices. This is normally called by the
4556  *	user space syscall interfaces but can sometimes be useful for
4557  *	other purposes. The return value is the return from the syscall if
4558  *	positive or a negative errno code on error.
4559  */
4560 
4561 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
4562 {
4563 	struct ifreq ifr;
4564 	int ret;
4565 	char *colon;
4566 
4567 	/* One special case: SIOCGIFCONF takes ifconf argument
4568 	   and requires shared lock, because it sleeps writing
4569 	   to user space.
4570 	 */
4571 
4572 	if (cmd == SIOCGIFCONF) {
4573 		rtnl_lock();
4574 		ret = dev_ifconf(net, (char __user *) arg);
4575 		rtnl_unlock();
4576 		return ret;
4577 	}
4578 	if (cmd == SIOCGIFNAME)
4579 		return dev_ifname(net, (struct ifreq __user *)arg);
4580 
4581 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
4582 		return -EFAULT;
4583 
4584 	ifr.ifr_name[IFNAMSIZ-1] = 0;
4585 
4586 	colon = strchr(ifr.ifr_name, ':');
4587 	if (colon)
4588 		*colon = 0;
4589 
4590 	/*
4591 	 *	See which interface the caller is talking about.
4592 	 */
4593 
4594 	switch (cmd) {
4595 	/*
4596 	 *	These ioctl calls:
4597 	 *	- can be done by all.
4598 	 *	- atomic and do not require locking.
4599 	 *	- return a value
4600 	 */
4601 	case SIOCGIFFLAGS:
4602 	case SIOCGIFMETRIC:
4603 	case SIOCGIFMTU:
4604 	case SIOCGIFHWADDR:
4605 	case SIOCGIFSLAVE:
4606 	case SIOCGIFMAP:
4607 	case SIOCGIFINDEX:
4608 	case SIOCGIFTXQLEN:
4609 		dev_load(net, ifr.ifr_name);
4610 		rcu_read_lock();
4611 		ret = dev_ifsioc_locked(net, &ifr, cmd);
4612 		rcu_read_unlock();
4613 		if (!ret) {
4614 			if (colon)
4615 				*colon = ':';
4616 			if (copy_to_user(arg, &ifr,
4617 					 sizeof(struct ifreq)))
4618 				ret = -EFAULT;
4619 		}
4620 		return ret;
4621 
4622 	case SIOCETHTOOL:
4623 		dev_load(net, ifr.ifr_name);
4624 		rtnl_lock();
4625 		ret = dev_ethtool(net, &ifr);
4626 		rtnl_unlock();
4627 		if (!ret) {
4628 			if (colon)
4629 				*colon = ':';
4630 			if (copy_to_user(arg, &ifr,
4631 					 sizeof(struct ifreq)))
4632 				ret = -EFAULT;
4633 		}
4634 		return ret;
4635 
4636 	/*
4637 	 *	These ioctl calls:
4638 	 *	- require superuser power.
4639 	 *	- require strict serialization.
4640 	 *	- return a value
4641 	 */
4642 	case SIOCGMIIPHY:
4643 	case SIOCGMIIREG:
4644 	case SIOCSIFNAME:
4645 		if (!capable(CAP_NET_ADMIN))
4646 			return -EPERM;
4647 		dev_load(net, ifr.ifr_name);
4648 		rtnl_lock();
4649 		ret = dev_ifsioc(net, &ifr, cmd);
4650 		rtnl_unlock();
4651 		if (!ret) {
4652 			if (colon)
4653 				*colon = ':';
4654 			if (copy_to_user(arg, &ifr,
4655 					 sizeof(struct ifreq)))
4656 				ret = -EFAULT;
4657 		}
4658 		return ret;
4659 
4660 	/*
4661 	 *	These ioctl calls:
4662 	 *	- require superuser power.
4663 	 *	- require strict serialization.
4664 	 *	- do not return a value
4665 	 */
4666 	case SIOCSIFFLAGS:
4667 	case SIOCSIFMETRIC:
4668 	case SIOCSIFMTU:
4669 	case SIOCSIFMAP:
4670 	case SIOCSIFHWADDR:
4671 	case SIOCSIFSLAVE:
4672 	case SIOCADDMULTI:
4673 	case SIOCDELMULTI:
4674 	case SIOCSIFHWBROADCAST:
4675 	case SIOCSIFTXQLEN:
4676 	case SIOCSMIIREG:
4677 	case SIOCBONDENSLAVE:
4678 	case SIOCBONDRELEASE:
4679 	case SIOCBONDSETHWADDR:
4680 	case SIOCBONDCHANGEACTIVE:
4681 	case SIOCBRADDIF:
4682 	case SIOCBRDELIF:
4683 	case SIOCSHWTSTAMP:
4684 		if (!capable(CAP_NET_ADMIN))
4685 			return -EPERM;
4686 		/* fall through */
4687 	case SIOCBONDSLAVEINFOQUERY:
4688 	case SIOCBONDINFOQUERY:
4689 		dev_load(net, ifr.ifr_name);
4690 		rtnl_lock();
4691 		ret = dev_ifsioc(net, &ifr, cmd);
4692 		rtnl_unlock();
4693 		return ret;
4694 
4695 	case SIOCGIFMEM:
4696 		/* Get the per device memory space. We can add this but
4697 		 * currently do not support it */
4698 	case SIOCSIFMEM:
4699 		/* Set the per device memory buffer space.
4700 		 * Not applicable in our case */
4701 	case SIOCSIFLINK:
4702 		return -EINVAL;
4703 
4704 	/*
4705 	 *	Unknown or private ioctl.
4706 	 */
4707 	default:
4708 		if (cmd == SIOCWANDEV ||
4709 		    (cmd >= SIOCDEVPRIVATE &&
4710 		     cmd <= SIOCDEVPRIVATE + 15)) {
4711 			dev_load(net, ifr.ifr_name);
4712 			rtnl_lock();
4713 			ret = dev_ifsioc(net, &ifr, cmd);
4714 			rtnl_unlock();
4715 			if (!ret && copy_to_user(arg, &ifr,
4716 						 sizeof(struct ifreq)))
4717 				ret = -EFAULT;
4718 			return ret;
4719 		}
4720 		/* Take care of Wireless Extensions */
4721 		if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
4722 			return wext_handle_ioctl(net, &ifr, cmd, arg);
4723 		return -EINVAL;
4724 	}
4725 }
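/*
 * Illustrative sketch (editor's addition): what this entry point looks
 * like from the other side. Userspace reaches dev_ioctl() through an
 * ioctl() on any socket; for example, querying an MTU:
 *
 *	// userspace code, not part of the kernel
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	if (ioctl(fd, SIOCGIFMTU, &ifr) == 0)
 *		printf("MTU: %d\n", ifr.ifr_mtu);
 *	close(fd);
 *
 * SIOCGIFMTU lands in the first switch case above: readable by anyone
 * and served under rcu_read_lock() by dev_ifsioc_locked().
 */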
4726 
4727 
4728 /**
4729  *	dev_new_index	-	allocate an ifindex
4730  *	@net: the applicable net namespace
4731  *
4732  *	Returns a suitable unique value for a new device interface
4733  *	number.  The caller must hold the rtnl semaphore or the
4734  *	dev_base_lock to be sure it remains unique.
4735  */
4736 static int dev_new_index(struct net *net)
4737 {
4738 	static int ifindex;
4739 	for (;;) {
4740 		if (++ifindex <= 0)
4741 			ifindex = 1;
4742 		if (!__dev_get_by_index(net, ifindex))
4743 			return ifindex;
4744 	}
4745 }
4746 
4747 /* Delayed registration/unregistration */
4748 static LIST_HEAD(net_todo_list);
4749 
4750 static void net_set_todo(struct net_device *dev)
4751 {
4752 	list_add_tail(&dev->todo_list, &net_todo_list);
4753 }
4754 
4755 static void rollback_registered_many(struct list_head *head)
4756 {
4757 	struct net_device *dev, *tmp;
4758 
4759 	BUG_ON(dev_boot_phase);
4760 	ASSERT_RTNL();
4761 
4762 	list_for_each_entry_safe(dev, tmp, head, unreg_list) {
4763 		/* Some devices call us without ever having been
4764 		 * registered, as part of an initialization unwind.
4765 		 * Remove those devices and proceed with the remaining ones.
4766 		 */
4767 		if (dev->reg_state == NETREG_UNINITIALIZED) {
4768 			pr_debug("unregister_netdevice: device %s/%p never "
4769 				 "was registered\n", dev->name, dev);
4770 
4771 			WARN_ON(1);
4772 			list_del(&dev->unreg_list);
4773 			continue;
4774 		}
4775 
4776 		BUG_ON(dev->reg_state != NETREG_REGISTERED);
4777 
4778 		/* If device is running, close it first. */
4779 		dev_close(dev);
4780 
4781 		/* And unlink it from device chain. */
4782 		unlist_netdevice(dev);
4783 
4784 		dev->reg_state = NETREG_UNREGISTERING;
4785 	}
4786 
4787 	synchronize_net();
4788 
4789 	list_for_each_entry(dev, head, unreg_list) {
4790 		/* Shutdown queueing discipline. */
4791 		dev_shutdown(dev);
4792 
4793 
4794 		/* Notify protocols that we are about to destroy
4795 		   this device; they should clean up all their state.
4796 		*/
4797 		call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4798 
4799 		if (!dev->rtnl_link_ops ||
4800 		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
4801 			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
4802 
4803 		/*
4804 		 *	Flush the unicast and multicast chains
4805 		 */
4806 		dev_uc_flush(dev);
4807 		dev_mc_flush(dev);
4808 
4809 		if (dev->netdev_ops->ndo_uninit)
4810 			dev->netdev_ops->ndo_uninit(dev);
4811 
4812 		/* Notifier chain MUST detach us from master device. */
4813 		WARN_ON(dev->master);
4814 
4815 		/* Remove entries from kobject tree */
4816 		netdev_unregister_kobject(dev);
4817 	}
4818 
4819 	/* Process any work delayed until the end of the batch */
4820 	dev = list_first_entry(head, struct net_device, unreg_list);
4821 	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
4822 
4823 	synchronize_net();
4824 
4825 	list_for_each_entry(dev, head, unreg_list)
4826 		dev_put(dev);
4827 }
4828 
4829 static void rollback_registered(struct net_device *dev)
4830 {
4831 	LIST_HEAD(single);
4832 
4833 	list_add(&dev->unreg_list, &single);
4834 	rollback_registered_many(&single);
4835 }
4836 
4837 static void __netdev_init_queue_locks_one(struct net_device *dev,
4838 					  struct netdev_queue *dev_queue,
4839 					  void *_unused)
4840 {
4841 	spin_lock_init(&dev_queue->_xmit_lock);
4842 	netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
4843 	dev_queue->xmit_lock_owner = -1;
4844 }
4845 
4846 static void netdev_init_queue_locks(struct net_device *dev)
4847 {
4848 	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4849 	__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
4850 }
4851 
4852 unsigned long netdev_fix_features(unsigned long features, const char *name)
4853 {
4854 	/* Fix illegal SG+CSUM combinations. */
4855 	if ((features & NETIF_F_SG) &&
4856 	    !(features & NETIF_F_ALL_CSUM)) {
4857 		if (name)
4858 			printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
4859 			       "checksum feature.\n", name);
4860 		features &= ~NETIF_F_SG;
4861 	}
4862 
4863 	/* TSO requires that SG is present as well. */
4864 	if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
4865 		if (name)
4866 			printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
4867 			       "SG feature.\n", name);
4868 		features &= ~NETIF_F_TSO;
4869 	}
4870 
4871 	if (features & NETIF_F_UFO) {
4872 		if (!(features & NETIF_F_GEN_CSUM)) {
4873 			if (name)
4874 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4875 				       "since no NETIF_F_HW_CSUM feature.\n",
4876 				       name);
4877 			features &= ~NETIF_F_UFO;
4878 		}
4879 
4880 		if (!(features & NETIF_F_SG)) {
4881 			if (name)
4882 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4883 				       "since no NETIF_F_SG feature.\n", name);
4884 			features &= ~NETIF_F_UFO;
4885 		}
4886 	}
4887 
4888 	return features;
4889 }
4890 EXPORT_SYMBOL(netdev_fix_features);
4891 
4892 /**
4893  *	netif_stacked_transfer_operstate -	transfer operstate
4894  *	@rootdev: the root or lower level device to transfer state from
4895  *	@dev: the device to transfer operstate to
4896  *
4897  *	Transfer operational state from root to device. This is normally
4898  *	called when a stacking relationship exists between the root
4899  *	device and the device (a leaf device).
4900  */
4901 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
4902 					struct net_device *dev)
4903 {
4904 	if (rootdev->operstate == IF_OPER_DORMANT)
4905 		netif_dormant_on(dev);
4906 	else
4907 		netif_dormant_off(dev);
4908 
4909 	if (netif_carrier_ok(rootdev)) {
4910 		if (!netif_carrier_ok(dev))
4911 			netif_carrier_on(dev);
4912 	} else {
4913 		if (netif_carrier_ok(dev))
4914 			netif_carrier_off(dev);
4915 	}
4916 }
4917 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
4918 
4919 /**
4920  *	register_netdevice	- register a network device
4921  *	@dev: device to register
4922  *
4923  *	Take a completed network device structure and add it to the kernel
4924  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4925  *	chain. 0 is returned on success. A negative errno code is returned
4926  *	on a failure to set up the device, or if the name is a duplicate.
4927  *
4928  *	Callers must hold the rtnl semaphore. You may want
4929  *	register_netdev() instead of this.
4930  *
4931  *	BUGS:
4932  *	The locking appears insufficient to guarantee two parallel registers
4933  *	will not get the same name.
4934  */
4935 
4936 int register_netdevice(struct net_device *dev)
4937 {
4938 	int ret;
4939 	struct net *net = dev_net(dev);
4940 
4941 	BUG_ON(dev_boot_phase);
4942 	ASSERT_RTNL();
4943 
4944 	might_sleep();
4945 
4946 	/* When net_device's are persistent, this will be fatal. */
4947 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
4948 	BUG_ON(!net);
4949 
4950 	spin_lock_init(&dev->addr_list_lock);
4951 	netdev_set_addr_lockdep_class(dev);
4952 	netdev_init_queue_locks(dev);
4953 
4954 	dev->iflink = -1;
4955 
4956 #ifdef CONFIG_RPS
4957 	if (!dev->num_rx_queues) {
4958 		/*
4959 		 * Allocate a single RX queue if driver never called
4960 		 * alloc_netdev_mq
4961 		 */
4962 
4963 		dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL);
4964 		if (!dev->_rx) {
4965 			ret = -ENOMEM;
4966 			goto out;
4967 		}
4968 
4969 		dev->_rx->first = dev->_rx;
4970 		atomic_set(&dev->_rx->count, 1);
4971 		dev->num_rx_queues = 1;
4972 	}
4973 #endif
4974 	/* Init, if this function is available */
4975 	if (dev->netdev_ops->ndo_init) {
4976 		ret = dev->netdev_ops->ndo_init(dev);
4977 		if (ret) {
4978 			if (ret > 0)
4979 				ret = -EIO;
4980 			goto out;
4981 		}
4982 	}
4983 
4984 	ret = dev_get_valid_name(dev, dev->name, 0);
4985 	if (ret)
4986 		goto err_uninit;
4987 
4988 	dev->ifindex = dev_new_index(net);
4989 	if (dev->iflink == -1)
4990 		dev->iflink = dev->ifindex;
4991 
4992 	/* Fix illegal checksum combinations */
4993 	if ((dev->features & NETIF_F_HW_CSUM) &&
4994 	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4995 		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
4996 		       dev->name);
4997 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
4998 	}
4999 
5000 	if ((dev->features & NETIF_F_NO_CSUM) &&
5001 	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5002 		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
5003 		       dev->name);
5004 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
5005 	}
5006 
5007 	dev->features = netdev_fix_features(dev->features, dev->name);
5008 
5009 	/* Enable software GSO if SG is supported. */
5010 	if (dev->features & NETIF_F_SG)
5011 		dev->features |= NETIF_F_GSO;
5012 
5013 	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5014 	ret = notifier_to_errno(ret);
5015 	if (ret)
5016 		goto err_uninit;
5017 
5018 	ret = netdev_register_kobject(dev);
5019 	if (ret)
5020 		goto err_uninit;
5021 	dev->reg_state = NETREG_REGISTERED;
5022 
5023 	/*
5024 	 *	Default initial state at registration is that the
5025 	 *	device is present.
5026 	 */
5027 
5028 	set_bit(__LINK_STATE_PRESENT, &dev->state);
5029 
5030 	dev_init_scheduler(dev);
5031 	dev_hold(dev);
5032 	list_netdevice(dev);
5033 
5034 	/* Notify protocols that a new device appeared. */
5035 	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
5036 	ret = notifier_to_errno(ret);
5037 	if (ret) {
5038 		rollback_registered(dev);
5039 		dev->reg_state = NETREG_UNREGISTERED;
5040 	}
5041 	/*
5042 	 *	Prevent userspace races by waiting until the network
5043 	 *	device is fully set up before sending notifications.
5044 	 */
5045 	if (!dev->rtnl_link_ops ||
5046 	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5047 		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
5048 
5049 out:
5050 	return ret;
5051 
5052 err_uninit:
5053 	if (dev->netdev_ops->ndo_uninit)
5054 		dev->netdev_ops->ndo_uninit(dev);
5055 	goto out;
5056 }
5057 EXPORT_SYMBOL(register_netdevice);
5058 
5059 /**
5060  *	init_dummy_netdev	- init a dummy network device for NAPI
5061  *	@dev: device to init
5062  *
5063  *	This takes a network device structure and initializes the minimum
5064  *	set of fields so it can be used to schedule NAPI polls without
5065  *	registering a full-blown interface. This is to be used by drivers
5066  *	that need to tie several hardware interfaces to a single NAPI
5067  *	poll scheduler due to HW limitations.
5068  */
5069 int init_dummy_netdev(struct net_device *dev)
5070 {
5071 	/* Clear everything. Note we don't initialize spinlocks
5072 	 * as they aren't supposed to be taken by any of the
5073 	 * NAPI code, and this dummy netdev is supposed to be
5074 	 * used only for NAPI polls.
5075 	 */
5076 	memset(dev, 0, sizeof(struct net_device));
5077 
5078 	/* make sure we BUG if trying to hit standard
5079 	 * register/unregister code path
5080 	 */
5081 	dev->reg_state = NETREG_DUMMY;
5082 
5083 	/* initialize the ref count */
5084 	atomic_set(&dev->refcnt, 1);
5085 
5086 	/* NAPI wants this */
5087 	INIT_LIST_HEAD(&dev->napi_list);
5088 
5089 	/* a dummy interface is started by default */
5090 	set_bit(__LINK_STATE_PRESENT, &dev->state);
5091 	set_bit(__LINK_STATE_START, &dev->state);
5092 
5093 	return 0;
5094 }
5095 EXPORT_SYMBOL_GPL(init_dummy_netdev);
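/*
 * Illustrative sketch (editor's addition): the intended use of
 * init_dummy_netdev(). A driver with several hardware ports but a single
 * poll source embeds a dummy netdev purely to anchor its NAPI context.
 * The structure and poll function names are hypothetical.
 *
 *	struct example_adapter {
 *		struct net_device dummy;	// never registered
 *		struct napi_struct napi;
 *	};
 *
 *	static void example_napi_setup(struct example_adapter *ad)
 *	{
 *		init_dummy_netdev(&ad->dummy);
 *		netif_napi_add(&ad->dummy, &ad->napi, example_poll, 64);
 *	}
 */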
5096 
5097 
5098 /**
5099  *	register_netdev	- register a network device
5100  *	@dev: device to register
5101  *
5102  *	Take a completed network device structure and add it to the kernel
5103  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5104  *	chain. 0 is returned on success. A negative errno code is returned
5105  *	on a failure to set up the device, or if the name is a duplicate.
5106  *
5107  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
5108  *	and expands the device name if you passed a format string to
5109  *	alloc_netdev.
5110  */
5111 int register_netdev(struct net_device *dev)
5112 {
5113 	int err;
5114 
5115 	rtnl_lock();
5116 
5117 	/*
5118 	 * If the name is a format string, the caller wants us to do a
5119 	 * name allocation.
5120 	 */
5121 	if (strchr(dev->name, '%')) {
5122 		err = dev_alloc_name(dev, dev->name);
5123 		if (err < 0)
5124 			goto out;
5125 	}
5126 
5127 	err = register_netdevice(dev);
5128 out:
5129 	rtnl_unlock();
5130 	return err;
5131 }
5132 EXPORT_SYMBOL(register_netdev);
5133 
5134 /*
5135  * netdev_wait_allrefs - wait until all references are gone.
5136  *
5137  * This is called when unregistering network devices.
5138  *
5139  * Any protocol or device that holds a reference should register
5140  * for netdevice notification, and clean up and put back the
5141  * reference if it receives an UNREGISTER event.
5142  * We can get stuck here if buggy protocols don't correctly
5143  * call dev_put.
5144  */
5145 static void netdev_wait_allrefs(struct net_device *dev)
5146 {
5147 	unsigned long rebroadcast_time, warning_time;
5148 
5149 	linkwatch_forget_dev(dev);
5150 
5151 	rebroadcast_time = warning_time = jiffies;
5152 	while (atomic_read(&dev->refcnt) != 0) {
5153 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
5154 			rtnl_lock();
5155 
5156 			/* Rebroadcast unregister notification */
5157 			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5158 			/* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
5159 			 * should have already handled it the first time */
5160 
5161 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
5162 				     &dev->state)) {
5163 				/* We must not have linkwatch events
5164 				 * pending on unregister. If this
5165 				 * happens, we simply run the queue
5166 				 * unscheduled, resulting in a noop
5167 				 * for this device.
5168 				 */
5169 				linkwatch_run_queue();
5170 			}
5171 
5172 			__rtnl_unlock();
5173 
5174 			rebroadcast_time = jiffies;
5175 		}
5176 
5177 		msleep(250);
5178 
5179 		if (time_after(jiffies, warning_time + 10 * HZ)) {
5180 			printk(KERN_EMERG "unregister_netdevice: "
5181 			       "waiting for %s to become free. Usage "
5182 			       "count = %d\n",
5183 			       dev->name, atomic_read(&dev->refcnt));
5184 			warning_time = jiffies;
5185 		}
5186 	}
5187 }
5188 
5189 /* The sequence is:
5190  *
5191  *	rtnl_lock();
5192  *	...
5193  *	register_netdevice(x1);
5194  *	register_netdevice(x2);
5195  *	...
5196  *	unregister_netdevice(y1);
5197  *	unregister_netdevice(y2);
5198  *      ...
5199  *	rtnl_unlock();
5200  *	free_netdev(y1);
5201  *	free_netdev(y2);
5202  *
5203  * We are invoked by rtnl_unlock().
5204  * This allows us to deal with problems:
5205  * 1) We can delete sysfs objects which invoke hotplug
5206  *    without deadlocking with linkwatch via keventd.
5207  * 2) Since we run with the RTNL semaphore not held, we can sleep
5208  *    safely in order to wait for the netdev refcnt to drop to zero.
5209  *
5210  * We must not return until all unregister events added during
5211  * the interval the lock was held have been completed.
5212  */
5213 void netdev_run_todo(void)
5214 {
5215 	struct list_head list;
5216 
5217 	/* Snapshot list, allow later requests */
5218 	list_replace_init(&net_todo_list, &list);
5219 
5220 	__rtnl_unlock();
5221 
5222 	while (!list_empty(&list)) {
5223 		struct net_device *dev
5224 			= list_first_entry(&list, struct net_device, todo_list);
5225 		list_del(&dev->todo_list);
5226 
5227 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
5228 			printk(KERN_ERR "network todo '%s' but state %d\n",
5229 			       dev->name, dev->reg_state);
5230 			dump_stack();
5231 			continue;
5232 		}
5233 
5234 		dev->reg_state = NETREG_UNREGISTERED;
5235 
5236 		on_each_cpu(flush_backlog, dev, 1);
5237 
5238 		netdev_wait_allrefs(dev);
5239 
5240 		/* paranoia */
5241 		BUG_ON(atomic_read(&dev->refcnt));
5242 		WARN_ON(dev->ip_ptr);
5243 		WARN_ON(dev->ip6_ptr);
5244 		WARN_ON(dev->dn_ptr);
5245 
5246 		if (dev->destructor)
5247 			dev->destructor(dev);
5248 
5249 		/* Free network device */
5250 		kobject_put(&dev->dev.kobj);
5251 	}
5252 }
5253 
5254 /**
5255  *	dev_txq_stats_fold - fold tx_queues stats
5256  *	@dev: device to get statistics from
5257  *	@stats: struct net_device_stats to hold results
5258  */
5259 void dev_txq_stats_fold(const struct net_device *dev,
5260 			struct net_device_stats *stats)
5261 {
5262 	unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
5263 	unsigned int i;
5264 	struct netdev_queue *txq;
5265 
5266 	for (i = 0; i < dev->num_tx_queues; i++) {
5267 		txq = netdev_get_tx_queue(dev, i);
5268 		tx_bytes   += txq->tx_bytes;
5269 		tx_packets += txq->tx_packets;
5270 		tx_dropped += txq->tx_dropped;
5271 	}
5272 	if (tx_bytes || tx_packets || tx_dropped) {
5273 		stats->tx_bytes   = tx_bytes;
5274 		stats->tx_packets = tx_packets;
5275 		stats->tx_dropped = tx_dropped;
5276 	}
5277 }
5278 EXPORT_SYMBOL(dev_txq_stats_fold);
5279 
5280 /**
5281  *	dev_get_stats	- get network device statistics
5282  *	@dev: device to get statistics from
5283  *
5284  *	Get network statistics from device. The device driver may provide
5285  *	its own method by setting dev->netdev_ops->ndo_get_stats64 or
5286  *	dev->netdev_ops->ndo_get_stats; otherwise the internal statistics
5287  *	structure is used.
5288  */
5289 const struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev)
5290 {
5291 	const struct net_device_ops *ops = dev->netdev_ops;
5292 
5293 	if (ops->ndo_get_stats64)
5294 		return ops->ndo_get_stats64(dev);
5295 	if (ops->ndo_get_stats)
5296 		return (struct rtnl_link_stats64 *)ops->ndo_get_stats(dev);
5297 
5298 	dev_txq_stats_fold(dev, &dev->stats);
5299 	return &dev->stats64;
5300 }
5301 EXPORT_SYMBOL(dev_get_stats);
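/*
 * Illustrative sketch (editor's addition): a typical reader of the
 * unified statistics, as /proc/net/dev and rtnetlink consume them. The
 * returned pointer aliases driver- or device-owned storage, so use it
 * immediately rather than caching it.
 *
 *	const struct rtnl_link_stats64 *stats = dev_get_stats(dev);
 *
 *	pr_info("%s: rx %llu pkts, tx %llu pkts\n", dev->name,
 *		(unsigned long long)stats->rx_packets,
 *		(unsigned long long)stats->tx_packets);
 */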
5302 
5303 static void netdev_init_one_queue(struct net_device *dev,
5304 				  struct netdev_queue *queue,
5305 				  void *_unused)
5306 {
5307 	queue->dev = dev;
5308 }
5309 
5310 static void netdev_init_queues(struct net_device *dev)
5311 {
5312 	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
5313 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5314 	spin_lock_init(&dev->tx_global_lock);
5315 }
5316 
5317 /**
5318  *	alloc_netdev_mq - allocate network device
5319  *	@sizeof_priv:	size of private data to allocate space for
5320  *	@name:		device name format string
5321  *	@setup:		callback to initialize device
5322  *	@queue_count:	the number of subqueues to allocate
5323  *
5324  *	Allocates a struct net_device with private data area for driver use
5325  *	and performs basic initialization.  Also allocates subqueue structs
5326  *	for each queue on the device at the end of the netdevice.
5327  */
5328 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5329 		void (*setup)(struct net_device *), unsigned int queue_count)
5330 {
5331 	struct netdev_queue *tx;
5332 	struct net_device *dev;
5333 	size_t alloc_size;
5334 	struct net_device *p;
5335 #ifdef CONFIG_RPS
5336 	struct netdev_rx_queue *rx;
5337 	int i;
5338 #endif
5339 
5340 	BUG_ON(strlen(name) >= sizeof(dev->name));
5341 
5342 	alloc_size = sizeof(struct net_device);
5343 	if (sizeof_priv) {
5344 		/* ensure 32-byte alignment of private area */
5345 		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
5346 		alloc_size += sizeof_priv;
5347 	}
5348 	/* ensure 32-byte alignment of whole construct */
5349 	alloc_size += NETDEV_ALIGN - 1;
5350 
5351 	p = kzalloc(alloc_size, GFP_KERNEL);
5352 	if (!p) {
5353 		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
5354 		return NULL;
5355 	}
5356 
5357 	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
5358 	if (!tx) {
5359 		printk(KERN_ERR "alloc_netdev: Unable to allocate "
5360 		       "tx qdiscs.\n");
5361 		goto free_p;
5362 	}
5363 
5364 #ifdef CONFIG_RPS
5365 	rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5366 	if (!rx) {
5367 		printk(KERN_ERR "alloc_netdev: Unable to allocate "
5368 		       "rx queues.\n");
5369 		goto free_tx;
5370 	}
5371 
5372 	atomic_set(&rx->count, queue_count);
5373 
5374 	/*
5375 	 * Set a pointer to the first element in the array, which holds the
5376 	 * reference count.
5377 	 */
5378 	for (i = 0; i < queue_count; i++)
5379 		rx[i].first = rx;
5380 #endif
5381 
5382 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
5383 	dev->padded = (char *)dev - (char *)p;
5384 
5385 	if (dev_addr_init(dev))
5386 		goto free_rx;
5387 
5388 	dev_mc_init(dev);
5389 	dev_uc_init(dev);
5390 
5391 	dev_net_set(dev, &init_net);
5392 
5393 	dev->_tx = tx;
5394 	dev->num_tx_queues = queue_count;
5395 	dev->real_num_tx_queues = queue_count;
5396 
5397 #ifdef CONFIG_RPS
5398 	dev->_rx = rx;
5399 	dev->num_rx_queues = queue_count;
5400 #endif
5401 
5402 	dev->gso_max_size = GSO_MAX_SIZE;
5403 
5404 	netdev_init_queues(dev);
5405 
5406 	INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
5407 	dev->ethtool_ntuple_list.count = 0;
5408 	INIT_LIST_HEAD(&dev->napi_list);
5409 	INIT_LIST_HEAD(&dev->unreg_list);
5410 	INIT_LIST_HEAD(&dev->link_watch_list);
5411 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
5412 	setup(dev);
5413 	strcpy(dev->name, name);
5414 	return dev;
5415 
5416 free_rx:
5417 #ifdef CONFIG_RPS
5418 	kfree(rx);
5419 free_tx:
5420 #endif
5421 	kfree(tx);
5422 free_p:
5423 	kfree(p);
5424 	return NULL;
5425 }
5426 EXPORT_SYMBOL(alloc_netdev_mq);
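/*
 * Illustrative sketch (editor's addition): the usual lifetime of a
 * netdevice allocated here, from a driver's probe/remove point of view.
 * example_priv and example_netdev_ops are hypothetical; ether_setup() is
 * the stock Ethernet setup callback and "eth%d" requests name
 * auto-numbering (see register_netdev() above).
 *
 *	static struct net_device *example_probe(void)
 *	{
 *		struct net_device *dev;
 *
 *		dev = alloc_netdev_mq(sizeof(struct example_priv),
 *				      "eth%d", ether_setup, 1);
 *		if (!dev)
 *			return NULL;
 *		dev->netdev_ops = &example_netdev_ops;
 *		if (register_netdev(dev)) {
 *			free_netdev(dev);  // never registered: plain free
 *			return NULL;
 *		}
 *		return dev;
 *	}
 *
 *	static void example_remove(struct net_device *dev)
 *	{
 *		unregister_netdev(dev);
 *		free_netdev(dev);	// final put after unregistration
 *	}
 */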
5427 
5428 /**
5429  *	free_netdev - free network device
5430  *	@dev: device
5431  *
5432  *	This function does the last stage of destroying an allocated device
5433  * 	interface. The reference to the device object is released.
5434  *	If this is the last reference then it will be freed.
5435  */
5436 void free_netdev(struct net_device *dev)
5437 {
5438 	struct napi_struct *p, *n;
5439 
5440 	release_net(dev_net(dev));
5441 
5442 	kfree(dev->_tx);
5443 
5444 	/* Flush device addresses */
5445 	dev_addr_flush(dev);
5446 
5447 	/* Clear ethtool n-tuple list */
5448 	ethtool_ntuple_flush(dev);
5449 
5450 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
5451 		netif_napi_del(p);
5452 
5453 	/*  Compatibility with error handling in drivers */
5454 	if (dev->reg_state == NETREG_UNINITIALIZED) {
5455 		kfree((char *)dev - dev->padded);
5456 		return;
5457 	}
5458 
5459 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
5460 	dev->reg_state = NETREG_RELEASED;
5461 
5462 	/* will free via device release */
5463 	put_device(&dev->dev);
5464 }
5465 EXPORT_SYMBOL(free_netdev);
5466 
5467 /**
5468  *	synchronize_net -  Synchronize with packet receive processing
5469  *
5470  *	Wait for packets currently being received to be done.
5471  *	Does not block later packets from starting.
5472  */
5473 void synchronize_net(void)
5474 {
5475 	might_sleep();
5476 	synchronize_rcu();
5477 }
5478 EXPORT_SYMBOL(synchronize_net);
5479 
5480 /**
5481  *	unregister_netdevice_queue - remove device from the kernel
5482  *	@dev: device
5483  *	@head: list
5484  *
5485  *	This function shuts down a device interface and removes it
5486  *	from the kernel tables.
5487  *	If @head is not NULL, the device is queued to be unregistered later.
5488  *
5489  *	Callers must hold the rtnl semaphore.  You may want
5490  *	unregister_netdev() instead of this.
5491  */
5492 
5493 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
5494 {
5495 	ASSERT_RTNL();
5496 
5497 	if (head) {
5498 		list_move_tail(&dev->unreg_list, head);
5499 	} else {
5500 		rollback_registered(dev);
5501 		/* Finish processing unregister after unlock */
5502 		net_set_todo(dev);
5503 	}
5504 }
5505 EXPORT_SYMBOL(unregister_netdevice_queue);
5506 
5507 /**
5508  *	unregister_netdevice_many - unregister many devices
5509  *	@head: list of devices
5510  */
5511 void unregister_netdevice_many(struct list_head *head)
5512 {
5513 	struct net_device *dev;
5514 
5515 	if (!list_empty(head)) {
5516 		rollback_registered_many(head);
5517 		list_for_each_entry(dev, head, unreg_list)
5518 			net_set_todo(dev);
5519 	}
5520 }
5521 EXPORT_SYMBOL(unregister_netdevice_many);
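/*
 * Illustrative sketch (editor's addition): batching teardown with the
 * queue/many pair so the expensive synchronize_net() calls inside
 * rollback_registered_many() are paid once per batch instead of once per
 * device; compare default_device_exit_batch() below. The example_owns()
 * predicate is hypothetical.
 *
 *	LIST_HEAD(kill_list);
 *	struct net_device *dev;
 *
 *	rtnl_lock();
 *	for_each_netdev(net, dev)
 *		if (example_owns(dev))
 *			unregister_netdevice_queue(dev, &kill_list);
 *	unregister_netdevice_many(&kill_list);
 *	rtnl_unlock();
 */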
5522 
5523 /**
5524  *	unregister_netdev - remove device from the kernel
5525  *	@dev: device
5526  *
5527  *	This function shuts down a device interface and removes it
5528  *	from the kernel tables.
5529  *
5530  *	This is just a wrapper for unregister_netdevice that takes
5531  *	the rtnl semaphore.  In general you want to use this and not
5532  *	unregister_netdevice.
5533  */
5534 void unregister_netdev(struct net_device *dev)
5535 {
5536 	rtnl_lock();
5537 	unregister_netdevice(dev);
5538 	rtnl_unlock();
5539 }
5540 EXPORT_SYMBOL(unregister_netdev);
5541 
5542 /**
5543  *	dev_change_net_namespace - move device to a different network namespace
5544  *	@dev: device
5545  *	@net: network namespace
5546  *	@pat: if not NULL, name pattern to try if the current device name
5547  *	      is already taken in the destination network namespace.
5548  *
5549  *	This function shuts down a device interface and moves it
5550  *	to a new network namespace. On success 0 is returned; on
5551  *	failure a negative errno code is returned.
5552  *
5553  *	Callers must hold the rtnl semaphore.
5554  */
5555 
5556 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
5557 {
5558 	int err;
5559 
5560 	ASSERT_RTNL();
5561 
5562 	/* Don't allow namespace local devices to be moved. */
5563 	err = -EINVAL;
5564 	if (dev->features & NETIF_F_NETNS_LOCAL)
5565 		goto out;
5566 
5567 	/* Ensure the device has been registered */
5568 	err = -EINVAL;
5569 	if (dev->reg_state != NETREG_REGISTERED)
5570 		goto out;
5571 
5572 	/* Get out if there is nothing to do */
5573 	err = 0;
5574 	if (net_eq(dev_net(dev), net))
5575 		goto out;
5576 
5577 	/* Pick the destination device name, and ensure
5578 	 * we can use it in the destination network namespace.
5579 	 */
5580 	err = -EEXIST;
5581 	if (__dev_get_by_name(net, dev->name)) {
5582 		/* We get here if we can't use the current device name */
5583 		if (!pat)
5584 			goto out;
5585 		if (dev_get_valid_name(dev, pat, 1))
5586 			goto out;
5587 	}
5588 
5589 	/*
5590 	 * And now a mini version of register_netdevice() and unregister_netdevice().
5591 	 */
5592 
5593 	/* If device is running close it first. */
5594 	dev_close(dev);
5595 
5596 	/* And unlink it from device chain */
5597 	err = -ENODEV;
5598 	unlist_netdevice(dev);
5599 
5600 	synchronize_net();
5601 
5602 	/* Shutdown queueing discipline. */
5603 	dev_shutdown(dev);
5604 
5605 	/* Notify protocols that we are about to destroy
5606 	   this device; they should clean up all their state.
5607 	*/
5608 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5609 	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
5610 
5611 	/*
5612 	 *	Flush the unicast and multicast chains
5613 	 */
5614 	dev_uc_flush(dev);
5615 	dev_mc_flush(dev);
5616 
5617 	/* Actually switch the network namespace */
5618 	dev_net_set(dev, net);
5619 
5620 	/* If there is an ifindex conflict assign a new one */
5621 	if (__dev_get_by_index(net, dev->ifindex)) {
5622 		int iflink = (dev->iflink == dev->ifindex);
5623 		dev->ifindex = dev_new_index(net);
5624 		if (iflink)
5625 			dev->iflink = dev->ifindex;
5626 	}
5627 
5628 	/* Fixup kobjects */
5629 	err = device_rename(&dev->dev, dev->name);
5630 	WARN_ON(err);
5631 
5632 	/* Add the device back in the hashes */
5633 	list_netdevice(dev);
5634 
5635 	/* Notify protocols that a new device appeared. */
5636 	call_netdevice_notifiers(NETDEV_REGISTER, dev);
5637 
5638 	/*
5639 	 *	Prevent userspace races by waiting until the network
5640 	 *	device is fully set up before sending notifications.
5641 	 */
5642 	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
5643 
5644 	synchronize_net();
5645 	err = 0;
5646 out:
5647 	return err;
5648 }
5649 EXPORT_SYMBOL_GPL(dev_change_net_namespace);
5650 
5651 static int dev_cpu_callback(struct notifier_block *nfb,
5652 			    unsigned long action,
5653 			    void *ocpu)
5654 {
5655 	struct sk_buff **list_skb;
5656 	struct sk_buff *skb;
5657 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
5658 	struct softnet_data *sd, *oldsd;
5659 
5660 	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
5661 		return NOTIFY_OK;
5662 
5663 	local_irq_disable();
5664 	cpu = smp_processor_id();
5665 	sd = &per_cpu(softnet_data, cpu);
5666 	oldsd = &per_cpu(softnet_data, oldcpu);
5667 
5668 	/* Find end of our completion_queue. */
5669 	list_skb = &sd->completion_queue;
5670 	while (*list_skb)
5671 		list_skb = &(*list_skb)->next;
5672 	/* Append completion queue from offline CPU. */
5673 	*list_skb = oldsd->completion_queue;
5674 	oldsd->completion_queue = NULL;
5675 
5676 	/* Append output queue from offline CPU. */
5677 	if (oldsd->output_queue) {
5678 		*sd->output_queue_tailp = oldsd->output_queue;
5679 		sd->output_queue_tailp = oldsd->output_queue_tailp;
5680 		oldsd->output_queue = NULL;
5681 		oldsd->output_queue_tailp = &oldsd->output_queue;
5682 	}
5683 
5684 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
5685 	local_irq_enable();
5686 
5687 	/* Process offline CPU's input_pkt_queue */
5688 	while ((skb = __skb_dequeue(&oldsd->process_queue))) {
5689 		netif_rx(skb);
5690 		input_queue_head_incr(oldsd);
5691 	}
5692 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
5693 		netif_rx(skb);
5694 		input_queue_head_incr(oldsd);
5695 	}
5696 
5697 	return NOTIFY_OK;
5698 }
5699 
5700 
5701 /**
5702  *	netdev_increment_features - increment feature set by one
5703  *	@all: current feature set
5704  *	@one: new feature set
5705  *	@mask: mask feature set
5706  *
5707  *	Computes a new feature set after adding a device with feature set
5708  *	@one to the master device with current feature set @all.  Will not
5709  *	enable anything that is off in @mask. Returns the new feature set.
5710  */
5711 unsigned long netdev_increment_features(unsigned long all, unsigned long one,
5712 					unsigned long mask)
5713 {
5714 	/* If device needs checksumming, downgrade to it. */
5715 	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
5716 		all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
5717 	else if (mask & NETIF_F_ALL_CSUM) {
5718 		/* If one device supports v4/v6 checksumming, set for all. */
5719 		if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
5720 		    !(all & NETIF_F_GEN_CSUM)) {
5721 			all &= ~NETIF_F_ALL_CSUM;
5722 			all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
5723 		}
5724 
5725 		/* If one device supports hw checksumming, set for all. */
5726 		if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
5727 			all &= ~NETIF_F_ALL_CSUM;
5728 			all |= NETIF_F_HW_CSUM;
5729 		}
5730 	}
5731 
5732 	one |= NETIF_F_ALL_CSUM;
5733 
5734 	one |= all & NETIF_F_ONE_FOR_ALL;
5735 	all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO;
5736 	all |= one & mask & NETIF_F_ONE_FOR_ALL;
5737 
5738 	return all;
5739 }
5740 EXPORT_SYMBOL(netdev_increment_features);
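/*
 * Illustrative sketch (editor's addition): how a master driver such as
 * bonding might recompute its feature set by folding every slave through
 * netdev_increment_features(). The slave list, its layout and
 * example_mask are hypothetical.
 *
 *	unsigned long features = NETIF_F_ALL_CSUM | NETIF_F_SG |
 *				 NETIF_F_TSO;
 *	struct example_slave *s;
 *
 *	list_for_each_entry(s, &example_slaves, list)
 *		features = netdev_increment_features(features,
 *						     s->dev->features,
 *						     example_mask);
 *	master->features = netdev_fix_features(features, master->name);
 *
 * Starting from a permissive set and narrowing per slave ensures the
 * master never advertises more than its weakest slave supports.
 */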
5741 
5742 static struct hlist_head *netdev_create_hash(void)
5743 {
5744 	int i;
5745 	struct hlist_head *hash;
5746 
5747 	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
5748 	if (hash != NULL)
5749 		for (i = 0; i < NETDEV_HASHENTRIES; i++)
5750 			INIT_HLIST_HEAD(&hash[i]);
5751 
5752 	return hash;
5753 }
5754 
5755 /* Initialize per network namespace state */
5756 static int __net_init netdev_init(struct net *net)
5757 {
5758 	INIT_LIST_HEAD(&net->dev_base_head);
5759 
5760 	net->dev_name_head = netdev_create_hash();
5761 	if (net->dev_name_head == NULL)
5762 		goto err_name;
5763 
5764 	net->dev_index_head = netdev_create_hash();
5765 	if (net->dev_index_head == NULL)
5766 		goto err_idx;
5767 
5768 	return 0;
5769 
5770 err_idx:
5771 	kfree(net->dev_name_head);
5772 err_name:
5773 	return -ENOMEM;
5774 }
5775 
5776 /**
5777  *	netdev_drivername - network driver for the device
5778  *	@dev: network device
5779  *	@buffer: buffer for resulting name
5780  *	@len: size of buffer
5781  *
5782  *	Determine the network driver for @dev and copy its name into @buffer.
5783  */
5784 char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
5785 {
5786 	const struct device_driver *driver;
5787 	const struct device *parent;
5788 
5789 	if (len <= 0 || !buffer)
5790 		return buffer;
5791 	buffer[0] = 0;
5792 
5793 	parent = dev->dev.parent;
5794 
5795 	if (!parent)
5796 		return buffer;
5797 
5798 	driver = parent->driver;
5799 	if (driver && driver->name)
5800 		strlcpy(buffer, driver->name, len);
5801 	return buffer;
5802 }
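
/*
 * Typical use is one-shot diagnostics from a small stack buffer, in the
 * style of the tx-timeout watchdog in net/sched/sch_generic.c:
 *
 *	char drivername[64];
 *
 *	WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): "
 *		  "transmit timed out\n", dev->name,
 *		  netdev_drivername(dev, drivername, 64));
 */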
5803 
5804 static void __net_exit netdev_exit(struct net *net)
5805 {
5806 	kfree(net->dev_name_head);
5807 	kfree(net->dev_index_head);
5808 }
5809 
5810 static struct pernet_operations __net_initdata netdev_net_ops = {
5811 	.init = netdev_init,
5812 	.exit = netdev_exit,
5813 };
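
/*
 * Registered from net_dev_init() below: .init runs for each network
 * namespace as it is created (including the initial namespace at boot)
 * and .exit as it is torn down, so every namespace gets, and later
 * frees, its own name and ifindex hash tables.
 */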
5814 
5815 static void __net_exit default_device_exit(struct net *net)
5816 {
5817 	struct net_device *dev, *aux;
5818 	/*
5819 	 * Push all migratable network devices back to the
5820 	 * initial network namespace.
5821 	 */
5822 	rtnl_lock();
5823 	for_each_netdev_safe(net, dev, aux) {
5824 		int err;
5825 		char fb_name[IFNAMSIZ];
5826 
5827 		/* Ignore unmovable devices (e.g. the loopback device) */
5828 		if (dev->features & NETIF_F_NETNS_LOCAL)
5829 			continue;
5830 
5831 		/* Leave virtual devices for the generic cleanup */
5832 		if (dev->rtnl_link_ops)
5833 			continue;
5834 
5835 		/* Push remaining network devices to init_net */
5836 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
5837 		err = dev_change_net_namespace(dev, &init_net, fb_name);
5838 		if (err) {
5839 			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
5840 				__func__, dev->name, err);
5841 			BUG();
5842 		}
5843 	}
5844 	rtnl_unlock();
5845 }
5846 
5847 static void __net_exit default_device_exit_batch(struct list_head *net_list)
5848 {
5849 	/* At exit all network devices must be removed from a network
5850 	 * namespace.  Do this in the reverse order of registration.
5851 	 * Do this across as many network namespaces as possible to
5852 	 * improve batching efficiency.
5853 	 */
5854 	struct net_device *dev;
5855 	struct net *net;
5856 	LIST_HEAD(dev_kill_list);
5857 
5858 	rtnl_lock();
5859 	list_for_each_entry(net, net_list, exit_list) {
5860 		for_each_netdev_reverse(net, dev) {
5861 			if (dev->rtnl_link_ops)
5862 				dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
5863 			else
5864 				unregister_netdevice_queue(dev, &dev_kill_list);
5865 		}
5866 	}
5867 	unregister_netdevice_many(&dev_kill_list);
5868 	rtnl_unlock();
5869 }
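
/*
 * Gathering every doomed device on a single list and handing it to
 * unregister_netdevice_many() lets the expensive synchronization points
 * of device teardown be paid once per batch rather than once per
 * device, which matters when whole namespaces full of devices exit at
 * the same time.
 */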
5870 
5871 static struct pernet_operations __net_initdata default_device_ops = {
5872 	.exit = default_device_exit,
5873 	.exit_batch = default_device_exit_batch,
5874 };
5875 
5876 /*
5877  *	Initialize the DEV module. At boot time this walks the device list
5878  *	and unhooks any devices that fail to initialize (normally hardware
5879  *	not present) and leaves us with a valid list of present and active
5880  *	devices.
5881  */
5882 
5883 /*
5884  *       This is called single-threaded during boot, so there is no
5885  *       need to take the rtnl semaphore.
5886  */
5887 static int __init net_dev_init(void)
5888 {
5889 	int i, rc = -ENOMEM;
5890 
5891 	BUG_ON(!dev_boot_phase);
5892 
5893 	if (dev_proc_init())
5894 		goto out;
5895 
5896 	if (netdev_kobject_init())
5897 		goto out;
5898 
5899 	INIT_LIST_HEAD(&ptype_all);
5900 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
5901 		INIT_LIST_HEAD(&ptype_base[i]);
5902 
5903 	if (register_pernet_subsys(&netdev_net_ops))
5904 		goto out;
5905 
5906 	/*
5907 	 *	Initialise the packet receive queues.
5908 	 */
5909 
5910 	for_each_possible_cpu(i) {
5911 		struct softnet_data *sd = &per_cpu(softnet_data, i);
5912 
5913 		memset(sd, 0, sizeof(*sd));
5914 		skb_queue_head_init(&sd->input_pkt_queue);
5915 		skb_queue_head_init(&sd->process_queue);
5916 		sd->completion_queue = NULL;
5917 		INIT_LIST_HEAD(&sd->poll_list);
5918 		sd->output_queue = NULL;
5919 		sd->output_queue_tailp = &sd->output_queue;
5920 #ifdef CONFIG_RPS
5921 		sd->csd.func = rps_trigger_softirq;
5922 		sd->csd.info = sd;
5923 		sd->csd.flags = 0;
5924 		sd->cpu = i;
5925 #endif
5926 
5927 		sd->backlog.poll = process_backlog;
5928 		sd->backlog.weight = weight_p;
5929 		sd->backlog.gro_list = NULL;
5930 		sd->backlog.gro_count = 0;
5931 	}
5932 
5933 	dev_boot_phase = 0;
5934 
5935 	/* The loopback device is special: if any other network device
5936 	 * is present in a network namespace, the loopback device must
5937 	 * be present too. Since we now dynamically allocate and free
5938 	 * the loopback device, ensure this invariant is maintained by
5939 	 * keeping the loopback device as the first device on the
5940 	 * list of network devices, so that it is the first device
5941 	 * that appears and the last network device that
5942 	 * disappears.
5943 	 */
5944 	if (register_pernet_device(&loopback_net_ops))
5945 		goto out;
5946 
5947 	if (register_pernet_device(&default_device_ops))
5948 		goto out;
5949 
5950 	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
5951 	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
5952 
5953 	hotcpu_notifier(dev_cpu_callback, 0);
5954 	dst_init();
5955 	dev_mcast_init();
5956 	rc = 0;
5957 out:
5958 	return rc;
5959 }
5960 
5961 subsys_initcall(net_dev_init);
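
/*
 * subsys_initcall() runs net_dev_init() ahead of ordinary device
 * initcalls, so the softnet queues and softirq handlers set up above
 * exist before the first driver calls register_netdev().
 */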
5962 
5963 static int __init initialize_hashrnd(void)
5964 {
5965 	get_random_bytes(&hashrnd, sizeof(hashrnd));
5966 	return 0;
5967 }
5968 
5969 late_initcall_sync(initialize_hashrnd);
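
/*
 * hashrnd seeds the flow-hashing helpers (e.g. skb_tx_hash()) so that
 * hash placement is not predictable across boots.  Running this via
 * late_initcall_sync(), after every other initcall, gives the random
 * pool as much time as boot allows to gather entropy first.
 */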
5970 
5971