1 /* 2 * NET3 Protocol independent device support routines. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Derived from the non IP parts of dev.c 1.0.19 10 * Authors: Ross Biro 11 * Fred N. van Kempen, <[email protected]> 12 * Mark Evans, <[email protected]> 13 * 14 * Additional Authors: 15 * Florian la Roche <[email protected]> 16 * Alan Cox <[email protected]> 17 * David Hinds <[email protected]> 18 * Alexey Kuznetsov <[email protected]> 19 * Adam Sulmicki <[email protected]> 20 * Pekka Riikonen <[email protected]> 21 * 22 * Changes: 23 * D.J. Barrow : Fixed bug where dev->refcnt gets set 24 * to 2 if register_netdev gets called 25 * before net_dev_init & also removed a 26 * few lines of code in the process. 27 * Alan Cox : device private ioctl copies fields back. 28 * Alan Cox : Transmit queue code does relevant 29 * stunts to keep the queue safe. 30 * Alan Cox : Fixed double lock. 31 * Alan Cox : Fixed promisc NULL pointer trap 32 * ???????? : Support the full private ioctl range 33 * Alan Cox : Moved ioctl permission check into 34 * drivers 35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI 36 * Alan Cox : 100 backlog just doesn't cut it when 37 * you start doing multicast video 8) 38 * Alan Cox : Rewrote net_bh and list manager. 39 * Alan Cox : Fix ETH_P_ALL echoback lengths. 40 * Alan Cox : Took out transmit every packet pass 41 * Saved a few bytes in the ioctl handler 42 * Alan Cox : Network driver sets packet type before 43 * calling netif_rx. Saves a function 44 * call a packet. 45 * Alan Cox : Hashed net_bh() 46 * Richard Kooijman: Timestamp fixes. 47 * Alan Cox : Wrong field in SIOCGIFDSTADDR 48 * Alan Cox : Device lock protection. 49 * Alan Cox : Fixed nasty side effect of device close 50 * changes. 51 * Rudi Cilibrasi : Pass the right thing to 52 * set_mac_address() 53 * Dave Miller : 32bit quantity for the device lock to 54 * make it work out on a Sparc. 55 * Bjorn Ekwall : Added KERNELD hack. 56 * Alan Cox : Cleaned up the backlog initialise. 57 * Craig Metz : SIOCGIFCONF fix if space for under 58 * 1 device. 59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there 60 * is no device open function. 61 * Andi Kleen : Fix error reporting for SIOCGIFCONF 62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF 63 * Cyrus Durgin : Cleaned for KMOD 64 * Adam Sulmicki : Bug Fix : Network Device Unload 65 * A network device unload needs to purge 66 * the backlog queue. 
67 * Paul Rusty Russell : SIOCSIFNAME 68 * Pekka Riikonen : Netdev boot-time settings code 69 * Andrew Morton : Make unregister_netdevice wait 70 * indefinitely on dev->refcnt 71 * J Hadi Salim : - Backlog queue sampling 72 * - netif_rx() feedback 73 */ 74 75 #include <asm/uaccess.h> 76 #include <asm/system.h> 77 #include <linux/bitops.h> 78 #include <linux/capability.h> 79 #include <linux/cpu.h> 80 #include <linux/types.h> 81 #include <linux/kernel.h> 82 #include <linux/sched.h> 83 #include <linux/mutex.h> 84 #include <linux/string.h> 85 #include <linux/mm.h> 86 #include <linux/socket.h> 87 #include <linux/sockios.h> 88 #include <linux/errno.h> 89 #include <linux/interrupt.h> 90 #include <linux/if_ether.h> 91 #include <linux/netdevice.h> 92 #include <linux/etherdevice.h> 93 #include <linux/ethtool.h> 94 #include <linux/notifier.h> 95 #include <linux/skbuff.h> 96 #include <net/net_namespace.h> 97 #include <net/sock.h> 98 #include <linux/rtnetlink.h> 99 #include <linux/proc_fs.h> 100 #include <linux/seq_file.h> 101 #include <linux/stat.h> 102 #include <linux/if_bridge.h> 103 #include <linux/if_macvlan.h> 104 #include <net/dst.h> 105 #include <net/pkt_sched.h> 106 #include <net/checksum.h> 107 #include <linux/highmem.h> 108 #include <linux/init.h> 109 #include <linux/kmod.h> 110 #include <linux/module.h> 111 #include <linux/netpoll.h> 112 #include <linux/rcupdate.h> 113 #include <linux/delay.h> 114 #include <net/wext.h> 115 #include <net/iw_handler.h> 116 #include <asm/current.h> 117 #include <linux/audit.h> 118 #include <linux/dmaengine.h> 119 #include <linux/err.h> 120 #include <linux/ctype.h> 121 #include <linux/if_arp.h> 122 #include <linux/if_vlan.h> 123 #include <linux/ip.h> 124 #include <net/ip.h> 125 #include <linux/ipv6.h> 126 #include <linux/in.h> 127 #include <linux/jhash.h> 128 #include <linux/random.h> 129 130 #include "net-sysfs.h" 131 132 /* 133 * The list of packet types we will receive (as opposed to discard) 134 * and the routines to invoke. 135 * 136 * Why 16. Because with 16 the only overlap we get on a hash of the 137 * low nibble of the protocol value is RARP/SNAP/X.25. 138 * 139 * NOTE: That is no longer true with the addition of VLAN tags. Not 140 * sure which should go first, but I bet it won't make much 141 * difference if we are running VLANs. The good news is that 142 * this protocol won't be in the list unless compiled in, so 143 * the average user (w/out VLANs) will not be adversely affected. 144 * --BLG 145 * 146 * 0800 IP 147 * 8100 802.1Q VLAN 148 * 0001 802.3 149 * 0002 AX.25 150 * 0004 802.2 151 * 8035 RARP 152 * 0005 SNAP 153 * 0805 X.25 154 * 0806 ARP 155 * 8137 IPX 156 * 0009 Localtalk 157 * 86DD IPv6 158 */ 159 160 #define PTYPE_HASH_SIZE (16) 161 #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) 162 163 static DEFINE_SPINLOCK(ptype_lock); 164 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; 165 static struct list_head ptype_all __read_mostly; /* Taps */ 166 167 #ifdef CONFIG_NET_DMA 168 struct net_dma { 169 struct dma_client client; 170 spinlock_t lock; 171 cpumask_t channel_mask; 172 struct dma_chan **channels; 173 }; 174 175 static enum dma_state_client 176 netdev_dma_event(struct dma_client *client, struct dma_chan *chan, 177 enum dma_state state); 178 179 static struct net_dma net_dma = { 180 .client = { 181 .event_callback = netdev_dma_event, 182 }, 183 }; 184 #endif 185 186 /* 187 * The @dev_base_head list is protected by @dev_base_lock and the rtnl 188 * semaphore. 189 * 190 * Pure readers hold dev_base_lock for reading. 
191 * 192 * Writers must hold the rtnl semaphore while they loop through the 193 * dev_base_head list, and hold dev_base_lock for writing when they do the 194 * actual updates. This allows pure readers to access the list even 195 * while a writer is preparing to update it. 196 * 197 * To put it another way, dev_base_lock is held for writing only to 198 * protect against pure readers; the rtnl semaphore provides the 199 * protection against other writers. 200 * 201 * See, for example usages, register_netdevice() and 202 * unregister_netdevice(), which must be called with the rtnl 203 * semaphore held. 204 */ 205 DEFINE_RWLOCK(dev_base_lock); 206 207 EXPORT_SYMBOL(dev_base_lock); 208 209 #define NETDEV_HASHBITS 8 210 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) 211 212 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) 213 { 214 unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); 215 return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)]; 216 } 217 218 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) 219 { 220 return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; 221 } 222 223 /* Device list insertion */ 224 static int list_netdevice(struct net_device *dev) 225 { 226 struct net *net = dev_net(dev); 227 228 ASSERT_RTNL(); 229 230 write_lock_bh(&dev_base_lock); 231 list_add_tail(&dev->dev_list, &net->dev_base_head); 232 hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); 233 hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); 234 write_unlock_bh(&dev_base_lock); 235 return 0; 236 } 237 238 /* Device list removal */ 239 static void unlist_netdevice(struct net_device *dev) 240 { 241 ASSERT_RTNL(); 242 243 /* Unlink dev from the device chain */ 244 write_lock_bh(&dev_base_lock); 245 list_del(&dev->dev_list); 246 hlist_del(&dev->name_hlist); 247 hlist_del(&dev->index_hlist); 248 write_unlock_bh(&dev_base_lock); 249 } 250 251 /* 252 * Our notifier list 253 */ 254 255 static RAW_NOTIFIER_HEAD(netdev_chain); 256 257 /* 258 * Device drivers call our routines to queue packets here. We empty the 259 * queue in the local softnet handler. 
260 */ 261 262 DEFINE_PER_CPU(struct softnet_data, softnet_data); 263 264 #ifdef CONFIG_LOCKDEP 265 /* 266 * register_netdevice() inits txq->_xmit_lock and sets lockdep class 267 * according to dev->type 268 */ 269 static const unsigned short netdev_lock_type[] = 270 {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25, 271 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET, 272 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM, 273 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP, 274 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD, 275 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25, 276 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP, 277 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD, 278 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI, 279 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE, 280 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, 281 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, 282 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211, 283 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID, 284 ARPHRD_NONE}; 285 286 static const char *netdev_lock_name[] = 287 {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", 288 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", 289 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", 290 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP", 291 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD", 292 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25", 293 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP", 294 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD", 295 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI", 296 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE", 297 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", 298 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", 299 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211", 300 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID", 301 "_xmit_NONE"}; 302 303 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; 304 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; 305 306 static inline unsigned short netdev_lock_pos(unsigned short dev_type) 307 { 308 int i; 309 310 for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++) 311 if (netdev_lock_type[i] == dev_type) 312 return i; 313 /* the last key is used by default */ 314 return ARRAY_SIZE(netdev_lock_type) - 1; 315 } 316 317 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, 318 unsigned short dev_type) 319 { 320 int i; 321 322 i = netdev_lock_pos(dev_type); 323 lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i], 324 netdev_lock_name[i]); 325 } 326 327 static inline void netdev_set_addr_lockdep_class(struct net_device *dev) 328 { 329 int i; 330 331 i = netdev_lock_pos(dev->type); 332 lockdep_set_class_and_name(&dev->addr_list_lock, 333 &netdev_addr_lock_key[i], 334 netdev_lock_name[i]); 335 } 336 #else 337 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, 338 unsigned short dev_type) 339 { 340 } 341 static inline void netdev_set_addr_lockdep_class(struct net_device *dev) 342 { 343 } 344 #endif 345 346 /******************************************************************************* 347 348 Protocol management and registration routines 349 350 *******************************************************************************/ 
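/*
 * Illustrative sketch (editor's example, not part of the original file):
 * a minimal packet_type handler as a module might register it with
 * dev_add_pack()/dev_remove_pack() below.  All names here are invented;
 * only the struct packet_type fields and the handler signature from
 * <linux/netdevice.h> are assumed.  Kept under #if 0 so it is never built.
 */
#if 0
static int example_pt_rcv(struct sk_buff *skb, struct net_device *dev,
			  struct packet_type *pt, struct net_device *orig_dev)
{
	/* A real handler would parse the packet here. */
	kfree_skb(skb);
	return NET_RX_SUCCESS;
}

static struct packet_type example_pt __read_mostly = {
	.type = __constant_htons(ETH_P_IP),	/* hashed into ptype_base */
	.dev  = NULL,				/* NULL matches any device */
	.func = example_pt_rcv,
};

static int __init example_pt_init(void)
{
	dev_add_pack(&example_pt);
	return 0;
}

static void __exit example_pt_exit(void)
{
	/* dev_remove_pack() waits for readers, so example_pt may be reused. */
	dev_remove_pack(&example_pt);
}
#endif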

/*
 * Add a protocol ID to the list. Now that the input handler is
 * smarter we can dispense with all the messy stuff that used to be
 * here.
 *
 * BEWARE!!! Protocol handlers that mangle input packets
 * MUST BE last in the hash buckets, and checking protocol handlers
 * MUST start from the promiscuous ptype_all chain in net_bh.
 * That is true today; do not change it.
 * Explanation: if a packet-mangling protocol handler were first in
 * the list, it could not tell that the packet is cloned and has to be
 * copied before being written to, so it would modify the data in place
 * and subsequent readers would see a corrupted packet.
 *							--ANK (980803)
 */

/**
 * dev_add_pack - add packet handler
 * @pt: packet type declaration
 *
 * Add a protocol handler to the networking stack. The passed &packet_type
 * is linked into kernel lists and may not be freed until it has been
 * removed from the kernel lists.
 *
 * This call does not sleep, therefore it can not guarantee that all
 * CPUs which are in the middle of receiving packets will see the new
 * packet type (until the next packet is received).
 */

void dev_add_pack(struct packet_type *pt)
{
	int hash;

	spin_lock_bh(&ptype_lock);
	if (pt->type == htons(ETH_P_ALL))
		list_add_rcu(&pt->list, &ptype_all);
	else {
		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
		list_add_rcu(&pt->list, &ptype_base[hash]);
	}
	spin_unlock_bh(&ptype_lock);
}

/**
 * __dev_remove_pack - remove packet handler
 * @pt: packet type declaration
 *
 * Remove a protocol handler that was previously added to the kernel
 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
 * from the kernel lists and can be freed or reused once this function
 * returns.
 *
 * The packet type might still be in use by receivers
 * and must not be freed until after all the CPUs have gone
 * through a quiescent state.
 */
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head;
	struct packet_type *pt1;

	spin_lock_bh(&ptype_lock);

	if (pt->type == htons(ETH_P_ALL))
		head = &ptype_all;
	else
		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
	spin_unlock_bh(&ptype_lock);
}
/**
 * dev_remove_pack - remove packet handler
 * @pt: packet type declaration
 *
 * Remove a protocol handler that was previously added to the kernel
 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
 * from the kernel lists and can be freed or reused once this function
 * returns.
 *
 * This call sleeps to guarantee that no CPU is looking at the packet
 * type after return.
442 */ 443 void dev_remove_pack(struct packet_type *pt) 444 { 445 __dev_remove_pack(pt); 446 447 synchronize_net(); 448 } 449 450 /****************************************************************************** 451 452 Device Boot-time Settings Routines 453 454 *******************************************************************************/ 455 456 /* Boot time configuration table */ 457 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; 458 459 /** 460 * netdev_boot_setup_add - add new setup entry 461 * @name: name of the device 462 * @map: configured settings for the device 463 * 464 * Adds new setup entry to the dev_boot_setup list. The function 465 * returns 0 on error and 1 on success. This is a generic routine to 466 * all netdevices. 467 */ 468 static int netdev_boot_setup_add(char *name, struct ifmap *map) 469 { 470 struct netdev_boot_setup *s; 471 int i; 472 473 s = dev_boot_setup; 474 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { 475 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { 476 memset(s[i].name, 0, sizeof(s[i].name)); 477 strlcpy(s[i].name, name, IFNAMSIZ); 478 memcpy(&s[i].map, map, sizeof(s[i].map)); 479 break; 480 } 481 } 482 483 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1; 484 } 485 486 /** 487 * netdev_boot_setup_check - check boot time settings 488 * @dev: the netdevice 489 * 490 * Check boot time settings for the device. 491 * The found settings are set for the device to be used 492 * later in the device probing. 493 * Returns 0 if no settings found, 1 if they are. 494 */ 495 int netdev_boot_setup_check(struct net_device *dev) 496 { 497 struct netdev_boot_setup *s = dev_boot_setup; 498 int i; 499 500 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { 501 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && 502 !strcmp(dev->name, s[i].name)) { 503 dev->irq = s[i].map.irq; 504 dev->base_addr = s[i].map.base_addr; 505 dev->mem_start = s[i].map.mem_start; 506 dev->mem_end = s[i].map.mem_end; 507 return 1; 508 } 509 } 510 return 0; 511 } 512 513 514 /** 515 * netdev_boot_base - get address from boot time settings 516 * @prefix: prefix for network device 517 * @unit: id for network device 518 * 519 * Check boot time settings for the base address of device. 520 * The found settings are set for the device to be used 521 * later in the device probing. 522 * Returns 0 if no settings found. 523 */ 524 unsigned long netdev_boot_base(const char *prefix, int unit) 525 { 526 const struct netdev_boot_setup *s = dev_boot_setup; 527 char name[IFNAMSIZ]; 528 int i; 529 530 sprintf(name, "%s%d", prefix, unit); 531 532 /* 533 * If device already registered then return base of 1 534 * to indicate not to probe for this interface 535 */ 536 if (__dev_get_by_name(&init_net, name)) 537 return 1; 538 539 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) 540 if (!strcmp(name, s[i].name)) 541 return s[i].map.base_addr; 542 return 0; 543 } 544 545 /* 546 * Saves at boot time configured settings for any netdevice. 
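 *
 * For example (illustrative values, not from the original source), a
 * command line of
 *
 *	netdev=9,0x300,0,0,eth1
 *
 * is parsed by get_options() below into irq 9, I/O base 0x300 and a zero
 * memory range for the device named "eth1".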
547 */ 548 int __init netdev_boot_setup(char *str) 549 { 550 int ints[5]; 551 struct ifmap map; 552 553 str = get_options(str, ARRAY_SIZE(ints), ints); 554 if (!str || !*str) 555 return 0; 556 557 /* Save settings */ 558 memset(&map, 0, sizeof(map)); 559 if (ints[0] > 0) 560 map.irq = ints[1]; 561 if (ints[0] > 1) 562 map.base_addr = ints[2]; 563 if (ints[0] > 2) 564 map.mem_start = ints[3]; 565 if (ints[0] > 3) 566 map.mem_end = ints[4]; 567 568 /* Add new entry to the list */ 569 return netdev_boot_setup_add(str, &map); 570 } 571 572 __setup("netdev=", netdev_boot_setup); 573 574 /******************************************************************************* 575 576 Device Interface Subroutines 577 578 *******************************************************************************/ 579 580 /** 581 * __dev_get_by_name - find a device by its name 582 * @net: the applicable net namespace 583 * @name: name to find 584 * 585 * Find an interface by name. Must be called under RTNL semaphore 586 * or @dev_base_lock. If the name is found a pointer to the device 587 * is returned. If the name is not found then %NULL is returned. The 588 * reference counters are not incremented so the caller must be 589 * careful with locks. 590 */ 591 592 struct net_device *__dev_get_by_name(struct net *net, const char *name) 593 { 594 struct hlist_node *p; 595 596 hlist_for_each(p, dev_name_hash(net, name)) { 597 struct net_device *dev 598 = hlist_entry(p, struct net_device, name_hlist); 599 if (!strncmp(dev->name, name, IFNAMSIZ)) 600 return dev; 601 } 602 return NULL; 603 } 604 605 /** 606 * dev_get_by_name - find a device by its name 607 * @net: the applicable net namespace 608 * @name: name to find 609 * 610 * Find an interface by name. This can be called from any 611 * context and does its own locking. The returned handle has 612 * the usage count incremented and the caller must use dev_put() to 613 * release it when it is no longer needed. %NULL is returned if no 614 * matching device is found. 615 */ 616 617 struct net_device *dev_get_by_name(struct net *net, const char *name) 618 { 619 struct net_device *dev; 620 621 read_lock(&dev_base_lock); 622 dev = __dev_get_by_name(net, name); 623 if (dev) 624 dev_hold(dev); 625 read_unlock(&dev_base_lock); 626 return dev; 627 } 628 629 /** 630 * __dev_get_by_index - find a device by its ifindex 631 * @net: the applicable net namespace 632 * @ifindex: index of device 633 * 634 * Search for an interface by index. Returns %NULL if the device 635 * is not found or a pointer to the device. The device has not 636 * had its reference counter increased so the caller must be careful 637 * about locking. The caller must hold either the RTNL semaphore 638 * or @dev_base_lock. 639 */ 640 641 struct net_device *__dev_get_by_index(struct net *net, int ifindex) 642 { 643 struct hlist_node *p; 644 645 hlist_for_each(p, dev_index_hash(net, ifindex)) { 646 struct net_device *dev 647 = hlist_entry(p, struct net_device, index_hlist); 648 if (dev->ifindex == ifindex) 649 return dev; 650 } 651 return NULL; 652 } 653 654 655 /** 656 * dev_get_by_index - find a device by its ifindex 657 * @net: the applicable net namespace 658 * @ifindex: index of device 659 * 660 * Search for an interface by index. Returns NULL if the device 661 * is not found or a pointer to the device. The device returned has 662 * had a reference added and the pointer is safe until the user calls 663 * dev_put to indicate they have finished with it. 
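 *
 * A typical caller (illustrative sketch, not from the original source)
 * looks like:
 *
 *	struct net_device *dev = dev_get_by_index(net, ifindex);
 *	if (dev) {
 *		...
 *		dev_put(dev);
 *	}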
664 */ 665 666 struct net_device *dev_get_by_index(struct net *net, int ifindex) 667 { 668 struct net_device *dev; 669 670 read_lock(&dev_base_lock); 671 dev = __dev_get_by_index(net, ifindex); 672 if (dev) 673 dev_hold(dev); 674 read_unlock(&dev_base_lock); 675 return dev; 676 } 677 678 /** 679 * dev_getbyhwaddr - find a device by its hardware address 680 * @net: the applicable net namespace 681 * @type: media type of device 682 * @ha: hardware address 683 * 684 * Search for an interface by MAC address. Returns NULL if the device 685 * is not found or a pointer to the device. The caller must hold the 686 * rtnl semaphore. The returned device has not had its ref count increased 687 * and the caller must therefore be careful about locking 688 * 689 * BUGS: 690 * If the API was consistent this would be __dev_get_by_hwaddr 691 */ 692 693 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha) 694 { 695 struct net_device *dev; 696 697 ASSERT_RTNL(); 698 699 for_each_netdev(net, dev) 700 if (dev->type == type && 701 !memcmp(dev->dev_addr, ha, dev->addr_len)) 702 return dev; 703 704 return NULL; 705 } 706 707 EXPORT_SYMBOL(dev_getbyhwaddr); 708 709 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) 710 { 711 struct net_device *dev; 712 713 ASSERT_RTNL(); 714 for_each_netdev(net, dev) 715 if (dev->type == type) 716 return dev; 717 718 return NULL; 719 } 720 721 EXPORT_SYMBOL(__dev_getfirstbyhwtype); 722 723 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) 724 { 725 struct net_device *dev; 726 727 rtnl_lock(); 728 dev = __dev_getfirstbyhwtype(net, type); 729 if (dev) 730 dev_hold(dev); 731 rtnl_unlock(); 732 return dev; 733 } 734 735 EXPORT_SYMBOL(dev_getfirstbyhwtype); 736 737 /** 738 * dev_get_by_flags - find any device with given flags 739 * @net: the applicable net namespace 740 * @if_flags: IFF_* values 741 * @mask: bitmask of bits in if_flags to check 742 * 743 * Search for any interface with the given flags. Returns NULL if a device 744 * is not found or a pointer to the device. The device returned has 745 * had a reference added and the pointer is safe until the user calls 746 * dev_put to indicate they have finished with it. 747 */ 748 749 struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask) 750 { 751 struct net_device *dev, *ret; 752 753 ret = NULL; 754 read_lock(&dev_base_lock); 755 for_each_netdev(net, dev) { 756 if (((dev->flags ^ if_flags) & mask) == 0) { 757 dev_hold(dev); 758 ret = dev; 759 break; 760 } 761 } 762 read_unlock(&dev_base_lock); 763 return ret; 764 } 765 766 /** 767 * dev_valid_name - check if name is okay for network device 768 * @name: name string 769 * 770 * Network device names need to be valid file names to 771 * to allow sysfs to work. We also disallow any kind of 772 * whitespace. 773 */ 774 int dev_valid_name(const char *name) 775 { 776 if (*name == '\0') 777 return 0; 778 if (strlen(name) >= IFNAMSIZ) 779 return 0; 780 if (!strcmp(name, ".") || !strcmp(name, "..")) 781 return 0; 782 783 while (*name) { 784 if (*name == '/' || isspace(*name)) 785 return 0; 786 name++; 787 } 788 return 1; 789 } 790 791 /** 792 * __dev_alloc_name - allocate a name for a device 793 * @net: network namespace to allocate the device name in 794 * @name: name format string 795 * @buf: scratch buffer and result name string 796 * 797 * Passed a format string - eg "lt%d" it will try and find a suitable 798 * id. 
It scans list of devices to build up a free map, then chooses 799 * the first empty slot. The caller must hold the dev_base or rtnl lock 800 * while allocating the name and adding the device in order to avoid 801 * duplicates. 802 * Limited to bits_per_byte * page size devices (ie 32K on most platforms). 803 * Returns the number of the unit assigned or a negative errno code. 804 */ 805 806 static int __dev_alloc_name(struct net *net, const char *name, char *buf) 807 { 808 int i = 0; 809 const char *p; 810 const int max_netdevices = 8*PAGE_SIZE; 811 unsigned long *inuse; 812 struct net_device *d; 813 814 p = strnchr(name, IFNAMSIZ-1, '%'); 815 if (p) { 816 /* 817 * Verify the string as this thing may have come from 818 * the user. There must be either one "%d" and no other "%" 819 * characters. 820 */ 821 if (p[1] != 'd' || strchr(p + 2, '%')) 822 return -EINVAL; 823 824 /* Use one page as a bit array of possible slots */ 825 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC); 826 if (!inuse) 827 return -ENOMEM; 828 829 for_each_netdev(net, d) { 830 if (!sscanf(d->name, name, &i)) 831 continue; 832 if (i < 0 || i >= max_netdevices) 833 continue; 834 835 /* avoid cases where sscanf is not exact inverse of printf */ 836 snprintf(buf, IFNAMSIZ, name, i); 837 if (!strncmp(buf, d->name, IFNAMSIZ)) 838 set_bit(i, inuse); 839 } 840 841 i = find_first_zero_bit(inuse, max_netdevices); 842 free_page((unsigned long) inuse); 843 } 844 845 snprintf(buf, IFNAMSIZ, name, i); 846 if (!__dev_get_by_name(net, buf)) 847 return i; 848 849 /* It is possible to run out of possible slots 850 * when the name is long and there isn't enough space left 851 * for the digits, or if all bits are used. 852 */ 853 return -ENFILE; 854 } 855 856 /** 857 * dev_alloc_name - allocate a name for a device 858 * @dev: device 859 * @name: name format string 860 * 861 * Passed a format string - eg "lt%d" it will try and find a suitable 862 * id. It scans list of devices to build up a free map, then chooses 863 * the first empty slot. The caller must hold the dev_base or rtnl lock 864 * while allocating the name and adding the device in order to avoid 865 * duplicates. 866 * Limited to bits_per_byte * page size devices (ie 32K on most platforms). 867 * Returns the number of the unit assigned or a negative errno code. 868 */ 869 870 int dev_alloc_name(struct net_device *dev, const char *name) 871 { 872 char buf[IFNAMSIZ]; 873 struct net *net; 874 int ret; 875 876 BUG_ON(!dev_net(dev)); 877 net = dev_net(dev); 878 ret = __dev_alloc_name(net, name, buf); 879 if (ret >= 0) 880 strlcpy(dev->name, buf, IFNAMSIZ); 881 return ret; 882 } 883 884 885 /** 886 * dev_change_name - change name of a device 887 * @dev: device 888 * @newname: name (or format string) must be at least IFNAMSIZ 889 * 890 * Change name of a device, can pass format strings "eth%d". 891 * for wildcarding. 
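 *
 * For example (illustrative), dev_change_name(dev, "eth%d") asks the
 * kernel to pick the first free ethN name via dev_alloc_name(), while
 * dev_change_name(dev, "dmz0") requests that exact name and fails with
 * -EEXIST if it is already taken.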
892 */ 893 int dev_change_name(struct net_device *dev, const char *newname) 894 { 895 char oldname[IFNAMSIZ]; 896 int err = 0; 897 int ret; 898 struct net *net; 899 900 ASSERT_RTNL(); 901 BUG_ON(!dev_net(dev)); 902 903 net = dev_net(dev); 904 if (dev->flags & IFF_UP) 905 return -EBUSY; 906 907 if (!dev_valid_name(newname)) 908 return -EINVAL; 909 910 if (strncmp(newname, dev->name, IFNAMSIZ) == 0) 911 return 0; 912 913 memcpy(oldname, dev->name, IFNAMSIZ); 914 915 if (strchr(newname, '%')) { 916 err = dev_alloc_name(dev, newname); 917 if (err < 0) 918 return err; 919 } 920 else if (__dev_get_by_name(net, newname)) 921 return -EEXIST; 922 else 923 strlcpy(dev->name, newname, IFNAMSIZ); 924 925 rollback: 926 /* For now only devices in the initial network namespace 927 * are in sysfs. 928 */ 929 if (net == &init_net) { 930 ret = device_rename(&dev->dev, dev->name); 931 if (ret) { 932 memcpy(dev->name, oldname, IFNAMSIZ); 933 return ret; 934 } 935 } 936 937 write_lock_bh(&dev_base_lock); 938 hlist_del(&dev->name_hlist); 939 hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); 940 write_unlock_bh(&dev_base_lock); 941 942 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); 943 ret = notifier_to_errno(ret); 944 945 if (ret) { 946 if (err) { 947 printk(KERN_ERR 948 "%s: name change rollback failed: %d.\n", 949 dev->name, ret); 950 } else { 951 err = ret; 952 memcpy(dev->name, oldname, IFNAMSIZ); 953 goto rollback; 954 } 955 } 956 957 return err; 958 } 959 960 /** 961 * dev_set_alias - change ifalias of a device 962 * @dev: device 963 * @alias: name up to IFALIASZ 964 * @len: limit of bytes to copy from info 965 * 966 * Set ifalias for a device, 967 */ 968 int dev_set_alias(struct net_device *dev, const char *alias, size_t len) 969 { 970 ASSERT_RTNL(); 971 972 if (len >= IFALIASZ) 973 return -EINVAL; 974 975 if (!len) { 976 if (dev->ifalias) { 977 kfree(dev->ifalias); 978 dev->ifalias = NULL; 979 } 980 return 0; 981 } 982 983 dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL); 984 if (!dev->ifalias) 985 return -ENOMEM; 986 987 strlcpy(dev->ifalias, alias, len+1); 988 return len; 989 } 990 991 992 /** 993 * netdev_features_change - device changes features 994 * @dev: device to cause notification 995 * 996 * Called to indicate a device has changed features. 997 */ 998 void netdev_features_change(struct net_device *dev) 999 { 1000 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev); 1001 } 1002 EXPORT_SYMBOL(netdev_features_change); 1003 1004 /** 1005 * netdev_state_change - device changes state 1006 * @dev: device to cause notification 1007 * 1008 * Called to indicate a device has changed state. This function calls 1009 * the notifier chains for netdev_chain and sends a NEWLINK message 1010 * to the routing socket. 1011 */ 1012 void netdev_state_change(struct net_device *dev) 1013 { 1014 if (dev->flags & IFF_UP) { 1015 call_netdevice_notifiers(NETDEV_CHANGE, dev); 1016 rtmsg_ifinfo(RTM_NEWLINK, dev, 0); 1017 } 1018 } 1019 1020 void netdev_bonding_change(struct net_device *dev) 1021 { 1022 call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev); 1023 } 1024 EXPORT_SYMBOL(netdev_bonding_change); 1025 1026 /** 1027 * dev_load - load a network module 1028 * @net: the applicable net namespace 1029 * @name: name of interface 1030 * 1031 * If a network interface is not present and the process has suitable 1032 * privileges this function loads the module. If module loading is not 1033 * available in this kernel then it becomes a nop. 
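 *
 * Illustrative use (not from the original source):
 *
 *	dev_load(net, name);
 *	dev = dev_get_by_name(net, name);
 *
 * where dev simply ends up NULL if no module provided the interface.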
 */

void dev_load(struct net *net, const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(net, name);
	read_unlock(&dev_base_lock);

	if (!dev && capable(CAP_SYS_MODULE))
		request_module("%s", name);
}

/**
 * dev_open - prepare an interface for use.
 * @dev: device to open
 *
 * Takes a device from down to up state. The device's private open
 * function is invoked and then the multicast lists are loaded. Finally
 * the device is moved into the up state and a %NETDEV_UP message is
 * sent to the netdev notifier chain.
 *
 * Calling this function on an active interface is a nop. On a failure
 * a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int ret = 0;

	ASSERT_RTNL();

	/*
	 * Is it already up?
	 */

	if (dev->flags & IFF_UP)
		return 0;

	/*
	 * Is it even present?
	 */
	if (!netif_device_present(dev))
		return -ENODEV;

	/*
	 * Call device private open method
	 */
	set_bit(__LINK_STATE_START, &dev->state);

	if (ops->ndo_validate_addr)
		ret = ops->ndo_validate_addr(dev);

	if (!ret && ops->ndo_open)
		ret = ops->ndo_open(dev);

	/*
	 * If it went open OK then:
	 */

	if (ret)
		clear_bit(__LINK_STATE_START, &dev->state);
	else {
		/*
		 * Set the flags.
		 */
		dev->flags |= IFF_UP;

		/*
		 * Initialize multicasting status
		 */
		dev_set_rx_mode(dev);

		/*
		 * Wakeup transmit queue engine
		 */
		dev_activate(dev);

		/*
		 * ... and announce new interface.
		 */
		call_netdevice_notifiers(NETDEV_UP, dev);
	}

	return ret;
}

/**
 * dev_close - shutdown an interface.
 * @dev: device to shutdown
 *
 * This function moves an active device into down state. A
 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 * chain.
 */
int dev_close(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	ASSERT_RTNL();

	might_sleep();

	if (!(dev->flags & IFF_UP))
		return 0;

	/*
	 * Tell people we are going down, so that they can prepare for
	 * the device going away while it is still operating.
	 */
	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

	clear_bit(__LINK_STATE_START, &dev->state);

	/* Synchronize to scheduled poll. We cannot touch the poll list,
	 * it can even be on a different cpu. So just clear netif_running().
	 *
	 * dev->stop() will invoke napi_disable() on all of its
	 * napi_struct instances on this device.
	 */
	smp_mb__after_clear_bit(); /* Commit netif_running(). */

	dev_deactivate(dev);

	/*
	 * Call the device specific close. This cannot fail and is
	 * only done if the device is UP.
	 *
	 * We allow it to be called even after a DETACH hot-plug
	 * event.
	 */
	if (ops->ndo_stop)
		ops->ndo_stop(dev);

	/*
	 * Device is now down.
1171 */ 1172 1173 dev->flags &= ~IFF_UP; 1174 1175 /* 1176 * Tell people we are down 1177 */ 1178 call_netdevice_notifiers(NETDEV_DOWN, dev); 1179 1180 return 0; 1181 } 1182 1183 1184 /** 1185 * dev_disable_lro - disable Large Receive Offload on a device 1186 * @dev: device 1187 * 1188 * Disable Large Receive Offload (LRO) on a net device. Must be 1189 * called under RTNL. This is needed if received packets may be 1190 * forwarded to another interface. 1191 */ 1192 void dev_disable_lro(struct net_device *dev) 1193 { 1194 if (dev->ethtool_ops && dev->ethtool_ops->get_flags && 1195 dev->ethtool_ops->set_flags) { 1196 u32 flags = dev->ethtool_ops->get_flags(dev); 1197 if (flags & ETH_FLAG_LRO) { 1198 flags &= ~ETH_FLAG_LRO; 1199 dev->ethtool_ops->set_flags(dev, flags); 1200 } 1201 } 1202 WARN_ON(dev->features & NETIF_F_LRO); 1203 } 1204 EXPORT_SYMBOL(dev_disable_lro); 1205 1206 1207 static int dev_boot_phase = 1; 1208 1209 /* 1210 * Device change register/unregister. These are not inline or static 1211 * as we export them to the world. 1212 */ 1213 1214 /** 1215 * register_netdevice_notifier - register a network notifier block 1216 * @nb: notifier 1217 * 1218 * Register a notifier to be called when network device events occur. 1219 * The notifier passed is linked into the kernel structures and must 1220 * not be reused until it has been unregistered. A negative errno code 1221 * is returned on a failure. 1222 * 1223 * When registered all registration and up events are replayed 1224 * to the new notifier to allow device to have a race free 1225 * view of the network device list. 1226 */ 1227 1228 int register_netdevice_notifier(struct notifier_block *nb) 1229 { 1230 struct net_device *dev; 1231 struct net_device *last; 1232 struct net *net; 1233 int err; 1234 1235 rtnl_lock(); 1236 err = raw_notifier_chain_register(&netdev_chain, nb); 1237 if (err) 1238 goto unlock; 1239 if (dev_boot_phase) 1240 goto unlock; 1241 for_each_net(net) { 1242 for_each_netdev(net, dev) { 1243 err = nb->notifier_call(nb, NETDEV_REGISTER, dev); 1244 err = notifier_to_errno(err); 1245 if (err) 1246 goto rollback; 1247 1248 if (!(dev->flags & IFF_UP)) 1249 continue; 1250 1251 nb->notifier_call(nb, NETDEV_UP, dev); 1252 } 1253 } 1254 1255 unlock: 1256 rtnl_unlock(); 1257 return err; 1258 1259 rollback: 1260 last = dev; 1261 for_each_net(net) { 1262 for_each_netdev(net, dev) { 1263 if (dev == last) 1264 break; 1265 1266 if (dev->flags & IFF_UP) { 1267 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); 1268 nb->notifier_call(nb, NETDEV_DOWN, dev); 1269 } 1270 nb->notifier_call(nb, NETDEV_UNREGISTER, dev); 1271 } 1272 } 1273 1274 raw_notifier_chain_unregister(&netdev_chain, nb); 1275 goto unlock; 1276 } 1277 1278 /** 1279 * unregister_netdevice_notifier - unregister a network notifier block 1280 * @nb: notifier 1281 * 1282 * Unregister a notifier previously registered by 1283 * register_netdevice_notifier(). The notifier is unlinked into the 1284 * kernel structures and may then be reused. A negative errno code 1285 * is returned on a failure. 1286 */ 1287 1288 int unregister_netdevice_notifier(struct notifier_block *nb) 1289 { 1290 int err; 1291 1292 rtnl_lock(); 1293 err = raw_notifier_chain_unregister(&netdev_chain, nb); 1294 rtnl_unlock(); 1295 return err; 1296 } 1297 1298 /** 1299 * call_netdevice_notifiers - call all network notifier blocks 1300 * @val: value passed unmodified to notifier function 1301 * @dev: net_device pointer passed unmodified to notifier function 1302 * 1303 * Call all network notifier blocks. 
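 *
 * A consumer registered earlier with register_netdevice_notifier()
 * receives these events roughly as follows (illustrative sketch,
 * names invented for the example):
 *
 *	static int example_event(struct notifier_block *nb,
 *				 unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UP)
 *			printk(KERN_INFO "%s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block example_nb = {
 *		.notifier_call = example_event,
 *	};
 *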
Parameters and return value 1304 * are as for raw_notifier_call_chain(). 1305 */ 1306 1307 int call_netdevice_notifiers(unsigned long val, struct net_device *dev) 1308 { 1309 return raw_notifier_call_chain(&netdev_chain, val, dev); 1310 } 1311 1312 /* When > 0 there are consumers of rx skb time stamps */ 1313 static atomic_t netstamp_needed = ATOMIC_INIT(0); 1314 1315 void net_enable_timestamp(void) 1316 { 1317 atomic_inc(&netstamp_needed); 1318 } 1319 1320 void net_disable_timestamp(void) 1321 { 1322 atomic_dec(&netstamp_needed); 1323 } 1324 1325 static inline void net_timestamp(struct sk_buff *skb) 1326 { 1327 if (atomic_read(&netstamp_needed)) 1328 __net_timestamp(skb); 1329 else 1330 skb->tstamp.tv64 = 0; 1331 } 1332 1333 /* 1334 * Support routine. Sends outgoing frames to any network 1335 * taps currently in use. 1336 */ 1337 1338 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) 1339 { 1340 struct packet_type *ptype; 1341 1342 net_timestamp(skb); 1343 1344 rcu_read_lock(); 1345 list_for_each_entry_rcu(ptype, &ptype_all, list) { 1346 /* Never send packets back to the socket 1347 * they originated from - MvS ([email protected]) 1348 */ 1349 if ((ptype->dev == dev || !ptype->dev) && 1350 (ptype->af_packet_priv == NULL || 1351 (struct sock *)ptype->af_packet_priv != skb->sk)) { 1352 struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC); 1353 if (!skb2) 1354 break; 1355 1356 /* skb->nh should be correctly 1357 set by sender, so that the second statement is 1358 just protection against buggy protocols. 1359 */ 1360 skb_reset_mac_header(skb2); 1361 1362 if (skb_network_header(skb2) < skb2->data || 1363 skb2->network_header > skb2->tail) { 1364 if (net_ratelimit()) 1365 printk(KERN_CRIT "protocol %04x is " 1366 "buggy, dev %s\n", 1367 skb2->protocol, dev->name); 1368 skb_reset_network_header(skb2); 1369 } 1370 1371 skb2->transport_header = skb2->network_header; 1372 skb2->pkt_type = PACKET_OUTGOING; 1373 ptype->func(skb2, skb->dev, ptype, skb->dev); 1374 } 1375 } 1376 rcu_read_unlock(); 1377 } 1378 1379 1380 static inline void __netif_reschedule(struct Qdisc *q) 1381 { 1382 struct softnet_data *sd; 1383 unsigned long flags; 1384 1385 local_irq_save(flags); 1386 sd = &__get_cpu_var(softnet_data); 1387 q->next_sched = sd->output_queue; 1388 sd->output_queue = q; 1389 raise_softirq_irqoff(NET_TX_SOFTIRQ); 1390 local_irq_restore(flags); 1391 } 1392 1393 void __netif_schedule(struct Qdisc *q) 1394 { 1395 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) 1396 __netif_reschedule(q); 1397 } 1398 EXPORT_SYMBOL(__netif_schedule); 1399 1400 void dev_kfree_skb_irq(struct sk_buff *skb) 1401 { 1402 if (atomic_dec_and_test(&skb->users)) { 1403 struct softnet_data *sd; 1404 unsigned long flags; 1405 1406 local_irq_save(flags); 1407 sd = &__get_cpu_var(softnet_data); 1408 skb->next = sd->completion_queue; 1409 sd->completion_queue = skb; 1410 raise_softirq_irqoff(NET_TX_SOFTIRQ); 1411 local_irq_restore(flags); 1412 } 1413 } 1414 EXPORT_SYMBOL(dev_kfree_skb_irq); 1415 1416 void dev_kfree_skb_any(struct sk_buff *skb) 1417 { 1418 if (in_irq() || irqs_disabled()) 1419 dev_kfree_skb_irq(skb); 1420 else 1421 dev_kfree_skb(skb); 1422 } 1423 EXPORT_SYMBOL(dev_kfree_skb_any); 1424 1425 1426 /** 1427 * netif_device_detach - mark device as removed 1428 * @dev: network device 1429 * 1430 * Mark device as removed from system and therefore no longer available. 
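 *
 * Typically paired with netif_device_attach() around power transitions,
 * e.g. (illustrative sketch, not from the original source):
 *
 *	static int example_suspend(struct pci_dev *pdev, pm_message_t state)
 *	{
 *		struct net_device *dev = pci_get_drvdata(pdev);
 *
 *		netif_device_detach(dev);
 *		...
 *		return 0;
 *	}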
1431 */ 1432 void netif_device_detach(struct net_device *dev) 1433 { 1434 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && 1435 netif_running(dev)) { 1436 netif_stop_queue(dev); 1437 } 1438 } 1439 EXPORT_SYMBOL(netif_device_detach); 1440 1441 /** 1442 * netif_device_attach - mark device as attached 1443 * @dev: network device 1444 * 1445 * Mark device as attached from system and restart if needed. 1446 */ 1447 void netif_device_attach(struct net_device *dev) 1448 { 1449 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && 1450 netif_running(dev)) { 1451 netif_wake_queue(dev); 1452 __netdev_watchdog_up(dev); 1453 } 1454 } 1455 EXPORT_SYMBOL(netif_device_attach); 1456 1457 static bool can_checksum_protocol(unsigned long features, __be16 protocol) 1458 { 1459 return ((features & NETIF_F_GEN_CSUM) || 1460 ((features & NETIF_F_IP_CSUM) && 1461 protocol == htons(ETH_P_IP)) || 1462 ((features & NETIF_F_IPV6_CSUM) && 1463 protocol == htons(ETH_P_IPV6))); 1464 } 1465 1466 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) 1467 { 1468 if (can_checksum_protocol(dev->features, skb->protocol)) 1469 return true; 1470 1471 if (skb->protocol == htons(ETH_P_8021Q)) { 1472 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; 1473 if (can_checksum_protocol(dev->features & dev->vlan_features, 1474 veh->h_vlan_encapsulated_proto)) 1475 return true; 1476 } 1477 1478 return false; 1479 } 1480 1481 /* 1482 * Invalidate hardware checksum when packet is to be mangled, and 1483 * complete checksum manually on outgoing path. 1484 */ 1485 int skb_checksum_help(struct sk_buff *skb) 1486 { 1487 __wsum csum; 1488 int ret = 0, offset; 1489 1490 if (skb->ip_summed == CHECKSUM_COMPLETE) 1491 goto out_set_summed; 1492 1493 if (unlikely(skb_shinfo(skb)->gso_size)) { 1494 /* Let GSO fix up the checksum. */ 1495 goto out_set_summed; 1496 } 1497 1498 offset = skb->csum_start - skb_headroom(skb); 1499 BUG_ON(offset >= skb_headlen(skb)); 1500 csum = skb_checksum(skb, offset, skb->len - offset, 0); 1501 1502 offset += skb->csum_offset; 1503 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb)); 1504 1505 if (skb_cloned(skb) && 1506 !skb_clone_writable(skb, offset + sizeof(__sum16))) { 1507 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); 1508 if (ret) 1509 goto out; 1510 } 1511 1512 *(__sum16 *)(skb->data + offset) = csum_fold(csum); 1513 out_set_summed: 1514 skb->ip_summed = CHECKSUM_NONE; 1515 out: 1516 return ret; 1517 } 1518 1519 /** 1520 * skb_gso_segment - Perform segmentation on skb. 1521 * @skb: buffer to segment 1522 * @features: features for the output path (see dev->features) 1523 * 1524 * This function segments the given skb and returns a list of segments. 1525 * 1526 * It may return NULL if the skb requires no segmentation. This is 1527 * only possible when GSO is used for verifying header integrity. 
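 *
 * Callers walk the returned list roughly like this (illustrative
 * sketch; dev_hard_start_xmit() below is the real consumer):
 *
 *	segs = skb_gso_segment(skb, dev->features);
 *	if (IS_ERR(segs))
 *		goto drop;
 *	while (segs) {
 *		struct sk_buff *nskb = segs;
 *
 *		segs = segs->next;
 *		nskb->next = NULL;
 *		... hand nskb to the driver ...
 *	}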
1528 */ 1529 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) 1530 { 1531 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); 1532 struct packet_type *ptype; 1533 __be16 type = skb->protocol; 1534 int err; 1535 1536 BUG_ON(skb_shinfo(skb)->frag_list); 1537 1538 skb_reset_mac_header(skb); 1539 skb->mac_len = skb->network_header - skb->mac_header; 1540 __skb_pull(skb, skb->mac_len); 1541 1542 if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) { 1543 if (skb_header_cloned(skb) && 1544 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) 1545 return ERR_PTR(err); 1546 } 1547 1548 rcu_read_lock(); 1549 list_for_each_entry_rcu(ptype, 1550 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { 1551 if (ptype->type == type && !ptype->dev && ptype->gso_segment) { 1552 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { 1553 err = ptype->gso_send_check(skb); 1554 segs = ERR_PTR(err); 1555 if (err || skb_gso_ok(skb, features)) 1556 break; 1557 __skb_push(skb, (skb->data - 1558 skb_network_header(skb))); 1559 } 1560 segs = ptype->gso_segment(skb, features); 1561 break; 1562 } 1563 } 1564 rcu_read_unlock(); 1565 1566 __skb_push(skb, skb->data - skb_mac_header(skb)); 1567 1568 return segs; 1569 } 1570 1571 EXPORT_SYMBOL(skb_gso_segment); 1572 1573 /* Take action when hardware reception checksum errors are detected. */ 1574 #ifdef CONFIG_BUG 1575 void netdev_rx_csum_fault(struct net_device *dev) 1576 { 1577 if (net_ratelimit()) { 1578 printk(KERN_ERR "%s: hw csum failure.\n", 1579 dev ? dev->name : "<unknown>"); 1580 dump_stack(); 1581 } 1582 } 1583 EXPORT_SYMBOL(netdev_rx_csum_fault); 1584 #endif 1585 1586 /* Actually, we should eliminate this check as soon as we know, that: 1587 * 1. IOMMU is present and allows to map all the memory. 1588 * 2. No high memory really exists on this machine. 1589 */ 1590 1591 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) 1592 { 1593 #ifdef CONFIG_HIGHMEM 1594 int i; 1595 1596 if (dev->features & NETIF_F_HIGHDMA) 1597 return 0; 1598 1599 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 1600 if (PageHighMem(skb_shinfo(skb)->frags[i].page)) 1601 return 1; 1602 1603 #endif 1604 return 0; 1605 } 1606 1607 struct dev_gso_cb { 1608 void (*destructor)(struct sk_buff *skb); 1609 }; 1610 1611 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb) 1612 1613 static void dev_gso_skb_destructor(struct sk_buff *skb) 1614 { 1615 struct dev_gso_cb *cb; 1616 1617 do { 1618 struct sk_buff *nskb = skb->next; 1619 1620 skb->next = nskb->next; 1621 nskb->next = NULL; 1622 kfree_skb(nskb); 1623 } while (skb->next); 1624 1625 cb = DEV_GSO_CB(skb); 1626 if (cb->destructor) 1627 cb->destructor(skb); 1628 } 1629 1630 /** 1631 * dev_gso_segment - Perform emulated hardware segmentation on skb. 1632 * @skb: buffer to segment 1633 * 1634 * This function segments the given skb and stores the list of segments 1635 * in skb->next. 1636 */ 1637 static int dev_gso_segment(struct sk_buff *skb) 1638 { 1639 struct net_device *dev = skb->dev; 1640 struct sk_buff *segs; 1641 int features = dev->features & ~(illegal_highdma(dev, skb) ? 1642 NETIF_F_SG : 0); 1643 1644 segs = skb_gso_segment(skb, features); 1645 1646 /* Verifying header integrity only. 
*/ 1647 if (!segs) 1648 return 0; 1649 1650 if (IS_ERR(segs)) 1651 return PTR_ERR(segs); 1652 1653 skb->next = segs; 1654 DEV_GSO_CB(skb)->destructor = skb->destructor; 1655 skb->destructor = dev_gso_skb_destructor; 1656 1657 return 0; 1658 } 1659 1660 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, 1661 struct netdev_queue *txq) 1662 { 1663 const struct net_device_ops *ops = dev->netdev_ops; 1664 1665 prefetch(&dev->netdev_ops->ndo_start_xmit); 1666 if (likely(!skb->next)) { 1667 if (!list_empty(&ptype_all)) 1668 dev_queue_xmit_nit(skb, dev); 1669 1670 if (netif_needs_gso(dev, skb)) { 1671 if (unlikely(dev_gso_segment(skb))) 1672 goto out_kfree_skb; 1673 if (skb->next) 1674 goto gso; 1675 } 1676 1677 return ops->ndo_start_xmit(skb, dev); 1678 } 1679 1680 gso: 1681 do { 1682 struct sk_buff *nskb = skb->next; 1683 int rc; 1684 1685 skb->next = nskb->next; 1686 nskb->next = NULL; 1687 rc = ops->ndo_start_xmit(nskb, dev); 1688 if (unlikely(rc)) { 1689 nskb->next = skb->next; 1690 skb->next = nskb; 1691 return rc; 1692 } 1693 if (unlikely(netif_tx_queue_stopped(txq) && skb->next)) 1694 return NETDEV_TX_BUSY; 1695 } while (skb->next); 1696 1697 skb->destructor = DEV_GSO_CB(skb)->destructor; 1698 1699 out_kfree_skb: 1700 kfree_skb(skb); 1701 return 0; 1702 } 1703 1704 static u32 simple_tx_hashrnd; 1705 static int simple_tx_hashrnd_initialized = 0; 1706 1707 static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) 1708 { 1709 u32 addr1, addr2, ports; 1710 u32 hash, ihl; 1711 u8 ip_proto = 0; 1712 1713 if (unlikely(!simple_tx_hashrnd_initialized)) { 1714 get_random_bytes(&simple_tx_hashrnd, 4); 1715 simple_tx_hashrnd_initialized = 1; 1716 } 1717 1718 switch (skb->protocol) { 1719 case htons(ETH_P_IP): 1720 if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))) 1721 ip_proto = ip_hdr(skb)->protocol; 1722 addr1 = ip_hdr(skb)->saddr; 1723 addr2 = ip_hdr(skb)->daddr; 1724 ihl = ip_hdr(skb)->ihl; 1725 break; 1726 case htons(ETH_P_IPV6): 1727 ip_proto = ipv6_hdr(skb)->nexthdr; 1728 addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3]; 1729 addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3]; 1730 ihl = (40 >> 2); 1731 break; 1732 default: 1733 return 0; 1734 } 1735 1736 1737 switch (ip_proto) { 1738 case IPPROTO_TCP: 1739 case IPPROTO_UDP: 1740 case IPPROTO_DCCP: 1741 case IPPROTO_ESP: 1742 case IPPROTO_AH: 1743 case IPPROTO_SCTP: 1744 case IPPROTO_UDPLITE: 1745 ports = *((u32 *) (skb_network_header(skb) + (ihl * 4))); 1746 break; 1747 1748 default: 1749 ports = 0; 1750 break; 1751 } 1752 1753 hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd); 1754 1755 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); 1756 } 1757 1758 static struct netdev_queue *dev_pick_tx(struct net_device *dev, 1759 struct sk_buff *skb) 1760 { 1761 const struct net_device_ops *ops = dev->netdev_ops; 1762 u16 queue_index = 0; 1763 1764 if (ops->ndo_select_queue) 1765 queue_index = ops->ndo_select_queue(dev, skb); 1766 else if (dev->real_num_tx_queues > 1) 1767 queue_index = simple_tx_hash(dev, skb); 1768 1769 skb_set_queue_mapping(skb, queue_index); 1770 return netdev_get_tx_queue(dev, queue_index); 1771 } 1772 1773 /** 1774 * dev_queue_xmit - transmit a buffer 1775 * @skb: buffer to transmit 1776 * 1777 * Queue a buffer for transmission to a network device. The caller must 1778 * have set the device and priority and built the buffer before calling 1779 * this function. The function can be called from an interrupt. 1780 * 1781 * A negative errno code is returned on a failure. 
A success does not
 * guarantee the frame will be transmitted as it may be dropped due
 * to congestion or traffic shaping.
 *
 * -----------------------------------------------------------------------------------
 * I notice this method can also return errors from the queue disciplines,
 * including NET_XMIT_DROP, which is a positive value. So, errors can also
 * be positive.
 *
 * Regardless of the return value, the skb is consumed, so it is currently
 * difficult to retry a send to this method. (You can bump the ref count
 * before sending to hold a reference for retry if you are careful.)
 *
 * When calling this method, interrupts MUST be enabled. This is because
 * the BH enable code must have IRQs enabled so that it will not deadlock.
 * --BLG
 */
int dev_queue_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct netdev_queue *txq;
	struct Qdisc *q;
	int rc = -ENOMEM;

	/* GSO will handle the following emulations directly. */
	if (netif_needs_gso(dev, skb))
		goto gso;

	if (skb_shinfo(skb)->frag_list &&
	    !(dev->features & NETIF_F_FRAGLIST) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* Fragmented skb is linearized if device does not support SG,
	 * or if at least one of fragments is in highmem and device
	 * does not support DMA from it.
	 */
	if (skb_shinfo(skb)->nr_frags &&
	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* If packet is not checksummed and device does not support
	 * checksumming for this protocol, complete checksumming here.
	 */
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		skb_set_transport_header(skb, skb->csum_start -
					      skb_headroom(skb));
		if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
			goto out_kfree_skb;
	}

gso:
	/* Disable soft irqs for various locks below. Also
	 * stops preemption for RCU.
	 */
	rcu_read_lock_bh();

	txq = dev_pick_tx(dev, skb);
	q = rcu_dereference(txq->qdisc);

#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
#endif
	if (q->enqueue) {
		spinlock_t *root_lock = qdisc_lock(q);

		spin_lock(root_lock);

		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
			kfree_skb(skb);
			rc = NET_XMIT_DROP;
		} else {
			rc = qdisc_enqueue_root(skb, q);
			qdisc_run(q);
		}
		spin_unlock(root_lock);

		goto out;
	}

	/* The device has no queue. Common case for software devices:
	   loopback, all the sorts of tunnels...

	   Really, it is unlikely that netif_tx_lock protection is necessary
	   here. (f.e. loopback and IP tunnels are clean ignoring statistics
	   counters.)
	   However, it is possible that they rely on the protection
	   we provide here.

	   So check for that and take the lock anyway; it is not prone to
	   deadlocks. Either that, or get rid of the noqueue qdisc entirely,
	   which is even simpler 8)
	 */
	if (dev->flags & IFF_UP) {
		int cpu = smp_processor_id(); /* ok because BHs are off */

		if (txq->xmit_lock_owner != cpu) {

			HARD_TX_LOCK(dev, txq, cpu);

			if (!netif_tx_queue_stopped(txq)) {
				rc = 0;
				if (!dev_hard_start_xmit(skb, dev, txq)) {
					HARD_TX_UNLOCK(dev, txq);
					goto out;
				}
			}
			HARD_TX_UNLOCK(dev, txq);
			if (net_ratelimit())
				printk(KERN_CRIT "Virtual device %s asks to "
				       "queue packet!\n", dev->name);
		} else {
			/* Recursion is detected! It is possible,
			 * unfortunately */
			if (net_ratelimit())
				printk(KERN_CRIT "Dead loop on virtual device "
				       "%s, fix it urgently!\n", dev->name);
		}
	}

	rc = -ENETDOWN;
	rcu_read_unlock_bh();

out_kfree_skb:
	kfree_skb(skb);
	return rc;
out:
	rcu_read_unlock_bh();
	return rc;
}


/*=======================================================================
			Receiver routines
  =======================================================================*/

int netdev_max_backlog __read_mostly = 1000;
int netdev_budget __read_mostly = 300;
int weight_p __read_mostly = 64;            /* old backlog weight */

DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };


/**
 * netif_rx - post buffer to the network code
 * @skb: buffer to post
 *
 * This function receives a packet from a device driver and queues it for
 * the upper (protocol) levels to process.  It always succeeds. The buffer
 * may be dropped during processing for congestion control or by the
 * protocol layers.
 *
 * return values:
 * NET_RX_SUCCESS	(no congestion)
 * NET_RX_DROP		(packet was dropped)
 *
 */

int netif_rx(struct sk_buff *skb)
{
	struct softnet_data *queue;
	unsigned long flags;

	/* if netpoll wants it, pretend we never saw it */
	if (netpoll_rx(skb))
		return NET_RX_DROP;

	if (!skb->tstamp.tv64)
		net_timestamp(skb);

	/*
	 * The code below is arranged so that the path is as short as
	 * possible when the CPU is congested but still operating.
1954 */ 1955 local_irq_save(flags); 1956 queue = &__get_cpu_var(softnet_data); 1957 1958 __get_cpu_var(netdev_rx_stat).total++; 1959 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { 1960 if (queue->input_pkt_queue.qlen) { 1961 enqueue: 1962 __skb_queue_tail(&queue->input_pkt_queue, skb); 1963 local_irq_restore(flags); 1964 return NET_RX_SUCCESS; 1965 } 1966 1967 napi_schedule(&queue->backlog); 1968 goto enqueue; 1969 } 1970 1971 __get_cpu_var(netdev_rx_stat).dropped++; 1972 local_irq_restore(flags); 1973 1974 kfree_skb(skb); 1975 return NET_RX_DROP; 1976 } 1977 1978 int netif_rx_ni(struct sk_buff *skb) 1979 { 1980 int err; 1981 1982 preempt_disable(); 1983 err = netif_rx(skb); 1984 if (local_softirq_pending()) 1985 do_softirq(); 1986 preempt_enable(); 1987 1988 return err; 1989 } 1990 1991 EXPORT_SYMBOL(netif_rx_ni); 1992 1993 static void net_tx_action(struct softirq_action *h) 1994 { 1995 struct softnet_data *sd = &__get_cpu_var(softnet_data); 1996 1997 if (sd->completion_queue) { 1998 struct sk_buff *clist; 1999 2000 local_irq_disable(); 2001 clist = sd->completion_queue; 2002 sd->completion_queue = NULL; 2003 local_irq_enable(); 2004 2005 while (clist) { 2006 struct sk_buff *skb = clist; 2007 clist = clist->next; 2008 2009 WARN_ON(atomic_read(&skb->users)); 2010 __kfree_skb(skb); 2011 } 2012 } 2013 2014 if (sd->output_queue) { 2015 struct Qdisc *head; 2016 2017 local_irq_disable(); 2018 head = sd->output_queue; 2019 sd->output_queue = NULL; 2020 local_irq_enable(); 2021 2022 while (head) { 2023 struct Qdisc *q = head; 2024 spinlock_t *root_lock; 2025 2026 head = head->next_sched; 2027 2028 root_lock = qdisc_lock(q); 2029 if (spin_trylock(root_lock)) { 2030 smp_mb__before_clear_bit(); 2031 clear_bit(__QDISC_STATE_SCHED, 2032 &q->state); 2033 qdisc_run(q); 2034 spin_unlock(root_lock); 2035 } else { 2036 if (!test_bit(__QDISC_STATE_DEACTIVATED, 2037 &q->state)) { 2038 __netif_reschedule(q); 2039 } else { 2040 smp_mb__before_clear_bit(); 2041 clear_bit(__QDISC_STATE_SCHED, 2042 &q->state); 2043 } 2044 } 2045 } 2046 } 2047 } 2048 2049 static inline int deliver_skb(struct sk_buff *skb, 2050 struct packet_type *pt_prev, 2051 struct net_device *orig_dev) 2052 { 2053 atomic_inc(&skb->users); 2054 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 2055 } 2056 2057 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) 2058 /* These hooks defined here for ATM */ 2059 struct net_bridge; 2060 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br, 2061 unsigned char *addr); 2062 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly; 2063 2064 /* 2065 * If bridge module is loaded call bridging hook. 2066 * returns NULL if packet was consumed. 
2067 */ 2068 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p, 2069 struct sk_buff *skb) __read_mostly; 2070 static inline struct sk_buff *handle_bridge(struct sk_buff *skb, 2071 struct packet_type **pt_prev, int *ret, 2072 struct net_device *orig_dev) 2073 { 2074 struct net_bridge_port *port; 2075 2076 if (skb->pkt_type == PACKET_LOOPBACK || 2077 (port = rcu_dereference(skb->dev->br_port)) == NULL) 2078 return skb; 2079 2080 if (*pt_prev) { 2081 *ret = deliver_skb(skb, *pt_prev, orig_dev); 2082 *pt_prev = NULL; 2083 } 2084 2085 return br_handle_frame_hook(port, skb); 2086 } 2087 #else 2088 #define handle_bridge(skb, pt_prev, ret, orig_dev) (skb) 2089 #endif 2090 2091 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) 2092 struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly; 2093 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook); 2094 2095 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, 2096 struct packet_type **pt_prev, 2097 int *ret, 2098 struct net_device *orig_dev) 2099 { 2100 if (skb->dev->macvlan_port == NULL) 2101 return skb; 2102 2103 if (*pt_prev) { 2104 *ret = deliver_skb(skb, *pt_prev, orig_dev); 2105 *pt_prev = NULL; 2106 } 2107 return macvlan_handle_frame_hook(skb); 2108 } 2109 #else 2110 #define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb) 2111 #endif 2112 2113 #ifdef CONFIG_NET_CLS_ACT 2114 /* TODO: Maybe we should just force sch_ingress to be compiled in 2115 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions 2116 * a compare and 2 stores extra right now if we dont have it on 2117 * but have CONFIG_NET_CLS_ACT 2118 * NOTE: This doesnt stop any functionality; if you dont have 2119 * the ingress scheduler, you just cant add policies on ingress. 2120 * 2121 */ 2122 static int ing_filter(struct sk_buff *skb) 2123 { 2124 struct net_device *dev = skb->dev; 2125 u32 ttl = G_TC_RTTL(skb->tc_verd); 2126 struct netdev_queue *rxq; 2127 int result = TC_ACT_OK; 2128 struct Qdisc *q; 2129 2130 if (MAX_RED_LOOP < ttl++) { 2131 printk(KERN_WARNING 2132 "Redir loop detected Dropping packet (%d->%d)\n", 2133 skb->iif, dev->ifindex); 2134 return TC_ACT_SHOT; 2135 } 2136 2137 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); 2138 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); 2139 2140 rxq = &dev->rx_queue; 2141 2142 q = rxq->qdisc; 2143 if (q != &noop_qdisc) { 2144 spin_lock(qdisc_lock(q)); 2145 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) 2146 result = qdisc_enqueue_root(skb, q); 2147 spin_unlock(qdisc_lock(q)); 2148 } 2149 2150 return result; 2151 } 2152 2153 static inline struct sk_buff *handle_ing(struct sk_buff *skb, 2154 struct packet_type **pt_prev, 2155 int *ret, struct net_device *orig_dev) 2156 { 2157 if (skb->dev->rx_queue.qdisc == &noop_qdisc) 2158 goto out; 2159 2160 if (*pt_prev) { 2161 *ret = deliver_skb(skb, *pt_prev, orig_dev); 2162 *pt_prev = NULL; 2163 } else { 2164 /* Huh? Why does turning on AF_PACKET affect this? */ 2165 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); 2166 } 2167 2168 switch (ing_filter(skb)) { 2169 case TC_ACT_SHOT: 2170 case TC_ACT_STOLEN: 2171 kfree_skb(skb); 2172 return NULL; 2173 } 2174 2175 out: 2176 skb->tc_verd = 0; 2177 return skb; 2178 } 2179 #endif 2180 2181 /* 2182 * netif_nit_deliver - deliver received packets to network taps 2183 * @skb: buffer 2184 * 2185 * This function is used to deliver incoming packets to network 2186 * taps. 
It should be used when the normal netif_receive_skb path 2187 * is bypassed, for example because of VLAN acceleration. 2188 */ 2189 void netif_nit_deliver(struct sk_buff *skb) 2190 { 2191 struct packet_type *ptype; 2192 2193 if (list_empty(&ptype_all)) 2194 return; 2195 2196 skb_reset_network_header(skb); 2197 skb_reset_transport_header(skb); 2198 skb->mac_len = skb->network_header - skb->mac_header; 2199 2200 rcu_read_lock(); 2201 list_for_each_entry_rcu(ptype, &ptype_all, list) { 2202 if (!ptype->dev || ptype->dev == skb->dev) 2203 deliver_skb(skb, ptype, skb->dev); 2204 } 2205 rcu_read_unlock(); 2206 } 2207 2208 /** 2209 * netif_receive_skb - process receive buffer from network 2210 * @skb: buffer to process 2211 * 2212 * netif_receive_skb() is the main receive data processing function. 2213 * It always succeeds. The buffer may be dropped during processing 2214 * for congestion control or by the protocol layers. 2215 * 2216 * This function may only be called from softirq context and interrupts 2217 * should be enabled. 2218 * 2219 * Return values (usually ignored): 2220 * NET_RX_SUCCESS: no congestion 2221 * NET_RX_DROP: packet was dropped 2222 */ 2223 int netif_receive_skb(struct sk_buff *skb) 2224 { 2225 struct packet_type *ptype, *pt_prev; 2226 struct net_device *orig_dev; 2227 struct net_device *null_or_orig; 2228 int ret = NET_RX_DROP; 2229 __be16 type; 2230 2231 if (skb->vlan_tci && vlan_hwaccel_do_receive(skb)) 2232 return NET_RX_SUCCESS; 2233 2234 /* if we've gotten here through NAPI, check netpoll */ 2235 if (netpoll_receive_skb(skb)) 2236 return NET_RX_DROP; 2237 2238 if (!skb->tstamp.tv64) 2239 net_timestamp(skb); 2240 2241 if (!skb->iif) 2242 skb->iif = skb->dev->ifindex; 2243 2244 null_or_orig = NULL; 2245 orig_dev = skb->dev; 2246 if (orig_dev->master) { 2247 if (skb_bond_should_drop(skb)) 2248 null_or_orig = orig_dev; /* deliver only exact match */ 2249 else 2250 skb->dev = orig_dev->master; 2251 } 2252 2253 __get_cpu_var(netdev_rx_stat).total++; 2254 2255 skb_reset_network_header(skb); 2256 skb_reset_transport_header(skb); 2257 skb->mac_len = skb->network_header - skb->mac_header; 2258 2259 pt_prev = NULL; 2260 2261 rcu_read_lock(); 2262 2263 /* Don't receive packets in an exiting network namespace */ 2264 if (!net_alive(dev_net(skb->dev))) { 2265 kfree_skb(skb); 2266 goto out; 2267 } 2268 2269 #ifdef CONFIG_NET_CLS_ACT 2270 if (skb->tc_verd & TC_NCLS) { 2271 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); 2272 goto ncls; 2273 } 2274 #endif 2275 2276 list_for_each_entry_rcu(ptype, &ptype_all, list) { 2277 if (ptype->dev == null_or_orig || ptype->dev == skb->dev || 2278 ptype->dev == orig_dev) { 2279 if (pt_prev) 2280 ret = deliver_skb(skb, pt_prev, orig_dev); 2281 pt_prev = ptype; 2282 } 2283 } 2284 2285 #ifdef CONFIG_NET_CLS_ACT 2286 skb = handle_ing(skb, &pt_prev, &ret, orig_dev); 2287 if (!skb) 2288 goto out; 2289 ncls: 2290 #endif 2291 2292 skb = handle_bridge(skb, &pt_prev, &ret, orig_dev); 2293 if (!skb) 2294 goto out; 2295 skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev); 2296 if (!skb) 2297 goto out; 2298 2299 type = skb->protocol; 2300 list_for_each_entry_rcu(ptype, 2301 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { 2302 if (ptype->type == type && 2303 (ptype->dev == null_or_orig || ptype->dev == skb->dev || 2304 ptype->dev == orig_dev)) { 2305 if (pt_prev) 2306 ret = deliver_skb(skb, pt_prev, orig_dev); 2307 pt_prev = ptype; 2308 } 2309 } 2310 2311 if (pt_prev) { 2312 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 2313 } else { 2314 kfree_skb(skb); 
2315 /* Jamal, now you will not able to escape explaining 2316 * me how you were going to use this. :-) 2317 */ 2318 ret = NET_RX_DROP; 2319 } 2320 2321 out: 2322 rcu_read_unlock(); 2323 return ret; 2324 } 2325 2326 /* Network device is going away, flush any packets still pending */ 2327 static void flush_backlog(void *arg) 2328 { 2329 struct net_device *dev = arg; 2330 struct softnet_data *queue = &__get_cpu_var(softnet_data); 2331 struct sk_buff *skb, *tmp; 2332 2333 skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp) 2334 if (skb->dev == dev) { 2335 __skb_unlink(skb, &queue->input_pkt_queue); 2336 kfree_skb(skb); 2337 } 2338 } 2339 2340 static int process_backlog(struct napi_struct *napi, int quota) 2341 { 2342 int work = 0; 2343 struct softnet_data *queue = &__get_cpu_var(softnet_data); 2344 unsigned long start_time = jiffies; 2345 2346 napi->weight = weight_p; 2347 do { 2348 struct sk_buff *skb; 2349 2350 local_irq_disable(); 2351 skb = __skb_dequeue(&queue->input_pkt_queue); 2352 if (!skb) { 2353 __napi_complete(napi); 2354 local_irq_enable(); 2355 break; 2356 } 2357 local_irq_enable(); 2358 2359 netif_receive_skb(skb); 2360 } while (++work < quota && jiffies == start_time); 2361 2362 return work; 2363 } 2364 2365 /** 2366 * __napi_schedule - schedule for receive 2367 * @n: entry to schedule 2368 * 2369 * The entry's receive function will be scheduled to run 2370 */ 2371 void __napi_schedule(struct napi_struct *n) 2372 { 2373 unsigned long flags; 2374 2375 local_irq_save(flags); 2376 list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list); 2377 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 2378 local_irq_restore(flags); 2379 } 2380 EXPORT_SYMBOL(__napi_schedule); 2381 2382 2383 static void net_rx_action(struct softirq_action *h) 2384 { 2385 struct list_head *list = &__get_cpu_var(softnet_data).poll_list; 2386 unsigned long time_limit = jiffies + 2; 2387 int budget = netdev_budget; 2388 void *have; 2389 2390 local_irq_disable(); 2391 2392 while (!list_empty(list)) { 2393 struct napi_struct *n; 2394 int work, weight; 2395 2396 /* If softirq window is exhuasted then punt. 2397 * Allow this to run for 2 jiffies since which will allow 2398 * an average latency of 1.5/HZ. 2399 */ 2400 if (unlikely(budget <= 0 || time_after(jiffies, time_limit))) 2401 goto softnet_break; 2402 2403 local_irq_enable(); 2404 2405 /* Even though interrupts have been re-enabled, this 2406 * access is safe because interrupts can only add new 2407 * entries to the tail of this list, and only ->poll() 2408 * calls can remove this head entry from the list. 2409 */ 2410 n = list_entry(list->next, struct napi_struct, poll_list); 2411 2412 have = netpoll_poll_lock(n); 2413 2414 weight = n->weight; 2415 2416 /* This NAPI_STATE_SCHED test is for avoiding a race 2417 * with netpoll's poll_napi(). Only the entity which 2418 * obtains the lock and sees NAPI_STATE_SCHED set will 2419 * actually make the ->poll() call. Therefore we avoid 2420 * accidently calling ->poll() when NAPI is not scheduled. 2421 */ 2422 work = 0; 2423 if (test_bit(NAPI_STATE_SCHED, &n->state)) 2424 work = n->poll(n, weight); 2425 2426 WARN_ON_ONCE(work > weight); 2427 2428 budget -= work; 2429 2430 local_irq_disable(); 2431 2432 /* Drivers must not modify the NAPI state if they 2433 * consume the entire weight. In such cases this code 2434 * still "owns" the NAPI instance and therefore can 2435 * move the instance around on the list at-will. 
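 *
 * Editor's illustrative sketch (not from the original source): the
 * driver side of this contract is a ->poll() routine along these
 * lines, where foo_clean_rx() and foo_enable_rx_irq() are
 * hypothetical helpers:
 *
 *	static int foo_poll(struct napi_struct *napi, int budget)
 *	{
 *		int work_done = foo_clean_rx(napi, budget);
 *
 *		if (work_done < budget) {
 *			napi_complete(napi);
 *			foo_enable_rx_irq(napi);
 *		}
 *		return work_done;
 *	}
 *
 * Only when work_done < budget may the driver call napi_complete();
 * returning the full budget leaves the instance owned by this loop.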
2436 */ 2437 if (unlikely(work == weight)) { 2438 if (unlikely(napi_disable_pending(n))) 2439 __napi_complete(n); 2440 else 2441 list_move_tail(&n->poll_list, list); 2442 } 2443 2444 netpoll_poll_unlock(have); 2445 } 2446 out: 2447 local_irq_enable(); 2448 2449 #ifdef CONFIG_NET_DMA 2450 /* 2451 * There may not be any more sk_buffs coming right now, so push 2452 * any pending DMA copies to hardware 2453 */ 2454 if (!cpus_empty(net_dma.channel_mask)) { 2455 int chan_idx; 2456 for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) { 2457 struct dma_chan *chan = net_dma.channels[chan_idx]; 2458 if (chan) 2459 dma_async_memcpy_issue_pending(chan); 2460 } 2461 } 2462 #endif 2463 2464 return; 2465 2466 softnet_break: 2467 __get_cpu_var(netdev_rx_stat).time_squeeze++; 2468 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 2469 goto out; 2470 } 2471 2472 static gifconf_func_t * gifconf_list [NPROTO]; 2473 2474 /** 2475 * register_gifconf - register a SIOCGIF handler 2476 * @family: Address family 2477 * @gifconf: Function handler 2478 * 2479 * Register protocol dependent address dumping routines. The handler 2480 * that is passed must not be freed or reused until it has been replaced 2481 * by another handler. 2482 */ 2483 int register_gifconf(unsigned int family, gifconf_func_t * gifconf) 2484 { 2485 if (family >= NPROTO) 2486 return -EINVAL; 2487 gifconf_list[family] = gifconf; 2488 return 0; 2489 } 2490 2491 2492 /* 2493 * Map an interface index to its name (SIOCGIFNAME) 2494 */ 2495 2496 /* 2497 * We need this ioctl for efficient implementation of the 2498 * if_indextoname() function required by the IPv6 API. Without 2499 * it, we would have to search all the interfaces to find a 2500 * match. --pb 2501 */ 2502 2503 static int dev_ifname(struct net *net, struct ifreq __user *arg) 2504 { 2505 struct net_device *dev; 2506 struct ifreq ifr; 2507 2508 /* 2509 * Fetch the caller's info block. 2510 */ 2511 2512 if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) 2513 return -EFAULT; 2514 2515 read_lock(&dev_base_lock); 2516 dev = __dev_get_by_index(net, ifr.ifr_ifindex); 2517 if (!dev) { 2518 read_unlock(&dev_base_lock); 2519 return -ENODEV; 2520 } 2521 2522 strcpy(ifr.ifr_name, dev->name); 2523 read_unlock(&dev_base_lock); 2524 2525 if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) 2526 return -EFAULT; 2527 return 0; 2528 } 2529 2530 /* 2531 * Perform a SIOCGIFCONF call. This structure will change 2532 * size eventually, and there is nothing I can do about it. 2533 * Thus we will need a 'compatibility mode'. 2534 */ 2535 2536 static int dev_ifconf(struct net *net, char __user *arg) 2537 { 2538 struct ifconf ifc; 2539 struct net_device *dev; 2540 char __user *pos; 2541 int len; 2542 int total; 2543 int i; 2544 2545 /* 2546 * Fetch the caller's info block. 2547 */ 2548 2549 if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) 2550 return -EFAULT; 2551 2552 pos = ifc.ifc_buf; 2553 len = ifc.ifc_len; 2554 2555 /* 2556 * Loop over the interfaces, and write an info block for each. 2557 */ 2558 2559 total = 0; 2560 for_each_netdev(net, dev) { 2561 for (i = 0; i < NPROTO; i++) { 2562 if (gifconf_list[i]) { 2563 int done; 2564 if (!pos) 2565 done = gifconf_list[i](dev, NULL, 0); 2566 else 2567 done = gifconf_list[i](dev, pos + total, 2568 len - total); 2569 if (done < 0) 2570 return -EFAULT; 2571 total += done; 2572 } 2573 } 2574 } 2575 2576 /* 2577 * All done. Write the updated control block back to the caller. 2578 */ 2579 ifc.ifc_len = total; 2580 2581 /* 2582 * Both BSD and Solaris return 0 here, so we do too. 
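 *
 * Editor's usage sketch (hedged, not part of the original source): a
 * userspace caller typically probes the required length first by
 * passing a NULL buffer, then retries with a real one:
 *
 *	struct ifconf ifc;
 *
 *	ifc.ifc_len = 0;
 *	ifc.ifc_req = NULL;
 *	ioctl(fd, SIOCGIFCONF, &ifc);		(ifc_len now holds the size)
 *	ifc.ifc_req = malloc(ifc.ifc_len);
 *	ioctl(fd, SIOCGIFCONF, &ifc);		(fills the ifreq array)
 *
 * A commonly noted caveat: only interfaces with a registered gifconf
 * handler (in practice, those carrying an IPv4 address) show up here,
 * so the result may list fewer entries than /proc/net/dev.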
2583 */ 2584 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; 2585 } 2586 2587 #ifdef CONFIG_PROC_FS 2588 /* 2589 * This is invoked by the /proc filesystem handler to display a device 2590 * in detail. 2591 */ 2592 void *dev_seq_start(struct seq_file *seq, loff_t *pos) 2593 __acquires(dev_base_lock) 2594 { 2595 struct net *net = seq_file_net(seq); 2596 loff_t off; 2597 struct net_device *dev; 2598 2599 read_lock(&dev_base_lock); 2600 if (!*pos) 2601 return SEQ_START_TOKEN; 2602 2603 off = 1; 2604 for_each_netdev(net, dev) 2605 if (off++ == *pos) 2606 return dev; 2607 2608 return NULL; 2609 } 2610 2611 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2612 { 2613 struct net *net = seq_file_net(seq); 2614 ++*pos; 2615 return v == SEQ_START_TOKEN ? 2616 first_net_device(net) : next_net_device((struct net_device *)v); 2617 } 2618 2619 void dev_seq_stop(struct seq_file *seq, void *v) 2620 __releases(dev_base_lock) 2621 { 2622 read_unlock(&dev_base_lock); 2623 } 2624 2625 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) 2626 { 2627 const struct net_device_stats *stats = dev_get_stats(dev); 2628 2629 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " 2630 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", 2631 dev->name, stats->rx_bytes, stats->rx_packets, 2632 stats->rx_errors, 2633 stats->rx_dropped + stats->rx_missed_errors, 2634 stats->rx_fifo_errors, 2635 stats->rx_length_errors + stats->rx_over_errors + 2636 stats->rx_crc_errors + stats->rx_frame_errors, 2637 stats->rx_compressed, stats->multicast, 2638 stats->tx_bytes, stats->tx_packets, 2639 stats->tx_errors, stats->tx_dropped, 2640 stats->tx_fifo_errors, stats->collisions, 2641 stats->tx_carrier_errors + 2642 stats->tx_aborted_errors + 2643 stats->tx_window_errors + 2644 stats->tx_heartbeat_errors, 2645 stats->tx_compressed); 2646 } 2647 2648 /* 2649 * Called from the PROCfs module. 
This now uses the new arbitrary sized 2650 * /proc/net interface to create /proc/net/dev 2651 */ 2652 static int dev_seq_show(struct seq_file *seq, void *v) 2653 { 2654 if (v == SEQ_START_TOKEN) 2655 seq_puts(seq, "Inter-| Receive " 2656 " | Transmit\n" 2657 " face |bytes packets errs drop fifo frame " 2658 "compressed multicast|bytes packets errs " 2659 "drop fifo colls carrier compressed\n"); 2660 else 2661 dev_seq_printf_stats(seq, v); 2662 return 0; 2663 } 2664 2665 static struct netif_rx_stats *softnet_get_online(loff_t *pos) 2666 { 2667 struct netif_rx_stats *rc = NULL; 2668 2669 while (*pos < nr_cpu_ids) 2670 if (cpu_online(*pos)) { 2671 rc = &per_cpu(netdev_rx_stat, *pos); 2672 break; 2673 } else 2674 ++*pos; 2675 return rc; 2676 } 2677 2678 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) 2679 { 2680 return softnet_get_online(pos); 2681 } 2682 2683 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2684 { 2685 ++*pos; 2686 return softnet_get_online(pos); 2687 } 2688 2689 static void softnet_seq_stop(struct seq_file *seq, void *v) 2690 { 2691 } 2692 2693 static int softnet_seq_show(struct seq_file *seq, void *v) 2694 { 2695 struct netif_rx_stats *s = v; 2696 2697 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", 2698 s->total, s->dropped, s->time_squeeze, 0, 2699 0, 0, 0, 0, /* was fastroute */ 2700 s->cpu_collision ); 2701 return 0; 2702 } 2703 2704 static const struct seq_operations dev_seq_ops = { 2705 .start = dev_seq_start, 2706 .next = dev_seq_next, 2707 .stop = dev_seq_stop, 2708 .show = dev_seq_show, 2709 }; 2710 2711 static int dev_seq_open(struct inode *inode, struct file *file) 2712 { 2713 return seq_open_net(inode, file, &dev_seq_ops, 2714 sizeof(struct seq_net_private)); 2715 } 2716 2717 static const struct file_operations dev_seq_fops = { 2718 .owner = THIS_MODULE, 2719 .open = dev_seq_open, 2720 .read = seq_read, 2721 .llseek = seq_lseek, 2722 .release = seq_release_net, 2723 }; 2724 2725 static const struct seq_operations softnet_seq_ops = { 2726 .start = softnet_seq_start, 2727 .next = softnet_seq_next, 2728 .stop = softnet_seq_stop, 2729 .show = softnet_seq_show, 2730 }; 2731 2732 static int softnet_seq_open(struct inode *inode, struct file *file) 2733 { 2734 return seq_open(file, &softnet_seq_ops); 2735 } 2736 2737 static const struct file_operations softnet_seq_fops = { 2738 .owner = THIS_MODULE, 2739 .open = softnet_seq_open, 2740 .read = seq_read, 2741 .llseek = seq_lseek, 2742 .release = seq_release, 2743 }; 2744 2745 static void *ptype_get_idx(loff_t pos) 2746 { 2747 struct packet_type *pt = NULL; 2748 loff_t i = 0; 2749 int t; 2750 2751 list_for_each_entry_rcu(pt, &ptype_all, list) { 2752 if (i == pos) 2753 return pt; 2754 ++i; 2755 } 2756 2757 for (t = 0; t < PTYPE_HASH_SIZE; t++) { 2758 list_for_each_entry_rcu(pt, &ptype_base[t], list) { 2759 if (i == pos) 2760 return pt; 2761 ++i; 2762 } 2763 } 2764 return NULL; 2765 } 2766 2767 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) 2768 __acquires(RCU) 2769 { 2770 rcu_read_lock(); 2771 return *pos ? 
ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; 2772 } 2773 2774 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2775 { 2776 struct packet_type *pt; 2777 struct list_head *nxt; 2778 int hash; 2779 2780 ++*pos; 2781 if (v == SEQ_START_TOKEN) 2782 return ptype_get_idx(0); 2783 2784 pt = v; 2785 nxt = pt->list.next; 2786 if (pt->type == htons(ETH_P_ALL)) { 2787 if (nxt != &ptype_all) 2788 goto found; 2789 hash = 0; 2790 nxt = ptype_base[0].next; 2791 } else 2792 hash = ntohs(pt->type) & PTYPE_HASH_MASK; 2793 2794 while (nxt == &ptype_base[hash]) { 2795 if (++hash >= PTYPE_HASH_SIZE) 2796 return NULL; 2797 nxt = ptype_base[hash].next; 2798 } 2799 found: 2800 return list_entry(nxt, struct packet_type, list); 2801 } 2802 2803 static void ptype_seq_stop(struct seq_file *seq, void *v) 2804 __releases(RCU) 2805 { 2806 rcu_read_unlock(); 2807 } 2808 2809 static int ptype_seq_show(struct seq_file *seq, void *v) 2810 { 2811 struct packet_type *pt = v; 2812 2813 if (v == SEQ_START_TOKEN) 2814 seq_puts(seq, "Type Device Function\n"); 2815 else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { 2816 if (pt->type == htons(ETH_P_ALL)) 2817 seq_puts(seq, "ALL "); 2818 else 2819 seq_printf(seq, "%04x", ntohs(pt->type)); 2820 2821 seq_printf(seq, " %-8s %pF\n", 2822 pt->dev ? pt->dev->name : "", pt->func); 2823 } 2824 2825 return 0; 2826 } 2827 2828 static const struct seq_operations ptype_seq_ops = { 2829 .start = ptype_seq_start, 2830 .next = ptype_seq_next, 2831 .stop = ptype_seq_stop, 2832 .show = ptype_seq_show, 2833 }; 2834 2835 static int ptype_seq_open(struct inode *inode, struct file *file) 2836 { 2837 return seq_open_net(inode, file, &ptype_seq_ops, 2838 sizeof(struct seq_net_private)); 2839 } 2840 2841 static const struct file_operations ptype_seq_fops = { 2842 .owner = THIS_MODULE, 2843 .open = ptype_seq_open, 2844 .read = seq_read, 2845 .llseek = seq_lseek, 2846 .release = seq_release_net, 2847 }; 2848 2849 2850 static int __net_init dev_proc_net_init(struct net *net) 2851 { 2852 int rc = -ENOMEM; 2853 2854 if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops)) 2855 goto out; 2856 if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops)) 2857 goto out_dev; 2858 if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops)) 2859 goto out_softnet; 2860 2861 if (wext_proc_init(net)) 2862 goto out_ptype; 2863 rc = 0; 2864 out: 2865 return rc; 2866 out_ptype: 2867 proc_net_remove(net, "ptype"); 2868 out_softnet: 2869 proc_net_remove(net, "softnet_stat"); 2870 out_dev: 2871 proc_net_remove(net, "dev"); 2872 goto out; 2873 } 2874 2875 static void __net_exit dev_proc_net_exit(struct net *net) 2876 { 2877 wext_proc_exit(net); 2878 2879 proc_net_remove(net, "ptype"); 2880 proc_net_remove(net, "softnet_stat"); 2881 proc_net_remove(net, "dev"); 2882 } 2883 2884 static struct pernet_operations __net_initdata dev_proc_ops = { 2885 .init = dev_proc_net_init, 2886 .exit = dev_proc_net_exit, 2887 }; 2888 2889 static int __init dev_proc_init(void) 2890 { 2891 return register_pernet_subsys(&dev_proc_ops); 2892 } 2893 #else 2894 #define dev_proc_init() 0 2895 #endif /* CONFIG_PROC_FS */ 2896 2897 2898 /** 2899 * netdev_set_master - set up master/slave pair 2900 * @slave: slave device 2901 * @master: new master device 2902 * 2903 * Changes the master device of the slave. Pass %NULL to break the 2904 * bonding. The caller must hold the RTNL semaphore. On a failure 2905 * a negative errno code is returned. 
On success the reference counts 2906 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the 2907 * function returns zero. 2908 */ 2909 int netdev_set_master(struct net_device *slave, struct net_device *master) 2910 { 2911 struct net_device *old = slave->master; 2912 2913 ASSERT_RTNL(); 2914 2915 if (master) { 2916 if (old) 2917 return -EBUSY; 2918 dev_hold(master); 2919 } 2920 2921 slave->master = master; 2922 2923 synchronize_net(); 2924 2925 if (old) 2926 dev_put(old); 2927 2928 if (master) 2929 slave->flags |= IFF_SLAVE; 2930 else 2931 slave->flags &= ~IFF_SLAVE; 2932 2933 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); 2934 return 0; 2935 } 2936 2937 static void dev_change_rx_flags(struct net_device *dev, int flags) 2938 { 2939 const struct net_device_ops *ops = dev->netdev_ops; 2940 2941 if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags) 2942 ops->ndo_change_rx_flags(dev, flags); 2943 } 2944 2945 static int __dev_set_promiscuity(struct net_device *dev, int inc) 2946 { 2947 unsigned short old_flags = dev->flags; 2948 2949 ASSERT_RTNL(); 2950 2951 dev->flags |= IFF_PROMISC; 2952 dev->promiscuity += inc; 2953 if (dev->promiscuity == 0) { 2954 /* 2955 * Avoid overflow. 2956 * If inc causes overflow, untouch promisc and return error. 2957 */ 2958 if (inc < 0) 2959 dev->flags &= ~IFF_PROMISC; 2960 else { 2961 dev->promiscuity -= inc; 2962 printk(KERN_WARNING "%s: promiscuity touches roof, " 2963 "set promiscuity failed, promiscuity feature " 2964 "of device might be broken.\n", dev->name); 2965 return -EOVERFLOW; 2966 } 2967 } 2968 if (dev->flags != old_flags) { 2969 printk(KERN_INFO "device %s %s promiscuous mode\n", 2970 dev->name, (dev->flags & IFF_PROMISC) ? "entered" : 2971 "left"); 2972 if (audit_enabled) 2973 audit_log(current->audit_context, GFP_ATOMIC, 2974 AUDIT_ANOM_PROMISCUOUS, 2975 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u", 2976 dev->name, (dev->flags & IFF_PROMISC), 2977 (old_flags & IFF_PROMISC), 2978 audit_get_loginuid(current), 2979 current->uid, current->gid, 2980 audit_get_sessionid(current)); 2981 2982 dev_change_rx_flags(dev, IFF_PROMISC); 2983 } 2984 return 0; 2985 } 2986 2987 /** 2988 * dev_set_promiscuity - update promiscuity count on a device 2989 * @dev: device 2990 * @inc: modifier 2991 * 2992 * Add or remove promiscuity from a device. While the count in the device 2993 * remains above zero the interface remains promiscuous. Once it hits zero 2994 * the device reverts back to normal filtering operation. A negative inc 2995 * value is used to drop promiscuity on the device. 2996 * Return 0 if successful or a negative errno code on error. 2997 */ 2998 int dev_set_promiscuity(struct net_device *dev, int inc) 2999 { 3000 unsigned short old_flags = dev->flags; 3001 int err; 3002 3003 err = __dev_set_promiscuity(dev, inc); 3004 if (err < 0) 3005 return err; 3006 if (dev->flags != old_flags) 3007 dev_set_rx_mode(dev); 3008 return err; 3009 } 3010 3011 /** 3012 * dev_set_allmulti - update allmulti count on a device 3013 * @dev: device 3014 * @inc: modifier 3015 * 3016 * Add or remove reception of all multicast frames to a device. While the 3017 * count in the device remains above zero the interface remains listening 3018 * to all interfaces. Once it hits zero the device reverts back to normal 3019 * filtering operation. A negative @inc value is used to drop the counter 3020 * when releasing a resource needing all multicasts. 3021 * Return 0 if successful or a negative errno code on error. 
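 *
 * Editor's usage sketch (hedged): a protocol or stacked driver that
 * needs every multicast frame for some period brackets it with a
 * matching +1/-1 pair under RTNL, e.g.
 *
 *	rtnl_lock();
 *	err = dev_set_allmulti(dev, 1);
 *	...
 *	dev_set_allmulti(dev, -1);
 *	rtnl_unlock();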
3022 */ 3023 3024 int dev_set_allmulti(struct net_device *dev, int inc) 3025 { 3026 unsigned short old_flags = dev->flags; 3027 3028 ASSERT_RTNL(); 3029 3030 dev->flags |= IFF_ALLMULTI; 3031 dev->allmulti += inc; 3032 if (dev->allmulti == 0) { 3033 /* 3034 * Avoid overflow. 3035 * If inc causes overflow, untouch allmulti and return error. 3036 */ 3037 if (inc < 0) 3038 dev->flags &= ~IFF_ALLMULTI; 3039 else { 3040 dev->allmulti -= inc; 3041 printk(KERN_WARNING "%s: allmulti touches roof, " 3042 "set allmulti failed, allmulti feature of " 3043 "device might be broken.\n", dev->name); 3044 return -EOVERFLOW; 3045 } 3046 } 3047 if (dev->flags ^ old_flags) { 3048 dev_change_rx_flags(dev, IFF_ALLMULTI); 3049 dev_set_rx_mode(dev); 3050 } 3051 return 0; 3052 } 3053 3054 /* 3055 * Upload unicast and multicast address lists to device and 3056 * configure RX filtering. When the device doesn't support unicast 3057 * filtering it is put in promiscuous mode while unicast addresses 3058 * are present. 3059 */ 3060 void __dev_set_rx_mode(struct net_device *dev) 3061 { 3062 const struct net_device_ops *ops = dev->netdev_ops; 3063 3064 /* dev_open will call this function so the list will stay sane. */ 3065 if (!(dev->flags&IFF_UP)) 3066 return; 3067 3068 if (!netif_device_present(dev)) 3069 return; 3070 3071 if (ops->ndo_set_rx_mode) 3072 ops->ndo_set_rx_mode(dev); 3073 else { 3074 /* Unicast addresses changes may only happen under the rtnl, 3075 * therefore calling __dev_set_promiscuity here is safe. 3076 */ 3077 if (dev->uc_count > 0 && !dev->uc_promisc) { 3078 __dev_set_promiscuity(dev, 1); 3079 dev->uc_promisc = 1; 3080 } else if (dev->uc_count == 0 && dev->uc_promisc) { 3081 __dev_set_promiscuity(dev, -1); 3082 dev->uc_promisc = 0; 3083 } 3084 3085 if (ops->ndo_set_multicast_list) 3086 ops->ndo_set_multicast_list(dev); 3087 } 3088 } 3089 3090 void dev_set_rx_mode(struct net_device *dev) 3091 { 3092 netif_addr_lock_bh(dev); 3093 __dev_set_rx_mode(dev); 3094 netif_addr_unlock_bh(dev); 3095 } 3096 3097 int __dev_addr_delete(struct dev_addr_list **list, int *count, 3098 void *addr, int alen, int glbl) 3099 { 3100 struct dev_addr_list *da; 3101 3102 for (; (da = *list) != NULL; list = &da->next) { 3103 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && 3104 alen == da->da_addrlen) { 3105 if (glbl) { 3106 int old_glbl = da->da_gusers; 3107 da->da_gusers = 0; 3108 if (old_glbl == 0) 3109 break; 3110 } 3111 if (--da->da_users) 3112 return 0; 3113 3114 *list = da->next; 3115 kfree(da); 3116 (*count)--; 3117 return 0; 3118 } 3119 } 3120 return -ENOENT; 3121 } 3122 3123 int __dev_addr_add(struct dev_addr_list **list, int *count, 3124 void *addr, int alen, int glbl) 3125 { 3126 struct dev_addr_list *da; 3127 3128 for (da = *list; da != NULL; da = da->next) { 3129 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && 3130 da->da_addrlen == alen) { 3131 if (glbl) { 3132 int old_glbl = da->da_gusers; 3133 da->da_gusers = 1; 3134 if (old_glbl) 3135 return 0; 3136 } 3137 da->da_users++; 3138 return 0; 3139 } 3140 } 3141 3142 da = kzalloc(sizeof(*da), GFP_ATOMIC); 3143 if (da == NULL) 3144 return -ENOMEM; 3145 memcpy(da->da_addr, addr, alen); 3146 da->da_addrlen = alen; 3147 da->da_users = 1; 3148 da->da_gusers = glbl ? 1 : 0; 3149 da->next = *list; 3150 *list = da; 3151 (*count)++; 3152 return 0; 3153 } 3154 3155 /** 3156 * dev_unicast_delete - Release secondary unicast address. 
3157 * @dev: device 3158 * @addr: address to delete 3159 * @alen: length of @addr 3160 * 3161 * Release reference to a secondary unicast address and remove it 3162 * from the device if the reference count drops to zero. 3163 * 3164 * The caller must hold the rtnl_mutex. 3165 */ 3166 int dev_unicast_delete(struct net_device *dev, void *addr, int alen) 3167 { 3168 int err; 3169 3170 ASSERT_RTNL(); 3171 3172 netif_addr_lock_bh(dev); 3173 err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0); 3174 if (!err) 3175 __dev_set_rx_mode(dev); 3176 netif_addr_unlock_bh(dev); 3177 return err; 3178 } 3179 EXPORT_SYMBOL(dev_unicast_delete); 3180 3181 /** 3182 * dev_unicast_add - add a secondary unicast address 3183 * @dev: device 3184 * @addr: address to add 3185 * @alen: length of @addr 3186 * 3187 * Add a secondary unicast address to the device or increase 3188 * the reference count if it already exists. 3189 * 3190 * The caller must hold the rtnl_mutex. 3191 */ 3192 int dev_unicast_add(struct net_device *dev, void *addr, int alen) 3193 { 3194 int err; 3195 3196 ASSERT_RTNL(); 3197 3198 netif_addr_lock_bh(dev); 3199 err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0); 3200 if (!err) 3201 __dev_set_rx_mode(dev); 3202 netif_addr_unlock_bh(dev); 3203 return err; 3204 } 3205 EXPORT_SYMBOL(dev_unicast_add); 3206 3207 int __dev_addr_sync(struct dev_addr_list **to, int *to_count, 3208 struct dev_addr_list **from, int *from_count) 3209 { 3210 struct dev_addr_list *da, *next; 3211 int err = 0; 3212 3213 da = *from; 3214 while (da != NULL) { 3215 next = da->next; 3216 if (!da->da_synced) { 3217 err = __dev_addr_add(to, to_count, 3218 da->da_addr, da->da_addrlen, 0); 3219 if (err < 0) 3220 break; 3221 da->da_synced = 1; 3222 da->da_users++; 3223 } else if (da->da_users == 1) { 3224 __dev_addr_delete(to, to_count, 3225 da->da_addr, da->da_addrlen, 0); 3226 __dev_addr_delete(from, from_count, 3227 da->da_addr, da->da_addrlen, 0); 3228 } 3229 da = next; 3230 } 3231 return err; 3232 } 3233 3234 void __dev_addr_unsync(struct dev_addr_list **to, int *to_count, 3235 struct dev_addr_list **from, int *from_count) 3236 { 3237 struct dev_addr_list *da, *next; 3238 3239 da = *from; 3240 while (da != NULL) { 3241 next = da->next; 3242 if (da->da_synced) { 3243 __dev_addr_delete(to, to_count, 3244 da->da_addr, da->da_addrlen, 0); 3245 da->da_synced = 0; 3246 __dev_addr_delete(from, from_count, 3247 da->da_addr, da->da_addrlen, 0); 3248 } 3249 da = next; 3250 } 3251 } 3252 3253 /** 3254 * dev_unicast_sync - Synchronize device's unicast list to another device 3255 * @to: destination device 3256 * @from: source device 3257 * 3258 * Add newly added addresses to the destination device and release 3259 * addresses that have no users left. The source device must be 3260 * locked by netif_tx_lock_bh. 3261 * 3262 * This function is intended to be called from the dev->set_rx_mode 3263 * function of layered software devices. 
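 *
 * Editor's illustrative sketch (not from the original source): a
 * stacked device such as a VLAN interface would typically call, from
 * its own rx-mode handler with real_dev as the lower device:
 *
 *	dev_unicast_sync(real_dev, vlan_dev);
 *	dev_mc_sync(real_dev, vlan_dev);
 *
 * and undo it with dev_unicast_unsync()/dev_mc_unsync() when the
 * upper device is stopped.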
3264 */ 3265 int dev_unicast_sync(struct net_device *to, struct net_device *from) 3266 { 3267 int err = 0; 3268 3269 netif_addr_lock_bh(to); 3270 err = __dev_addr_sync(&to->uc_list, &to->uc_count, 3271 &from->uc_list, &from->uc_count); 3272 if (!err) 3273 __dev_set_rx_mode(to); 3274 netif_addr_unlock_bh(to); 3275 return err; 3276 } 3277 EXPORT_SYMBOL(dev_unicast_sync); 3278 3279 /** 3280 * dev_unicast_unsync - Remove synchronized addresses from the destination device 3281 * @to: destination device 3282 * @from: source device 3283 * 3284 * Remove all addresses that were added to the destination device by 3285 * dev_unicast_sync(). This function is intended to be called from the 3286 * dev->stop function of layered software devices. 3287 */ 3288 void dev_unicast_unsync(struct net_device *to, struct net_device *from) 3289 { 3290 netif_addr_lock_bh(from); 3291 netif_addr_lock(to); 3292 3293 __dev_addr_unsync(&to->uc_list, &to->uc_count, 3294 &from->uc_list, &from->uc_count); 3295 __dev_set_rx_mode(to); 3296 3297 netif_addr_unlock(to); 3298 netif_addr_unlock_bh(from); 3299 } 3300 EXPORT_SYMBOL(dev_unicast_unsync); 3301 3302 static void __dev_addr_discard(struct dev_addr_list **list) 3303 { 3304 struct dev_addr_list *tmp; 3305 3306 while (*list != NULL) { 3307 tmp = *list; 3308 *list = tmp->next; 3309 if (tmp->da_users > tmp->da_gusers) 3310 printk("__dev_addr_discard: address leakage! " 3311 "da_users=%d\n", tmp->da_users); 3312 kfree(tmp); 3313 } 3314 } 3315 3316 static void dev_addr_discard(struct net_device *dev) 3317 { 3318 netif_addr_lock_bh(dev); 3319 3320 __dev_addr_discard(&dev->uc_list); 3321 dev->uc_count = 0; 3322 3323 __dev_addr_discard(&dev->mc_list); 3324 dev->mc_count = 0; 3325 3326 netif_addr_unlock_bh(dev); 3327 } 3328 3329 /** 3330 * dev_get_flags - get flags reported to userspace 3331 * @dev: device 3332 * 3333 * Get the combination of flag bits exported through APIs to userspace. 3334 */ 3335 unsigned dev_get_flags(const struct net_device *dev) 3336 { 3337 unsigned flags; 3338 3339 flags = (dev->flags & ~(IFF_PROMISC | 3340 IFF_ALLMULTI | 3341 IFF_RUNNING | 3342 IFF_LOWER_UP | 3343 IFF_DORMANT)) | 3344 (dev->gflags & (IFF_PROMISC | 3345 IFF_ALLMULTI)); 3346 3347 if (netif_running(dev)) { 3348 if (netif_oper_up(dev)) 3349 flags |= IFF_RUNNING; 3350 if (netif_carrier_ok(dev)) 3351 flags |= IFF_LOWER_UP; 3352 if (netif_dormant(dev)) 3353 flags |= IFF_DORMANT; 3354 } 3355 3356 return flags; 3357 } 3358 3359 /** 3360 * dev_change_flags - change device settings 3361 * @dev: device 3362 * @flags: device state flags 3363 * 3364 * Change settings on device based state flags. The flags are 3365 * in the userspace exported format. 3366 */ 3367 int dev_change_flags(struct net_device *dev, unsigned flags) 3368 { 3369 int ret, changes; 3370 int old_flags = dev->flags; 3371 3372 ASSERT_RTNL(); 3373 3374 /* 3375 * Set the flags on our device. 3376 */ 3377 3378 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP | 3379 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL | 3380 IFF_AUTOMEDIA)) | 3381 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC | 3382 IFF_ALLMULTI)); 3383 3384 /* 3385 * Load in the correct multicast list now the flags have changed. 3386 */ 3387 3388 if ((old_flags ^ flags) & IFF_MULTICAST) 3389 dev_change_rx_flags(dev, IFF_MULTICAST); 3390 3391 dev_set_rx_mode(dev); 3392 3393 /* 3394 * Have we downed the interface. We handle IFF_UP ourselves 3395 * according to user attempts to set it, rather than blindly 3396 * setting it. 
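 *
 * Editor's usage sketch (hedged): in-kernel callers bring an
 * interface up or down by editing the exported flags under RTNL,
 * e.g.
 *
 *	rtnl_lock();
 *	err = dev_change_flags(dev, dev->flags | IFF_UP);
 *	rtnl_unlock();
 *
 * which lands in the IFF_UP branch just below and calls
 * dev_open()/dev_close() as appropriate.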
3397 */ 3398 3399 ret = 0; 3400 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ 3401 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); 3402 3403 if (!ret) 3404 dev_set_rx_mode(dev); 3405 } 3406 3407 if (dev->flags & IFF_UP && 3408 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI | 3409 IFF_VOLATILE))) 3410 call_netdevice_notifiers(NETDEV_CHANGE, dev); 3411 3412 if ((flags ^ dev->gflags) & IFF_PROMISC) { 3413 int inc = (flags & IFF_PROMISC) ? +1 : -1; 3414 dev->gflags ^= IFF_PROMISC; 3415 dev_set_promiscuity(dev, inc); 3416 } 3417 3418 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI 3419 is important. Some (broken) drivers set IFF_PROMISC, when 3420 IFF_ALLMULTI is requested not asking us and not reporting. 3421 */ 3422 if ((flags ^ dev->gflags) & IFF_ALLMULTI) { 3423 int inc = (flags & IFF_ALLMULTI) ? +1 : -1; 3424 dev->gflags ^= IFF_ALLMULTI; 3425 dev_set_allmulti(dev, inc); 3426 } 3427 3428 /* Exclude state transition flags, already notified */ 3429 changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING); 3430 if (changes) 3431 rtmsg_ifinfo(RTM_NEWLINK, dev, changes); 3432 3433 return ret; 3434 } 3435 3436 /** 3437 * dev_set_mtu - Change maximum transfer unit 3438 * @dev: device 3439 * @new_mtu: new transfer unit 3440 * 3441 * Change the maximum transfer size of the network device. 3442 */ 3443 int dev_set_mtu(struct net_device *dev, int new_mtu) 3444 { 3445 const struct net_device_ops *ops = dev->netdev_ops; 3446 int err; 3447 3448 if (new_mtu == dev->mtu) 3449 return 0; 3450 3451 /* MTU must be positive. */ 3452 if (new_mtu < 0) 3453 return -EINVAL; 3454 3455 if (!netif_device_present(dev)) 3456 return -ENODEV; 3457 3458 err = 0; 3459 if (ops->ndo_change_mtu) 3460 err = ops->ndo_change_mtu(dev, new_mtu); 3461 else 3462 dev->mtu = new_mtu; 3463 3464 if (!err && dev->flags & IFF_UP) 3465 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); 3466 return err; 3467 } 3468 3469 /** 3470 * dev_set_mac_address - Change Media Access Control Address 3471 * @dev: device 3472 * @sa: new address 3473 * 3474 * Change the hardware (MAC) address of the device 3475 */ 3476 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) 3477 { 3478 const struct net_device_ops *ops = dev->netdev_ops; 3479 int err; 3480 3481 if (!ops->ndo_set_mac_address) 3482 return -EOPNOTSUPP; 3483 if (sa->sa_family != dev->type) 3484 return -EINVAL; 3485 if (!netif_device_present(dev)) 3486 return -ENODEV; 3487 err = ops->ndo_set_mac_address(dev, sa); 3488 if (!err) 3489 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); 3490 return err; 3491 } 3492 3493 /* 3494 * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock) 3495 */ 3496 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd) 3497 { 3498 int err; 3499 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); 3500 3501 if (!dev) 3502 return -ENODEV; 3503 3504 switch (cmd) { 3505 case SIOCGIFFLAGS: /* Get interface flags */ 3506 ifr->ifr_flags = dev_get_flags(dev); 3507 return 0; 3508 3509 case SIOCGIFMETRIC: /* Get the metric on the interface 3510 (currently unused) */ 3511 ifr->ifr_metric = 0; 3512 return 0; 3513 3514 case SIOCGIFMTU: /* Get the MTU of a device */ 3515 ifr->ifr_mtu = dev->mtu; 3516 return 0; 3517 3518 case SIOCGIFHWADDR: 3519 if (!dev->addr_len) 3520 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); 3521 else 3522 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, 3523 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); 
3524 ifr->ifr_hwaddr.sa_family = dev->type; 3525 return 0; 3526 3527 case SIOCGIFSLAVE: 3528 err = -EINVAL; 3529 break; 3530 3531 case SIOCGIFMAP: 3532 ifr->ifr_map.mem_start = dev->mem_start; 3533 ifr->ifr_map.mem_end = dev->mem_end; 3534 ifr->ifr_map.base_addr = dev->base_addr; 3535 ifr->ifr_map.irq = dev->irq; 3536 ifr->ifr_map.dma = dev->dma; 3537 ifr->ifr_map.port = dev->if_port; 3538 return 0; 3539 3540 case SIOCGIFINDEX: 3541 ifr->ifr_ifindex = dev->ifindex; 3542 return 0; 3543 3544 case SIOCGIFTXQLEN: 3545 ifr->ifr_qlen = dev->tx_queue_len; 3546 return 0; 3547 3548 default: 3549 /* dev_ioctl() should ensure this case 3550 * is never reached 3551 */ 3552 WARN_ON(1); 3553 err = -EINVAL; 3554 break; 3555 3556 } 3557 return err; 3558 } 3559 3560 /* 3561 * Perform the SIOCxIFxxx calls, inside rtnl_lock() 3562 */ 3563 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) 3564 { 3565 int err; 3566 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); 3567 const struct net_device_ops *ops = dev->netdev_ops; 3568 3569 if (!dev) 3570 return -ENODEV; 3571 3572 switch (cmd) { 3573 case SIOCSIFFLAGS: /* Set interface flags */ 3574 return dev_change_flags(dev, ifr->ifr_flags); 3575 3576 case SIOCSIFMETRIC: /* Set the metric on the interface 3577 (currently unused) */ 3578 return -EOPNOTSUPP; 3579 3580 case SIOCSIFMTU: /* Set the MTU of a device */ 3581 return dev_set_mtu(dev, ifr->ifr_mtu); 3582 3583 case SIOCSIFHWADDR: 3584 return dev_set_mac_address(dev, &ifr->ifr_hwaddr); 3585 3586 case SIOCSIFHWBROADCAST: 3587 if (ifr->ifr_hwaddr.sa_family != dev->type) 3588 return -EINVAL; 3589 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, 3590 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); 3591 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); 3592 return 0; 3593 3594 case SIOCSIFMAP: 3595 if (ops->ndo_set_config) { 3596 if (!netif_device_present(dev)) 3597 return -ENODEV; 3598 return ops->ndo_set_config(dev, &ifr->ifr_map); 3599 } 3600 return -EOPNOTSUPP; 3601 3602 case SIOCADDMULTI: 3603 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || 3604 ifr->ifr_hwaddr.sa_family != AF_UNSPEC) 3605 return -EINVAL; 3606 if (!netif_device_present(dev)) 3607 return -ENODEV; 3608 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, 3609 dev->addr_len, 1); 3610 3611 case SIOCDELMULTI: 3612 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || 3613 ifr->ifr_hwaddr.sa_family != AF_UNSPEC) 3614 return -EINVAL; 3615 if (!netif_device_present(dev)) 3616 return -ENODEV; 3617 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, 3618 dev->addr_len, 1); 3619 3620 case SIOCSIFTXQLEN: 3621 if (ifr->ifr_qlen < 0) 3622 return -EINVAL; 3623 dev->tx_queue_len = ifr->ifr_qlen; 3624 return 0; 3625 3626 case SIOCSIFNAME: 3627 ifr->ifr_newname[IFNAMSIZ-1] = '\0'; 3628 return dev_change_name(dev, ifr->ifr_newname); 3629 3630 /* 3631 * Unknown or private ioctl 3632 */ 3633 3634 default: 3635 if ((cmd >= SIOCDEVPRIVATE && 3636 cmd <= SIOCDEVPRIVATE + 15) || 3637 cmd == SIOCBONDENSLAVE || 3638 cmd == SIOCBONDRELEASE || 3639 cmd == SIOCBONDSETHWADDR || 3640 cmd == SIOCBONDSLAVEINFOQUERY || 3641 cmd == SIOCBONDINFOQUERY || 3642 cmd == SIOCBONDCHANGEACTIVE || 3643 cmd == SIOCGMIIPHY || 3644 cmd == SIOCGMIIREG || 3645 cmd == SIOCSMIIREG || 3646 cmd == SIOCBRADDIF || 3647 cmd == SIOCBRDELIF || 3648 cmd == SIOCWANDEV) { 3649 err = -EOPNOTSUPP; 3650 if (ops->ndo_do_ioctl) { 3651 if (netif_device_present(dev)) 3652 err = ops->ndo_do_ioctl(dev, ifr, cmd); 3653 else 3654 err = -ENODEV; 3655 
} 3656 } else 3657 err = -EINVAL; 3658 3659 } 3660 return err; 3661 } 3662 3663 /* 3664 * This function handles all "interface"-type I/O control requests. The actual 3665 * 'doing' part of this is dev_ifsioc above. 3666 */ 3667 3668 /** 3669 * dev_ioctl - network device ioctl 3670 * @net: the applicable net namespace 3671 * @cmd: command to issue 3672 * @arg: pointer to a struct ifreq in user space 3673 * 3674 * Issue ioctl functions to devices. This is normally called by the 3675 * user space syscall interfaces but can sometimes be useful for 3676 * other purposes. The return value is the return from the syscall if 3677 * positive or a negative errno code on error. 3678 */ 3679 3680 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) 3681 { 3682 struct ifreq ifr; 3683 int ret; 3684 char *colon; 3685 3686 /* One special case: SIOCGIFCONF takes ifconf argument 3687 and requires shared lock, because it sleeps writing 3688 to user space. 3689 */ 3690 3691 if (cmd == SIOCGIFCONF) { 3692 rtnl_lock(); 3693 ret = dev_ifconf(net, (char __user *) arg); 3694 rtnl_unlock(); 3695 return ret; 3696 } 3697 if (cmd == SIOCGIFNAME) 3698 return dev_ifname(net, (struct ifreq __user *)arg); 3699 3700 if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) 3701 return -EFAULT; 3702 3703 ifr.ifr_name[IFNAMSIZ-1] = 0; 3704 3705 colon = strchr(ifr.ifr_name, ':'); 3706 if (colon) 3707 *colon = 0; 3708 3709 /* 3710 * See which interface the caller is talking about. 3711 */ 3712 3713 switch (cmd) { 3714 /* 3715 * These ioctl calls: 3716 * - can be done by all. 3717 * - atomic and do not require locking. 3718 * - return a value 3719 */ 3720 case SIOCGIFFLAGS: 3721 case SIOCGIFMETRIC: 3722 case SIOCGIFMTU: 3723 case SIOCGIFHWADDR: 3724 case SIOCGIFSLAVE: 3725 case SIOCGIFMAP: 3726 case SIOCGIFINDEX: 3727 case SIOCGIFTXQLEN: 3728 dev_load(net, ifr.ifr_name); 3729 read_lock(&dev_base_lock); 3730 ret = dev_ifsioc_locked(net, &ifr, cmd); 3731 read_unlock(&dev_base_lock); 3732 if (!ret) { 3733 if (colon) 3734 *colon = ':'; 3735 if (copy_to_user(arg, &ifr, 3736 sizeof(struct ifreq))) 3737 ret = -EFAULT; 3738 } 3739 return ret; 3740 3741 case SIOCETHTOOL: 3742 dev_load(net, ifr.ifr_name); 3743 rtnl_lock(); 3744 ret = dev_ethtool(net, &ifr); 3745 rtnl_unlock(); 3746 if (!ret) { 3747 if (colon) 3748 *colon = ':'; 3749 if (copy_to_user(arg, &ifr, 3750 sizeof(struct ifreq))) 3751 ret = -EFAULT; 3752 } 3753 return ret; 3754 3755 /* 3756 * These ioctl calls: 3757 * - require superuser power. 3758 * - require strict serialization. 3759 * - return a value 3760 */ 3761 case SIOCGMIIPHY: 3762 case SIOCGMIIREG: 3763 case SIOCSIFNAME: 3764 if (!capable(CAP_NET_ADMIN)) 3765 return -EPERM; 3766 dev_load(net, ifr.ifr_name); 3767 rtnl_lock(); 3768 ret = dev_ifsioc(net, &ifr, cmd); 3769 rtnl_unlock(); 3770 if (!ret) { 3771 if (colon) 3772 *colon = ':'; 3773 if (copy_to_user(arg, &ifr, 3774 sizeof(struct ifreq))) 3775 ret = -EFAULT; 3776 } 3777 return ret; 3778 3779 /* 3780 * These ioctl calls: 3781 * - require superuser power. 3782 * - require strict serialization. 
3783 * - do not return a value 3784 */ 3785 case SIOCSIFFLAGS: 3786 case SIOCSIFMETRIC: 3787 case SIOCSIFMTU: 3788 case SIOCSIFMAP: 3789 case SIOCSIFHWADDR: 3790 case SIOCSIFSLAVE: 3791 case SIOCADDMULTI: 3792 case SIOCDELMULTI: 3793 case SIOCSIFHWBROADCAST: 3794 case SIOCSIFTXQLEN: 3795 case SIOCSMIIREG: 3796 case SIOCBONDENSLAVE: 3797 case SIOCBONDRELEASE: 3798 case SIOCBONDSETHWADDR: 3799 case SIOCBONDCHANGEACTIVE: 3800 case SIOCBRADDIF: 3801 case SIOCBRDELIF: 3802 if (!capable(CAP_NET_ADMIN)) 3803 return -EPERM; 3804 /* fall through */ 3805 case SIOCBONDSLAVEINFOQUERY: 3806 case SIOCBONDINFOQUERY: 3807 dev_load(net, ifr.ifr_name); 3808 rtnl_lock(); 3809 ret = dev_ifsioc(net, &ifr, cmd); 3810 rtnl_unlock(); 3811 return ret; 3812 3813 case SIOCGIFMEM: 3814 /* Get the per device memory space. We can add this but 3815 * currently do not support it */ 3816 case SIOCSIFMEM: 3817 /* Set the per device memory buffer space. 3818 * Not applicable in our case */ 3819 case SIOCSIFLINK: 3820 return -EINVAL; 3821 3822 /* 3823 * Unknown or private ioctl. 3824 */ 3825 default: 3826 if (cmd == SIOCWANDEV || 3827 (cmd >= SIOCDEVPRIVATE && 3828 cmd <= SIOCDEVPRIVATE + 15)) { 3829 dev_load(net, ifr.ifr_name); 3830 rtnl_lock(); 3831 ret = dev_ifsioc(net, &ifr, cmd); 3832 rtnl_unlock(); 3833 if (!ret && copy_to_user(arg, &ifr, 3834 sizeof(struct ifreq))) 3835 ret = -EFAULT; 3836 return ret; 3837 } 3838 /* Take care of Wireless Extensions */ 3839 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) 3840 return wext_handle_ioctl(net, &ifr, cmd, arg); 3841 return -EINVAL; 3842 } 3843 } 3844 3845 3846 /** 3847 * dev_new_index - allocate an ifindex 3848 * @net: the applicable net namespace 3849 * 3850 * Returns a suitable unique value for a new device interface 3851 * number. The caller must hold the rtnl semaphore or the 3852 * dev_base_lock to be sure it remains unique. 3853 */ 3854 static int dev_new_index(struct net *net) 3855 { 3856 static int ifindex; 3857 for (;;) { 3858 if (++ifindex <= 0) 3859 ifindex = 1; 3860 if (!__dev_get_by_index(net, ifindex)) 3861 return ifindex; 3862 } 3863 } 3864 3865 /* Delayed registration/unregisteration */ 3866 static LIST_HEAD(net_todo_list); 3867 3868 static void net_set_todo(struct net_device *dev) 3869 { 3870 list_add_tail(&dev->todo_list, &net_todo_list); 3871 } 3872 3873 static void rollback_registered(struct net_device *dev) 3874 { 3875 BUG_ON(dev_boot_phase); 3876 ASSERT_RTNL(); 3877 3878 /* Some devices call without registering for initialization unwind. */ 3879 if (dev->reg_state == NETREG_UNINITIALIZED) { 3880 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never " 3881 "was registered\n", dev->name, dev); 3882 3883 WARN_ON(1); 3884 return; 3885 } 3886 3887 BUG_ON(dev->reg_state != NETREG_REGISTERED); 3888 3889 /* If device is running, close it first. */ 3890 dev_close(dev); 3891 3892 /* And unlink it from device chain. */ 3893 unlist_netdevice(dev); 3894 3895 dev->reg_state = NETREG_UNREGISTERING; 3896 3897 synchronize_net(); 3898 3899 /* Shutdown queueing discipline. */ 3900 dev_shutdown(dev); 3901 3902 3903 /* Notify protocols, that we are about to destroy 3904 this device. They should clean all the things. 3905 */ 3906 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 3907 3908 /* 3909 * Flush the unicast and multicast chains 3910 */ 3911 dev_addr_discard(dev); 3912 3913 if (dev->netdev_ops->ndo_uninit) 3914 dev->netdev_ops->ndo_uninit(dev); 3915 3916 /* Notifier chain MUST detach us from master device. 
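 *
 * Editor's illustrative sketch (hedged): a subsystem holding a
 * reference is expected to react to the NETDEV_UNREGISTER event sent
 * above, e.g.
 *
 *	static int foo_netdev_event(struct notifier_block *nb,
 *				    unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UNREGISTER)
 *			foo_release(dev);	(drops its dev_hold())
 *		return NOTIFY_DONE;
 *	}
 *
 * otherwise netdev_wait_allrefs() further down will stall waiting for
 * the refcount to drop.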
*/ 3917 WARN_ON(dev->master); 3918 3919 /* Remove entries from kobject tree */ 3920 netdev_unregister_kobject(dev); 3921 3922 synchronize_net(); 3923 3924 dev_put(dev); 3925 } 3926 3927 static void __netdev_init_queue_locks_one(struct net_device *dev, 3928 struct netdev_queue *dev_queue, 3929 void *_unused) 3930 { 3931 spin_lock_init(&dev_queue->_xmit_lock); 3932 netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type); 3933 dev_queue->xmit_lock_owner = -1; 3934 } 3935 3936 static void netdev_init_queue_locks(struct net_device *dev) 3937 { 3938 netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL); 3939 __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL); 3940 } 3941 3942 unsigned long netdev_fix_features(unsigned long features, const char *name) 3943 { 3944 /* Fix illegal SG+CSUM combinations. */ 3945 if ((features & NETIF_F_SG) && 3946 !(features & NETIF_F_ALL_CSUM)) { 3947 if (name) 3948 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no " 3949 "checksum feature.\n", name); 3950 features &= ~NETIF_F_SG; 3951 } 3952 3953 /* TSO requires that SG is present as well. */ 3954 if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) { 3955 if (name) 3956 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no " 3957 "SG feature.\n", name); 3958 features &= ~NETIF_F_TSO; 3959 } 3960 3961 if (features & NETIF_F_UFO) { 3962 if (!(features & NETIF_F_GEN_CSUM)) { 3963 if (name) 3964 printk(KERN_ERR "%s: Dropping NETIF_F_UFO " 3965 "since no NETIF_F_HW_CSUM feature.\n", 3966 name); 3967 features &= ~NETIF_F_UFO; 3968 } 3969 3970 if (!(features & NETIF_F_SG)) { 3971 if (name) 3972 printk(KERN_ERR "%s: Dropping NETIF_F_UFO " 3973 "since no NETIF_F_SG feature.\n", name); 3974 features &= ~NETIF_F_UFO; 3975 } 3976 } 3977 3978 return features; 3979 } 3980 EXPORT_SYMBOL(netdev_fix_features); 3981 3982 /** 3983 * register_netdevice - register a network device 3984 * @dev: device to register 3985 * 3986 * Take a completed network device structure and add it to the kernel 3987 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier 3988 * chain. 0 is returned on success. A negative errno code is returned 3989 * on a failure to set up the device, or if the name is a duplicate. 3990 * 3991 * Callers must hold the rtnl semaphore. You may want 3992 * register_netdev() instead of this. 3993 * 3994 * BUGS: 3995 * The locking appears insufficient to guarantee two parallel registers 3996 * will not get the same name. 3997 */ 3998 3999 int register_netdevice(struct net_device *dev) 4000 { 4001 struct hlist_head *head; 4002 struct hlist_node *p; 4003 int ret; 4004 struct net *net = dev_net(dev); 4005 4006 BUG_ON(dev_boot_phase); 4007 ASSERT_RTNL(); 4008 4009 might_sleep(); 4010 4011 /* When net_device's are persistent, this will be fatal. */ 4012 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); 4013 BUG_ON(!net); 4014 4015 spin_lock_init(&dev->addr_list_lock); 4016 netdev_set_addr_lockdep_class(dev); 4017 netdev_init_queue_locks(dev); 4018 4019 dev->iflink = -1; 4020 4021 #ifdef CONFIG_COMPAT_NET_DEV_OPS 4022 /* Netdevice_ops API compatiability support. 4023 * This is temporary until all network devices are converted. 
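 *
 * Editor's illustrative sketch (hedged): a converted driver supplies
 * the new ops table instead of filling the old embedded pointers,
 * e.g.
 *
 *	static const struct net_device_ops foo_netdev_ops = {
 *		.ndo_open		= foo_open,
 *		.ndo_stop		= foo_stop,
 *		.ndo_start_xmit		= foo_start_xmit,
 *		.ndo_set_multicast_list	= foo_set_rx_mode,
 *		.ndo_set_mac_address	= eth_mac_addr,
 *		.ndo_change_mtu		= eth_change_mtu,
 *	};
 *	...
 *	dev->netdev_ops = &foo_netdev_ops;	(in the setup routine)
 *
 * Unconverted drivers fall into the else branch below and keep using
 * the old function pointers directly.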
4024 */ 4025 if (dev->netdev_ops) { 4026 const struct net_device_ops *ops = dev->netdev_ops; 4027 4028 dev->init = ops->ndo_init; 4029 dev->uninit = ops->ndo_uninit; 4030 dev->open = ops->ndo_open; 4031 dev->change_rx_flags = ops->ndo_change_rx_flags; 4032 dev->set_rx_mode = ops->ndo_set_rx_mode; 4033 dev->set_multicast_list = ops->ndo_set_multicast_list; 4034 dev->set_mac_address = ops->ndo_set_mac_address; 4035 dev->validate_addr = ops->ndo_validate_addr; 4036 dev->do_ioctl = ops->ndo_do_ioctl; 4037 dev->set_config = ops->ndo_set_config; 4038 dev->change_mtu = ops->ndo_change_mtu; 4039 dev->tx_timeout = ops->ndo_tx_timeout; 4040 dev->get_stats = ops->ndo_get_stats; 4041 dev->vlan_rx_register = ops->ndo_vlan_rx_register; 4042 dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid; 4043 dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid; 4044 #ifdef CONFIG_NET_POLL_CONTROLLER 4045 dev->poll_controller = ops->ndo_poll_controller; 4046 #endif 4047 } else { 4048 char drivername[64]; 4049 pr_info("%s (%s): not using net_device_ops yet\n", 4050 dev->name, netdev_drivername(dev, drivername, 64)); 4051 4052 /* This works only because net_device_ops and the 4053 compatiablity structure are the same. */ 4054 dev->netdev_ops = (void *) &(dev->init); 4055 } 4056 #endif 4057 4058 /* Init, if this function is available */ 4059 if (dev->netdev_ops->ndo_init) { 4060 ret = dev->netdev_ops->ndo_init(dev); 4061 if (ret) { 4062 if (ret > 0) 4063 ret = -EIO; 4064 goto out; 4065 } 4066 } 4067 4068 if (!dev_valid_name(dev->name)) { 4069 ret = -EINVAL; 4070 goto err_uninit; 4071 } 4072 4073 dev->ifindex = dev_new_index(net); 4074 if (dev->iflink == -1) 4075 dev->iflink = dev->ifindex; 4076 4077 /* Check for existence of name */ 4078 head = dev_name_hash(net, dev->name); 4079 hlist_for_each(p, head) { 4080 struct net_device *d 4081 = hlist_entry(p, struct net_device, name_hlist); 4082 if (!strncmp(d->name, dev->name, IFNAMSIZ)) { 4083 ret = -EEXIST; 4084 goto err_uninit; 4085 } 4086 } 4087 4088 /* Fix illegal checksum combinations */ 4089 if ((dev->features & NETIF_F_HW_CSUM) && 4090 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { 4091 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", 4092 dev->name); 4093 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); 4094 } 4095 4096 if ((dev->features & NETIF_F_NO_CSUM) && 4097 (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { 4098 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", 4099 dev->name); 4100 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); 4101 } 4102 4103 dev->features = netdev_fix_features(dev->features, dev->name); 4104 4105 /* Enable software GSO if SG is supported. */ 4106 if (dev->features & NETIF_F_SG) 4107 dev->features |= NETIF_F_GSO; 4108 4109 netdev_initialize_kobject(dev); 4110 ret = netdev_register_kobject(dev); 4111 if (ret) 4112 goto err_uninit; 4113 dev->reg_state = NETREG_REGISTERED; 4114 4115 /* 4116 * Default initial state at registry is that the 4117 * device is present. 4118 */ 4119 4120 set_bit(__LINK_STATE_PRESENT, &dev->state); 4121 4122 dev_init_scheduler(dev); 4123 dev_hold(dev); 4124 list_netdevice(dev); 4125 4126 /* Notify protocols, that a new device appeared. 
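 *
 * Editor's note (hedged): protocols interested in new devices listen
 * on the netdev notifier chain, typically registered once at module
 * init:
 *
 *	static struct notifier_block foo_notifier = {
 *		.notifier_call = foo_netdev_event,
 *	};
 *	...
 *	register_netdevice_notifier(&foo_notifier);
 *
 * A non-zero (errno-encoded) result from the chain makes the
 * registration below unwind via rollback_registered().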
*/ 4127 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); 4128 ret = notifier_to_errno(ret); 4129 if (ret) { 4130 rollback_registered(dev); 4131 dev->reg_state = NETREG_UNREGISTERED; 4132 } 4133 4134 out: 4135 return ret; 4136 4137 err_uninit: 4138 if (dev->netdev_ops->ndo_uninit) 4139 dev->netdev_ops->ndo_uninit(dev); 4140 goto out; 4141 } 4142 4143 /** 4144 * register_netdev - register a network device 4145 * @dev: device to register 4146 * 4147 * Take a completed network device structure and add it to the kernel 4148 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier 4149 * chain. 0 is returned on success. A negative errno code is returned 4150 * on a failure to set up the device, or if the name is a duplicate. 4151 * 4152 * This is a wrapper around register_netdevice that takes the rtnl semaphore 4153 * and expands the device name if you passed a format string to 4154 * alloc_netdev. 4155 */ 4156 int register_netdev(struct net_device *dev) 4157 { 4158 int err; 4159 4160 rtnl_lock(); 4161 4162 /* 4163 * If the name is a format string the caller wants us to do a 4164 * name allocation. 4165 */ 4166 if (strchr(dev->name, '%')) { 4167 err = dev_alloc_name(dev, dev->name); 4168 if (err < 0) 4169 goto out; 4170 } 4171 4172 err = register_netdevice(dev); 4173 out: 4174 rtnl_unlock(); 4175 return err; 4176 } 4177 EXPORT_SYMBOL(register_netdev); 4178 4179 /* 4180 * netdev_wait_allrefs - wait until all references are gone. 4181 * 4182 * This is called when unregistering network devices. 4183 * 4184 * Any protocol or device that holds a reference should register 4185 * for netdevice notification, and cleanup and put back the 4186 * reference if they receive an UNREGISTER event. 4187 * We can get stuck here if buggy protocols don't correctly 4188 * call dev_put. 4189 */ 4190 static void netdev_wait_allrefs(struct net_device *dev) 4191 { 4192 unsigned long rebroadcast_time, warning_time; 4193 4194 rebroadcast_time = warning_time = jiffies; 4195 while (atomic_read(&dev->refcnt) != 0) { 4196 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { 4197 rtnl_lock(); 4198 4199 /* Rebroadcast unregister notification */ 4200 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 4201 4202 if (test_bit(__LINK_STATE_LINKWATCH_PENDING, 4203 &dev->state)) { 4204 /* We must not have linkwatch events 4205 * pending on unregister. If this 4206 * happens, we simply run the queue 4207 * unscheduled, resulting in a noop 4208 * for this device. 4209 */ 4210 linkwatch_run_queue(); 4211 } 4212 4213 __rtnl_unlock(); 4214 4215 rebroadcast_time = jiffies; 4216 } 4217 4218 msleep(250); 4219 4220 if (time_after(jiffies, warning_time + 10 * HZ)) { 4221 printk(KERN_EMERG "unregister_netdevice: " 4222 "waiting for %s to become free. Usage " 4223 "count = %d\n", 4224 dev->name, atomic_read(&dev->refcnt)); 4225 warning_time = jiffies; 4226 } 4227 } 4228 } 4229 4230 /* The sequence is: 4231 * 4232 * rtnl_lock(); 4233 * ... 4234 * register_netdevice(x1); 4235 * register_netdevice(x2); 4236 * ... 4237 * unregister_netdevice(y1); 4238 * unregister_netdevice(y2); 4239 * ... 4240 * rtnl_unlock(); 4241 * free_netdev(y1); 4242 * free_netdev(y2); 4243 * 4244 * We are invoked by rtnl_unlock(). 4245 * This allows us to deal with problems: 4246 * 1) We can delete sysfs objects which invoke hotplug 4247 * without deadlocking with linkwatch via keventd. 4248 * 2) Since we run with the RTNL semaphore not held, we can sleep 4249 * safely in order to wait for the netdev refcnt to drop to zero. 
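 *
 * Editor's usage sketch (hedged): most drivers see this machinery only
 * indirectly, through the usual lifecycle
 *
 *	dev = alloc_netdev(sizeof(struct foo_priv), "foo%d", foo_setup);
 *	err = register_netdev(dev);		(takes rtnl_lock itself)
 *	...
 *	unregister_netdev(dev);			(schedules the todo work)
 *	free_netdev(dev);			(only after unregister returns)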
4250 * 4251 * We must not return until all unregister events added during 4252 * the interval the lock was held have been completed. 4253 */ 4254 void netdev_run_todo(void) 4255 { 4256 struct list_head list; 4257 4258 /* Snapshot list, allow later requests */ 4259 list_replace_init(&net_todo_list, &list); 4260 4261 __rtnl_unlock(); 4262 4263 while (!list_empty(&list)) { 4264 struct net_device *dev 4265 = list_entry(list.next, struct net_device, todo_list); 4266 list_del(&dev->todo_list); 4267 4268 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { 4269 printk(KERN_ERR "network todo '%s' but state %d\n", 4270 dev->name, dev->reg_state); 4271 dump_stack(); 4272 continue; 4273 } 4274 4275 dev->reg_state = NETREG_UNREGISTERED; 4276 4277 on_each_cpu(flush_backlog, dev, 1); 4278 4279 netdev_wait_allrefs(dev); 4280 4281 /* paranoia */ 4282 BUG_ON(atomic_read(&dev->refcnt)); 4283 WARN_ON(dev->ip_ptr); 4284 WARN_ON(dev->ip6_ptr); 4285 WARN_ON(dev->dn_ptr); 4286 4287 if (dev->destructor) 4288 dev->destructor(dev); 4289 4290 /* Free network device */ 4291 kobject_put(&dev->dev.kobj); 4292 } 4293 } 4294 4295 /** 4296 * dev_get_stats - get network device statistics 4297 * @dev: device to get statistics from 4298 * 4299 * Get network statistics from device. The device driver may provide 4300 * its own method by setting dev->netdev_ops->ndo_get_stats; otherwise 4301 * the internal statistics structure is used. 4302 */ 4303 const struct net_device_stats *dev_get_stats(struct net_device *dev) 4304 { 4305 const struct net_device_ops *ops = dev->netdev_ops; 4306 4307 if (ops->ndo_get_stats) 4308 return ops->ndo_get_stats(dev); 4309 else 4310 return &dev->stats; 4311 } 4312 EXPORT_SYMBOL(dev_get_stats); 4313 4314 static void netdev_init_one_queue(struct net_device *dev, 4315 struct netdev_queue *queue, 4316 void *_unused) 4317 { 4318 queue->dev = dev; 4319 } 4320 4321 static void netdev_init_queues(struct net_device *dev) 4322 { 4323 netdev_init_one_queue(dev, &dev->rx_queue, NULL); 4324 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); 4325 spin_lock_init(&dev->tx_global_lock); 4326 } 4327 4328 /** 4329 * alloc_netdev_mq - allocate network device 4330 * @sizeof_priv: size of private data to allocate space for 4331 * @name: device name format string 4332 * @setup: callback to initialize device 4333 * @queue_count: the number of subqueues to allocate 4334 * 4335 * Allocates a struct net_device with private data area for driver use 4336 * and performs basic initialization. Also allocates subqueue structs 4337 * for each queue on the device at the end of the netdevice.
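 *
 * Illustrative usage (hypothetical driver; the mydemo_* names and the
 * "demo%d" format string are assumptions, not taken from this file):
 *
 *	dev = alloc_netdev_mq(sizeof(struct mydemo_priv), "demo%d",
 *			      mydemo_setup, 1);
 *	if (!dev)
 *		return -ENOMEM;
 *	err = register_netdev(dev);
 *	if (err) {
 *		free_netdev(dev);
 *		return err;
 *	}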
4338 */ 4339 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, 4340 void (*setup)(struct net_device *), unsigned int queue_count) 4341 { 4342 struct netdev_queue *tx; 4343 struct net_device *dev; 4344 size_t alloc_size; 4345 void *p; 4346 4347 BUG_ON(strlen(name) >= sizeof(dev->name)); 4348 4349 alloc_size = sizeof(struct net_device); 4350 if (sizeof_priv) { 4351 /* ensure 32-byte alignment of private area */ 4352 alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; 4353 alloc_size += sizeof_priv; 4354 } 4355 /* ensure 32-byte alignment of whole construct */ 4356 alloc_size += NETDEV_ALIGN_CONST; 4357 4358 p = kzalloc(alloc_size, GFP_KERNEL); 4359 if (!p) { 4360 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n"); 4361 return NULL; 4362 } 4363 4364 tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL); 4365 if (!tx) { 4366 printk(KERN_ERR "alloc_netdev: Unable to allocate " 4367 "tx qdiscs.\n"); 4368 kfree(p); 4369 return NULL; 4370 } 4371 4372 dev = (struct net_device *) 4373 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); 4374 dev->padded = (char *)dev - (char *)p; 4375 dev_net_set(dev, &init_net); 4376 4377 dev->_tx = tx; 4378 dev->num_tx_queues = queue_count; 4379 dev->real_num_tx_queues = queue_count; 4380 4381 if (sizeof_priv) { 4382 dev->priv = ((char *)dev + 4383 ((sizeof(struct net_device) + NETDEV_ALIGN_CONST) 4384 & ~NETDEV_ALIGN_CONST)); 4385 } 4386 4387 dev->gso_max_size = GSO_MAX_SIZE; 4388 4389 netdev_init_queues(dev); 4390 4391 netpoll_netdev_init(dev); 4392 setup(dev); 4393 strcpy(dev->name, name); 4394 return dev; 4395 } 4396 EXPORT_SYMBOL(alloc_netdev_mq); 4397 4398 /** 4399 * free_netdev - free network device 4400 * @dev: device 4401 * 4402 * This function does the last stage of destroying an allocated device 4403 * interface. The reference to the device object is released. 4404 * If this is the last reference then it will be freed. 4405 */ 4406 void free_netdev(struct net_device *dev) 4407 { 4408 release_net(dev_net(dev)); 4409 4410 kfree(dev->_tx); 4411 4412 /* Compatibility with error handling in drivers */ 4413 if (dev->reg_state == NETREG_UNINITIALIZED) { 4414 kfree((char *)dev - dev->padded); 4415 return; 4416 } 4417 4418 BUG_ON(dev->reg_state != NETREG_UNREGISTERED); 4419 dev->reg_state = NETREG_RELEASED; 4420 4421 /* will free via device release */ 4422 put_device(&dev->dev); 4423 } 4424 4425 /** 4426 * synchronize_net - Synchronize with packet receive processing 4427 * 4428 * Wait for packets currently being received to be done. 4429 * Does not block later packets from starting. 4430 */ 4431 void synchronize_net(void) 4432 { 4433 might_sleep(); 4434 synchronize_rcu(); 4435 } 4436 4437 /** 4438 * unregister_netdevice - remove device from the kernel 4439 * @dev: device 4440 * 4441 * This function shuts down a device interface and removes it 4442 * from the kernel tables. 4443 * 4444 * Callers must hold the rtnl semaphore. You may want 4445 * unregister_netdev() instead of this. 4446 */ 4447 4448 void unregister_netdevice(struct net_device *dev) 4449 { 4450 ASSERT_RTNL(); 4451 4452 rollback_registered(dev); 4453 /* Finish processing unregister after unlock */ 4454 net_set_todo(dev); 4455 } 4456 4457 /** 4458 * unregister_netdev - remove device from the kernel 4459 * @dev: device 4460 * 4461 * This function shuts down a device interface and removes it 4462 * from the kernel tables. 4463 * 4464 * This is just a wrapper for unregister_netdevice that takes 4465 * the rtnl semaphore. 
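 *
 * Illustrative teardown (hypothetical mydemo_dev pointer): a driver's
 * module exit path normally pairs this call with free_netdev(), e.g.
 *
 *	unregister_netdev(mydemo_dev);
 *	free_netdev(mydemo_dev);
 *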
In general you want to use this and not 4466 * unregister_netdevice. 4467 */ 4468 void unregister_netdev(struct net_device *dev) 4469 { 4470 rtnl_lock(); 4471 unregister_netdevice(dev); 4472 rtnl_unlock(); 4473 } 4474 4475 EXPORT_SYMBOL(unregister_netdev); 4476 4477 /** 4478 * dev_change_net_namespace - move device to different network namespace 4479 * @dev: device 4480 * @net: network namespace 4481 * @pat: If not NULL, name pattern to try if the current device name 4482 * is already taken in the destination network namespace. 4483 * 4484 * This function shuts down a device interface and moves it 4485 * to a new network namespace. On success 0 is returned, on 4486 * a failure a negative errno code is returned. 4487 * 4488 * Callers must hold the rtnl semaphore. 4489 */ 4490 4491 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) 4492 { 4493 char buf[IFNAMSIZ]; 4494 const char *destname; 4495 int err; 4496 4497 ASSERT_RTNL(); 4498 4499 /* Don't allow namespace local devices to be moved. */ 4500 err = -EINVAL; 4501 if (dev->features & NETIF_F_NETNS_LOCAL) 4502 goto out; 4503 4504 #ifdef CONFIG_SYSFS 4505 /* Don't allow real devices to be moved when sysfs 4506 * is enabled. 4507 */ 4508 err = -EINVAL; 4509 if (dev->dev.parent) 4510 goto out; 4511 #endif 4512 4513 /* Ensure the device has been registered */ 4514 err = -EINVAL; 4515 if (dev->reg_state != NETREG_REGISTERED) 4516 goto out; 4517 4518 /* Get out if there is nothing to do */ 4519 err = 0; 4520 if (net_eq(dev_net(dev), net)) 4521 goto out; 4522 4523 /* Pick the destination device name, and ensure 4524 * we can use it in the destination network namespace. 4525 */ 4526 err = -EEXIST; 4527 destname = dev->name; 4528 if (__dev_get_by_name(net, destname)) { 4529 /* We get here if we can't use the current device name */ 4530 if (!pat) 4531 goto out; 4532 if (!dev_valid_name(pat)) 4533 goto out; 4534 if (strchr(pat, '%')) { 4535 if (__dev_alloc_name(net, pat, buf) < 0) 4536 goto out; 4537 destname = buf; 4538 } else 4539 destname = pat; 4540 if (__dev_get_by_name(net, destname)) 4541 goto out; 4542 } 4543 4544 /* 4545 * And now a mini version of register_netdevice and unregister_netdevice. 4546 */ 4547 4548 /* If the device is running, close it first. */ 4549 dev_close(dev); 4550 4551 /* And unlink it from device chain */ 4552 err = -ENODEV; 4553 unlist_netdevice(dev); 4554 4555 synchronize_net(); 4556 4557 /* Shutdown queueing discipline. */ 4558 dev_shutdown(dev); 4559 4560 /* Notify protocols that we are about to destroy 4561 this device. They should clean all the things. 4562 */ 4563 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 4564 4565 /* 4566 * Flush the unicast and multicast chains 4567 */ 4568 dev_addr_discard(dev); 4569 4570 netdev_unregister_kobject(dev); 4571 4572 /* Actually switch the network namespace */ 4573 dev_net_set(dev, net); 4574 4575 /* Assign the new device name */ 4576 if (destname != dev->name) 4577 strcpy(dev->name, destname); 4578 4579 /* If there is an ifindex conflict assign a new one */ 4580 if (__dev_get_by_index(net, dev->ifindex)) { 4581 int iflink = (dev->iflink == dev->ifindex); 4582 dev->ifindex = dev_new_index(net); 4583 if (iflink) 4584 dev->iflink = dev->ifindex; 4585 } 4586 4587 /* Fixup kobjects */ 4588 err = netdev_register_kobject(dev); 4589 WARN_ON(err); 4590 4591 /* Add the device back in the hashes */ 4592 list_netdevice(dev); 4593 4594 /* Notify protocols that a new device appeared.
*/ 4595 call_netdevice_notifiers(NETDEV_REGISTER, dev); 4596 4597 synchronize_net(); 4598 err = 0; 4599 out: 4600 return err; 4601 } 4602 4603 static int dev_cpu_callback(struct notifier_block *nfb, 4604 unsigned long action, 4605 void *ocpu) 4606 { 4607 struct sk_buff **list_skb; 4608 struct Qdisc **list_net; 4609 struct sk_buff *skb; 4610 unsigned int cpu, oldcpu = (unsigned long)ocpu; 4611 struct softnet_data *sd, *oldsd; 4612 4613 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) 4614 return NOTIFY_OK; 4615 4616 local_irq_disable(); 4617 cpu = smp_processor_id(); 4618 sd = &per_cpu(softnet_data, cpu); 4619 oldsd = &per_cpu(softnet_data, oldcpu); 4620 4621 /* Find end of our completion_queue. */ 4622 list_skb = &sd->completion_queue; 4623 while (*list_skb) 4624 list_skb = &(*list_skb)->next; 4625 /* Append completion queue from offline CPU. */ 4626 *list_skb = oldsd->completion_queue; 4627 oldsd->completion_queue = NULL; 4628 4629 /* Find end of our output_queue. */ 4630 list_net = &sd->output_queue; 4631 while (*list_net) 4632 list_net = &(*list_net)->next_sched; 4633 /* Append output queue from offline CPU. */ 4634 *list_net = oldsd->output_queue; 4635 oldsd->output_queue = NULL; 4636 4637 raise_softirq_irqoff(NET_TX_SOFTIRQ); 4638 local_irq_enable(); 4639 4640 /* Process offline CPU's input_pkt_queue */ 4641 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) 4642 netif_rx(skb); 4643 4644 return NOTIFY_OK; 4645 } 4646 4647 #ifdef CONFIG_NET_DMA 4648 /** 4649 * net_dma_rebalance - try to maintain one DMA channel per CPU 4650 * @net_dma: DMA client and associated data (lock, channels, channel_mask) 4651 * 4652 * This is called when the number of channels allocated to the net_dma client 4653 * changes. The net_dma client tries to have one DMA channel per CPU. 4654 */ 4655 4656 static void net_dma_rebalance(struct net_dma *net_dma) 4657 { 4658 unsigned int cpu, i, n, chan_idx; 4659 struct dma_chan *chan; 4660 4661 if (cpus_empty(net_dma->channel_mask)) { 4662 for_each_online_cpu(cpu) 4663 rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL); 4664 return; 4665 } 4666 4667 i = 0; 4668 cpu = first_cpu(cpu_online_map); 4669 4670 for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) { 4671 chan = net_dma->channels[chan_idx]; 4672 4673 n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask)) 4674 + (i < (num_online_cpus() % 4675 cpus_weight(net_dma->channel_mask)) ? 
1 : 0)); 4676 4677 while(n) { 4678 per_cpu(softnet_data, cpu).net_dma = chan; 4679 cpu = next_cpu(cpu, cpu_online_map); 4680 n--; 4681 } 4682 i++; 4683 } 4684 } 4685 4686 /** 4687 * netdev_dma_event - event callback for the net_dma_client 4688 * @client: should always be net_dma_client 4689 * @chan: DMA channel for the event 4690 * @state: DMA state to be handled 4691 */ 4692 static enum dma_state_client 4693 netdev_dma_event(struct dma_client *client, struct dma_chan *chan, 4694 enum dma_state state) 4695 { 4696 int i, found = 0, pos = -1; 4697 struct net_dma *net_dma = 4698 container_of(client, struct net_dma, client); 4699 enum dma_state_client ack = DMA_DUP; /* default: take no action */ 4700 4701 spin_lock(&net_dma->lock); 4702 switch (state) { 4703 case DMA_RESOURCE_AVAILABLE: 4704 for (i = 0; i < nr_cpu_ids; i++) 4705 if (net_dma->channels[i] == chan) { 4706 found = 1; 4707 break; 4708 } else if (net_dma->channels[i] == NULL && pos < 0) 4709 pos = i; 4710 4711 if (!found && pos >= 0) { 4712 ack = DMA_ACK; 4713 net_dma->channels[pos] = chan; 4714 cpu_set(pos, net_dma->channel_mask); 4715 net_dma_rebalance(net_dma); 4716 } 4717 break; 4718 case DMA_RESOURCE_REMOVED: 4719 for (i = 0; i < nr_cpu_ids; i++) 4720 if (net_dma->channels[i] == chan) { 4721 found = 1; 4722 pos = i; 4723 break; 4724 } 4725 4726 if (found) { 4727 ack = DMA_ACK; 4728 cpu_clear(pos, net_dma->channel_mask); 4729 net_dma->channels[i] = NULL; 4730 net_dma_rebalance(net_dma); 4731 } 4732 break; 4733 default: 4734 break; 4735 } 4736 spin_unlock(&net_dma->lock); 4737 4738 return ack; 4739 } 4740 4741 /** 4742 * netdev_dma_register - register the networking subsystem as a DMA client 4743 */ 4744 static int __init netdev_dma_register(void) 4745 { 4746 net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct net_dma), 4747 GFP_KERNEL); 4748 if (unlikely(!net_dma.channels)) { 4749 printk(KERN_NOTICE 4750 "netdev_dma: no memory for net_dma.channels\n"); 4751 return -ENOMEM; 4752 } 4753 spin_lock_init(&net_dma.lock); 4754 dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask); 4755 dma_async_client_register(&net_dma.client); 4756 dma_async_client_chan_request(&net_dma.client); 4757 return 0; 4758 } 4759 4760 #else 4761 static int __init netdev_dma_register(void) { return -ENODEV; } 4762 #endif /* CONFIG_NET_DMA */ 4763 4764 /** 4765 * netdev_increment_features - increment feature set by one 4766 * @all: current feature set 4767 * @one: new feature set 4768 * @mask: mask feature set 4769 * 4770 * Computes a new feature set after adding a device with feature set 4771 * @one to the master device with current feature set @all. Will not 4772 * enable anything that is off in @mask. Returns the new feature set. 4773 */ 4774 unsigned long netdev_increment_features(unsigned long all, unsigned long one, 4775 unsigned long mask) 4776 { 4777 /* If device needs checksumming, downgrade to it. */ 4778 if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) 4779 all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM); 4780 else if (mask & NETIF_F_ALL_CSUM) { 4781 /* If one device supports v4/v6 checksumming, set for all. */ 4782 if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) && 4783 !(all & NETIF_F_GEN_CSUM)) { 4784 all &= ~NETIF_F_ALL_CSUM; 4785 all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); 4786 } 4787 4788 /* If one device supports hw checksumming, set for all. 
*/ 4789 if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) { 4790 all &= ~NETIF_F_ALL_CSUM; 4791 all |= NETIF_F_HW_CSUM; 4792 } 4793 } 4794 4795 one |= NETIF_F_ALL_CSUM; 4796 4797 one |= all & NETIF_F_ONE_FOR_ALL; 4798 all &= one | NETIF_F_LLTX | NETIF_F_GSO; 4799 all |= one & mask & NETIF_F_ONE_FOR_ALL; 4800 4801 return all; 4802 } 4803 EXPORT_SYMBOL(netdev_increment_features); 4804 4805 static struct hlist_head *netdev_create_hash(void) 4806 { 4807 int i; 4808 struct hlist_head *hash; 4809 4810 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL); 4811 if (hash != NULL) 4812 for (i = 0; i < NETDEV_HASHENTRIES; i++) 4813 INIT_HLIST_HEAD(&hash[i]); 4814 4815 return hash; 4816 } 4817 4818 /* Initialize per network namespace state */ 4819 static int __net_init netdev_init(struct net *net) 4820 { 4821 INIT_LIST_HEAD(&net->dev_base_head); 4822 4823 net->dev_name_head = netdev_create_hash(); 4824 if (net->dev_name_head == NULL) 4825 goto err_name; 4826 4827 net->dev_index_head = netdev_create_hash(); 4828 if (net->dev_index_head == NULL) 4829 goto err_idx; 4830 4831 return 0; 4832 4833 err_idx: 4834 kfree(net->dev_name_head); 4835 err_name: 4836 return -ENOMEM; 4837 } 4838 4839 /** 4840 * netdev_drivername - network driver for the device 4841 * @dev: network device 4842 * @buffer: buffer for resulting name 4843 * @len: size of buffer 4844 * 4845 * Determine network driver for device. 4846 */ 4847 char *netdev_drivername(const struct net_device *dev, char *buffer, int len) 4848 { 4849 const struct device_driver *driver; 4850 const struct device *parent; 4851 4852 if (len <= 0 || !buffer) 4853 return buffer; 4854 buffer[0] = 0; 4855 4856 parent = dev->dev.parent; 4857 4858 if (!parent) 4859 return buffer; 4860 4861 driver = parent->driver; 4862 if (driver && driver->name) 4863 strlcpy(buffer, driver->name, len); 4864 return buffer; 4865 } 4866 4867 static void __net_exit netdev_exit(struct net *net) 4868 { 4869 kfree(net->dev_name_head); 4870 kfree(net->dev_index_head); 4871 } 4872 4873 static struct pernet_operations __net_initdata netdev_net_ops = { 4874 .init = netdev_init, 4875 .exit = netdev_exit, 4876 }; 4877 4878 static void __net_exit default_device_exit(struct net *net) 4879 { 4880 struct net_device *dev, *next; 4881 /* 4882 * Push all migratable network devices back to the 4883 * initial network namespace 4884 */ 4885 rtnl_lock(); 4886 for_each_netdev_safe(net, dev, next) { 4887 int err; 4888 char fb_name[IFNAMSIZ]; 4889 4890 /* Ignore unmoveable devices (e.g. loopback) */ 4891 if (dev->features & NETIF_F_NETNS_LOCAL) 4892 continue; 4893 4894 /* Delete virtual devices */ 4895 if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) { 4896 dev->rtnl_link_ops->dellink(dev); 4897 continue; 4898 } 4899 4900 /* Push remaining network devices to init_net */ 4901 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); 4902 err = dev_change_net_namespace(dev, &init_net, fb_name); 4903 if (err) { 4904 printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n", 4905 __func__, dev->name, err); 4906 BUG(); 4907 } 4908 } 4909 rtnl_unlock(); 4910 } 4911 4912 static struct pernet_operations __net_initdata default_device_ops = { 4913 .exit = default_device_exit, 4914 }; 4915 4916 /* 4917 * Initialize the DEV module. At boot time this walks the device list and 4918 * unhooks any devices that fail to initialise (normally hardware not 4919 * present) and leaves us with a valid list of present and active devices.
4920 * 4921 */ 4922 4923 /* 4924 * This is called single-threaded during boot, so no need 4925 * to take the rtnl semaphore. 4926 */ 4927 static int __init net_dev_init(void) 4928 { 4929 int i, rc = -ENOMEM; 4930 4931 BUG_ON(!dev_boot_phase); 4932 4933 if (dev_proc_init()) 4934 goto out; 4935 4936 if (netdev_kobject_init()) 4937 goto out; 4938 4939 INIT_LIST_HEAD(&ptype_all); 4940 for (i = 0; i < PTYPE_HASH_SIZE; i++) 4941 INIT_LIST_HEAD(&ptype_base[i]); 4942 4943 if (register_pernet_subsys(&netdev_net_ops)) 4944 goto out; 4945 4946 /* 4947 * Initialise the packet receive queues. 4948 */ 4949 4950 for_each_possible_cpu(i) { 4951 struct softnet_data *queue; 4952 4953 queue = &per_cpu(softnet_data, i); 4954 skb_queue_head_init(&queue->input_pkt_queue); 4955 queue->completion_queue = NULL; 4956 INIT_LIST_HEAD(&queue->poll_list); 4957 4958 queue->backlog.poll = process_backlog; 4959 queue->backlog.weight = weight_p; 4960 } 4961 4962 dev_boot_phase = 0; 4963 4964 /* The loopback device is special: if any other network device 4965 * is present in a network namespace, the loopback device must 4966 * be present too. Since we now dynamically allocate and free the 4967 * loopback device, ensure this invariant is maintained by 4968 * keeping the loopback device as the first device on the 4969 * list of network devices, so that the loopback device 4970 * is the first device that appears and the last network device 4971 * that disappears. 4972 */ 4973 if (register_pernet_device(&loopback_net_ops)) 4974 goto out; 4975 4976 if (register_pernet_device(&default_device_ops)) 4977 goto out; 4978 4979 netdev_dma_register(); 4980 4981 open_softirq(NET_TX_SOFTIRQ, net_tx_action); 4982 open_softirq(NET_RX_SOFTIRQ, net_rx_action); 4983 4984 hotcpu_notifier(dev_cpu_callback, 0); 4985 dst_init(); 4986 dev_mcast_init(); 4987 rc = 0; 4988 out: 4989 return rc; 4990 } 4991 4992 subsys_initcall(net_dev_init); 4993 4994 EXPORT_SYMBOL(__dev_get_by_index); 4995 EXPORT_SYMBOL(__dev_get_by_name); 4996 EXPORT_SYMBOL(__dev_remove_pack); 4997 EXPORT_SYMBOL(dev_valid_name); 4998 EXPORT_SYMBOL(dev_add_pack); 4999 EXPORT_SYMBOL(dev_alloc_name); 5000 EXPORT_SYMBOL(dev_close); 5001 EXPORT_SYMBOL(dev_get_by_flags); 5002 EXPORT_SYMBOL(dev_get_by_index); 5003 EXPORT_SYMBOL(dev_get_by_name); 5004 EXPORT_SYMBOL(dev_open); 5005 EXPORT_SYMBOL(dev_queue_xmit); 5006 EXPORT_SYMBOL(dev_remove_pack); 5007 EXPORT_SYMBOL(dev_set_allmulti); 5008 EXPORT_SYMBOL(dev_set_promiscuity); 5009 EXPORT_SYMBOL(dev_change_flags); 5010 EXPORT_SYMBOL(dev_set_mtu); 5011 EXPORT_SYMBOL(dev_set_mac_address); 5012 EXPORT_SYMBOL(free_netdev); 5013 EXPORT_SYMBOL(netdev_boot_setup_check); 5014 EXPORT_SYMBOL(netdev_set_master); 5015 EXPORT_SYMBOL(netdev_state_change); 5016 EXPORT_SYMBOL(netif_receive_skb); 5017 EXPORT_SYMBOL(netif_rx); 5018 EXPORT_SYMBOL(register_gifconf); 5019 EXPORT_SYMBOL(register_netdevice); 5020 EXPORT_SYMBOL(register_netdevice_notifier); 5021 EXPORT_SYMBOL(skb_checksum_help); 5022 EXPORT_SYMBOL(synchronize_net); 5023 EXPORT_SYMBOL(unregister_netdevice); 5024 EXPORT_SYMBOL(unregister_netdevice_notifier); 5025 EXPORT_SYMBOL(net_enable_timestamp); 5026 EXPORT_SYMBOL(net_disable_timestamp); 5027 EXPORT_SYMBOL(dev_get_flags); 5028 5029 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) 5030 EXPORT_SYMBOL(br_handle_frame_hook); 5031 EXPORT_SYMBOL(br_fdb_get_hook); 5032 EXPORT_SYMBOL(br_fdb_put_hook); 5033 #endif 5034 5035 EXPORT_SYMBOL(dev_load); 5036 5037 EXPORT_PER_CPU_SYMBOL(softnet_data); 5038
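
/*
 * Illustrative sketch only (assumed names, not part of this file's build):
 * with the net_device_ops conversion handled in register_netdevice() above,
 * a driver supplies its callbacks through a single ops table instead of
 * filling in the individual net_device function pointers, roughly like so:
 *
 *	static struct net_device_stats *mydemo_get_stats(struct net_device *dev)
 *	{
 *		return &dev->stats;	// or a driver-maintained copy
 *	}
 *
 *	static const struct net_device_ops mydemo_netdev_ops = {
 *		.ndo_open		= mydemo_open,
 *		.ndo_stop		= mydemo_stop,
 *		.ndo_start_xmit		= mydemo_start_xmit,
 *		.ndo_get_stats		= mydemo_get_stats,
 *		.ndo_validate_addr	= eth_validate_addr,
 *		.ndo_set_mac_address	= eth_mac_addr,
 *		.ndo_change_mtu		= eth_change_mtu,
 *	};
 *
 *	static void mydemo_setup(struct net_device *dev)
 *	{
 *		ether_setup(dev);
 *		dev->netdev_ops = &mydemo_netdev_ops;
 *	}
 */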