1 /* 2 * NET3 Protocol independent device support routines. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Derived from the non IP parts of dev.c 1.0.19 10 * Authors: Ross Biro 11 * Fred N. van Kempen, <[email protected]> 12 * Mark Evans, <[email protected]> 13 * 14 * Additional Authors: 15 * Florian la Roche <[email protected]> 16 * Alan Cox <[email protected]> 17 * David Hinds <[email protected]> 18 * Alexey Kuznetsov <[email protected]> 19 * Adam Sulmicki <[email protected]> 20 * Pekka Riikonen <[email protected]> 21 * 22 * Changes: 23 * D.J. Barrow : Fixed bug where dev->refcnt gets set 24 * to 2 if register_netdev gets called 25 * before net_dev_init & also removed a 26 * few lines of code in the process. 27 * Alan Cox : device private ioctl copies fields back. 28 * Alan Cox : Transmit queue code does relevant 29 * stunts to keep the queue safe. 30 * Alan Cox : Fixed double lock. 31 * Alan Cox : Fixed promisc NULL pointer trap 32 * ???????? : Support the full private ioctl range 33 * Alan Cox : Moved ioctl permission check into 34 * drivers 35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI 36 * Alan Cox : 100 backlog just doesn't cut it when 37 * you start doing multicast video 8) 38 * Alan Cox : Rewrote net_bh and list manager. 39 * Alan Cox : Fix ETH_P_ALL echoback lengths. 40 * Alan Cox : Took out transmit every packet pass 41 * Saved a few bytes in the ioctl handler 42 * Alan Cox : Network driver sets packet type before 43 * calling netif_rx. Saves a function 44 * call a packet. 45 * Alan Cox : Hashed net_bh() 46 * Richard Kooijman: Timestamp fixes. 47 * Alan Cox : Wrong field in SIOCGIFDSTADDR 48 * Alan Cox : Device lock protection. 49 * Alan Cox : Fixed nasty side effect of device close 50 * changes. 
51 * Rudi Cilibrasi : Pass the right thing to 52 * set_mac_address() 53 * Dave Miller : 32bit quantity for the device lock to 54 * make it work out on a Sparc. 55 * Bjorn Ekwall : Added KERNELD hack. 56 * Alan Cox : Cleaned up the backlog initialise. 57 * Craig Metz : SIOCGIFCONF fix if space for under 58 * 1 device. 59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there 60 * is no device open function. 61 * Andi Kleen : Fix error reporting for SIOCGIFCONF 62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF 63 * Cyrus Durgin : Cleaned for KMOD 64 * Adam Sulmicki : Bug Fix : Network Device Unload 65 * A network device unload needs to purge 66 * the backlog queue. 67 * Paul Rusty Russell : SIOCSIFNAME 68 * Pekka Riikonen : Netdev boot-time settings code 69 * Andrew Morton : Make unregister_netdevice wait 70 * indefinitely on dev->refcnt 71 * J Hadi Salim : - Backlog queue sampling 72 * - netif_rx() feedback 73 */ 74 75 #include <asm/uaccess.h> 76 #include <asm/system.h> 77 #include <linux/bitops.h> 78 #include <linux/capability.h> 79 #include <linux/cpu.h> 80 #include <linux/types.h> 81 #include <linux/kernel.h> 82 #include <linux/sched.h> 83 #include <linux/mutex.h> 84 #include <linux/string.h> 85 #include <linux/mm.h> 86 #include <linux/socket.h> 87 #include <linux/sockios.h> 88 #include <linux/errno.h> 89 #include <linux/interrupt.h> 90 #include <linux/if_ether.h> 91 #include <linux/netdevice.h> 92 #include <linux/etherdevice.h> 93 #include <linux/ethtool.h> 94 #include <linux/notifier.h> 95 #include <linux/skbuff.h> 96 #include <net/net_namespace.h> 97 #include <net/sock.h> 98 #include <linux/rtnetlink.h> 99 #include <linux/proc_fs.h> 100 #include <linux/seq_file.h> 101 #include <linux/stat.h> 102 #include <linux/if_bridge.h> 103 #include <linux/if_macvlan.h> 104 #include <net/dst.h> 105 #include <net/pkt_sched.h> 106 #include <net/checksum.h> 107 #include <linux/highmem.h> 108 #include <linux/init.h> 109 #include <linux/kmod.h> 110 #include 
<linux/module.h> 111 #include <linux/netpoll.h> 112 #include <linux/rcupdate.h> 113 #include <linux/delay.h> 114 #include <net/wext.h> 115 #include <net/iw_handler.h> 116 #include <asm/current.h> 117 #include <linux/audit.h> 118 #include <linux/dmaengine.h> 119 #include <linux/err.h> 120 #include <linux/ctype.h> 121 #include <linux/if_arp.h> 122 #include <linux/if_vlan.h> 123 #include <linux/ip.h> 124 #include <net/ip.h> 125 #include <linux/ipv6.h> 126 #include <linux/in.h> 127 #include <linux/jhash.h> 128 #include <linux/random.h> 129 130 #include "net-sysfs.h" 131 132 /* Instead of increasing this, you should create a hash table. */ 133 #define MAX_GRO_SKBS 8 134 135 /* 136 * The list of packet types we will receive (as opposed to discard) 137 * and the routines to invoke. 138 * 139 * Why 16. Because with 16 the only overlap we get on a hash of the 140 * low nibble of the protocol value is RARP/SNAP/X.25. 141 * 142 * NOTE: That is no longer true with the addition of VLAN tags. Not 143 * sure which should go first, but I bet it won't make much 144 * difference if we are running VLANs. The good news is that 145 * this protocol won't be in the list unless compiled in, so 146 * the average user (w/out VLANs) will not be adversely affected. 147 * --BLG 148 * 149 * 0800 IP 150 * 8100 802.1Q VLAN 151 * 0001 802.3 152 * 0002 AX.25 153 * 0004 802.2 154 * 8035 RARP 155 * 0005 SNAP 156 * 0805 X.25 157 * 0806 ARP 158 * 8137 IPX 159 * 0009 Localtalk 160 * 86DD IPv6 161 */ 162 163 #define PTYPE_HASH_SIZE (16) 164 #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) 165 166 static DEFINE_SPINLOCK(ptype_lock); 167 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; 168 static struct list_head ptype_all __read_mostly; /* Taps */ 169 170 /* 171 * The @dev_base_head list is protected by @dev_base_lock and the rtnl 172 * semaphore. 173 * 174 * Pure readers hold dev_base_lock for reading. 
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
DEFINE_RWLOCK(dev_base_lock);

EXPORT_SYMBOL(dev_base_lock);

#define NETDEV_HASHBITS	8
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)

/* Bucket in @net's per-namespace name hash table for device @name. */
static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
}

/* Bucket in @net's per-namespace ifindex hash table for @ifindex. */
static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
}

/* Device list insertion */
static int list_netdevice(struct net_device *dev)
{
	struct net *net = dev_net(dev);

	/* Writers need RTNL *and* dev_base_lock; see locking notes above. */
	ASSERT_RTNL();

	write_lock_bh(&dev_base_lock);
	list_add_tail(&dev->dev_list, &net->dev_base_head);
	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
	write_unlock_bh(&dev_base_lock);
	return 0;
}

/* Device list removal */
static void unlist_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Unlink dev from the device chain */
	write_lock_bh(&dev_base_lock);
	list_del(&dev->dev_list);
	hlist_del(&dev->name_hlist);
	hlist_del(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);
}

/*
 * Our
notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */

DEFINE_PER_CPU(struct softnet_data, softnet_data);

#ifdef CONFIG_LOCKDEP
/*
 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 * according to dev->type
 */
static const unsigned short netdev_lock_type[] =
	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
	 ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE};

/* Lockdep class names; entries must stay parallel to netdev_lock_type[]. */
static const char *netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
	 "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"};

static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];

/* Map an ARPHRD_* type to its slot in the tables above; unknown types
 * share the final (ARPHRD_NONE) slot. */
static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
		if (netdev_lock_type[i] == dev_type)
			return i;
	/* the last key is used by default */
	return ARRAY_SIZE(netdev_lock_type) - 1;
}

/* Give a txq _xmit_lock a per-device-type lockdep class. */
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
	int i;

	i = netdev_lock_pos(dev_type);
	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
				   netdev_lock_name[i]);
}

/* Give dev->addr_list_lock a per-device-type lockdep class. */
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
	int i;

	i = netdev_lock_pos(dev->type);
	lockdep_set_class_and_name(&dev->addr_list_lock,
				   &netdev_addr_lock_key[i],
				   netdev_lock_name[i]);
}
#else
/* !CONFIG_LOCKDEP: lock classification compiles away to nothing. */
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
#endif

/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/

/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers, mangling input packets,
 *	MUST BE last in hash buckets and checking protocol handlers
 *	MUST start from promiscuous ptype_all chain in net_bh.
 *	It is true now, do not change it.
 *	Explanation follows: if protocol handler, mangling packet, will
 *	be the first on list, it is not able to sense, that packet
 *	is cloned and should be copied-on-write, so that it will
 *	change it and subsequent readers will get broken packet.
 *							--ANK (980803)
 */

/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep therefore it can not
 *	guarantee all CPU's that are in middle of receiving packets
 *	will see the new packet type (until the next received packet).
 */

void dev_add_pack(struct packet_type *pt)
{
	int hash;

	spin_lock_bh(&ptype_lock);
	/* ETH_P_ALL taps live on their own list; all other protocols are
	 * hashed on the low bits of the host-order protocol number. */
	if (pt->type == htons(ETH_P_ALL))
		list_add_rcu(&pt->list, &ptype_all);
	else {
		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
		list_add_rcu(&pt->list, &ptype_base[hash]);
	}
	spin_unlock_bh(&ptype_lock);
}

/**
 *	__dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	The packet type might still be in use by receivers
 *	and must not be freed until after all the CPU's have gone
 *	through a quiescent state.
 */
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head;
	struct packet_type *pt1;

	spin_lock_bh(&ptype_lock);

	/* Same bucket selection as dev_add_pack(). */
	if (pt->type == htons(ETH_P_ALL))
		head = &ptype_all;
	else
		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	/* Removing a handler that was never registered is a caller bug. */
	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
	spin_unlock_bh(&ptype_lock);
}
/**
 *	dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	/* Wait for in-flight RCU readers before the caller may free @pt. */
	synchronize_net();
}

/******************************************************************************

		      Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 *	netdev_boot_setup_add	- add new setup entry
 *	@name: name of the device
 *	@map: configured settings for the device
 *
 *	Adds new setup entry to the dev_boot_setup list.  The function
 *	returns 0 on error and 1 on success.  This is a generic routine to
 *	all netdevices.
451 */ 452 static int netdev_boot_setup_add(char *name, struct ifmap *map) 453 { 454 struct netdev_boot_setup *s; 455 int i; 456 457 s = dev_boot_setup; 458 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { 459 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { 460 memset(s[i].name, 0, sizeof(s[i].name)); 461 strlcpy(s[i].name, name, IFNAMSIZ); 462 memcpy(&s[i].map, map, sizeof(s[i].map)); 463 break; 464 } 465 } 466 467 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1; 468 } 469 470 /** 471 * netdev_boot_setup_check - check boot time settings 472 * @dev: the netdevice 473 * 474 * Check boot time settings for the device. 475 * The found settings are set for the device to be used 476 * later in the device probing. 477 * Returns 0 if no settings found, 1 if they are. 478 */ 479 int netdev_boot_setup_check(struct net_device *dev) 480 { 481 struct netdev_boot_setup *s = dev_boot_setup; 482 int i; 483 484 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { 485 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && 486 !strcmp(dev->name, s[i].name)) { 487 dev->irq = s[i].map.irq; 488 dev->base_addr = s[i].map.base_addr; 489 dev->mem_start = s[i].map.mem_start; 490 dev->mem_end = s[i].map.mem_end; 491 return 1; 492 } 493 } 494 return 0; 495 } 496 497 498 /** 499 * netdev_boot_base - get address from boot time settings 500 * @prefix: prefix for network device 501 * @unit: id for network device 502 * 503 * Check boot time settings for the base address of device. 504 * The found settings are set for the device to be used 505 * later in the device probing. 506 * Returns 0 if no settings found. 
507 */ 508 unsigned long netdev_boot_base(const char *prefix, int unit) 509 { 510 const struct netdev_boot_setup *s = dev_boot_setup; 511 char name[IFNAMSIZ]; 512 int i; 513 514 sprintf(name, "%s%d", prefix, unit); 515 516 /* 517 * If device already registered then return base of 1 518 * to indicate not to probe for this interface 519 */ 520 if (__dev_get_by_name(&init_net, name)) 521 return 1; 522 523 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) 524 if (!strcmp(name, s[i].name)) 525 return s[i].map.base_addr; 526 return 0; 527 } 528 529 /* 530 * Saves at boot time configured settings for any netdevice. 531 */ 532 int __init netdev_boot_setup(char *str) 533 { 534 int ints[5]; 535 struct ifmap map; 536 537 str = get_options(str, ARRAY_SIZE(ints), ints); 538 if (!str || !*str) 539 return 0; 540 541 /* Save settings */ 542 memset(&map, 0, sizeof(map)); 543 if (ints[0] > 0) 544 map.irq = ints[1]; 545 if (ints[0] > 1) 546 map.base_addr = ints[2]; 547 if (ints[0] > 2) 548 map.mem_start = ints[3]; 549 if (ints[0] > 3) 550 map.mem_end = ints[4]; 551 552 /* Add new entry to the list */ 553 return netdev_boot_setup_add(str, &map); 554 } 555 556 __setup("netdev=", netdev_boot_setup); 557 558 /******************************************************************************* 559 560 Device Interface Subroutines 561 562 *******************************************************************************/ 563 564 /** 565 * __dev_get_by_name - find a device by its name 566 * @net: the applicable net namespace 567 * @name: name to find 568 * 569 * Find an interface by name. Must be called under RTNL semaphore 570 * or @dev_base_lock. If the name is found a pointer to the device 571 * is returned. If the name is not found then %NULL is returned. The 572 * reference counters are not incremented so the caller must be 573 * careful with locks. 
 */

struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
	struct hlist_node *p;

	/* Walk only the matching hash bucket; caller holds RTNL or
	 * dev_base_lock, so the chain cannot change under us. */
	hlist_for_each(p, dev_name_hash(net, name)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, name_hlist);
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;
	}
	return NULL;
}

/**
 *	dev_get_by_name		- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. This can be called from any
 *	context and does its own locking. The returned handle has
 *	the usage count incremented and the caller must use dev_put() to
 *	release it when it is no longer needed. %NULL is returned if no
 *	matching device is found.
 */

struct net_device *dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(net, name);
	if (dev)
		dev_hold(dev);	/* caller must dev_put() when finished */
	read_unlock(&dev_base_lock);
	return dev;
}

/**
 *	__dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold either the RTNL semaphore
 *	or @dev_base_lock.
 */

struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
	struct hlist_node *p;

	/* Walk only the matching ifindex hash bucket; caller holds RTNL
	 * or dev_base_lock. */
	hlist_for_each(p, dev_index_hash(net, ifindex)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, index_hlist);
		if (dev->ifindex == ifindex)
			return dev;
	}
	return NULL;
}


/**
 *	dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns NULL if the device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_index(net, ifindex);
	if (dev)
		dev_hold(dev);	/* caller must dev_put() when finished */
	read_unlock(&dev_base_lock);
	return dev;
}

/**
 *	dev_getbyhwaddr - find a device by its hardware address
 *	@net: the applicable net namespace
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address. Returns NULL if the device
 *	is not found or a pointer to the device. The caller must hold the
 *	rtnl semaphore.
The returned device has not had its ref count increased 671 * and the caller must therefore be careful about locking 672 * 673 * BUGS: 674 * If the API was consistent this would be __dev_get_by_hwaddr 675 */ 676 677 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha) 678 { 679 struct net_device *dev; 680 681 ASSERT_RTNL(); 682 683 for_each_netdev(net, dev) 684 if (dev->type == type && 685 !memcmp(dev->dev_addr, ha, dev->addr_len)) 686 return dev; 687 688 return NULL; 689 } 690 691 EXPORT_SYMBOL(dev_getbyhwaddr); 692 693 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) 694 { 695 struct net_device *dev; 696 697 ASSERT_RTNL(); 698 for_each_netdev(net, dev) 699 if (dev->type == type) 700 return dev; 701 702 return NULL; 703 } 704 705 EXPORT_SYMBOL(__dev_getfirstbyhwtype); 706 707 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) 708 { 709 struct net_device *dev; 710 711 rtnl_lock(); 712 dev = __dev_getfirstbyhwtype(net, type); 713 if (dev) 714 dev_hold(dev); 715 rtnl_unlock(); 716 return dev; 717 } 718 719 EXPORT_SYMBOL(dev_getfirstbyhwtype); 720 721 /** 722 * dev_get_by_flags - find any device with given flags 723 * @net: the applicable net namespace 724 * @if_flags: IFF_* values 725 * @mask: bitmask of bits in if_flags to check 726 * 727 * Search for any interface with the given flags. Returns NULL if a device 728 * is not found or a pointer to the device. The device returned has 729 * had a reference added and the pointer is safe until the user calls 730 * dev_put to indicate they have finished with it. 
 */

struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
{
	struct net_device *dev, *ret;

	ret = NULL;
	read_lock(&dev_base_lock);
	for_each_netdev(net, dev) {
		/* Match when every bit selected by @mask agrees with
		 * @if_flags; bits outside @mask are ignored. */
		if (((dev->flags ^ if_flags) & mask) == 0) {
			dev_hold(dev);
			ret = dev;
			break;
		}
	}
	read_unlock(&dev_base_lock);
	return ret;
}

/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to
 *	to allow sysfs to work.  We also disallow any kind of
 *	whitespace.
 */
int dev_valid_name(const char *name)
{
	if (*name == '\0')
		return 0;
	if (strlen(name) >= IFNAMSIZ)
		return 0;
	/* "." and ".." would collide with directory entries in sysfs. */
	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return 0;

	while (*name) {
		if (*name == '/' || isspace(*name))
			return 0;
		name++;
	}
	return 1;
}

/**
 *	__dev_alloc_name - allocate a name for a device
 *	@net: network namespace to allocate the device name in
 *	@name: name format string
 *	@buf:  scratch buffer and result name string
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
	int i = 0;
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;
	unsigned long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user.  There must be either one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		/* Mark every unit number already claimed by an existing
		 * device whose name round-trips through the format. */
		for_each_netdev(net, d) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/* avoid cases where sscanf is not exact inverse of printf */
			snprintf(buf, IFNAMSIZ, name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	snprintf(buf, IFNAMSIZ, name, i);
	if (!__dev_get_by_name(net, buf))
		return i;

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}

/**
 *	dev_alloc_name - allocate a name for a device
 *	@dev: device
 *	@name: name format string
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
852 */ 853 854 int dev_alloc_name(struct net_device *dev, const char *name) 855 { 856 char buf[IFNAMSIZ]; 857 struct net *net; 858 int ret; 859 860 BUG_ON(!dev_net(dev)); 861 net = dev_net(dev); 862 ret = __dev_alloc_name(net, name, buf); 863 if (ret >= 0) 864 strlcpy(dev->name, buf, IFNAMSIZ); 865 return ret; 866 } 867 868 869 /** 870 * dev_change_name - change name of a device 871 * @dev: device 872 * @newname: name (or format string) must be at least IFNAMSIZ 873 * 874 * Change name of a device, can pass format strings "eth%d". 875 * for wildcarding. 876 */ 877 int dev_change_name(struct net_device *dev, const char *newname) 878 { 879 char oldname[IFNAMSIZ]; 880 int err = 0; 881 int ret; 882 struct net *net; 883 884 ASSERT_RTNL(); 885 BUG_ON(!dev_net(dev)); 886 887 net = dev_net(dev); 888 if (dev->flags & IFF_UP) 889 return -EBUSY; 890 891 if (!dev_valid_name(newname)) 892 return -EINVAL; 893 894 if (strncmp(newname, dev->name, IFNAMSIZ) == 0) 895 return 0; 896 897 memcpy(oldname, dev->name, IFNAMSIZ); 898 899 if (strchr(newname, '%')) { 900 err = dev_alloc_name(dev, newname); 901 if (err < 0) 902 return err; 903 } 904 else if (__dev_get_by_name(net, newname)) 905 return -EEXIST; 906 else 907 strlcpy(dev->name, newname, IFNAMSIZ); 908 909 rollback: 910 /* For now only devices in the initial network namespace 911 * are in sysfs. 
912 */ 913 if (net == &init_net) { 914 ret = device_rename(&dev->dev, dev->name); 915 if (ret) { 916 memcpy(dev->name, oldname, IFNAMSIZ); 917 return ret; 918 } 919 } 920 921 write_lock_bh(&dev_base_lock); 922 hlist_del(&dev->name_hlist); 923 hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); 924 write_unlock_bh(&dev_base_lock); 925 926 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); 927 ret = notifier_to_errno(ret); 928 929 if (ret) { 930 if (err) { 931 printk(KERN_ERR 932 "%s: name change rollback failed: %d.\n", 933 dev->name, ret); 934 } else { 935 err = ret; 936 memcpy(dev->name, oldname, IFNAMSIZ); 937 goto rollback; 938 } 939 } 940 941 return err; 942 } 943 944 /** 945 * dev_set_alias - change ifalias of a device 946 * @dev: device 947 * @alias: name up to IFALIASZ 948 * @len: limit of bytes to copy from info 949 * 950 * Set ifalias for a device, 951 */ 952 int dev_set_alias(struct net_device *dev, const char *alias, size_t len) 953 { 954 ASSERT_RTNL(); 955 956 if (len >= IFALIASZ) 957 return -EINVAL; 958 959 if (!len) { 960 if (dev->ifalias) { 961 kfree(dev->ifalias); 962 dev->ifalias = NULL; 963 } 964 return 0; 965 } 966 967 dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL); 968 if (!dev->ifalias) 969 return -ENOMEM; 970 971 strlcpy(dev->ifalias, alias, len+1); 972 return len; 973 } 974 975 976 /** 977 * netdev_features_change - device changes features 978 * @dev: device to cause notification 979 * 980 * Called to indicate a device has changed features. 981 */ 982 void netdev_features_change(struct net_device *dev) 983 { 984 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev); 985 } 986 EXPORT_SYMBOL(netdev_features_change); 987 988 /** 989 * netdev_state_change - device changes state 990 * @dev: device to cause notification 991 * 992 * Called to indicate a device has changed state. This function calls 993 * the notifier chains for netdev_chain and sends a NEWLINK message 994 * to the routing socket. 
995 */ 996 void netdev_state_change(struct net_device *dev) 997 { 998 if (dev->flags & IFF_UP) { 999 call_netdevice_notifiers(NETDEV_CHANGE, dev); 1000 rtmsg_ifinfo(RTM_NEWLINK, dev, 0); 1001 } 1002 } 1003 1004 void netdev_bonding_change(struct net_device *dev) 1005 { 1006 call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev); 1007 } 1008 EXPORT_SYMBOL(netdev_bonding_change); 1009 1010 /** 1011 * dev_load - load a network module 1012 * @net: the applicable net namespace 1013 * @name: name of interface 1014 * 1015 * If a network interface is not present and the process has suitable 1016 * privileges this function loads the module. If module loading is not 1017 * available in this kernel then it becomes a nop. 1018 */ 1019 1020 void dev_load(struct net *net, const char *name) 1021 { 1022 struct net_device *dev; 1023 1024 read_lock(&dev_base_lock); 1025 dev = __dev_get_by_name(net, name); 1026 read_unlock(&dev_base_lock); 1027 1028 if (!dev && capable(CAP_SYS_MODULE)) 1029 request_module("%s", name); 1030 } 1031 1032 /** 1033 * dev_open - prepare an interface for use. 1034 * @dev: device to open 1035 * 1036 * Takes a device from down to up state. The device's private open 1037 * function is invoked and then the multicast lists are loaded. Finally 1038 * the device is moved into the up state and a %NETDEV_UP message is 1039 * sent to the netdev notifier chain. 1040 * 1041 * Calling this function on an active interface is a nop. On a failure 1042 * a negative errno code is returned. 1043 */ 1044 int dev_open(struct net_device *dev) 1045 { 1046 const struct net_device_ops *ops = dev->netdev_ops; 1047 int ret = 0; 1048 1049 ASSERT_RTNL(); 1050 1051 /* 1052 * Is it already up? 1053 */ 1054 1055 if (dev->flags & IFF_UP) 1056 return 0; 1057 1058 /* 1059 * Is it even present? 
 */
	if (!netif_device_present(dev))
		return -ENODEV;

	/*
	 *	Call device private open method
	 */
	set_bit(__LINK_STATE_START, &dev->state);

	if (ops->ndo_validate_addr)
		ret = ops->ndo_validate_addr(dev);

	if (!ret && ops->ndo_open)
		ret = ops->ndo_open(dev);

	/*
	 *	If it went open OK then:
	 */

	if (ret)
		clear_bit(__LINK_STATE_START, &dev->state);
	else {
		/*
		 *	Set the flags.
		 */
		dev->flags |= IFF_UP;

		/*
		 *	Initialize multicasting status
		 */
		dev_set_rx_mode(dev);

		/*
		 *	Wakeup transmit queue engine
		 */
		dev_activate(dev);

		/*
		 *	... and announce new interface.
		 */
		call_netdevice_notifiers(NETDEV_UP, dev);
	}

	return ret;
}

/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state. A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *	chain.
 */
int dev_close(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	ASSERT_RTNL();

	might_sleep();

	if (!(dev->flags & IFF_UP))
		return 0;

	/*
	 *	Tell people we are going down, so that they can
	 *	prepare to death, when device is still operating.
	 */
	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

	clear_bit(__LINK_STATE_START, &dev->state);

	/* Synchronize to scheduled poll. We cannot touch poll list,
	 * it can be even on different cpu. So just clear netif_running().
	 *
	 * dev->stop() will invoke napi_disable() on all of it's
	 * napi_struct instances on this device.
	 */
	smp_mb__after_clear_bit(); /* Commit netif_running().
 */

	dev_deactivate(dev);

	/*
	 *	Call the device specific close. This cannot fail.
	 *	Only if device is UP
	 *
	 *	We allow it to be called even after a DETACH hot-plug
	 *	event.
	 */
	if (ops->ndo_stop)
		ops->ndo_stop(dev);

	/*
	 *	Device is now down.
	 */

	dev->flags &= ~IFF_UP;

	/*
	 * Tell people we are down
	 */
	call_netdevice_notifiers(NETDEV_DOWN, dev);

	return 0;
}


/**
 *	dev_disable_lro - disable Large Receive Offload on a device
 *	@dev: device
 *
 *	Disable Large Receive Offload (LRO) on a net device.  Must be
 *	called under RTNL.  This is needed if received packets may be
 *	forwarded to another interface.
 */
void dev_disable_lro(struct net_device *dev)
{
	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
	    dev->ethtool_ops->set_flags) {
		u32 flags = dev->ethtool_ops->get_flags(dev);
		if (flags & ETH_FLAG_LRO) {
			flags &= ~ETH_FLAG_LRO;
			dev->ethtool_ops->set_flags(dev, flags);
		}
	}
	/* Drivers without the ethtool hooks must not advertise LRO */
	WARN_ON(dev->features & NETIF_F_LRO);
}
EXPORT_SYMBOL(dev_disable_lro);


/* Nonzero until boot-time initialisation is done; the notifier replay
 * in register_netdevice_notifier() is skipped while this is set.
 */
static int dev_boot_phase = 1;

/*
 *	Device change register/unregister. These are not inline or static
 *	as we export them to the world.
 */

/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register a notifier to be called when network device events occur.
 *	The notifier passed is linked into the kernel structures and must
 *	not be reused until it has been unregistered. A negative errno code
 *	is returned on a failure.
 *
 * 	When registered all registration and up events are replayed
 *	to the new notifier to allow device to have a race free
 *	view of the network device list.
1210 */ 1211 1212 int register_netdevice_notifier(struct notifier_block *nb) 1213 { 1214 struct net_device *dev; 1215 struct net_device *last; 1216 struct net *net; 1217 int err; 1218 1219 rtnl_lock(); 1220 err = raw_notifier_chain_register(&netdev_chain, nb); 1221 if (err) 1222 goto unlock; 1223 if (dev_boot_phase) 1224 goto unlock; 1225 for_each_net(net) { 1226 for_each_netdev(net, dev) { 1227 err = nb->notifier_call(nb, NETDEV_REGISTER, dev); 1228 err = notifier_to_errno(err); 1229 if (err) 1230 goto rollback; 1231 1232 if (!(dev->flags & IFF_UP)) 1233 continue; 1234 1235 nb->notifier_call(nb, NETDEV_UP, dev); 1236 } 1237 } 1238 1239 unlock: 1240 rtnl_unlock(); 1241 return err; 1242 1243 rollback: 1244 last = dev; 1245 for_each_net(net) { 1246 for_each_netdev(net, dev) { 1247 if (dev == last) 1248 break; 1249 1250 if (dev->flags & IFF_UP) { 1251 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); 1252 nb->notifier_call(nb, NETDEV_DOWN, dev); 1253 } 1254 nb->notifier_call(nb, NETDEV_UNREGISTER, dev); 1255 } 1256 } 1257 1258 raw_notifier_chain_unregister(&netdev_chain, nb); 1259 goto unlock; 1260 } 1261 1262 /** 1263 * unregister_netdevice_notifier - unregister a network notifier block 1264 * @nb: notifier 1265 * 1266 * Unregister a notifier previously registered by 1267 * register_netdevice_notifier(). The notifier is unlinked into the 1268 * kernel structures and may then be reused. A negative errno code 1269 * is returned on a failure. 1270 */ 1271 1272 int unregister_netdevice_notifier(struct notifier_block *nb) 1273 { 1274 int err; 1275 1276 rtnl_lock(); 1277 err = raw_notifier_chain_unregister(&netdev_chain, nb); 1278 rtnl_unlock(); 1279 return err; 1280 } 1281 1282 /** 1283 * call_netdevice_notifiers - call all network notifier blocks 1284 * @val: value passed unmodified to notifier function 1285 * @dev: net_device pointer passed unmodified to notifier function 1286 * 1287 * Call all network notifier blocks. 
	Parameters and return value
 *	are as for raw_notifier_call_chain().
 */

int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
	return raw_notifier_call_chain(&netdev_chain, val, dev);
}

/* When > 0 there are consumers of rx skb time stamps */
static atomic_t netstamp_needed = ATOMIC_INIT(0);

/* Take a reference on RX timestamping */
void net_enable_timestamp(void)
{
	atomic_inc(&netstamp_needed);
}

/* Drop a reference on RX timestamping */
void net_disable_timestamp(void)
{
	atomic_dec(&netstamp_needed);
}

/* Stamp the skb with the current time if anyone wants timestamps,
 * otherwise clear the stamp so it can be taken lazily later.
 */
static inline void net_timestamp(struct sk_buff *skb)
{
	if (atomic_read(&netstamp_needed))
		__net_timestamp(skb);
	else
		skb->tstamp.tv64 = 0;
}

/*
 *	Support routine. Sends outgoing frames to any network
 *	taps currently in use.
 */

static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
	struct packet_type *ptype;

	net_timestamp(skb);

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		/* Never send packets back to the socket
		 * they originated from - MvS ([email protected])
		 */
		if ((ptype->dev == dev || !ptype->dev) &&
		    (ptype->af_packet_priv == NULL ||
		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (!skb2)
				break;

			/* skb->nh should be correctly
			   set by sender, so that the second statement is
			   just protection against buggy protocols.
1343 */ 1344 skb_reset_mac_header(skb2); 1345 1346 if (skb_network_header(skb2) < skb2->data || 1347 skb2->network_header > skb2->tail) { 1348 if (net_ratelimit()) 1349 printk(KERN_CRIT "protocol %04x is " 1350 "buggy, dev %s\n", 1351 skb2->protocol, dev->name); 1352 skb_reset_network_header(skb2); 1353 } 1354 1355 skb2->transport_header = skb2->network_header; 1356 skb2->pkt_type = PACKET_OUTGOING; 1357 ptype->func(skb2, skb->dev, ptype, skb->dev); 1358 } 1359 } 1360 rcu_read_unlock(); 1361 } 1362 1363 1364 static inline void __netif_reschedule(struct Qdisc *q) 1365 { 1366 struct softnet_data *sd; 1367 unsigned long flags; 1368 1369 local_irq_save(flags); 1370 sd = &__get_cpu_var(softnet_data); 1371 q->next_sched = sd->output_queue; 1372 sd->output_queue = q; 1373 raise_softirq_irqoff(NET_TX_SOFTIRQ); 1374 local_irq_restore(flags); 1375 } 1376 1377 void __netif_schedule(struct Qdisc *q) 1378 { 1379 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) 1380 __netif_reschedule(q); 1381 } 1382 EXPORT_SYMBOL(__netif_schedule); 1383 1384 void dev_kfree_skb_irq(struct sk_buff *skb) 1385 { 1386 if (atomic_dec_and_test(&skb->users)) { 1387 struct softnet_data *sd; 1388 unsigned long flags; 1389 1390 local_irq_save(flags); 1391 sd = &__get_cpu_var(softnet_data); 1392 skb->next = sd->completion_queue; 1393 sd->completion_queue = skb; 1394 raise_softirq_irqoff(NET_TX_SOFTIRQ); 1395 local_irq_restore(flags); 1396 } 1397 } 1398 EXPORT_SYMBOL(dev_kfree_skb_irq); 1399 1400 void dev_kfree_skb_any(struct sk_buff *skb) 1401 { 1402 if (in_irq() || irqs_disabled()) 1403 dev_kfree_skb_irq(skb); 1404 else 1405 dev_kfree_skb(skb); 1406 } 1407 EXPORT_SYMBOL(dev_kfree_skb_any); 1408 1409 1410 /** 1411 * netif_device_detach - mark device as removed 1412 * @dev: network device 1413 * 1414 * Mark device as removed from system and therefore no longer available. 
 */
void netif_device_detach(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_stop_queue(dev);
	}
}
EXPORT_SYMBOL(netif_device_detach);

/**
 *	netif_device_attach - mark device as attached
 *	@dev: network device
 *
 *	Mark device as attached from system and restart if needed.
 */
void netif_device_attach(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_wake_queue(dev);
		__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_device_attach);

/* Can a device with @features checksum frames of @protocol? */
static bool can_checksum_protocol(unsigned long features, __be16 protocol)
{
	return ((features & NETIF_F_GEN_CSUM) ||
		((features & NETIF_F_IP_CSUM) &&
		 protocol == htons(ETH_P_IP)) ||
		((features & NETIF_F_IPV6_CSUM) &&
		 protocol == htons(ETH_P_IPV6)));
}

/* As can_checksum_protocol(), but for 802.1Q frames also checks the
 * encapsulated protocol against the device's vlan_features.
 */
static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
{
	if (can_checksum_protocol(dev->features, skb->protocol))
		return true;

	if (skb->protocol == htons(ETH_P_8021Q)) {
		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
		if (can_checksum_protocol(dev->features & dev->vlan_features,
					  veh->h_vlan_encapsulated_proto))
			return true;
	}

	return false;
}

/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 */
int skb_checksum_help(struct sk_buff *skb)
{
	__wsum csum;
	int ret = 0, offset;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		goto out_set_summed;

	if (unlikely(skb_shinfo(skb)->gso_size)) {
		/* Let GSO fix up the checksum.
 */
		goto out_set_summed;
	}

	offset = skb->csum_start - skb_headroom(skb);
	BUG_ON(offset >= skb_headlen(skb));
	csum = skb_checksum(skb, offset, skb->len - offset, 0);

	offset += skb->csum_offset;
	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));

	/* A shared clone's header must be privatised before we may write
	 * the folded checksum into it.
	 */
	if (skb_cloned(skb) &&
	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		if (ret)
			goto out;
	}

	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
out_set_summed:
	skb->ip_summed = CHECKSUM_NONE;
out:
	return ret;
}

/**
 *	skb_gso_segment - Perform segmentation on skb.
 *	@skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *
 *	This function segments the given skb and returns a list of segments.
 *
 *	It may return NULL if the skb requires no segmentation.  This is
 *	only possible when GSO is used for verifying header integrity.
 */
struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
{
	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	int err;

	skb_reset_mac_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;
	__skb_pull(skb, skb->mac_len);

	if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
		/* Only CHECKSUM_PARTIAL skbs are expected here; privatise
		 * a cloned header before the fixup path may touch it.
		 */
		if (skb_header_cloned(skb) &&
		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
			return ERR_PTR(err);
	}

	rcu_read_lock();
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
				err = ptype->gso_send_check(skb);
				segs = ERR_PTR(err);
				if (err || skb_gso_ok(skb, features))
					break;
				__skb_push(skb, (skb->data -
						 skb_network_header(skb)));
			}
			segs = ptype->gso_segment(skb, features);
			break;
		}
	}
	rcu_read_unlock();

	/* Restore skb->data to point at the MAC header again */
	__skb_push(skb, skb->data - skb_mac_header(skb));

	return segs;
}

EXPORT_SYMBOL(skb_gso_segment);

/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	if (net_ratelimit()) {
		printk(KERN_ERR "%s: hw csum failure.\n",
			dev ? dev->name : "<unknown>");
		dump_stack();
	}
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif

/* Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */

/* Return nonzero if the skb carries highmem fragments that a device
 * without NETIF_F_HIGHDMA cannot DMA from.
 */
static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int i;

	if (dev->features & NETIF_F_HIGHDMA)
		return 0;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
			return 1;

#endif
	return 0;
}

/* Saved destructor of a GSO "parent" skb, stashed in skb->cb while the
 * segment list hangs off skb->next.
 */
struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)

/* Free any segments still chained to the parent skb, then invoke the
 * parent's original destructor.
 */
static void dev_gso_skb_destructor(struct sk_buff *skb)
{
	struct dev_gso_cb *cb;

	do {
		struct sk_buff *nskb = skb->next;

		skb->next = nskb->next;
		nskb->next = NULL;
		kfree_skb(nskb);
	} while (skb->next);

	cb = DEV_GSO_CB(skb);
	if (cb->destructor)
		cb->destructor(skb);
}

/**
 *	dev_gso_segment - Perform emulated hardware segmentation on skb.
 *	@skb: buffer to segment
 *
 *	This function segments the given skb and stores the list of segments
 *	in skb->next.
 */
static int dev_gso_segment(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct sk_buff *segs;
	int features = dev->features & ~(illegal_highdma(dev, skb) ?
					 NETIF_F_SG : 0);

	segs = skb_gso_segment(skb, features);

	/* Verifying header integrity only.
 */
	if (!segs)
		return 0;

	if (IS_ERR(segs))
		return PTR_ERR(segs);

	/* Park the segment list on skb->next and divert the destructor
	 * so the unsent segments are freed together with the parent.
	 */
	skb->next = segs;
	DEV_GSO_CB(skb)->destructor = skb->destructor;
	skb->destructor = dev_gso_skb_destructor;

	return 0;
}

int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
			struct netdev_queue *txq)
{
	const struct net_device_ops *ops = dev->netdev_ops;

	prefetch(&dev->netdev_ops->ndo_start_xmit);
	if (likely(!skb->next)) {
		/* Feed the frame to any taps (e.g. AF_PACKET) first */
		if (!list_empty(&ptype_all))
			dev_queue_xmit_nit(skb, dev);

		if (netif_needs_gso(dev, skb)) {
			if (unlikely(dev_gso_segment(skb)))
				goto out_kfree_skb;
			if (skb->next)
				goto gso;
		}

		return ops->ndo_start_xmit(skb, dev);
	}

gso:
	/* Transmit the GSO segment list one skb at a time; on an error
	 * or a stopped queue, re-link the remainder for a later retry.
	 */
	do {
		struct sk_buff *nskb = skb->next;
		int rc;

		skb->next = nskb->next;
		nskb->next = NULL;
		rc = ops->ndo_start_xmit(nskb, dev);
		if (unlikely(rc)) {
			nskb->next = skb->next;
			skb->next = nskb;
			return rc;
		}
		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
			return NETDEV_TX_BUSY;
	} while (skb->next);

	/* All segments sent: restore the destructor and free the parent */
	skb->destructor = DEV_GSO_CB(skb)->destructor;

out_kfree_skb:
	kfree_skb(skb);
	return 0;
}

static u32 simple_tx_hashrnd;
static int simple_tx_hashrnd_initialized = 0;

/* Hash the flow (addresses and, when available, ports) to pick a TX
 * queue index in [0, real_num_tx_queues).
 * NOTE(review): the lazy hashrnd initialisation below is not atomic;
 * concurrent first transmits could race.  Looks benign (worst case the
 * random seed is written twice) but worth confirming.
 */
static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
{
	u32 addr1, addr2, ports;
	u32 hash, ihl;
	u8 ip_proto = 0;

	if (unlikely(!simple_tx_hashrnd_initialized)) {
		get_random_bytes(&simple_tx_hashrnd, 4);
		simple_tx_hashrnd_initialized = 1;
	}

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		/* Only trust the protocol field of non-fragments */
		if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
			ip_proto = ip_hdr(skb)->protocol;
		addr1 = ip_hdr(skb)->saddr;
		addr2 = ip_hdr(skb)->daddr;
		ihl = ip_hdr(skb)->ihl;
		break;
	case htons(ETH_P_IPV6):
		ip_proto = ipv6_hdr(skb)->nexthdr;
		addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
		addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
		ihl = (40 >> 2);
		break;
	default:
		return 0;
	}


	switch (ip_proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
	case IPPROTO_DCCP:
	case IPPROTO_ESP:
	case IPPROTO_AH:
	case IPPROTO_SCTP:
	case IPPROTO_UDPLITE:
		/* Fold the 32 bits holding source+dest port into the hash */
		ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
		break;

	default:
		ports = 0;
		break;
	}

	hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);

	/* Scale the 32-bit hash into the queue range without a modulo */
	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
}

static struct netdev_queue *dev_pick_tx(struct net_device *dev,
					struct sk_buff *skb)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	u16 queue_index = 0;

	/* A driver-provided queue selector wins; otherwise hash the flow */
	if (ops->ndo_select_queue)
		queue_index = ops->ndo_select_queue(dev, skb);
	else if (dev->real_num_tx_queues > 1)
		queue_index = simple_tx_hash(dev, skb);

	skb_set_queue_mapping(skb, queue_index);
	return netdev_get_tx_queue(dev, queue_index);
}

/**
 *	dev_queue_xmit - transmit a buffer
 *	@skb: buffer to transmit
 *
 *	Queue a buffer for transmission to a network device. The caller must
 *	have set the device and priority and built the buffer before calling
 *	this function. The function can be called from an interrupt.
 *
 *	A negative errno code is returned on a failure. A success does not
 *	guarantee the frame will be transmitted as it may be dropped due
 *	to congestion or traffic shaping.
 *
 * -----------------------------------------------------------------------------------
 *      I notice this method can also return errors from the queue disciplines,
 *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
 *      be positive.
 *
 *      Regardless of the return value, the skb is consumed, so it is currently
 *      difficult to retry a send to this method.  (You can bump the ref count
 *      before sending to hold a reference for retry if you are careful.)
 *
 *      When calling this method, interrupts MUST be enabled.  This is because
 *      the BH enable code must have IRQs enabled so that it will not deadlock.
 *          --BLG
 */
int dev_queue_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct netdev_queue *txq;
	struct Qdisc *q;
	int rc = -ENOMEM;

	/* GSO will handle the following emulations directly. */
	if (netif_needs_gso(dev, skb))
		goto gso;

	if (skb_shinfo(skb)->frag_list &&
	    !(dev->features & NETIF_F_FRAGLIST) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* Fragmented skb is linearized if device does not support SG,
	 * or if at least one of fragments is in highmem and device
	 * does not support DMA from it.
	 */
	if (skb_shinfo(skb)->nr_frags &&
	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* If packet is not checksummed and device does not support
	 * checksumming for this protocol, complete checksumming here.
	 */
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		skb_set_transport_header(skb, skb->csum_start -
					      skb_headroom(skb));
		if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
			goto out_kfree_skb;
	}

gso:
	/* Disable soft irqs for various locks below. Also
	 * stops preemption for RCU.
	 */
	rcu_read_lock_bh();

	txq = dev_pick_tx(dev, skb);
	q = rcu_dereference(txq->qdisc);

#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
#endif
	if (q->enqueue) {
		spinlock_t *root_lock = qdisc_lock(q);

		spin_lock(root_lock);

		/* A deactivated qdisc must not accept new packets */
		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
			kfree_skb(skb);
			rc = NET_XMIT_DROP;
		} else {
			rc = qdisc_enqueue_root(skb, q);
			qdisc_run(q);
		}
		spin_unlock(root_lock);

		goto out;
	}

	/* The device has no queue. Common case for software devices:
	   loopback, all the sorts of tunnels...

	   Really, it is unlikely that netif_tx_lock protection is necessary
	   here.  (f.e. loopback and IP tunnels are clean ignoring statistics
	   counters.)
	   However, it is possible, that they rely on protection
	   made by us here.

	   Check this and shot the lock. It is not prone from deadlocks.
	   Either shot noqueue qdisc, it is even simpler 8)
	 */
	if (dev->flags & IFF_UP) {
		int cpu = smp_processor_id(); /* ok because BHs are off */

		if (txq->xmit_lock_owner != cpu) {

			HARD_TX_LOCK(dev, txq, cpu);

			if (!netif_tx_queue_stopped(txq)) {
				rc = 0;
				if (!dev_hard_start_xmit(skb, dev, txq)) {
					HARD_TX_UNLOCK(dev, txq);
					goto out;
				}
			}
			HARD_TX_UNLOCK(dev, txq);
			if (net_ratelimit())
				printk(KERN_CRIT "Virtual device %s asks to "
				       "queue packet!\n", dev->name);
		} else {
			/* Recursion is detected! It is possible,
			 * unfortunately */
			if (net_ratelimit())
				printk(KERN_CRIT "Dead loop on virtual device "
				       "%s, fix it urgently!\n", dev->name);
		}
	}

	rc = -ENETDOWN;
	rcu_read_unlock_bh();

out_kfree_skb:
	kfree_skb(skb);
	return rc;
out:
	rcu_read_unlock_bh();
	return rc;
}


/*=======================================================================
			Receiver routines
  =======================================================================*/

int netdev_max_backlog __read_mostly = 1000;
int netdev_budget __read_mostly = 300;
int weight_p __read_mostly = 64;            /* old backlog weight */

DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };


/**
 *	netif_rx	-	post buffer to the network code
 *	@skb: buffer to post
 *
 *	This function receives a packet from a device driver and queues it for
 *	the upper (protocol) levels to process.  It always succeeds. The buffer
 *	may be dropped during processing for congestion control or by the
 *	protocol layers.
 *
 *	return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_DROP     (packet was dropped)
 *
 */

int netif_rx(struct sk_buff *skb)
{
	struct softnet_data *queue;
	unsigned long flags;

	/* if netpoll wants it, pretend we never saw it */
	if (netpoll_rx(skb))
		return NET_RX_DROP;

	if (!skb->tstamp.tv64)
		net_timestamp(skb);

	/*
	 * The code is rearranged so that the path is the most
	 * short when CPU is congested, but is still operating.
 */
	local_irq_save(flags);
	queue = &__get_cpu_var(softnet_data);

	__get_cpu_var(netdev_rx_stat).total++;
	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
		if (queue->input_pkt_queue.qlen) {
enqueue:
			__skb_queue_tail(&queue->input_pkt_queue, skb);
			local_irq_restore(flags);
			return NET_RX_SUCCESS;
		}

		/* Queue was empty: kick the backlog NAPI poller, then
		 * enqueue the packet.
		 */
		napi_schedule(&queue->backlog);
		goto enqueue;
	}

	__get_cpu_var(netdev_rx_stat).dropped++;
	local_irq_restore(flags);

	kfree_skb(skb);
	return NET_RX_DROP;
}

/* netif_rx() for process context: runs any pending softirqs afterwards */
int netif_rx_ni(struct sk_buff *skb)
{
	int err;

	preempt_disable();
	err = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();
	preempt_enable();

	return err;
}

EXPORT_SYMBOL(netif_rx_ni);

/* NET_TX softirq: free completed skbs and run scheduled qdiscs */
static void net_tx_action(struct softirq_action *h)
{
	struct softnet_data *sd = &__get_cpu_var(softnet_data);

	if (sd->completion_queue) {
		struct sk_buff *clist;

		/* Detach the whole completion list under irq protection */
		local_irq_disable();
		clist = sd->completion_queue;
		sd->completion_queue = NULL;
		local_irq_enable();

		while (clist) {
			struct sk_buff *skb = clist;
			clist = clist->next;

			WARN_ON(atomic_read(&skb->users));
			__kfree_skb(skb);
		}
	}

	if (sd->output_queue) {
		struct Qdisc *head;

		local_irq_disable();
		head = sd->output_queue;
		sd->output_queue = NULL;
		local_irq_enable();

		while (head) {
			struct Qdisc *q = head;
			spinlock_t *root_lock;

			head = head->next_sched;

			root_lock = qdisc_lock(q);
			if (spin_trylock(root_lock)) {
				smp_mb__before_clear_bit();
				clear_bit(__QDISC_STATE_SCHED,
					  &q->state);
				qdisc_run(q);
				spin_unlock(root_lock);
			} else {
				/* Lock contended: reschedule the qdisc
				 * unless it is being torn down.
				 */
				if (!test_bit(__QDISC_STATE_DEACTIVATED,
					      &q->state)) {
					__netif_reschedule(q);
				} else {
					smp_mb__before_clear_bit();
					clear_bit(__QDISC_STATE_SCHED,
						  &q->state);
				}
			}
		}
	}
}

/* Hand the skb to one packet handler, taking an skb reference for it */
static inline int deliver_skb(struct sk_buff *skb,
			      struct packet_type *pt_prev,
			      struct net_device *orig_dev)
{
	atomic_inc(&skb->users);
	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}

#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/* These hooks defined here for ATM */
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
						unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;

/*
 * If bridge module is loaded call bridging hook.
 *  returns NULL if packet was consumed.
 */
struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
					struct sk_buff *skb) __read_mostly;
static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
					    struct packet_type **pt_prev, int *ret,
					    struct net_device *orig_dev)
{
	struct net_bridge_port *port;

	if (skb->pkt_type == PACKET_LOOPBACK ||
	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
		return skb;

	if (*pt_prev) {
		/* Flush the pending handler before diverting to the bridge */
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, skb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
#endif

#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);

static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
					     struct packet_type **pt_prev,
					     int *ret,
					     struct net_device *orig_dev)
{
	if (skb->dev->macvlan_port == NULL)
		return skb;

	if (*pt_prev) {
		/* Flush the pending handler before diverting to macvlan */
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}
	return macvlan_handle_frame_hook(skb);
}
#else
#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
#endif

#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
 * a compare and 2 stores extra right now if we dont have it on
 * but have CONFIG_NET_CLS_ACT
 * NOTE: This doesnt stop any functionality; if you dont have
 * the ingress scheduler, you just cant add policies on ingress.
 *
 */
static int ing_filter(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	u32 ttl = G_TC_RTTL(skb->tc_verd);
	struct netdev_queue *rxq;
	int result = TC_ACT_OK;
	struct Qdisc *q;

	/* Guard against redirect loops between devices */
	if (MAX_RED_LOOP < ttl++) {
		printk(KERN_WARNING
		       "Redir loop detected Dropping packet (%d->%d)\n",
		       skb->iif, dev->ifindex);
		return TC_ACT_SHOT;
	}

	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);

	rxq = &dev->rx_queue;

	q = rxq->qdisc;
	if (q != &noop_qdisc) {
		spin_lock(qdisc_lock(q));
		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
			result = qdisc_enqueue_root(skb, q);
		spin_unlock(qdisc_lock(q));
	}

	return result;
}

static inline struct sk_buff *handle_ing(struct sk_buff *skb,
					 struct packet_type **pt_prev,
					 int *ret, struct net_device *orig_dev)
{
	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
		goto out;

	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	} else {
		/* Huh? Why does turning on AF_PACKET affect this?
 */
		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
	}

	switch (ing_filter(skb)) {
	case TC_ACT_SHOT:
	case TC_ACT_STOLEN:
		kfree_skb(skb);
		return NULL;
	}

out:
	skb->tc_verd = 0;
	return skb;
}
#endif

/*
 * 	netif_nit_deliver - deliver received packets to network taps
 * 	@skb: buffer
 *
 * 	This function is used to deliver incoming packets to network
 * 	taps. It should be used when the normal netif_receive_skb path
 * 	is bypassed, for example because of VLAN acceleration.
 */
void netif_nit_deliver(struct sk_buff *skb)
{
	struct packet_type *ptype;

	if (list_empty(&ptype_all))
		return;

	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		if (!ptype->dev || ptype->dev == skb->dev)
			deliver_skb(skb, ptype, skb->dev);
	}
	rcu_read_unlock();
}

/**
 *	netif_receive_skb - process receive buffer from network
 *	@skb: buffer to process
 *
 *	netif_receive_skb() is the main receive data processing function.
 *	It always succeeds. The buffer may be dropped during processing
 *	for congestion control or by the protocol layers.
 *
 *	This function may only be called from softirq context and interrupts
 *	should be enabled.
 *
 *	Return values (usually ignored):
 *	NET_RX_SUCCESS: no congestion
 *	NET_RX_DROP: packet was dropped
 */
int netif_receive_skb(struct sk_buff *skb)
{
	struct packet_type *ptype, *pt_prev;
	struct net_device *orig_dev;
	struct net_device *null_or_orig;
	int ret = NET_RX_DROP;
	__be16 type;

	/* VLAN-accelerated frames are re-injected by the VLAN code. */
	if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
		return NET_RX_SUCCESS;

	/* if we've gotten here through NAPI, check netpoll */
	if (netpoll_receive_skb(skb))
		return NET_RX_DROP;

	if (!skb->tstamp.tv64)
		net_timestamp(skb);

	if (!skb->iif)
		skb->iif = skb->dev->ifindex;

	/* Bonding: a frame arriving on an inactive slave is either dropped
	 * (delivered only to taps bound exactly to the slave) or re-homed
	 * onto the master device.
	 */
	null_or_orig = NULL;
	orig_dev = skb->dev;
	if (orig_dev->master) {
		if (skb_bond_should_drop(skb))
			null_or_orig = orig_dev; /* deliver only exact match */
		else
			skb->dev = orig_dev->master;
	}

	__get_cpu_var(netdev_rx_stat).total++;

	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;

	pt_prev = NULL;

	rcu_read_lock();

	/* Don't receive packets in an exiting network namespace */
	if (!net_alive(dev_net(skb->dev))) {
		kfree_skb(skb);
		goto out;
	}

#ifdef CONFIG_NET_CLS_ACT
	if (skb->tc_verd & TC_NCLS) {
		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
		goto ncls;
	}
#endif

	/* Deliver to all-protocol taps (e.g. packet sockets) first.
	 * pt_prev defers each deliver_skb by one iteration so the final
	 * handler can consume the skb without an extra clone.
	 */
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
		    ptype->dev == orig_dev) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

#ifdef CONFIG_NET_CLS_ACT
	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;
ncls:
#endif

	/* Bridge/macvlan hooks may steal the skb entirely. */
	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;
	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;

	/* Deliver to the protocol handlers hashed on the ethertype. */
	type = skb->protocol;
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
		if (ptype->type == type &&
		    (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
		     ptype->dev == orig_dev)) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

	if (pt_prev) {
		/* Last handler gets the original skb (no clone). */
		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
	} else {
		kfree_skb(skb);
		/* Jamal, now you will not able to escape explaining
		 * me how you were going to use this. :-)
		 */
		ret = NET_RX_DROP;
	}

out:
	rcu_read_unlock();
	return ret;
}

/* Network device is going away, flush any packets still pending */
static void flush_backlog(void *arg)
{
	struct net_device *dev = arg;
	struct softnet_data *queue = &__get_cpu_var(softnet_data);
	struct sk_buff *skb, *tmp;

	/* Drop every queued skb that still references the dying device. */
	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
		if (skb->dev == dev) {
			__skb_unlink(skb, &queue->input_pkt_queue);
			kfree_skb(skb);
		}
}

/*
 * Finish a GRO-merged skb: give the protocol's gro_complete hook a
 * chance to fix up headers, then push it into the normal receive path.
 * An skb with no frag_list was never merged and is passed through as-is.
 */
static int napi_gro_complete(struct sk_buff *skb)
{
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
	int err = -ENOENT;

	if (!skb_shinfo(skb)->frag_list)
		goto out;

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, head, list) {
		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
			continue;

		err = ptype->gro_complete(skb);
		break;
	}
	rcu_read_unlock();

	if (err) {
		/* A merged skb whose protocol no longer completes is a bug. */
		WARN_ON(&ptype->list == head);
		kfree_skb(skb);
		return NET_RX_SUCCESS;
	}

out:
	__skb_push(skb, -skb_network_offset(skb));
	return netif_receive_skb(skb);
}

/* Complete and deliver every skb held on the NAPI instance's GRO list. */
void napi_gro_flush(struct napi_struct *napi)
{
	struct sk_buff *skb, *next;

	for (skb = napi->gro_list; skb; skb = next) {
		next = skb->next;
		skb->next = NULL;
		napi_gro_complete(skb);
	}

	napi->gro_list = NULL;
}
EXPORT_SYMBOL(napi_gro_flush);

/*
 * GRO entry point for NAPI drivers: try to merge @skb into a held flow
 * on napi->gro_list; fall back to immediate netif_receive_skb when the
 * device lacks NETIF_F_GRO, no protocol handler matches, or the flow
 * must be flushed.
 */
int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
	struct sk_buff **pp = NULL;
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
	int count = 0;
	int same_flow;
	int mac_len;

	if (!(skb->dev->features & NETIF_F_GRO))
		goto normal;

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, head, list) {
		struct sk_buff *p;

		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
			continue;

		skb_reset_network_header(skb);
		mac_len = skb->network_header - skb->mac_header;
		skb->mac_len = mac_len;
		NAPI_GRO_CB(skb)->same_flow = 0;
		NAPI_GRO_CB(skb)->flush = 0;

		/* Pre-filter held flows by MAC header so the protocol
		 * gro_receive only compares plausible candidates.
		 */
		for (p = napi->gro_list; p; p = p->next) {
			count++;
			NAPI_GRO_CB(p)->same_flow =
				p->mac_len == mac_len &&
				!memcmp(skb_mac_header(p), skb_mac_header(skb),
					mac_len);
			NAPI_GRO_CB(p)->flush = 0;
		}

		pp = ptype->gro_receive(&napi->gro_list, skb);
		break;
	}
	rcu_read_unlock();

	/* No gro_receive handler matched this ethertype. */
	if (&ptype->list == head)
		goto normal;

	same_flow = NAPI_GRO_CB(skb)->same_flow;

	/* The handler asked us to flush one held flow immediately. */
	if (pp) {
		struct sk_buff *nskb = *pp;

		*pp = nskb->next;
		nskb->next = NULL;
		napi_gro_complete(nskb);
		count--;
	}

	/* Merged into an existing flow - skb was consumed by gro_receive. */
	if (same_flow)
		goto ok;

	if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
		__skb_push(skb, -skb_network_offset(skb));
		goto normal;
	}

	/* Start a new held flow with this skb. */
	NAPI_GRO_CB(skb)->count = 1;
	skb->next = napi->gro_list;
	napi->gro_list = skb;

ok:
	return NET_RX_SUCCESS;

normal:
	return netif_receive_skb(skb);
}
EXPORT_SYMBOL(napi_gro_receive);

/*
 * NAPI poll handler for the per-CPU backlog queue (the netif_rx path).
 * Dequeues under local_irq_disable() because the queue is also
 * manipulated with interrupts disabled elsewhere; the jiffies ==
 * start_time condition additionally bounds each pass to one tick.
 */
static int process_backlog(struct napi_struct *napi, int quota)
{
	int work = 0;
	struct softnet_data *queue = &__get_cpu_var(softnet_data);
	unsigned long start_time = jiffies;

	napi->weight = weight_p;
	do {
		struct sk_buff *skb;

		local_irq_disable();
		skb = __skb_dequeue(&queue->input_pkt_queue);
		if (!skb) {
			/* Queue drained: complete while still irq-safe. */
			__napi_complete(napi);
			local_irq_enable();
			break;
		}
		local_irq_enable();

		napi_gro_receive(napi, skb);
	} while (++work < quota && jiffies == start_time);

	napi_gro_flush(napi);

	return work;
}

/**
 *	__napi_schedule - schedule for receive
 *	@n: entry to schedule
 *
 * The entry's receive function will be scheduled to run
 */
void __napi_schedule(struct napi_struct *n)
{
	unsigned long flags;

	local_irq_save(flags);
	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__napi_schedule);

/*
 * Take @n off the poll list and clear NAPI_STATE_SCHED.
 * NOTE(review): callers appear to invoke this with local interrupts
 * disabled (see process_backlog/net_rx_action) - confirm before reuse.
 * The GRO list must already be empty.
 */
void __napi_complete(struct napi_struct *n)
{
	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
	BUG_ON(n->gro_list);

	list_del(&n->poll_list);
	/* Ensure the list removal is visible before SCHED is cleared. */
	smp_mb__before_clear_bit();
	clear_bit(NAPI_STATE_SCHED, &n->state);
}
EXPORT_SYMBOL(__napi_complete);

/* Flush pending GRO work, then mark the NAPI instance as done polling. */
void napi_complete(struct napi_struct *n)
{
	unsigned long flags;

	/*
	 * don't let napi dequeue from the cpu poll list
	 * just in case its running on a different cpu
	 */
	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
		return;

	napi_gro_flush(n);
	local_irq_save(flags);
	__napi_complete(n);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(napi_complete);

/*
 * Initialise a NAPI context and attach it to @dev. The instance is
 * created in the SCHED state, so the driver must napi_enable() it
 * before it can be scheduled.
 */
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
		    int (*poll)(struct napi_struct *, int), int weight)
{
	INIT_LIST_HEAD(&napi->poll_list);
	napi->gro_list = NULL;
	napi->poll = poll;
	napi->weight = weight;
	list_add(&napi->dev_list, &dev->napi_list);
#ifdef CONFIG_NETPOLL
	napi->dev = dev;
	spin_lock_init(&napi->poll_lock);
	napi->poll_owner = -1;
#endif
	set_bit(NAPI_STATE_SCHED, &napi->state);
}
EXPORT_SYMBOL(netif_napi_add);

/* Detach a NAPI context from its device and free any held GRO skbs. */
void netif_napi_del(struct napi_struct *napi)
{
	struct sk_buff *skb, *next;

	list_del_init(&napi->dev_list);

	for (skb = napi->gro_list; skb; skb = next) {
		next = skb->next;
		skb->next = NULL;
		kfree_skb(skb);
	}

	napi->gro_list = NULL;
}
EXPORT_SYMBOL(netif_napi_del);


/*
 * NET_RX_SOFTIRQ handler: round-robin over this CPU's scheduled NAPI
 * instances, charging each poll against a global budget and a 2-jiffy
 * time limit.
 */
static void net_rx_action(struct softirq_action *h)
{
	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
	unsigned long time_limit = jiffies + 2;
	int budget = netdev_budget;
	void *have;

	local_irq_disable();

	while (!list_empty(list)) {
		struct napi_struct *n;
		int work, weight;

		/* If softirq window is exhausted then punt.
		 * Allow this to run for 2 jiffies since which will allow
		 * an average latency of 1.5/HZ.
		 */
		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
			goto softnet_break;

		local_irq_enable();

		/* Even though interrupts have been re-enabled, this
		 * access is safe because interrupts can only add new
		 * entries to the tail of this list, and only ->poll()
		 * calls can remove this head entry from the list.
		 */
		n = list_entry(list->next, struct napi_struct, poll_list);

		have = netpoll_poll_lock(n);

		weight = n->weight;

		/* This NAPI_STATE_SCHED test is for avoiding a race
		 * with netpoll's poll_napi(). Only the entity which
		 * obtains the lock and sees NAPI_STATE_SCHED set will
		 * actually make the ->poll() call. Therefore we avoid
		 * accidentally calling ->poll() when NAPI is not scheduled.
		 */
		work = 0;
		if (test_bit(NAPI_STATE_SCHED, &n->state))
			work = n->poll(n, weight);

		WARN_ON_ONCE(work > weight);

		budget -= work;

		local_irq_disable();

		/* Drivers must not modify the NAPI state if they
		 * consume the entire weight.  In such cases this code
		 * still "owns" the NAPI instance and therefore can
		 * move the instance around on the list at-will.
		 */
		if (unlikely(work == weight)) {
			if (unlikely(napi_disable_pending(n)))
				__napi_complete(n);
			else
				list_move_tail(&n->poll_list, list);
		}

		netpoll_poll_unlock(have);
	}
out:
	local_irq_enable();

#ifdef CONFIG_NET_DMA
	/*
	 * There may not be any more sk_buffs coming right now, so push
	 * any pending DMA copies to hardware
	 */
	dma_issue_pending_all();
#endif

	return;

softnet_break:
	/* Out of budget or time: account the squeeze and re-raise. */
	__get_cpu_var(netdev_rx_stat).time_squeeze++;
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	goto out;
}

/* Per-family SIOCGIFCONF dump handlers, indexed by address family. */
static gifconf_func_t * gifconf_list [NPROTO];

/**
 *	register_gifconf	-	register a SIOCGIF handler
 *	@family: Address family
 *	@gifconf: Function handler
 *
 *	Register protocol dependent address dumping routines. The handler
 *	that is passed must not be freed or reused until it has been replaced
 *	by another handler.
 */
int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
{
	if (family >= NPROTO)
		return -EINVAL;
	gifconf_list[family] = gifconf;
	return 0;
}


/*
 *	Map an interface index to its name (SIOCGIFNAME)
 */

/*
 *	We need this ioctl for efficient implementation of the
 *	if_indextoname() function required by the IPv6 API.  Without
 *	it, we would have to search all the interfaces to find a
 *	match.
--pb 2659 */ 2660 2661 static int dev_ifname(struct net *net, struct ifreq __user *arg) 2662 { 2663 struct net_device *dev; 2664 struct ifreq ifr; 2665 2666 /* 2667 * Fetch the caller's info block. 2668 */ 2669 2670 if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) 2671 return -EFAULT; 2672 2673 read_lock(&dev_base_lock); 2674 dev = __dev_get_by_index(net, ifr.ifr_ifindex); 2675 if (!dev) { 2676 read_unlock(&dev_base_lock); 2677 return -ENODEV; 2678 } 2679 2680 strcpy(ifr.ifr_name, dev->name); 2681 read_unlock(&dev_base_lock); 2682 2683 if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) 2684 return -EFAULT; 2685 return 0; 2686 } 2687 2688 /* 2689 * Perform a SIOCGIFCONF call. This structure will change 2690 * size eventually, and there is nothing I can do about it. 2691 * Thus we will need a 'compatibility mode'. 2692 */ 2693 2694 static int dev_ifconf(struct net *net, char __user *arg) 2695 { 2696 struct ifconf ifc; 2697 struct net_device *dev; 2698 char __user *pos; 2699 int len; 2700 int total; 2701 int i; 2702 2703 /* 2704 * Fetch the caller's info block. 2705 */ 2706 2707 if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) 2708 return -EFAULT; 2709 2710 pos = ifc.ifc_buf; 2711 len = ifc.ifc_len; 2712 2713 /* 2714 * Loop over the interfaces, and write an info block for each. 2715 */ 2716 2717 total = 0; 2718 for_each_netdev(net, dev) { 2719 for (i = 0; i < NPROTO; i++) { 2720 if (gifconf_list[i]) { 2721 int done; 2722 if (!pos) 2723 done = gifconf_list[i](dev, NULL, 0); 2724 else 2725 done = gifconf_list[i](dev, pos + total, 2726 len - total); 2727 if (done < 0) 2728 return -EFAULT; 2729 total += done; 2730 } 2731 } 2732 } 2733 2734 /* 2735 * All done. Write the updated control block back to the caller. 2736 */ 2737 ifc.ifc_len = total; 2738 2739 /* 2740 * Both BSD and Solaris return 0 here, so we do too. 2741 */ 2742 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? 
-EFAULT : 0; 2743 } 2744 2745 #ifdef CONFIG_PROC_FS 2746 /* 2747 * This is invoked by the /proc filesystem handler to display a device 2748 * in detail. 2749 */ 2750 void *dev_seq_start(struct seq_file *seq, loff_t *pos) 2751 __acquires(dev_base_lock) 2752 { 2753 struct net *net = seq_file_net(seq); 2754 loff_t off; 2755 struct net_device *dev; 2756 2757 read_lock(&dev_base_lock); 2758 if (!*pos) 2759 return SEQ_START_TOKEN; 2760 2761 off = 1; 2762 for_each_netdev(net, dev) 2763 if (off++ == *pos) 2764 return dev; 2765 2766 return NULL; 2767 } 2768 2769 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2770 { 2771 struct net *net = seq_file_net(seq); 2772 ++*pos; 2773 return v == SEQ_START_TOKEN ? 2774 first_net_device(net) : next_net_device((struct net_device *)v); 2775 } 2776 2777 void dev_seq_stop(struct seq_file *seq, void *v) 2778 __releases(dev_base_lock) 2779 { 2780 read_unlock(&dev_base_lock); 2781 } 2782 2783 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) 2784 { 2785 const struct net_device_stats *stats = dev_get_stats(dev); 2786 2787 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " 2788 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", 2789 dev->name, stats->rx_bytes, stats->rx_packets, 2790 stats->rx_errors, 2791 stats->rx_dropped + stats->rx_missed_errors, 2792 stats->rx_fifo_errors, 2793 stats->rx_length_errors + stats->rx_over_errors + 2794 stats->rx_crc_errors + stats->rx_frame_errors, 2795 stats->rx_compressed, stats->multicast, 2796 stats->tx_bytes, stats->tx_packets, 2797 stats->tx_errors, stats->tx_dropped, 2798 stats->tx_fifo_errors, stats->collisions, 2799 stats->tx_carrier_errors + 2800 stats->tx_aborted_errors + 2801 stats->tx_window_errors + 2802 stats->tx_heartbeat_errors, 2803 stats->tx_compressed); 2804 } 2805 2806 /* 2807 * Called from the PROCfs module. 
This now uses the new arbitrary sized 2808 * /proc/net interface to create /proc/net/dev 2809 */ 2810 static int dev_seq_show(struct seq_file *seq, void *v) 2811 { 2812 if (v == SEQ_START_TOKEN) 2813 seq_puts(seq, "Inter-| Receive " 2814 " | Transmit\n" 2815 " face |bytes packets errs drop fifo frame " 2816 "compressed multicast|bytes packets errs " 2817 "drop fifo colls carrier compressed\n"); 2818 else 2819 dev_seq_printf_stats(seq, v); 2820 return 0; 2821 } 2822 2823 static struct netif_rx_stats *softnet_get_online(loff_t *pos) 2824 { 2825 struct netif_rx_stats *rc = NULL; 2826 2827 while (*pos < nr_cpu_ids) 2828 if (cpu_online(*pos)) { 2829 rc = &per_cpu(netdev_rx_stat, *pos); 2830 break; 2831 } else 2832 ++*pos; 2833 return rc; 2834 } 2835 2836 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) 2837 { 2838 return softnet_get_online(pos); 2839 } 2840 2841 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2842 { 2843 ++*pos; 2844 return softnet_get_online(pos); 2845 } 2846 2847 static void softnet_seq_stop(struct seq_file *seq, void *v) 2848 { 2849 } 2850 2851 static int softnet_seq_show(struct seq_file *seq, void *v) 2852 { 2853 struct netif_rx_stats *s = v; 2854 2855 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", 2856 s->total, s->dropped, s->time_squeeze, 0, 2857 0, 0, 0, 0, /* was fastroute */ 2858 s->cpu_collision ); 2859 return 0; 2860 } 2861 2862 static const struct seq_operations dev_seq_ops = { 2863 .start = dev_seq_start, 2864 .next = dev_seq_next, 2865 .stop = dev_seq_stop, 2866 .show = dev_seq_show, 2867 }; 2868 2869 static int dev_seq_open(struct inode *inode, struct file *file) 2870 { 2871 return seq_open_net(inode, file, &dev_seq_ops, 2872 sizeof(struct seq_net_private)); 2873 } 2874 2875 static const struct file_operations dev_seq_fops = { 2876 .owner = THIS_MODULE, 2877 .open = dev_seq_open, 2878 .read = seq_read, 2879 .llseek = seq_lseek, 2880 .release = seq_release_net, 2881 }; 2882 
static const struct seq_operations softnet_seq_ops = {
	.start = softnet_seq_start,
	.next  = softnet_seq_next,
	.stop  = softnet_seq_stop,
	.show  = softnet_seq_show,
};

static int softnet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &softnet_seq_ops);
}

static const struct file_operations softnet_seq_fops = {
	.owner	 = THIS_MODULE,
	.open    = softnet_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};

/*
 * Linearise the packet-type tables for /proc/net/ptype: position 0..N
 * walks ptype_all first, then every ptype_base hash chain in order.
 */
static void *ptype_get_idx(loff_t pos)
{
	struct packet_type *pt = NULL;
	loff_t i = 0;
	int t;

	list_for_each_entry_rcu(pt, &ptype_all, list) {
		if (i == pos)
			return pt;
		++i;
	}

	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
			if (i == pos)
				return pt;
			++i;
		}
	}
	return NULL;
}

static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
}

/*
 * Step to the element after @v, crossing from the ptype_all list into
 * the ptype_base hash chains (and across empty chains) as needed.
 */
static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct packet_type *pt;
	struct list_head *nxt;
	int hash;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ptype_get_idx(0);

	pt = v;
	nxt = pt->list.next;
	if (pt->type == htons(ETH_P_ALL)) {
		if (nxt != &ptype_all)
			goto found;
		/* End of ptype_all: continue into the first hash chain. */
		hash = 0;
		nxt = ptype_base[0].next;
	} else
		hash = ntohs(pt->type) & PTYPE_HASH_MASK;

	/* Skip exhausted/empty hash chains. */
	while (nxt == &ptype_base[hash]) {
		if (++hash >= PTYPE_HASH_SIZE)
			return NULL;
		nxt = ptype_base[hash].next;
	}
found:
	return list_entry(nxt, struct packet_type, list);
}

static void ptype_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

/* One /proc/net/ptype row: ethertype, bound device, handler symbol. */
static int ptype_seq_show(struct seq_file *seq, void *v)
{
	struct packet_type *pt = v;

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Type Device      Function\n");
	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
		if (pt->type == htons(ETH_P_ALL))
			seq_puts(seq, "ALL ");
		else
			seq_printf(seq, "%04x", ntohs(pt->type));

		seq_printf(seq, " %-8s %pF\n",
			   pt->dev ? pt->dev->name : "", pt->func);
	}

	return 0;
}

static const struct seq_operations ptype_seq_ops = {
	.start = ptype_seq_start,
	.next  = ptype_seq_next,
	.stop  = ptype_seq_stop,
	.show  = ptype_seq_show,
};

static int ptype_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ptype_seq_ops,
			sizeof(struct seq_net_private));
}

static const struct file_operations ptype_seq_fops = {
	.owner	 = THIS_MODULE,
	.open    = ptype_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};


/*
 * Create the per-namespace /proc/net entries (dev, softnet_stat, ptype,
 * wireless extensions); unwinds everything already created on failure.
 */
static int __net_init dev_proc_net_init(struct net *net)
{
	int rc = -ENOMEM;

	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
		goto out;
	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
		goto out_dev;
	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
		goto out_softnet;

	if (wext_proc_init(net))
		goto out_ptype;
	rc = 0;
out:
	return rc;
out_ptype:
	proc_net_remove(net, "ptype");
out_softnet:
	proc_net_remove(net, "softnet_stat");
out_dev:
	proc_net_remove(net, "dev");
	goto out;
}

static void __net_exit dev_proc_net_exit(struct net *net)
{
	wext_proc_exit(net);

	proc_net_remove(net, "ptype");
	proc_net_remove(net, "softnet_stat");
	proc_net_remove(net, "dev");
}

static struct pernet_operations __net_initdata dev_proc_ops = {
	.init = dev_proc_net_init,
	.exit = dev_proc_net_exit,
};

static int __init dev_proc_init(void)
{
	return register_pernet_subsys(&dev_proc_ops);
}
#else
#define dev_proc_init() 0
#endif	/* CONFIG_PROC_FS */


/**
 *	netdev_set_master	-	set up master/slave pair
 *	@slave: slave device
 *	@master: new master
device
 *
 *	Changes the master device of the slave. Pass %NULL to break the
 *	bonding. The caller must hold the RTNL semaphore. On a failure
 *	a negative errno code is returned. On success the reference counts
 *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
 *	function returns zero.
 */
int netdev_set_master(struct net_device *slave, struct net_device *master)
{
	struct net_device *old = slave->master;

	ASSERT_RTNL();

	if (master) {
		/* Refuse to re-enslave without an explicit unbind first. */
		if (old)
			return -EBUSY;
		dev_hold(master);
	}

	slave->master = master;

	/* Let in-flight RCU readers of the old master drain. */
	synchronize_net();

	if (old)
		dev_put(old);

	if (master)
		slave->flags |= IFF_SLAVE;
	else
		slave->flags &= ~IFF_SLAVE;

	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
	return 0;
}

/* Notify the driver of RX-filter-relevant flag changes while it is up. */
static void dev_change_rx_flags(struct net_device *dev, int flags)
{
	const struct net_device_ops *ops = dev->netdev_ops;

	if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
		ops->ndo_change_rx_flags(dev, flags);
}

/*
 * Adjust the promiscuity refcount by @inc and toggle IFF_PROMISC on the
 * 0<->nonzero transitions. Logs and audits each transition. Caller must
 * hold RTNL and propagate the new state with dev_set_rx_mode().
 */
static int __dev_set_promiscuity(struct net_device *dev, int inc)
{
	unsigned short old_flags = dev->flags;
	uid_t uid;
	gid_t gid;

	ASSERT_RTNL();

	dev->flags |= IFF_PROMISC;
	dev->promiscuity += inc;
	if (dev->promiscuity == 0) {
		/*
		 * Avoid overflow.
		 * If inc causes overflow, untouch promisc and return error.
		 */
		if (inc < 0)
			dev->flags &= ~IFF_PROMISC;
		else {
			dev->promiscuity -= inc;
			printk(KERN_WARNING "%s: promiscuity touches roof, "
				"set promiscuity failed, promiscuity feature "
				"of device might be broken.\n", dev->name);
			return -EOVERFLOW;
		}
	}
	if (dev->flags != old_flags) {
		printk(KERN_INFO "device %s %s promiscuous mode\n",
		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
							       "left");
		if (audit_enabled) {
			current_uid_gid(&uid, &gid);
			audit_log(current->audit_context, GFP_ATOMIC,
				AUDIT_ANOM_PROMISCUOUS,
				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
				dev->name, (dev->flags & IFF_PROMISC),
				(old_flags & IFF_PROMISC),
				audit_get_loginuid(current),
				uid, gid,
				audit_get_sessionid(current));
		}

		dev_change_rx_flags(dev, IFF_PROMISC);
	}
	return 0;
}

/**
 *	dev_set_promiscuity	- update promiscuity count on a device
 *	@dev: device
 *	@inc: modifier
 *
 *	Add or remove promiscuity from a device. While the count in the device
 *	remains above zero the interface remains promiscuous. Once it hits zero
 *	the device reverts back to normal filtering operation. A negative inc
 *	value is used to drop promiscuity on the device.
 *	Return 0 if successful or a negative errno code on error.
 */
int dev_set_promiscuity(struct net_device *dev, int inc)
{
	unsigned short old_flags = dev->flags;
	int err;

	err = __dev_set_promiscuity(dev, inc);
	if (err < 0)
		return err;
	/* Push the new filter state to the driver on state change. */
	if (dev->flags != old_flags)
		dev_set_rx_mode(dev);
	return err;
}

/**
 *	dev_set_allmulti	- update allmulti count on a device
 *	@dev: device
 *	@inc: modifier
 *
 *	Add or remove reception of all multicast frames to a device. While the
 *	count in the device remains above zero the interface remains listening
 *	to all interfaces. Once it hits zero the device reverts back to normal
 *	filtering operation. A negative @inc value is used to drop the counter
 *	when releasing a resource needing all multicasts.
 *	Return 0 if successful or a negative errno code on error.
 */

int dev_set_allmulti(struct net_device *dev, int inc)
{
	unsigned short old_flags = dev->flags;

	ASSERT_RTNL();

	dev->flags |= IFF_ALLMULTI;
	dev->allmulti += inc;
	if (dev->allmulti == 0) {
		/*
		 * Avoid overflow.
		 * If inc causes overflow, untouch allmulti and return error.
		 */
		if (inc < 0)
			dev->flags &= ~IFF_ALLMULTI;
		else {
			dev->allmulti -= inc;
			printk(KERN_WARNING "%s: allmulti touches roof, "
				"set allmulti failed, allmulti feature of "
				"device might be broken.\n", dev->name);
			return -EOVERFLOW;
		}
	}
	if (dev->flags ^ old_flags) {
		dev_change_rx_flags(dev, IFF_ALLMULTI);
		dev_set_rx_mode(dev);
	}
	return 0;
}

/*
 *	Upload unicast and multicast address lists to device and
 *	configure RX filtering. When the device doesn't support unicast
 *	filtering it is put in promiscuous mode while unicast addresses
 *	are present.
 */
void __dev_set_rx_mode(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;

	/* dev_open will call this function so the list will stay sane. */
	if (!(dev->flags&IFF_UP))
		return;

	if (!netif_device_present(dev))
		return;

	if (ops->ndo_set_rx_mode)
		ops->ndo_set_rx_mode(dev);
	else {
		/* Unicast addresses changes may only happen under the rtnl,
		 * therefore calling __dev_set_promiscuity here is safe.
		 */
		if (dev->uc_count > 0 && !dev->uc_promisc) {
			__dev_set_promiscuity(dev, 1);
			dev->uc_promisc = 1;
		} else if (dev->uc_count == 0 && dev->uc_promisc) {
			__dev_set_promiscuity(dev, -1);
			dev->uc_promisc = 0;
		}

		if (ops->ndo_set_multicast_list)
			ops->ndo_set_multicast_list(dev);
	}
}

/* Locked wrapper: takes the device address lock around __dev_set_rx_mode. */
void dev_set_rx_mode(struct net_device *dev)
{
	netif_addr_lock_bh(dev);
	__dev_set_rx_mode(dev);
	netif_addr_unlock_bh(dev);
}

/*
 * Drop one reference (or, with @glbl, the global-user marking) on @addr
 * in a dev_addr_list; the entry is unlinked and freed when the last
 * user goes away. Returns 0 on success, -ENOENT if not found.
 */
int __dev_addr_delete(struct dev_addr_list **list, int *count,
		      void *addr, int alen, int glbl)
{
	struct dev_addr_list *da;

	for (; (da = *list) != NULL; list = &da->next) {
		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
		    alen == da->da_addrlen) {
			if (glbl) {
				int old_glbl = da->da_gusers;
				da->da_gusers = 0;
				if (old_glbl == 0)
					break;
			}
			if (--da->da_users)
				return 0;

			*list = da->next;
			kfree(da);
			(*count)--;
			return 0;
		}
	}
	return -ENOENT;
}

/*
 * Add @addr to a dev_addr_list, or take another reference if it is
 * already present. Allocates with GFP_ATOMIC since callers hold the
 * address spinlock.
 */
int __dev_addr_add(struct dev_addr_list **list, int *count,
		   void *addr, int alen, int glbl)
{
	struct dev_addr_list *da;

	for (da = *list; da != NULL; da = da->next) {
		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
		    da->da_addrlen == alen) {
			if (glbl) {
				int old_glbl = da->da_gusers;
				da->da_gusers = 1;
				if (old_glbl)
					return 0;
			}
			da->da_users++;
			return 0;
		}
	}

	da = kzalloc(sizeof(*da), GFP_ATOMIC);
	if (da == NULL)
		return -ENOMEM;
	memcpy(da->da_addr, addr, alen);
	da->da_addrlen = alen;
	da->da_users = 1;
	da->da_gusers = glbl ? 1 : 0;
	da->next = *list;
	*list = da;
	(*count)++;
	return 0;
}

/**
 *	dev_unicast_delete	- Release secondary unicast address.
 *	@dev: device
 *	@addr: address to delete
 *	@alen: length of @addr
 *
 *	Release reference to a secondary unicast address and remove it
 *	from the device if the reference count drops to zero.
 *
 * 	The caller must hold the rtnl_mutex.
 */
int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
{
	int err;

	ASSERT_RTNL();

	netif_addr_lock_bh(dev);
	err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
	if (!err)
		__dev_set_rx_mode(dev);
	netif_addr_unlock_bh(dev);
	return err;
}
EXPORT_SYMBOL(dev_unicast_delete);

/**
 *	dev_unicast_add	- add a secondary unicast address
 *	@dev: device
 *	@addr: address to add
 *	@alen: length of @addr
 *
 *	Add a secondary unicast address to the device or increase
 *	the reference count if it already exists.
 *
 *	The caller must hold the rtnl_mutex.
 */
int dev_unicast_add(struct net_device *dev, void *addr, int alen)
{
	int err;

	ASSERT_RTNL();

	netif_addr_lock_bh(dev);
	err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
	if (!err)
		__dev_set_rx_mode(dev);
	netif_addr_unlock_bh(dev);
	return err;
}
EXPORT_SYMBOL(dev_unicast_add);

/*
 * Mirror *from into *to: un-synced entries are added to *to (taking an
 * extra reference on the source entry and marking it synced); entries
 * whose only remaining user is the sync itself are dropped from both
 * lists. Stops at the first add failure and returns that error.
 */
int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
		    struct dev_addr_list **from, int *from_count)
{
	struct dev_addr_list *da, *next;
	int err = 0;

	da = *from;
	while (da != NULL) {
		next = da->next;
		if (!da->da_synced) {
			err = __dev_addr_add(to, to_count,
					     da->da_addr, da->da_addrlen, 0);
			if (err < 0)
				break;
			da->da_synced = 1;
			da->da_users++;
		} else if (da->da_users == 1) {
			__dev_addr_delete(to, to_count,
					  da->da_addr, da->da_addrlen, 0);
			__dev_addr_delete(from, from_count,
					  da->da_addr, da->da_addrlen, 0);
		}
		da = next;
	}
	return err;
}

/*
 * Undo __dev_addr_sync: remove every synced entry from *to and drop the
 * sync reference it holds in *from.
 */
void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
		       struct dev_addr_list **from, int *from_count)
{
	struct dev_addr_list *da, *next;

	da = *from;
	while (da != NULL) {
		next = da->next;
		if (da->da_synced) {
			__dev_addr_delete(to, to_count,
					  da->da_addr, da->da_addrlen, 0);
			da->da_synced = 0;
			__dev_addr_delete(from, from_count,
					  da->da_addr, da->da_addrlen, 0);
		}
		da = next;
	}
}

/**
 *	dev_unicast_sync - Synchronize device's unicast list to another device
 *	@to: destination device
 *	@from: source device
 *
 *	Add newly added addresses to the destination device and release
 *	addresses that have no users left. The source device must be
 *	locked by netif_tx_lock_bh.
 *
 *	This function is intended to be called from the dev->set_rx_mode
 *	function of layered software devices.
 */
int dev_unicast_sync(struct net_device *to, struct net_device *from)
{
	int err = 0;

	netif_addr_lock_bh(to);
	err = __dev_addr_sync(&to->uc_list, &to->uc_count,
			      &from->uc_list, &from->uc_count);
	if (!err)
		__dev_set_rx_mode(to);
	netif_addr_unlock_bh(to);
	return err;
}
EXPORT_SYMBOL(dev_unicast_sync);

/**
 *	dev_unicast_unsync - Remove synchronized addresses from the destination device
 *	@to: destination device
 *	@from: source device
 *
 *	Remove all addresses that were added to the destination device by
 *	dev_unicast_sync(). This function is intended to be called from the
 *	dev->stop function of layered software devices.
 */
void dev_unicast_unsync(struct net_device *to, struct net_device *from)
{
	/* Lock order: source (BH-disabling) first, then destination. */
	netif_addr_lock_bh(from);
	netif_addr_lock(to);

	__dev_addr_unsync(&to->uc_list, &to->uc_count,
			  &from->uc_list, &from->uc_count);
	__dev_set_rx_mode(to);

	netif_addr_unlock(to);
	netif_addr_unlock_bh(from);
}
EXPORT_SYMBOL(dev_unicast_unsync);

/* Free an entire dev_addr_list, warning about leaked user references. */
static void __dev_addr_discard(struct dev_addr_list **list)
{
	struct dev_addr_list *tmp;

	while (*list != NULL) {
		tmp = *list;
		*list = tmp->next;
		if (tmp->da_users > tmp->da_gusers)
			printk("__dev_addr_discard: address leakage! "
			       "da_users=%d\n", tmp->da_users);
		kfree(tmp);
	}
}

/* Drop all secondary unicast and multicast addresses of @dev. */
static void dev_addr_discard(struct net_device *dev)
{
	netif_addr_lock_bh(dev);

	__dev_addr_discard(&dev->uc_list);
	dev->uc_count = 0;

	__dev_addr_discard(&dev->mc_list);
	dev->mc_count = 0;

	netif_addr_unlock_bh(dev);
}

/**
 *	dev_get_flags - get flags reported to userspace
 *	@dev: device
 *
 *	Get the combination of flag bits exported through APIs to userspace.
 */
unsigned dev_get_flags(const struct net_device *dev)
{
	unsigned flags;

	/* PROMISC/ALLMULTI are reported from gflags (the user-requested
	 * view); the operational state bits are recomputed below.
	 */
	flags = (dev->flags & ~(IFF_PROMISC |
				IFF_ALLMULTI |
				IFF_RUNNING |
				IFF_LOWER_UP |
				IFF_DORMANT)) |
		(dev->gflags & (IFF_PROMISC |
				IFF_ALLMULTI));

	if (netif_running(dev)) {
		if (netif_oper_up(dev))
			flags |= IFF_RUNNING;
		if (netif_carrier_ok(dev))
			flags |= IFF_LOWER_UP;
		if (netif_dormant(dev))
			flags |= IFF_DORMANT;
	}

	return flags;
}

/**
 *	dev_change_flags - change device settings
 *	@dev: device
 *	@flags: device state flags
 *
 *	Change settings on device based state flags. The flags are
 *	in the userspace exported format.
 */
int dev_change_flags(struct net_device *dev, unsigned flags)
{
	int ret, changes;
	int old_flags = dev->flags;

	ASSERT_RTNL();

	/*
	 *	Set the flags on our device.
	 */

	/* Only the flags in the first mask may be set directly by the
	 * user; IFF_UP/IFF_VOLATILE/IFF_PROMISC/IFF_ALLMULTI are
	 * carried over from the current state and handled below. */
	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
			       IFF_AUTOMEDIA)) |
		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
				    IFF_ALLMULTI));

	/*
	 *	Load in the correct multicast list now the flags have changed.
	 */

	if ((old_flags ^ flags) & IFF_MULTICAST)
		dev_change_rx_flags(dev, IFF_MULTICAST);

	dev_set_rx_mode(dev);

	/*
	 *	Have we downed the interface. We handle IFF_UP ourselves
	 *	according to user attempts to set it, rather than blindly
	 *	setting it.
	 */

	ret = 0;
	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);

		if (!ret)
			dev_set_rx_mode(dev);
	}

	if (dev->flags & IFF_UP &&
	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
					  IFF_VOLATILE)))
		call_netdevice_notifiers(NETDEV_CHANGE, dev);

	/* gflags tracks the user-requested promisc/allmulti state;
	 * dev_set_promiscuity/dev_set_allmulti keep the counted state. */
	if ((flags ^ dev->gflags) & IFF_PROMISC) {
		int inc = (flags & IFF_PROMISC) ? +1 : -1;
		dev->gflags ^= IFF_PROMISC;
		dev_set_promiscuity(dev, inc);
	}

	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
	   is important. Some (broken) drivers set IFF_PROMISC, when
	   IFF_ALLMULTI is requested not asking us and not reporting.
	 */
	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
		dev->gflags ^= IFF_ALLMULTI;
		dev_set_allmulti(dev, inc);
	}

	/* Exclude state transition flags, already notified */
	changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
	if (changes)
		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);

	return ret;
}

/**
 *	dev_set_mtu - Change maximum transfer unit
 *	@dev: device
 *	@new_mtu: new transfer unit
 *
 *	Change the maximum transfer size of the network device.
 */
int dev_set_mtu(struct net_device *dev, int new_mtu)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int err;

	if (new_mtu == dev->mtu)
		return 0;

	/*	MTU must be positive.	 */
	if (new_mtu < 0)
		return -EINVAL;

	if (!netif_device_present(dev))
		return -ENODEV;

	err = 0;
	/* Drivers with a change_mtu hook do their own validation and
	 * assignment; otherwise accept the new value directly. */
	if (ops->ndo_change_mtu)
		err = ops->ndo_change_mtu(dev, new_mtu);
	else
		dev->mtu = new_mtu;

	if (!err && dev->flags & IFF_UP)
		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
	return err;
}

/**
 *	dev_set_mac_address - Change Media Access Control Address
 *	@dev: device
 *	@sa: new address
 *
 *	Change the hardware (MAC) address of the device
 */
int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int err;

	if (!ops->ndo_set_mac_address)
		return -EOPNOTSUPP;
	/* Address family must match the link type of the device. */
	if (sa->sa_family != dev->type)
		return -EINVAL;
	if (!netif_device_present(dev))
		return -ENODEV;
	err = ops->ndo_set_mac_address(dev, sa);
	if (!err)
		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
	return err;
}

/*
 *	Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
 */
static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
{
	int err;
	struct net_device *dev =
__dev_get_by_name(net, ifr->ifr_name);

	if (!dev)
		return -ENODEV;

	/* Read-only commands: fill the ifreq from device state and
	 * return; no device callbacks are invoked here. */
	switch (cmd) {
	case SIOCGIFFLAGS:	/* Get interface flags */
		ifr->ifr_flags = dev_get_flags(dev);
		return 0;

	case SIOCGIFMETRIC:	/* Get the metric on the interface
				   (currently unused) */
		ifr->ifr_metric = 0;
		return 0;

	case SIOCGIFMTU:	/* Get the MTU of a device */
		ifr->ifr_mtu = dev->mtu;
		return 0;

	case SIOCGIFHWADDR:
		/* min() caps the copy at sa_data's size in case the
		 * link-layer address is longer than sockaddr can hold. */
		if (!dev->addr_len)
			memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
		else
			memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
		ifr->ifr_hwaddr.sa_family = dev->type;
		return 0;

	case SIOCGIFSLAVE:
		err = -EINVAL;
		break;

	case SIOCGIFMAP:
		ifr->ifr_map.mem_start = dev->mem_start;
		ifr->ifr_map.mem_end = dev->mem_end;
		ifr->ifr_map.base_addr = dev->base_addr;
		ifr->ifr_map.irq = dev->irq;
		ifr->ifr_map.dma = dev->dma;
		ifr->ifr_map.port = dev->if_port;
		return 0;

	case SIOCGIFINDEX:
		ifr->ifr_ifindex = dev->ifindex;
		return 0;

	case SIOCGIFTXQLEN:
		ifr->ifr_qlen = dev->tx_queue_len;
		return 0;

	default:
		/* dev_ioctl() should ensure this case
		 * is never reached
		 */
		WARN_ON(1);
		err = -EINVAL;
		break;

	}
	return err;
}

/*
 *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
 */
static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
{
	int err;
	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
	const struct net_device_ops *ops;

	if (!dev)
		return -ENODEV;

	ops = dev->netdev_ops;

	switch (cmd) {
	case SIOCSIFFLAGS:	/* Set interface flags */
		return dev_change_flags(dev, ifr->ifr_flags);

	case SIOCSIFMETRIC:	/* Set the metric on the interface
				   (currently unused) */
		return -EOPNOTSUPP;

	case SIOCSIFMTU:	/* Set the MTU of a device */
		return dev_set_mtu(dev, ifr->ifr_mtu);

	case SIOCSIFHWADDR:
		return dev_set_mac_address(dev, &ifr->ifr_hwaddr);

	case SIOCSIFHWBROADCAST:
		if (ifr->ifr_hwaddr.sa_family != dev->type)
			return -EINVAL;
		memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
		       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
		return 0;

	case SIOCSIFMAP:
		if (ops->ndo_set_config) {
			if (!netif_device_present(dev))
				return -ENODEV;
			return ops->ndo_set_config(dev, &ifr->ifr_map);
		}
		return -EOPNOTSUPP;

	case SIOCADDMULTI:
		/* Device must be able to act on a changed rx filter
		 * list, and the address must be an AF_UNSPEC blob. */
		if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
			return -EINVAL;
		if (!netif_device_present(dev))
			return -ENODEV;
		return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
				  dev->addr_len, 1);

	case SIOCDELMULTI:
		if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
			return -EINVAL;
		if (!netif_device_present(dev))
			return -ENODEV;
		return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
				     dev->addr_len, 1);

	case SIOCSIFTXQLEN:
		if (ifr->ifr_qlen < 0)
			return -EINVAL;
		dev->tx_queue_len = ifr->ifr_qlen;
		return 0;

	case SIOCSIFNAME:
		/* Ensure NUL termination before handing to dev_change_name */
		ifr->ifr_newname[IFNAMSIZ-1] = '\0';
		return dev_change_name(dev, ifr->ifr_newname);

	/*
	 *	Unknown or private ioctl
	 */

	default:
		if ((cmd >= SIOCDEVPRIVATE &&
		    cmd <= SIOCDEVPRIVATE + 15) ||
		    cmd == SIOCBONDENSLAVE ||
		    cmd == SIOCBONDRELEASE ||
		    cmd == SIOCBONDSETHWADDR ||
		    cmd == SIOCBONDSLAVEINFOQUERY ||
		    cmd == SIOCBONDINFOQUERY ||
		    cmd == SIOCBONDCHANGEACTIVE ||
		    cmd == SIOCGMIIPHY ||
		    cmd == SIOCGMIIREG ||
		    cmd == SIOCSMIIREG ||
		    cmd == SIOCBRADDIF ||
		    cmd == SIOCBRDELIF ||
		    cmd == SIOCWANDEV) {
			/* Recognized driver-level command: forward to the
			 * driver's ioctl hook if the device is present. */
			err = -EOPNOTSUPP;
			if (ops->ndo_do_ioctl) {
				if (netif_device_present(dev))
					err = ops->ndo_do_ioctl(dev, ifr, cmd);
				else
					err = -ENODEV;
			}
		} else
			err = -EINVAL;

	}
	return err;
}

/*
 *	This function handles all "interface"-type I/O control requests. The actual
 *	'doing' part of this is dev_ifsioc above.
 */

/**
 *	dev_ioctl	-	network device ioctl
 *	@net: the applicable net namespace
 *	@cmd: command to issue
 *	@arg: pointer to a struct ifreq in user space
 *
 *	Issue ioctl functions to devices. This is normally called by the
 *	user space syscall interfaces but can sometimes be useful for
 *	other purposes. The return value is the return from the syscall if
 *	positive or a negative errno code on error.
 */

int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	int ret;
	char *colon;

	/* One special case: SIOCGIFCONF takes ifconf argument
	   and requires shared lock, because it sleeps writing
	   to user space.
	 */

	if (cmd == SIOCGIFCONF) {
		rtnl_lock();
		ret = dev_ifconf(net, (char __user *) arg);
		rtnl_unlock();
		return ret;
	}
	if (cmd == SIOCGIFNAME)
		return dev_ifname(net, (struct ifreq __user *)arg);

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		return -EFAULT;

	ifr.ifr_name[IFNAMSIZ-1] = 0;

	/* Strip any alias suffix ("eth0:1" -> "eth0"); the colon is
	 * restored below before copying the result back to userspace. */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	/*
	 *	See which interface the caller is talking about.
	 */

	switch (cmd) {
	/*
	 *	These ioctl calls:
	 *	- can be done by all.
	 *	- atomic and do not require locking.
	 *	- return a value
	 */
	case SIOCGIFFLAGS:
	case SIOCGIFMETRIC:
	case SIOCGIFMTU:
	case SIOCGIFHWADDR:
	case SIOCGIFSLAVE:
	case SIOCGIFMAP:
	case SIOCGIFINDEX:
	case SIOCGIFTXQLEN:
		dev_load(net, ifr.ifr_name);
		read_lock(&dev_base_lock);
		ret = dev_ifsioc_locked(net, &ifr, cmd);
		read_unlock(&dev_base_lock);
		if (!ret) {
			if (colon)
				*colon = ':';
			if (copy_to_user(arg, &ifr,
					 sizeof(struct ifreq)))
				ret = -EFAULT;
		}
		return ret;

	case SIOCETHTOOL:
		dev_load(net, ifr.ifr_name);
		rtnl_lock();
		ret = dev_ethtool(net, &ifr);
		rtnl_unlock();
		if (!ret) {
			if (colon)
				*colon = ':';
			if (copy_to_user(arg, &ifr,
					 sizeof(struct ifreq)))
				ret = -EFAULT;
		}
		return ret;

	/*
	 *	These ioctl calls:
	 *	- require superuser power.
	 *	- require strict serialization.
	 *	- return a value
	 */
	case SIOCGMIIPHY:
	case SIOCGMIIREG:
	case SIOCSIFNAME:
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;
		dev_load(net, ifr.ifr_name);
		rtnl_lock();
		ret = dev_ifsioc(net, &ifr, cmd);
		rtnl_unlock();
		if (!ret) {
			if (colon)
				*colon = ':';
			if (copy_to_user(arg, &ifr,
					 sizeof(struct ifreq)))
				ret = -EFAULT;
		}
		return ret;

	/*
	 *	These ioctl calls:
	 *	- require superuser power.
	 *	- require strict serialization.
	 *	- do not return a value
	 */
	case SIOCSIFFLAGS:
	case SIOCSIFMETRIC:
	case SIOCSIFMTU:
	case SIOCSIFMAP:
	case SIOCSIFHWADDR:
	case SIOCSIFSLAVE:
	case SIOCADDMULTI:
	case SIOCDELMULTI:
	case SIOCSIFHWBROADCAST:
	case SIOCSIFTXQLEN:
	case SIOCSMIIREG:
	case SIOCBONDENSLAVE:
	case SIOCBONDRELEASE:
	case SIOCBONDSETHWADDR:
	case SIOCBONDCHANGEACTIVE:
	case SIOCBRADDIF:
	case SIOCBRDELIF:
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;
		/* fall through */
	case SIOCBONDSLAVEINFOQUERY:
	case SIOCBONDINFOQUERY:
		/* The two bond queries above are deliberately allowed
		 * without CAP_NET_ADMIN. */
		dev_load(net, ifr.ifr_name);
		rtnl_lock();
		ret = dev_ifsioc(net, &ifr, cmd);
		rtnl_unlock();
		return ret;

	case SIOCGIFMEM:
		/* Get the per device memory space. We can add this but
		 * currently do not support it */
	case SIOCSIFMEM:
		/* Set the per device memory buffer space.
		 * Not applicable in our case */
	case SIOCSIFLINK:
		return -EINVAL;

	/*
	 *	Unknown or private ioctl.
	 */
	default:
		if (cmd == SIOCWANDEV ||
		    (cmd >= SIOCDEVPRIVATE &&
		     cmd <= SIOCDEVPRIVATE + 15)) {
			dev_load(net, ifr.ifr_name);
			rtnl_lock();
			ret = dev_ifsioc(net, &ifr, cmd);
			rtnl_unlock();
			if (!ret && copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
				ret = -EFAULT;
			return ret;
		}
		/* Take care of Wireless Extensions */
		if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
			return wext_handle_ioctl(net, &ifr, cmd, arg);
		return -EINVAL;
	}
}


/**
 *	dev_new_index	-	allocate an ifindex
 *	@net: the applicable net namespace
 *
 *	Returns a suitable unique value for a new device interface
 *	number. The caller must hold the rtnl semaphore or the
 *	dev_base_lock to be sure it remains unique.
 */
static int dev_new_index(struct net *net)
{
	/* Monotonically increasing, shared across all namespaces;
	 * wraps back to 1 on overflow and skips indices still in use
	 * within @net. */
	static int ifindex;
	for (;;) {
		if (++ifindex <= 0)
			ifindex = 1;
		if (!__dev_get_by_index(net, ifindex))
			return ifindex;
	}
}

/* Delayed registration/unregisteration */
static LIST_HEAD(net_todo_list);

/* Queue @dev for deferred unregister processing in netdev_run_todo(). */
static void net_set_todo(struct net_device *dev)
{
	list_add_tail(&dev->todo_list, &net_todo_list);
}

/*
 * Undo everything register_netdevice() did, in reverse order:
 * close, unlink, shut down queues, notify, flush addresses, uninit,
 * drop the kobject and finally the registration reference.
 * Caller holds RTNL.
 */
static void rollback_registered(struct net_device *dev)
{
	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	/* Some devices call without registering for initialization unwind. */
	if (dev->reg_state == NETREG_UNINITIALIZED) {
		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
				  "was registered\n", dev->name, dev);

		WARN_ON(1);
		return;
	}

	BUG_ON(dev->reg_state != NETREG_REGISTERED);

	/* If device is running, close it first. */
	dev_close(dev);

	/* And unlink it from device chain. */
	unlist_netdevice(dev);

	dev->reg_state = NETREG_UNREGISTERING;

	synchronize_net();

	/* Shutdown queueing discipline. */
	dev_shutdown(dev);


	/* Notify protocols, that we are about to destroy
	   this device. They should clean all the things.
	*/
	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

	/*
	 *	Flush the unicast and multicast chains
	 */
	dev_addr_discard(dev);

	if (dev->netdev_ops->ndo_uninit)
		dev->netdev_ops->ndo_uninit(dev);

	/* Notifier chain MUST detach us from master device.
	 */
	WARN_ON(dev->master);

	/* Remove entries from kobject tree */
	netdev_unregister_kobject(dev);

	synchronize_net();

	/* Drop the reference taken in register_netdevice(). */
	dev_put(dev);
}

/* Initialize the xmit lock of one queue and give it a lockdep class
 * keyed on the device type. */
static void __netdev_init_queue_locks_one(struct net_device *dev,
					  struct netdev_queue *dev_queue,
					  void *_unused)
{
	spin_lock_init(&dev_queue->_xmit_lock);
	netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
	dev_queue->xmit_lock_owner = -1;
}

/* Initialize the xmit locks of all tx queues plus the rx queue. */
static void netdev_init_queue_locks(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
	__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
}

/*
 * Drop feature bits whose prerequisites are missing from @features;
 * if @name is non-NULL, log what was dropped. Returns the fixed-up
 * feature mask.
 */
unsigned long netdev_fix_features(unsigned long features, const char *name)
{
	/* Fix illegal SG+CSUM combinations. */
	if ((features & NETIF_F_SG) &&
	    !(features & NETIF_F_ALL_CSUM)) {
		if (name)
			printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
			       "checksum feature.\n", name);
		features &= ~NETIF_F_SG;
	}

	/* TSO requires that SG is present as well.
 */
	if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
		if (name)
			printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
			       "SG feature.\n", name);
		features &= ~NETIF_F_TSO;
	}

	/* UFO needs both generic checksumming and SG. */
	if (features & NETIF_F_UFO) {
		if (!(features & NETIF_F_GEN_CSUM)) {
			if (name)
				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
				       "since no NETIF_F_HW_CSUM feature.\n",
				       name);
			features &= ~NETIF_F_UFO;
		}

		if (!(features & NETIF_F_SG)) {
			if (name)
				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
				       "since no NETIF_F_SG feature.\n", name);
			features &= ~NETIF_F_UFO;
		}
	}

	return features;
}
EXPORT_SYMBOL(netdev_fix_features);

/**
 *	register_netdevice	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	Callers must hold the rtnl semaphore. You may want
 *	register_netdev() instead of this.
 *
 *	BUGS:
 *	The locking appears insufficient to guarantee two parallel registers
 *	will not get the same name.
 */

int register_netdevice(struct net_device *dev)
{
	struct hlist_head *head;
	struct hlist_node *p;
	int ret;
	struct net *net = dev_net(dev);

	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	might_sleep();

	/* When net_device's are persistent, this will be fatal.
	 */
	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
	BUG_ON(!net);

	spin_lock_init(&dev->addr_list_lock);
	netdev_set_addr_lockdep_class(dev);
	netdev_init_queue_locks(dev);

	dev->iflink = -1;

#ifdef CONFIG_COMPAT_NET_DEV_OPS
	/* Netdevice_ops API compatiability support.
	 * This is temporary until all network devices are converted.
	 */
	if (dev->netdev_ops) {
		/* New-style driver: mirror the ops into the legacy
		 * per-device function pointers. */
		const struct net_device_ops *ops = dev->netdev_ops;

		dev->init = ops->ndo_init;
		dev->uninit = ops->ndo_uninit;
		dev->open = ops->ndo_open;
		dev->change_rx_flags = ops->ndo_change_rx_flags;
		dev->set_rx_mode = ops->ndo_set_rx_mode;
		dev->set_multicast_list = ops->ndo_set_multicast_list;
		dev->set_mac_address = ops->ndo_set_mac_address;
		dev->validate_addr = ops->ndo_validate_addr;
		dev->do_ioctl = ops->ndo_do_ioctl;
		dev->set_config = ops->ndo_set_config;
		dev->change_mtu = ops->ndo_change_mtu;
		dev->tx_timeout = ops->ndo_tx_timeout;
		dev->get_stats = ops->ndo_get_stats;
		dev->vlan_rx_register = ops->ndo_vlan_rx_register;
		dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
		dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
#ifdef CONFIG_NET_POLL_CONTROLLER
		dev->poll_controller = ops->ndo_poll_controller;
#endif
	} else {
		/* Legacy driver: point netdev_ops at the block of
		 * legacy function pointers starting at dev->init. */
		char drivername[64];
		pr_info("%s (%s): not using net_device_ops yet\n",
			dev->name, netdev_drivername(dev, drivername, 64));

		/* This works only because net_device_ops and the
		   compatiablity structure are the same.
		 */
		dev->netdev_ops = (void *) &(dev->init);
	}
#endif

	/* Init, if this function is available */
	if (dev->netdev_ops->ndo_init) {
		ret = dev->netdev_ops->ndo_init(dev);
		if (ret) {
			/* Normalize positive driver return values. */
			if (ret > 0)
				ret = -EIO;
			goto out;
		}
	}

	if (!dev_valid_name(dev->name)) {
		ret = -EINVAL;
		goto err_uninit;
	}

	dev->ifindex = dev_new_index(net);
	if (dev->iflink == -1)
		dev->iflink = dev->ifindex;

	/* Check for existence of name */
	head = dev_name_hash(net, dev->name);
	hlist_for_each(p, head) {
		struct net_device *d
			= hlist_entry(p, struct net_device, name_hlist);
		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
			ret = -EEXIST;
			goto err_uninit;
		}
	}

	/* Fix illegal checksum combinations */
	if ((dev->features & NETIF_F_HW_CSUM) &&
	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
		       dev->name);
		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
	}

	if ((dev->features & NETIF_F_NO_CSUM) &&
	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
		       dev->name);
		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
	}

	dev->features = netdev_fix_features(dev->features, dev->name);

	/* Enable software GSO if SG is supported. */
	if (dev->features & NETIF_F_SG)
		dev->features |= NETIF_F_GSO;

	netdev_initialize_kobject(dev);
	ret = netdev_register_kobject(dev);
	if (ret)
		goto err_uninit;
	dev->reg_state = NETREG_REGISTERED;

	/*
	 *	Default initial state at registry is that the
	 *	device is present.
	 */

	set_bit(__LINK_STATE_PRESENT, &dev->state);

	dev_init_scheduler(dev);
	dev_hold(dev);
	list_netdevice(dev);

	/* Notify protocols, that a new device appeared. */
	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
	ret = notifier_to_errno(ret);
	if (ret) {
		/* A notifier vetoed the registration: unwind fully. */
		rollback_registered(dev);
		dev->reg_state = NETREG_UNREGISTERED;
	}

out:
	return ret;

err_uninit:
	if (dev->netdev_ops->ndo_uninit)
		dev->netdev_ops->ndo_uninit(dev);
	goto out;
}

/**
 *	register_netdev	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	This is a wrapper around register_netdevice that takes the rtnl semaphore
 *	and expands the device name if you passed a format string to
 *	alloc_netdev.
 */
int register_netdev(struct net_device *dev)
{
	int err;

	rtnl_lock();

	/*
	 * If the name is a format string the caller wants us to do a
	 * name allocation.
	 */
	if (strchr(dev->name, '%')) {
		err = dev_alloc_name(dev, dev->name);
		if (err < 0)
			goto out;
	}

	err = register_netdevice(dev);
out:
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(register_netdev);

/*
 * netdev_wait_allrefs - wait until all references are gone.
 *
 * This is called when unregistering network devices.
 *
 * Any protocol or device that holds a reference should register
 * for netdevice notification, and cleanup and put back the
 * reference if they receive an UNREGISTER event.
 * We can get stuck here if buggy protocols don't correctly
 * call dev_put.
 */
static void netdev_wait_allrefs(struct net_device *dev)
{
	unsigned long rebroadcast_time, warning_time;

	rebroadcast_time = warning_time = jiffies;
	while (atomic_read(&dev->refcnt) != 0) {
		/* Re-send NETDEV_UNREGISTER at most once per second so
		 * laggard reference holders get another chance to drop. */
		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
			rtnl_lock();

			/* Rebroadcast unregister notification */
			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
				     &dev->state)) {
				/* We must not have linkwatch events
				 * pending on unregister. If this
				 * happens, we simply run the queue
				 * unscheduled, resulting in a noop
				 * for this device.
				 */
				linkwatch_run_queue();
			}

			__rtnl_unlock();

			rebroadcast_time = jiffies;
		}

		msleep(250);

		/* Complain every 10 seconds while we remain blocked. */
		if (time_after(jiffies, warning_time + 10 * HZ)) {
			printk(KERN_EMERG "unregister_netdevice: "
			       "waiting for %s to become free. Usage "
			       "count = %d\n",
			       dev->name, atomic_read(&dev->refcnt));
			warning_time = jiffies;
		}
	}
}

/* The sequence is:
 *
 *	rtnl_lock();
 *	...
 *	register_netdevice(x1);
 *	register_netdevice(x2);
 *	...
 *	unregister_netdevice(y1);
 *	unregister_netdevice(y2);
 *	...
 *	rtnl_unlock();
 *	free_netdev(y1);
 *	free_netdev(y2);
 *
 * We are invoked by rtnl_unlock().
 * This allows us to deal with problems:
 * 1) We can delete sysfs objects which invoke hotplug
 *    without deadlocking with linkwatch via keventd.
 * 2) Since we run with the RTNL semaphore not held, we can sleep
 *    safely in order to wait for the netdev refcnt to drop to zero.
 *
 * We must not return until all unregister events added during
 * the interval the lock was held have been completed.
 */
void netdev_run_todo(void)
{
	struct list_head list;

	/* Snapshot list, allow later requests */
	list_replace_init(&net_todo_list, &list);

	__rtnl_unlock();

	while (!list_empty(&list)) {
		struct net_device *dev
			= list_entry(list.next, struct net_device, todo_list);
		list_del(&dev->todo_list);

		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
			printk(KERN_ERR "network todo '%s' but state %d\n",
			       dev->name, dev->reg_state);
			dump_stack();
			continue;
		}

		dev->reg_state = NETREG_UNREGISTERED;

		/* Drop any of the device's skbs still sitting on each
		 * CPU's backlog queue. */
		on_each_cpu(flush_backlog, dev, 1);

		netdev_wait_allrefs(dev);

		/* paranoia */
		BUG_ON(atomic_read(&dev->refcnt));
		WARN_ON(dev->ip_ptr);
		WARN_ON(dev->ip6_ptr);
		WARN_ON(dev->dn_ptr);

		if (dev->destructor)
			dev->destructor(dev);

		/* Free network device */
		kobject_put(&dev->dev.kobj);
	}
}

/**
 *	dev_get_stats	- get network device statistics
 *	@dev: device to get statistics from
 *
 *	Get network statistics from device. The device driver may provide
 *	its own method by setting dev->netdev_ops->get_stats; otherwise
 *	the internal statistics structure is used.
 */
const struct net_device_stats *dev_get_stats(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;

	if (ops->ndo_get_stats)
		return ops->ndo_get_stats(dev);
	else
		return &dev->stats;
}
EXPORT_SYMBOL(dev_get_stats);

/* Point one queue back at its owning device. */
static void netdev_init_one_queue(struct net_device *dev,
				  struct netdev_queue *queue,
				  void *_unused)
{
	queue->dev = dev;
}

/* Wire up the rx queue and every tx queue, and init the global tx lock. */
static void netdev_init_queues(struct net_device *dev)
{
	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
	spin_lock_init(&dev->tx_global_lock);
}

/**
 *	alloc_netdev_mq - allocate network device
 *	@sizeof_priv:	size of private data to allocate space for
 *	@name:		device name format string
 *	@setup:		callback to initialize device
 *	@queue_count:	the number of subqueues to allocate
 *
 *	Allocates a struct net_device with private data area for driver use
 *	and performs basic initialization.  Also allocates subquue structs
 *	for each queue on the device at the end of the netdevice.
 */
struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
		void (*setup)(struct net_device *), unsigned int queue_count)
{
	struct netdev_queue *tx;
	struct net_device *dev;
	size_t alloc_size;
	void *p;

	BUG_ON(strlen(name) >= sizeof(dev->name));

	/* Single allocation: [pad][struct net_device][pad][priv]. */
	alloc_size = sizeof(struct net_device);
	if (sizeof_priv) {
		/* ensure 32-byte alignment of private area */
		alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
		alloc_size += sizeof_priv;
	}
	/* ensure 32-byte alignment of whole construct */
	alloc_size += NETDEV_ALIGN_CONST;

	p = kzalloc(alloc_size, GFP_KERNEL);
	if (!p) {
		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
		return NULL;
	}

	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
	if (!tx) {
		printk(KERN_ERR "alloc_netdev: Unable to allocate "
		       "tx qdiscs.\n");
		kfree(p);
		return NULL;
	}

	/* Align dev within the allocation; remember the offset so
	 * free_netdev() can recover the original pointer. */
	dev = (struct net_device *)
		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
	dev->padded = (char *)dev - (char *)p;
	dev_net_set(dev, &init_net);

	dev->_tx = tx;
	dev->num_tx_queues = queue_count;
	dev->real_num_tx_queues = queue_count;

	dev->gso_max_size = GSO_MAX_SIZE;

	netdev_init_queues(dev);

	INIT_LIST_HEAD(&dev->napi_list);
	setup(dev);
	strcpy(dev->name, name);
	return dev;
}
EXPORT_SYMBOL(alloc_netdev_mq);

/**
 *	free_netdev - free network device
 *	@dev: device
 *
 *	This function does the last stage of destroying an allocated device
 *	interface. The reference to the device object is released.
 *	If this is the last reference then it will be freed.
 */
void free_netdev(struct net_device *dev)
{
	struct napi_struct *p, *n;

	release_net(dev_net(dev));

	kfree(dev->_tx);

	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
		netif_napi_del(p);

	/*  Compatibility with error handling in drivers */
	if (dev->reg_state == NETREG_UNINITIALIZED) {
		/* Never registered: free the raw allocation directly. */
		kfree((char *)dev - dev->padded);
		return;
	}

	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
	dev->reg_state = NETREG_RELEASED;

	/* will free via device release */
	put_device(&dev->dev);
}

/**
 *	synchronize_net -  Synchronize with packet receive processing
 *
 *	Wait for packets currently being received to be done.
 *	Does not block later packets from starting.
 */
void synchronize_net(void)
{
	might_sleep();
	synchronize_rcu();
}

/**
 *	unregister_netdevice - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	Callers must hold the rtnl semaphore. You may want
 *	unregister_netdev() instead of this.
 */

void unregister_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	rollback_registered(dev);
	/* Finish processing unregister after unlock */
	net_set_todo(dev);
}

/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice.
4630 */ 4631 void unregister_netdev(struct net_device *dev) 4632 { 4633 rtnl_lock(); 4634 unregister_netdevice(dev); 4635 rtnl_unlock(); 4636 } 4637 4638 EXPORT_SYMBOL(unregister_netdev); 4639 4640 /** 4641 * dev_change_net_namespace - move device to different nethost namespace 4642 * @dev: device 4643 * @net: network namespace 4644 * @pat: If not NULL name pattern to try if the current device name 4645 * is already taken in the destination network namespace. 4646 * 4647 * This function shuts down a device interface and moves it 4648 * to a new network namespace. On success 0 is returned, on 4649 * a failure a netagive errno code is returned. 4650 * 4651 * Callers must hold the rtnl semaphore. 4652 */ 4653 4654 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) 4655 { 4656 char buf[IFNAMSIZ]; 4657 const char *destname; 4658 int err; 4659 4660 ASSERT_RTNL(); 4661 4662 /* Don't allow namespace local devices to be moved. */ 4663 err = -EINVAL; 4664 if (dev->features & NETIF_F_NETNS_LOCAL) 4665 goto out; 4666 4667 #ifdef CONFIG_SYSFS 4668 /* Don't allow real devices to be moved when sysfs 4669 * is enabled. 4670 */ 4671 err = -EINVAL; 4672 if (dev->dev.parent) 4673 goto out; 4674 #endif 4675 4676 /* Ensure the device has been registrered */ 4677 err = -EINVAL; 4678 if (dev->reg_state != NETREG_REGISTERED) 4679 goto out; 4680 4681 /* Get out if there is nothing todo */ 4682 err = 0; 4683 if (net_eq(dev_net(dev), net)) 4684 goto out; 4685 4686 /* Pick the destination device name, and ensure 4687 * we can use it in the destination network namespace. 
	 */
	err = -EEXIST;
	destname = dev->name;
	if (__dev_get_by_name(net, destname)) {
		/* We get here if we can't use the current device name */
		if (!pat)
			goto out;
		if (!dev_valid_name(pat))
			goto out;
		if (strchr(pat, '%')) {
			/* '%' makes @pat a template; allocate a unique name. */
			if (__dev_alloc_name(net, pat, buf) < 0)
				goto out;
			destname = buf;
		} else
			destname = pat;
		/* The literal pattern may itself collide in @net. */
		if (__dev_get_by_name(net, destname))
			goto out;
	}

	/*
	 * And now a mini version of register_netdevice unregister_netdevice.
	 */

	/* If device is running close it first. */
	dev_close(dev);

	/* And unlink it from device chain */
	err = -ENODEV;
	unlist_netdevice(dev);

	/* Wait for in-flight readers of the old lists before tearing down. */
	synchronize_net();

	/* Shutdown queueing discipline. */
	dev_shutdown(dev);

	/* Notify protocols, that we are about to destroy
	   this device. They should clean all the things.
	*/
	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

	/*
	 *	Flush the unicast and multicast chains
	 */
	dev_addr_discard(dev);

	netdev_unregister_kobject(dev);

	/* Actually switch the network namespace */
	dev_net_set(dev, net);

	/* Assign the new device name */
	if (destname != dev->name)
		strcpy(dev->name, destname);

	/* If there is an ifindex conflict assign a new one */
	if (__dev_get_by_index(net, dev->ifindex)) {
		int iflink = (dev->iflink == dev->ifindex);
		dev->ifindex = dev_new_index(net);
		/* Keep iflink == ifindex for devices that are their own link. */
		if (iflink)
			dev->iflink = dev->ifindex;
	}

	/* Fixup kobjects */
	err = netdev_register_kobject(dev);
	WARN_ON(err);

	/* Add the device back in the hashes */
	list_netdevice(dev);

	/* Notify protocols, that a new device appeared.
	 */
	call_netdevice_notifiers(NETDEV_REGISTER, dev);

	synchronize_net();
	err = 0;
out:
	return err;
}

/*
 * CPU hotplug callback: when a CPU goes offline, splice its pending
 * softnet work (completion queue, output queue, input backlog) onto the
 * current CPU so no skb frees or transmits are lost.
 */
static int dev_cpu_callback(struct notifier_block *nfb,
			    unsigned long action,
			    void *ocpu)
{
	struct sk_buff **list_skb;
	struct Qdisc **list_net;
	struct sk_buff *skb;
	unsigned int cpu, oldcpu = (unsigned long)ocpu;
	struct softnet_data *sd, *oldsd;

	/* Only interested in CPUs that have fully gone down. */
	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
		return NOTIFY_OK;

	/* irqs off: softnet_data is otherwise manipulated from irq context */
	local_irq_disable();
	cpu = smp_processor_id();
	sd = &per_cpu(softnet_data, cpu);
	oldsd = &per_cpu(softnet_data, oldcpu);

	/* Find end of our completion_queue. */
	list_skb = &sd->completion_queue;
	while (*list_skb)
		list_skb = &(*list_skb)->next;
	/* Append completion queue from offline CPU. */
	*list_skb = oldsd->completion_queue;
	oldsd->completion_queue = NULL;

	/* Find end of our output_queue. */
	list_net = &sd->output_queue;
	while (*list_net)
		list_net = &(*list_net)->next_sched;
	/* Append output queue from offline CPU. */
	*list_net = oldsd->output_queue;
	oldsd->output_queue = NULL;

	/* Kick the tx softirq to drain what we just spliced in. */
	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_enable();

	/* Process offline CPU's input_pkt_queue */
	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
		netif_rx(skb);

	return NOTIFY_OK;
}


/**
 *	netdev_increment_features - increment feature set by one
 *	@all: current feature set
 *	@one: new feature set
 *	@mask: mask feature set
 *
 *	Computes a new feature set after adding a device with feature set
 *	@one to the master device with current feature set @all.  Will not
 *	enable anything that is off in @mask. Returns the new feature set.
4820 */ 4821 unsigned long netdev_increment_features(unsigned long all, unsigned long one, 4822 unsigned long mask) 4823 { 4824 /* If device needs checksumming, downgrade to it. */ 4825 if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) 4826 all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM); 4827 else if (mask & NETIF_F_ALL_CSUM) { 4828 /* If one device supports v4/v6 checksumming, set for all. */ 4829 if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) && 4830 !(all & NETIF_F_GEN_CSUM)) { 4831 all &= ~NETIF_F_ALL_CSUM; 4832 all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); 4833 } 4834 4835 /* If one device supports hw checksumming, set for all. */ 4836 if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) { 4837 all &= ~NETIF_F_ALL_CSUM; 4838 all |= NETIF_F_HW_CSUM; 4839 } 4840 } 4841 4842 one |= NETIF_F_ALL_CSUM; 4843 4844 one |= all & NETIF_F_ONE_FOR_ALL; 4845 all &= one | NETIF_F_LLTX | NETIF_F_GSO; 4846 all |= one & mask & NETIF_F_ONE_FOR_ALL; 4847 4848 return all; 4849 } 4850 EXPORT_SYMBOL(netdev_increment_features); 4851 4852 static struct hlist_head *netdev_create_hash(void) 4853 { 4854 int i; 4855 struct hlist_head *hash; 4856 4857 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL); 4858 if (hash != NULL) 4859 for (i = 0; i < NETDEV_HASHENTRIES; i++) 4860 INIT_HLIST_HEAD(&hash[i]); 4861 4862 return hash; 4863 } 4864 4865 /* Initialize per network namespace state */ 4866 static int __net_init netdev_init(struct net *net) 4867 { 4868 INIT_LIST_HEAD(&net->dev_base_head); 4869 4870 net->dev_name_head = netdev_create_hash(); 4871 if (net->dev_name_head == NULL) 4872 goto err_name; 4873 4874 net->dev_index_head = netdev_create_hash(); 4875 if (net->dev_index_head == NULL) 4876 goto err_idx; 4877 4878 return 0; 4879 4880 err_idx: 4881 kfree(net->dev_name_head); 4882 err_name: 4883 return -ENOMEM; 4884 } 4885 4886 /** 4887 * netdev_drivername - network driver for the device 4888 * @dev: network device 4889 * @buffer: buffer for resulting name 4890 
 *	@len: size of buffer
 *
 *	Determine network driver for device.
 */
char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
{
	const struct device_driver *driver;
	const struct device *parent;

	/* Nothing useful we can do with a missing or zero-length buffer. */
	if (len <= 0 || !buffer)
		return buffer;
	buffer[0] = 0;

	parent = dev->dev.parent;

	/* No parent device means no backing driver (e.g. software devices). */
	if (!parent)
		return buffer;

	driver = parent->driver;
	if (driver && driver->name)
		strlcpy(buffer, driver->name, len);
	return buffer;
}

/* Tear down the per-namespace name/index hash tables from netdev_init(). */
static void __net_exit netdev_exit(struct net *net)
{
	kfree(net->dev_name_head);
	kfree(net->dev_index_head);
}

static struct pernet_operations __net_initdata netdev_net_ops = {
	.init = netdev_init,
	.exit = netdev_exit,
};

static void __net_exit default_device_exit(struct net *net)
{
	struct net_device *dev;
	/*
	 * Push all migratable of the network devices back to the
	 * initial network namespace
	 */
	rtnl_lock();
restart:
	for_each_netdev(net, dev) {
		int err;
		char fb_name[IFNAMSIZ];

		/* Ignore unmoveable devices (i.e. loopback) */
		if (dev->features & NETIF_F_NETNS_LOCAL)
			continue;

		/* Delete virtual devices */
		if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
			dev->rtnl_link_ops->dellink(dev);
			/* dellink modified the device list; restart the walk */
			goto restart;
		}

		/* Push remaining network devices to init_net */
		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
		err = dev_change_net_namespace(dev, &init_net, fb_name);
		if (err) {
			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
				__func__, dev->name, err);
			BUG();
		}
		/* the move unlinked dev from this namespace; restart the walk */
		goto restart;
	}
	rtnl_unlock();
}

static struct pernet_operations __net_initdata default_device_ops = {
	.exit = default_device_exit,
};

/*
 *	Initialize the DEV module. 
At boot time this walks the device list and
 *	unhooks any devices that fail to initialise (normally hardware not
 *	present) and leaves us with a valid list of present and active devices.
 *
 */

/*
 *       This is called single threaded during boot, so no need
 *       to take the rtnl semaphore.
 */
static int __init net_dev_init(void)
{
	int i, rc = -ENOMEM;

	BUG_ON(!dev_boot_phase);

	if (dev_proc_init())
		goto out;

	if (netdev_kobject_init())
		goto out;

	/* Packet-type handler lists must exist before any protocol loads. */
	INIT_LIST_HEAD(&ptype_all);
	for (i = 0; i < PTYPE_HASH_SIZE; i++)
		INIT_LIST_HEAD(&ptype_base[i]);

	if (register_pernet_subsys(&netdev_net_ops))
		goto out;

	/*
	 *	Initialise the packet receive queues.
	 */

	for_each_possible_cpu(i) {
		struct softnet_data *queue;

		queue = &per_cpu(softnet_data, i);
		skb_queue_head_init(&queue->input_pkt_queue);
		queue->completion_queue = NULL;
		INIT_LIST_HEAD(&queue->poll_list);

		/* Per-CPU backlog NAPI context used by netif_rx(). */
		queue->backlog.poll = process_backlog;
		queue->backlog.weight = weight_p;
		queue->backlog.gro_list = NULL;
	}

	dev_boot_phase = 0;

	/* The loopback device is special if any other network devices
	 * is present in a network namespace the loopback device must
	 * be present. Since we now dynamically allocate and free the
	 * loopback device ensure this invariant is maintained by
	 * keeping the loopback device as the first device on the
	 * list of network devices.  Ensuring the loopback devices
	 * is the first device that appears and the last network device
	 * that disappears.
	 */
	if (register_pernet_device(&loopback_net_ops))
		goto out;

	if (register_pernet_device(&default_device_ops))
		goto out;

	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
	open_softirq(NET_RX_SOFTIRQ, net_rx_action);

	hotcpu_notifier(dev_cpu_callback, 0);
	dst_init();
	dev_mcast_init();
#ifdef CONFIG_NET_DMA
	dmaengine_get();
#endif
	rc = 0;
out:
	return rc;
}

subsys_initcall(net_dev_init);

EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(dev_get_flags);

#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif

EXPORT_SYMBOL(dev_load);

/* Per-CPU softnet state (see net_dev_init()); exported per-CPU symbol. */
EXPORT_PER_CPU_SYMBOL(softnet_data);