/*
 * Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


/*
 * This module supports memory mapped access to network devices,
 * see netmap(4).
 *
 * The module uses a large memory pool allocated by the kernel
 * and accessible as mmapped memory by multiple userspace threads/processes.
 * The memory pool contains packet buffers and "netmap rings",
 * i.e. user-accessible copies of the interface's queues.
 *
 * Access to the network card works like this:
 * 1. a process/thread issues one or more open() on /dev/netmap, to create
 *    a select()able file descriptor on which events are reported.
 * 2. on each descriptor, the process issues an ioctl() to identify
 *    the interface that should report events to the file descriptor.
 * 3. on each descriptor, the process issues an mmap() request to
 *    map the shared memory region within the process' address space.
 *    The list of interesting queues is indicated by a location in
 *    the shared memory region.
 * 4. using the functions in the netmap(4) userspace API, a process
 *    can look up the occupation state of a queue, access memory buffers,
 *    and retrieve received packets or enqueue packets to transmit.
 * 5. using some ioctl()s the process can synchronize the userspace view
 *    of the queue with the actual status in the kernel. This includes both
 *    receiving the notification of new packets, and transmitting new
 *    packets on the output interface.
 * 6. select() or poll() can be used to wait for events on individual
 *    transmit or receive queues (or all queues for a given interface).
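 *
 * A minimal userspace sketch of this flow (illustrative only; the
 * struct nmreq fields, macros and ioctls are those documented in
 * netmap(4), error handling omitted):
 *
 *	int fd = open("/dev/netmap", O_RDWR);		// step 1
 *	struct nmreq req = { .nr_version = NETMAP_API };
 *	strncpy(req.nr_name, "em0", sizeof(req.nr_name));
 *	ioctl(fd, NIOCREGIF, &req);			// step 2
 *	char *mem = mmap(0, req.nr_memsize,		// step 3
 *	    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *	struct netmap_if *nifp = NETMAP_IF(mem, req.nr_offset);
 *	... fill tx slots / read rx slots via nifp ...	// step 4
 *	ioctl(fd, NIOCTXSYNC, NULL);			// step 5
 *	poll(&(struct pollfd){ .fd = fd, .events = POLLIN }, 1, -1); // step 6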
 *

 SYNCHRONIZATION (USER)

 The netmap rings and data structures may be shared among multiple
 user threads or even independent processes.
 Any synchronization among those threads/processes is delegated
 to the threads themselves. Only one thread at a time can be in
 a system call on the same netmap ring. The OS does not enforce
 this and only guarantees against system crashes in case of
 invalid usage.

 LOCKING (INTERNAL)

 Within the kernel, access to the netmap rings is protected as follows:

 - a spinlock on each ring, to handle producer/consumer races on
   RX rings attached to the host stack (against multiple host
   threads writing from the host stack to the same ring),
   and on 'destination' rings attached to a VALE switch
   (i.e. RX rings in VALE ports, and TX rings in NIC/host ports),
   protecting multiple active senders for the same destination.

 - an atomic variable to guarantee that there is at most one
   instance of *_*xsync() on the ring at any time.
   For rings connected to user file
   descriptors, an atomic_test_and_set() protects this, and the
   lock on the ring is not actually used.
   For NIC RX rings connected to a VALE switch, an atomic_test_and_set()
   is also used to prevent multiple executions (the driver might indeed
   already guarantee this).
   For NIC TX rings connected to a VALE switch, the lock arbitrates
   access to the queue (both when allocating buffers and when pushing
   them out).

 - *xsync() should be protected against initializations of the card.
   On FreeBSD most devices have the reset routine protected by
   a RING lock (ixgbe, igb, em) or core lock (re). lem is missing
   the RING protection on rx_reset(), this should be added.

   On linux there is an external lock on the tx path, which probably
   also arbitrates access to the reset routine. XXX to be revised

 - a per-interface core_lock protecting access from the host stack
   while interfaces may be detached from netmap mode.
   XXX there should be no need for this lock if we detach the interfaces
   only while they are down.


 --- VALE SWITCH ---

 NMG_LOCK() serializes all modifications to switches and ports.
 A switch cannot be deleted until all ports are gone.

 For each switch, an SX lock (RWlock on linux) protects
 deletion of ports. When configuring or deleting a port, the
 lock is acquired in exclusive mode (after holding NMG_LOCK).
 When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
 The lock is held throughout the entire forwarding cycle,
 during which the thread may incur a page fault.
 Hence it is important that sleepable shared locks are used.

 On the rx ring, the per-port lock is grabbed initially to reserve
 a number of slots in the ring, then the lock is released,
 packets are copied from source to destination, and then
 the lock is acquired again and the receive ring is updated.
 (A similar thing is done on the tx ring for NIC and host stack
 ports attached to the switch)
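
 A sketch of that rx-side sequence (illustrative pseudocode of the
 paragraph above, not the actual forwarding code):

	lock(dst ring);			// per-port lock
	reserve N slots, remember the reserved range;
	unlock(dst ring);
	copy packets from source into the reserved slots;  // no ring lock
	lock(dst ring);
	advance the ring pointers to publish the N slots;
	unlock(dst ring);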

 */

/*
 * OS-specific code that is used only within this file.
 * Other OS-specific code that must be accessed by drivers
 * is present in netmap_kern.h
 */

#if defined(__FreeBSD__)
#include <sys/cdefs.h>		/* prerequisite */
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/module.h>
#include <sys/errno.h>
#include <sys/param.h>		/* defines used in kernel.h */
#include <sys/jail.h>
#include <sys/kernel.h>		/* types used in module initialization */
#include <sys/conf.h>		/* cdevsw struct */
#include <sys/uio.h>		/* uio struct */
#include <sys/sockio.h>
#include <sys/socketvar.h>	/* struct socket */
#include <sys/malloc.h>
#include <sys/mman.h>		/* PROT_EXEC */
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <vm/vm.h>		/* vtophys */
#include <vm/pmap.h>		/* vtophys */
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/uma.h>
#include <sys/socket.h>		/* sockaddrs */
#include <sys/selinfo.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/bpf.h>		/* BIOCIMMEDIATE */
#include <net/vnet.h>
#include <machine/bus.h>	/* bus_dmamap_* */
#include <sys/endian.h>
#include <sys/refcount.h>

#define prefetch(x)	__builtin_prefetch(x)

#define BDG_RWLOCK_T		struct rwlock

#define	BDG_RWINIT(b)		\
	rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
#define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)
#define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)
#define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)
#define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)
#define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)
#define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)


/* netmap global lock.
 * normally called within the user thread (upon a system call)
 * or when a file descriptor or process is terminated
 * (last close or last munmap)
 */

#define NMG_LOCK_T		struct mtx
#define NMG_LOCK_INIT()		mtx_init(&netmap_global_lock, "netmap global lock", NULL, MTX_DEF)
#define NMG_LOCK_DESTROY()	mtx_destroy(&netmap_global_lock)
#define NMG_LOCK()		mtx_lock(&netmap_global_lock)
#define NMG_UNLOCK()		mtx_unlock(&netmap_global_lock)
#define NMG_LOCK_ASSERT()	mtx_assert(&netmap_global_lock, MA_OWNED)


/* atomic operations */
#include <machine/atomic.h>
#define NM_ATOMIC_TEST_AND_SET(p)	(!atomic_cmpset_acq_int((p), 0, 1))
#define NM_ATOMIC_CLEAR(p)		atomic_store_rel_int((p), 0)


#elif defined(linux)

#include "bsd_glue.h"

static netdev_tx_t linux_netmap_start_xmit(struct sk_buff *, struct net_device *);

static struct device_driver*
linux_netmap_find_driver(struct device *dev)
{
	struct device_driver *dd;

	while ( (dd = dev->driver) == NULL ) {
		if ( (dev = dev->parent) == NULL )
			return NULL;
	}
	return dd;
}

static struct net_device*
ifunit_ref(const char *name)
{
	struct net_device *ifp = dev_get_by_name(&init_net, name);
	struct device_driver *dd;

	if (ifp == NULL)
		return NULL;

	if ( (dd = linux_netmap_find_driver(&ifp->dev)) == NULL )
		goto error;

	if (!try_module_get(dd->owner))
		goto error;

	return ifp;
error:
	dev_put(ifp);
	return NULL;
}

static void
if_rele(struct net_device *ifp)
{
	struct device_driver *dd;
	dd = linux_netmap_find_driver(&ifp->dev);
	dev_put(ifp);
	if (dd)
		module_put(dd->owner);
}

// XXX a mtx would suffice here too 20130404 gl
#define NMG_LOCK_T		struct semaphore
#define NMG_LOCK_INIT()		sema_init(&netmap_global_lock, 1)
#define NMG_LOCK_DESTROY()
#define NMG_LOCK()		down(&netmap_global_lock)
#define NMG_UNLOCK()		up(&netmap_global_lock)
#define NMG_LOCK_ASSERT()	// XXX to be completed


#elif defined(__APPLE__)

#warning OSX support is only partial
#include "osx_glue.h"

#else

#error	Unsupported platform

#endif /* unsupported */

/*
 * common headers
 */
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>


MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");

/*
 * The following variables are used by the drivers and replicate
 * fields in the global memory pool. They only refer to buffers
 * used by physical interfaces.
 */
u_int netmap_total_buffers;
u_int netmap_buf_size;
char *netmap_buffer_base;	/* also address of an invalid buffer */

/* user-controlled variables */
int netmap_verbose;

static int netmap_no_timestamp; /* don't timestamp on rxsync */

SYSCTL_NODE(_dev, OID_AUTO, netmap, CTLFLAG_RW, 0, "Netmap args");
SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,
    CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
    CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
int netmap_mitigate = 1;
SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, "");
int netmap_no_pendintr = 1;
SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr,
    CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets.");
int netmap_txsync_retry = 2;
SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW,
    &netmap_txsync_retry, 0, "Number of txsync loops in bridge's flush.");

int netmap_drop = 0;	/* debugging */
int netmap_flags = 0;	/* debug flags */
int netmap_fwd = 0;	/* force transparent mode */
int netmap_mmap_unreg = 0; /* allow mmap of unregistered fds */

SYSCTL_INT(_dev_netmap, OID_AUTO, drop, CTLFLAG_RW, &netmap_drop, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, mmap_unreg, CTLFLAG_RW, &netmap_mmap_unreg, 0, "");

NMG_LOCK_T	netmap_global_lock;

/*
 * protect against multiple threads using the same ring.
 * also check that the ring has not been stopped.
 */
#define NM_KR_BUSY	1
#define NM_KR_STOPPED	2
static void nm_kr_put(struct netmap_kring *kr);

static __inline int nm_kr_tryget(struct netmap_kring *kr)
{
	/* check a first time without taking the lock
	 * to avoid starvation for nm_kr_get()
	 */
	if (unlikely(kr->nkr_stopped)) {
		ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
		return NM_KR_STOPPED;
	}
	if (unlikely(NM_ATOMIC_TEST_AND_SET(&kr->nr_busy)))
		return NM_KR_BUSY;
	/* check a second time with lock held */
	if (unlikely(kr->nkr_stopped)) {
		ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
		nm_kr_put(kr);
		return NM_KR_STOPPED;
	}
	return 0;
}

static __inline void nm_kr_put(struct netmap_kring *kr)
{
	NM_ATOMIC_CLEAR(&kr->nr_busy);
}
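/*
 * Typical caller pattern for the two routines above (a sketch, not a
 * verbatim copy of any *xsync() path in this file):
 *
 *	if (nm_kr_tryget(kring))	// NM_KR_BUSY or NM_KR_STOPPED
 *		return EBUSY;		// ring unavailable, give up
 *	... txsync/rxsync work on the ring ...
 *	nm_kr_put(kring);		// clear the busy flag
 */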
static void nm_kr_get(struct netmap_kring *kr)
{
	while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy))
		tsleep(kr, 0, "NM_KR_GET", 4);
}

static void nm_disable_ring(struct netmap_kring *kr)
{
	kr->nkr_stopped = 1;
	nm_kr_get(kr);
	mtx_lock(&kr->q_lock);
	mtx_unlock(&kr->q_lock);
	nm_kr_put(kr);
}

void netmap_disable_all_rings(struct ifnet *ifp)
{
	struct netmap_adapter *na;
	int i;

	if (!(ifp->if_capenable & IFCAP_NETMAP))
		return;

	na = NA(ifp);

	for (i = 0; i < na->num_tx_rings + 1; i++) {
		nm_disable_ring(na->tx_rings + i);
		selwakeuppri(&na->tx_rings[i].si, PI_NET);
	}
	for (i = 0; i < na->num_rx_rings + 1; i++) {
		nm_disable_ring(na->rx_rings + i);
		selwakeuppri(&na->rx_rings[i].si, PI_NET);
	}
	selwakeuppri(&na->tx_si, PI_NET);
	selwakeuppri(&na->rx_si, PI_NET);
}

void netmap_enable_all_rings(struct ifnet *ifp)
{
	struct netmap_adapter *na;
	int i;

	if (!(ifp->if_capenable & IFCAP_NETMAP))
		return;

	na = NA(ifp);
	for (i = 0; i < na->num_tx_rings + 1; i++) {
		D("enabling %p", na->tx_rings + i);
		na->tx_rings[i].nkr_stopped = 0;
	}
	for (i = 0; i < na->num_rx_rings + 1; i++) {
		D("enabling %p", na->rx_rings + i);
		na->rx_rings[i].nkr_stopped = 0;
	}
}


/*
 * generic bounds-checking function
 */
u_int
nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg)
{
	u_int oldv = *v;
	const char *op = NULL;

	if (dflt < lo)
		dflt = lo;
	if (dflt > hi)
		dflt = hi;
	if (oldv < lo) {
		*v = dflt;
		op = "Bump";
	} else if (oldv > hi) {
		*v = hi;
		op = "Clamp";
	}
	if (op && msg)
		printf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
	return *v;
}
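/*
 * Example (a sketch; the values are illustrative, not taken from this
 * file): clamping a user-supplied ring size into [64..4096], with a
 * default of 1024 used when the stored value is below the minimum:
 *
 *	u_int slots = 16;
 *	nm_bound_var(&slots, 1024, 64, 4096, "ring size");
 *	// prints "Bump ring size to 1024 (was 16)", sets slots = 1024
 */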
/*
 * packet-dump function, user-supplied or static buffer.
 * The destination buffer must be at least 30+4*len
 */
const char *
nm_dump_buf(char *p, int len, int lim, char *dst)
{
	static char _dst[8192];
	int i, j, i0;
	static char hex[] = "0123456789abcdef";
	char *o;	/* output position */

#define P_HI(x)	hex[((x) & 0xf0)>>4]
#define P_LO(x)	hex[((x) & 0xf)]
#define P_C(x)	((x) >= 0x20 && (x) <= 0x7e ? (x) : '.')
	if (!dst)
		dst = _dst;
	if (lim <= 0 || lim > len)
		lim = len;
	o = dst;
	sprintf(o, "buf 0x%p len %d lim %d\n", p, len, lim);
	o += strlen(o);
	/* hexdump routine */
	for (i = 0; i < lim; ) {
		sprintf(o, "%5d: ", i);
		o += strlen(o);
		memset(o, ' ', 48);
		i0 = i;
		for (j=0; j < 16 && i < lim; i++, j++) {
			o[j*3] = P_HI(p[i]);
			o[j*3+1] = P_LO(p[i]);
		}
		i = i0;
		for (j=0; j < 16 && i < lim; i++, j++)
			o[j + 48] = P_C(p[i]);
		o[j+48] = '\n';
		o += j+49;
	}
	*o = '\0';
#undef P_HI
#undef P_LO
#undef P_C
	return dst;
}

/*
 * system parameters (most of them in netmap_kern.h)
 * NM_NAME	prefix for switch port names, default "vale"
 * NM_BDG_MAXPORTS	number of ports
 * NM_BRIDGES	max number of switches in the system.
 *	XXX should become a sysctl or tunable
 *
 * Switch ports are named valeX:Y where X is the switch name and Y
 * is the port. If Y matches a physical interface name, the port is
 * connected to a physical device.
 *
 * Unlike physical interfaces, switch ports use their own memory region
 * for rings and buffers.
 * The virtual interfaces use per-queue lock instead of core lock.
 * In the tx loop, we aggregate traffic in batches to make all operations
 * faster. The batch size is bridge_batch.
 */
#define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
#define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
#define NM_BRIDGE_RINGSIZE	1024	/* in the device */
#define NM_BDG_HASH		1024	/* forwarding table entries */
#define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
#define NM_MULTISEG		64	/* max size of a chain of bufs */
/* actual size of the tables */
#define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
/* NM_FT_NULL terminates a list of slots in the ft */
#define NM_FT_NULL		NM_BDG_BATCH_MAX
#define	NM_BRIDGES		8	/* number of bridges */


/*
 * bridge_batch is set via sysctl to the max batch size to be
 * used in the bridge. The actual value may be larger as the
 * last packet in the block may overflow the size.
 */
int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0, "");


/*
 * These are used to handle reference counters for bridge ports.
 */
#define	ADD_BDG_REF(ifp)	refcount_acquire(&NA(ifp)->na_bdg_refcount)
#define	DROP_BDG_REF(ifp)	refcount_release(&NA(ifp)->na_bdg_refcount)

/* The bridge references the buffers using the device specific look up table */
static inline void *
BDG_NMB(struct netmap_mem_d *nmd, struct netmap_slot *slot)
{
	struct lut_entry *lut = nmd->pools[NETMAP_BUF_POOL].lut;
	uint32_t i = slot->buf_idx;
	return (unlikely(i >= nmd->pools[NETMAP_BUF_POOL].objtotal)) ?
		lut[0].vaddr : lut[i].vaddr;
}

static int bdg_netmap_attach(struct netmap_adapter *);
static int bdg_netmap_reg(struct ifnet *ifp, int onoff);
int kern_netmap_regif(struct nmreq *nmr);

/*
 * Each transmit queue accumulates a batch of packets into
 * a structure before forwarding. Packets to the same
 * destination are put in a list using ft_next as a link field.
 * ft_frags and ft_next are valid only on the first fragment.
 */
struct nm_bdg_fwd {	/* forwarding entry for a bridge */
	void *ft_buf;		/* netmap or indirect buffer */
	uint8_t ft_frags;	/* how many fragments (only on 1st frag) */
	uint8_t _ft_port;	/* dst port (unused) */
	uint16_t ft_flags;	/* flags, e.g. indirect */
	uint16_t ft_len;	/* src fragment len */
	uint16_t ft_next;	/* next packet to same destination */
};

/*
 * For each output interface, nm_bdg_q is used to construct a list.
 * bq_len is the number of output buffers (we can have coalescing
 * during the copy).
 */
struct nm_bdg_q {
	uint16_t bq_head;
	uint16_t bq_tail;
	uint32_t bq_len;	/* number of buffers */
};

/* XXX revise this */
struct nm_hash_ent {
	uint64_t	mac;	/* the top 2 bytes are the epoch */
	uint64_t	ports;
};

/*
 * nm_bridge is a descriptor for a VALE switch.
 * Interfaces for a bridge are all in bdg_ports[].
 * The array has fixed size, an empty entry does not terminate
 * the search, but lookups only occur on attach/detach so we
 * don't mind if they are slow.
 *
 * The bridge is non blocking on the transmit ports: excess
 * packets are dropped if there is no room on the output port.
 *
 * bdg_lock protects accesses to the bdg_ports array.
 * This is a rw lock (or equivalent).
 */
struct nm_bridge {
	/* XXX what is the proper alignment/layout ? */
	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
	int		bdg_namelen;
	uint32_t	bdg_active_ports; /* 0 means free */
	char		bdg_basename[IFNAMSIZ];

	/* Indexes of active ports (up to active_ports)
	 * and all other remaining ports.
	 */
	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];

	struct netmap_adapter *bdg_ports[NM_BDG_MAXPORTS];


	/*
	 * The function to decide the destination port.
	 * It returns either an index of the destination port,
	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
	 * forward this packet.  ring_nr is the source ring index, and the
	 * function may overwrite this value to forward this packet to a
	 * different ring index.
	 * This function must be set by netmap_bdgctl().
	 */
	bdg_lookup_fn_t nm_bdg_lookup;

	/* the forwarding table, MAC+ports.
	 * XXX should be changed to an argument to be passed to
	 * the lookup function, and allocated on attach
	 */
	struct nm_hash_ent ht[NM_BDG_HASH];
};


/*
 * XXX in principle nm_bridges could be created dynamically
 * Right now we have a static array and deletions are protected
 * by an exclusive lock.
 */
struct nm_bridge nm_bridges[NM_BRIDGES];


/*
 * A few functions to tell which kind of port we are using.
 * XXX should we hold a lock ?
 *
 * nma_is_vp()		virtual port
 * nma_is_host()	port connected to the host stack
 * nma_is_hw()		port connected to a NIC
 */
int nma_is_vp(struct netmap_adapter *na);
int
nma_is_vp(struct netmap_adapter *na)
{
	return na->nm_register == bdg_netmap_reg;
}

static __inline int
nma_is_host(struct netmap_adapter *na)
{
	return na->nm_register == NULL;
}

static __inline int
nma_is_hw(struct netmap_adapter *na)
{
	/* In case of sw adapter, nm_register is NULL */
	return !nma_is_vp(na) && !nma_is_host(na);
}


/*
 * If the NIC is owned by the kernel
 * (i.e., bridge), neither another bridge nor user can use it;
 * if the NIC is owned by a user, only users can share it.
 * Evaluation must be done under NMG_LOCK().
 */
#define NETMAP_OWNED_BY_KERN(ifp)	(!nma_is_vp(NA(ifp)) && NA(ifp)->na_bdg)
#define NETMAP_OWNED_BY_ANY(ifp) \
	(NETMAP_OWNED_BY_KERN(ifp) || (NA(ifp)->refcount > 0))

/*
 * NA(ifp)->bdg_port	port index
 */


/*
 * this is a slightly optimized copy routine which rounds
 * up to a multiple of 64 bytes and is often faster than dealing
 * with other odd sizes. We assume there is enough room
 * in the source and destination buffers.
 *
 * XXX only for multiples of 64 bytes, non overlapped.
 */
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
	uint64_t *src = _src;
	uint64_t *dst = _dst;
	if (unlikely(l >= 1024)) {
		memcpy(dst, src, l);
		return;
	}
	for (; likely(l > 0); l-=64) {
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
	}
}
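/*
 * Example of the rounding behaviour (illustrative): copying a 60-byte
 * frame actually moves 64 bytes, which is safe only because, as noted
 * above, the caller guarantees enough room in both buffers:
 *
 *	pkt_copy(src_buf, dst_buf, 60);	// one 8x8-byte pass, 64 bytes copied
 */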
/*
 * locate a bridge among the existing ones.
 * MUST BE CALLED WITH NMG_LOCK()
 *
 * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
 * We assume that this is called with a name of at least NM_NAME chars.
 */
static struct nm_bridge *
nm_find_bridge(const char *name, int create)
{
	int i, l, namelen;
	struct nm_bridge *b = NULL;

	NMG_LOCK_ASSERT();

	namelen = strlen(NM_NAME);	/* base length */
	l = name ? strlen(name) : 0;	/* actual length */
	if (l < namelen) {
		D("invalid bridge name %s", name ? name : NULL);
		return NULL;
	}
	for (i = namelen + 1; i < l; i++) {
		if (name[i] == ':') {
			namelen = i;
			break;
		}
	}
	if (namelen >= IFNAMSIZ)
		namelen = IFNAMSIZ;
	ND("--- prefix is '%.*s' ---", namelen, name);

	/* lookup the name, remember empty slot if there is one */
	for (i = 0; i < NM_BRIDGES; i++) {
		struct nm_bridge *x = nm_bridges + i;

		if (x->bdg_active_ports == 0) {
			if (create && b == NULL)
				b = x;	/* record empty slot */
		} else if (x->bdg_namelen != namelen) {
			continue;
		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
			ND("found '%.*s' at %d", namelen, name, i);
			b = x;
			break;
		}
	}
	if (i == NM_BRIDGES && b) { /* name not found, can create entry */
		/* initialize the bridge */
		strncpy(b->bdg_basename, name, namelen);
		ND("create new bridge %s with ports %d", b->bdg_basename,
			b->bdg_active_ports);
		b->bdg_namelen = namelen;
		b->bdg_active_ports = 0;
		for (i = 0; i < NM_BDG_MAXPORTS; i++)
			b->bdg_port_index[i] = i;
		/* set the default function */
		b->nm_bdg_lookup = netmap_bdg_learning;
		/* reset the MAC address table */
		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
	}
	return b;
}
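/*
 * Name parsing examples for the function above (illustrative, following
 * the ':' rule in its comment):
 *
 *	"vale1:em0" -> the ':' ends the prefix, bridge name is "vale1";
 *	"vale1"     -> no ':', so the bridge name is just NM_NAME ("vale").
 */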


/*
 * Free the forwarding tables for rings attached to switch ports.
 */
static void
nm_free_bdgfwd(struct netmap_adapter *na)
{
	int nrings, i;
	struct netmap_kring *kring;

	NMG_LOCK_ASSERT();
	nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings;
	kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings;
	for (i = 0; i < nrings; i++) {
		if (kring[i].nkr_ft) {
			free(kring[i].nkr_ft, M_DEVBUF);
			kring[i].nkr_ft = NULL; /* protect from freeing twice */
		}
	}
	if (nma_is_hw(na))
		nm_free_bdgfwd(SWNA(na->ifp));
}


/*
 * Allocate the forwarding tables for the rings attached to the bridge ports.
 */
static int
nm_alloc_bdgfwd(struct netmap_adapter *na)
{
	int nrings, l, i, num_dstq;
	struct netmap_kring *kring;

	NMG_LOCK_ASSERT();
	/* all port:rings + broadcast */
	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
	l += sizeof(struct nm_bdg_q) * num_dstq;
	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;

	nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings;
	kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings;
	for (i = 0; i < nrings; i++) {
		struct nm_bdg_fwd *ft;
		struct nm_bdg_q *dstq;
		int j;

		ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
		if (!ft) {
			nm_free_bdgfwd(na);
			return ENOMEM;
		}
		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
		for (j = 0; j < num_dstq; j++) {
			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
			dstq[j].bq_len = 0;
		}
		kring[i].nkr_ft = ft;
	}
	if (nma_is_hw(na))
		nm_alloc_bdgfwd(SWNA(na->ifp));
	return 0;
}
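/*
 * Layout of each per-ring block allocated above (one contiguous malloc
 * of l bytes; derived from the size computation, not a separate spec):
 *
 *	ft:   struct nm_bdg_fwd x NM_BDG_BATCH_MAX   batched packets
 *	dstq: struct nm_bdg_q   x num_dstq           one list head per
 *	                                             port:ring, plus broadcast
 *	      uint16_t          x NM_BDG_BATCH_MAX   used by the forwarding
 *	                                             path (not in this excerpt)
 */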

/*
 * Fetch configuration from the device, to cope with dynamic
 * reconfigurations after loading the module.
 */
static int
netmap_update_config(struct netmap_adapter *na)
{
	struct ifnet *ifp = na->ifp;
	u_int txr, txd, rxr, rxd;

	txr = txd = rxr = rxd = 0;
	if (na->nm_config) {
		na->nm_config(ifp, &txr, &txd, &rxr, &rxd);
	} else {
		/* take whatever we had at init time */
		txr = na->num_tx_rings;
		txd = na->num_tx_desc;
		rxr = na->num_rx_rings;
		rxd = na->num_rx_desc;
	}

	if (na->num_tx_rings == txr && na->num_tx_desc == txd &&
	    na->num_rx_rings == rxr && na->num_rx_desc == rxd)
		return 0; /* nothing changed */
	if (netmap_verbose || na->refcount > 0) {
		D("stored config %s: txring %d x %d, rxring %d x %d",
			ifp->if_xname,
			na->num_tx_rings, na->num_tx_desc,
			na->num_rx_rings, na->num_rx_desc);
		D("new config %s: txring %d x %d, rxring %d x %d",
			ifp->if_xname, txr, txd, rxr, rxd);
	}
	if (na->refcount == 0) {
		D("configuration changed (but fine)");
		na->num_tx_rings = txr;
		na->num_tx_desc = txd;
		na->num_rx_rings = rxr;
		na->num_rx_desc = rxd;
		return 0;
	}
	D("configuration changed while active, this is bad...");
	return 1;
}

static struct netmap_if *
netmap_if_new(const char *ifname, struct netmap_adapter *na)
{
	if (netmap_update_config(na)) {
		/* configuration mismatch, report and fail */
		return NULL;
	}
	return netmap_mem_if_new(ifname, na);
}


/* Structure associated to each thread which registered an interface.
 *
 * The first 4 fields of this structure are written by NIOCREGIF and
 * read by poll() and NIOC?XSYNC.
 * There is low contention among writers (actually, a correct user program
 * should have no contention among writers) and among writers and readers,
 * so we use a single global lock to protect the structure initialization.
 * Since initialization involves the allocation of memory, we reuse the memory
 * allocator lock.
 * Read access to the structure is lock free. Readers must check that
 * np_nifp is not NULL before using the other fields.
 * If np_nifp is NULL initialization has not been performed, so they should
 * return an error to userlevel.
 *
 * The ref_done field is used to regulate access to the refcount in the
 * memory allocator. The refcount must be incremented at most once for
 * each open("/dev/netmap"). The increment is performed by the first
 * function that calls netmap_get_memory() (currently called by
 * mmap(), NIOCGINFO and NIOCREGIF).
 * If the refcount is incremented, it is then decremented when the
 * private structure is destroyed.
 */
struct netmap_priv_d {
	struct netmap_if * volatile np_nifp;	/* netmap if descriptor. */

	struct ifnet	*np_ifp;	/* device for which we hold a ref. */
	int		np_ringid;	/* from the ioctl */
	u_int		np_qfirst, np_qlast;	/* range of rings to scan */
	uint16_t	np_txpoll;

	struct netmap_mem_d *np_mref;	/* use with NMG_LOCK held */
#ifdef __FreeBSD__
	int		np_refcount;	/* use with NMG_LOCK held */
#endif /* __FreeBSD__ */
};

/* grab a reference to the memory allocator, if we don't have one already. The
 * reference is taken from the netmap_adapter registered with the priv.
 */
static int
netmap_get_memory_locked(struct netmap_priv_d* p)
{
	struct netmap_mem_d *nmd;
	int error = 0;

	if (p->np_ifp == NULL) {
		if (!netmap_mmap_unreg)
			return ENODEV;
		/* for compatibility with older versions of the API
		 * we use the global allocator when no interface has been
		 * registered
		 */
		nmd = &nm_mem;
	} else {
		nmd = NA(p->np_ifp)->nm_mem;
	}
	if (p->np_mref == NULL) {
		error = netmap_mem_finalize(nmd);
		if (!error)
			p->np_mref = nmd;
	} else if (p->np_mref != nmd) {
		/* a virtual port has been registered, but previous
		 * syscalls already used the global allocator.
		 * We cannot continue
		 */
		error = ENODEV;
	}
	return error;
}

static int
netmap_get_memory(struct netmap_priv_d* p)
{
	int error;
	NMG_LOCK();
	error = netmap_get_memory_locked(p);
	NMG_UNLOCK();
	return error;
}

static int
netmap_have_memory_locked(struct netmap_priv_d* p)
{
	return p->np_mref != NULL;
}

static void
netmap_drop_memory_locked(struct netmap_priv_d* p)
{
	if (p->np_mref) {
		netmap_mem_deref(p->np_mref);
		p->np_mref = NULL;
	}
}

/*
 * File descriptor's private data destructor.
 *
 * Call nm_register(ifp,0) to stop netmap mode on the interface and
 * revert to normal operation. We expect that np_ifp has not gone.
 * The second argument is the nifp to work on. In some cases it is
 * not attached yet to the netmap_priv_d so we need to pass it as
 * a separate argument.
 */
/* call with NMG_LOCK held */
static void
netmap_do_unregif(struct netmap_priv_d *priv, struct netmap_if *nifp)
{
	struct ifnet *ifp = priv->np_ifp;
	struct netmap_adapter *na = NA(ifp);

	NMG_LOCK_ASSERT();
	na->refcount--;
	if (na->refcount <= 0) {	/* last instance */
		u_int i;

		if (netmap_verbose)
			D("deleting last instance for %s", ifp->if_xname);
		/*
		 * (TO CHECK) This function is only called
		 * when the last reference to this file descriptor goes
		 * away. This means we cannot have any pending poll()
		 * or interrupt routine operating on the structure.
		 * XXX The file may be closed in a thread while
		 * another thread is using it.
		 * Linux keeps the file opened until the last reference
		 * by any outstanding ioctl/poll or mmap is gone.
		 * FreeBSD does not track mmap()s (but we do) and
		 * wakes up any sleeping poll(). Need to check what
		 * happens if the close() occurs while a concurrent
		 * syscall is running.
		 */
		na->nm_register(ifp, 0); /* off, clear IFCAP_NETMAP */
		/* Wake up any sleeping threads. netmap_poll will
		 * then return POLLERR
		 * XXX The wake up now must happen during *_down(), when
		 * we order all activities to stop. -gl
		 */
		nm_free_bdgfwd(na);
		for (i = 0; i < na->num_tx_rings + 1; i++) {
			mtx_destroy(&na->tx_rings[i].q_lock);
		}
		for (i = 0; i < na->num_rx_rings + 1; i++) {
			mtx_destroy(&na->rx_rings[i].q_lock);
		}
		/* XXX kqueue(9) needed; these will mirror knlist_init. */
		/* knlist_destroy(&na->tx_si.si_note); */
		/* knlist_destroy(&na->rx_si.si_note); */
		if (nma_is_hw(na))
			SWNA(ifp)->tx_rings = SWNA(ifp)->rx_rings = NULL;
	}
	/*
	 * netmap_mem_if_delete() deletes the nifp, and if this is
	 * the last instance also buffers, rings and krings.
	 */
	netmap_mem_if_delete(na, nifp);
}


/* we assume netmap adapter exists
 * Called with NMG_LOCK held
 */
static void
nm_if_rele(struct ifnet *ifp)
{
	int i, is_hw, hw, sw, lim;
	struct nm_bridge *b;
	struct netmap_adapter *na;
	uint8_t tmp[NM_BDG_MAXPORTS];

	NMG_LOCK_ASSERT();
	/* I can be called not only for get_ifp()-ed references where netmap's
	 * capability is guaranteed, but also for non-netmap-capable NICs.
	 */
	if (!NETMAP_CAPABLE(ifp) || !NA(ifp)->na_bdg) {
		if_rele(ifp);
		return;
	}
	na = NA(ifp);
	b = na->na_bdg;
	is_hw = nma_is_hw(na);

	ND("%s has %d references", ifp->if_xname, NA(ifp)->na_bdg_refcount);

	if (!DROP_BDG_REF(ifp))
		return;

	/*
	New algorithm:
	make a copy of bdg_port_index;
	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
	in the array of bdg_port_index, replacing them with
	entries from the bottom of the array;
	decrement bdg_active_ports;
	acquire BDG_WLOCK() and copy back the array.
	 */

	hw = NA(ifp)->bdg_port;
	sw = (is_hw && SWNA(ifp)->na_bdg) ? SWNA(ifp)->bdg_port : -1;
	lim = b->bdg_active_ports;

	ND("detach %d and %d (lim %d)", hw, sw, lim);
	/* make a copy of the list of active ports, update it,
	 * and then copy back within BDG_WLOCK().
1087ce3ee1e7SLuigi Rizzo */ 1088ce3ee1e7SLuigi Rizzo memcpy(tmp, b->bdg_port_index, sizeof(tmp)); 1089ce3ee1e7SLuigi Rizzo for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) { 1090ce3ee1e7SLuigi Rizzo if (hw >= 0 && tmp[i] == hw) { 1091ce3ee1e7SLuigi Rizzo ND("detach hw %d at %d", hw, i); 1092ce3ee1e7SLuigi Rizzo lim--; /* point to last active port */ 1093ce3ee1e7SLuigi Rizzo tmp[i] = tmp[lim]; /* swap with i */ 1094ce3ee1e7SLuigi Rizzo tmp[lim] = hw; /* now this is inactive */ 1095ce3ee1e7SLuigi Rizzo hw = -1; 1096ce3ee1e7SLuigi Rizzo } else if (sw >= 0 && tmp[i] == sw) { 1097ce3ee1e7SLuigi Rizzo ND("detach sw %d at %d", sw, i); 1098ce3ee1e7SLuigi Rizzo lim--; 1099ce3ee1e7SLuigi Rizzo tmp[i] = tmp[lim]; 1100ce3ee1e7SLuigi Rizzo tmp[lim] = sw; 1101ce3ee1e7SLuigi Rizzo sw = -1; 1102ce3ee1e7SLuigi Rizzo } else { 1103ce3ee1e7SLuigi Rizzo i++; 1104ce3ee1e7SLuigi Rizzo } 1105ce3ee1e7SLuigi Rizzo } 1106ce3ee1e7SLuigi Rizzo if (hw >= 0 || sw >= 0) { 1107ce3ee1e7SLuigi Rizzo D("XXX delete failed hw %d sw %d, should panic...", hw, sw); 1108ce3ee1e7SLuigi Rizzo } 1109ce3ee1e7SLuigi Rizzo hw = NA(ifp)->bdg_port; 1110ce3ee1e7SLuigi Rizzo sw = (is_hw && SWNA(ifp)->na_bdg) ? SWNA(ifp)->bdg_port : -1; 1111ce3ee1e7SLuigi Rizzo 1112ce3ee1e7SLuigi Rizzo BDG_WLOCK(b); 1113ce3ee1e7SLuigi Rizzo b->bdg_ports[hw] = NULL; 1114f18be576SLuigi Rizzo na->na_bdg = NULL; 1115ce3ee1e7SLuigi Rizzo if (sw >= 0) { 1116ce3ee1e7SLuigi Rizzo b->bdg_ports[sw] = NULL; 1117f18be576SLuigi Rizzo SWNA(ifp)->na_bdg = NULL; 1118f196ce38SLuigi Rizzo } 1119ce3ee1e7SLuigi Rizzo memcpy(b->bdg_port_index, tmp, sizeof(tmp)); 1120ce3ee1e7SLuigi Rizzo b->bdg_active_ports = lim; 1121f18be576SLuigi Rizzo BDG_WUNLOCK(b); 1122ce3ee1e7SLuigi Rizzo 1123ce3ee1e7SLuigi Rizzo ND("now %d active ports", lim); 1124ce3ee1e7SLuigi Rizzo if (lim == 0) { 1125ce3ee1e7SLuigi Rizzo ND("marking bridge %s as free", b->bdg_basename); 1126f18be576SLuigi Rizzo b->nm_bdg_lookup = NULL; 1127f18be576SLuigi Rizzo } 1128ce3ee1e7SLuigi Rizzo 1129ce3ee1e7SLuigi Rizzo if (is_hw) { 1130f18be576SLuigi Rizzo if_rele(ifp); 1131f18be576SLuigi Rizzo } else { 1132ce3ee1e7SLuigi Rizzo if (na->na_flags & NAF_MEM_OWNER) 1133ce3ee1e7SLuigi Rizzo netmap_mem_private_delete(na->nm_mem); 1134f18be576SLuigi Rizzo bzero(na, sizeof(*na)); 1135f18be576SLuigi Rizzo free(na, M_DEVBUF); 1136f18be576SLuigi Rizzo bzero(ifp, sizeof(*ifp)); 1137f18be576SLuigi Rizzo free(ifp, M_DEVBUF); 1138f18be576SLuigi Rizzo } 1139ce3ee1e7SLuigi Rizzo } 1140ce3ee1e7SLuigi Rizzo 1141ce3ee1e7SLuigi Rizzo 1142ce3ee1e7SLuigi Rizzo /* 1143ce3ee1e7SLuigi Rizzo * returns 1 if this is the last instance and we can free priv 1144ce3ee1e7SLuigi Rizzo */ 1145ce3ee1e7SLuigi Rizzo static int 1146ce3ee1e7SLuigi Rizzo netmap_dtor_locked(struct netmap_priv_d *priv) 1147ce3ee1e7SLuigi Rizzo { 1148ce3ee1e7SLuigi Rizzo struct ifnet *ifp = priv->np_ifp; 1149ce3ee1e7SLuigi Rizzo 1150ce3ee1e7SLuigi Rizzo #ifdef __FreeBSD__ 1151ce3ee1e7SLuigi Rizzo /* 1152ce3ee1e7SLuigi Rizzo * np_refcount is the number of active mmaps on 1153ce3ee1e7SLuigi Rizzo * this file descriptor 1154ce3ee1e7SLuigi Rizzo */ 1155ce3ee1e7SLuigi Rizzo if (--priv->np_refcount > 0) { 1156ce3ee1e7SLuigi Rizzo return 0; 1157ce3ee1e7SLuigi Rizzo } 1158ce3ee1e7SLuigi Rizzo #endif /* __FreeBSD__ */ 1159ce3ee1e7SLuigi Rizzo if (ifp) { 1160ce3ee1e7SLuigi Rizzo netmap_do_unregif(priv, priv->np_nifp); 1161ce3ee1e7SLuigi Rizzo } 1162ce3ee1e7SLuigi Rizzo netmap_drop_memory_locked(priv); 1163ce3ee1e7SLuigi Rizzo if (ifp) { 1164ce3ee1e7SLuigi Rizzo nm_if_rele(ifp); /* might also 
destroy *na */ 1165ce3ee1e7SLuigi Rizzo } 1166ce3ee1e7SLuigi Rizzo return 1; 1167f196ce38SLuigi Rizzo } 11685819da83SLuigi Rizzo 11695819da83SLuigi Rizzo static void 11705819da83SLuigi Rizzo netmap_dtor(void *data) 11715819da83SLuigi Rizzo { 11725819da83SLuigi Rizzo struct netmap_priv_d *priv = data; 1173ce3ee1e7SLuigi Rizzo int last_instance; 11745819da83SLuigi Rizzo 1175ce3ee1e7SLuigi Rizzo NMG_LOCK(); 1176ce3ee1e7SLuigi Rizzo last_instance = netmap_dtor_locked(priv); 1177ce3ee1e7SLuigi Rizzo NMG_UNLOCK(); 1178ce3ee1e7SLuigi Rizzo if (last_instance) { 1179ce3ee1e7SLuigi Rizzo bzero(priv, sizeof(*priv)); /* for safety */ 118068b8534bSLuigi Rizzo free(priv, M_DEVBUF); 118168b8534bSLuigi Rizzo } 1182ce3ee1e7SLuigi Rizzo } 118368b8534bSLuigi Rizzo 1184f18be576SLuigi Rizzo 11858241616dSLuigi Rizzo #ifdef __FreeBSD__ 11868241616dSLuigi Rizzo 1187f18be576SLuigi Rizzo /* 1188f18be576SLuigi Rizzo * In order to track whether pages are still mapped, we hook into 1189f18be576SLuigi Rizzo * the standard cdev_pager and intercept the constructor and 1190f18be576SLuigi Rizzo * destructor. 1191f18be576SLuigi Rizzo */ 11928241616dSLuigi Rizzo 1193ce3ee1e7SLuigi Rizzo struct netmap_vm_handle_t { 1194ce3ee1e7SLuigi Rizzo struct cdev *dev; 1195ce3ee1e7SLuigi Rizzo struct netmap_priv_d *priv; 1196ce3ee1e7SLuigi Rizzo }; 1197f18be576SLuigi Rizzo 11988241616dSLuigi Rizzo static int 11998241616dSLuigi Rizzo netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 12008241616dSLuigi Rizzo vm_ooffset_t foff, struct ucred *cred, u_short *color) 12018241616dSLuigi Rizzo { 1202ce3ee1e7SLuigi Rizzo struct netmap_vm_handle_t *vmh = handle; 1203ce3ee1e7SLuigi Rizzo D("handle %p size %jd prot %d foff %jd", 1204ce3ee1e7SLuigi Rizzo handle, (intmax_t)size, prot, (intmax_t)foff); 1205ce3ee1e7SLuigi Rizzo dev_ref(vmh->dev); 1206ce3ee1e7SLuigi Rizzo return 0; 12078241616dSLuigi Rizzo } 12088241616dSLuigi Rizzo 1209f18be576SLuigi Rizzo 12108241616dSLuigi Rizzo static void 12118241616dSLuigi Rizzo netmap_dev_pager_dtor(void *handle) 12128241616dSLuigi Rizzo { 1213ce3ee1e7SLuigi Rizzo struct netmap_vm_handle_t *vmh = handle; 1214ce3ee1e7SLuigi Rizzo struct cdev *dev = vmh->dev; 1215ce3ee1e7SLuigi Rizzo struct netmap_priv_d *priv = vmh->priv; 1216ce3ee1e7SLuigi Rizzo D("handle %p", handle); 1217ce3ee1e7SLuigi Rizzo netmap_dtor(priv); 1218ce3ee1e7SLuigi Rizzo free(vmh, M_DEVBUF); 1219ce3ee1e7SLuigi Rizzo dev_rel(dev); 1220ce3ee1e7SLuigi Rizzo } 1221ce3ee1e7SLuigi Rizzo 1222ce3ee1e7SLuigi Rizzo static int 1223ce3ee1e7SLuigi Rizzo netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, 1224ce3ee1e7SLuigi Rizzo int prot, vm_page_t *mres) 1225ce3ee1e7SLuigi Rizzo { 1226ce3ee1e7SLuigi Rizzo struct netmap_vm_handle_t *vmh = object->handle; 1227ce3ee1e7SLuigi Rizzo struct netmap_priv_d *priv = vmh->priv; 1228ce3ee1e7SLuigi Rizzo vm_paddr_t paddr; 1229ce3ee1e7SLuigi Rizzo vm_page_t page; 1230ce3ee1e7SLuigi Rizzo vm_memattr_t memattr; 1231ce3ee1e7SLuigi Rizzo vm_pindex_t pidx; 1232ce3ee1e7SLuigi Rizzo 1233ce3ee1e7SLuigi Rizzo ND("object %p offset %jd prot %d mres %p", 1234ce3ee1e7SLuigi Rizzo object, (intmax_t)offset, prot, mres); 1235ce3ee1e7SLuigi Rizzo memattr = object->memattr; 1236ce3ee1e7SLuigi Rizzo pidx = OFF_TO_IDX(offset); 1237ce3ee1e7SLuigi Rizzo paddr = netmap_mem_ofstophys(priv->np_mref, offset); 1238ce3ee1e7SLuigi Rizzo if (paddr == 0) 1239ce3ee1e7SLuigi Rizzo return VM_PAGER_FAIL; 1240ce3ee1e7SLuigi Rizzo 1241ce3ee1e7SLuigi Rizzo if (((*mres)->flags & PG_FICTITIOUS) != 0) { 1242ce3ee1e7SLuigi Rizzo /* 
1243ce3ee1e7SLuigi Rizzo * If the passed-in result page is a fake page, update it with 1244ce3ee1e7SLuigi Rizzo * the new physical address. 1245ce3ee1e7SLuigi Rizzo */ 1246ce3ee1e7SLuigi Rizzo page = *mres; 1247ce3ee1e7SLuigi Rizzo vm_page_updatefake(page, paddr, memattr); 1248ce3ee1e7SLuigi Rizzo } else { 1249ce3ee1e7SLuigi Rizzo /* 1250ce3ee1e7SLuigi Rizzo * Replace the passed-in reqpage with our own fake page and 1251ce3ee1e7SLuigi Rizzo * free up all of the original pages. 1252ce3ee1e7SLuigi Rizzo */ 1253ce3ee1e7SLuigi Rizzo #ifndef VM_OBJECT_WUNLOCK /* FreeBSD < 10.x */ 1254ce3ee1e7SLuigi Rizzo #define VM_OBJECT_WUNLOCK VM_OBJECT_UNLOCK 1255ce3ee1e7SLuigi Rizzo #define VM_OBJECT_WLOCK VM_OBJECT_LOCK 1256ce3ee1e7SLuigi Rizzo #endif /* VM_OBJECT_WUNLOCK */ 1257ce3ee1e7SLuigi Rizzo 1258ce3ee1e7SLuigi Rizzo VM_OBJECT_WUNLOCK(object); 1259ce3ee1e7SLuigi Rizzo page = vm_page_getfake(paddr, memattr); 1260ce3ee1e7SLuigi Rizzo VM_OBJECT_WLOCK(object); 1261ce3ee1e7SLuigi Rizzo vm_page_lock(*mres); 1262ce3ee1e7SLuigi Rizzo vm_page_free(*mres); 1263ce3ee1e7SLuigi Rizzo vm_page_unlock(*mres); 1264ce3ee1e7SLuigi Rizzo *mres = page; 1265ce3ee1e7SLuigi Rizzo vm_page_insert(page, object, pidx); 1266ce3ee1e7SLuigi Rizzo } 1267ce3ee1e7SLuigi Rizzo page->valid = VM_PAGE_BITS_ALL; 1268ce3ee1e7SLuigi Rizzo return (VM_PAGER_OK); 12698241616dSLuigi Rizzo } 12708241616dSLuigi Rizzo 12718241616dSLuigi Rizzo 12728241616dSLuigi Rizzo static struct cdev_pager_ops netmap_cdev_pager_ops = { 12738241616dSLuigi Rizzo .cdev_pg_ctor = netmap_dev_pager_ctor, 12748241616dSLuigi Rizzo .cdev_pg_dtor = netmap_dev_pager_dtor, 1275ce3ee1e7SLuigi Rizzo .cdev_pg_fault = netmap_dev_pager_fault, 12768241616dSLuigi Rizzo }; 12778241616dSLuigi Rizzo 1278f18be576SLuigi Rizzo 12798241616dSLuigi Rizzo static int 12808241616dSLuigi Rizzo netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff, 12818241616dSLuigi Rizzo vm_size_t objsize, vm_object_t *objp, int prot) 12828241616dSLuigi Rizzo { 1283ce3ee1e7SLuigi Rizzo int error; 1284ce3ee1e7SLuigi Rizzo struct netmap_vm_handle_t *vmh; 1285ce3ee1e7SLuigi Rizzo struct netmap_priv_d *priv; 12868241616dSLuigi Rizzo vm_object_t obj; 12878241616dSLuigi Rizzo 1288ce3ee1e7SLuigi Rizzo D("cdev %p foff %jd size %jd objp %p prot %d", cdev, 128988f79057SGleb Smirnoff (intmax_t )*foff, (intmax_t )objsize, objp, prot); 12908241616dSLuigi Rizzo 1291ce3ee1e7SLuigi Rizzo vmh = malloc(sizeof(struct netmap_vm_handle_t), M_DEVBUF, 1292ce3ee1e7SLuigi Rizzo M_NOWAIT | M_ZERO); 1293ce3ee1e7SLuigi Rizzo if (vmh == NULL) 1294ce3ee1e7SLuigi Rizzo return ENOMEM; 1295ce3ee1e7SLuigi Rizzo vmh->dev = cdev; 129668b8534bSLuigi Rizzo 1297ce3ee1e7SLuigi Rizzo NMG_LOCK(); 12988241616dSLuigi Rizzo error = devfs_get_cdevpriv((void**)&priv); 1299ce3ee1e7SLuigi Rizzo if (error) 1300ce3ee1e7SLuigi Rizzo goto err_unlock; 1301ce3ee1e7SLuigi Rizzo vmh->priv = priv; 1302ce3ee1e7SLuigi Rizzo priv->np_refcount++; 1303ce3ee1e7SLuigi Rizzo NMG_UNLOCK(); 1304ce3ee1e7SLuigi Rizzo 13058241616dSLuigi Rizzo error = netmap_get_memory(priv); 13068241616dSLuigi Rizzo if (error) 1307ce3ee1e7SLuigi Rizzo goto err_deref; 13088241616dSLuigi Rizzo 1309ce3ee1e7SLuigi Rizzo obj = cdev_pager_allocate(vmh, OBJT_DEVICE, 1310ce3ee1e7SLuigi Rizzo &netmap_cdev_pager_ops, objsize, prot, 1311ce3ee1e7SLuigi Rizzo *foff, NULL); 1312ce3ee1e7SLuigi Rizzo if (obj == NULL) { 1313ce3ee1e7SLuigi Rizzo D("cdev_pager_allocate failed"); 1314ce3ee1e7SLuigi Rizzo error = EINVAL; 1315ce3ee1e7SLuigi Rizzo goto err_deref; 1316ce3ee1e7SLuigi Rizzo }
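 /* Note (added): from here on the pager owns vmh; the reference
  * on priv taken above is dropped by netmap_dev_pager_dtor(),
  * which calls netmap_dtor() when the last mapping goes away. */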
131768b8534bSLuigi Rizzo 1318ce3ee1e7SLuigi Rizzo *objp = obj; 1319ce3ee1e7SLuigi Rizzo return 0; 1320ce3ee1e7SLuigi Rizzo 1321ce3ee1e7SLuigi Rizzo err_deref: 1322ce3ee1e7SLuigi Rizzo NMG_LOCK(); 1323ce3ee1e7SLuigi Rizzo priv->np_refcount--; 1324ce3ee1e7SLuigi Rizzo err_unlock: 1325ce3ee1e7SLuigi Rizzo NMG_UNLOCK(); 1326ce3ee1e7SLuigi Rizzo // err: 1327ce3ee1e7SLuigi Rizzo free(vmh, M_DEVBUF); 1328ce3ee1e7SLuigi Rizzo return error; 13298241616dSLuigi Rizzo } 13308241616dSLuigi Rizzo 1331f18be576SLuigi Rizzo 1332ce3ee1e7SLuigi Rizzo // XXX can we remove this ? 13338241616dSLuigi Rizzo static int 13348241616dSLuigi Rizzo netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td) 13358241616dSLuigi Rizzo { 1336ae10d1afSLuigi Rizzo if (netmap_verbose) 1337ae10d1afSLuigi Rizzo D("dev %p fflag 0x%x devtype %d td %p", 1338ae10d1afSLuigi Rizzo dev, fflag, devtype, td); 13398241616dSLuigi Rizzo return 0; 13408241616dSLuigi Rizzo } 13418241616dSLuigi Rizzo 1342f18be576SLuigi Rizzo 13438241616dSLuigi Rizzo static int 13448241616dSLuigi Rizzo netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 13458241616dSLuigi Rizzo { 13468241616dSLuigi Rizzo struct netmap_priv_d *priv; 13478241616dSLuigi Rizzo int error; 13488241616dSLuigi Rizzo 1349ce3ee1e7SLuigi Rizzo (void)dev; 1350ce3ee1e7SLuigi Rizzo (void)oflags; 1351ce3ee1e7SLuigi Rizzo (void)devtype; 1352ce3ee1e7SLuigi Rizzo (void)td; 1353ce3ee1e7SLuigi Rizzo 1354ce3ee1e7SLuigi Rizzo // XXX wait or nowait ? 13558241616dSLuigi Rizzo priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF, 13568241616dSLuigi Rizzo M_NOWAIT | M_ZERO); 13578241616dSLuigi Rizzo if (priv == NULL) 13588241616dSLuigi Rizzo return ENOMEM; 13598241616dSLuigi Rizzo 13608241616dSLuigi Rizzo error = devfs_set_cdevpriv(priv, netmap_dtor); 13618241616dSLuigi Rizzo if (error) 13628241616dSLuigi Rizzo return error; 13638241616dSLuigi Rizzo 1364ce3ee1e7SLuigi Rizzo priv->np_refcount = 1; 1365ce3ee1e7SLuigi Rizzo 13668241616dSLuigi Rizzo return 0; 136768b8534bSLuigi Rizzo } 1368f196ce38SLuigi Rizzo #endif /* __FreeBSD__ */ 136968b8534bSLuigi Rizzo 137068b8534bSLuigi Rizzo 137168b8534bSLuigi Rizzo /* 137202ad4083SLuigi Rizzo * Handlers for synchronization of the queues from/to the host. 1373091fd0abSLuigi Rizzo * Netmap has two operating modes: 1374091fd0abSLuigi Rizzo * - in the default mode, the rings connected to the host stack are 1375091fd0abSLuigi Rizzo * just another ring pair managed by userspace; 1376091fd0abSLuigi Rizzo * - in transparent mode (XXX to be defined) incoming packets 1377091fd0abSLuigi Rizzo * (from the host or the NIC) are marked as NS_FORWARD upon 1378091fd0abSLuigi Rizzo * arrival, and the user application has a chance to reset the 1379091fd0abSLuigi Rizzo * flag for packets that should be dropped. 1380091fd0abSLuigi Rizzo * On the RXSYNC or poll(), packets in RX rings between 1381091fd0abSLuigi Rizzo * kring->nr_hwcur and ring->cur with NS_FORWARD still set are moved 1382091fd0abSLuigi Rizzo * to the other side. 1383091fd0abSLuigi Rizzo * The transfer NIC --> host is relatively easy, just encapsulate 1384091fd0abSLuigi Rizzo * into mbufs and we are done. The host --> NIC side is slightly 1385091fd0abSLuigi Rizzo * harder because there might not be room in the tx ring so it 1386091fd0abSLuigi Rizzo * might take a while before releasing the buffer.
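 * A sketch of the intended transparent-mode use (illustrative,
 * added): an application acting as a filter scans the RX ring,
 * clears NS_FORWARD on the slots it wants to drop, and lets the
 * next rxsync/poll push the surviving packets to the other side.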
1387091fd0abSLuigi Rizzo */ 1388091fd0abSLuigi Rizzo 1389f18be576SLuigi Rizzo 1390091fd0abSLuigi Rizzo /* 1391091fd0abSLuigi Rizzo * pass a chain of buffers to the host stack as coming from 'dst' 1392091fd0abSLuigi Rizzo */ 1393091fd0abSLuigi Rizzo static void 1394091fd0abSLuigi Rizzo netmap_send_up(struct ifnet *dst, struct mbuf *head) 1395091fd0abSLuigi Rizzo { 1396091fd0abSLuigi Rizzo struct mbuf *m; 1397091fd0abSLuigi Rizzo 1398091fd0abSLuigi Rizzo /* send packets up, outside the lock */ 1399091fd0abSLuigi Rizzo while ((m = head) != NULL) { 1400091fd0abSLuigi Rizzo head = head->m_nextpkt; 1401091fd0abSLuigi Rizzo m->m_nextpkt = NULL; 1402091fd0abSLuigi Rizzo if (netmap_verbose & NM_VERB_HOST) 1403091fd0abSLuigi Rizzo D("sending up pkt %p size %d", m, MBUF_LEN(m)); 1404091fd0abSLuigi Rizzo NM_SEND_UP(dst, m); 1405091fd0abSLuigi Rizzo } 1406091fd0abSLuigi Rizzo } 1407091fd0abSLuigi Rizzo 1408091fd0abSLuigi Rizzo struct mbq { 1409091fd0abSLuigi Rizzo struct mbuf *head; 1410091fd0abSLuigi Rizzo struct mbuf *tail; 1411091fd0abSLuigi Rizzo int count; 1412091fd0abSLuigi Rizzo }; 1413091fd0abSLuigi Rizzo 1414f18be576SLuigi Rizzo 1415091fd0abSLuigi Rizzo /* 1416091fd0abSLuigi Rizzo * put a copy of the buffers marked NS_FORWARD into an mbuf chain. 1417091fd0abSLuigi Rizzo * Run from hwcur to cur - reserved 1418091fd0abSLuigi Rizzo */ 1419091fd0abSLuigi Rizzo static void 1420091fd0abSLuigi Rizzo netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force) 1421091fd0abSLuigi Rizzo { 1422091fd0abSLuigi Rizzo /* Take packets from hwcur to cur-reserved and pass them up. 1423091fd0abSLuigi Rizzo * In case of no buffers we give up. At the end of the loop, 1424091fd0abSLuigi Rizzo * the queue is drained in all cases. 1425091fd0abSLuigi Rizzo * XXX handle reserved 1426091fd0abSLuigi Rizzo */ 1427ce3ee1e7SLuigi Rizzo u_int lim = kring->nkr_num_slots - 1; 1428091fd0abSLuigi Rizzo struct mbuf *m, *tail = q->tail; 1429ce3ee1e7SLuigi Rizzo u_int k = kring->ring->cur, n = kring->ring->reserved; 1430ce3ee1e7SLuigi Rizzo struct netmap_mem_d *nmd = kring->na->nm_mem; 1431091fd0abSLuigi Rizzo 1432ce3ee1e7SLuigi Rizzo /* compute the final position, ring->cur - ring->reserved */ 1433ce3ee1e7SLuigi Rizzo if (n > 0) { 1434ce3ee1e7SLuigi Rizzo if (k < n) 1435ce3ee1e7SLuigi Rizzo k += kring->nkr_num_slots; 1436ce3ee1e7SLuigi Rizzo k += n; 1437ce3ee1e7SLuigi Rizzo } 1438091fd0abSLuigi Rizzo for (n = kring->nr_hwcur; n != k;) { 1439091fd0abSLuigi Rizzo struct netmap_slot *slot = &kring->ring->slot[n]; 1440091fd0abSLuigi Rizzo 1441ce3ee1e7SLuigi Rizzo n = nm_next(n, lim); 1442091fd0abSLuigi Rizzo if ((slot->flags & NS_FORWARD) == 0 && !force) 1443091fd0abSLuigi Rizzo continue; 1444ce3ee1e7SLuigi Rizzo if (slot->len < 14 || slot->len > NETMAP_BDG_BUF_SIZE(nmd)) { 1445091fd0abSLuigi Rizzo D("bad pkt at %d len %d", n, slot->len); 1446091fd0abSLuigi Rizzo continue; 1447091fd0abSLuigi Rizzo } 1448091fd0abSLuigi Rizzo slot->flags &= ~NS_FORWARD; // XXX needed ? 
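 /* Note (added): m_devget() copies slot->len bytes from the
  * netmap buffer into a newly allocated mbuf chain, so the
  * slot can be recycled as soon as the call returns. */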
1449ce3ee1e7SLuigi Rizzo /* XXX adapt to the case of a multisegment packet */ 1450ce3ee1e7SLuigi Rizzo m = m_devget(BDG_NMB(nmd, slot), slot->len, 0, kring->na->ifp, NULL); 1451091fd0abSLuigi Rizzo 1452091fd0abSLuigi Rizzo if (m == NULL) 1453091fd0abSLuigi Rizzo break; 1454091fd0abSLuigi Rizzo if (tail) 1455091fd0abSLuigi Rizzo tail->m_nextpkt = m; 1456091fd0abSLuigi Rizzo else 1457091fd0abSLuigi Rizzo q->head = m; 1458091fd0abSLuigi Rizzo tail = m; 1459091fd0abSLuigi Rizzo q->count++; 1460091fd0abSLuigi Rizzo m->m_nextpkt = NULL; 1461091fd0abSLuigi Rizzo } 1462091fd0abSLuigi Rizzo q->tail = tail; 1463091fd0abSLuigi Rizzo } 1464091fd0abSLuigi Rizzo 1465f18be576SLuigi Rizzo 1466091fd0abSLuigi Rizzo /* 1467091fd0abSLuigi Rizzo * The host ring has packets from nr_hwcur to (cur - reserved) 1468ce3ee1e7SLuigi Rizzo * to be sent down to the NIC. 1469ce3ee1e7SLuigi Rizzo * We need to use the queue lock on the source (host RX ring) 1470ce3ee1e7SLuigi Rizzo * to protect against netmap_transmit. 1471ce3ee1e7SLuigi Rizzo * If the user is well behaved we do not need to acquire locks 1472ce3ee1e7SLuigi Rizzo * on the destination(s), 1473ce3ee1e7SLuigi Rizzo * so we only need to make sure that there are no panics because 1474ce3ee1e7SLuigi Rizzo * of user errors. 1475ce3ee1e7SLuigi Rizzo * XXX verify 1476ce3ee1e7SLuigi Rizzo * 1477ce3ee1e7SLuigi Rizzo * We scan the tx rings, which have just been 1478091fd0abSLuigi Rizzo * flushed so nr_hwcur == cur. Pushing packets down means 1479091fd0abSLuigi Rizzo * increment cur and decrement avail. 1480091fd0abSLuigi Rizzo * XXX to be verified 1481091fd0abSLuigi Rizzo */ 1482091fd0abSLuigi Rizzo static void 1483091fd0abSLuigi Rizzo netmap_sw_to_nic(struct netmap_adapter *na) 1484091fd0abSLuigi Rizzo { 1485091fd0abSLuigi Rizzo struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings]; 1486091fd0abSLuigi Rizzo struct netmap_kring *k1 = &na->tx_rings[0]; 1487ce3ee1e7SLuigi Rizzo u_int i, howmany, src_lim, dst_lim; 1488ce3ee1e7SLuigi Rizzo 1489ce3ee1e7SLuigi Rizzo /* XXX we should also check that the carrier is on */ 1490ce3ee1e7SLuigi Rizzo if (kring->nkr_stopped) 1491ce3ee1e7SLuigi Rizzo return; 1492ce3ee1e7SLuigi Rizzo 1493ce3ee1e7SLuigi Rizzo mtx_lock(&kring->q_lock); 1494ce3ee1e7SLuigi Rizzo 1495ce3ee1e7SLuigi Rizzo if (kring->nkr_stopped) 1496ce3ee1e7SLuigi Rizzo goto out; 1497091fd0abSLuigi Rizzo 1498091fd0abSLuigi Rizzo howmany = kring->nr_hwavail; /* XXX otherwise cur - reserved - nr_hwcur */ 1499091fd0abSLuigi Rizzo 1500ce3ee1e7SLuigi Rizzo src_lim = kring->nkr_num_slots - 1; 1501091fd0abSLuigi Rizzo for (i = 0; howmany > 0 && i < na->num_tx_rings; i++, k1++) { 1502091fd0abSLuigi Rizzo ND("%d packets left to ring %d (space %d)", howmany, i, k1->nr_hwavail); 1503ce3ee1e7SLuigi Rizzo dst_lim = k1->nkr_num_slots - 1; 1504091fd0abSLuigi Rizzo while (howmany > 0 && k1->ring->avail > 0) { 1505091fd0abSLuigi Rizzo struct netmap_slot *src, *dst, tmp; 1506091fd0abSLuigi Rizzo src = &kring->ring->slot[kring->nr_hwcur]; 1507091fd0abSLuigi Rizzo dst = &k1->ring->slot[k1->ring->cur]; 1508091fd0abSLuigi Rizzo tmp = *src; 1509091fd0abSLuigi Rizzo src->buf_idx = dst->buf_idx; 1510091fd0abSLuigi Rizzo src->flags = NS_BUF_CHANGED; 1511091fd0abSLuigi Rizzo 1512091fd0abSLuigi Rizzo dst->buf_idx = tmp.buf_idx; 1513091fd0abSLuigi Rizzo dst->len = tmp.len; 1514091fd0abSLuigi Rizzo dst->flags = NS_BUF_CHANGED; 1515091fd0abSLuigi Rizzo ND("out len %d buf %d from %d to %d", 1516091fd0abSLuigi Rizzo dst->len, dst->buf_idx, 1517091fd0abSLuigi Rizzo kring->nr_hwcur, k1->ring->cur); 
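 /* Note (added): the swap above moves the packet without
  * copying it: the host RX slot donates its buffer to the NIC
  * TX slot and adopts the buffer previously found there, and
  * NS_BUF_CHANGED tells each ring owner to reload the buffer
  * address on the next sync. */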
1518091fd0abSLuigi Rizzo 1519ce3ee1e7SLuigi Rizzo kring->nr_hwcur = nm_next(kring->nr_hwcur, src_lim); 1520091fd0abSLuigi Rizzo howmany--; 1521091fd0abSLuigi Rizzo kring->nr_hwavail--; 1522ce3ee1e7SLuigi Rizzo k1->ring->cur = nm_next(k1->ring->cur, dst_lim); 1523091fd0abSLuigi Rizzo k1->ring->avail--; 1524091fd0abSLuigi Rizzo } 1525091fd0abSLuigi Rizzo kring->ring->cur = kring->nr_hwcur; // XXX 1526ce3ee1e7SLuigi Rizzo k1++; // XXX why? 1527091fd0abSLuigi Rizzo } 1528ce3ee1e7SLuigi Rizzo out: 1529ce3ee1e7SLuigi Rizzo mtx_unlock(&kring->q_lock); 1530091fd0abSLuigi Rizzo } 1531091fd0abSLuigi Rizzo 1532f18be576SLuigi Rizzo 1533091fd0abSLuigi Rizzo /* 1534ce3ee1e7SLuigi Rizzo * netmap_txsync_to_host() passes packets up. We are called from a 153502ad4083SLuigi Rizzo * system call in user process context, and the only contention 153602ad4083SLuigi Rizzo * can be among multiple user threads erroneously calling 1537091fd0abSLuigi Rizzo * this routine concurrently. 153868b8534bSLuigi Rizzo */ 153968b8534bSLuigi Rizzo static void 1540ce3ee1e7SLuigi Rizzo netmap_txsync_to_host(struct netmap_adapter *na) 154168b8534bSLuigi Rizzo { 1542d76bf4ffSLuigi Rizzo struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings]; 154368b8534bSLuigi Rizzo struct netmap_ring *ring = kring->ring; 1544091fd0abSLuigi Rizzo u_int k, lim = kring->nkr_num_slots - 1; 1545ce3ee1e7SLuigi Rizzo struct mbq q = { NULL, NULL, 0 }; 154668b8534bSLuigi Rizzo 1547ce3ee1e7SLuigi Rizzo if (nm_kr_tryget(kring)) { 1548ce3ee1e7SLuigi Rizzo D("ring %p busy (user error)", kring); 154902ad4083SLuigi Rizzo return; 155002ad4083SLuigi Rizzo } 1551ce3ee1e7SLuigi Rizzo k = ring->cur; 1552ce3ee1e7SLuigi Rizzo if (k > lim) { 1553ce3ee1e7SLuigi Rizzo D("invalid ring index in stack TX kring %p", kring); 1554ce3ee1e7SLuigi Rizzo netmap_ring_reinit(kring); 1555ce3ee1e7SLuigi Rizzo nm_kr_put(kring); 1556ce3ee1e7SLuigi Rizzo return; 1557ce3ee1e7SLuigi Rizzo } 155868b8534bSLuigi Rizzo 155968b8534bSLuigi Rizzo /* Take packets from hwcur to cur and pass them up. 156068b8534bSLuigi Rizzo * In case of no buffers we give up. At the end of the loop, 156168b8534bSLuigi Rizzo * the queue is drained in all cases. 156268b8534bSLuigi Rizzo */ 1563091fd0abSLuigi Rizzo netmap_grab_packets(kring, &q, 1); 156402ad4083SLuigi Rizzo kring->nr_hwcur = k; 156568b8534bSLuigi Rizzo kring->nr_hwavail = ring->avail = lim; 156668b8534bSLuigi Rizzo 1567ce3ee1e7SLuigi Rizzo nm_kr_put(kring); 1568091fd0abSLuigi Rizzo netmap_send_up(na->ifp, q.head); 156968b8534bSLuigi Rizzo } 157068b8534bSLuigi Rizzo 1571f18be576SLuigi Rizzo 1572ce3ee1e7SLuigi Rizzo /* 1573ce3ee1e7SLuigi Rizzo * This is the 'txsync' handler to send from a software ring to the 1574ce3ee1e7SLuigi Rizzo * host stack. 1575ce3ee1e7SLuigi Rizzo */ 1576f18be576SLuigi Rizzo /* SWNA(ifp)->txrings[0] is always NA(ifp)->txrings[NA(ifp)->num_txrings] */ 1577f18be576SLuigi Rizzo static int 1578ce3ee1e7SLuigi Rizzo netmap_bdg_to_host(struct ifnet *ifp, u_int ring_nr, int flags) 1579f18be576SLuigi Rizzo { 1580f18be576SLuigi Rizzo (void)ring_nr; 1581ce3ee1e7SLuigi Rizzo (void)flags; 1582ce3ee1e7SLuigi Rizzo if (netmap_verbose > 255) 1583ce3ee1e7SLuigi Rizzo RD(5, "sync to host %s ring %d", ifp->if_xname, ring_nr); 1584ce3ee1e7SLuigi Rizzo netmap_txsync_to_host(NA(ifp)); 1585f18be576SLuigi Rizzo return 0; 1586f18be576SLuigi Rizzo } 1587f18be576SLuigi Rizzo 1588f18be576SLuigi Rizzo 158968b8534bSLuigi Rizzo /* 159002ad4083SLuigi Rizzo * rxsync backend for packets coming from the host stack. 
1591ce3ee1e7SLuigi Rizzo * They have been put in the queue by netmap_transmit() so we 159202ad4083SLuigi Rizzo * need to protect access to the kring using a lock. 159302ad4083SLuigi Rizzo * 159468b8534bSLuigi Rizzo * This routine also does the selrecord if called from the poll handler 159568b8534bSLuigi Rizzo * (we know because td != NULL). 159601c7d25fSLuigi Rizzo * 159701c7d25fSLuigi Rizzo * NOTE: on linux, selrecord() is defined as a macro and uses pwait 159801c7d25fSLuigi Rizzo * as an additional hidden argument. 159968b8534bSLuigi Rizzo */ 160068b8534bSLuigi Rizzo static void 1601ce3ee1e7SLuigi Rizzo netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait) 160268b8534bSLuigi Rizzo { 1603d76bf4ffSLuigi Rizzo struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings]; 160468b8534bSLuigi Rizzo struct netmap_ring *ring = kring->ring; 160564ae02c3SLuigi Rizzo u_int j, n, lim = kring->nkr_num_slots; 160664ae02c3SLuigi Rizzo u_int k = ring->cur, resvd = ring->reserved; 160768b8534bSLuigi Rizzo 160801c7d25fSLuigi Rizzo (void)pwait; /* disable unused warnings */ 1609ce3ee1e7SLuigi Rizzo 1610ce3ee1e7SLuigi Rizzo if (kring->nkr_stopped) /* check a first time without lock */ 1611ce3ee1e7SLuigi Rizzo return; 1612ce3ee1e7SLuigi Rizzo 1613ce3ee1e7SLuigi Rizzo /* XXX as an optimization we could reuse na->core_lock */ 1614ce3ee1e7SLuigi Rizzo mtx_lock(&kring->q_lock); 1615ce3ee1e7SLuigi Rizzo 1616ce3ee1e7SLuigi Rizzo if (kring->nkr_stopped) /* check again with lock held */ 1617ce3ee1e7SLuigi Rizzo goto unlock_out; 1618ce3ee1e7SLuigi Rizzo 161964ae02c3SLuigi Rizzo if (k >= lim) { 162064ae02c3SLuigi Rizzo netmap_ring_reinit(kring); 1621ce3ee1e7SLuigi Rizzo goto unlock_out; 162264ae02c3SLuigi Rizzo } 162364ae02c3SLuigi Rizzo /* new packets are already set in nr_hwavail */ 162464ae02c3SLuigi Rizzo /* skip past packets that userspace has released */ 162564ae02c3SLuigi Rizzo j = kring->nr_hwcur; 162664ae02c3SLuigi Rizzo if (resvd > 0) { 162764ae02c3SLuigi Rizzo if (resvd + ring->avail >= lim + 1) { 162864ae02c3SLuigi Rizzo D("XXX invalid reserve/avail %d %d", resvd, ring->avail); 162964ae02c3SLuigi Rizzo ring->reserved = resvd = 0; // XXX panic... 163064ae02c3SLuigi Rizzo } 163164ae02c3SLuigi Rizzo k = (k >= resvd) ? k - resvd : k + lim - resvd; 163264ae02c3SLuigi Rizzo } 163364ae02c3SLuigi Rizzo if (j != k) { 163464ae02c3SLuigi Rizzo n = k >= j ? k - j : k + lim - j; 163564ae02c3SLuigi Rizzo kring->nr_hwavail -= n; 163602ad4083SLuigi Rizzo kring->nr_hwcur = k; 163764ae02c3SLuigi Rizzo } 163864ae02c3SLuigi Rizzo k = ring->avail = kring->nr_hwavail - resvd; 163902ad4083SLuigi Rizzo if (k == 0 && td) 164068b8534bSLuigi Rizzo selrecord(td, &kring->si); 164102ad4083SLuigi Rizzo if (k && (netmap_verbose & NM_VERB_HOST)) 164202ad4083SLuigi Rizzo D("%d pkts from stack", k); 1643ce3ee1e7SLuigi Rizzo unlock_out: 1644ce3ee1e7SLuigi Rizzo 1645ce3ee1e7SLuigi Rizzo mtx_unlock(&kring->q_lock); 164668b8534bSLuigi Rizzo } 164768b8534bSLuigi Rizzo 164868b8534bSLuigi Rizzo 164968b8534bSLuigi Rizzo /* 1650ce3ee1e7SLuigi Rizzo * MUST BE CALLED UNDER NMG_LOCK() 1651ce3ee1e7SLuigi Rizzo * 165268b8534bSLuigi Rizzo * get a refcounted reference to an interface. 1653ce3ee1e7SLuigi Rizzo * This is always called in the execution of an ioctl(). 1654ce3ee1e7SLuigi Rizzo * 165568b8534bSLuigi Rizzo * Return ENXIO if the interface does not exist, EINVAL if netmap 165668b8534bSLuigi Rizzo * is not supported by the interface. 165768b8534bSLuigi Rizzo * If successful, hold a reference. 
1658f18be576SLuigi Rizzo * 1659ce3ee1e7SLuigi Rizzo * When the NIC is attached to a bridge, the reference is managed 1660f18be576SLuigi Rizzo * at na->na_bdg_refcount using ADD/DROP_BDG_REF(), just as for 1661f18be576SLuigi Rizzo * virtual ports. Hence, on the final DROP_BDG_REF(), the NIC 1662f18be576SLuigi Rizzo * is detached from the bridge and then ifp's refcount is dropped 1663f18be576SLuigi Rizzo * (for virtual ports this is equivalent to destroying ifp). 1664f18be576SLuigi Rizzo * 1665f18be576SLuigi Rizzo * This function uses if_rele() on error paths where we want to prevent 1666f18be576SLuigi Rizzo * the NIC from being detached from the bridge. But once the refcount 1667f18be576SLuigi Rizzo * is acquired by this function, it must be released using nm_if_rele(). 166868b8534bSLuigi Rizzo */ 166968b8534bSLuigi Rizzo static int 1670ce3ee1e7SLuigi Rizzo get_ifp(struct nmreq *nmr, struct ifnet **ifp, int create) 167168b8534bSLuigi Rizzo { 1672f18be576SLuigi Rizzo const char *name = nmr->nr_name; 1673f18be576SLuigi Rizzo int namelen = strlen(name); 1674f196ce38SLuigi Rizzo struct ifnet *iter = NULL; 1675f18be576SLuigi Rizzo int no_prefix = 0; 1676f196ce38SLuigi Rizzo 1677ce3ee1e7SLuigi Rizzo /* first try to see if this is a bridge port. */ 1678f196ce38SLuigi Rizzo struct nm_bridge *b; 1679f18be576SLuigi Rizzo struct netmap_adapter *na; 1680ce3ee1e7SLuigi Rizzo int i, j, cand = -1, cand2 = -1; 1681ce3ee1e7SLuigi Rizzo int needed; 1682f196ce38SLuigi Rizzo 1683ce3ee1e7SLuigi Rizzo NMG_LOCK_ASSERT(); 1684ce3ee1e7SLuigi Rizzo *ifp = NULL; /* default */ 1685f18be576SLuigi Rizzo if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) { 1686ce3ee1e7SLuigi Rizzo no_prefix = 1; /* no VALE prefix */ 1687ce3ee1e7SLuigi Rizzo goto no_bridge_port; 1688f18be576SLuigi Rizzo } 1689ce3ee1e7SLuigi Rizzo 1690ce3ee1e7SLuigi Rizzo b = nm_find_bridge(name, create); 1691f196ce38SLuigi Rizzo if (b == NULL) { 1692f196ce38SLuigi Rizzo D("no bridges available for '%s'", name); 1693f196ce38SLuigi Rizzo return (ENXIO); 1694f196ce38SLuigi Rizzo } 1695ce3ee1e7SLuigi Rizzo 1696ce3ee1e7SLuigi Rizzo /* Now we are sure that name starts with the bridge's name, 1697ce3ee1e7SLuigi Rizzo * look up the port in the bridge. We need to scan the entire 1698ce3ee1e7SLuigi Rizzo * list. It is not important to hold a WLOCK on the bridge 1699ce3ee1e7SLuigi Rizzo * during the search because NMG_LOCK already guarantees 1700ce3ee1e7SLuigi Rizzo * that there are no other possible writers. 
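 * Illustrative example (hypothetical names, added): for a request
 * name "vale0:em1", a virtual port matches on the full string,
 * while an attached NIC matches on the "em1" part that follows
 * the bridge name and the ':' separator.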
1701ce3ee1e7SLuigi Rizzo */ 1702ce3ee1e7SLuigi Rizzo 1703f196ce38SLuigi Rizzo /* look up in the local list of ports */ 1704ce3ee1e7SLuigi Rizzo for (j = 0; j < b->bdg_active_ports; j++) { 1705ce3ee1e7SLuigi Rizzo i = b->bdg_port_index[j]; 1706ce3ee1e7SLuigi Rizzo na = b->bdg_ports[i]; 1707ce3ee1e7SLuigi Rizzo // KASSERT(na != NULL); 1708f18be576SLuigi Rizzo iter = na->ifp; 1709f18be576SLuigi Rizzo /* XXX make sure the name only contains one : */ 1710f18be576SLuigi Rizzo if (!strcmp(iter->if_xname, name) /* virtual port */ || 1711ce3ee1e7SLuigi Rizzo (namelen > b->bdg_namelen && !strcmp(iter->if_xname, 1712ce3ee1e7SLuigi Rizzo name + b->bdg_namelen + 1)) /* NIC */) { 1713f196ce38SLuigi Rizzo ADD_BDG_REF(iter); 1714ce3ee1e7SLuigi Rizzo ND("found existing if %s refs %d", name, 1715ce3ee1e7SLuigi Rizzo NA(iter)->na_bdg_refcount); 1716ce3ee1e7SLuigi Rizzo *ifp = iter; 1717ce3ee1e7SLuigi Rizzo /* we are done, this is surely netmap capable */ 1718ce3ee1e7SLuigi Rizzo return 0; 1719f196ce38SLuigi Rizzo } 1720f196ce38SLuigi Rizzo } 1721ce3ee1e7SLuigi Rizzo /* not found, should we create it? */ 1722ce3ee1e7SLuigi Rizzo if (!create) 1723ce3ee1e7SLuigi Rizzo return ENXIO; 1724ce3ee1e7SLuigi Rizzo /* yes we should, see if we have space to attach entries */ 1725ce3ee1e7SLuigi Rizzo needed = 2; /* in some cases we only need 1 */ 1726ce3ee1e7SLuigi Rizzo if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) { 1727ce3ee1e7SLuigi Rizzo D("bridge full %d, cannot create new port", b->bdg_active_ports); 1728f196ce38SLuigi Rizzo return EINVAL; 1729f196ce38SLuigi Rizzo } 1730ce3ee1e7SLuigi Rizzo /* record the next two ports available, but do not allocate yet */ 1731ce3ee1e7SLuigi Rizzo cand = b->bdg_port_index[b->bdg_active_ports]; 1732ce3ee1e7SLuigi Rizzo cand2 = b->bdg_port_index[b->bdg_active_ports + 1]; 1733ce3ee1e7SLuigi Rizzo ND("+++ bridge %s port %s used %d avail %d %d", 1734ce3ee1e7SLuigi Rizzo b->bdg_basename, name, b->bdg_active_ports, cand, cand2); 1735ce3ee1e7SLuigi Rizzo 1736f18be576SLuigi Rizzo /* 1737f18be576SLuigi Rizzo * try to see if there is a matching NIC with this name 1738f18be576SLuigi Rizzo * (after the bridge's name) 1739f18be576SLuigi Rizzo */ 1740ce3ee1e7SLuigi Rizzo iter = ifunit_ref(name + b->bdg_namelen + 1); 1741f18be576SLuigi Rizzo if (!iter) { /* this is a virtual port */ 1742f18be576SLuigi Rizzo /* Create a temporary NA with arguments, then 1743f18be576SLuigi Rizzo * bdg_netmap_attach() will allocate the real one 1744f18be576SLuigi Rizzo * and attach it to the ifp 1745f18be576SLuigi Rizzo */ 1746f18be576SLuigi Rizzo struct netmap_adapter tmp_na; 17475ab0d24dSLuigi Rizzo int error; 1748f18be576SLuigi Rizzo 1749ce3ee1e7SLuigi Rizzo if (nmr->nr_cmd) { 1750ce3ee1e7SLuigi Rizzo /* nr_cmd must be 0 for a virtual port */ 1751ce3ee1e7SLuigi Rizzo return EINVAL; 1752ce3ee1e7SLuigi Rizzo } 1753f18be576SLuigi Rizzo bzero(&tmp_na, sizeof(tmp_na)); 1754f18be576SLuigi Rizzo /* bounds checking */ 1755f18be576SLuigi Rizzo tmp_na.num_tx_rings = nmr->nr_tx_rings; 1756ce3ee1e7SLuigi Rizzo nm_bound_var(&tmp_na.num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); 1757ce3ee1e7SLuigi Rizzo nmr->nr_tx_rings = tmp_na.num_tx_rings; // write back 1758f18be576SLuigi Rizzo tmp_na.num_rx_rings = nmr->nr_rx_rings; 1759ce3ee1e7SLuigi Rizzo nm_bound_var(&tmp_na.num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); 1760ce3ee1e7SLuigi Rizzo nmr->nr_rx_rings = tmp_na.num_rx_rings; // write back 1761ce3ee1e7SLuigi Rizzo nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE, 1762ce3ee1e7SLuigi Rizzo 1, NM_BDG_MAXSLOTS, NULL); 
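 /* Note (added): the nm_bound_var() calls clamp the
  * user-supplied ring and slot counts to valid ranges and
  * write the clamped values back into *nmr so that userspace
  * can see the values actually used. */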
1763ce3ee1e7SLuigi Rizzo tmp_na.num_tx_desc = nmr->nr_tx_slots; 1764ce3ee1e7SLuigi Rizzo nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE, 1765ce3ee1e7SLuigi Rizzo 1, NM_BDG_MAXSLOTS, NULL); 1766ce3ee1e7SLuigi Rizzo tmp_na.num_rx_desc = nmr->nr_rx_slots; 1767f18be576SLuigi Rizzo 1768ce3ee1e7SLuigi Rizzo /* create a struct ifnet for the new port. 1769ce3ee1e7SLuigi Rizzo * need M_NOWAIT as we are under nma_lock 1770ce3ee1e7SLuigi Rizzo */ 1771f18be576SLuigi Rizzo iter = malloc(sizeof(*iter), M_DEVBUF, M_NOWAIT | M_ZERO); 1772f196ce38SLuigi Rizzo if (!iter) 1773ce3ee1e7SLuigi Rizzo return ENOMEM; 1774ce3ee1e7SLuigi Rizzo 1775f196ce38SLuigi Rizzo strcpy(iter->if_xname, name); 1776f18be576SLuigi Rizzo tmp_na.ifp = iter; 1777f18be576SLuigi Rizzo /* bdg_netmap_attach creates a struct netmap_adapter */ 17785ab0d24dSLuigi Rizzo error = bdg_netmap_attach(&tmp_na); 17795ab0d24dSLuigi Rizzo if (error) { 17805ab0d24dSLuigi Rizzo D("error %d", error); 17815ab0d24dSLuigi Rizzo free(iter, M_DEVBUF); 17825ab0d24dSLuigi Rizzo return error; 17835ab0d24dSLuigi Rizzo } 1784ce3ee1e7SLuigi Rizzo cand2 = -1; /* only need one port */ 1785f18be576SLuigi Rizzo } else if (NETMAP_CAPABLE(iter)) { /* this is a NIC */ 1786ce3ee1e7SLuigi Rizzo /* make sure the NIC is not already in use */ 1787ce3ee1e7SLuigi Rizzo if (NETMAP_OWNED_BY_ANY(iter)) { 1788ce3ee1e7SLuigi Rizzo D("NIC %s busy, cannot attach to bridge", 1789ce3ee1e7SLuigi Rizzo iter->if_xname); 1790f18be576SLuigi Rizzo if_rele(iter); /* don't detach from bridge */ 1791ce3ee1e7SLuigi Rizzo return EINVAL; 1792f18be576SLuigi Rizzo } 1793ce3ee1e7SLuigi Rizzo if (nmr->nr_arg1 != NETMAP_BDG_HOST) 1794ce3ee1e7SLuigi Rizzo cand2 = -1; /* only need one port */ 1795ce3ee1e7SLuigi Rizzo } else { /* not a netmap-capable NIC */ 1796ce3ee1e7SLuigi Rizzo if_rele(iter); /* don't detach from bridge */ 1797ce3ee1e7SLuigi Rizzo return EINVAL; 1798ce3ee1e7SLuigi Rizzo } 1799ce3ee1e7SLuigi Rizzo na = NA(iter); 1800ce3ee1e7SLuigi Rizzo 1801ce3ee1e7SLuigi Rizzo BDG_WLOCK(b); 1802ce3ee1e7SLuigi Rizzo na->bdg_port = cand; 1803ce3ee1e7SLuigi Rizzo ND("NIC %p to bridge port %d", NA(iter), cand); 1804ce3ee1e7SLuigi Rizzo /* bind the port to the bridge (virtual ports are not active) */ 1805ce3ee1e7SLuigi Rizzo b->bdg_ports[cand] = na; 1806ce3ee1e7SLuigi Rizzo na->na_bdg = b; 1807ce3ee1e7SLuigi Rizzo b->bdg_active_ports++; 1808ce3ee1e7SLuigi Rizzo if (cand2 >= 0) { 1809ce3ee1e7SLuigi Rizzo /* also bind the host stack to the bridge */ 1810ce3ee1e7SLuigi Rizzo b->bdg_ports[cand2] = SWNA(iter); 1811f18be576SLuigi Rizzo SWNA(iter)->bdg_port = cand2; 1812f18be576SLuigi Rizzo SWNA(iter)->na_bdg = b; 1813ce3ee1e7SLuigi Rizzo b->bdg_active_ports++; 1814ce3ee1e7SLuigi Rizzo ND("host %p to bridge port %d", SWNA(iter), cand2); 1815f18be576SLuigi Rizzo } 1816ce3ee1e7SLuigi Rizzo ADD_BDG_REF(iter); // XXX one or two ? 1817ce3ee1e7SLuigi Rizzo ND("if %s refs %d", name, NA(iter)->na_bdg_refcount); 1818f18be576SLuigi Rizzo BDG_WUNLOCK(b); 1819ce3ee1e7SLuigi Rizzo *ifp = iter; 1820ce3ee1e7SLuigi Rizzo return 0; 1821ce3ee1e7SLuigi Rizzo 1822ce3ee1e7SLuigi Rizzo no_bridge_port: 1823f196ce38SLuigi Rizzo *ifp = iter; 1824f196ce38SLuigi Rizzo if (! 
*ifp) 182568b8534bSLuigi Rizzo *ifp = ifunit_ref(name); 182668b8534bSLuigi Rizzo if (*ifp == NULL) 182768b8534bSLuigi Rizzo return (ENXIO); 1828ce3ee1e7SLuigi Rizzo 1829f18be576SLuigi Rizzo if (NETMAP_CAPABLE(*ifp)) { 1830f18be576SLuigi Rizzo /* Users cannot use the NIC attached to a bridge directly */ 1831f18be576SLuigi Rizzo if (no_prefix && NETMAP_OWNED_BY_KERN(*ifp)) { 1832f18be576SLuigi Rizzo if_rele(*ifp); /* don't detach from bridge */ 1833f18be576SLuigi Rizzo return EINVAL; 1834f18be576SLuigi Rizzo } else 183568b8534bSLuigi Rizzo return 0; /* valid pointer, we hold the refcount */ 1836f18be576SLuigi Rizzo } 1837f196ce38SLuigi Rizzo nm_if_rele(*ifp); 183868b8534bSLuigi Rizzo return EINVAL; // not NETMAP capable 183968b8534bSLuigi Rizzo } 184068b8534bSLuigi Rizzo 184168b8534bSLuigi Rizzo 184268b8534bSLuigi Rizzo /* 184368b8534bSLuigi Rizzo * Error routine called when txsync/rxsync detects an error. 184468b8534bSLuigi Rizzo * Can't do much more than resetting cur = hwcur, avail = hwavail. 184568b8534bSLuigi Rizzo * Return 1 on reinit. 1846506cc70cSLuigi Rizzo * 1847506cc70cSLuigi Rizzo * This routine is only called by the upper half of the kernel. 1848506cc70cSLuigi Rizzo * It only reads hwcur (which is changed only by the upper half, too) 1849506cc70cSLuigi Rizzo * and hwavail (which may be changed by the lower half, but only on 1850506cc70cSLuigi Rizzo * a tx ring and only to increase it, so any error will be recovered 1851506cc70cSLuigi Rizzo * on the next call). For the above, we don't strictly need to call 1852506cc70cSLuigi Rizzo * it under lock. 185368b8534bSLuigi Rizzo */ 185468b8534bSLuigi Rizzo int 185568b8534bSLuigi Rizzo netmap_ring_reinit(struct netmap_kring *kring) 185668b8534bSLuigi Rizzo { 185768b8534bSLuigi Rizzo struct netmap_ring *ring = kring->ring; 185868b8534bSLuigi Rizzo u_int i, lim = kring->nkr_num_slots - 1; 185968b8534bSLuigi Rizzo int errors = 0; 186068b8534bSLuigi Rizzo 1861ce3ee1e7SLuigi Rizzo // XXX KASSERT nm_kr_tryget 18628241616dSLuigi Rizzo RD(10, "called for %s", kring->na->ifp->if_xname); 186368b8534bSLuigi Rizzo if (ring->cur > lim) 186468b8534bSLuigi Rizzo errors++; 186568b8534bSLuigi Rizzo for (i = 0; i <= lim; i++) { 186668b8534bSLuigi Rizzo u_int idx = ring->slot[i].buf_idx; 186768b8534bSLuigi Rizzo u_int len = ring->slot[i].len; 186868b8534bSLuigi Rizzo if (idx < 2 || idx >= netmap_total_buffers) { 186968b8534bSLuigi Rizzo if (!errors++) 187068b8534bSLuigi Rizzo D("bad buffer at slot %d idx %d len %d ", i, idx, len); 187168b8534bSLuigi Rizzo ring->slot[i].buf_idx = 0; 187268b8534bSLuigi Rizzo ring->slot[i].len = 0; 1873ce3ee1e7SLuigi Rizzo } else if (len > NETMAP_BDG_BUF_SIZE(kring->na->nm_mem)) { 187468b8534bSLuigi Rizzo ring->slot[i].len = 0; 187568b8534bSLuigi Rizzo if (!errors++) 187668b8534bSLuigi Rizzo D("bad len %d at slot %d idx %d", 187768b8534bSLuigi Rizzo len, i, idx); 187868b8534bSLuigi Rizzo } 187968b8534bSLuigi Rizzo } 188068b8534bSLuigi Rizzo if (errors) { 188168b8534bSLuigi Rizzo int pos = kring - kring->na->tx_rings; 1882d76bf4ffSLuigi Rizzo int n = kring->na->num_tx_rings + 1; 188368b8534bSLuigi Rizzo 18848241616dSLuigi Rizzo RD(10, "total %d errors", errors); 188568b8534bSLuigi Rizzo errors++; 18868241616dSLuigi Rizzo RD(10, "%s %s[%d] reinit, cur %d -> %d avail %d -> %d", 188768b8534bSLuigi Rizzo kring->na->ifp->if_xname, 188868b8534bSLuigi Rizzo pos < n ? "TX" : "RX", pos < n ? 
pos : pos - n, 188968b8534bSLuigi Rizzo ring->cur, kring->nr_hwcur, 189068b8534bSLuigi Rizzo ring->avail, kring->nr_hwavail); 189168b8534bSLuigi Rizzo ring->cur = kring->nr_hwcur; 189268b8534bSLuigi Rizzo ring->avail = kring->nr_hwavail; 189368b8534bSLuigi Rizzo } 189468b8534bSLuigi Rizzo return (errors ? 1 : 0); 189568b8534bSLuigi Rizzo } 189668b8534bSLuigi Rizzo 189768b8534bSLuigi Rizzo 189868b8534bSLuigi Rizzo /* 189968b8534bSLuigi Rizzo * Set the ring ID. For devices with a single queue, a request 190068b8534bSLuigi Rizzo * for all rings is the same as a single ring. 190168b8534bSLuigi Rizzo */ 190268b8534bSLuigi Rizzo static int 190368b8534bSLuigi Rizzo netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid) 190468b8534bSLuigi Rizzo { 190568b8534bSLuigi Rizzo struct ifnet *ifp = priv->np_ifp; 190668b8534bSLuigi Rizzo struct netmap_adapter *na = NA(ifp); 190768b8534bSLuigi Rizzo u_int i = ringid & NETMAP_RING_MASK; 190864ae02c3SLuigi Rizzo /* initially (np_qfirst == np_qlast) we don't want to lock */ 1909ce3ee1e7SLuigi Rizzo u_int lim = na->num_rx_rings; 191068b8534bSLuigi Rizzo 1911d76bf4ffSLuigi Rizzo if (na->num_tx_rings > lim) 1912d76bf4ffSLuigi Rizzo lim = na->num_tx_rings; 191364ae02c3SLuigi Rizzo if ( (ringid & NETMAP_HW_RING) && i >= lim) { 191468b8534bSLuigi Rizzo D("invalid ring id %d", i); 191568b8534bSLuigi Rizzo return (EINVAL); 191668b8534bSLuigi Rizzo } 191768b8534bSLuigi Rizzo priv->np_ringid = ringid; 191868b8534bSLuigi Rizzo if (ringid & NETMAP_SW_RING) { 191964ae02c3SLuigi Rizzo priv->np_qfirst = NETMAP_SW_RING; 192064ae02c3SLuigi Rizzo priv->np_qlast = 0; 192168b8534bSLuigi Rizzo } else if (ringid & NETMAP_HW_RING) { 192268b8534bSLuigi Rizzo priv->np_qfirst = i; 192368b8534bSLuigi Rizzo priv->np_qlast = i + 1; 192468b8534bSLuigi Rizzo } else { 192568b8534bSLuigi Rizzo priv->np_qfirst = 0; 192664ae02c3SLuigi Rizzo priv->np_qlast = NETMAP_HW_RING; 192768b8534bSLuigi Rizzo } 192868b8534bSLuigi Rizzo priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1; 1929ae10d1afSLuigi Rizzo if (netmap_verbose) { 193068b8534bSLuigi Rizzo if (ringid & NETMAP_SW_RING) 193168b8534bSLuigi Rizzo D("ringid %s set to SW RING", ifp->if_xname); 193268b8534bSLuigi Rizzo else if (ringid & NETMAP_HW_RING) 193368b8534bSLuigi Rizzo D("ringid %s set to HW RING %d", ifp->if_xname, 193468b8534bSLuigi Rizzo priv->np_qfirst); 193568b8534bSLuigi Rizzo else 193664ae02c3SLuigi Rizzo D("ringid %s set to all %d HW RINGS", ifp->if_xname, lim); 1937ae10d1afSLuigi Rizzo } 193868b8534bSLuigi Rizzo return 0; 193968b8534bSLuigi Rizzo } 194068b8534bSLuigi Rizzo 1941f18be576SLuigi Rizzo 1942f18be576SLuigi Rizzo /* 1943f18be576SLuigi Rizzo * possibly move the interface to netmap-mode. 1944f18be576SLuigi Rizzo * On success it returns a pointer to the netmap_if, otherwise NULL. 1945ce3ee1e7SLuigi Rizzo * This must be called with NMG_LOCK held. 
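 * A sketch of the expected calling sequence (see nm_bdg_attach()
 * below; illustrative only):
 *	error = get_ifp(nmr, &ifp, 1);	// create if not existing
 *	if (!error)
 *		nifp = netmap_do_regif(priv, ifp, nmr->nr_ringid, &error);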
1946f18be576SLuigi Rizzo */ 1947f18be576SLuigi Rizzo static struct netmap_if * 1948f18be576SLuigi Rizzo netmap_do_regif(struct netmap_priv_d *priv, struct ifnet *ifp, 1949f18be576SLuigi Rizzo uint16_t ringid, int *err) 1950f18be576SLuigi Rizzo { 1951f18be576SLuigi Rizzo struct netmap_adapter *na = NA(ifp); 1952f18be576SLuigi Rizzo struct netmap_if *nifp = NULL; 1953ce3ee1e7SLuigi Rizzo int error, need_mem; 1954f18be576SLuigi Rizzo 1955ce3ee1e7SLuigi Rizzo NMG_LOCK_ASSERT(); 1956f18be576SLuigi Rizzo /* ring configuration may have changed, fetch from the card */ 1957f18be576SLuigi Rizzo netmap_update_config(na); 1958f18be576SLuigi Rizzo priv->np_ifp = ifp; /* store the reference */ 1959f18be576SLuigi Rizzo error = netmap_set_ringid(priv, ringid); 1960f18be576SLuigi Rizzo if (error) 1961f18be576SLuigi Rizzo goto out; 1962ce3ee1e7SLuigi Rizzo /* ensure allocators are ready */ 1963ce3ee1e7SLuigi Rizzo need_mem = !netmap_have_memory_locked(priv); 1964ce3ee1e7SLuigi Rizzo if (need_mem) { 1965ce3ee1e7SLuigi Rizzo error = netmap_get_memory_locked(priv); 1966ce3ee1e7SLuigi Rizzo ND("get_memory returned %d", error); 1967ce3ee1e7SLuigi Rizzo if (error) 1968ce3ee1e7SLuigi Rizzo goto out; 1969ce3ee1e7SLuigi Rizzo } 1970f18be576SLuigi Rizzo nifp = netmap_if_new(ifp->if_xname, na); 1971f18be576SLuigi Rizzo if (nifp == NULL) { /* allocation failed */ 1972ce3ee1e7SLuigi Rizzo /* we should drop the allocator, but only 1973ce3ee1e7SLuigi Rizzo * if we were the ones who grabbed it 1974ce3ee1e7SLuigi Rizzo */ 1975ce3ee1e7SLuigi Rizzo if (need_mem) 1976ce3ee1e7SLuigi Rizzo netmap_drop_memory_locked(priv); 1977f18be576SLuigi Rizzo error = ENOMEM; 1978ce3ee1e7SLuigi Rizzo goto out; 1979ce3ee1e7SLuigi Rizzo } 1980ce3ee1e7SLuigi Rizzo na->refcount++; 1981ce3ee1e7SLuigi Rizzo if (ifp->if_capenable & IFCAP_NETMAP) { 1982f18be576SLuigi Rizzo /* was already set */ 1983f18be576SLuigi Rizzo } else { 1984ce3ee1e7SLuigi Rizzo u_int i; 1985f18be576SLuigi Rizzo /* Otherwise set the card in netmap mode 1986f18be576SLuigi Rizzo * and make it use the shared buffers. 1987ce3ee1e7SLuigi Rizzo * 1988ce3ee1e7SLuigi Rizzo * If the interface is attached to a bridge, lock it. 1989f18be576SLuigi Rizzo */ 1990ce3ee1e7SLuigi Rizzo if (NETMAP_OWNED_BY_KERN(ifp)) 1991ce3ee1e7SLuigi Rizzo BDG_WLOCK(NA(ifp)->na_bdg); 1992f18be576SLuigi Rizzo for (i = 0 ; i < na->num_tx_rings + 1; i++) 1993f18be576SLuigi Rizzo mtx_init(&na->tx_rings[i].q_lock, "nm_txq_lock", 1994ce3ee1e7SLuigi Rizzo NULL, MTX_DEF); 1995f18be576SLuigi Rizzo for (i = 0 ; i < na->num_rx_rings + 1; i++) { 1996f18be576SLuigi Rizzo mtx_init(&na->rx_rings[i].q_lock, "nm_rxq_lock", 1997ce3ee1e7SLuigi Rizzo NULL, MTX_DEF); 1998f18be576SLuigi Rizzo } 1999f18be576SLuigi Rizzo if (nma_is_hw(na)) { 2000f18be576SLuigi Rizzo SWNA(ifp)->tx_rings = &na->tx_rings[na->num_tx_rings]; 2001f18be576SLuigi Rizzo SWNA(ifp)->rx_rings = &na->rx_rings[na->num_rx_rings]; 2002f18be576SLuigi Rizzo } 2003ce3ee1e7SLuigi Rizzo /* 2004ce3ee1e7SLuigi Rizzo * do not core lock because the race is harmless here, 2005ce3ee1e7SLuigi Rizzo * there cannot be any traffic to netmap_transmit() 2006ce3ee1e7SLuigi Rizzo */ 2007f18be576SLuigi Rizzo error = na->nm_register(ifp, 1); /* mode on */ 2008ce3ee1e7SLuigi Rizzo // XXX do we need to nm_alloc_bdgfwd() in all cases ? 
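 /* Note (added): if either nm_register() above or
  * nm_alloc_bdgfwd() below fails, the error path rolls the
  * registration back through netmap_do_unregif() and the
  * function reports the failure by returning nifp == NULL. */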
2009f18be576SLuigi Rizzo if (!error) 2010f18be576SLuigi Rizzo error = nm_alloc_bdgfwd(na); 2011f18be576SLuigi Rizzo if (error) { 2012ce3ee1e7SLuigi Rizzo netmap_do_unregif(priv, nifp); 2013f18be576SLuigi Rizzo nifp = NULL; 2014f18be576SLuigi Rizzo } 2015ce3ee1e7SLuigi Rizzo if (NETMAP_OWNED_BY_KERN(ifp)) 2016ce3ee1e7SLuigi Rizzo BDG_WUNLOCK(NA(ifp)->na_bdg); 2017f18be576SLuigi Rizzo 2018f18be576SLuigi Rizzo } 2019f18be576SLuigi Rizzo out: 2020f18be576SLuigi Rizzo *err = error; 2021ce3ee1e7SLuigi Rizzo if (nifp != NULL) { 2022ce3ee1e7SLuigi Rizzo /* 2023ce3ee1e7SLuigi Rizzo * advertise that the interface is ready by setting np_nifp. 2024ce3ee1e7SLuigi Rizzo * The barrier is needed because readers (poll and *SYNC) 2025ce3ee1e7SLuigi Rizzo * check for priv->np_nifp != NULL without locking. 2026ce3ee1e7SLuigi Rizzo */ 2027ce3ee1e7SLuigi Rizzo wmb(); /* make sure previous writes are visible to all CPUs */ 2028ce3ee1e7SLuigi Rizzo priv->np_nifp = nifp; 2029ce3ee1e7SLuigi Rizzo } 2030f18be576SLuigi Rizzo return nifp; 2031f18be576SLuigi Rizzo } 2032f18be576SLuigi Rizzo 2033f18be576SLuigi Rizzo /* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */ 2034f18be576SLuigi Rizzo static int 2035ce3ee1e7SLuigi Rizzo nm_bdg_attach(struct nmreq *nmr) 2036f18be576SLuigi Rizzo { 2037f18be576SLuigi Rizzo struct ifnet *ifp; 2038f18be576SLuigi Rizzo struct netmap_if *nifp; 2039f18be576SLuigi Rizzo struct netmap_priv_d *npriv; 2040f18be576SLuigi Rizzo int error; 2041f18be576SLuigi Rizzo 2042f18be576SLuigi Rizzo npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO); 2043f18be576SLuigi Rizzo if (npriv == NULL) 2044f18be576SLuigi Rizzo return ENOMEM; 2045ce3ee1e7SLuigi Rizzo NMG_LOCK(); 2046ce3ee1e7SLuigi Rizzo error = get_ifp(nmr, &ifp, 1 /* create if not exists */); 2047ce3ee1e7SLuigi Rizzo if (error) /* no device, or another bridge or user owns the device */ 2048ce3ee1e7SLuigi Rizzo goto unlock_exit; 2049ce3ee1e7SLuigi Rizzo /* get_ifp() sets na_bdg if this is a physical interface 2050ce3ee1e7SLuigi Rizzo * that we can attach to a switch. 2051ce3ee1e7SLuigi Rizzo */ 2052ce3ee1e7SLuigi Rizzo if (!NETMAP_OWNED_BY_KERN(ifp)) { 2053ce3ee1e7SLuigi Rizzo /* got reference to a virtual port or direct access to a NIC. 
2054ce3ee1e7SLuigi Rizzo * perhaps specified no bridge prefix or wrong NIC name 2055ce3ee1e7SLuigi Rizzo */ 2056ce3ee1e7SLuigi Rizzo error = EINVAL; 2057ce3ee1e7SLuigi Rizzo goto unref_exit; 2058ce3ee1e7SLuigi Rizzo } 2059ce3ee1e7SLuigi Rizzo 2060ce3ee1e7SLuigi Rizzo if (NA(ifp)->refcount > 0) { /* already registered */ 2061ce3ee1e7SLuigi Rizzo error = EBUSY; 2062ce3ee1e7SLuigi Rizzo DROP_BDG_REF(ifp); 2063ce3ee1e7SLuigi Rizzo goto unlock_exit; 2064ce3ee1e7SLuigi Rizzo } 2065ce3ee1e7SLuigi Rizzo 2066ce3ee1e7SLuigi Rizzo nifp = netmap_do_regif(npriv, ifp, nmr->nr_ringid, &error); 2067ce3ee1e7SLuigi Rizzo if (!nifp) { 2068ce3ee1e7SLuigi Rizzo goto unref_exit; 2069ce3ee1e7SLuigi Rizzo } 2070ce3ee1e7SLuigi Rizzo 2071ce3ee1e7SLuigi Rizzo NA(ifp)->na_kpriv = npriv; 2072ce3ee1e7SLuigi Rizzo NMG_UNLOCK(); 2073ce3ee1e7SLuigi Rizzo ND("registered %s to netmap-mode", ifp->if_xname); 2074ce3ee1e7SLuigi Rizzo return 0; 2075ce3ee1e7SLuigi Rizzo 2076ce3ee1e7SLuigi Rizzo unref_exit: 2077ce3ee1e7SLuigi Rizzo nm_if_rele(ifp); 2078ce3ee1e7SLuigi Rizzo unlock_exit: 2079ce3ee1e7SLuigi Rizzo NMG_UNLOCK(); 2080f18be576SLuigi Rizzo bzero(npriv, sizeof(*npriv)); 2081f18be576SLuigi Rizzo free(npriv, M_DEVBUF); 2082f18be576SLuigi Rizzo return error; 2083f18be576SLuigi Rizzo } 2084f18be576SLuigi Rizzo 2085ce3ee1e7SLuigi Rizzo static int 2086ce3ee1e7SLuigi Rizzo nm_bdg_detach(struct nmreq *nmr) 2087ce3ee1e7SLuigi Rizzo { 2088ce3ee1e7SLuigi Rizzo struct ifnet *ifp; 2089ce3ee1e7SLuigi Rizzo int error; 2090ce3ee1e7SLuigi Rizzo int last_instance; 2091ce3ee1e7SLuigi Rizzo 2092ce3ee1e7SLuigi Rizzo NMG_LOCK(); 2093ce3ee1e7SLuigi Rizzo error = get_ifp(nmr, &ifp, 0 /* don't create */); 2094f18be576SLuigi Rizzo if (error) { /* no device, or another bridge or user owns the device */ 2095ce3ee1e7SLuigi Rizzo goto unlock_exit; 2096ce3ee1e7SLuigi Rizzo } 2097ce3ee1e7SLuigi Rizzo /* XXX do we need to check this ? */ 2098ce3ee1e7SLuigi Rizzo if (!NETMAP_OWNED_BY_KERN(ifp)) { 2099f18be576SLuigi Rizzo /* got reference to a virtual port or direct access to a NIC. 
2100f18be576SLuigi Rizzo * perhaps no bridge prefix or a wrong NIC name was specified 2101f18be576SLuigi Rizzo */ 2102f18be576SLuigi Rizzo error = EINVAL; 2103ce3ee1e7SLuigi Rizzo goto unref_exit; 2104f18be576SLuigi Rizzo } 2105f18be576SLuigi Rizzo 2106f18be576SLuigi Rizzo if (NA(ifp)->refcount == 0) { /* not registered */ 2107f18be576SLuigi Rizzo error = EINVAL; 2108f18be576SLuigi Rizzo goto unref_exit; 2109f18be576SLuigi Rizzo } 2110f18be576SLuigi Rizzo 2111ce3ee1e7SLuigi Rizzo DROP_BDG_REF(ifp); /* the one from get_ifp */ 2112ce3ee1e7SLuigi Rizzo last_instance = netmap_dtor_locked(NA(ifp)->na_kpriv); /* unregister */ 2113ce3ee1e7SLuigi Rizzo NMG_UNLOCK(); 2114ce3ee1e7SLuigi Rizzo if (!last_instance) { 2115ce3ee1e7SLuigi Rizzo D("--- error, trying to detach an entry with active mmaps"); 2116f18be576SLuigi Rizzo error = EINVAL; 2117ce3ee1e7SLuigi Rizzo } else { 2118ce3ee1e7SLuigi Rizzo struct netmap_priv_d *npriv = NA(ifp)->na_kpriv; 2119ce3ee1e7SLuigi Rizzo NA(ifp)->na_kpriv = NULL; 2120ce3ee1e7SLuigi Rizzo 2121ce3ee1e7SLuigi Rizzo bzero(npriv, sizeof(*npriv)); 2122ce3ee1e7SLuigi Rizzo free(npriv, M_DEVBUF); 2123f18be576SLuigi Rizzo } 2124ce3ee1e7SLuigi Rizzo return error; 2125f18be576SLuigi Rizzo 2126ce3ee1e7SLuigi Rizzo unref_exit: 2127ce3ee1e7SLuigi Rizzo nm_if_rele(ifp); 2128ce3ee1e7SLuigi Rizzo unlock_exit: 2129ce3ee1e7SLuigi Rizzo NMG_UNLOCK(); 2130ce3ee1e7SLuigi Rizzo return error; 2131f18be576SLuigi Rizzo } 2132f18be576SLuigi Rizzo 2133f18be576SLuigi Rizzo 2134f18be576SLuigi Rizzo /* Initialize the necessary fields of the sw adapter located right after 2135f18be576SLuigi Rizzo * the hw one. The sw adapter attaches a pair of sw rings to the netmap-mode NIC. 2136f18be576SLuigi Rizzo * It is always activated and deactivated at the same time as the hw one. 2137f18be576SLuigi Rizzo * Thus we don't need refcounting on the sw adapter. 2138f18be576SLuigi Rizzo * Regardless of the NIC's features we use a separate lock so that anybody 2139f18be576SLuigi Rizzo * can lock it independently from the hw adapter. 2140f18be576SLuigi Rizzo * Make sure nm_register is NULL so that it is handled as FALSE in nma_is_hw. 2141f18be576SLuigi Rizzo */ 2142f18be576SLuigi Rizzo static void 2143f18be576SLuigi Rizzo netmap_attach_sw(struct ifnet *ifp) 2144f18be576SLuigi Rizzo { 2145f18be576SLuigi Rizzo struct netmap_adapter *hw_na = NA(ifp); 2146f18be576SLuigi Rizzo struct netmap_adapter *na = SWNA(ifp); 2147f18be576SLuigi Rizzo 2148f18be576SLuigi Rizzo na->ifp = ifp; 2149f18be576SLuigi Rizzo na->num_rx_rings = na->num_tx_rings = 1; 2150f18be576SLuigi Rizzo na->num_tx_desc = hw_na->num_tx_desc; 2151f18be576SLuigi Rizzo na->num_rx_desc = hw_na->num_rx_desc; 2152f18be576SLuigi Rizzo na->nm_txsync = netmap_bdg_to_host; 2153ce3ee1e7SLuigi Rizzo /* we use the same memory allocator as 2154ce3ee1e7SLuigi Rizzo * the hw adapter */ 2155ce3ee1e7SLuigi Rizzo na->nm_mem = hw_na->nm_mem; 2156f18be576SLuigi Rizzo } 2157f18be576SLuigi Rizzo 2158f18be576SLuigi Rizzo 2159ce3ee1e7SLuigi Rizzo /* exported to kernel callers, e.g. OVS ? 2160ce3ee1e7SLuigi Rizzo * Entry point. 2161ce3ee1e7SLuigi Rizzo * Called without NMG_LOCK. 
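 * A minimal in-kernel usage sketch (hypothetical names,
 * illustrative only):
 *	struct nmreq nmr;
 *	bzero(&nmr, sizeof(nmr));
 *	nmr.nr_cmd = NETMAP_BDG_ATTACH;
 *	strncpy(nmr.nr_name, "vale0:em1", sizeof(nmr.nr_name) - 1);
 *	error = netmap_bdg_ctl(&nmr, NULL);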
2162ce3ee1e7SLuigi Rizzo */ 2163f18be576SLuigi Rizzo int 2164f18be576SLuigi Rizzo netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func) 2165f18be576SLuigi Rizzo { 2166f18be576SLuigi Rizzo struct nm_bridge *b; 2167f18be576SLuigi Rizzo struct netmap_adapter *na; 2168f18be576SLuigi Rizzo struct ifnet *iter; 2169f18be576SLuigi Rizzo char *name = nmr->nr_name; 2170f18be576SLuigi Rizzo int cmd = nmr->nr_cmd, namelen = strlen(name); 2171f18be576SLuigi Rizzo int error = 0, i, j; 2172f18be576SLuigi Rizzo 2173f18be576SLuigi Rizzo switch (cmd) { 2174f18be576SLuigi Rizzo case NETMAP_BDG_ATTACH: 2175ce3ee1e7SLuigi Rizzo error = nm_bdg_attach(nmr); 2176ce3ee1e7SLuigi Rizzo break; 2177ce3ee1e7SLuigi Rizzo 2178f18be576SLuigi Rizzo case NETMAP_BDG_DETACH: 2179ce3ee1e7SLuigi Rizzo error = nm_bdg_detach(nmr); 2180f18be576SLuigi Rizzo break; 2181f18be576SLuigi Rizzo 2182f18be576SLuigi Rizzo case NETMAP_BDG_LIST: 2183f18be576SLuigi Rizzo /* this is used to enumerate bridges and ports */ 2184f18be576SLuigi Rizzo if (namelen) { /* look up indexes of bridge and port */ 2185f18be576SLuigi Rizzo if (strncmp(name, NM_NAME, strlen(NM_NAME))) { 2186f18be576SLuigi Rizzo error = EINVAL; 2187f18be576SLuigi Rizzo break; 2188f18be576SLuigi Rizzo } 2189ce3ee1e7SLuigi Rizzo NMG_LOCK(); 2190f18be576SLuigi Rizzo b = nm_find_bridge(name, 0 /* don't create */); 2191f18be576SLuigi Rizzo if (!b) { 2192f18be576SLuigi Rizzo error = ENOENT; 2193ce3ee1e7SLuigi Rizzo NMG_UNLOCK(); 2194f18be576SLuigi Rizzo break; 2195f18be576SLuigi Rizzo } 2196f18be576SLuigi Rizzo 2197f18be576SLuigi Rizzo error = ENOENT; 2198ce3ee1e7SLuigi Rizzo for (j = 0; j < b->bdg_active_ports; j++) { 2199ce3ee1e7SLuigi Rizzo i = b->bdg_port_index[j]; 2200ce3ee1e7SLuigi Rizzo na = b->bdg_ports[i]; 2201ce3ee1e7SLuigi Rizzo if (na == NULL) { 2202ce3ee1e7SLuigi Rizzo D("---AAAAAAAAARGH-------"); 2203f18be576SLuigi Rizzo continue; 2204ce3ee1e7SLuigi Rizzo } 2205f18be576SLuigi Rizzo iter = na->ifp; 2206f18be576SLuigi Rizzo /* the former and the latter identify a 2207f18be576SLuigi Rizzo * virtual port and a NIC, respectively 2208f18be576SLuigi Rizzo */ 2209f18be576SLuigi Rizzo if (!strcmp(iter->if_xname, name) || 2210ce3ee1e7SLuigi Rizzo (namelen > b->bdg_namelen && 2211f18be576SLuigi Rizzo !strcmp(iter->if_xname, 2212ce3ee1e7SLuigi Rizzo name + b->bdg_namelen + 1))) { 2213f18be576SLuigi Rizzo /* bridge index */ 2214f18be576SLuigi Rizzo nmr->nr_arg1 = b - nm_bridges; 2215f18be576SLuigi Rizzo nmr->nr_arg2 = i; /* port index */ 2216f18be576SLuigi Rizzo error = 0; 2217f18be576SLuigi Rizzo break; 2218f18be576SLuigi Rizzo } 2219f18be576SLuigi Rizzo } 2220ce3ee1e7SLuigi Rizzo NMG_UNLOCK(); 2221f18be576SLuigi Rizzo } else { 2222f18be576SLuigi Rizzo /* return the first non-empty entry starting from 2223f18be576SLuigi Rizzo * bridge nr_arg1 and port nr_arg2. 
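 * A sketch of the enumeration loop as seen from userspace
 * (illustrative only; BDG_LIST is reached through NIOCGINFO):
 *	bzero(&nmr, sizeof(nmr));
 *	nmr.nr_version = NETMAP_API;
 *	nmr.nr_cmd = NETMAP_BDG_LIST;
 *	for (;;) {
 *		nmr.nr_name[0] = '\0';	// empty name selects enumeration
 *		if (ioctl(fd, NIOCGINFO, &nmr) != 0)
 *			break;		// no more ports
 *		// use nmr.nr_name, nmr.nr_arg1 (bridge), nmr.nr_arg2 (port)
 *		nmr.nr_arg2++;
 *	}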
2224f18be576SLuigi Rizzo * 2225f18be576SLuigi Rizzo * Users can detect the end of the same bridge by 2226f18be576SLuigi Rizzo * comparing the new and old values of nr_arg1, and can 2227f18be576SLuigi Rizzo * detect the end of all the bridges by error != 0 2228f18be576SLuigi Rizzo */ 2229f18be576SLuigi Rizzo i = nmr->nr_arg1; 2230f18be576SLuigi Rizzo j = nmr->nr_arg2; 2231f18be576SLuigi Rizzo 2232ce3ee1e7SLuigi Rizzo NMG_LOCK(); 2233ce3ee1e7SLuigi Rizzo for (error = ENOENT; i < NM_BRIDGES; i++) { 2234f18be576SLuigi Rizzo b = nm_bridges + i; 2235ce3ee1e7SLuigi Rizzo if (j >= b->bdg_active_ports) { 2236ce3ee1e7SLuigi Rizzo j = 0; /* following bridges scan from 0 */ 2237f18be576SLuigi Rizzo continue; 2238ce3ee1e7SLuigi Rizzo } 2239f18be576SLuigi Rizzo nmr->nr_arg1 = i; 2240f18be576SLuigi Rizzo nmr->nr_arg2 = j; 2241ce3ee1e7SLuigi Rizzo j = b->bdg_port_index[j]; 2242ce3ee1e7SLuigi Rizzo na = b->bdg_ports[j]; 2243ce3ee1e7SLuigi Rizzo iter = na->ifp; 2244ce3ee1e7SLuigi Rizzo strncpy(name, iter->if_xname, (size_t)IFNAMSIZ); 2245f18be576SLuigi Rizzo error = 0; 2246f18be576SLuigi Rizzo break; 2247f18be576SLuigi Rizzo } 2248ce3ee1e7SLuigi Rizzo NMG_UNLOCK(); 2249f18be576SLuigi Rizzo } 2250f18be576SLuigi Rizzo break; 2251f18be576SLuigi Rizzo 2252f18be576SLuigi Rizzo case NETMAP_BDG_LOOKUP_REG: 2253f18be576SLuigi Rizzo /* register a lookup function for the given bridge. 2254f18be576SLuigi Rizzo * nmr->nr_name may be just the bridge's name (including ':' 2255f18be576SLuigi Rizzo * if it is not just NM_NAME). 2256f18be576SLuigi Rizzo */ 2257f18be576SLuigi Rizzo if (!func) { 2258f18be576SLuigi Rizzo error = EINVAL; 2259f18be576SLuigi Rizzo break; 2260f18be576SLuigi Rizzo } 2261ce3ee1e7SLuigi Rizzo NMG_LOCK(); 2262f18be576SLuigi Rizzo b = nm_find_bridge(name, 0 /* don't create */); 2263f18be576SLuigi Rizzo if (!b) { 2264f18be576SLuigi Rizzo error = EINVAL; 2265ce3ee1e7SLuigi Rizzo } else { 2266f18be576SLuigi Rizzo b->nm_bdg_lookup = func; 2267ce3ee1e7SLuigi Rizzo } 2268ce3ee1e7SLuigi Rizzo NMG_UNLOCK(); 2269f18be576SLuigi Rizzo break; 2270ce3ee1e7SLuigi Rizzo 2271f18be576SLuigi Rizzo default: 2272f18be576SLuigi Rizzo D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd); 2273f18be576SLuigi Rizzo error = EINVAL; 2274f18be576SLuigi Rizzo break; 2275f18be576SLuigi Rizzo } 2276f18be576SLuigi Rizzo return error; 2277f18be576SLuigi Rizzo } 2278f18be576SLuigi Rizzo 2279f18be576SLuigi Rizzo 228068b8534bSLuigi Rizzo /* 228168b8534bSLuigi Rizzo * ioctl(2) support for the "netmap" device. 228268b8534bSLuigi Rizzo * 228368b8534bSLuigi Rizzo * Following is a list of accepted commands: 228468b8534bSLuigi Rizzo * - NIOCGINFO 228568b8534bSLuigi Rizzo * - SIOCGIFADDR just for convenience 228668b8534bSLuigi Rizzo * - NIOCREGIF 228768b8534bSLuigi Rizzo * - NIOCUNREGIF 228868b8534bSLuigi Rizzo * - NIOCTXSYNC 228968b8534bSLuigi Rizzo * - NIOCRXSYNC 229068b8534bSLuigi Rizzo * 229168b8534bSLuigi Rizzo * Return 0 on success, errno otherwise. 
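 * A minimal userspace sketch (illustrative only; error handling
 * and the mmap()/ring handling omitted; "em0" is just an example):
 *	struct nmreq nmr;
 *	bzero(&nmr, sizeof(nmr));
 *	nmr.nr_version = NETMAP_API;
 *	strncpy(nmr.nr_name, "em0", sizeof(nmr.nr_name) - 1);
 *	ioctl(fd, NIOCREGIF, &nmr);	// bind fd to em0 in netmap mode
 *	...
 *	ioctl(fd, NIOCTXSYNC, NULL);	// tell the kernel to send
 *	ioctl(fd, NIOCRXSYNC, NULL);	// tell the kernel to receive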
229268b8534bSLuigi Rizzo */ 229368b8534bSLuigi Rizzo static int 22940b8ed8e0SLuigi Rizzo netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, 22950b8ed8e0SLuigi Rizzo int fflag, struct thread *td) 229668b8534bSLuigi Rizzo { 229768b8534bSLuigi Rizzo struct netmap_priv_d *priv = NULL; 2298ce3ee1e7SLuigi Rizzo struct ifnet *ifp = NULL; 229968b8534bSLuigi Rizzo struct nmreq *nmr = (struct nmreq *) data; 2300ce3ee1e7SLuigi Rizzo struct netmap_adapter *na = NULL; 230168b8534bSLuigi Rizzo int error; 230264ae02c3SLuigi Rizzo u_int i, lim; 230368b8534bSLuigi Rizzo struct netmap_if *nifp; 2304ce3ee1e7SLuigi Rizzo struct netmap_kring *krings; 230568b8534bSLuigi Rizzo 23060b8ed8e0SLuigi Rizzo (void)dev; /* UNUSED */ 23070b8ed8e0SLuigi Rizzo (void)fflag; /* UNUSED */ 2308f196ce38SLuigi Rizzo #ifdef linux 2309f196ce38SLuigi Rizzo #define devfs_get_cdevpriv(pp) \ 2310f196ce38SLuigi Rizzo ({ *(struct netmap_priv_d **)pp = ((struct file *)td)->private_data; \ 2311f196ce38SLuigi Rizzo (*pp ? 0 : ENOENT); }) 2312f196ce38SLuigi Rizzo 2313f196ce38SLuigi Rizzo /* devfs_set_cdevpriv cannot fail on linux */ 2314f196ce38SLuigi Rizzo #define devfs_set_cdevpriv(p, fn) \ 2315f196ce38SLuigi Rizzo ({ ((struct file *)td)->private_data = p; (p ? 0 : EINVAL); }) 2316f196ce38SLuigi Rizzo 2317f196ce38SLuigi Rizzo 2318f196ce38SLuigi Rizzo #define devfs_clear_cdevpriv() do { \ 2319f196ce38SLuigi Rizzo netmap_dtor(priv); ((struct file *)td)->private_data = 0; \ 2320f196ce38SLuigi Rizzo } while (0) 2321f196ce38SLuigi Rizzo #endif /* linux */ 2322f196ce38SLuigi Rizzo 2323506cc70cSLuigi Rizzo CURVNET_SET(TD_TO_VNET(td)); 2324506cc70cSLuigi Rizzo 232568b8534bSLuigi Rizzo error = devfs_get_cdevpriv((void **)&priv); 23268241616dSLuigi Rizzo if (error) { 2327506cc70cSLuigi Rizzo CURVNET_RESTORE(); 23288241616dSLuigi Rizzo /* XXX ENOENT should be impossible, since the priv 23298241616dSLuigi Rizzo * is now created in the open */ 23308241616dSLuigi Rizzo return (error == ENOENT ? 
ENXIO : error); 2331506cc70cSLuigi Rizzo } 233268b8534bSLuigi Rizzo 2333f196ce38SLuigi Rizzo nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0'; /* truncate name */ 233468b8534bSLuigi Rizzo switch (cmd) { 233568b8534bSLuigi Rizzo case NIOCGINFO: /* return capabilities etc */ 233664ae02c3SLuigi Rizzo if (nmr->nr_version != NETMAP_API) { 233764ae02c3SLuigi Rizzo D("API mismatch got %d have %d", 233864ae02c3SLuigi Rizzo nmr->nr_version, NETMAP_API); 233964ae02c3SLuigi Rizzo nmr->nr_version = NETMAP_API; 234064ae02c3SLuigi Rizzo error = EINVAL; 234164ae02c3SLuigi Rizzo break; 234264ae02c3SLuigi Rizzo } 2343f18be576SLuigi Rizzo if (nmr->nr_cmd == NETMAP_BDG_LIST) { 2344f18be576SLuigi Rizzo error = netmap_bdg_ctl(nmr, NULL); 2345f18be576SLuigi Rizzo break; 2346f18be576SLuigi Rizzo } 2347ce3ee1e7SLuigi Rizzo 2348ce3ee1e7SLuigi Rizzo NMG_LOCK(); 2349ce3ee1e7SLuigi Rizzo do { 2350ce3ee1e7SLuigi Rizzo /* memsize is always valid */ 2351ce3ee1e7SLuigi Rizzo struct netmap_mem_d *nmd = &nm_mem; 2352ce3ee1e7SLuigi Rizzo u_int memflags; 2353ce3ee1e7SLuigi Rizzo 2354ce3ee1e7SLuigi Rizzo if (nmr->nr_name[0] != '\0') { 2355ce3ee1e7SLuigi Rizzo /* get a refcount */ 2356ce3ee1e7SLuigi Rizzo error = get_ifp(nmr, &ifp, 1 /* create */); 23578241616dSLuigi Rizzo if (error) 23588241616dSLuigi Rizzo break; 2359ce3ee1e7SLuigi Rizzo na = NA(ifp); /* retrieve the netmap adapter */ 2360ce3ee1e7SLuigi Rizzo nmd = na->nm_mem; /* and its memory allocator */ 2361ce3ee1e7SLuigi Rizzo } 2362ce3ee1e7SLuigi Rizzo 2363ce3ee1e7SLuigi Rizzo error = netmap_mem_get_info(nmd, &nmr->nr_memsize, &memflags); 2364ce3ee1e7SLuigi Rizzo if (error) 2365ce3ee1e7SLuigi Rizzo break; 2366ce3ee1e7SLuigi Rizzo if (na == NULL) /* only memory info */ 2367ce3ee1e7SLuigi Rizzo break; 23688241616dSLuigi Rizzo nmr->nr_offset = 0; 23698241616dSLuigi Rizzo nmr->nr_rx_slots = nmr->nr_tx_slots = 0; 2370ae10d1afSLuigi Rizzo netmap_update_config(na); 2371d76bf4ffSLuigi Rizzo nmr->nr_rx_rings = na->num_rx_rings; 2372d76bf4ffSLuigi Rizzo nmr->nr_tx_rings = na->num_tx_rings; 237364ae02c3SLuigi Rizzo nmr->nr_rx_slots = na->num_rx_desc; 237464ae02c3SLuigi Rizzo nmr->nr_tx_slots = na->num_tx_desc; 2375ce3ee1e7SLuigi Rizzo if (memflags & NETMAP_MEM_PRIVATE) 2376ce3ee1e7SLuigi Rizzo nmr->nr_ringid |= NETMAP_PRIV_MEM; 2377ce3ee1e7SLuigi Rizzo } while (0); 2378ce3ee1e7SLuigi Rizzo if (ifp) 2379f196ce38SLuigi Rizzo nm_if_rele(ifp); /* return the refcount */ 2380ce3ee1e7SLuigi Rizzo NMG_UNLOCK(); 238168b8534bSLuigi Rizzo break; 238268b8534bSLuigi Rizzo 238368b8534bSLuigi Rizzo case NIOCREGIF: 238464ae02c3SLuigi Rizzo if (nmr->nr_version != NETMAP_API) { 238564ae02c3SLuigi Rizzo nmr->nr_version = NETMAP_API; 238664ae02c3SLuigi Rizzo error = EINVAL; 238764ae02c3SLuigi Rizzo break; 238864ae02c3SLuigi Rizzo } 2389f18be576SLuigi Rizzo /* possibly attach/detach NIC and VALE switch */ 2390f18be576SLuigi Rizzo i = nmr->nr_cmd; 2391f18be576SLuigi Rizzo if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH) { 2392f18be576SLuigi Rizzo error = netmap_bdg_ctl(nmr, NULL); 2393f18be576SLuigi Rizzo break; 2394f18be576SLuigi Rizzo } else if (i != 0) { 2395f18be576SLuigi Rizzo D("nr_cmd must be 0 not %d", i); 2396f18be576SLuigi Rizzo error = EINVAL; 2397f18be576SLuigi Rizzo break; 2398f18be576SLuigi Rizzo } 2399f18be576SLuigi Rizzo 24008241616dSLuigi Rizzo /* protect access to priv from concurrent NIOCREGIF */ 2401ce3ee1e7SLuigi Rizzo NMG_LOCK(); 2402ce3ee1e7SLuigi Rizzo do { 2403ce3ee1e7SLuigi Rizzo u_int memflags; 2404ce3ee1e7SLuigi Rizzo 24058241616dSLuigi Rizzo if (priv->np_ifp != NULL) { 
/* thread already registered */ 2406506cc70cSLuigi Rizzo error = netmap_set_ringid(priv, nmr->nr_ringid); 2407506cc70cSLuigi Rizzo break; 2408506cc70cSLuigi Rizzo } 240968b8534bSLuigi Rizzo /* find the interface and a reference */ 2410ce3ee1e7SLuigi Rizzo error = get_ifp(nmr, &ifp, 1 /* create */); /* keep reference */ 241168b8534bSLuigi Rizzo if (error) 2412ce3ee1e7SLuigi Rizzo break; 2413ce3ee1e7SLuigi Rizzo if (NETMAP_OWNED_BY_KERN(ifp)) { 2414f18be576SLuigi Rizzo nm_if_rele(ifp); 2415ce3ee1e7SLuigi Rizzo error = EBUSY; 2416ce3ee1e7SLuigi Rizzo break; 2417f196ce38SLuigi Rizzo } 2418f18be576SLuigi Rizzo nifp = netmap_do_regif(priv, ifp, nmr->nr_ringid, &error); 2419f18be576SLuigi Rizzo if (!nifp) { /* reg. failed, release priv and ref */ 2420f196ce38SLuigi Rizzo nm_if_rele(ifp); /* return the refcount */ 24218241616dSLuigi Rizzo priv->np_ifp = NULL; 24228241616dSLuigi Rizzo priv->np_nifp = NULL; 2423ce3ee1e7SLuigi Rizzo break; 242468b8534bSLuigi Rizzo } 242568b8534bSLuigi Rizzo 242668b8534bSLuigi Rizzo /* return the offset of the netmap_if object */ 2427f18be576SLuigi Rizzo na = NA(ifp); /* retrieve netmap adapter */ 2428d76bf4ffSLuigi Rizzo nmr->nr_rx_rings = na->num_rx_rings; 2429d76bf4ffSLuigi Rizzo nmr->nr_tx_rings = na->num_tx_rings; 243064ae02c3SLuigi Rizzo nmr->nr_rx_slots = na->num_rx_desc; 243164ae02c3SLuigi Rizzo nmr->nr_tx_slots = na->num_tx_desc; 2432ce3ee1e7SLuigi Rizzo error = netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, &memflags); 2433ce3ee1e7SLuigi Rizzo if (error) { 2434ce3ee1e7SLuigi Rizzo nm_if_rele(ifp); 2435ce3ee1e7SLuigi Rizzo break; 2436ce3ee1e7SLuigi Rizzo } 2437ce3ee1e7SLuigi Rizzo if (memflags & NETMAP_MEM_PRIVATE) { 2438ce3ee1e7SLuigi Rizzo nmr->nr_ringid |= NETMAP_PRIV_MEM; 24393d819cb6SLuigi Rizzo *(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM; 2440ce3ee1e7SLuigi Rizzo } 2441ce3ee1e7SLuigi Rizzo nmr->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp); 2442ce3ee1e7SLuigi Rizzo } while (0); 2443ce3ee1e7SLuigi Rizzo NMG_UNLOCK(); 244468b8534bSLuigi Rizzo break; 244568b8534bSLuigi Rizzo 244668b8534bSLuigi Rizzo case NIOCUNREGIF: 24478241616dSLuigi Rizzo // XXX we have no data here ? 
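		/* (hedged note: teardown is now driven by close(2) on the
		 * descriptor, which ends up in netmap_dtor(), so userspace
		 * simply closes the fd instead of issuing this ioctl) */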
24488241616dSLuigi Rizzo 		D("deprecated, data is %p", nmr);
24498241616dSLuigi Rizzo 		error = EINVAL;
245068b8534bSLuigi Rizzo 		break;
245168b8534bSLuigi Rizzo 
245268b8534bSLuigi Rizzo 	case NIOCTXSYNC:
245368b8534bSLuigi Rizzo 	case NIOCRXSYNC:
24548241616dSLuigi Rizzo 		nifp = priv->np_nifp;
24558241616dSLuigi Rizzo 
24568241616dSLuigi Rizzo 		if (nifp == NULL) {
2457506cc70cSLuigi Rizzo 			error = ENXIO;
2458506cc70cSLuigi Rizzo 			break;
2459506cc70cSLuigi Rizzo 		}
24608241616dSLuigi Rizzo 		rmb(); /* make sure following reads are not from cache */
24618241616dSLuigi Rizzo 
246268b8534bSLuigi Rizzo 		ifp = priv->np_ifp;	/* we have a reference */
24638241616dSLuigi Rizzo 
24648241616dSLuigi Rizzo 		if (ifp == NULL) {
24658241616dSLuigi Rizzo 			D("Internal error: nifp != NULL && ifp == NULL");
24668241616dSLuigi Rizzo 			error = ENXIO;
24678241616dSLuigi Rizzo 			break;
24688241616dSLuigi Rizzo 		}
24698241616dSLuigi Rizzo 
247068b8534bSLuigi Rizzo 		na = NA(ifp); /* retrieve netmap adapter */
247164ae02c3SLuigi Rizzo 		if (priv->np_qfirst == NETMAP_SW_RING) { /* host rings */
247268b8534bSLuigi Rizzo 			if (cmd == NIOCTXSYNC)
2473ce3ee1e7SLuigi Rizzo 				netmap_txsync_to_host(na);
247468b8534bSLuigi Rizzo 			else
2475ce3ee1e7SLuigi Rizzo 				netmap_rxsync_from_host(na, NULL, NULL);
2476506cc70cSLuigi Rizzo 			break;
247768b8534bSLuigi Rizzo 		}
247864ae02c3SLuigi Rizzo 		/* find the last ring to scan */
247964ae02c3SLuigi Rizzo 		lim = priv->np_qlast;
248064ae02c3SLuigi Rizzo 		if (lim == NETMAP_HW_RING)
24813c0caf6cSLuigi Rizzo 			lim = (cmd == NIOCTXSYNC) ?
2482d76bf4ffSLuigi Rizzo 			    na->num_tx_rings : na->num_rx_rings;
248368b8534bSLuigi Rizzo 
2484ce3ee1e7SLuigi Rizzo 		krings = (cmd == NIOCTXSYNC) ? na->tx_rings : na->rx_rings;
248564ae02c3SLuigi Rizzo 		for (i = priv->np_qfirst; i < lim; i++) {
2486ce3ee1e7SLuigi Rizzo 			struct netmap_kring *kring = krings + i;
2487ce3ee1e7SLuigi Rizzo 			if (nm_kr_tryget(kring)) {
2488ce3ee1e7SLuigi Rizzo 				error = EBUSY;
2489ce3ee1e7SLuigi Rizzo 				goto out;
2490ce3ee1e7SLuigi Rizzo 			}
249168b8534bSLuigi Rizzo 			if (cmd == NIOCTXSYNC) {
249268b8534bSLuigi Rizzo 				if (netmap_verbose & NM_VERB_TXSYNC)
24933c0caf6cSLuigi Rizzo 					D("pre txsync ring %d cur %d hwcur %d",
249468b8534bSLuigi Rizzo 					    i, kring->ring->cur,
249568b8534bSLuigi Rizzo 					    kring->nr_hwcur);
2496ce3ee1e7SLuigi Rizzo 				na->nm_txsync(ifp, i, NAF_FORCE_RECLAIM);
249768b8534bSLuigi Rizzo 				if (netmap_verbose & NM_VERB_TXSYNC)
24983c0caf6cSLuigi Rizzo 					D("post txsync ring %d cur %d hwcur %d",
249968b8534bSLuigi Rizzo 					    i, kring->ring->cur,
250068b8534bSLuigi Rizzo 					    kring->nr_hwcur);
250168b8534bSLuigi Rizzo 			} else {
2502ce3ee1e7SLuigi Rizzo 				na->nm_rxsync(ifp, i, NAF_FORCE_READ);
250368b8534bSLuigi Rizzo 				microtime(&na->rx_rings[i].ring->ts);
250468b8534bSLuigi Rizzo 			}
2505ce3ee1e7SLuigi Rizzo 			nm_kr_put(kring);
250668b8534bSLuigi Rizzo 		}
250768b8534bSLuigi Rizzo 
250868b8534bSLuigi Rizzo 		break;
250968b8534bSLuigi Rizzo 
2510f196ce38SLuigi Rizzo #ifdef __FreeBSD__
251168b8534bSLuigi Rizzo 	case BIOCIMMEDIATE:
251268b8534bSLuigi Rizzo 	case BIOCGHDRCMPLT:
251368b8534bSLuigi Rizzo 	case BIOCSHDRCMPLT:
251468b8534bSLuigi Rizzo 	case BIOCSSEESENT:
251568b8534bSLuigi Rizzo 		D("ignore BIOCIMMEDIATE/BIOCGHDRCMPLT/BIOCSHDRCMPLT/BIOCSSEESENT");
251668b8534bSLuigi Rizzo 		break;
251768b8534bSLuigi Rizzo 
2518babc7c12SLuigi Rizzo 	default:	/* allow device-specific ioctls */
251968b8534bSLuigi Rizzo 	    {
252068b8534bSLuigi Rizzo 		struct socket so;
2521ce3ee1e7SLuigi Rizzo 
252268b8534bSLuigi Rizzo 		bzero(&so, sizeof(so));
2523ce3ee1e7SLuigi Rizzo 		NMG_LOCK();
2524ce3ee1e7SLuigi Rizzo 		error = get_ifp(nmr, &ifp, 0 /*
don't create */); /* keep reference */
2525ce3ee1e7SLuigi Rizzo 		if (error) {
2526ce3ee1e7SLuigi Rizzo 			NMG_UNLOCK();
252768b8534bSLuigi Rizzo 			break;
2528ce3ee1e7SLuigi Rizzo 		}
252968b8534bSLuigi Rizzo 		so.so_vnet = ifp->if_vnet;
253068b8534bSLuigi Rizzo 		// so->so_proto not null.
253168b8534bSLuigi Rizzo 		error = ifioctl(&so, cmd, data, td);
2532f196ce38SLuigi Rizzo 		nm_if_rele(ifp);
2533ce3ee1e7SLuigi Rizzo 		NMG_UNLOCK();
2534babc7c12SLuigi Rizzo 		break;
253568b8534bSLuigi Rizzo 	    }
2536f196ce38SLuigi Rizzo 
2537f196ce38SLuigi Rizzo #else /* linux */
2538f196ce38SLuigi Rizzo 	default:
2539f196ce38SLuigi Rizzo 		error = EOPNOTSUPP;
2540f196ce38SLuigi Rizzo #endif /* linux */
254168b8534bSLuigi Rizzo 	}
2542ce3ee1e7SLuigi Rizzo out:
254368b8534bSLuigi Rizzo 
2544506cc70cSLuigi Rizzo 	CURVNET_RESTORE();
254568b8534bSLuigi Rizzo 	return (error);
254668b8534bSLuigi Rizzo }
254768b8534bSLuigi Rizzo 
254868b8534bSLuigi Rizzo 
254968b8534bSLuigi Rizzo /*
255068b8534bSLuigi Rizzo  * select(2) and poll(2) handlers for the "netmap" device.
255168b8534bSLuigi Rizzo  *
255268b8534bSLuigi Rizzo  * Can be called for one or more queues.
255368b8534bSLuigi Rizzo  * Return the event mask corresponding to ready events.
255468b8534bSLuigi Rizzo  * If there are no ready events, do a selrecord on either individual
2555ce3ee1e7SLuigi Rizzo  * selinfo or on the global one.
255668b8534bSLuigi Rizzo  * Device-dependent parts (locking and sync of tx/rx rings)
255768b8534bSLuigi Rizzo  * are done through callbacks.
2558f196ce38SLuigi Rizzo  *
255901c7d25fSLuigi Rizzo  * On linux, the arguments are really pwait, the poll table, and 'td' is a struct file *.
256001c7d25fSLuigi Rizzo  * The first one is remapped to pwait as selrecord() uses the name as a
256101c7d25fSLuigi Rizzo  * hidden argument.
256268b8534bSLuigi Rizzo  */
256368b8534bSLuigi Rizzo static int
256401c7d25fSLuigi Rizzo netmap_poll(struct cdev *dev, int events, struct thread *td)
256568b8534bSLuigi Rizzo {
256668b8534bSLuigi Rizzo 	struct netmap_priv_d *priv = NULL;
256768b8534bSLuigi Rizzo 	struct netmap_adapter *na;
256868b8534bSLuigi Rizzo 	struct ifnet *ifp;
256968b8534bSLuigi Rizzo 	struct netmap_kring *kring;
2570954dca4cSLuigi Rizzo 	u_int i, check_all_tx, check_all_rx, want_tx, want_rx, revents = 0;
2571091fd0abSLuigi Rizzo 	u_int lim_tx, lim_rx, host_forwarded = 0;
2572091fd0abSLuigi Rizzo 	struct mbq q = { NULL, NULL, 0 };
257301c7d25fSLuigi Rizzo 	void *pwait = dev;	/* linux compatibility */
257401c7d25fSLuigi Rizzo 
2575ce3ee1e7SLuigi Rizzo 	int retry_tx = 1;
2576ce3ee1e7SLuigi Rizzo 
257701c7d25fSLuigi Rizzo 	(void)pwait;
257868b8534bSLuigi Rizzo 
257968b8534bSLuigi Rizzo 	if (devfs_get_cdevpriv((void **)&priv) != 0 || priv == NULL)
258068b8534bSLuigi Rizzo 		return POLLERR;
258168b8534bSLuigi Rizzo 
25828241616dSLuigi Rizzo 	if (priv->np_nifp == NULL) {
25838241616dSLuigi Rizzo 		D("No if registered");
25848241616dSLuigi Rizzo 		return POLLERR;
25858241616dSLuigi Rizzo 	}
25868241616dSLuigi Rizzo 	rmb(); /* make sure following reads are not from cache */
25878241616dSLuigi Rizzo 
258868b8534bSLuigi Rizzo 	ifp = priv->np_ifp;
258968b8534bSLuigi Rizzo 	// XXX check for deleting() ?
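	/* (hedged caller-side sketch: a user thread typically blocks here
	 * through plain poll(2) on the netmap file descriptor, e.g.
	 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
	 *	poll(&pfd, 1, timeout_ms);
	 * and then consumes the rings once POLLIN/POLLOUT is reported) */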
259068b8534bSLuigi Rizzo 	if ( (ifp->if_capenable & IFCAP_NETMAP) == 0)
259168b8534bSLuigi Rizzo 		return POLLERR;
259268b8534bSLuigi Rizzo 
259368b8534bSLuigi Rizzo 	if (netmap_verbose & 0x8000)
259468b8534bSLuigi Rizzo 		D("device %s events 0x%x", ifp->if_xname, events);
259568b8534bSLuigi Rizzo 	want_tx = events & (POLLOUT | POLLWRNORM);
259668b8534bSLuigi Rizzo 	want_rx = events & (POLLIN | POLLRDNORM);
259768b8534bSLuigi Rizzo 
259868b8534bSLuigi Rizzo 	na = NA(ifp); /* retrieve netmap adapter */
259968b8534bSLuigi Rizzo 
2600d76bf4ffSLuigi Rizzo 	lim_tx = na->num_tx_rings;
2601d76bf4ffSLuigi Rizzo 	lim_rx = na->num_rx_rings;
2602ce3ee1e7SLuigi Rizzo 
260364ae02c3SLuigi Rizzo 	if (priv->np_qfirst == NETMAP_SW_RING) {
2604ce3ee1e7SLuigi Rizzo 		/* handle the host stack ring */
260568b8534bSLuigi Rizzo 		if (priv->np_txpoll || want_tx) {
260668b8534bSLuigi Rizzo 			/* push any packets up, then we are always ready */
2607ce3ee1e7SLuigi Rizzo 			netmap_txsync_to_host(na);
260868b8534bSLuigi Rizzo 			revents |= want_tx;
260968b8534bSLuigi Rizzo 		}
261068b8534bSLuigi Rizzo 		if (want_rx) {
261164ae02c3SLuigi Rizzo 			kring = &na->rx_rings[lim_rx];
261268b8534bSLuigi Rizzo 			if (kring->ring->avail == 0)
2613ce3ee1e7SLuigi Rizzo 				netmap_rxsync_from_host(na, td, dev);
261468b8534bSLuigi Rizzo 			if (kring->ring->avail > 0) {
261568b8534bSLuigi Rizzo 				revents |= want_rx;
261668b8534bSLuigi Rizzo 			}
261768b8534bSLuigi Rizzo 		}
261868b8534bSLuigi Rizzo 		return (revents);
261968b8534bSLuigi Rizzo 	}
262068b8534bSLuigi Rizzo 
2621091fd0abSLuigi Rizzo 	/* if we are in transparent mode, check also the host rx ring */
2622091fd0abSLuigi Rizzo 	kring = &na->rx_rings[lim_rx];
2623091fd0abSLuigi Rizzo 	if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
2624091fd0abSLuigi Rizzo 	    && want_rx
2625091fd0abSLuigi Rizzo 	    && (netmap_fwd || kring->ring->flags & NR_FORWARD) ) {
2626091fd0abSLuigi Rizzo 		if (kring->ring->avail == 0)
2627ce3ee1e7SLuigi Rizzo 			netmap_rxsync_from_host(na, td, dev);
2628091fd0abSLuigi Rizzo 		if (kring->ring->avail > 0)
2629091fd0abSLuigi Rizzo 			revents |= want_rx;
2630091fd0abSLuigi Rizzo 	}
2631091fd0abSLuigi Rizzo 
263268b8534bSLuigi Rizzo 	/*
2633ce3ee1e7SLuigi Rizzo 	 * check_all is set if the card has more than one queue AND
263468b8534bSLuigi Rizzo 	 * the client is polling all of them. If true, we sleep on
2635ce3ee1e7SLuigi Rizzo 	 * the "global" selinfo, otherwise we sleep on individual selinfo
2636ce3ee1e7SLuigi Rizzo 	 * (FreeBSD only allows two selinfo's per file descriptor).
2637ce3ee1e7SLuigi Rizzo 	 * The interrupt routine in the driver wakes one or the other
2638ce3ee1e7SLuigi Rizzo 	 * (or both) depending on which clients are active.
263968b8534bSLuigi Rizzo 	 *
264068b8534bSLuigi Rizzo 	 * rxsync() is only called if we run out of buffers on a POLLIN.
264168b8534bSLuigi Rizzo 	 * txsync() is called if we run out of buffers on POLLOUT, or
264268b8534bSLuigi Rizzo 	 * there are pending packets to send. The latter can be disabled
264368b8534bSLuigi Rizzo 	 * passing NETMAP_NO_TX_POLL in the NIOCREGIF call.
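	 * (hedged example: at registration time
	 *	req.nr_ringid = ring_index | NETMAP_NO_TX_POLL;
	 * keeps POLLIN-only wakeups from paying the txsync cost below)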
264468b8534bSLuigi Rizzo 	 */
2645954dca4cSLuigi Rizzo 	check_all_tx = (priv->np_qlast == NETMAP_HW_RING) && (lim_tx > 1);
2646954dca4cSLuigi Rizzo 	check_all_rx = (priv->np_qlast == NETMAP_HW_RING) && (lim_rx > 1);
264768b8534bSLuigi Rizzo 
264864ae02c3SLuigi Rizzo 	if (priv->np_qlast != NETMAP_HW_RING) {
264964ae02c3SLuigi Rizzo 		lim_tx = lim_rx = priv->np_qlast;
265064ae02c3SLuigi Rizzo 	}
265164ae02c3SLuigi Rizzo 
265268b8534bSLuigi Rizzo 	/*
265368b8534bSLuigi Rizzo 	 * We start with a lock-free round, which is good if we have
265468b8534bSLuigi Rizzo 	 * data available. If this fails, then lock and call the sync
265568b8534bSLuigi Rizzo 	 * routines.
265668b8534bSLuigi Rizzo 	 */
265764ae02c3SLuigi Rizzo 	for (i = priv->np_qfirst; want_rx && i < lim_rx; i++) {
265868b8534bSLuigi Rizzo 		kring = &na->rx_rings[i];
265968b8534bSLuigi Rizzo 		if (kring->ring->avail > 0) {
266068b8534bSLuigi Rizzo 			revents |= want_rx;
266168b8534bSLuigi Rizzo 			want_rx = 0;	/* also breaks the loop */
266268b8534bSLuigi Rizzo 		}
266368b8534bSLuigi Rizzo 	}
266464ae02c3SLuigi Rizzo 	for (i = priv->np_qfirst; want_tx && i < lim_tx; i++) {
266568b8534bSLuigi Rizzo 		kring = &na->tx_rings[i];
266668b8534bSLuigi Rizzo 		if (kring->ring->avail > 0) {
266768b8534bSLuigi Rizzo 			revents |= want_tx;
266868b8534bSLuigi Rizzo 			want_tx = 0;	/* also breaks the loop */
266968b8534bSLuigi Rizzo 		}
267068b8534bSLuigi Rizzo 	}
267168b8534bSLuigi Rizzo 
267268b8534bSLuigi Rizzo 	/*
267368b8534bSLuigi Rizzo 	 * If we want to push packets out (priv->np_txpoll) or want_tx is
267468b8534bSLuigi Rizzo 	 * still set, we do need to run the txsync calls (on all rings,
267568b8534bSLuigi Rizzo 	 * so that the tx rings do not stall).
267668b8534bSLuigi Rizzo 	 */
267768b8534bSLuigi Rizzo 	if (priv->np_txpoll || want_tx) {
2678ce3ee1e7SLuigi Rizzo 		/* If we really want to be woken up (want_tx),
2679ce3ee1e7SLuigi Rizzo 		 * do a selrecord, either on the global or on
2680ce3ee1e7SLuigi Rizzo 		 * the private structure.  Then issue the txsync
2681ce3ee1e7SLuigi Rizzo 		 * so there is no race in the selrecord/selwait.
2682ce3ee1e7SLuigi Rizzo 		 */
2683091fd0abSLuigi Rizzo flush_tx:
268464ae02c3SLuigi Rizzo 		for (i = priv->np_qfirst; i < lim_tx; i++) {
268568b8534bSLuigi Rizzo 			kring = &na->tx_rings[i];
26865819da83SLuigi Rizzo 			/*
2687ce3ee1e7SLuigi Rizzo 			 * Skip this ring if want_tx == 0
26885819da83SLuigi Rizzo 			 * (we have already done a successful sync on
26895819da83SLuigi Rizzo 			 * a previous ring) AND kring->cur == kring->hwcur
26905819da83SLuigi Rizzo 			 * (there are no pending transmissions for this ring).
26915819da83SLuigi Rizzo 			 */
269268b8534bSLuigi Rizzo 			if (!want_tx && kring->ring->cur == kring->nr_hwcur)
269368b8534bSLuigi Rizzo 				continue;
2694ce3ee1e7SLuigi Rizzo 			/* make sure only one user thread is doing this */
2695ce3ee1e7SLuigi Rizzo 			if (nm_kr_tryget(kring)) {
2696ce3ee1e7SLuigi Rizzo 				ND("ring %p busy is %d", kring, (int)kring->nr_busy);
2697ce3ee1e7SLuigi Rizzo 				revents |= POLLERR;
2698ce3ee1e7SLuigi Rizzo 				goto out;
269968b8534bSLuigi Rizzo 			}
2700ce3ee1e7SLuigi Rizzo 
270168b8534bSLuigi Rizzo 			if (netmap_verbose & NM_VERB_TXSYNC)
270268b8534bSLuigi Rizzo 				D("send %d on %s %d",
2703ce3ee1e7SLuigi Rizzo 				    kring->ring->cur, ifp->if_xname, i);
2704ce3ee1e7SLuigi Rizzo 			if (na->nm_txsync(ifp, i, 0))
270568b8534bSLuigi Rizzo 				revents |= POLLERR;
270668b8534bSLuigi Rizzo 
27075819da83SLuigi Rizzo 			/* Check avail/call selrecord only if called with POLLOUT */
270868b8534bSLuigi Rizzo 			if (want_tx) {
270968b8534bSLuigi Rizzo 				if (kring->ring->avail > 0) {
271068b8534bSLuigi Rizzo 					/* stop at the first ring.
We don't risk
271168b8534bSLuigi Rizzo 					 * starvation.
271268b8534bSLuigi Rizzo 					 */
271368b8534bSLuigi Rizzo 					revents |= want_tx;
271468b8534bSLuigi Rizzo 					want_tx = 0;
271568b8534bSLuigi Rizzo 				}
2716ce3ee1e7SLuigi Rizzo 			}
2717ce3ee1e7SLuigi Rizzo 			nm_kr_put(kring);
2718ce3ee1e7SLuigi Rizzo 		}
2719ce3ee1e7SLuigi Rizzo 		if (want_tx && retry_tx) {
2720954dca4cSLuigi Rizzo 			selrecord(td, check_all_tx ?
2721ce3ee1e7SLuigi Rizzo 			    &na->tx_si : &na->tx_rings[priv->np_qfirst].si);
2722ce3ee1e7SLuigi Rizzo 			retry_tx = 0;
2723ce3ee1e7SLuigi Rizzo 			goto flush_tx;
272468b8534bSLuigi Rizzo 		}
272568b8534bSLuigi Rizzo 	}
272668b8534bSLuigi Rizzo 
272768b8534bSLuigi Rizzo 	/*
272868b8534bSLuigi Rizzo 	 * Now, if want_rx is still set we need to lock and rxsync.
272968b8534bSLuigi Rizzo 	 * Do it on all rings because otherwise we starve.
273068b8534bSLuigi Rizzo 	 */
273168b8534bSLuigi Rizzo 	if (want_rx) {
2732ce3ee1e7SLuigi Rizzo 		int retry_rx = 1;
2733ce3ee1e7SLuigi Rizzo do_retry_rx:
273464ae02c3SLuigi Rizzo 		for (i = priv->np_qfirst; i < lim_rx; i++) {
273568b8534bSLuigi Rizzo 			kring = &na->rx_rings[i];
2736ce3ee1e7SLuigi Rizzo 
2737ce3ee1e7SLuigi Rizzo 			if (nm_kr_tryget(kring)) {
2738ce3ee1e7SLuigi Rizzo 				revents |= POLLERR;
2739ce3ee1e7SLuigi Rizzo 				goto out;
274068b8534bSLuigi Rizzo 			}
2741ce3ee1e7SLuigi Rizzo 
2742ce3ee1e7SLuigi Rizzo 			/* XXX NR_FORWARD should only be read on
2743ce3ee1e7SLuigi Rizzo 			 * physical or NIC ports
2744ce3ee1e7SLuigi Rizzo 			 */
2745091fd0abSLuigi Rizzo 			if (netmap_fwd || kring->ring->flags & NR_FORWARD) {
2746091fd0abSLuigi Rizzo 				ND(10, "forwarding some buffers up %d to %d",
2747091fd0abSLuigi Rizzo 				    kring->nr_hwcur, kring->ring->cur);
2748091fd0abSLuigi Rizzo 				netmap_grab_packets(kring, &q, netmap_fwd);
2749091fd0abSLuigi Rizzo 			}
275068b8534bSLuigi Rizzo 
2751ce3ee1e7SLuigi Rizzo 			if (na->nm_rxsync(ifp, i, 0))
275268b8534bSLuigi Rizzo 				revents |= POLLERR;
27535819da83SLuigi Rizzo 			if (netmap_no_timestamp == 0 ||
27545819da83SLuigi Rizzo 			    kring->ring->flags & NR_TIMESTAMP) {
275568b8534bSLuigi Rizzo 				microtime(&kring->ring->ts);
27565819da83SLuigi Rizzo 			}
275768b8534bSLuigi Rizzo 
2758ce3ee1e7SLuigi Rizzo 			if (kring->ring->avail > 0) {
275968b8534bSLuigi Rizzo 				revents |= want_rx;
2760ce3ee1e7SLuigi Rizzo 				retry_rx = 0;
276168b8534bSLuigi Rizzo 			}
2762ce3ee1e7SLuigi Rizzo 			nm_kr_put(kring);
276368b8534bSLuigi Rizzo 		}
2764ce3ee1e7SLuigi Rizzo 		if (retry_rx) {
2765ce3ee1e7SLuigi Rizzo 			retry_rx = 0;
2766954dca4cSLuigi Rizzo 			selrecord(td, check_all_rx ?
2767ce3ee1e7SLuigi Rizzo 			    &na->rx_si : &na->rx_rings[priv->np_qfirst].si);
2768ce3ee1e7SLuigi Rizzo 			goto do_retry_rx;
2769ce3ee1e7SLuigi Rizzo 		}
277068b8534bSLuigi Rizzo 	}
2771091fd0abSLuigi Rizzo 
2772ce3ee1e7SLuigi Rizzo 	/* forward host to the netmap ring.
2773ce3ee1e7SLuigi Rizzo 	 * I am accessing nr_hwavail without lock, but netmap_transmit
2774ce3ee1e7SLuigi Rizzo 	 * can only increment it, so the operation is safe.
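	 * (hedged recap: this is the transmit half of 'transparent mode' --
	 * packets that netmap_transmit() queued on the extra host RX ring
	 * are moved to the NIC TX rings by netmap_sw_to_nic() below, at
	 * most once per poll pass thanks to host_forwarded)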
2775ce3ee1e7SLuigi Rizzo */ 2776091fd0abSLuigi Rizzo kring = &na->rx_rings[lim_rx]; 2777091fd0abSLuigi Rizzo if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all 2778091fd0abSLuigi Rizzo && (netmap_fwd || kring->ring->flags & NR_FORWARD) 2779091fd0abSLuigi Rizzo && kring->nr_hwavail > 0 && !host_forwarded) { 2780091fd0abSLuigi Rizzo netmap_sw_to_nic(na); 2781091fd0abSLuigi Rizzo host_forwarded = 1; /* prevent another pass */ 2782091fd0abSLuigi Rizzo want_rx = 0; 2783091fd0abSLuigi Rizzo goto flush_tx; 2784091fd0abSLuigi Rizzo } 2785091fd0abSLuigi Rizzo 2786091fd0abSLuigi Rizzo if (q.head) 2787091fd0abSLuigi Rizzo netmap_send_up(na->ifp, q.head); 278868b8534bSLuigi Rizzo 2789ce3ee1e7SLuigi Rizzo out: 2790ce3ee1e7SLuigi Rizzo 279168b8534bSLuigi Rizzo return (revents); 279268b8534bSLuigi Rizzo } 279368b8534bSLuigi Rizzo 279468b8534bSLuigi Rizzo /*------- driver support routines ------*/ 279568b8534bSLuigi Rizzo 2796f18be576SLuigi Rizzo 279768b8534bSLuigi Rizzo /* 279868b8534bSLuigi Rizzo * Initialize a ``netmap_adapter`` object created by driver on attach. 279968b8534bSLuigi Rizzo * We allocate a block of memory with room for a struct netmap_adapter 280068b8534bSLuigi Rizzo * plus two sets of N+2 struct netmap_kring (where N is the number 280168b8534bSLuigi Rizzo * of hardware rings): 280268b8534bSLuigi Rizzo * krings 0..N-1 are for the hardware queues. 280368b8534bSLuigi Rizzo * kring N is for the host stack queue 280468b8534bSLuigi Rizzo * kring N+1 is only used for the selinfo for all queues. 280568b8534bSLuigi Rizzo * Return 0 on success, ENOMEM otherwise. 280664ae02c3SLuigi Rizzo * 28070bf88954SEd Maste * By default the receive and transmit adapter ring counts are both initialized 28080bf88954SEd Maste * to num_queues. na->num_tx_rings can be set for cards with different tx/rx 280924e57ec9SEd Maste * setups. 281068b8534bSLuigi Rizzo */ 281168b8534bSLuigi Rizzo int 2812ce3ee1e7SLuigi Rizzo netmap_attach(struct netmap_adapter *arg, u_int num_queues) 281368b8534bSLuigi Rizzo { 2814ae10d1afSLuigi Rizzo struct netmap_adapter *na = NULL; 2815ae10d1afSLuigi Rizzo struct ifnet *ifp = arg ? arg->ifp : NULL; 2816ce3ee1e7SLuigi Rizzo size_t len; 281768b8534bSLuigi Rizzo 2818ae10d1afSLuigi Rizzo if (arg == NULL || ifp == NULL) 2819ae10d1afSLuigi Rizzo goto fail; 2820ce3ee1e7SLuigi Rizzo /* a VALE port uses two endpoints */ 2821f18be576SLuigi Rizzo len = nma_is_vp(arg) ? sizeof(*na) : sizeof(*na) * 2; 2822f18be576SLuigi Rizzo na = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO); 2823ae10d1afSLuigi Rizzo if (na == NULL) 2824ae10d1afSLuigi Rizzo goto fail; 2825ae10d1afSLuigi Rizzo WNA(ifp) = na; 2826ae10d1afSLuigi Rizzo *na = *arg; /* copy everything, trust the driver to not pass junk */ 2827ae10d1afSLuigi Rizzo NETMAP_SET_CAPABLE(ifp); 2828d76bf4ffSLuigi Rizzo if (na->num_tx_rings == 0) 2829d76bf4ffSLuigi Rizzo na->num_tx_rings = num_queues; 2830d76bf4ffSLuigi Rizzo na->num_rx_rings = num_queues; 2831ae10d1afSLuigi Rizzo na->refcount = na->na_single = na->na_multi = 0; 2832ae10d1afSLuigi Rizzo /* Core lock initialized here, others after netmap_if_new. 
*/ 2833ae10d1afSLuigi Rizzo mtx_init(&na->core_lock, "netmap core lock", MTX_NETWORK_LOCK, MTX_DEF); 283464ae02c3SLuigi Rizzo #ifdef linux 2835f18be576SLuigi Rizzo if (ifp->netdev_ops) { 2836f18be576SLuigi Rizzo ND("netdev_ops %p", ifp->netdev_ops); 2837f18be576SLuigi Rizzo /* prepare a clone of the netdev ops */ 2838f18be576SLuigi Rizzo #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28) 2839f18be576SLuigi Rizzo na->nm_ndo.ndo_start_xmit = ifp->netdev_ops; 2840f18be576SLuigi Rizzo #else 2841849bec0eSLuigi Rizzo na->nm_ndo = *ifp->netdev_ops; 2842f18be576SLuigi Rizzo #endif 2843f18be576SLuigi Rizzo } 2844ce3ee1e7SLuigi Rizzo na->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit; 2845ce3ee1e7SLuigi Rizzo #endif /* linux */ 2846ce3ee1e7SLuigi Rizzo na->nm_mem = arg->nm_mem ? arg->nm_mem : &nm_mem; 2847f18be576SLuigi Rizzo if (!nma_is_vp(arg)) 2848f18be576SLuigi Rizzo netmap_attach_sw(ifp); 2849ae10d1afSLuigi Rizzo D("success for %s", ifp->if_xname); 2850ae10d1afSLuigi Rizzo return 0; 285168b8534bSLuigi Rizzo 2852ae10d1afSLuigi Rizzo fail: 2853ae10d1afSLuigi Rizzo D("fail, arg %p ifp %p na %p", arg, ifp, na); 2854849bec0eSLuigi Rizzo netmap_detach(ifp); 2855ae10d1afSLuigi Rizzo return (na ? EINVAL : ENOMEM); 285668b8534bSLuigi Rizzo } 285768b8534bSLuigi Rizzo 285868b8534bSLuigi Rizzo 285968b8534bSLuigi Rizzo /* 286068b8534bSLuigi Rizzo * Free the allocated memory linked to the given ``netmap_adapter`` 286168b8534bSLuigi Rizzo * object. 286268b8534bSLuigi Rizzo */ 286368b8534bSLuigi Rizzo void 286468b8534bSLuigi Rizzo netmap_detach(struct ifnet *ifp) 286568b8534bSLuigi Rizzo { 286668b8534bSLuigi Rizzo struct netmap_adapter *na = NA(ifp); 286768b8534bSLuigi Rizzo 286868b8534bSLuigi Rizzo if (!na) 286968b8534bSLuigi Rizzo return; 287068b8534bSLuigi Rizzo 28712f70fca5SEd Maste mtx_destroy(&na->core_lock); 28722f70fca5SEd Maste 2873ae10d1afSLuigi Rizzo if (na->tx_rings) { /* XXX should not happen */ 2874ae10d1afSLuigi Rizzo D("freeing leftover tx_rings"); 2875ae10d1afSLuigi Rizzo free(na->tx_rings, M_DEVBUF); 2876ae10d1afSLuigi Rizzo } 2877ce3ee1e7SLuigi Rizzo if (na->na_flags & NAF_MEM_OWNER) 2878ce3ee1e7SLuigi Rizzo netmap_mem_private_delete(na->nm_mem); 287968b8534bSLuigi Rizzo bzero(na, sizeof(*na)); 2880d0c7b075SLuigi Rizzo WNA(ifp) = NULL; 288168b8534bSLuigi Rizzo free(na, M_DEVBUF); 288268b8534bSLuigi Rizzo } 288368b8534bSLuigi Rizzo 288468b8534bSLuigi Rizzo 2885f18be576SLuigi Rizzo int 2886ce3ee1e7SLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, 2887ce3ee1e7SLuigi Rizzo struct netmap_adapter *na, u_int ring_nr); 2888f18be576SLuigi Rizzo 2889f18be576SLuigi Rizzo 289068b8534bSLuigi Rizzo /* 289102ad4083SLuigi Rizzo * Intercept packets from the network stack and pass them 289202ad4083SLuigi Rizzo * to netmap as incoming packets on the 'software' ring. 2893ce3ee1e7SLuigi Rizzo * We rely on the OS to make sure that the ifp and na do not go 2894ce3ee1e7SLuigi Rizzo * away (typically the caller checks for IFF_DRV_RUNNING or the like). 2895ce3ee1e7SLuigi Rizzo * In nm_register() or whenever there is a reinitialization, 2896ce3ee1e7SLuigi Rizzo * we make sure to access the core lock and per-ring locks 2897ce3ee1e7SLuigi Rizzo * so that IFCAP_NETMAP is visible here. 
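 *
 * (hedged sketch of how packets get here: on Linux the cloned
 * netdev_ops installed by netmap_attach() point ndo_start_xmit at
 * linux_netmap_start_xmit(), which simply does
 *	netmap_transmit(dev, skb);
 * FreeBSD drivers reach the same effect by pointing their host
 * output path, e.g. if_transmit, at this function)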
289868b8534bSLuigi Rizzo */ 289968b8534bSLuigi Rizzo int 2900ce3ee1e7SLuigi Rizzo netmap_transmit(struct ifnet *ifp, struct mbuf *m) 290168b8534bSLuigi Rizzo { 290268b8534bSLuigi Rizzo struct netmap_adapter *na = NA(ifp); 2903ce3ee1e7SLuigi Rizzo struct netmap_kring *kring; 29041a26580eSLuigi Rizzo u_int i, len = MBUF_LEN(m); 2905ce3ee1e7SLuigi Rizzo u_int error = EBUSY, lim; 290668b8534bSLuigi Rizzo struct netmap_slot *slot; 290768b8534bSLuigi Rizzo 2908ce3ee1e7SLuigi Rizzo // XXX [Linux] we do not need this lock 2909ce3ee1e7SLuigi Rizzo // if we follow the down/configure/up protocol -gl 2910ce3ee1e7SLuigi Rizzo // mtx_lock(&na->core_lock); 2911ce3ee1e7SLuigi Rizzo if ( (ifp->if_capenable & IFCAP_NETMAP) == 0) { 2912ce3ee1e7SLuigi Rizzo /* interface not in netmap mode anymore */ 2913ce3ee1e7SLuigi Rizzo error = ENXIO; 2914ce3ee1e7SLuigi Rizzo goto done; 2915ce3ee1e7SLuigi Rizzo } 2916ce3ee1e7SLuigi Rizzo 2917ce3ee1e7SLuigi Rizzo kring = &na->rx_rings[na->num_rx_rings]; 2918ce3ee1e7SLuigi Rizzo lim = kring->nkr_num_slots - 1; 291968b8534bSLuigi Rizzo if (netmap_verbose & NM_VERB_HOST) 292068b8534bSLuigi Rizzo D("%s packet %d len %d from the stack", ifp->if_xname, 292168b8534bSLuigi Rizzo kring->nr_hwcur + kring->nr_hwavail, len); 2922ce3ee1e7SLuigi Rizzo // XXX reconsider long packets if we handle fragments 2923ce3ee1e7SLuigi Rizzo if (len > NETMAP_BDG_BUF_SIZE(na->nm_mem)) { /* too long for us */ 2924849bec0eSLuigi Rizzo D("%s from_host, drop packet size %d > %d", ifp->if_xname, 2925ce3ee1e7SLuigi Rizzo len, NETMAP_BDG_BUF_SIZE(na->nm_mem)); 2926ce3ee1e7SLuigi Rizzo goto done; 2927849bec0eSLuigi Rizzo } 2928ce3ee1e7SLuigi Rizzo if (SWNA(ifp)->na_bdg) { 2929ce3ee1e7SLuigi Rizzo struct nm_bdg_fwd *ft; 2930ce3ee1e7SLuigi Rizzo char *dst; 2931f18be576SLuigi Rizzo 2932ce3ee1e7SLuigi Rizzo na = SWNA(ifp); /* we operate on the host port */ 2933ce3ee1e7SLuigi Rizzo ft = na->rx_rings[0].nkr_ft; 2934ce3ee1e7SLuigi Rizzo dst = BDG_NMB(na->nm_mem, &na->rx_rings[0].ring->slot[0]); 2935ce3ee1e7SLuigi Rizzo 2936ce3ee1e7SLuigi Rizzo /* use slot 0 in the ft, there is nothing queued here */ 2937ce3ee1e7SLuigi Rizzo /* XXX we can save the copy calling m_copydata in nm_bdg_flush, 2938ce3ee1e7SLuigi Rizzo * need a special flag for this. 2939ce3ee1e7SLuigi Rizzo */ 2940ce3ee1e7SLuigi Rizzo m_copydata(m, 0, (int)len, dst); 2941ce3ee1e7SLuigi Rizzo ft->ft_flags = 0; 2942ce3ee1e7SLuigi Rizzo ft->ft_len = len; 2943ce3ee1e7SLuigi Rizzo ft->ft_buf = dst; 2944ce3ee1e7SLuigi Rizzo ft->ft_next = NM_FT_NULL; 2945ce3ee1e7SLuigi Rizzo ft->ft_frags = 1; 2946ce3ee1e7SLuigi Rizzo if (netmap_verbose & NM_VERB_HOST) 2947ce3ee1e7SLuigi Rizzo RD(5, "pkt %p size %d to bridge port %d", 2948ce3ee1e7SLuigi Rizzo dst, len, na->bdg_port); 2949ce3ee1e7SLuigi Rizzo nm_bdg_flush(ft, 1, na, 0); 2950ce3ee1e7SLuigi Rizzo na = NA(ifp); /* back to the regular object/lock */ 2951ce3ee1e7SLuigi Rizzo error = 0; 2952ce3ee1e7SLuigi Rizzo goto done; 2953ce3ee1e7SLuigi Rizzo } 2954ce3ee1e7SLuigi Rizzo 2955ce3ee1e7SLuigi Rizzo /* protect against other instances of netmap_transmit, 2956ce3ee1e7SLuigi Rizzo * and userspace invocations of rxsync(). 
2957ce3ee1e7SLuigi Rizzo * XXX could reuse core_lock 2958ce3ee1e7SLuigi Rizzo */ 2959ce3ee1e7SLuigi Rizzo // XXX [Linux] there can be no other instances of netmap_transmit 2960ce3ee1e7SLuigi Rizzo // on this same ring, but we still need this lock to protect 2961ce3ee1e7SLuigi Rizzo // concurrent access from netmap_sw_to_nic() -gl 2962ce3ee1e7SLuigi Rizzo mtx_lock(&kring->q_lock); 296302ad4083SLuigi Rizzo if (kring->nr_hwavail >= lim) { 29645b248374SLuigi Rizzo if (netmap_verbose) 296568b8534bSLuigi Rizzo D("stack ring %s full\n", ifp->if_xname); 2966ce3ee1e7SLuigi Rizzo } else { 296768b8534bSLuigi Rizzo /* compute the insert position */ 2968ce3ee1e7SLuigi Rizzo i = nm_kr_rxpos(kring); 296968b8534bSLuigi Rizzo slot = &kring->ring->slot[i]; 2970ce3ee1e7SLuigi Rizzo m_copydata(m, 0, (int)len, BDG_NMB(na->nm_mem, slot)); 297168b8534bSLuigi Rizzo slot->len = len; 2972091fd0abSLuigi Rizzo slot->flags = kring->nkr_slot_flags; 297368b8534bSLuigi Rizzo kring->nr_hwavail++; 297468b8534bSLuigi Rizzo if (netmap_verbose & NM_VERB_HOST) 2975d76bf4ffSLuigi Rizzo D("wake up host ring %s %d", na->ifp->if_xname, na->num_rx_rings); 297668b8534bSLuigi Rizzo selwakeuppri(&kring->si, PI_NET); 297768b8534bSLuigi Rizzo error = 0; 2978ce3ee1e7SLuigi Rizzo } 2979ce3ee1e7SLuigi Rizzo mtx_unlock(&kring->q_lock); 2980ce3ee1e7SLuigi Rizzo 298168b8534bSLuigi Rizzo done: 2982ce3ee1e7SLuigi Rizzo // mtx_unlock(&na->core_lock); 298368b8534bSLuigi Rizzo 298468b8534bSLuigi Rizzo /* release the mbuf in either cases of success or failure. As an 298568b8534bSLuigi Rizzo * alternative, put the mbuf in a free list and free the list 298668b8534bSLuigi Rizzo * only when really necessary. 298768b8534bSLuigi Rizzo */ 298868b8534bSLuigi Rizzo m_freem(m); 298968b8534bSLuigi Rizzo 299068b8534bSLuigi Rizzo return (error); 299168b8534bSLuigi Rizzo } 299268b8534bSLuigi Rizzo 299368b8534bSLuigi Rizzo 299468b8534bSLuigi Rizzo /* 299568b8534bSLuigi Rizzo * netmap_reset() is called by the driver routines when reinitializing 299668b8534bSLuigi Rizzo * a ring. The driver is in charge of locking to protect the kring. 299768b8534bSLuigi Rizzo * If netmap mode is not set just return NULL. 299868b8534bSLuigi Rizzo */ 299968b8534bSLuigi Rizzo struct netmap_slot * 3000ce3ee1e7SLuigi Rizzo netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n, 300168b8534bSLuigi Rizzo u_int new_cur) 300268b8534bSLuigi Rizzo { 300368b8534bSLuigi Rizzo struct netmap_kring *kring; 3004506cc70cSLuigi Rizzo int new_hwofs, lim; 300568b8534bSLuigi Rizzo 3006ce3ee1e7SLuigi Rizzo if (na == NULL) { 3007ce3ee1e7SLuigi Rizzo D("NULL na, should not happen"); 300868b8534bSLuigi Rizzo return NULL; /* no netmap support here */ 3009ce3ee1e7SLuigi Rizzo } 3010ce3ee1e7SLuigi Rizzo if (!(na->ifp->if_capenable & IFCAP_NETMAP)) { 3011*5864b3a5SLuigi Rizzo ND("interface not in netmap mode"); 301268b8534bSLuigi Rizzo return NULL; /* nothing to reinitialize */ 3013ce3ee1e7SLuigi Rizzo } 301468b8534bSLuigi Rizzo 3015ce3ee1e7SLuigi Rizzo /* XXX note- in the new scheme, we are not guaranteed to be 3016ce3ee1e7SLuigi Rizzo * under lock (e.g. when called on a device reset). 3017ce3ee1e7SLuigi Rizzo * In this case, we should set a flag and do not trust too 3018ce3ee1e7SLuigi Rizzo * much the values. In practice: TODO 3019ce3ee1e7SLuigi Rizzo * - set a RESET flag somewhere in the kring 3020ce3ee1e7SLuigi Rizzo * - do the processing in a conservative way 3021ce3ee1e7SLuigi Rizzo * - let the *sync() fixup at the end. 
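	 * (hedged driver-side sketch of the intended use, e.g. from an
	 * RX ring re-initialization path:
	 *	slot = netmap_reset(na, NR_RX, ring_nr, 0);
	 *	if (slot != NULL)
	 *		-- reprogram the NIC ring from the slot[] buffers
	 * a NULL return means the interface is not in netmap mode)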
3022ce3ee1e7SLuigi Rizzo */ 302364ae02c3SLuigi Rizzo if (tx == NR_TX) { 30248241616dSLuigi Rizzo if (n >= na->num_tx_rings) 30258241616dSLuigi Rizzo return NULL; 302664ae02c3SLuigi Rizzo kring = na->tx_rings + n; 3027506cc70cSLuigi Rizzo new_hwofs = kring->nr_hwcur - new_cur; 302864ae02c3SLuigi Rizzo } else { 30298241616dSLuigi Rizzo if (n >= na->num_rx_rings) 30308241616dSLuigi Rizzo return NULL; 303164ae02c3SLuigi Rizzo kring = na->rx_rings + n; 3032506cc70cSLuigi Rizzo new_hwofs = kring->nr_hwcur + kring->nr_hwavail - new_cur; 303364ae02c3SLuigi Rizzo } 303464ae02c3SLuigi Rizzo lim = kring->nkr_num_slots - 1; 3035506cc70cSLuigi Rizzo if (new_hwofs > lim) 3036506cc70cSLuigi Rizzo new_hwofs -= lim + 1; 3037506cc70cSLuigi Rizzo 3038ce3ee1e7SLuigi Rizzo /* Always set the new offset value and realign the ring. */ 3039ce3ee1e7SLuigi Rizzo D("%s hwofs %d -> %d, hwavail %d -> %d", 3040ce3ee1e7SLuigi Rizzo tx == NR_TX ? "TX" : "RX", 3041ce3ee1e7SLuigi Rizzo kring->nkr_hwofs, new_hwofs, 3042ce3ee1e7SLuigi Rizzo kring->nr_hwavail, 3043ce3ee1e7SLuigi Rizzo tx == NR_TX ? lim : kring->nr_hwavail); 3044506cc70cSLuigi Rizzo kring->nkr_hwofs = new_hwofs; 3045506cc70cSLuigi Rizzo if (tx == NR_TX) 3046ce3ee1e7SLuigi Rizzo kring->nr_hwavail = lim; 3047506cc70cSLuigi Rizzo 3048f196ce38SLuigi Rizzo #if 0 // def linux 3049f196ce38SLuigi Rizzo /* XXX check that the mappings are correct */ 3050f196ce38SLuigi Rizzo /* need ring_nr, adapter->pdev, direction */ 3051f196ce38SLuigi Rizzo buffer_info->dma = dma_map_single(&pdev->dev, addr, adapter->rx_buffer_len, DMA_FROM_DEVICE); 3052f196ce38SLuigi Rizzo if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) { 3053f196ce38SLuigi Rizzo D("error mapping rx netmap buffer %d", i); 3054f196ce38SLuigi Rizzo // XXX fix error handling 3055f196ce38SLuigi Rizzo } 3056f196ce38SLuigi Rizzo 3057f196ce38SLuigi Rizzo #endif /* linux */ 305868b8534bSLuigi Rizzo /* 3059ce3ee1e7SLuigi Rizzo * Wakeup on the individual and global selwait 3060506cc70cSLuigi Rizzo * We do the wakeup here, but the ring is not yet reconfigured. 3061506cc70cSLuigi Rizzo * However, we are under lock so there are no races. 306268b8534bSLuigi Rizzo */ 306368b8534bSLuigi Rizzo selwakeuppri(&kring->si, PI_NET); 306464ae02c3SLuigi Rizzo selwakeuppri(tx == NR_TX ? &na->tx_si : &na->rx_si, PI_NET); 306568b8534bSLuigi Rizzo return kring->ring->slot; 306668b8534bSLuigi Rizzo } 306768b8534bSLuigi Rizzo 306868b8534bSLuigi Rizzo 3069ce3ee1e7SLuigi Rizzo /* 3070ce3ee1e7SLuigi Rizzo * Grab packets from a kring, move them into the ft structure 3071ce3ee1e7SLuigi Rizzo * associated to the tx (input) port. Max one instance per port, 3072ce3ee1e7SLuigi Rizzo * filtered on input (ioctl, poll or XXX). 3073ce3ee1e7SLuigi Rizzo * Returns the next position in the ring. 3074ce3ee1e7SLuigi Rizzo */ 3075f18be576SLuigi Rizzo static int 3076f18be576SLuigi Rizzo nm_bdg_preflush(struct netmap_adapter *na, u_int ring_nr, 3077f18be576SLuigi Rizzo struct netmap_kring *kring, u_int end) 3078f18be576SLuigi Rizzo { 3079f18be576SLuigi Rizzo struct netmap_ring *ring = kring->ring; 3080ce3ee1e7SLuigi Rizzo struct nm_bdg_fwd *ft; 3081f18be576SLuigi Rizzo u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1; 3082f18be576SLuigi Rizzo u_int ft_i = 0; /* start from 0 */ 3083ce3ee1e7SLuigi Rizzo u_int frags = 1; /* how many frags ? 
 */
3084ce3ee1e7SLuigi Rizzo 	struct nm_bridge *b = na->na_bdg;
3085f18be576SLuigi Rizzo 
3086ce3ee1e7SLuigi Rizzo 	/* To protect against modifications to the bridge we acquire a
3087ce3ee1e7SLuigi Rizzo 	 * shared lock, waiting if we can sleep (if the source port is
3088ce3ee1e7SLuigi Rizzo 	 * attached to a user process) or with a trylock otherwise (NICs).
3089ce3ee1e7SLuigi Rizzo 	 */
3090ce3ee1e7SLuigi Rizzo 	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
3091ce3ee1e7SLuigi Rizzo 	if (na->na_flags & NAF_BDG_MAYSLEEP)
3092ce3ee1e7SLuigi Rizzo 		BDG_RLOCK(b);
3093ce3ee1e7SLuigi Rizzo 	else if (!BDG_RTRYLOCK(b))
3094ce3ee1e7SLuigi Rizzo 		return 0;
3095ce3ee1e7SLuigi Rizzo 	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
3096ce3ee1e7SLuigi Rizzo 	ft = kring->nkr_ft;
3097ce3ee1e7SLuigi Rizzo 
3098ce3ee1e7SLuigi Rizzo 	for (; likely(j != end); j = nm_next(j, lim)) {
3099f18be576SLuigi Rizzo 		struct netmap_slot *slot = &ring->slot[j];
3100ce3ee1e7SLuigi Rizzo 		char *buf;
3101f18be576SLuigi Rizzo 
3102ce3ee1e7SLuigi Rizzo 		ft[ft_i].ft_len = slot->len;
310385233a7dSLuigi Rizzo 		ft[ft_i].ft_flags = slot->flags;
310485233a7dSLuigi Rizzo 
310585233a7dSLuigi Rizzo 		ND("flags is 0x%x", slot->flags);
310685233a7dSLuigi Rizzo 		/* this slot goes into a list so initialize the link field */
3107ce3ee1e7SLuigi Rizzo 		ft[ft_i].ft_next = NM_FT_NULL;
310885233a7dSLuigi Rizzo 		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
31093d819cb6SLuigi Rizzo 			(void *)(uintptr_t)slot->ptr : BDG_NMB(na->nm_mem, slot);
311085233a7dSLuigi Rizzo 		prefetch(buf);
3111ce3ee1e7SLuigi Rizzo 		++ft_i;
3112ce3ee1e7SLuigi Rizzo 		if (slot->flags & NS_MOREFRAG) {
3113ce3ee1e7SLuigi Rizzo 			frags++;
3114ce3ee1e7SLuigi Rizzo 			continue;
3115ce3ee1e7SLuigi Rizzo 		}
3116ce3ee1e7SLuigi Rizzo 		if (unlikely(netmap_verbose && frags > 1))
3117ce3ee1e7SLuigi Rizzo 			RD(5, "%d frags at %d", frags, ft_i - frags);
3118ce3ee1e7SLuigi Rizzo 		ft[ft_i - frags].ft_frags = frags;
3119ce3ee1e7SLuigi Rizzo 		frags = 1;
3120ce3ee1e7SLuigi Rizzo 		if (unlikely((int)ft_i >= bridge_batch))
3121f18be576SLuigi Rizzo 			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
3122f18be576SLuigi Rizzo 	}
3123ce3ee1e7SLuigi Rizzo 	if (frags > 1) {
3124ce3ee1e7SLuigi Rizzo 		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
3125ce3ee1e7SLuigi Rizzo 		// ft_i > 0, ft[ft_i-1].ft_flags has NS_MOREFRAG
3126ce3ee1e7SLuigi Rizzo 		ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG;
3127ce3ee1e7SLuigi Rizzo 		ft[ft_i - frags].ft_frags = frags - 1;
3128ce3ee1e7SLuigi Rizzo 	}
3129f18be576SLuigi Rizzo 	if (ft_i)
3130f18be576SLuigi Rizzo 		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
3131ce3ee1e7SLuigi Rizzo 	BDG_RUNLOCK(b);
3132f18be576SLuigi Rizzo 	return j;
3133f18be576SLuigi Rizzo }
3134f18be576SLuigi Rizzo 
3135f18be576SLuigi Rizzo 
3136f18be576SLuigi Rizzo /*
3137ce3ee1e7SLuigi Rizzo  * Pass packets from the NIC to the bridge.
3138ce3ee1e7SLuigi Rizzo  * XXX TODO check locking: this is called from the interrupt
3139ce3ee1e7SLuigi Rizzo  * handler so we should make sure that the interface is not
3140ce3ee1e7SLuigi Rizzo  * disconnected while passing down an interrupt.
3141ce3ee1e7SLuigi Rizzo  *
3142f18be576SLuigi Rizzo  * Note, no user process can access this NIC so we can ignore
3143f18be576SLuigi Rizzo  * the info in the 'ring'.
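 * (hedged call-path note: netmap_rx_irq() below routes RX interrupts
 * here whenever the NIC is attached to a VALE switch, so this whole
 * NIC-to-switch forwarding pass runs in interrupt context)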
3144f18be576SLuigi Rizzo  */
3145f18be576SLuigi Rizzo static void
3146f18be576SLuigi Rizzo netmap_nic_to_bdg(struct ifnet *ifp, u_int ring_nr)
3147f18be576SLuigi Rizzo {
3148f18be576SLuigi Rizzo 	struct netmap_adapter *na = NA(ifp);
3149f18be576SLuigi Rizzo 	struct netmap_kring *kring = &na->rx_rings[ring_nr];
3150f18be576SLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
3151ce3ee1e7SLuigi Rizzo 	u_int j, k;
3152f18be576SLuigi Rizzo 
3153ce3ee1e7SLuigi Rizzo 	/* make sure that only one thread is ever in here,
3154ce3ee1e7SLuigi Rizzo 	 * after which we can unlock. Probably unnecessary XXX.
3155ce3ee1e7SLuigi Rizzo 	 */
3156ce3ee1e7SLuigi Rizzo 	if (nm_kr_tryget(kring))
3157ce3ee1e7SLuigi Rizzo 		return;
3158ce3ee1e7SLuigi Rizzo 	/* fetch packets that have arrived.
3159ce3ee1e7SLuigi Rizzo 	 * XXX maybe do this in a loop ?
3160ce3ee1e7SLuigi Rizzo 	 */
3161ce3ee1e7SLuigi Rizzo 	if (na->nm_rxsync(ifp, ring_nr, 0))
3162ce3ee1e7SLuigi Rizzo 		goto put_out;
3163ce3ee1e7SLuigi Rizzo 	if (kring->nr_hwavail == 0 && netmap_verbose) {
3164f18be576SLuigi Rizzo 		D("how strange, interrupt with no packets on %s",
3165f18be576SLuigi Rizzo 		    ifp->if_xname);
3166ce3ee1e7SLuigi Rizzo 		goto put_out;
3167f18be576SLuigi Rizzo 	}
3168ce3ee1e7SLuigi Rizzo 	k = nm_kr_rxpos(kring);
3169f18be576SLuigi Rizzo 
3170f18be576SLuigi Rizzo 	j = nm_bdg_preflush(na, ring_nr, kring, k);
3171f18be576SLuigi Rizzo 
3172f18be576SLuigi Rizzo 	/* we consume everything, but we cannot update kring directly
3173f18be576SLuigi Rizzo 	 * because the NIC may have destroyed the info in the NIC ring.
3174f18be576SLuigi Rizzo 	 * So we need to call rxsync again to restore it.
3175f18be576SLuigi Rizzo 	 */
3176f18be576SLuigi Rizzo 	ring->cur = j;
3177f18be576SLuigi Rizzo 	ring->avail = 0;
3178f18be576SLuigi Rizzo 	na->nm_rxsync(ifp, ring_nr, 0);
3179ce3ee1e7SLuigi Rizzo 
3180ce3ee1e7SLuigi Rizzo put_out:
3181ce3ee1e7SLuigi Rizzo 	nm_kr_put(kring);
3182f18be576SLuigi Rizzo 	return;
3183f18be576SLuigi Rizzo }
3184f18be576SLuigi Rizzo 
3185f18be576SLuigi Rizzo 
318668b8534bSLuigi Rizzo /*
3187ce3ee1e7SLuigi Rizzo  * Default functions to handle rx/tx interrupts from a physical device.
3188ce3ee1e7SLuigi Rizzo  * "work_done" is non-null on the RX path, NULL for the TX path.
3189ce3ee1e7SLuigi Rizzo  * We rely on the OS to make sure that there is only one active
3190ce3ee1e7SLuigi Rizzo  * instance per queue, and that there is appropriate locking.
3191849bec0eSLuigi Rizzo  *
3192ce3ee1e7SLuigi Rizzo  * If the card is not in netmap mode, simply return 0,
3193ce3ee1e7SLuigi Rizzo  * so that the caller proceeds with regular processing.
3194ce3ee1e7SLuigi Rizzo  *
3195ce3ee1e7SLuigi Rizzo  * If the card is connected to a netmap file descriptor,
3196ce3ee1e7SLuigi Rizzo  * do a selwakeup on the individual queue, plus one on the global one
3197ce3ee1e7SLuigi Rizzo  * if needed (multiqueue card _and_ there are multiqueue listeners),
3198ce3ee1e7SLuigi Rizzo  * and return 1.
3199ce3ee1e7SLuigi Rizzo  *
3200ce3ee1e7SLuigi Rizzo  * Finally, if called on rx from an interface connected to a switch,
3201ce3ee1e7SLuigi Rizzo  * call the proper forwarding routine and return 1.
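 *
 * (hedged driver-side sketch, e.g. at the top of an RX interrupt or
 * taskqueue handler:
 *	u_int work_done = 0;
 *	if (netmap_rx_irq(ifp, ring_nr, &work_done))
 *		return;		-- netmap consumed the event
 *	-- otherwise fall through to the regular mbuf path
 * the TX side makes the same call with work_done == NULL)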
32021a26580eSLuigi Rizzo */ 3203babc7c12SLuigi Rizzo int 3204ce3ee1e7SLuigi Rizzo netmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_done) 32051a26580eSLuigi Rizzo { 32061a26580eSLuigi Rizzo struct netmap_adapter *na; 3207ce3ee1e7SLuigi Rizzo struct netmap_kring *kring; 32081a26580eSLuigi Rizzo 32091a26580eSLuigi Rizzo if (!(ifp->if_capenable & IFCAP_NETMAP)) 32101a26580eSLuigi Rizzo return 0; 3211849bec0eSLuigi Rizzo 3212ce3ee1e7SLuigi Rizzo q &= NETMAP_RING_MASK; 3213849bec0eSLuigi Rizzo 3214ce3ee1e7SLuigi Rizzo if (netmap_verbose) 3215ce3ee1e7SLuigi Rizzo RD(5, "received %s queue %d", work_done ? "RX" : "TX" , q); 32161a26580eSLuigi Rizzo na = NA(ifp); 32178241616dSLuigi Rizzo if (na->na_flags & NAF_SKIP_INTR) { 32188241616dSLuigi Rizzo ND("use regular interrupt"); 32198241616dSLuigi Rizzo return 0; 32208241616dSLuigi Rizzo } 32218241616dSLuigi Rizzo 322264ae02c3SLuigi Rizzo if (work_done) { /* RX path */ 32238241616dSLuigi Rizzo if (q >= na->num_rx_rings) 3224849bec0eSLuigi Rizzo return 0; // not a physical queue 3225ce3ee1e7SLuigi Rizzo kring = na->rx_rings + q; 3226ce3ee1e7SLuigi Rizzo kring->nr_kflags |= NKR_PENDINTR; // XXX atomic ? 3227ce3ee1e7SLuigi Rizzo if (na->na_bdg != NULL) { 3228ce3ee1e7SLuigi Rizzo netmap_nic_to_bdg(ifp, q); 3229ce3ee1e7SLuigi Rizzo } else { 3230ce3ee1e7SLuigi Rizzo selwakeuppri(&kring->si, PI_NET); 3231ce3ee1e7SLuigi Rizzo if (na->num_rx_rings > 1 /* or multiple listeners */ ) 3232ce3ee1e7SLuigi Rizzo selwakeuppri(&na->rx_si, PI_NET); 3233ce3ee1e7SLuigi Rizzo } 3234ce3ee1e7SLuigi Rizzo *work_done = 1; /* do not fire napi again */ 3235849bec0eSLuigi Rizzo } else { /* TX path */ 32368241616dSLuigi Rizzo if (q >= na->num_tx_rings) 3237849bec0eSLuigi Rizzo return 0; // not a physical queue 3238ce3ee1e7SLuigi Rizzo kring = na->tx_rings + q; 3239ce3ee1e7SLuigi Rizzo selwakeuppri(&kring->si, PI_NET); 3240ce3ee1e7SLuigi Rizzo if (na->num_tx_rings > 1 /* or multiple listeners */ ) 3241ce3ee1e7SLuigi Rizzo selwakeuppri(&na->tx_si, PI_NET); 324264ae02c3SLuigi Rizzo } 32431a26580eSLuigi Rizzo return 1; 32441a26580eSLuigi Rizzo } 32451a26580eSLuigi Rizzo 324664ae02c3SLuigi Rizzo 324701c7d25fSLuigi Rizzo #ifdef linux /* linux-specific routines */ 324801c7d25fSLuigi Rizzo 3249f18be576SLuigi Rizzo 325001c7d25fSLuigi Rizzo /* 325101c7d25fSLuigi Rizzo * Remap linux arguments into the FreeBSD call. 325201c7d25fSLuigi Rizzo * - pwait is the poll table, passed as 'dev'; 325301c7d25fSLuigi Rizzo * If pwait == NULL someone else already woke up before. We can report 325401c7d25fSLuigi Rizzo * events but they are filtered upstream. 325501c7d25fSLuigi Rizzo * If pwait != NULL, then pwait->key contains the list of events. 325601c7d25fSLuigi Rizzo * - events is computed from pwait as above. 325701c7d25fSLuigi Rizzo * - file is passed as 'td'; 325801c7d25fSLuigi Rizzo */ 325901c7d25fSLuigi Rizzo static u_int 326001c7d25fSLuigi Rizzo linux_netmap_poll(struct file * file, struct poll_table_struct *pwait) 326101c7d25fSLuigi Rizzo { 3262849bec0eSLuigi Rizzo #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) 3263849bec0eSLuigi Rizzo int events = POLLIN | POLLOUT; /* XXX maybe... */ 3264849bec0eSLuigi Rizzo #elif LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) 326501c7d25fSLuigi Rizzo int events = pwait ? pwait->key : POLLIN | POLLOUT; 326601c7d25fSLuigi Rizzo #else /* in 3.4.0 field 'key' was renamed to '_key' */ 326701c7d25fSLuigi Rizzo int events = pwait ? 
pwait->_key : POLLIN | POLLOUT;
326801c7d25fSLuigi Rizzo #endif
326901c7d25fSLuigi Rizzo 	return netmap_poll((void *)pwait, events, (void *)file);
327001c7d25fSLuigi Rizzo }
327101c7d25fSLuigi Rizzo 
3272f18be576SLuigi Rizzo 
327301c7d25fSLuigi Rizzo static int
327442a3a5bdSLuigi Rizzo linux_netmap_mmap(struct file *f, struct vm_area_struct *vma)
327501c7d25fSLuigi Rizzo {
32768241616dSLuigi Rizzo 	int error = 0;
3277ce3ee1e7SLuigi Rizzo 	unsigned long off, va;
3278ce3ee1e7SLuigi Rizzo 	vm_ooffset_t pa;
3279ce3ee1e7SLuigi Rizzo 	struct netmap_priv_d *priv = f->private_data;
328001c7d25fSLuigi Rizzo 	/*
328101c7d25fSLuigi Rizzo 	 * vma->vm_start: start of the mapping in user address space
328201c7d25fSLuigi Rizzo 	 * vma->vm_end: end of the mapping in user address space
32838241616dSLuigi Rizzo 	 * vma->vm_pgoff: offset of the first page in the device
328401c7d25fSLuigi Rizzo 	 */
328501c7d25fSLuigi Rizzo 
328601c7d25fSLuigi Rizzo 	// XXX security checks
328701c7d25fSLuigi Rizzo 
3288ce3ee1e7SLuigi Rizzo 	error = netmap_get_memory(priv);
32898241616dSLuigi Rizzo 	ND("get_memory returned %d", error);
32908241616dSLuigi Rizzo 	if (error)
32918241616dSLuigi Rizzo 		return -error;
32928241616dSLuigi Rizzo 
3293ce3ee1e7SLuigi Rizzo 	if ((vma->vm_start & ~PAGE_MASK) || (vma->vm_end & ~PAGE_MASK)) {
3294ce3ee1e7SLuigi Rizzo 		ND("vm_start = %lx vm_end = %lx", vma->vm_start, vma->vm_end);
3295ce3ee1e7SLuigi Rizzo 		return -EINVAL;
3296ce3ee1e7SLuigi Rizzo 	}
32978241616dSLuigi Rizzo 
3298ce3ee1e7SLuigi Rizzo 	for (va = vma->vm_start, off = vma->vm_pgoff;
3299ce3ee1e7SLuigi Rizzo 	     va < vma->vm_end;
3300ce3ee1e7SLuigi Rizzo 	     va += PAGE_SIZE, off++)
3301ce3ee1e7SLuigi Rizzo 	{
3302ce3ee1e7SLuigi Rizzo 		pa = netmap_mem_ofstophys(priv->np_mref, off << PAGE_SHIFT);
3303ce3ee1e7SLuigi Rizzo 		if (pa == 0)
3304ce3ee1e7SLuigi Rizzo 			return -EINVAL;
330501c7d25fSLuigi Rizzo 
3306ce3ee1e7SLuigi Rizzo 		ND("va %lx pa %p", va, pa);
3307ce3ee1e7SLuigi Rizzo 		error = remap_pfn_range(vma, va, pa >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot);
3308ce3ee1e7SLuigi Rizzo 		if (error)
3309ce3ee1e7SLuigi Rizzo 			return error;
3310ce3ee1e7SLuigi Rizzo 	}
331101c7d25fSLuigi Rizzo 	return 0;
331201c7d25fSLuigi Rizzo }
331301c7d25fSLuigi Rizzo 
3314f18be576SLuigi Rizzo 
3315ce3ee1e7SLuigi Rizzo /*
3316ce3ee1e7SLuigi Rizzo  * This one is probably already protected by the netif lock XXX
3317ce3ee1e7SLuigi Rizzo  */
331801c7d25fSLuigi Rizzo static netdev_tx_t
3319ce3ee1e7SLuigi Rizzo linux_netmap_start_xmit(struct sk_buff *skb, struct net_device *dev)
332001c7d25fSLuigi Rizzo {
3321ce3ee1e7SLuigi Rizzo 	netmap_transmit(dev, skb);
332201c7d25fSLuigi Rizzo 	return (NETDEV_TX_OK);
332301c7d25fSLuigi Rizzo }
332401c7d25fSLuigi Rizzo 
332501c7d25fSLuigi Rizzo 
3326ce3ee1e7SLuigi Rizzo #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) // XXX was 37
332701c7d25fSLuigi Rizzo #define LIN_IOCTL_NAME	.ioctl
332801c7d25fSLuigi Rizzo int
332901c7d25fSLuigi Rizzo linux_netmap_ioctl(struct inode *inode, struct file *file, u_int cmd, u_long data /* arg */)
333001c7d25fSLuigi Rizzo #else
333101c7d25fSLuigi Rizzo #define LIN_IOCTL_NAME	.unlocked_ioctl
333201c7d25fSLuigi Rizzo long
333301c7d25fSLuigi Rizzo linux_netmap_ioctl(struct file *file, u_int cmd, u_long data /* arg */)
333401c7d25fSLuigi Rizzo #endif
333501c7d25fSLuigi Rizzo {
333601c7d25fSLuigi Rizzo 	int ret;
333701c7d25fSLuigi Rizzo 	struct nmreq nmr;
333801c7d25fSLuigi Rizzo 	bzero(&nmr, sizeof(nmr));
333901c7d25fSLuigi Rizzo 
3340ce3ee1e7SLuigi Rizzo 	if (cmd == NIOCTXSYNC || cmd == NIOCRXSYNC) {
3341ce3ee1e7SLuigi Rizzo 		data = 0;	/* no argument required here */
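		/* (NIOCTXSYNC/NIOCRXSYNC carry no argument structure, so
		 * the copy_from_user/copy_to_user pair below is skipped
		 * for them) */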
3342ce3ee1e7SLuigi Rizzo } 334301c7d25fSLuigi Rizzo if (data && copy_from_user(&nmr, (void *)data, sizeof(nmr) ) != 0) 334401c7d25fSLuigi Rizzo return -EFAULT; 334501c7d25fSLuigi Rizzo ret = netmap_ioctl(NULL, cmd, (caddr_t)&nmr, 0, (void *)file); 334601c7d25fSLuigi Rizzo if (data && copy_to_user((void*)data, &nmr, sizeof(nmr) ) != 0) 334701c7d25fSLuigi Rizzo return -EFAULT; 334801c7d25fSLuigi Rizzo return -ret; 334901c7d25fSLuigi Rizzo } 335001c7d25fSLuigi Rizzo 335101c7d25fSLuigi Rizzo 335201c7d25fSLuigi Rizzo static int 33530b8ed8e0SLuigi Rizzo netmap_release(struct inode *inode, struct file *file) 335401c7d25fSLuigi Rizzo { 33550b8ed8e0SLuigi Rizzo (void)inode; /* UNUSED */ 335601c7d25fSLuigi Rizzo if (file->private_data) 335701c7d25fSLuigi Rizzo netmap_dtor(file->private_data); 335801c7d25fSLuigi Rizzo return (0); 335901c7d25fSLuigi Rizzo } 336001c7d25fSLuigi Rizzo 3361f18be576SLuigi Rizzo 33628241616dSLuigi Rizzo static int 33638241616dSLuigi Rizzo linux_netmap_open(struct inode *inode, struct file *file) 33648241616dSLuigi Rizzo { 33658241616dSLuigi Rizzo struct netmap_priv_d *priv; 33668241616dSLuigi Rizzo (void)inode; /* UNUSED */ 33678241616dSLuigi Rizzo 33688241616dSLuigi Rizzo priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF, 33698241616dSLuigi Rizzo M_NOWAIT | M_ZERO); 33708241616dSLuigi Rizzo if (priv == NULL) 33718241616dSLuigi Rizzo return -ENOMEM; 33728241616dSLuigi Rizzo 33738241616dSLuigi Rizzo file->private_data = priv; 33748241616dSLuigi Rizzo 33758241616dSLuigi Rizzo return (0); 33768241616dSLuigi Rizzo } 337701c7d25fSLuigi Rizzo 3378f18be576SLuigi Rizzo 337901c7d25fSLuigi Rizzo static struct file_operations netmap_fops = { 3380f18be576SLuigi Rizzo .owner = THIS_MODULE, 33818241616dSLuigi Rizzo .open = linux_netmap_open, 338242a3a5bdSLuigi Rizzo .mmap = linux_netmap_mmap, 338301c7d25fSLuigi Rizzo LIN_IOCTL_NAME = linux_netmap_ioctl, 338401c7d25fSLuigi Rizzo .poll = linux_netmap_poll, 338501c7d25fSLuigi Rizzo .release = netmap_release, 338601c7d25fSLuigi Rizzo }; 338701c7d25fSLuigi Rizzo 3388f18be576SLuigi Rizzo 338901c7d25fSLuigi Rizzo static struct miscdevice netmap_cdevsw = { /* same name as FreeBSD */ 339001c7d25fSLuigi Rizzo MISC_DYNAMIC_MINOR, 339101c7d25fSLuigi Rizzo "netmap", 339201c7d25fSLuigi Rizzo &netmap_fops, 339301c7d25fSLuigi Rizzo }; 339401c7d25fSLuigi Rizzo 339501c7d25fSLuigi Rizzo static int netmap_init(void); 339601c7d25fSLuigi Rizzo static void netmap_fini(void); 339701c7d25fSLuigi Rizzo 3398f18be576SLuigi Rizzo 339942a3a5bdSLuigi Rizzo /* Errors have negative values on linux */ 340042a3a5bdSLuigi Rizzo static int linux_netmap_init(void) 340142a3a5bdSLuigi Rizzo { 340242a3a5bdSLuigi Rizzo return -netmap_init(); 340342a3a5bdSLuigi Rizzo } 340442a3a5bdSLuigi Rizzo 340542a3a5bdSLuigi Rizzo module_init(linux_netmap_init); 340601c7d25fSLuigi Rizzo module_exit(netmap_fini); 340701c7d25fSLuigi Rizzo /* export certain symbols to other modules */ 340801c7d25fSLuigi Rizzo EXPORT_SYMBOL(netmap_attach); // driver attach routines 340901c7d25fSLuigi Rizzo EXPORT_SYMBOL(netmap_detach); // driver detach routines 341001c7d25fSLuigi Rizzo EXPORT_SYMBOL(netmap_ring_reinit); // ring init on error 341101c7d25fSLuigi Rizzo EXPORT_SYMBOL(netmap_buffer_lut); 341201c7d25fSLuigi Rizzo EXPORT_SYMBOL(netmap_total_buffers); // index check 341301c7d25fSLuigi Rizzo EXPORT_SYMBOL(netmap_buffer_base); 341401c7d25fSLuigi Rizzo EXPORT_SYMBOL(netmap_reset); // ring init routines 341501c7d25fSLuigi Rizzo EXPORT_SYMBOL(netmap_buf_size); 341601c7d25fSLuigi Rizzo 
EXPORT_SYMBOL(netmap_rx_irq);		// default irq handler
EXPORT_SYMBOL(netmap_no_pendintr);	// XXX mitigation - should go away
EXPORT_SYMBOL(netmap_bdg_ctl);		// bridge configuration routine
EXPORT_SYMBOL(netmap_bdg_learning);	// the default lookup function
EXPORT_SYMBOL(netmap_disable_all_rings);
EXPORT_SYMBOL(netmap_enable_all_rings);


MODULE_AUTHOR("http://info.iet.unipi.it/~luigi/netmap/");
MODULE_DESCRIPTION("The netmap packet I/O framework");
MODULE_LICENSE("Dual BSD/GPL"); /* the code here is all BSD. */

#else /* __FreeBSD__ */


static struct cdevsw netmap_cdevsw = {
	.d_version = D_VERSION,
	.d_name = "netmap",
	.d_open = netmap_open,
	.d_mmap_single = netmap_mmap_single,
	.d_ioctl = netmap_ioctl,
	.d_poll = netmap_poll,
	.d_close = netmap_close,
};
#endif /* __FreeBSD__ */
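
/*
 * A hedged sketch of how a NIC driver hooks into the exported API
 * above, modeled on the bdg_netmap_attach() routine further down.
 * Everything except netmap_attach()/netmap_detach() (the foo_*
 * names and the softc fields) is a placeholder, not code from any
 * real driver.
 *
 *	static int foo_attach(struct foo_softc *sc)	// driver attach path
 *	{
 *		struct netmap_adapter na;
 *
 *		bzero(&na, sizeof(na));
 *		na.ifp = sc->ifp;
 *		na.num_tx_desc = sc->num_tx_desc;
 *		na.num_rx_desc = sc->num_rx_desc;
 *		na.nm_txsync = foo_netmap_txsync;	// driver-supplied
 *		na.nm_rxsync = foo_netmap_rxsync;	// driver-supplied
 *		na.nm_register = foo_netmap_reg;	// driver-supplied
 *		return netmap_attach(&na, sc->num_queues);
 *	}
 *
 *	// and in the detach path:
 *	//	netmap_detach(sc->ifp);
 */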

/*
 *---- support for virtual bridge -----
 */

/* ----- FreeBSD if_bridge hash function ------- */

/*
 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
 *
 * http://www.burtleburtle.net/bob/hash/spooky.html
 */
#define mix(a, b, c)							\
do {									\
	a -= b; a -= c; a ^= (c >> 13);					\
	b -= c; b -= a; b ^= (a << 8);					\
	c -= a; c -= b; c ^= (b >> 13);					\
	a -= b; a -= c; a ^= (c >> 12);					\
	b -= c; b -= a; b ^= (a << 16);					\
	c -= a; c -= b; c ^= (b >> 5);					\
	a -= b; a -= c; a ^= (c >> 3);					\
	b -= c; b -= a; b ^= (a << 10);					\
	c -= a; c -= b; c ^= (b >> 15);					\
} while (/*CONSTCOND*/0)

static __inline uint32_t
nm_bridge_rthash(const uint8_t *addr)
{
	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hash key

	b += addr[5] << 8;
	b += addr[4];
	a += addr[3] << 24;
	a += addr[2] << 16;
	a += addr[1] << 8;
	a += addr[0];

	mix(a, b, c);
#define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
	return (c & BRIDGE_RTHASH_MASK);
}

#undef mix
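
/*
 * Illustration of how the switch uses the hash above: a 6-byte MAC
 * address is folded into one of NM_BDG_HASH buckets, which indexes
 * the forwarding table used by netmap_bdg_learning() below.
 *
 *	uint8_t mac[6] = { 0x00, 0x1b, 0x21, 0xaa, 0xbb, 0xcc };
 *	uint32_t h = nm_bridge_rthash(mac);	// 0 <= h < NM_BDG_HASH
 *	// ht[h].mac holds the 48-bit address, ht[h].ports the port
 */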

static int
bdg_netmap_reg(struct ifnet *ifp, int onoff)
{
	/* the interface is already attached to the bridge,
	 * so we only need to toggle IFCAP_NETMAP.
	 */
	if (onoff) {
		ifp->if_capenable |= IFCAP_NETMAP;
	} else {
		ifp->if_capenable &= ~IFCAP_NETMAP;
	}
	return 0;
}


/*
 * Lookup function for a learning bridge.
 * Update the hash table with the source address,
 * then return the destination port index, and the
 * ring in *dst_ring (at the moment, always use ring 0).
 */
u_int
netmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring,
		struct netmap_adapter *na)
{
	struct nm_hash_ent *ht = na->na_bdg->ht;
	uint32_t sh, dh;
	u_int dst, mysrc = na->bdg_port;
	uint64_t smac, dmac;

	if (buf_len < 14) {
		D("invalid buf length %d", buf_len);
		return NM_BDG_NOPORT;
	}
	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
	smac = le64toh(*(uint64_t *)(buf + 4));
	smac >>= 16;

	/*
	 * The hash is somewhat expensive, there might be some
	 * worthwhile optimizations here.
	 */
	if ((buf[6] & 1) == 0) { /* valid src */
		uint8_t *s = buf+6;
		sh = nm_bridge_rthash(s); // XXX hash of source
		/* update source port forwarding entry */
		ht[sh].mac = smac;	/* XXX expire ? */
		ht[sh].ports = mysrc;
		if (netmap_verbose)
			D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
				s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
	}
	dst = NM_BDG_BROADCAST;
	if ((buf[0] & 1) == 0) { /* unicast */
		dh = nm_bridge_rthash(buf); // XXX hash of dst
		if (ht[dh].mac == dmac) {	/* found dst */
			dst = ht[dh].ports;
		}
		/* XXX otherwise return NM_BDG_UNKNOWN ? */
	}
	*dst_ring = 0;
	return dst;
}


/*
 * This flush routine supports only unicast and broadcast but a large
 * number of ports, and lets us replace the learn and dispatch functions.
 */
int
nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_adapter *na,
		u_int ring_nr)
{
	struct nm_bdg_q *dst_ents, *brddst;
	uint16_t num_dsts = 0, *dsts;
	struct nm_bridge *b = na->na_bdg;
	u_int i, j, me = na->bdg_port;

	/*
	 * The work area (pointed to by ft) is followed by an array of
	 * pointers to queues, dst_ents; there are NM_BDG_MAXRINGS
	 * queues per port plus one for the broadcast traffic.
	 * Then we have an array of destination indexes.
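	 *
	 * An illustration of the layout (sizes from the code below):
	 *
	 *	ft[0 .. NM_BDG_BATCH_MAX-1]	packet descriptors
	 *	dst_ents[]			NM_BDG_MAXPORTS *
	 *			NM_BDG_MAXRINGS + 1 queues, one per
	 *			(port, ring) pair plus the broadcast one
	 *	dsts[]				compact list of the
	 *			destination indexes actually in use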
	 */
	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);

	/* first pass: find a destination for each packet in the batch */
	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
		uint16_t dst_port, d_i;
		struct nm_bdg_q *d;

		ND("slot %d frags %d", i, ft[i].ft_frags);
		dst_port = b->nm_bdg_lookup(ft[i].ft_buf, ft[i].ft_len,
			&dst_ring, na);
		if (netmap_verbose > 255)
			RD(5, "slot %d port %d -> %d", i, me, dst_port);
		if (dst_port == NM_BDG_NOPORT)
			continue; /* this packet is identified to be dropped */
		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
			continue;
		else if (dst_port == NM_BDG_BROADCAST)
			dst_ring = 0; /* broadcasts always go to ring 0 */
		else if (unlikely(dst_port == me ||
		    !b->bdg_ports[dst_port]))
			continue;

		/* get a position in the scratch pad */
		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
		d = dst_ents + d_i;

		/* append the first fragment to the list */
		if (d->bq_head == NM_FT_NULL) { /* new destination */
			d->bq_head = d->bq_tail = i;
			/* remember this position to be scanned later */
			if (dst_port != NM_BDG_BROADCAST)
				dsts[num_dsts++] = d_i;
		} else {
			ft[d->bq_tail].ft_next = i;
			d->bq_tail = i;
		}
		d->bq_len += ft[i].ft_frags;
	}
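
	/*
	 * After the first pass each active destination queue is a
	 * singly linked list threaded through the work area; e.g.
	 * with packets 0, 3 and 7 bound for the same (port, ring):
	 *
	 *	d->bq_head = 0;		ft[0].ft_next = 3;
	 *	ft[3].ft_next = 7;	d->bq_tail = 7;
	 *
	 * so the second pass can drain a queue by following ft_next
	 * until it reaches the tail.
	 */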

	/*
	 * Broadcast traffic goes to ring 0 on all destinations.
	 * So we need to add these rings to the list of ports to scan.
	 * Scanning all NM_BDG_MAXPORTS ports would be expensive, so the
	 * loop below only walks bdg_active_ports, the compact list of
	 * active destinations.
	 */
	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
	if (brddst->bq_head != NM_FT_NULL) {
		for (j = 0; likely(j < b->bdg_active_ports); j++) {
			uint16_t d_i;
			i = b->bdg_port_index[j];
			if (unlikely(i == me))
				continue;
			d_i = i * NM_BDG_MAXRINGS;
			if (dst_ents[d_i].bq_head == NM_FT_NULL)
				dsts[num_dsts++] = d_i;
		}
	}

	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
	/* second pass: scan destinations (XXX will be modular somehow) */
	for (i = 0; i < num_dsts; i++) {
		struct ifnet *dst_ifp;
		struct netmap_adapter *dst_na;
		struct netmap_kring *kring;
		struct netmap_ring *ring;
		u_int dst_nr, is_vp, lim, j, sent = 0, d_i, next, brd_next;
		u_int needed, howmany;
		int retry = netmap_txsync_retry;
		struct nm_bdg_q *d;
		uint32_t my_start = 0, lease_idx = 0;
		int nrings;

		d_i = dsts[i];
		ND("second pass %d port %d", i, d_i);
		d = dst_ents + d_i;
		// XXX fix the division
		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
		/* protect from the lookup function returning an inactive
		 * destination port
		 */
		if (unlikely(dst_na == NULL))
			goto cleanup;
		if (dst_na->na_flags & NAF_SW_ONLY)
			goto cleanup;
		dst_ifp = dst_na->ifp;
		/*
		 * The interface may be in !netmap mode in two cases:
		 * - when na is attached but not activated yet;
		 * - when na is being deactivated but is still attached.
		 */
		if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) {
			ND("not in netmap mode!");
			goto cleanup;
		}

		/* there is at least one either unicast or broadcast packet */
		brd_next = brddst->bq_head;
		next = d->bq_head;
		/* we need to reserve this many slots. If fewer are
		 * available, some packets will be dropped.
		 * Packets may have multiple fragments, so there is a
		 * chance that we may not use all of the slots we have
		 * claimed, and we will need to handle the leftover
		 * ones when we regain the lock.
		 */
		needed = d->bq_len + brddst->bq_len;

		is_vp = nma_is_vp(dst_na);
		ND(5, "pass 2 dst %d is %x %s",
			i, d_i, is_vp ? "virtual" : "nic/host");
"virtual" : "nic/host"); 3684f18be576SLuigi Rizzo dst_nr = d_i & (NM_BDG_MAXRINGS-1); 3685f18be576SLuigi Rizzo if (is_vp) { /* virtual port */ 3686ce3ee1e7SLuigi Rizzo nrings = dst_na->num_rx_rings; 3687ce3ee1e7SLuigi Rizzo } else { 3688ce3ee1e7SLuigi Rizzo nrings = dst_na->num_tx_rings; 3689f18be576SLuigi Rizzo } 3690ce3ee1e7SLuigi Rizzo if (dst_nr >= nrings) 3691ce3ee1e7SLuigi Rizzo dst_nr = dst_nr % nrings; 3692ce3ee1e7SLuigi Rizzo kring = is_vp ? &dst_na->rx_rings[dst_nr] : 3693ce3ee1e7SLuigi Rizzo &dst_na->tx_rings[dst_nr]; 3694ce3ee1e7SLuigi Rizzo ring = kring->ring; 3695ce3ee1e7SLuigi Rizzo lim = kring->nkr_num_slots - 1; 3696f18be576SLuigi Rizzo 3697ce3ee1e7SLuigi Rizzo retry: 3698ce3ee1e7SLuigi Rizzo 3699ce3ee1e7SLuigi Rizzo /* reserve the buffers in the queue and an entry 3700ce3ee1e7SLuigi Rizzo * to report completion, and drop lock. 3701ce3ee1e7SLuigi Rizzo * XXX this might become a helper function. 3702ce3ee1e7SLuigi Rizzo */ 3703ce3ee1e7SLuigi Rizzo mtx_lock(&kring->q_lock); 3704ce3ee1e7SLuigi Rizzo if (kring->nkr_stopped) { 3705ce3ee1e7SLuigi Rizzo mtx_unlock(&kring->q_lock); 3706ce3ee1e7SLuigi Rizzo goto cleanup; 3707ce3ee1e7SLuigi Rizzo } 3708ce3ee1e7SLuigi Rizzo /* on physical interfaces, do a txsync to recover 3709ce3ee1e7SLuigi Rizzo * slots for packets already transmitted. 3710ce3ee1e7SLuigi Rizzo * XXX maybe we could be optimistic and rely on a retry 3711ce3ee1e7SLuigi Rizzo * in case of failure. 3712ce3ee1e7SLuigi Rizzo */ 3713ce3ee1e7SLuigi Rizzo if (nma_is_hw(dst_na)) { 3714ce3ee1e7SLuigi Rizzo dst_na->nm_txsync(dst_ifp, dst_nr, 0); 3715ce3ee1e7SLuigi Rizzo } 3716ce3ee1e7SLuigi Rizzo my_start = j = kring->nkr_hwlease; 3717ce3ee1e7SLuigi Rizzo howmany = nm_kr_space(kring, is_vp); 3718ce3ee1e7SLuigi Rizzo if (needed < howmany) 3719ce3ee1e7SLuigi Rizzo howmany = needed; 3720ce3ee1e7SLuigi Rizzo lease_idx = nm_kr_lease(kring, howmany, is_vp); 3721ce3ee1e7SLuigi Rizzo mtx_unlock(&kring->q_lock); 3722ce3ee1e7SLuigi Rizzo 3723ce3ee1e7SLuigi Rizzo /* only retry if we need more than available slots */ 3724ce3ee1e7SLuigi Rizzo if (retry && needed <= howmany) 3725ce3ee1e7SLuigi Rizzo retry = 0; 3726ce3ee1e7SLuigi Rizzo 3727ce3ee1e7SLuigi Rizzo /* copy to the destination queue */ 3728ce3ee1e7SLuigi Rizzo while (howmany > 0) { 3729ce3ee1e7SLuigi Rizzo struct netmap_slot *slot; 3730ce3ee1e7SLuigi Rizzo struct nm_bdg_fwd *ft_p, *ft_end; 3731ce3ee1e7SLuigi Rizzo u_int cnt; 3732ce3ee1e7SLuigi Rizzo 3733ce3ee1e7SLuigi Rizzo /* find the queue from which we pick next packet. 3734ce3ee1e7SLuigi Rizzo * NM_FT_NULL is always higher than valid indexes 373585233a7dSLuigi Rizzo * so we never dereference it if the other list 3736ce3ee1e7SLuigi Rizzo * has packets (and if both are empty we never 373785233a7dSLuigi Rizzo * get here). 
			 */
			if (next < brd_next) {
				ft_p = ft + next;
				next = ft_p->ft_next;
			} else { /* insert broadcast */
				ft_p = ft + brd_next;
				brd_next = ft_p->ft_next;
			}
			cnt = ft_p->ft_frags; // cnt > 0
			if (unlikely(cnt > howmany))
				break; /* no more space */
			howmany -= cnt;
			if (netmap_verbose && cnt > 1)
				RD(5, "rx %d frags to %d", cnt, j);
			ft_end = ft_p + cnt;
			do {
				void *dst, *src = ft_p->ft_buf;
				/* round the length to a multiple of 64 */
				size_t len = (ft_p->ft_len + 63) & ~63;

				slot = &ring->slot[j];
				dst = BDG_NMB(dst_na->nm_mem, slot);

				ND("send %d %d bytes at %s:%d",
					i, ft_p->ft_len, dst_ifp->if_xname, j);
				if (ft_p->ft_flags & NS_INDIRECT) {
					if (copyin(src, dst, len)) {
						// invalid user pointer, pretend len is 0
						ft_p->ft_len = 0;
					}
				} else {
					//memcpy(dst, src, len);
					pkt_copy(src, dst, (int)len);
				}
				slot->len = ft_p->ft_len;
				slot->flags = (cnt << 8) | NS_MOREFRAG;
				j = nm_next(j, lim);
				ft_p++;
				sent++;
			} while (ft_p != ft_end);
			slot->flags = (cnt << 8); /* clear flag on last entry */
			/* are we done ? */
			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
				break;
		}
		{
			/* current position */
			uint32_t *p = kring->nkr_leases; /* shorthand */
			uint32_t update_pos;
			int still_locked = 1;

			mtx_lock(&kring->q_lock);
			if (unlikely(howmany > 0)) {
				/* not used all bufs. If i am the last one
				 * i can recover the slots, otherwise must
				 * fill them with 0 to mark empty packets.
				 */
				ND("leftover %d bufs", howmany);
				if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
					/* yes i am the last one */
					ND("roll back nkr_hwlease to %d", j);
					kring->nkr_hwlease = j;
				} else {
					while (howmany-- > 0) {
						ring->slot[j].len = 0;
						ring->slot[j].flags = 0;
						j = nm_next(j, lim);
					}
				}
			}
			p[lease_idx] = j; /* report I am done */
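
			/*
			 * Illustration of the lease protocol used
			 * below: each sender records its final
			 * position in p[lease_idx], and NR_NOSLOT
			 * marks a lease still in progress, e.g.
			 *
			 *	leases: [ 17 | NR_NOSLOT | 42 ]
			 *
			 * Only the sender holding the oldest lease
			 * (the one whose my_start matches the current
			 * update position) advances the ring past the
			 * completed leases; here it would stop at the
			 * NR_NOSLOT entry.
			 */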
			update_pos = is_vp ? nm_kr_rxpos(kring) : ring->cur;

			if (my_start == update_pos) {
				/* all slots before my_start have been reported,
				 * so scan subsequent leases to see if other ranges
				 * have been completed, and do a selwakeup or txsync.
				 */
				while (lease_idx != kring->nkr_lease_idx &&
					p[lease_idx] != NR_NOSLOT) {
					j = p[lease_idx];
					p[lease_idx] = NR_NOSLOT;
					lease_idx = nm_next(lease_idx, lim);
				}
				/* j is the new 'write' position. j != my_start
				 * means there are new buffers to report
				 */
				if (likely(j != my_start)) {
					if (is_vp) {
						uint32_t old_avail = kring->nr_hwavail;

						kring->nr_hwavail = (j >= kring->nr_hwcur) ?
							j - kring->nr_hwcur :
							j + lim + 1 - kring->nr_hwcur;
						if (kring->nr_hwavail < old_avail) {
							D("avail shrink %d -> %d",
								old_avail, kring->nr_hwavail);
						}
						still_locked = 0;
						mtx_unlock(&kring->q_lock);
						selwakeuppri(&kring->si, PI_NET);
					} else {
						ring->cur = j;
						/* XXX update avail ? */
						still_locked = 0;
						dst_na->nm_txsync(dst_ifp, dst_nr, 0);
						mtx_unlock(&kring->q_lock);

						/* retry to send more packets */
						if (nma_is_hw(dst_na) && retry--)
							goto retry;
					}
				}
			}
			if (still_locked)
				mtx_unlock(&kring->q_lock);
		}
cleanup:
		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
		d->bq_len = 0;
	}
	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
	brddst->bq_len = 0;
	return 0;
}
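
/*
 * Worked example of the modular ring arithmetic used by the
 * txsync routine below: with nkr_num_slots = 256 (lim = 255),
 * suppose the preflush stops at j = 250 while userspace had
 * advanced cur to k = 5. Then
 *
 *	k < j, so k += 256  ->  k - j = 261 - 250 = 11
 *
 * slots are still held in the ring, and the free count becomes
 * nr_hwavail = lim - (k - j) = 255 - 11 = 244.
 */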

/*
 * Main dispatch routine for the bridge.
 * We already know that only one thread is running this.
 * We must run nm_bdg_preflush without lock.
 */
static int
bdg_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
{
	struct netmap_adapter *na = NA(ifp);
	struct netmap_kring *kring = &na->tx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	u_int j, k, lim = kring->nkr_num_slots - 1;

	k = ring->cur;
	if (k > lim)
		return netmap_ring_reinit(kring);

	if (bridge_batch <= 0) { /* testing only */
		j = k; // used all
		goto done;
	}
	if (bridge_batch > NM_BDG_BATCH)
		bridge_batch = NM_BDG_BATCH;

	j = nm_bdg_preflush(na, ring_nr, kring, k);
	if (j != k)
		D("early break at %d/ %d, avail %d", j, k, kring->nr_hwavail);
	/* k-j modulo ring size is the number of slots not yet processed */
	if (k < j)
		k += kring->nkr_num_slots;
	kring->nr_hwavail = lim - (k - j);

done:
	kring->nr_hwcur = j;
	ring->avail = kring->nr_hwavail;
	if (netmap_verbose)
		D("%s ring %d flags %d", ifp->if_xname, ring_nr, flags);
	return 0;
}


/*
 * User process reading from a VALE switch.
 * Already protected against concurrent calls from userspace,
 * but we must acquire the queue's lock to protect against
 * writers on the same queue.
 */
static int
bdg_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
{
	struct netmap_adapter *na = NA(ifp);
	struct netmap_kring *kring = &na->rx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	u_int j, lim = kring->nkr_num_slots - 1;
	u_int k = ring->cur, resvd = ring->reserved;
	int n;

	mtx_lock(&kring->q_lock);
	if (k > lim) {
		D("ouch dangerous reset!!!");
		n = netmap_ring_reinit(kring);
		goto done;
	}

	/* skip past packets that userspace has released */
	j = kring->nr_hwcur;	/* netmap ring index */
	if (resvd > 0) {
		if (resvd + ring->avail >= lim + 1) {
			D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
			ring->reserved = resvd = 0; // XXX panic...
		}
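		/*
		 * ring->reserved counts slots that userspace still
		 * holds on to; rewind cur by resvd (modulo ring size)
		 * to find the last slot actually returned to the
		 * kernel. For example, with lim = 255, cur = 3 and
		 * resvd = 10, the rewound index is 3 + 256 - 10 = 249.
		 */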
		k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
	}

	if (j != k) { /* userspace has released some packets. */
		n = k - j;
		if (n < 0)
			n += kring->nkr_num_slots;
		ND("userspace releases %d packets", n);
		for (n = 0; likely(j != k); n++) {
			struct netmap_slot *slot = &ring->slot[j];
			void *addr = BDG_NMB(na->nm_mem, slot);

			if (addr == netmap_buffer_base) { /* bad buf */
				D("bad buffer index %d, ignore ?",
					slot->buf_idx);
			}
			slot->flags &= ~NS_BUF_CHANGED;
			j = nm_next(j, lim);
		}
		kring->nr_hwavail -= n;
		kring->nr_hwcur = k;
	}
	/* tell userspace that there are new packets */
	ring->avail = kring->nr_hwavail - resvd;
	n = 0;
done:
	mtx_unlock(&kring->q_lock);
	return n;
}


static int
bdg_netmap_attach(struct netmap_adapter *arg)
{
	struct netmap_adapter na;

	ND("attaching virtual bridge");
	bzero(&na, sizeof(na));

	na.ifp = arg->ifp;
	na.na_flags = NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
	na.num_tx_rings = arg->num_tx_rings;
	na.num_rx_rings = arg->num_rx_rings;
	na.num_tx_desc = arg->num_tx_desc;
	na.num_rx_desc = arg->num_rx_desc;
	na.nm_txsync = bdg_netmap_txsync;
	na.nm_rxsync = bdg_netmap_rxsync;
	na.nm_register = bdg_netmap_reg;
	na.nm_mem = netmap_mem_private_new(arg->ifp->if_xname,
			na.num_tx_rings, na.num_tx_desc,
			na.num_rx_rings, na.num_rx_desc);
	return netmap_attach(&na, na.num_tx_rings);
}
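
/*
 * Usage sketch: a process attaches to a VALE port by passing a
 * switch:port name with the "vale" prefix to NIOCREGIF; the port
 * (with the private memory region set up by bdg_netmap_attach()
 * above) is created on demand. Illustrative only, error handling
 * omitted.
 *
 *	struct nmreq nmr;
 *	int fd = open("/dev/netmap", O_RDWR);
 *
 *	bzero(&nmr, sizeof(nmr));
 *	nmr.nr_version = NETMAP_API;
 *	strncpy(nmr.nr_name, "vale0:1", sizeof(nmr.nr_name));
 *	ioctl(fd, NIOCREGIF, &nmr);	// create/attach port 1 of vale0
 */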

static struct cdev *netmap_dev; /* /dev/netmap character device. */


/*
 * Module loader.
 *
 * Create the /dev/netmap device and initialize all global
 * variables.
 *
 * Return 0 on success, errno on failure.
 */
static int
netmap_init(void)
{
	int i, error;

	NMG_LOCK_INIT();

	error = netmap_mem_init();
	if (error != 0) {
		printf("netmap: unable to initialize the memory allocator.\n");
		return (error);
	}
	printf("netmap: loaded module\n");
	netmap_dev = make_dev(&netmap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0660,
			      "netmap");

	bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
	for (i = 0; i < NM_BRIDGES; i++)
		BDG_RWINIT(&nm_bridges[i]);
	return (error);
}


/*
 * Module unloader.
 *
 * Free all the memory, and destroy the ``/dev/netmap`` device.
 */
static void
netmap_fini(void)
{
	destroy_dev(netmap_dev);
	netmap_mem_fini();
	NMG_LOCK_DESTROY();
	printf("netmap: unloaded module.\n");
}


#ifdef __FreeBSD__
/*
 * Kernel entry point.
 *
 * Initialize/finalize the module and return.
 *
 * Return 0 on success, errno on failure.
 */
static int
netmap_loader(__unused struct module *module, int event, __unused void *arg)
{
	int error = 0;

	switch (event) {
	case MOD_LOAD:
		error = netmap_init();
		break;

	case MOD_UNLOAD:
		netmap_fini();
		break;

	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}


DEV_MODULE(netmap, netmap_loader, NULL);
#endif /* __FreeBSD__ */