xref: /freebsd-14.2/sys/dev/netmap/netmap.c (revision 5864b3a5)
168b8534bSLuigi Rizzo /*
2849bec0eSLuigi Rizzo  * Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved.
368b8534bSLuigi Rizzo  *
468b8534bSLuigi Rizzo  * Redistribution and use in source and binary forms, with or without
568b8534bSLuigi Rizzo  * modification, are permitted provided that the following conditions
668b8534bSLuigi Rizzo  * are met:
768b8534bSLuigi Rizzo  *   1. Redistributions of source code must retain the above copyright
868b8534bSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer.
968b8534bSLuigi Rizzo  *   2. Redistributions in binary form must reproduce the above copyright
1068b8534bSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer in the
1168b8534bSLuigi Rizzo  *    documentation and/or other materials provided with the distribution.
1268b8534bSLuigi Rizzo  *
1368b8534bSLuigi Rizzo  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1468b8534bSLuigi Rizzo  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1568b8534bSLuigi Rizzo  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1668b8534bSLuigi Rizzo  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1768b8534bSLuigi Rizzo  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1868b8534bSLuigi Rizzo  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
1968b8534bSLuigi Rizzo  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2068b8534bSLuigi Rizzo  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2168b8534bSLuigi Rizzo  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2268b8534bSLuigi Rizzo  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2368b8534bSLuigi Rizzo  * SUCH DAMAGE.
2468b8534bSLuigi Rizzo  */
2568b8534bSLuigi Rizzo 
26ce3ee1e7SLuigi Rizzo 
2768b8534bSLuigi Rizzo /*
2868b8534bSLuigi Rizzo  * This module supports memory mapped access to network devices,
2968b8534bSLuigi Rizzo  * see netmap(4).
3068b8534bSLuigi Rizzo  *
3168b8534bSLuigi Rizzo  * The module uses a large, memory pool allocated by the kernel
3268b8534bSLuigi Rizzo  * and accessible as mmapped memory by multiple userspace threads/processes.
3368b8534bSLuigi Rizzo  * The memory pool contains packet buffers and "netmap rings",
3468b8534bSLuigi Rizzo  * i.e. user-accessible copies of the interface's queues.
3568b8534bSLuigi Rizzo  *
3668b8534bSLuigi Rizzo  * Access to the network card works like this:
3768b8534bSLuigi Rizzo  * 1. a process/thread issues one or more open() on /dev/netmap, to create
3868b8534bSLuigi Rizzo  *    select()able file descriptor on which events are reported.
3968b8534bSLuigi Rizzo  * 2. on each descriptor, the process issues an ioctl() to identify
4068b8534bSLuigi Rizzo  *    the interface that should report events to the file descriptor.
4168b8534bSLuigi Rizzo  * 3. on each descriptor, the process issues an mmap() request to
4268b8534bSLuigi Rizzo  *    map the shared memory region within the process' address space.
4368b8534bSLuigi Rizzo  *    The list of interesting queues is indicated by a location in
4468b8534bSLuigi Rizzo  *    the shared memory region.
4568b8534bSLuigi Rizzo  * 4. using the functions in the netmap(4) userspace API, a process
4668b8534bSLuigi Rizzo  *    can look up the occupation state of a queue, access memory buffers,
4768b8534bSLuigi Rizzo  *    and retrieve received packets or enqueue packets to transmit.
4868b8534bSLuigi Rizzo  * 5. using some ioctl()s the process can synchronize the userspace view
4968b8534bSLuigi Rizzo  *    of the queue with the actual status in the kernel. This includes both
5068b8534bSLuigi Rizzo  *    receiving the notification of new packets, and transmitting new
5168b8534bSLuigi Rizzo  *    packets on the output interface.
5268b8534bSLuigi Rizzo  * 6. select() or poll() can be used to wait for events on individual
5368b8534bSLuigi Rizzo  *    transmit or receive queues (or all queues for a given interface).
54ce3ee1e7SLuigi Rizzo  *
55ce3ee1e7SLuigi Rizzo 
56ce3ee1e7SLuigi Rizzo 		SYNCHRONIZATION (USER)
57ce3ee1e7SLuigi Rizzo 
58ce3ee1e7SLuigi Rizzo The netmap rings and data structures may be shared among multiple
59ce3ee1e7SLuigi Rizzo user threads or even independent processes.
60ce3ee1e7SLuigi Rizzo Any synchronization among those threads/processes is delegated
61ce3ee1e7SLuigi Rizzo to the threads themselves. Only one thread at a time can be in
62ce3ee1e7SLuigi Rizzo a system call on the same netmap ring. The OS does not enforce
63ce3ee1e7SLuigi Rizzo this and only guarantees against system crashes in case of
64ce3ee1e7SLuigi Rizzo invalid usage.
65ce3ee1e7SLuigi Rizzo 
66ce3ee1e7SLuigi Rizzo 		LOCKING (INTERNAL)
67ce3ee1e7SLuigi Rizzo 
68ce3ee1e7SLuigi Rizzo Within the kernel, access to the netmap rings is protected as follows:
69ce3ee1e7SLuigi Rizzo 
70ce3ee1e7SLuigi Rizzo - a spinlock on each ring, to handle producer/consumer races on
71ce3ee1e7SLuigi Rizzo   RX rings attached to the host stack (against multiple host
72ce3ee1e7SLuigi Rizzo   threads writing from the host stack to the same ring),
73ce3ee1e7SLuigi Rizzo   and on 'destination' rings attached to a VALE switch
74ce3ee1e7SLuigi Rizzo   (i.e. RX rings in VALE ports, and TX rings in NIC/host ports)
75ce3ee1e7SLuigi Rizzo   protecting multiple active senders for the same destination.
76ce3ee1e7SLuigi Rizzo 
77ce3ee1e7SLuigi Rizzo - an atomic variable to guarantee that there is at most one
78ce3ee1e7SLuigi Rizzo   instance of *_*xsync() on the ring at any time.
79ce3ee1e7SLuigi Rizzo   For rings connected to user file
80ce3ee1e7SLuigi Rizzo   descriptors, an atomic_test_and_set() protects this, and the
81ce3ee1e7SLuigi Rizzo   lock on the ring is not actually used.
82ce3ee1e7SLuigi Rizzo   For NIC RX rings connected to a VALE switch, an atomic_test_and_set()
83ce3ee1e7SLuigi Rizzo   is also used to prevent multiple executions (the driver might indeed
84ce3ee1e7SLuigi Rizzo   already guarantee this).
85ce3ee1e7SLuigi Rizzo   For NIC TX rings connected to a VALE switch, the lock arbitrates
86ce3ee1e7SLuigi Rizzo   access to the queue (both when allocating buffers and when pushing
87ce3ee1e7SLuigi Rizzo   them out).
88ce3ee1e7SLuigi Rizzo 
89ce3ee1e7SLuigi Rizzo - *xsync() should be protected against initializations of the card.
90ce3ee1e7SLuigi Rizzo   On FreeBSD most devices have the reset routine protected by
91ce3ee1e7SLuigi Rizzo   a RING lock (ixgbe, igb, em) or core lock (re). lem is missing
92ce3ee1e7SLuigi Rizzo   the RING protection on rx_reset(), this should be added.
93ce3ee1e7SLuigi Rizzo 
94ce3ee1e7SLuigi Rizzo   On linux there is an external lock on the tx path, which probably
95ce3ee1e7SLuigi Rizzo   also arbitrates access to the reset routine. XXX to be revised
96ce3ee1e7SLuigi Rizzo 
97ce3ee1e7SLuigi Rizzo - a per-interface core_lock protecting access from the host stack
98ce3ee1e7SLuigi Rizzo   while interfaces may be detached from netmap mode.
99ce3ee1e7SLuigi Rizzo   XXX there should be no need for this lock if we detach the interfaces
100ce3ee1e7SLuigi Rizzo   only while they are down.
101ce3ee1e7SLuigi Rizzo 
102ce3ee1e7SLuigi Rizzo 
103ce3ee1e7SLuigi Rizzo --- VALE SWITCH ---
104ce3ee1e7SLuigi Rizzo 
105ce3ee1e7SLuigi Rizzo NMG_LOCK() serializes all modifications to switches and ports.
106ce3ee1e7SLuigi Rizzo A switch cannot be deleted until all ports are gone.
107ce3ee1e7SLuigi Rizzo 
108ce3ee1e7SLuigi Rizzo For each switch, an SX lock (RWlock on linux) protects
109ce3ee1e7SLuigi Rizzo deletion of ports. When configuring or deleting a new port, the
110ce3ee1e7SLuigi Rizzo lock is acquired in exclusive mode (after holding NMG_LOCK).
111ce3ee1e7SLuigi Rizzo When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
112ce3ee1e7SLuigi Rizzo The lock is held throughout the entire forwarding cycle,
113ce3ee1e7SLuigi Rizzo during which the thread may incur a page fault.
114ce3ee1e7SLuigi Rizzo Hence it is important that sleepable shared locks are used.
115ce3ee1e7SLuigi Rizzo 
116ce3ee1e7SLuigi Rizzo On the rx ring, the per-port lock is grabbed initially to reserve
117ce3ee1e7SLuigi Rizzo a number of slots in the ring, then the lock is released,
118ce3ee1e7SLuigi Rizzo packets are copied from source to destination, and then
119ce3ee1e7SLuigi Rizzo the lock is acquired again and the receive ring is updated.
120ce3ee1e7SLuigi Rizzo (A similar thing is done on the tx ring for NIC and host stack
121ce3ee1e7SLuigi Rizzo ports attached to the switch)
122ce3ee1e7SLuigi Rizzo 
12368b8534bSLuigi Rizzo  */
12468b8534bSLuigi Rizzo 
125ce3ee1e7SLuigi Rizzo /*
126ce3ee1e7SLuigi Rizzo  * OS-specific code that is used only within this file.
127ce3ee1e7SLuigi Rizzo  * Other OS-specific code that must be accessed by drivers
128ce3ee1e7SLuigi Rizzo  * is present in netmap_kern.h
129ce3ee1e7SLuigi Rizzo  */
13001c7d25fSLuigi Rizzo 
131ce3ee1e7SLuigi Rizzo #if defined(__FreeBSD__)
13268b8534bSLuigi Rizzo #include <sys/cdefs.h> /* prerequisite */
13368b8534bSLuigi Rizzo __FBSDID("$FreeBSD$");
13468b8534bSLuigi Rizzo 
13568b8534bSLuigi Rizzo #include <sys/types.h>
13668b8534bSLuigi Rizzo #include <sys/module.h>
13768b8534bSLuigi Rizzo #include <sys/errno.h>
13868b8534bSLuigi Rizzo #include <sys/param.h>	/* defines used in kernel.h */
139506cc70cSLuigi Rizzo #include <sys/jail.h>
14068b8534bSLuigi Rizzo #include <sys/kernel.h>	/* types used in module initialization */
14168b8534bSLuigi Rizzo #include <sys/conf.h>	/* cdevsw struct */
14268b8534bSLuigi Rizzo #include <sys/uio.h>	/* uio struct */
14368b8534bSLuigi Rizzo #include <sys/sockio.h>
14468b8534bSLuigi Rizzo #include <sys/socketvar.h>	/* struct socket */
14568b8534bSLuigi Rizzo #include <sys/malloc.h>
14668b8534bSLuigi Rizzo #include <sys/mman.h>	/* PROT_EXEC */
14768b8534bSLuigi Rizzo #include <sys/poll.h>
148506cc70cSLuigi Rizzo #include <sys/proc.h>
14989f6b863SAttilio Rao #include <sys/rwlock.h>
15068b8534bSLuigi Rizzo #include <vm/vm.h>	/* vtophys */
15168b8534bSLuigi Rizzo #include <vm/pmap.h>	/* vtophys */
152ce3ee1e7SLuigi Rizzo #include <vm/vm_param.h>
153ce3ee1e7SLuigi Rizzo #include <vm/vm_object.h>
154ce3ee1e7SLuigi Rizzo #include <vm/vm_page.h>
155ce3ee1e7SLuigi Rizzo #include <vm/vm_pager.h>
156ce3ee1e7SLuigi Rizzo #include <vm/uma.h>
15768b8534bSLuigi Rizzo #include <sys/socket.h> /* sockaddrs */
15868b8534bSLuigi Rizzo #include <sys/selinfo.h>
15968b8534bSLuigi Rizzo #include <sys/sysctl.h>
16068b8534bSLuigi Rizzo #include <net/if.h>
16176039bc8SGleb Smirnoff #include <net/if_var.h>
16268b8534bSLuigi Rizzo #include <net/bpf.h>		/* BIOCIMMEDIATE */
163506cc70cSLuigi Rizzo #include <net/vnet.h>
16468b8534bSLuigi Rizzo #include <machine/bus.h>	/* bus_dmamap_* */
165ce3ee1e7SLuigi Rizzo #include <sys/endian.h>
166ce3ee1e7SLuigi Rizzo #include <sys/refcount.h>
16768b8534bSLuigi Rizzo 
168ce3ee1e7SLuigi Rizzo #define prefetch(x)	__builtin_prefetch(x)
16968b8534bSLuigi Rizzo 
170ce3ee1e7SLuigi Rizzo #define BDG_RWLOCK_T		struct rwlock // struct rwlock
171ce3ee1e7SLuigi Rizzo 
172ce3ee1e7SLuigi Rizzo #define	BDG_RWINIT(b)		\
173ce3ee1e7SLuigi Rizzo 	rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
174ce3ee1e7SLuigi Rizzo #define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)
175ce3ee1e7SLuigi Rizzo #define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)
176ce3ee1e7SLuigi Rizzo #define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)
177ce3ee1e7SLuigi Rizzo #define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)
178ce3ee1e7SLuigi Rizzo #define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)
179ce3ee1e7SLuigi Rizzo #define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)
180ce3ee1e7SLuigi Rizzo 
181ce3ee1e7SLuigi Rizzo 
182ce3ee1e7SLuigi Rizzo /* netmap global lock.
183ce3ee1e7SLuigi Rizzo  * normally called within the user thread (upon a system call)
184ce3ee1e7SLuigi Rizzo  * or when a file descriptor or process is terminated
185ce3ee1e7SLuigi Rizzo  * (last close or last munmap)
186ce3ee1e7SLuigi Rizzo  */
187ce3ee1e7SLuigi Rizzo 
188ce3ee1e7SLuigi Rizzo #define NMG_LOCK_T		struct mtx
189ce3ee1e7SLuigi Rizzo #define NMG_LOCK_INIT()		mtx_init(&netmap_global_lock, "netmap global lock", NULL, MTX_DEF)
190ce3ee1e7SLuigi Rizzo #define NMG_LOCK_DESTROY()	mtx_destroy(&netmap_global_lock)
191ce3ee1e7SLuigi Rizzo #define NMG_LOCK()		mtx_lock(&netmap_global_lock)
192ce3ee1e7SLuigi Rizzo #define NMG_UNLOCK()		mtx_unlock(&netmap_global_lock)
193ce3ee1e7SLuigi Rizzo #define NMG_LOCK_ASSERT()	mtx_assert(&netmap_global_lock, MA_OWNED)
194ce3ee1e7SLuigi Rizzo 
195ce3ee1e7SLuigi Rizzo 
196ce3ee1e7SLuigi Rizzo /* atomic operations */
197ce3ee1e7SLuigi Rizzo #include <machine/atomic.h>
198ce3ee1e7SLuigi Rizzo #define NM_ATOMIC_TEST_AND_SET(p)	(!atomic_cmpset_acq_int((p), 0, 1))
199ce3ee1e7SLuigi Rizzo #define NM_ATOMIC_CLEAR(p)		atomic_store_rel_int((p), 0)
200ce3ee1e7SLuigi Rizzo 
201ce3ee1e7SLuigi Rizzo 
202ce3ee1e7SLuigi Rizzo #elif defined(linux)
203ce3ee1e7SLuigi Rizzo 
204ce3ee1e7SLuigi Rizzo #include "bsd_glue.h"
205ce3ee1e7SLuigi Rizzo 
206ce3ee1e7SLuigi Rizzo static netdev_tx_t linux_netmap_start_xmit(struct sk_buff *, struct net_device *);
207ce3ee1e7SLuigi Rizzo 
208ce3ee1e7SLuigi Rizzo static struct device_driver*
209ce3ee1e7SLuigi Rizzo linux_netmap_find_driver(struct device *dev)
210ce3ee1e7SLuigi Rizzo {
211ce3ee1e7SLuigi Rizzo 	struct device_driver *dd;
212ce3ee1e7SLuigi Rizzo 
213ce3ee1e7SLuigi Rizzo 	while ( (dd = dev->driver) == NULL ) {
214ce3ee1e7SLuigi Rizzo 		if ( (dev = dev->parent) == NULL )
215ce3ee1e7SLuigi Rizzo 			return NULL;
216ce3ee1e7SLuigi Rizzo 	}
217ce3ee1e7SLuigi Rizzo 	return dd;
218ce3ee1e7SLuigi Rizzo }
219ce3ee1e7SLuigi Rizzo 
220ce3ee1e7SLuigi Rizzo static struct net_device*
221ce3ee1e7SLuigi Rizzo ifunit_ref(const char *name)
222ce3ee1e7SLuigi Rizzo {
223ce3ee1e7SLuigi Rizzo 	struct net_device *ifp = dev_get_by_name(&init_net, name);
224ce3ee1e7SLuigi Rizzo 	struct device_driver *dd;
225ce3ee1e7SLuigi Rizzo 
226ce3ee1e7SLuigi Rizzo 	if (ifp == NULL)
227ce3ee1e7SLuigi Rizzo 		return NULL;
228ce3ee1e7SLuigi Rizzo 
229ce3ee1e7SLuigi Rizzo 	if ( (dd = linux_netmap_find_driver(&ifp->dev)) == NULL )
230ce3ee1e7SLuigi Rizzo 		goto error;
231ce3ee1e7SLuigi Rizzo 
232ce3ee1e7SLuigi Rizzo 	if (!try_module_get(dd->owner))
233ce3ee1e7SLuigi Rizzo 		goto error;
234ce3ee1e7SLuigi Rizzo 
235ce3ee1e7SLuigi Rizzo 	return ifp;
236ce3ee1e7SLuigi Rizzo error:
237ce3ee1e7SLuigi Rizzo 	dev_put(ifp);
238ce3ee1e7SLuigi Rizzo 	return NULL;
239ce3ee1e7SLuigi Rizzo }
240ce3ee1e7SLuigi Rizzo 
/*
 * Release the references taken by ifunit_ref(): drop the device
 * reference and, if a driver is (still) attached, the reference on
 * the driver module.  Note the driver is looked up before dev_put()
 * so we never touch ifp after releasing it.
 */
static void
if_rele(struct net_device *ifp)
{
	struct device_driver *dd;
	dd = linux_netmap_find_driver(&ifp->dev);
	dev_put(ifp);
	if (dd)
		module_put(dd->owner);
}
250ce3ee1e7SLuigi Rizzo 
251ce3ee1e7SLuigi Rizzo // XXX a mtx would suffice here too 20130404 gl
252ce3ee1e7SLuigi Rizzo #define NMG_LOCK_T		struct semaphore
253ce3ee1e7SLuigi Rizzo #define NMG_LOCK_INIT()		sema_init(&netmap_global_lock, 1)
254ce3ee1e7SLuigi Rizzo #define NMG_LOCK_DESTROY()
255ce3ee1e7SLuigi Rizzo #define NMG_LOCK()		down(&netmap_global_lock)
256ce3ee1e7SLuigi Rizzo #define NMG_UNLOCK()		up(&netmap_global_lock)
257ce3ee1e7SLuigi Rizzo #define NMG_LOCK_ASSERT()	//	XXX to be completed
258ce3ee1e7SLuigi Rizzo 
259ce3ee1e7SLuigi Rizzo 
260ce3ee1e7SLuigi Rizzo #elif defined(__APPLE__)
261ce3ee1e7SLuigi Rizzo 
262ce3ee1e7SLuigi Rizzo #warning OSX support is only partial
263ce3ee1e7SLuigi Rizzo #include "osx_glue.h"
264ce3ee1e7SLuigi Rizzo 
265ce3ee1e7SLuigi Rizzo #else
266ce3ee1e7SLuigi Rizzo 
267ce3ee1e7SLuigi Rizzo #error	Unsupported platform
268ce3ee1e7SLuigi Rizzo 
269ce3ee1e7SLuigi Rizzo #endif /* unsupported */
270ce3ee1e7SLuigi Rizzo 
271ce3ee1e7SLuigi Rizzo /*
272ce3ee1e7SLuigi Rizzo  * common headers
273ce3ee1e7SLuigi Rizzo  */
2740b8ed8e0SLuigi Rizzo #include <net/netmap.h>
2750b8ed8e0SLuigi Rizzo #include <dev/netmap/netmap_kern.h>
276ce3ee1e7SLuigi Rizzo #include <dev/netmap/netmap_mem2.h>
2770b8ed8e0SLuigi Rizzo 
278ce3ee1e7SLuigi Rizzo 
279ce3ee1e7SLuigi Rizzo MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");
280ce3ee1e7SLuigi Rizzo 
281ce3ee1e7SLuigi Rizzo /*
282ce3ee1e7SLuigi Rizzo  * The following variables are used by the drivers and replicate
283ce3ee1e7SLuigi Rizzo  * fields in the global memory pool. They only refer to buffers
284ce3ee1e7SLuigi Rizzo  * used by physical interfaces.
285ce3ee1e7SLuigi Rizzo  */
2865819da83SLuigi Rizzo u_int netmap_total_buffers;
2878241616dSLuigi Rizzo u_int netmap_buf_size;
288ce3ee1e7SLuigi Rizzo char *netmap_buffer_base;	/* also address of an invalid buffer */
2895819da83SLuigi Rizzo 
2905819da83SLuigi Rizzo /* user-controlled variables */
2915819da83SLuigi Rizzo int netmap_verbose;
2925819da83SLuigi Rizzo 
2935819da83SLuigi Rizzo static int netmap_no_timestamp; /* don't timestamp on rxsync */
2945819da83SLuigi Rizzo 
2955819da83SLuigi Rizzo SYSCTL_NODE(_dev, OID_AUTO, netmap, CTLFLAG_RW, 0, "Netmap args");
2965819da83SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,
2975819da83SLuigi Rizzo     CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
2985819da83SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
2995819da83SLuigi Rizzo     CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
3005819da83SLuigi Rizzo int netmap_mitigate = 1;
3015819da83SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, "");
302c85cb1a0SLuigi Rizzo int netmap_no_pendintr = 1;
3035819da83SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr,
3045819da83SLuigi Rizzo     CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets.");
305f18be576SLuigi Rizzo int netmap_txsync_retry = 2;
306f18be576SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW,
307f18be576SLuigi Rizzo     &netmap_txsync_retry, 0 , "Number of txsync loops in bridge's flush.");
3085819da83SLuigi Rizzo 
309f196ce38SLuigi Rizzo int netmap_drop = 0;	/* debugging */
310f196ce38SLuigi Rizzo int netmap_flags = 0;	/* debug flags */
311091fd0abSLuigi Rizzo int netmap_fwd = 0;	/* force transparent mode */
312ce3ee1e7SLuigi Rizzo int netmap_mmap_unreg = 0; /* allow mmap of unregistered fds */
313f196ce38SLuigi Rizzo 
314f196ce38SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, drop, CTLFLAG_RW, &netmap_drop, 0 , "");
315f196ce38SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
316091fd0abSLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
317ce3ee1e7SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, mmap_unreg, CTLFLAG_RW, &netmap_mmap_unreg, 0, "");
318f196ce38SLuigi Rizzo 
319ce3ee1e7SLuigi Rizzo NMG_LOCK_T	netmap_global_lock;
320ce3ee1e7SLuigi Rizzo 
321ce3ee1e7SLuigi Rizzo /*
322ce3ee1e7SLuigi Rizzo  * protect against multiple threads using the same ring.
323ce3ee1e7SLuigi Rizzo  * also check that the ring has not been stopped.
324ce3ee1e7SLuigi Rizzo  */
325ce3ee1e7SLuigi Rizzo #define NM_KR_BUSY	1
326ce3ee1e7SLuigi Rizzo #define NM_KR_STOPPED	2
327ce3ee1e7SLuigi Rizzo static void nm_kr_put(struct netmap_kring *kr);
/*
 * Try to acquire exclusive (busy) access to a kring without sleeping.
 * Returns 0 on success (caller must later call nm_kr_put()),
 * NM_KR_BUSY if another thread holds the ring, or NM_KR_STOPPED if
 * the ring is being stopped.
 * NOTE(review): the statement order is significant — nkr_stopped is
 * re-checked after the atomic test-and-set so a concurrent
 * nm_disable_ring() cannot be missed between the two checks.
 */
static __inline int nm_kr_tryget(struct netmap_kring *kr)
{
	/* check a first time without taking the lock
	 * to avoid starvation for nm_kr_get()
	 */
	if (unlikely(kr->nkr_stopped)) {
		ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
		return NM_KR_STOPPED;
	}
	if (unlikely(NM_ATOMIC_TEST_AND_SET(&kr->nr_busy)))
		return NM_KR_BUSY;
	/* check a second time with lock held */
	if (unlikely(kr->nkr_stopped)) {
		ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
		nm_kr_put(kr);	/* undo the acquisition */
		return NM_KR_STOPPED;
	}
	return 0;
}
347ce3ee1e7SLuigi Rizzo 
/*
 * Release the busy flag on a kring; pairs with a successful
 * nm_kr_tryget() or nm_kr_get().
 */
static __inline void nm_kr_put(struct netmap_kring *kr)
{
	NM_ATOMIC_CLEAR(&kr->nr_busy);
}
352ce3ee1e7SLuigi Rizzo 
353ce3ee1e7SLuigi Rizzo static void nm_kr_get(struct netmap_kring *kr)
354ce3ee1e7SLuigi Rizzo {
355ce3ee1e7SLuigi Rizzo 	while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy))
356ce3ee1e7SLuigi Rizzo 		tsleep(kr, 0, "NM_KR_GET", 4);
357ce3ee1e7SLuigi Rizzo }
358ce3ee1e7SLuigi Rizzo 
/*
 * Stop a single kring and drain its users.
 * NOTE(review): the order is essential — nkr_stopped is raised first
 * so new nm_kr_tryget() calls fail, then we wait for the current busy
 * owner (if any) via nm_kr_get().
 */
static void nm_disable_ring(struct netmap_kring *kr)
{
	kr->nkr_stopped = 1;	/* reject new entrants */
	nm_kr_get(kr);		/* wait for the current owner to leave */
	/* empty lock/unlock pair: flushes any thread still inside q_lock */
	mtx_lock(&kr->q_lock);
	mtx_unlock(&kr->q_lock);
	nm_kr_put(kr);		/* drop the busy flag we just took */
}
367ce3ee1e7SLuigi Rizzo 
368ce3ee1e7SLuigi Rizzo void netmap_disable_all_rings(struct ifnet *ifp)
369ce3ee1e7SLuigi Rizzo {
370ce3ee1e7SLuigi Rizzo 	struct netmap_adapter *na;
371ce3ee1e7SLuigi Rizzo 	int i;
372ce3ee1e7SLuigi Rizzo 
373ce3ee1e7SLuigi Rizzo 	if (!(ifp->if_capenable & IFCAP_NETMAP))
374ce3ee1e7SLuigi Rizzo 		return;
375ce3ee1e7SLuigi Rizzo 
376ce3ee1e7SLuigi Rizzo 	na = NA(ifp);
377ce3ee1e7SLuigi Rizzo 
378ce3ee1e7SLuigi Rizzo 	for (i = 0; i < na->num_tx_rings + 1; i++) {
379ce3ee1e7SLuigi Rizzo 		nm_disable_ring(na->tx_rings + i);
380ce3ee1e7SLuigi Rizzo 		selwakeuppri(&na->tx_rings[i].si, PI_NET);
381ce3ee1e7SLuigi Rizzo 	}
382ce3ee1e7SLuigi Rizzo 	for (i = 0; i < na->num_rx_rings + 1; i++) {
383ce3ee1e7SLuigi Rizzo 		nm_disable_ring(na->rx_rings + i);
384ce3ee1e7SLuigi Rizzo 		selwakeuppri(&na->rx_rings[i].si, PI_NET);
385ce3ee1e7SLuigi Rizzo 	}
386ce3ee1e7SLuigi Rizzo 	selwakeuppri(&na->tx_si, PI_NET);
387ce3ee1e7SLuigi Rizzo 	selwakeuppri(&na->rx_si, PI_NET);
388ce3ee1e7SLuigi Rizzo }
389ce3ee1e7SLuigi Rizzo 
390ce3ee1e7SLuigi Rizzo void netmap_enable_all_rings(struct ifnet *ifp)
391ce3ee1e7SLuigi Rizzo {
392ce3ee1e7SLuigi Rizzo 	struct netmap_adapter *na;
393ce3ee1e7SLuigi Rizzo 	int i;
394ce3ee1e7SLuigi Rizzo 
395ce3ee1e7SLuigi Rizzo 	if (!(ifp->if_capenable & IFCAP_NETMAP))
396ce3ee1e7SLuigi Rizzo 		return;
397ce3ee1e7SLuigi Rizzo 
398ce3ee1e7SLuigi Rizzo 	na = NA(ifp);
399ce3ee1e7SLuigi Rizzo 	for (i = 0; i < na->num_tx_rings + 1; i++) {
400ce3ee1e7SLuigi Rizzo 		D("enabling %p", na->tx_rings + i);
401ce3ee1e7SLuigi Rizzo 		na->tx_rings[i].nkr_stopped = 0;
402ce3ee1e7SLuigi Rizzo 	}
403ce3ee1e7SLuigi Rizzo 	for (i = 0; i < na->num_rx_rings + 1; i++) {
404ce3ee1e7SLuigi Rizzo 		D("enabling %p", na->rx_rings + i);
405ce3ee1e7SLuigi Rizzo 		na->rx_rings[i].nkr_stopped = 0;
406ce3ee1e7SLuigi Rizzo 	}
407ce3ee1e7SLuigi Rizzo }
408ce3ee1e7SLuigi Rizzo 
409ce3ee1e7SLuigi Rizzo 
410ce3ee1e7SLuigi Rizzo /*
411ce3ee1e7SLuigi Rizzo  * generic bound_checking function
412ce3ee1e7SLuigi Rizzo  */
/*
 * Generic bound-checking helper: force *v into [lo, hi].
 * A value below lo is reset to dflt (itself first clamped into the
 * range); a value above hi is clamped to hi.  If msg is non-NULL,
 * any adjustment is reported on the console.
 * Returns the resulting value of *v.
 *
 * Fix: the report used %d for u_int arguments, a format/argument
 * type mismatch (undefined behavior, and wrong output for values
 * above INT_MAX); use %u.  u_int is spelled as its underlying
 * 'unsigned int' here, which is the identical type.
 */
unsigned int
nm_bound_var(unsigned int *v, unsigned int dflt, unsigned int lo,
	unsigned int hi, const char *msg)
{
	unsigned int oldv = *v;
	const char *op = NULL;

	/* make sure the fallback default itself is within bounds */
	if (dflt < lo)
		dflt = lo;
	if (dflt > hi)
		dflt = hi;
	if (oldv < lo) {
		*v = dflt;	/* too small: bump to the (clamped) default */
		op = "Bump";
	} else if (oldv > hi) {
		*v = hi;	/* too large: clamp to the upper bound */
		op = "Clamp";
	}
	if (op && msg)
		printf("%s %s to %u (was %u)\n", op, msg, *v, oldv);
	return *v;
}
434ce3ee1e7SLuigi Rizzo 
435ce3ee1e7SLuigi Rizzo /*
436ce3ee1e7SLuigi Rizzo  * packet-dump function, user-supplied or static buffer.
437ce3ee1e7SLuigi Rizzo  * The destination buffer must be at least 30+4*len
438ce3ee1e7SLuigi Rizzo  */
/*
 * packet-dump function, user-supplied or static buffer.
 * The destination buffer must be at least 30+4*len.
 * Emits a header line, then classic hexdump rows of 16 bytes:
 * offset, hex columns at offsets 0..47, printable chars at 48..63.
 * If dst is NULL a static buffer is used (not reentrant).
 */
const char *
nm_dump_buf(char *p, int len, int lim, char *dst)
{
	static char _dst[8192];
	static char hexdigit[] = "0123456789abcdef";
	int pos, col, row_start;
	char *wr;	/* output cursor */

#define HEX_HI(x)	hexdigit[((x) & 0xf0) >> 4]
#define HEX_LO(x)	hexdigit[(x) & 0xf]
#define PRINTABLE(x)	(((x) >= 0x20 && (x) <= 0x7e) ? (x) : '.')
	if (dst == NULL)
		dst = _dst;
	if (lim <= 0 || lim > len)
		lim = len;
	wr = dst;
	sprintf(wr, "buf 0x%p len %d lim %d\n", p, len, lim);
	wr += strlen(wr);
	/* one iteration of the outer loop per 16-byte row */
	for (pos = 0; pos < lim; ) {
		sprintf(wr, "%5d: ", pos);
		wr += strlen(wr);
		memset(wr, ' ', 48);	/* blank the hex columns */
		row_start = pos;
		for (col = 0; col < 16 && pos < lim; pos++, col++) {
			wr[col * 3] = HEX_HI(p[pos]);
			wr[col * 3 + 1] = HEX_LO(p[pos]);
		}
		pos = row_start;	/* second pass: printable column */
		for (col = 0; col < 16 && pos < lim; pos++, col++)
			wr[col + 48] = PRINTABLE(p[pos]);
		wr[col + 48] = '\n';
		wr += col + 49;
	}
	*wr = '\0';
#undef HEX_HI
#undef HEX_LO
#undef PRINTABLE
	return dst;
}
479f196ce38SLuigi Rizzo 
480f196ce38SLuigi Rizzo /*
481f18be576SLuigi Rizzo  * system parameters (most of them in netmap_kern.h)
482f18be576SLuigi Rizzo  * NM_NAME	prefix for switch port names, default "vale"
483ce3ee1e7SLuigi Rizzo  * NM_BDG_MAXPORTS	number of ports
484f18be576SLuigi Rizzo  * NM_BRIDGES	max number of switches in the system.
485f18be576SLuigi Rizzo  *	XXX should become a sysctl or tunable
486f196ce38SLuigi Rizzo  *
487f18be576SLuigi Rizzo  * Switch ports are named valeX:Y where X is the switch name and Y
488f18be576SLuigi Rizzo  * is the port. If Y matches a physical interface name, the port is
489f18be576SLuigi Rizzo  * connected to a physical device.
490f18be576SLuigi Rizzo  *
491f18be576SLuigi Rizzo  * Unlike physical interfaces, switch ports use their own memory region
492f18be576SLuigi Rizzo  * for rings and buffers.
493f196ce38SLuigi Rizzo  * The virtual interfaces use per-queue lock instead of core lock.
494f196ce38SLuigi Rizzo  * In the tx loop, we aggregate traffic in batches to make all operations
495ce3ee1e7SLuigi Rizzo  * faster. The batch size is bridge_batch.
496f196ce38SLuigi Rizzo  */
497f18be576SLuigi Rizzo #define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
498ce3ee1e7SLuigi Rizzo #define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
499f196ce38SLuigi Rizzo #define NM_BRIDGE_RINGSIZE	1024	/* in the device */
500f196ce38SLuigi Rizzo #define NM_BDG_HASH		1024	/* forwarding table entries */
501f196ce38SLuigi Rizzo #define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
502ce3ee1e7SLuigi Rizzo #define NM_MULTISEG		64	/* max size of a chain of bufs */
503ce3ee1e7SLuigi Rizzo /* actual size of the tables */
504ce3ee1e7SLuigi Rizzo #define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
505ce3ee1e7SLuigi Rizzo /* NM_FT_NULL terminates a list of slots in the ft */
506ce3ee1e7SLuigi Rizzo #define NM_FT_NULL		NM_BDG_BATCH_MAX
507f18be576SLuigi Rizzo #define	NM_BRIDGES		8	/* number of bridges */
508d4b42e08SLuigi Rizzo 
509d4b42e08SLuigi Rizzo 
510ce3ee1e7SLuigi Rizzo /*
511ce3ee1e7SLuigi Rizzo  * bridge_batch is set via sysctl to the max batch size to be
512ce3ee1e7SLuigi Rizzo  * used in the bridge. The actual value may be larger as the
513ce3ee1e7SLuigi Rizzo  * last packet in the block may overflow the size.
514ce3ee1e7SLuigi Rizzo  */
515ce3ee1e7SLuigi Rizzo int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
516ce3ee1e7SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
51701c7d25fSLuigi Rizzo 
518f196ce38SLuigi Rizzo 
519849bec0eSLuigi Rizzo /*
520849bec0eSLuigi Rizzo  * These are used to handle reference counters for bridge ports.
521849bec0eSLuigi Rizzo  */
522849bec0eSLuigi Rizzo #define	ADD_BDG_REF(ifp)	refcount_acquire(&NA(ifp)->na_bdg_refcount)
523849bec0eSLuigi Rizzo #define	DROP_BDG_REF(ifp)	refcount_release(&NA(ifp)->na_bdg_refcount)
524849bec0eSLuigi Rizzo 
525ce3ee1e7SLuigi Rizzo /* The bridge references the buffers using the device specific look up table */
526ce3ee1e7SLuigi Rizzo static inline void *
527ce3ee1e7SLuigi Rizzo BDG_NMB(struct netmap_mem_d *nmd, struct netmap_slot *slot)
528ce3ee1e7SLuigi Rizzo {
529ce3ee1e7SLuigi Rizzo 	struct lut_entry *lut = nmd->pools[NETMAP_BUF_POOL].lut;
530ce3ee1e7SLuigi Rizzo 	uint32_t i = slot->buf_idx;
531ce3ee1e7SLuigi Rizzo 	return (unlikely(i >= nmd->pools[NETMAP_BUF_POOL].objtotal)) ?  lut[0].vaddr : lut[i].vaddr;
532ce3ee1e7SLuigi Rizzo }
533ce3ee1e7SLuigi Rizzo 
5345ab0d24dSLuigi Rizzo static int bdg_netmap_attach(struct netmap_adapter *);
535f196ce38SLuigi Rizzo static int bdg_netmap_reg(struct ifnet *ifp, int onoff);
536ce3ee1e7SLuigi Rizzo int kern_netmap_regif(struct nmreq *nmr);
537f18be576SLuigi Rizzo 
538ce3ee1e7SLuigi Rizzo /*
539ce3ee1e7SLuigi Rizzo  * Each transmit queue accumulates a batch of packets into
540ce3ee1e7SLuigi Rizzo  * a structure before forwarding. Packets to the same
541ce3ee1e7SLuigi Rizzo  * destination are put in a list using ft_next as a link field.
542ce3ee1e7SLuigi Rizzo  * ft_frags and ft_next are valid only on the first fragment.
543ce3ee1e7SLuigi Rizzo  */
struct nm_bdg_fwd {	/* forwarding entry for a bridge */
	void *ft_buf;		/* netmap or indirect buffer */
	uint8_t ft_frags;	/* how many fragments (only on 1st frag) */
	uint8_t _ft_port;	/* dst port (unused) */
	uint16_t ft_flags;	/* flags, e.g. indirect */
	uint16_t ft_len;	/* src fragment len */
	uint16_t ft_next;	/* next packet to same destination
				 * (NM_FT_NULL terminates the list) */
};
552f18be576SLuigi Rizzo 
553ce3ee1e7SLuigi Rizzo /*
554ce3ee1e7SLuigi Rizzo  * For each output interface, nm_bdg_q is used to construct a list.
555ce3ee1e7SLuigi Rizzo  * bq_len is the number of output buffers (we can have coalescing
556ce3ee1e7SLuigi Rizzo  * during the copy).
557f18be576SLuigi Rizzo  */
struct nm_bdg_q {
	/* bq_head/bq_tail: presumably indexes into the ft array, linked
	 * through nm_bdg_fwd.ft_next -- TODO confirm against flush code */
	uint16_t bq_head;
	uint16_t bq_tail;
	uint32_t bq_len;	/* number of buffers */
};
563f196ce38SLuigi Rizzo 
564ce3ee1e7SLuigi Rizzo /* XXX revise this */
/* Forwarding-table entry: a learned MAC plus its destination port(s). */
struct nm_hash_ent {
	uint64_t	mac;	/* the top 2 bytes are the epoch */
	uint64_t	ports;	/* presumably the destination port set --
				 * confirm against the lookup function */
};
569f196ce38SLuigi Rizzo 
570f196ce38SLuigi Rizzo /*
571ce3ee1e7SLuigi Rizzo  * nm_bridge is a descriptor for a VALE switch.
572849bec0eSLuigi Rizzo  * Interfaces for a bridge are all in bdg_ports[].
573f196ce38SLuigi Rizzo  * The array has fixed size, an empty entry does not terminate
574ce3ee1e7SLuigi Rizzo  * the search, but lookups only occur on attach/detach so we
575849bec0eSLuigi Rizzo  * don't mind if they are slow.
576849bec0eSLuigi Rizzo  *
577ce3ee1e7SLuigi Rizzo  * The bridge is non blocking on the transmit ports: excess
578ce3ee1e7SLuigi Rizzo  * packets are dropped if there is no room on the output port.
579849bec0eSLuigi Rizzo  *
580849bec0eSLuigi Rizzo  * bdg_lock protects accesses to the bdg_ports array.
581f18be576SLuigi Rizzo  * This is a rw lock (or equivalent).
582f196ce38SLuigi Rizzo  */
struct nm_bridge {
	/* XXX what is the proper alignment/layout ? */
	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
	int		bdg_namelen;	/* length of the prefix in bdg_basename */
	uint32_t	bdg_active_ports; /* 0 means free */
	char		bdg_basename[IFNAMSIZ];	/* name prefix (up to ':') */

	/* Indexes of active ports (up to active_ports)
	 * and all other remaining ports.
	 */
	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];

	/* adapters attached to this bridge, indexed by port number */
	struct netmap_adapter *bdg_ports[NM_BDG_MAXPORTS];


	/*
	 * The function to decide the destination port.
	 * It returns either of an index of the destination port,
	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
	 * forward this packet.  ring_nr is the source ring index, and the
	 * function may overwrite this value to forward this packet to a
	 * different ring index.
	 * This function must be set by netmap_bdgctl().
	 */
	bdg_lookup_fn_t nm_bdg_lookup;

	/* the forwarding table, MAC+ports.
	 * XXX should be changed to an argument to be passed to
	 * the lookup function, and allocated on attach
	 */
	struct nm_hash_ent ht[NM_BDG_HASH];
};
615f196ce38SLuigi Rizzo 
616f196ce38SLuigi Rizzo 
617ce3ee1e7SLuigi Rizzo /*
618ce3ee1e7SLuigi Rizzo  * XXX in principle nm_bridges could be created dynamically
619ce3ee1e7SLuigi Rizzo  * Right now we have a static array and deletions are protected
620ce3ee1e7SLuigi Rizzo  * by an exclusive lock.
621f18be576SLuigi Rizzo  */
622ce3ee1e7SLuigi Rizzo struct nm_bridge nm_bridges[NM_BRIDGES];
623f18be576SLuigi Rizzo 
624ce3ee1e7SLuigi Rizzo 
625ce3ee1e7SLuigi Rizzo /*
626ce3ee1e7SLuigi Rizzo  * A few function to tell which kind of port are we using.
627ce3ee1e7SLuigi Rizzo  * XXX should we hold a lock ?
628ce3ee1e7SLuigi Rizzo  *
629ce3ee1e7SLuigi Rizzo  * nma_is_vp()		virtual port
630ce3ee1e7SLuigi Rizzo  * nma_is_host()	port connected to the host stack
631ce3ee1e7SLuigi Rizzo  * nma_is_hw()		port connected to a NIC
632ce3ee1e7SLuigi Rizzo  */
633ce3ee1e7SLuigi Rizzo int nma_is_vp(struct netmap_adapter *na);
634ce3ee1e7SLuigi Rizzo int
635f18be576SLuigi Rizzo nma_is_vp(struct netmap_adapter *na)
636f18be576SLuigi Rizzo {
637f18be576SLuigi Rizzo 	return na->nm_register == bdg_netmap_reg;
638f18be576SLuigi Rizzo }
639ce3ee1e7SLuigi Rizzo 
640f18be576SLuigi Rizzo static __inline int
641f18be576SLuigi Rizzo nma_is_host(struct netmap_adapter *na)
642f18be576SLuigi Rizzo {
643f18be576SLuigi Rizzo 	return na->nm_register == NULL;
644f18be576SLuigi Rizzo }
645ce3ee1e7SLuigi Rizzo 
static __inline int
nma_is_hw(struct netmap_adapter *na)
{
	/* a NIC is whatever is neither a virtual nor a host port
	 * (the sw adapter is recognized by nm_register == NULL) */
	return !(nma_is_vp(na) || nma_is_host(na));
}
652f18be576SLuigi Rizzo 
653ce3ee1e7SLuigi Rizzo 
/*
 * Ownership rules for a NIC, evaluated under NMG_LOCK():
 * if the NIC is owned by the kernel
 * (i.e., bridge), neither another bridge nor user can use it;
 * if the NIC is owned by a user, only users can share it.
 */
#define NETMAP_OWNED_BY_KERN(ifp)	(!nma_is_vp(NA(ifp)) && NA(ifp)->na_bdg)
#define NETMAP_OWNED_BY_ANY(ifp) \
	(NETMAP_OWNED_BY_KERN(ifp) || (NA(ifp)->refcount > 0))
663f196ce38SLuigi Rizzo 
664f196ce38SLuigi Rizzo /*
665f196ce38SLuigi Rizzo  * NA(ifp)->bdg_port	port index
666f196ce38SLuigi Rizzo  */
667f196ce38SLuigi Rizzo 
668ce3ee1e7SLuigi Rizzo 
/*
 * Slightly optimized copy routine: the length is rounded up to a
 * multiple of 64 bytes and moved as 64-bit words, which is often
 * faster than handling odd sizes. The caller guarantees that both
 * buffers have room for the rounded length and do not overlap.
 * Large requests (>= 1024 bytes) are delegated to memcpy().
 */
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
	uint64_t *src = _src;
	uint64_t *dst = _dst;
	int w;

	if (unlikely(l >= 1024)) {
		memcpy(dst, src, l);
		return;
	}
	while (likely(l > 0)) {
		/* move one 64-byte chunk as eight 64-bit words */
		for (w = 0; w < 8; w++)
			*dst++ = *src++;
		l -= 64;
	}
}
697f196ce38SLuigi Rizzo 
698f18be576SLuigi Rizzo 
699f196ce38SLuigi Rizzo /*
700f196ce38SLuigi Rizzo  * locate a bridge among the existing ones.
701ce3ee1e7SLuigi Rizzo  * MUST BE CALLED WITH NMG_LOCK()
702ce3ee1e7SLuigi Rizzo  *
703f196ce38SLuigi Rizzo  * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
704f196ce38SLuigi Rizzo  * We assume that this is called with a name of at least NM_NAME chars.
705f196ce38SLuigi Rizzo  */
706f196ce38SLuigi Rizzo static struct nm_bridge *
707f18be576SLuigi Rizzo nm_find_bridge(const char *name, int create)
708f196ce38SLuigi Rizzo {
709f18be576SLuigi Rizzo 	int i, l, namelen;
710f196ce38SLuigi Rizzo 	struct nm_bridge *b = NULL;
711f196ce38SLuigi Rizzo 
712ce3ee1e7SLuigi Rizzo 	NMG_LOCK_ASSERT();
713ce3ee1e7SLuigi Rizzo 
714f196ce38SLuigi Rizzo 	namelen = strlen(NM_NAME);	/* base length */
715ce3ee1e7SLuigi Rizzo 	l = name ? strlen(name) : 0;		/* actual length */
716ce3ee1e7SLuigi Rizzo 	if (l < namelen) {
717ce3ee1e7SLuigi Rizzo 		D("invalid bridge name %s", name ? name : NULL);
718ce3ee1e7SLuigi Rizzo 		return NULL;
719ce3ee1e7SLuigi Rizzo 	}
720f196ce38SLuigi Rizzo 	for (i = namelen + 1; i < l; i++) {
721f196ce38SLuigi Rizzo 		if (name[i] == ':') {
722f196ce38SLuigi Rizzo 			namelen = i;
723f196ce38SLuigi Rizzo 			break;
724f196ce38SLuigi Rizzo 		}
725f196ce38SLuigi Rizzo 	}
726f196ce38SLuigi Rizzo 	if (namelen >= IFNAMSIZ)
727f196ce38SLuigi Rizzo 		namelen = IFNAMSIZ;
728f196ce38SLuigi Rizzo 	ND("--- prefix is '%.*s' ---", namelen, name);
729f196ce38SLuigi Rizzo 
730f18be576SLuigi Rizzo 	/* lookup the name, remember empty slot if there is one */
731f18be576SLuigi Rizzo 	for (i = 0; i < NM_BRIDGES; i++) {
732f18be576SLuigi Rizzo 		struct nm_bridge *x = nm_bridges + i;
733f18be576SLuigi Rizzo 
734ce3ee1e7SLuigi Rizzo 		if (x->bdg_active_ports == 0) {
735f18be576SLuigi Rizzo 			if (create && b == NULL)
736f18be576SLuigi Rizzo 				b = x;	/* record empty slot */
737ce3ee1e7SLuigi Rizzo 		} else if (x->bdg_namelen != namelen) {
738f18be576SLuigi Rizzo 			continue;
739ce3ee1e7SLuigi Rizzo 		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
740f196ce38SLuigi Rizzo 			ND("found '%.*s' at %d", namelen, name, i);
741f18be576SLuigi Rizzo 			b = x;
742f196ce38SLuigi Rizzo 			break;
743f196ce38SLuigi Rizzo 		}
744f196ce38SLuigi Rizzo 	}
745f18be576SLuigi Rizzo 	if (i == NM_BRIDGES && b) { /* name not found, can create entry */
746ce3ee1e7SLuigi Rizzo 		/* initialize the bridge */
747ce3ee1e7SLuigi Rizzo 		strncpy(b->bdg_basename, name, namelen);
748ce3ee1e7SLuigi Rizzo 		ND("create new bridge %s with ports %d", b->bdg_basename,
749ce3ee1e7SLuigi Rizzo 			b->bdg_active_ports);
750ce3ee1e7SLuigi Rizzo 		b->bdg_namelen = namelen;
751ce3ee1e7SLuigi Rizzo 		b->bdg_active_ports = 0;
752ce3ee1e7SLuigi Rizzo 		for (i = 0; i < NM_BDG_MAXPORTS; i++)
753ce3ee1e7SLuigi Rizzo 			b->bdg_port_index[i] = i;
754f18be576SLuigi Rizzo 		/* set the default function */
755f18be576SLuigi Rizzo 		b->nm_bdg_lookup = netmap_bdg_learning;
756f18be576SLuigi Rizzo 		/* reset the MAC address table */
757f18be576SLuigi Rizzo 		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
758f196ce38SLuigi Rizzo 	}
759f196ce38SLuigi Rizzo 	return b;
760f196ce38SLuigi Rizzo }
761f18be576SLuigi Rizzo 
762f18be576SLuigi Rizzo 
763f18be576SLuigi Rizzo /*
764f18be576SLuigi Rizzo  * Free the forwarding tables for rings attached to switch ports.
765f18be576SLuigi Rizzo  */
766f18be576SLuigi Rizzo static void
767f18be576SLuigi Rizzo nm_free_bdgfwd(struct netmap_adapter *na)
768f18be576SLuigi Rizzo {
769f18be576SLuigi Rizzo 	int nrings, i;
770f18be576SLuigi Rizzo 	struct netmap_kring *kring;
771f18be576SLuigi Rizzo 
772ce3ee1e7SLuigi Rizzo 	NMG_LOCK_ASSERT();
773f18be576SLuigi Rizzo 	nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings;
774f18be576SLuigi Rizzo 	kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings;
775f18be576SLuigi Rizzo 	for (i = 0; i < nrings; i++) {
776f18be576SLuigi Rizzo 		if (kring[i].nkr_ft) {
777f18be576SLuigi Rizzo 			free(kring[i].nkr_ft, M_DEVBUF);
778f18be576SLuigi Rizzo 			kring[i].nkr_ft = NULL; /* protect from freeing twice */
779f18be576SLuigi Rizzo 		}
780f18be576SLuigi Rizzo 	}
781f18be576SLuigi Rizzo 	if (nma_is_hw(na))
782f18be576SLuigi Rizzo 		nm_free_bdgfwd(SWNA(na->ifp));
783f18be576SLuigi Rizzo }
784f18be576SLuigi Rizzo 
785f18be576SLuigi Rizzo 
786f18be576SLuigi Rizzo /*
787f18be576SLuigi Rizzo  * Allocate the forwarding tables for the rings attached to the bridge ports.
788f18be576SLuigi Rizzo  */
789f18be576SLuigi Rizzo static int
790f18be576SLuigi Rizzo nm_alloc_bdgfwd(struct netmap_adapter *na)
791f18be576SLuigi Rizzo {
792f18be576SLuigi Rizzo 	int nrings, l, i, num_dstq;
793f18be576SLuigi Rizzo 	struct netmap_kring *kring;
794f18be576SLuigi Rizzo 
795ce3ee1e7SLuigi Rizzo 	NMG_LOCK_ASSERT();
796f18be576SLuigi Rizzo 	/* all port:rings + broadcast */
797f18be576SLuigi Rizzo 	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
798ce3ee1e7SLuigi Rizzo 	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
799f18be576SLuigi Rizzo 	l += sizeof(struct nm_bdg_q) * num_dstq;
800ce3ee1e7SLuigi Rizzo 	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
801f18be576SLuigi Rizzo 
802f18be576SLuigi Rizzo 	nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings;
803f18be576SLuigi Rizzo 	kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings;
804f18be576SLuigi Rizzo 	for (i = 0; i < nrings; i++) {
805f18be576SLuigi Rizzo 		struct nm_bdg_fwd *ft;
806f18be576SLuigi Rizzo 		struct nm_bdg_q *dstq;
807f18be576SLuigi Rizzo 		int j;
808f18be576SLuigi Rizzo 
809f18be576SLuigi Rizzo 		ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
810f18be576SLuigi Rizzo 		if (!ft) {
811f18be576SLuigi Rizzo 			nm_free_bdgfwd(na);
812f18be576SLuigi Rizzo 			return ENOMEM;
813f18be576SLuigi Rizzo 		}
814ce3ee1e7SLuigi Rizzo 		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
815ce3ee1e7SLuigi Rizzo 		for (j = 0; j < num_dstq; j++) {
816ce3ee1e7SLuigi Rizzo 			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
817ce3ee1e7SLuigi Rizzo 			dstq[j].bq_len = 0;
818ce3ee1e7SLuigi Rizzo 		}
819f18be576SLuigi Rizzo 		kring[i].nkr_ft = ft;
820f18be576SLuigi Rizzo 	}
821f18be576SLuigi Rizzo 	if (nma_is_hw(na))
822f18be576SLuigi Rizzo 		nm_alloc_bdgfwd(SWNA(na->ifp));
823f18be576SLuigi Rizzo 	return 0;
824f18be576SLuigi Rizzo }
825f18be576SLuigi Rizzo 
826ae10d1afSLuigi Rizzo 
827ae10d1afSLuigi Rizzo /*
828ae10d1afSLuigi Rizzo  * Fetch configuration from the device, to cope with dynamic
829ae10d1afSLuigi Rizzo  * reconfigurations after loading the module.
830ae10d1afSLuigi Rizzo  */
831ae10d1afSLuigi Rizzo static int
832ae10d1afSLuigi Rizzo netmap_update_config(struct netmap_adapter *na)
833ae10d1afSLuigi Rizzo {
834ae10d1afSLuigi Rizzo 	struct ifnet *ifp = na->ifp;
835ae10d1afSLuigi Rizzo 	u_int txr, txd, rxr, rxd;
836ae10d1afSLuigi Rizzo 
837ae10d1afSLuigi Rizzo 	txr = txd = rxr = rxd = 0;
838ae10d1afSLuigi Rizzo 	if (na->nm_config) {
839ae10d1afSLuigi Rizzo 		na->nm_config(ifp, &txr, &txd, &rxr, &rxd);
840ae10d1afSLuigi Rizzo 	} else {
841ae10d1afSLuigi Rizzo 		/* take whatever we had at init time */
842ae10d1afSLuigi Rizzo 		txr = na->num_tx_rings;
843ae10d1afSLuigi Rizzo 		txd = na->num_tx_desc;
844ae10d1afSLuigi Rizzo 		rxr = na->num_rx_rings;
845ae10d1afSLuigi Rizzo 		rxd = na->num_rx_desc;
846ae10d1afSLuigi Rizzo 	}
847ae10d1afSLuigi Rizzo 
848ae10d1afSLuigi Rizzo 	if (na->num_tx_rings == txr && na->num_tx_desc == txd &&
849ae10d1afSLuigi Rizzo 	    na->num_rx_rings == rxr && na->num_rx_desc == rxd)
850ae10d1afSLuigi Rizzo 		return 0; /* nothing changed */
851ae10d1afSLuigi Rizzo 	if (netmap_verbose || na->refcount > 0) {
852ae10d1afSLuigi Rizzo 		D("stored config %s: txring %d x %d, rxring %d x %d",
853ae10d1afSLuigi Rizzo 			ifp->if_xname,
854ae10d1afSLuigi Rizzo 			na->num_tx_rings, na->num_tx_desc,
855ae10d1afSLuigi Rizzo 			na->num_rx_rings, na->num_rx_desc);
856ae10d1afSLuigi Rizzo 		D("new config %s: txring %d x %d, rxring %d x %d",
857ae10d1afSLuigi Rizzo 			ifp->if_xname, txr, txd, rxr, rxd);
858ae10d1afSLuigi Rizzo 	}
859ae10d1afSLuigi Rizzo 	if (na->refcount == 0) {
860ae10d1afSLuigi Rizzo 		D("configuration changed (but fine)");
861ae10d1afSLuigi Rizzo 		na->num_tx_rings = txr;
862ae10d1afSLuigi Rizzo 		na->num_tx_desc = txd;
863ae10d1afSLuigi Rizzo 		na->num_rx_rings = rxr;
864ae10d1afSLuigi Rizzo 		na->num_rx_desc = rxd;
865ae10d1afSLuigi Rizzo 		return 0;
866ae10d1afSLuigi Rizzo 	}
867ae10d1afSLuigi Rizzo 	D("configuration changed while active, this is bad...");
868ae10d1afSLuigi Rizzo 	return 1;
869ae10d1afSLuigi Rizzo }
870ae10d1afSLuigi Rizzo 
871ce3ee1e7SLuigi Rizzo static struct netmap_if *
872ce3ee1e7SLuigi Rizzo netmap_if_new(const char *ifname, struct netmap_adapter *na)
873ce3ee1e7SLuigi Rizzo {
874ce3ee1e7SLuigi Rizzo 	if (netmap_update_config(na)) {
875ce3ee1e7SLuigi Rizzo 		/* configuration mismatch, report and fail */
876ce3ee1e7SLuigi Rizzo 		return NULL;
877ce3ee1e7SLuigi Rizzo 	}
878ce3ee1e7SLuigi Rizzo 	return netmap_mem_if_new(ifname, na);
879ce3ee1e7SLuigi Rizzo }
88068b8534bSLuigi Rizzo 
8818241616dSLuigi Rizzo 
8828241616dSLuigi Rizzo /* Structure associated to each thread which registered an interface.
8838241616dSLuigi Rizzo  *
8848241616dSLuigi Rizzo  * The first 4 fields of this structure are written by NIOCREGIF and
8858241616dSLuigi Rizzo  * read by poll() and NIOC?XSYNC.
8868241616dSLuigi Rizzo  * There is low contention among writers (actually, a correct user program
8878241616dSLuigi Rizzo  * should have no contention among writers) and among writers and readers,
8888241616dSLuigi Rizzo  * so we use a single global lock to protect the structure initialization.
8898241616dSLuigi Rizzo  * Since initialization involves the allocation of memory, we reuse the memory
8908241616dSLuigi Rizzo  * allocator lock.
8918241616dSLuigi Rizzo  * Read access to the structure is lock free. Readers must check that
8928241616dSLuigi Rizzo  * np_nifp is not NULL before using the other fields.
8938241616dSLuigi Rizzo  * If np_nifp is NULL initialization has not been performed, so they should
8948241616dSLuigi Rizzo  * return an error to userlevel.
8958241616dSLuigi Rizzo  *
8968241616dSLuigi Rizzo  * The ref_done field is used to regulate access to the refcount in the
8978241616dSLuigi Rizzo  * memory allocator. The refcount must be incremented at most once for
8988241616dSLuigi Rizzo  * each open("/dev/netmap"). The increment is performed by the first
8998241616dSLuigi Rizzo  * function that calls netmap_get_memory() (currently called by
9008241616dSLuigi Rizzo  * mmap(), NIOCGINFO and NIOCREGIF).
9018241616dSLuigi Rizzo  * If the refcount is incremented, it is then decremented when the
9028241616dSLuigi Rizzo  * private structure is destroyed.
9038241616dSLuigi Rizzo  */
struct netmap_priv_d {
	struct netmap_if * volatile np_nifp;	/* netmap if descriptor;
						 * NULL until REGIF completes */

	struct ifnet	*np_ifp;	/* device for which we hold a ref. */
	int		np_ringid;	/* from the ioctl */
	u_int		np_qfirst, np_qlast;	/* range of rings to scan */
	uint16_t	np_txpoll;	/* from the ioctl; presumably whether
					 * poll() also syncs tx -- confirm */

	struct netmap_mem_d *np_mref;	/* use with NMG_LOCK held */
#ifdef __FreeBSD__
	int		np_refcount;	/* number of active mmaps on this fd;
					 * use with NMG_LOCK held */
#endif /* __FreeBSD__ */
};
91768b8534bSLuigi Rizzo 
918ce3ee1e7SLuigi Rizzo /* grab a reference to the memory allocator, if we don't have one already.  The
919ce3ee1e7SLuigi Rizzo  * reference is taken from the netmap_adapter registered with the priv.
920ce3ee1e7SLuigi Rizzo  *
921ce3ee1e7SLuigi Rizzo  */
922ce3ee1e7SLuigi Rizzo static int
923ce3ee1e7SLuigi Rizzo netmap_get_memory_locked(struct netmap_priv_d* p)
924ce3ee1e7SLuigi Rizzo {
925ce3ee1e7SLuigi Rizzo 	struct netmap_mem_d *nmd;
926ce3ee1e7SLuigi Rizzo 	int error = 0;
927ce3ee1e7SLuigi Rizzo 
928ce3ee1e7SLuigi Rizzo 	if (p->np_ifp == NULL) {
929ce3ee1e7SLuigi Rizzo 		if (!netmap_mmap_unreg)
930ce3ee1e7SLuigi Rizzo 			return ENODEV;
931ce3ee1e7SLuigi Rizzo 		/* for compatibility with older versions of the API
932ce3ee1e7SLuigi Rizzo  		 * we use the global allocator when no interface has been
933ce3ee1e7SLuigi Rizzo  		 * registered
934ce3ee1e7SLuigi Rizzo  		 */
935ce3ee1e7SLuigi Rizzo 		nmd = &nm_mem;
936ce3ee1e7SLuigi Rizzo 	} else {
937ce3ee1e7SLuigi Rizzo 		nmd = NA(p->np_ifp)->nm_mem;
938ce3ee1e7SLuigi Rizzo 	}
939ce3ee1e7SLuigi Rizzo 	if (p->np_mref == NULL) {
940ce3ee1e7SLuigi Rizzo 		error = netmap_mem_finalize(nmd);
941ce3ee1e7SLuigi Rizzo 		if (!error)
942ce3ee1e7SLuigi Rizzo 			p->np_mref = nmd;
943ce3ee1e7SLuigi Rizzo 	} else if (p->np_mref != nmd) {
944ce3ee1e7SLuigi Rizzo 		/* a virtual port has been registered, but previous
945ce3ee1e7SLuigi Rizzo  		 * syscalls already used the global allocator.
946ce3ee1e7SLuigi Rizzo  		 * We cannot continue
947ce3ee1e7SLuigi Rizzo  		 */
948ce3ee1e7SLuigi Rizzo 		error = ENODEV;
949ce3ee1e7SLuigi Rizzo 	}
950ce3ee1e7SLuigi Rizzo 	return error;
951ce3ee1e7SLuigi Rizzo }
95268b8534bSLuigi Rizzo 
/* locked wrapper around netmap_get_memory_locked() */
static int
netmap_get_memory(struct netmap_priv_d* p)
{
	int rc;

	NMG_LOCK();
	rc = netmap_get_memory_locked(p);
	NMG_UNLOCK();
	return rc;
}
9628241616dSLuigi Rizzo 
963ce3ee1e7SLuigi Rizzo static int
964ce3ee1e7SLuigi Rizzo netmap_have_memory_locked(struct netmap_priv_d* p)
965ce3ee1e7SLuigi Rizzo {
966ce3ee1e7SLuigi Rizzo 	return p->np_mref != NULL;
967ce3ee1e7SLuigi Rizzo }
968ce3ee1e7SLuigi Rizzo 
969ce3ee1e7SLuigi Rizzo static void
970ce3ee1e7SLuigi Rizzo netmap_drop_memory_locked(struct netmap_priv_d* p)
971ce3ee1e7SLuigi Rizzo {
972ce3ee1e7SLuigi Rizzo 	if (p->np_mref) {
973ce3ee1e7SLuigi Rizzo 		netmap_mem_deref(p->np_mref);
974ce3ee1e7SLuigi Rizzo 		p->np_mref = NULL;
975ce3ee1e7SLuigi Rizzo 	}
976ce3ee1e7SLuigi Rizzo }
977ce3ee1e7SLuigi Rizzo 
97868b8534bSLuigi Rizzo /*
97968b8534bSLuigi Rizzo  * File descriptor's private data destructor.
98068b8534bSLuigi Rizzo  *
98168b8534bSLuigi Rizzo  * Call nm_register(ifp,0) to stop netmap mode on the interface and
98268b8534bSLuigi Rizzo  * revert to normal operation. We expect that np_ifp has not gone.
983ce3ee1e7SLuigi Rizzo  * The second argument is the nifp to work on. In some cases it is
984ce3ee1e7SLuigi Rizzo  * not attached yet to the netmap_priv_d so we need to pass it as
985ce3ee1e7SLuigi Rizzo  * a separate argument.
98668b8534bSLuigi Rizzo  */
987ce3ee1e7SLuigi Rizzo /* call with NMG_LOCK held */
static void
netmap_do_unregif(struct netmap_priv_d *priv, struct netmap_if *nifp)
{
	struct ifnet *ifp = priv->np_ifp;
	struct netmap_adapter *na = NA(ifp);

	NMG_LOCK_ASSERT();
	na->refcount--;
	if (na->refcount <= 0) {	/* last instance */
		u_int i;

		if (netmap_verbose)
			D("deleting last instance for %s", ifp->if_xname);
		/*
		 * (TO CHECK) This function is only called
		 * when the last reference to this file descriptor goes
		 * away. This means we cannot have any pending poll()
		 * or interrupt routine operating on the structure.
		 * XXX The file may be closed in a thread while
		 * another thread is using it.
		 * Linux keeps the file opened until the last reference
		 * by any outstanding ioctl/poll or mmap is gone.
		 * FreeBSD does not track mmap()s (but we do) and
		 * wakes up any sleeping poll(). Need to check what
		 * happens if the close() occurs while a concurrent
		 * syscall is running.
		 */
		na->nm_register(ifp, 0); /* off, clear IFCAP_NETMAP */
		/* Wake up any sleeping threads. netmap_poll will
		 * then return POLLERR
		 * XXX The wake up now must happen during *_down(), when
		 * we order all activities to stop. -gl
		 */
		nm_free_bdgfwd(na);
		/* destroy the per-ring locks; the +1 is presumably the
		 * extra ring used for the host stack -- confirm */
		for (i = 0; i < na->num_tx_rings + 1; i++) {
			mtx_destroy(&na->tx_rings[i].q_lock);
		}
		for (i = 0; i < na->num_rx_rings + 1; i++) {
			mtx_destroy(&na->rx_rings[i].q_lock);
		}
		/* XXX kqueue(9) needed; these will mirror knlist_init. */
		/* knlist_destroy(&na->tx_si.si_note); */
		/* knlist_destroy(&na->rx_si.si_note); */
		/* the host-stack companion shared our rings, clear its refs */
		if (nma_is_hw(na))
			SWNA(ifp)->tx_rings = SWNA(ifp)->rx_rings = NULL;
	}
	/*
	 * netmap_mem_if_delete() deletes the nifp, and if this is
	 * the last instance also buffers, rings and krings.
	 */
	netmap_mem_if_delete(na, nifp);
}
104068b8534bSLuigi Rizzo 
1041f18be576SLuigi Rizzo 
1042ce3ee1e7SLuigi Rizzo /* we assume netmap adapter exists
1043ce3ee1e7SLuigi Rizzo  * Called with NMG_LOCK held
1044ce3ee1e7SLuigi Rizzo  */
static void
nm_if_rele(struct ifnet *ifp)
{
	int i, is_hw, hw, sw, lim;
	struct nm_bridge *b;
	struct netmap_adapter *na;
	uint8_t tmp[NM_BDG_MAXPORTS];	/* scratch copy of bdg_port_index */

	NMG_LOCK_ASSERT();
	/* I can be called not only for get_ifp()-ed references where netmap's
	 * capability is guaranteed, but also for non-netmap-capable NICs.
	 */
	if (!NETMAP_CAPABLE(ifp) || !NA(ifp)->na_bdg) {
		if_rele(ifp);
		return;
	}
	na = NA(ifp);
	b = na->na_bdg;
	is_hw = nma_is_hw(na);

	ND("%s has %d references", ifp->if_xname, NA(ifp)->na_bdg_refcount);

	if (!DROP_BDG_REF(ifp))
		return;	/* not the last bridge reference, nothing to do */

	/*
	New algorithm:
	make a copy of bdg_port_index;
	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
	in the array of bdg_port_index, replacing them with
	entries from the bottom of the array;
	decrement bdg_active_ports;
	acquire BDG_WLOCK() and copy back the array.
	 */

	hw = NA(ifp)->bdg_port;
	sw = (is_hw && SWNA(ifp)->na_bdg) ? SWNA(ifp)->bdg_port : -1;
	lim = b->bdg_active_ports;

	ND("detach %d and %d (lim %d)", hw, sw, lim);
	/* make a copy of the list of active ports, update it,
	 * and then copy back within BDG_WLOCK().
	 */
	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
		if (hw >= 0 && tmp[i] == hw) {
			ND("detach hw %d at %d", hw, i);
			lim--; /* point to last active port */
			tmp[i] = tmp[lim]; /* swap with i */
			tmp[lim] = hw;	/* now this is inactive */
			hw = -1;	/* mark as found */
		} else if (sw >= 0 && tmp[i] == sw) {
			ND("detach sw %d at %d", sw, i);
			lim--;
			tmp[i] = tmp[lim];
			tmp[lim] = sw;
			sw = -1;	/* mark as found */
		} else {
			i++;
		}
	}
	if (hw >= 0 || sw >= 0) {
		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
	}
	/* hw and sw were clobbered (-1) by the loop above,
	 * reload the port indexes for the updates below */
	hw = NA(ifp)->bdg_port;
	sw = (is_hw && SWNA(ifp)->na_bdg) ?  SWNA(ifp)->bdg_port : -1;

	BDG_WLOCK(b);
	b->bdg_ports[hw] = NULL;
	na->na_bdg = NULL;
	if (sw >= 0) {
		b->bdg_ports[sw] = NULL;
		SWNA(ifp)->na_bdg = NULL;
	}
	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
	b->bdg_active_ports = lim;
	BDG_WUNLOCK(b);

	ND("now %d active ports", lim);
	if (lim == 0) {
		/* bdg_active_ports == 0 marks the bridge slot as free
		 * (see nm_find_bridge), also clear the lookup fn */
		ND("marking bridge %s as free", b->bdg_basename);
		b->nm_bdg_lookup = NULL;
	}

	if (is_hw) {
		if_rele(ifp);
	} else {
		/* virtual port: na and the fake ifp were presumably
		 * malloc'ed when the port was created -- confirm with
		 * the attach path */
		if (na->na_flags & NAF_MEM_OWNER)
			netmap_mem_private_delete(na->nm_mem);
		bzero(na, sizeof(*na));
		free(na, M_DEVBUF);
		bzero(ifp, sizeof(*ifp));
		free(ifp, M_DEVBUF);
	}
}
1140ce3ee1e7SLuigi Rizzo 
1141ce3ee1e7SLuigi Rizzo 
1142ce3ee1e7SLuigi Rizzo /*
1143ce3ee1e7SLuigi Rizzo  * returns 1 if this is the last instance and we can free priv
1144ce3ee1e7SLuigi Rizzo  */
static int
netmap_dtor_locked(struct netmap_priv_d *priv)
{
	struct ifnet *ifp = priv->np_ifp;

#ifdef __FreeBSD__
	/*
	 * np_refcount is the number of active mmaps on
	 * this file descriptor
	 */
	if (--priv->np_refcount > 0) {
		return 0;	/* mappings still outstanding */
	}
#endif /* __FreeBSD__ */
	if (ifp) {
		/* stop netmap mode and detach the nifp */
		netmap_do_unregif(priv, priv->np_nifp);
	}
	/* drop our hold on the memory allocator, if any */
	netmap_drop_memory_locked(priv);
	if (ifp) {
		nm_if_rele(ifp); /* might also destroy *na */
	}
	return 1;
}
11685819da83SLuigi Rizzo 
11695819da83SLuigi Rizzo static void
11705819da83SLuigi Rizzo netmap_dtor(void *data)
11715819da83SLuigi Rizzo {
11725819da83SLuigi Rizzo 	struct netmap_priv_d *priv = data;
1173ce3ee1e7SLuigi Rizzo 	int last_instance;
11745819da83SLuigi Rizzo 
1175ce3ee1e7SLuigi Rizzo 	NMG_LOCK();
1176ce3ee1e7SLuigi Rizzo 	last_instance = netmap_dtor_locked(priv);
1177ce3ee1e7SLuigi Rizzo 	NMG_UNLOCK();
1178ce3ee1e7SLuigi Rizzo 	if (last_instance) {
1179ce3ee1e7SLuigi Rizzo 		bzero(priv, sizeof(*priv));	/* for safety */
118068b8534bSLuigi Rizzo 		free(priv, M_DEVBUF);
118168b8534bSLuigi Rizzo 	}
1182ce3ee1e7SLuigi Rizzo }
118368b8534bSLuigi Rizzo 
1184f18be576SLuigi Rizzo 
11858241616dSLuigi Rizzo #ifdef __FreeBSD__
11868241616dSLuigi Rizzo 
1187f18be576SLuigi Rizzo /*
1188f18be576SLuigi Rizzo  * In order to track whether pages are still mapped, we hook into
1189f18be576SLuigi Rizzo  * the standard cdev_pager and intercept the constructor and
1190f18be576SLuigi Rizzo  * destructor.
1191f18be576SLuigi Rizzo  */
11928241616dSLuigi Rizzo 
/* per-mapping state linking the vm object to the device and fd */
struct netmap_vm_handle_t {
	struct cdev 		*dev;	/* referenced while mapped (dev_ref) */
	struct netmap_priv_d	*priv;	/* owning file descriptor state */
};
1197f18be576SLuigi Rizzo 
/*
 * Pager constructor: take a reference on the device so it cannot
 * go away while mappings exist; released in netmap_dev_pager_dtor().
 */
static int
netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t foff, struct ucred *cred, u_short *color)
{
	struct netmap_vm_handle_t *vmh = handle;
	D("handle %p size %jd prot %d foff %jd",
		handle, (intmax_t)size, prot, (intmax_t)foff);
	dev_ref(vmh->dev);
	return 0;
}
12088241616dSLuigi Rizzo 
1209f18be576SLuigi Rizzo 
/*
 * Pager destructor, called when the last mapping goes away.
 * Drops the priv reference taken in netmap_mmap_single() by running
 * netmap_dtor(), frees the handle and releases the device reference
 * taken in the pager constructor.
 */
static void
netmap_dev_pager_dtor(void *handle)
{
	struct netmap_vm_handle_t *vmh = handle;
	struct cdev *dev = vmh->dev;
	struct netmap_priv_d *priv = vmh->priv;
	D("handle %p", handle);
	netmap_dtor(priv);
	free(vmh, M_DEVBUF);
	dev_rel(dev);
}
1221ce3ee1e7SLuigi Rizzo 
/*
 * Pager fault handler: translate 'offset' within the netmap memory
 * region into a physical address and install a fictitious page for it.
 * Called with the VM object lock held.
 * Returns VM_PAGER_OK on success, VM_PAGER_FAIL if the offset does not
 * map to netmap memory.
 */
static int
netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
	int prot, vm_page_t *mres)
{
	struct netmap_vm_handle_t *vmh = object->handle;
	struct netmap_priv_d *priv = vmh->priv;
	vm_paddr_t paddr;
	vm_page_t page;
	vm_memattr_t memattr;
	vm_pindex_t pidx;

	ND("object %p offset %jd prot %d mres %p",
			object, (intmax_t)offset, prot, mres);
	memattr = object->memattr;
	pidx = OFF_TO_IDX(offset);
	/* resolve the offset inside the allocator region of this priv */
	paddr = netmap_mem_ofstophys(priv->np_mref, offset);
	if (paddr == 0)
		return VM_PAGER_FAIL;

	if (((*mres)->flags & PG_FICTITIOUS) != 0) {
		/*
		 * If the passed in result page is a fake page, update it with
		 * the new physical address.
		 */
		page = *mres;
		vm_page_updatefake(page, paddr, memattr);
	} else {
		/*
		 * Replace the passed in reqpage page with our own fake page and
		 * free up the all of the original pages.
		 */
#ifndef VM_OBJECT_WUNLOCK	/* FreeBSD < 10.x */
#define VM_OBJECT_WUNLOCK VM_OBJECT_UNLOCK
#define VM_OBJECT_WLOCK	VM_OBJECT_LOCK
#endif /* VM_OBJECT_WUNLOCK */

		/* drop the object lock around the fake-page allocation */
		VM_OBJECT_WUNLOCK(object);
		page = vm_page_getfake(paddr, memattr);
		VM_OBJECT_WLOCK(object);
		vm_page_lock(*mres);
		vm_page_free(*mres);
		vm_page_unlock(*mres);
		*mres = page;
		vm_page_insert(page, object, pidx);
	}
	page->valid = VM_PAGE_BITS_ALL;
	return (VM_PAGER_OK);
}
12708241616dSLuigi Rizzo 
12718241616dSLuigi Rizzo 
/* pager callbacks used by cdev_pager_allocate() for netmap mappings */
static struct cdev_pager_ops netmap_cdev_pager_ops = {
        .cdev_pg_ctor = netmap_dev_pager_ctor,
        .cdev_pg_dtor = netmap_dev_pager_dtor,
        .cdev_pg_fault = netmap_dev_pager_fault,
};
12778241616dSLuigi Rizzo 
1278f18be576SLuigi Rizzo 
/*
 * d_mmap_single handler: back an mmap() of the netmap device with a
 * device-pager object whose faults are served from netmap memory
 * (see netmap_dev_pager_fault).  Takes an extra reference on priv
 * (np_refcount), released by netmap_dtor() when the pager handle is
 * destroyed.  Returns 0 or an errno value.
 */
static int
netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
	vm_size_t objsize,  vm_object_t *objp, int prot)
{
	int error;
	struct netmap_vm_handle_t *vmh;
	struct netmap_priv_d *priv;
	vm_object_t obj;

	D("cdev %p foff %jd size %jd objp %p prot %d", cdev,
	    (intmax_t )*foff, (intmax_t )objsize, objp, prot);

	vmh = malloc(sizeof(struct netmap_vm_handle_t), M_DEVBUF,
			      M_NOWAIT | M_ZERO);
	if (vmh == NULL)
		return ENOMEM;
	vmh->dev = cdev;

	NMG_LOCK();
	error = devfs_get_cdevpriv((void**)&priv);
	if (error)
		goto err_unlock;
	vmh->priv = priv;
	priv->np_refcount++;	/* dropped by netmap_dtor() via the pager dtor */
	NMG_UNLOCK();

	/* make sure the memory allocator is set up before mapping it */
	error = netmap_get_memory(priv);
	if (error)
		goto err_deref;

	obj = cdev_pager_allocate(vmh, OBJT_DEVICE,
		&netmap_cdev_pager_ops, objsize, prot,
		*foff, NULL);
	if (obj == NULL) {
		D("cdev_pager_allocate failed");
		error = EINVAL;
		goto err_deref;
	}

	*objp = obj;
	return 0;

err_deref:
	NMG_LOCK();
	priv->np_refcount--;
err_unlock:
	NMG_UNLOCK();
// err:
	free(vmh, M_DEVBUF);
	return error;
}
13308241616dSLuigi Rizzo 
1331f18be576SLuigi Rizzo 
1332ce3ee1e7SLuigi Rizzo // XXX can we remove this ?
/*
 * close() entry point: nothing to tear down here.  The actual cleanup
 * runs in the cdevpriv destructor (netmap_dtor) when the last
 * reference to the open device file goes away.
 */
static int
netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
	if (netmap_verbose)
		D("dev %p fflag 0x%x devtype %d td %p",
			dev, fflag, devtype, td);
	return 0;
}
13418241616dSLuigi Rizzo 
1342f18be576SLuigi Rizzo 
13438241616dSLuigi Rizzo static int
13448241616dSLuigi Rizzo netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
13458241616dSLuigi Rizzo {
13468241616dSLuigi Rizzo 	struct netmap_priv_d *priv;
13478241616dSLuigi Rizzo 	int error;
13488241616dSLuigi Rizzo 
1349ce3ee1e7SLuigi Rizzo 	(void)dev;
1350ce3ee1e7SLuigi Rizzo 	(void)oflags;
1351ce3ee1e7SLuigi Rizzo 	(void)devtype;
1352ce3ee1e7SLuigi Rizzo 	(void)td;
1353ce3ee1e7SLuigi Rizzo 
1354ce3ee1e7SLuigi Rizzo 	// XXX wait or nowait ?
13558241616dSLuigi Rizzo 	priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
13568241616dSLuigi Rizzo 			      M_NOWAIT | M_ZERO);
13578241616dSLuigi Rizzo 	if (priv == NULL)
13588241616dSLuigi Rizzo 		return ENOMEM;
13598241616dSLuigi Rizzo 
13608241616dSLuigi Rizzo 	error = devfs_set_cdevpriv(priv, netmap_dtor);
13618241616dSLuigi Rizzo 	if (error)
13628241616dSLuigi Rizzo 	        return error;
13638241616dSLuigi Rizzo 
1364ce3ee1e7SLuigi Rizzo 	priv->np_refcount = 1;
1365ce3ee1e7SLuigi Rizzo 
13668241616dSLuigi Rizzo 	return 0;
136768b8534bSLuigi Rizzo }
1368f196ce38SLuigi Rizzo #endif /* __FreeBSD__ */
136968b8534bSLuigi Rizzo 
137068b8534bSLuigi Rizzo 
137168b8534bSLuigi Rizzo /*
137202ad4083SLuigi Rizzo  * Handlers for synchronization of the queues from/to the host.
1373091fd0abSLuigi Rizzo  * Netmap has two operating modes:
1374091fd0abSLuigi Rizzo  * - in the default mode, the rings connected to the host stack are
1375091fd0abSLuigi Rizzo  *   just another ring pair managed by userspace;
1376091fd0abSLuigi Rizzo  * - in transparent mode (XXX to be defined) incoming packets
1377091fd0abSLuigi Rizzo  *   (from the host or the NIC) are marked as NS_FORWARD upon
1378091fd0abSLuigi Rizzo  *   arrival, and the user application has a chance to reset the
1379091fd0abSLuigi Rizzo  *   flag for packets that should be dropped.
1380091fd0abSLuigi Rizzo  *   On the RXSYNC or poll(), packets in RX rings between
 *   kring->nr_hwcur and ring->cur with NS_FORWARD still set are moved
1382091fd0abSLuigi Rizzo  *   to the other side.
1383091fd0abSLuigi Rizzo  * The transfer NIC --> host is relatively easy, just encapsulate
1384091fd0abSLuigi Rizzo  * into mbufs and we are done. The host --> NIC side is slightly
1385091fd0abSLuigi Rizzo  * harder because there might not be room in the tx ring so it
1386091fd0abSLuigi Rizzo  * might take a while before releasing the buffer.
1387091fd0abSLuigi Rizzo  */
1388091fd0abSLuigi Rizzo 
1389f18be576SLuigi Rizzo 
1390091fd0abSLuigi Rizzo /*
1391091fd0abSLuigi Rizzo  * pass a chain of buffers to the host stack as coming from 'dst'
1392091fd0abSLuigi Rizzo  */
1393091fd0abSLuigi Rizzo static void
1394091fd0abSLuigi Rizzo netmap_send_up(struct ifnet *dst, struct mbuf *head)
1395091fd0abSLuigi Rizzo {
1396091fd0abSLuigi Rizzo 	struct mbuf *m;
1397091fd0abSLuigi Rizzo 
1398091fd0abSLuigi Rizzo 	/* send packets up, outside the lock */
1399091fd0abSLuigi Rizzo 	while ((m = head) != NULL) {
1400091fd0abSLuigi Rizzo 		head = head->m_nextpkt;
1401091fd0abSLuigi Rizzo 		m->m_nextpkt = NULL;
1402091fd0abSLuigi Rizzo 		if (netmap_verbose & NM_VERB_HOST)
1403091fd0abSLuigi Rizzo 			D("sending up pkt %p size %d", m, MBUF_LEN(m));
1404091fd0abSLuigi Rizzo 		NM_SEND_UP(dst, m);
1405091fd0abSLuigi Rizzo 	}
1406091fd0abSLuigi Rizzo }
1407091fd0abSLuigi Rizzo 
/* FIFO of mbufs linked via m_nextpkt, used to stage forwarded packets */
struct mbq {
	struct mbuf *head;	/* first packet, NULL when empty */
	struct mbuf *tail;	/* last packet, NULL when empty */
	int count;		/* number of packets in the queue */
};
1413091fd0abSLuigi Rizzo 
1414f18be576SLuigi Rizzo 
1415091fd0abSLuigi Rizzo /*
1416091fd0abSLuigi Rizzo  * put a copy of the buffers marked NS_FORWARD into an mbuf chain.
1417091fd0abSLuigi Rizzo  * Run from hwcur to cur - reserved
1418091fd0abSLuigi Rizzo  */
1419091fd0abSLuigi Rizzo static void
1420091fd0abSLuigi Rizzo netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
1421091fd0abSLuigi Rizzo {
1422091fd0abSLuigi Rizzo 	/* Take packets from hwcur to cur-reserved and pass them up.
1423091fd0abSLuigi Rizzo 	 * In case of no buffers we give up. At the end of the loop,
1424091fd0abSLuigi Rizzo 	 * the queue is drained in all cases.
1425091fd0abSLuigi Rizzo 	 * XXX handle reserved
1426091fd0abSLuigi Rizzo 	 */
1427ce3ee1e7SLuigi Rizzo 	u_int lim = kring->nkr_num_slots - 1;
1428091fd0abSLuigi Rizzo 	struct mbuf *m, *tail = q->tail;
1429ce3ee1e7SLuigi Rizzo 	u_int k = kring->ring->cur, n = kring->ring->reserved;
1430ce3ee1e7SLuigi Rizzo 	struct netmap_mem_d *nmd = kring->na->nm_mem;
1431091fd0abSLuigi Rizzo 
1432ce3ee1e7SLuigi Rizzo 	/* compute the final position, ring->cur - ring->reserved */
1433ce3ee1e7SLuigi Rizzo 	if (n > 0) {
1434ce3ee1e7SLuigi Rizzo 		if (k < n)
1435ce3ee1e7SLuigi Rizzo 			k += kring->nkr_num_slots;
1436ce3ee1e7SLuigi Rizzo 		k += n;
1437ce3ee1e7SLuigi Rizzo 	}
1438091fd0abSLuigi Rizzo 	for (n = kring->nr_hwcur; n != k;) {
1439091fd0abSLuigi Rizzo 		struct netmap_slot *slot = &kring->ring->slot[n];
1440091fd0abSLuigi Rizzo 
1441ce3ee1e7SLuigi Rizzo 		n = nm_next(n, lim);
1442091fd0abSLuigi Rizzo 		if ((slot->flags & NS_FORWARD) == 0 && !force)
1443091fd0abSLuigi Rizzo 			continue;
1444ce3ee1e7SLuigi Rizzo 		if (slot->len < 14 || slot->len > NETMAP_BDG_BUF_SIZE(nmd)) {
1445091fd0abSLuigi Rizzo 			D("bad pkt at %d len %d", n, slot->len);
1446091fd0abSLuigi Rizzo 			continue;
1447091fd0abSLuigi Rizzo 		}
1448091fd0abSLuigi Rizzo 		slot->flags &= ~NS_FORWARD; // XXX needed ?
1449ce3ee1e7SLuigi Rizzo 		/* XXX adapt to the case of a multisegment packet */
1450ce3ee1e7SLuigi Rizzo 		m = m_devget(BDG_NMB(nmd, slot), slot->len, 0, kring->na->ifp, NULL);
1451091fd0abSLuigi Rizzo 
1452091fd0abSLuigi Rizzo 		if (m == NULL)
1453091fd0abSLuigi Rizzo 			break;
1454091fd0abSLuigi Rizzo 		if (tail)
1455091fd0abSLuigi Rizzo 			tail->m_nextpkt = m;
1456091fd0abSLuigi Rizzo 		else
1457091fd0abSLuigi Rizzo 			q->head = m;
1458091fd0abSLuigi Rizzo 		tail = m;
1459091fd0abSLuigi Rizzo 		q->count++;
1460091fd0abSLuigi Rizzo 		m->m_nextpkt = NULL;
1461091fd0abSLuigi Rizzo 	}
1462091fd0abSLuigi Rizzo 	q->tail = tail;
1463091fd0abSLuigi Rizzo }
1464091fd0abSLuigi Rizzo 
1465f18be576SLuigi Rizzo 
1466091fd0abSLuigi Rizzo /*
1467091fd0abSLuigi Rizzo  * The host ring has packets from nr_hwcur to (cur - reserved)
1468ce3ee1e7SLuigi Rizzo  * to be sent down to the NIC.
1469ce3ee1e7SLuigi Rizzo  * We need to use the queue lock on the source (host RX ring)
1470ce3ee1e7SLuigi Rizzo  * to protect against netmap_transmit.
1471ce3ee1e7SLuigi Rizzo  * If the user is well behaved we do not need to acquire locks
1472ce3ee1e7SLuigi Rizzo  * on the destination(s),
1473ce3ee1e7SLuigi Rizzo  * so we only need to make sure that there are no panics because
1474ce3ee1e7SLuigi Rizzo  * of user errors.
1475ce3ee1e7SLuigi Rizzo  * XXX verify
1476ce3ee1e7SLuigi Rizzo  *
1477ce3ee1e7SLuigi Rizzo  * We scan the tx rings, which have just been
1478091fd0abSLuigi Rizzo  * flushed so nr_hwcur == cur. Pushing packets down means
1479091fd0abSLuigi Rizzo  * increment cur and decrement avail.
1480091fd0abSLuigi Rizzo  * XXX to be verified
1481091fd0abSLuigi Rizzo  */
/*
 * Move released packets from the host RX ring down into the NIC tx
 * rings by swapping buffer indexes (zero-copy).  Holds the host ring's
 * q_lock against netmap_transmit; see the comment above for details.
 */
static void
netmap_sw_to_nic(struct netmap_adapter *na)
{
	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];	/* host RX ring */
	struct netmap_kring *k1 = &na->tx_rings[0];
	u_int i, howmany, src_lim, dst_lim;

	/* XXX we should also check that the carrier is on */
	if (kring->nkr_stopped)
		return;

	mtx_lock(&kring->q_lock);

	if (kring->nkr_stopped)	/* re-check under the lock */
		goto out;

	howmany = kring->nr_hwavail;	/* XXX otherwise cur - reserved - nr_hwcur */

	src_lim = kring->nkr_num_slots - 1;
	for (i = 0; howmany > 0 && i < na->num_tx_rings; i++, k1++) {
		ND("%d packets left to ring %d (space %d)", howmany, i, k1->nr_hwavail);
		dst_lim = k1->nkr_num_slots - 1;
		while (howmany > 0 && k1->ring->avail > 0) {
			struct netmap_slot *src, *dst, tmp;
			src = &kring->ring->slot[kring->nr_hwcur];
			dst = &k1->ring->slot[k1->ring->cur];
			/* swap the buffers: dst gets the packet, src gets
			 * dst's free buffer; both sides flag the change */
			tmp = *src;
			src->buf_idx = dst->buf_idx;
			src->flags = NS_BUF_CHANGED;

			dst->buf_idx = tmp.buf_idx;
			dst->len = tmp.len;
			dst->flags = NS_BUF_CHANGED;
			ND("out len %d buf %d from %d to %d",
				dst->len, dst->buf_idx,
				kring->nr_hwcur, k1->ring->cur);

			kring->nr_hwcur = nm_next(kring->nr_hwcur, src_lim);
			howmany--;
			kring->nr_hwavail--;
			k1->ring->cur = nm_next(k1->ring->cur, dst_lim);
			k1->ring->avail--;
		}
		kring->ring->cur = kring->nr_hwcur; // XXX
		k1++; /* XXX why? NOTE(review): k1 is also advanced in the
		       * for clause, so every other tx ring is skipped --
		       * looks like a bug, confirm against upstream */
	}
out:
	mtx_unlock(&kring->q_lock);
}
1531091fd0abSLuigi Rizzo 
1532f18be576SLuigi Rizzo 
1533091fd0abSLuigi Rizzo /*
1534ce3ee1e7SLuigi Rizzo  * netmap_txsync_to_host() passes packets up. We are called from a
153502ad4083SLuigi Rizzo  * system call in user process context, and the only contention
153602ad4083SLuigi Rizzo  * can be among multiple user threads erroneously calling
1537091fd0abSLuigi Rizzo  * this routine concurrently.
153868b8534bSLuigi Rizzo  */
static void
netmap_txsync_to_host(struct netmap_adapter *na)
{
	struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];	/* host TX ring */
	struct netmap_ring *ring = kring->ring;
	u_int k, lim = kring->nkr_num_slots - 1;
	struct mbq q = { NULL, NULL, 0 };

	/* fail if another thread is already working on this ring */
	if (nm_kr_tryget(kring)) {
		D("ring %p busy (user error)", kring);
		return;
	}
	k = ring->cur;
	if (k > lim) {
		D("invalid ring index in stack TX kring %p", kring);
		netmap_ring_reinit(kring);
		nm_kr_put(kring);
		return;
	}

	/* Take packets from hwcur to cur and pass them up.
	 * In case of no buffers we give up. At the end of the loop,
	 * the queue is drained in all cases.
	 */
	netmap_grab_packets(kring, &q, 1);
	kring->nr_hwcur = k;
	kring->nr_hwavail = ring->avail = lim;	/* ring is now empty */

	nm_kr_put(kring);
	netmap_send_up(na->ifp, q.head);	/* deliver outside the ring lock */
}
157068b8534bSLuigi Rizzo 
1571f18be576SLuigi Rizzo 
1572ce3ee1e7SLuigi Rizzo /*
1573ce3ee1e7SLuigi Rizzo  * This is the 'txsync' handler to send from a software ring to the
1574ce3ee1e7SLuigi Rizzo  * host stack.
1575ce3ee1e7SLuigi Rizzo  */
1576f18be576SLuigi Rizzo /* SWNA(ifp)->txrings[0] is always NA(ifp)->txrings[NA(ifp)->num_txrings] */
static int
netmap_bdg_to_host(struct ifnet *ifp, u_int ring_nr, int flags)
{
	(void)ring_nr;	/* the software adapter has a single tx ring */
	(void)flags;
	if (netmap_verbose > 255)
		RD(5, "sync to host %s ring %d", ifp->if_xname, ring_nr);
	/* flush the hardware adapter's host tx ring up the stack */
	netmap_txsync_to_host(NA(ifp));
	return 0;
}
1587f18be576SLuigi Rizzo 
1588f18be576SLuigi Rizzo 
158968b8534bSLuigi Rizzo /*
159002ad4083SLuigi Rizzo  * rxsync backend for packets coming from the host stack.
1591ce3ee1e7SLuigi Rizzo  * They have been put in the queue by netmap_transmit() so we
159202ad4083SLuigi Rizzo  * need to protect access to the kring using a lock.
159302ad4083SLuigi Rizzo  *
159468b8534bSLuigi Rizzo  * This routine also does the selrecord if called from the poll handler
159568b8534bSLuigi Rizzo  * (we know because td != NULL).
159601c7d25fSLuigi Rizzo  *
159701c7d25fSLuigi Rizzo  * NOTE: on linux, selrecord() is defined as a macro and uses pwait
159801c7d25fSLuigi Rizzo  *     as an additional hidden argument.
159968b8534bSLuigi Rizzo  */
static void
netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait)
{
	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];	/* host RX ring */
	struct netmap_ring *ring = kring->ring;
	u_int j, n, lim = kring->nkr_num_slots;
	u_int k = ring->cur, resvd = ring->reserved;

	(void)pwait;	/* disable unused warnings */

	if (kring->nkr_stopped) /* check a first time without lock */
		return;

	/* XXX as an optimization we could reuse na->core_lock */
	mtx_lock(&kring->q_lock);

	if (kring->nkr_stopped)  /* check again with lock held */
		goto unlock_out;

	if (k >= lim) {
		netmap_ring_reinit(kring);
		goto unlock_out;
	}
	/* new packets are already set in nr_hwavail */
	/* skip past packets that userspace has released */
	j = kring->nr_hwcur;
	if (resvd > 0) {
		if (resvd + ring->avail >= lim + 1) {
			D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
			ring->reserved = resvd = 0; // XXX panic...
		}
		/* back cur up by the reserved slots, wrapping modulo lim */
		k = (k >= resvd) ? k - resvd : k + lim - resvd;
        }
	if (j != k) {
		/* userspace released slots [j..k): consume them */
		n = k >= j ? k - j : k + lim - j;
		kring->nr_hwavail -= n;
		kring->nr_hwcur = k;
	}
	k = ring->avail = kring->nr_hwavail - resvd;
	if (k == 0 && td)
		selrecord(td, &kring->si);	/* nothing ready: register for poll */
	if (k && (netmap_verbose & NM_VERB_HOST))
		D("%d pkts from stack", k);
unlock_out:

	mtx_unlock(&kring->q_lock);
}
164768b8534bSLuigi Rizzo 
164868b8534bSLuigi Rizzo 
164968b8534bSLuigi Rizzo /*
1650ce3ee1e7SLuigi Rizzo  * MUST BE CALLED UNDER NMG_LOCK()
1651ce3ee1e7SLuigi Rizzo  *
165268b8534bSLuigi Rizzo  * get a refcounted reference to an interface.
1653ce3ee1e7SLuigi Rizzo  * This is always called in the execution of an ioctl().
1654ce3ee1e7SLuigi Rizzo  *
165568b8534bSLuigi Rizzo  * Return ENXIO if the interface does not exist, EINVAL if netmap
165668b8534bSLuigi Rizzo  * is not supported by the interface.
165768b8534bSLuigi Rizzo  * If successful, hold a reference.
1658f18be576SLuigi Rizzo  *
1659ce3ee1e7SLuigi Rizzo  * When the NIC is attached to a bridge, reference is managed
1660f18be576SLuigi Rizzo  * at na->na_bdg_refcount using ADD/DROP_BDG_REF() as well as
1661f18be576SLuigi Rizzo  * virtual ports.  Hence, on the final DROP_BDG_REF(), the NIC
 * is detached from the bridge, then ifp's refcount is dropped (for
 * virtual ports this is equivalent to destroying the ifp).
1664f18be576SLuigi Rizzo  *
1665f18be576SLuigi Rizzo  * This function uses if_rele() when we want to prevent the NIC from
1666f18be576SLuigi Rizzo  * being detached from the bridge in error handling.  But once refcount
1667f18be576SLuigi Rizzo  * is acquired by this function, it must be released using nm_if_rele().
166868b8534bSLuigi Rizzo  */
static int
get_ifp(struct nmreq *nmr, struct ifnet **ifp, int create)
{
	const char *name = nmr->nr_name;
	int namelen = strlen(name);
	struct ifnet *iter = NULL;
	int no_prefix = 0;

	/* first try to see if this is a bridge port. */
	struct nm_bridge *b;
	struct netmap_adapter *na;
	int i, j, cand = -1, cand2 = -1;	/* candidate bridge port slots */
	int needed;

	NMG_LOCK_ASSERT();
	*ifp = NULL;	/* default */
	if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) {
		no_prefix = 1;	/* no VALE prefix */
		goto no_bridge_port;
	}

	b = nm_find_bridge(name, create);
	if (b == NULL) {
		D("no bridges available for '%s'", name);
		return (ENXIO);
	}

	/* Now we are sure that name starts with the bridge's name,
	 * lookup the port in the bridge. We need to scan the entire
	 * list. It is not important to hold a WLOCK on the bridge
	 * during the search because NMG_LOCK already guarantees
	 * that there are no other possible writers.
	 */

	/* lookup in the local list of ports */
	for (j = 0; j < b->bdg_active_ports; j++) {
		i = b->bdg_port_index[j];
		na = b->bdg_ports[i];
		// KASSERT(na != NULL);
		iter = na->ifp;
		/* XXX make sure the name only contains one : */
		if (!strcmp(iter->if_xname, name) /* virtual port */ ||
		    (namelen > b->bdg_namelen && !strcmp(iter->if_xname,
		    name + b->bdg_namelen + 1)) /* NIC */) {
			ADD_BDG_REF(iter);
			ND("found existing if %s refs %d", name,
				NA(iter)->na_bdg_refcount);
			*ifp = iter;
			/* we are done, this is surely netmap capable */
			return 0;
		}
	}
	/* not found, should we create it? */
	if (!create)
		return ENXIO;
	/* yes we should, see if we have space to attach entries */
	needed = 2; /* in some cases we only need 1 */
	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
		D("bridge full %d, cannot create new port", b->bdg_active_ports);
		return EINVAL;
	}
	/* record the next two ports available, but do not allocate yet */
	cand = b->bdg_port_index[b->bdg_active_ports];
	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
	ND("+++ bridge %s port %s used %d avail %d %d",
		b->bdg_basename, name, b->bdg_active_ports, cand, cand2);

	/*
	 * try see if there is a matching NIC with this name
	 * (after the bridge's name)
	 */
	iter = ifunit_ref(name + b->bdg_namelen + 1);
	if (!iter) { /* this is a virtual port */
		/* Create a temporary NA with arguments, then
		 * bdg_netmap_attach() will allocate the real one
		 * and attach it to the ifp
		 */
		struct netmap_adapter tmp_na;
		int error;

		if (nmr->nr_cmd) {
			/* nr_cmd must be 0 for a virtual port */
			return EINVAL;
		}
		bzero(&tmp_na, sizeof(tmp_na));
		/* bound checking */
		tmp_na.num_tx_rings = nmr->nr_tx_rings;
		nm_bound_var(&tmp_na.num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
		nmr->nr_tx_rings = tmp_na.num_tx_rings; // write back
		tmp_na.num_rx_rings = nmr->nr_rx_rings;
		nm_bound_var(&tmp_na.num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
		nmr->nr_rx_rings = tmp_na.num_rx_rings; // write back
		nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
				1, NM_BDG_MAXSLOTS, NULL);
		tmp_na.num_tx_desc = nmr->nr_tx_slots;
		nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
				1, NM_BDG_MAXSLOTS, NULL);
		tmp_na.num_rx_desc = nmr->nr_rx_slots;

	 	/* create a struct ifnet for the new port.
		 * need M_NOWAIT as we are under nma_lock
		 */
		iter = malloc(sizeof(*iter), M_DEVBUF, M_NOWAIT | M_ZERO);
		if (!iter)
			return ENOMEM;

		strcpy(iter->if_xname, name);
		tmp_na.ifp = iter;
		/* bdg_netmap_attach creates a struct netmap_adapter */
		error = bdg_netmap_attach(&tmp_na);
		if (error) {
			D("error %d", error);
			free(iter, M_DEVBUF);
			return error;
		}
		cand2 = -1;	/* only need one port */
	} else if (NETMAP_CAPABLE(iter)) { /* this is a NIC */
		/* make sure the NIC is not already in use */
		if (NETMAP_OWNED_BY_ANY(iter)) {
			D("NIC %s busy, cannot attach to bridge",
				iter->if_xname);
			if_rele(iter); /* don't detach from bridge */
			return EINVAL;
		}
		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
			cand2 = -1; /* only need one port */
	} else { /* not a netmap-capable NIC */
		if_rele(iter); /* don't detach from bridge */
		return EINVAL;
	}
	na = NA(iter);

	/* attach the new port (and possibly the host stack) to the bridge */
	BDG_WLOCK(b);
	na->bdg_port = cand;
	ND("NIC  %p to bridge port %d", NA(iter), cand);
	/* bind the port to the bridge (virtual ports are not active) */
	b->bdg_ports[cand] = na;
	na->na_bdg = b;
	b->bdg_active_ports++;
	if (cand2 >= 0) {
		/* also bind the host stack to the bridge */
		b->bdg_ports[cand2] = SWNA(iter);
		SWNA(iter)->bdg_port = cand2;
		SWNA(iter)->na_bdg = b;
		b->bdg_active_ports++;
		ND("host %p to bridge port %d", SWNA(iter), cand2);
	}
	ADD_BDG_REF(iter);	// XXX one or two ?
	ND("if %s refs %d", name, NA(iter)->na_bdg_refcount);
	BDG_WUNLOCK(b);
	*ifp = iter;
	return 0;

no_bridge_port:
	*ifp = iter;
	if (! *ifp)
		*ifp = ifunit_ref(name);
	if (*ifp == NULL)
		return (ENXIO);

	if (NETMAP_CAPABLE(*ifp)) {
		/* Users cannot use the NIC attached to a bridge directly */
		if (no_prefix && NETMAP_OWNED_BY_KERN(*ifp)) {
			if_rele(*ifp); /* don't detach from bridge */
			return EINVAL;
		} else
			return 0;	/* valid pointer, we hold the refcount */
	}
	nm_if_rele(*ifp);
	return EINVAL;	// not NETMAP capable
}
184068b8534bSLuigi Rizzo 
184168b8534bSLuigi Rizzo 
184268b8534bSLuigi Rizzo /*
184368b8534bSLuigi Rizzo  * Error routine called when txsync/rxsync detects an error.
184468b8534bSLuigi Rizzo  * Can't do much more than resetting cur = hwcur, avail = hwavail.
184568b8534bSLuigi Rizzo  * Return 1 on reinit.
1846506cc70cSLuigi Rizzo  *
1847506cc70cSLuigi Rizzo  * This routine is only called by the upper half of the kernel.
1848506cc70cSLuigi Rizzo  * It only reads hwcur (which is changed only by the upper half, too)
1849506cc70cSLuigi Rizzo  * and hwavail (which may be changed by the lower half, but only on
1850506cc70cSLuigi Rizzo  * a tx ring and only to increase it, so any error will be recovered
1851506cc70cSLuigi Rizzo  * on the next call). For the above, we don't strictly need to call
1852506cc70cSLuigi Rizzo  * it under lock.
185368b8534bSLuigi Rizzo  */
185468b8534bSLuigi Rizzo int
185568b8534bSLuigi Rizzo netmap_ring_reinit(struct netmap_kring *kring)
185668b8534bSLuigi Rizzo {
185768b8534bSLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
185868b8534bSLuigi Rizzo 	u_int i, lim = kring->nkr_num_slots - 1;
185968b8534bSLuigi Rizzo 	int errors = 0;
186068b8534bSLuigi Rizzo 
1861ce3ee1e7SLuigi Rizzo 	// XXX KASSERT nm_kr_tryget
18628241616dSLuigi Rizzo 	RD(10, "called for %s", kring->na->ifp->if_xname);
186368b8534bSLuigi Rizzo 	if (ring->cur > lim)
186468b8534bSLuigi Rizzo 		errors++;
186568b8534bSLuigi Rizzo 	for (i = 0; i <= lim; i++) {
186668b8534bSLuigi Rizzo 		u_int idx = ring->slot[i].buf_idx;
186768b8534bSLuigi Rizzo 		u_int len = ring->slot[i].len;
186868b8534bSLuigi Rizzo 		if (idx < 2 || idx >= netmap_total_buffers) {
186968b8534bSLuigi Rizzo 			if (!errors++)
187068b8534bSLuigi Rizzo 				D("bad buffer at slot %d idx %d len %d ", i, idx, len);
187168b8534bSLuigi Rizzo 			ring->slot[i].buf_idx = 0;
187268b8534bSLuigi Rizzo 			ring->slot[i].len = 0;
1873ce3ee1e7SLuigi Rizzo 		} else if (len > NETMAP_BDG_BUF_SIZE(kring->na->nm_mem)) {
187468b8534bSLuigi Rizzo 			ring->slot[i].len = 0;
187568b8534bSLuigi Rizzo 			if (!errors++)
187668b8534bSLuigi Rizzo 				D("bad len %d at slot %d idx %d",
187768b8534bSLuigi Rizzo 					len, i, idx);
187868b8534bSLuigi Rizzo 		}
187968b8534bSLuigi Rizzo 	}
188068b8534bSLuigi Rizzo 	if (errors) {
188168b8534bSLuigi Rizzo 		int pos = kring - kring->na->tx_rings;
1882d76bf4ffSLuigi Rizzo 		int n = kring->na->num_tx_rings + 1;
188368b8534bSLuigi Rizzo 
18848241616dSLuigi Rizzo 		RD(10, "total %d errors", errors);
188568b8534bSLuigi Rizzo 		errors++;
18868241616dSLuigi Rizzo 		RD(10, "%s %s[%d] reinit, cur %d -> %d avail %d -> %d",
188768b8534bSLuigi Rizzo 			kring->na->ifp->if_xname,
188868b8534bSLuigi Rizzo 			pos < n ?  "TX" : "RX", pos < n ? pos : pos - n,
188968b8534bSLuigi Rizzo 			ring->cur, kring->nr_hwcur,
189068b8534bSLuigi Rizzo 			ring->avail, kring->nr_hwavail);
189168b8534bSLuigi Rizzo 		ring->cur = kring->nr_hwcur;
189268b8534bSLuigi Rizzo 		ring->avail = kring->nr_hwavail;
189368b8534bSLuigi Rizzo 	}
189468b8534bSLuigi Rizzo 	return (errors ? 1 : 0);
189568b8534bSLuigi Rizzo }
189668b8534bSLuigi Rizzo 
189768b8534bSLuigi Rizzo 
189868b8534bSLuigi Rizzo /*
189968b8534bSLuigi Rizzo  * Set the ring ID. For devices with a single queue, a request
190068b8534bSLuigi Rizzo  * for all rings is the same as a single ring.
190168b8534bSLuigi Rizzo  */
190268b8534bSLuigi Rizzo static int
190368b8534bSLuigi Rizzo netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid)
190468b8534bSLuigi Rizzo {
190568b8534bSLuigi Rizzo 	struct ifnet *ifp = priv->np_ifp;
190668b8534bSLuigi Rizzo 	struct netmap_adapter *na = NA(ifp);
190768b8534bSLuigi Rizzo 	u_int i = ringid & NETMAP_RING_MASK;
190864ae02c3SLuigi Rizzo 	/* initially (np_qfirst == np_qlast) we don't want to lock */
1909ce3ee1e7SLuigi Rizzo 	u_int lim = na->num_rx_rings;
191068b8534bSLuigi Rizzo 
1911d76bf4ffSLuigi Rizzo 	if (na->num_tx_rings > lim)
1912d76bf4ffSLuigi Rizzo 		lim = na->num_tx_rings;
191364ae02c3SLuigi Rizzo 	if ( (ringid & NETMAP_HW_RING) && i >= lim) {
191468b8534bSLuigi Rizzo 		D("invalid ring id %d", i);
191568b8534bSLuigi Rizzo 		return (EINVAL);
191668b8534bSLuigi Rizzo 	}
191768b8534bSLuigi Rizzo 	priv->np_ringid = ringid;
191868b8534bSLuigi Rizzo 	if (ringid & NETMAP_SW_RING) {
191964ae02c3SLuigi Rizzo 		priv->np_qfirst = NETMAP_SW_RING;
192064ae02c3SLuigi Rizzo 		priv->np_qlast = 0;
192168b8534bSLuigi Rizzo 	} else if (ringid & NETMAP_HW_RING) {
192268b8534bSLuigi Rizzo 		priv->np_qfirst = i;
192368b8534bSLuigi Rizzo 		priv->np_qlast = i + 1;
192468b8534bSLuigi Rizzo 	} else {
192568b8534bSLuigi Rizzo 		priv->np_qfirst = 0;
192664ae02c3SLuigi Rizzo 		priv->np_qlast = NETMAP_HW_RING ;
192768b8534bSLuigi Rizzo 	}
192868b8534bSLuigi Rizzo 	priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1;
1929ae10d1afSLuigi Rizzo     if (netmap_verbose) {
193068b8534bSLuigi Rizzo 	if (ringid & NETMAP_SW_RING)
193168b8534bSLuigi Rizzo 		D("ringid %s set to SW RING", ifp->if_xname);
193268b8534bSLuigi Rizzo 	else if (ringid & NETMAP_HW_RING)
193368b8534bSLuigi Rizzo 		D("ringid %s set to HW RING %d", ifp->if_xname,
193468b8534bSLuigi Rizzo 			priv->np_qfirst);
193568b8534bSLuigi Rizzo 	else
193664ae02c3SLuigi Rizzo 		D("ringid %s set to all %d HW RINGS", ifp->if_xname, lim);
1937ae10d1afSLuigi Rizzo     }
193868b8534bSLuigi Rizzo 	return 0;
193968b8534bSLuigi Rizzo }
194068b8534bSLuigi Rizzo 
1941f18be576SLuigi Rizzo 
1942f18be576SLuigi Rizzo /*
1943f18be576SLuigi Rizzo  * possibly move the interface to netmap-mode.
1944f18be576SLuigi Rizzo  * If success it returns a pointer to netmap_if, otherwise NULL.
1945ce3ee1e7SLuigi Rizzo  * This must be called with NMG_LOCK held.
1946f18be576SLuigi Rizzo  */
static struct netmap_if *
netmap_do_regif(struct netmap_priv_d *priv, struct ifnet *ifp,
	uint16_t ringid, int *err)
{
	struct netmap_adapter *na = NA(ifp);
	struct netmap_if *nifp = NULL;
	int error, need_mem;

	NMG_LOCK_ASSERT();
	/* ring configuration may have changed, fetch from the card */
	netmap_update_config(na);
	priv->np_ifp = ifp;     /* store the reference */
	error = netmap_set_ringid(priv, ringid);
	if (error)
		goto out;
	/* ensure allocators are ready; remember whether we grabbed the
	 * memory here so the error path knows whether to drop it */
	need_mem = !netmap_have_memory_locked(priv);
	if (need_mem) {
		error = netmap_get_memory_locked(priv);
		ND("get_memory returned %d", error);
		if (error)
			goto out;
	}
	nifp = netmap_if_new(ifp->if_xname, na);
	if (nifp == NULL) { /* allocation failed */
		/* we should drop the allocator, but only
		 * if we were the ones who grabbed it
		 */
		if (need_mem)
			netmap_drop_memory_locked(priv);
		error = ENOMEM;
		goto out;
	}
	na->refcount++;
	if (ifp->if_capenable & IFCAP_NETMAP) {
		/* was already set */
	} else {
		u_int i;
		/* Otherwise set the card in netmap mode
		 * and make it use the shared buffers.
		 *
		 * If the interface is attached to a bridge, lock it.
		 */
		if (NETMAP_OWNED_BY_KERN(ifp))
			BDG_WLOCK(NA(ifp)->na_bdg);
		/* one queue lock per hw ring, plus one for the host ring */
		for (i = 0 ; i < na->num_tx_rings + 1; i++)
			mtx_init(&na->tx_rings[i].q_lock, "nm_txq_lock",
			    NULL, MTX_DEF);
		for (i = 0 ; i < na->num_rx_rings + 1; i++) {
			mtx_init(&na->rx_rings[i].q_lock, "nm_rxq_lock",
			    NULL, MTX_DEF);
		}
		if (nma_is_hw(na)) {
			/* point the sw adapter at the hw adapter's host rings */
			SWNA(ifp)->tx_rings = &na->tx_rings[na->num_tx_rings];
			SWNA(ifp)->rx_rings = &na->rx_rings[na->num_rx_rings];
		}
		/*
		 * do not core lock because the race is harmless here,
		 * there cannot be any traffic to netmap_transmit()
		 */
		error = na->nm_register(ifp, 1); /* mode on */
		// XXX do we need to nm_alloc_bdgfwd() in all cases ?
		if (!error)
			error = nm_alloc_bdgfwd(na);
		if (error) {
			/* undo the registration; nifp == NULL signals failure */
			netmap_do_unregif(priv, nifp);
			nifp = NULL;
		}
		if (NETMAP_OWNED_BY_KERN(ifp))
			BDG_WUNLOCK(NA(ifp)->na_bdg);

	}
out:
	*err = error;
	if (nifp != NULL) {
		/*
		 * advertise that the interface is ready by setting np_nifp.
		 * The barrier is needed because readers (poll and *SYNC)
		 * check for priv->np_nifp != NULL without locking
		 */
		wmb(); /* make sure previous writes are visible to all CPUs */
		priv->np_nifp = nifp;
	}
	return nifp;
}
2032f18be576SLuigi Rizzo 
2033f18be576SLuigi Rizzo /* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */
/*
 * Put a bridge-owned NIC (wired to a VALE switch by get_ifp()) into
 * netmap mode on behalf of the kernel. The kernel-owned priv is kept
 * in NA(ifp)->na_kpriv until NETMAP_BDG_DETACH.
 * Returns 0 on success, an errno otherwise.
 */
static int
nm_bdg_attach(struct nmreq *nmr)
{
	struct ifnet *ifp;
	struct netmap_if *nifp;
	struct netmap_priv_d *npriv;
	int error;

	npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
	if (npriv == NULL)
		return ENOMEM;
	NMG_LOCK();
	error = get_ifp(nmr, &ifp, 1 /* create if not exists */);
	if (error) /* no device, or another bridge or user owns the device */
		goto unlock_exit;
	/* get_ifp() sets na_bdg if this is a physical interface
	 * that we can attach to a switch.
	 */
	if (!NETMAP_OWNED_BY_KERN(ifp)) {
		/* got reference to a virtual port or direct access to a NIC.
		 * perhaps specified no bridge prefix or wrong NIC name
		 */
		error = EINVAL;
		goto unref_exit;
	}

	if (NA(ifp)->refcount > 0) { /* already registered */
		error = EBUSY;
		DROP_BDG_REF(ifp);
		goto unlock_exit;
	}

	nifp = netmap_do_regif(npriv, ifp, nmr->nr_ringid, &error);
	if (!nifp) {
		/* regif failed; error was set by netmap_do_regif() */
		goto unref_exit;
	}

	NA(ifp)->na_kpriv = npriv;
	NMG_UNLOCK();
	ND("registered %s to netmap-mode", ifp->if_xname);
	return 0;

unref_exit:
	nm_if_rele(ifp);
unlock_exit:
	NMG_UNLOCK();
	/* scrub and release the never-published priv */
	bzero(npriv, sizeof(*npriv));
	free(npriv, M_DEVBUF);
	return error;
}
2084f18be576SLuigi Rizzo 
/*
 * Undo nm_bdg_attach(): take a bridge-owned NIC out of netmap mode
 * and release the kernel-owned priv stored in NA(ifp)->na_kpriv.
 * Returns 0 on success, an errno otherwise.
 */
static int
nm_bdg_detach(struct nmreq *nmr)
{
	struct ifnet *ifp;
	int error;
	int last_instance;

	NMG_LOCK();
	error = get_ifp(nmr, &ifp, 0 /* don't create */);
	if (error) { /* no device, or another bridge or user owns the device */
		goto unlock_exit;
	}
	/* XXX do we need to check this ? */
	if (!NETMAP_OWNED_BY_KERN(ifp)) {
		/* got reference to a virtual port or direct access to a NIC.
		 * perhaps specified no bridge's prefix or wrong NIC's name
		 */
		error = EINVAL;
		goto unref_exit;
	}

	if (NA(ifp)->refcount == 0) { /* not registered */
		error = EINVAL;
		goto unref_exit;
	}

	DROP_BDG_REF(ifp); /* the one from get_ifp */
	last_instance = netmap_dtor_locked(NA(ifp)->na_kpriv); /* unregister */
	NMG_UNLOCK();
	if (!last_instance) {
		D("--- error, trying to detach an entry with active mmaps");
		error = EINVAL;
	} else {
		/* last user gone: scrub and free the kernel priv */
		struct netmap_priv_d *npriv = NA(ifp)->na_kpriv;
		NA(ifp)->na_kpriv = NULL;

		bzero(npriv, sizeof(*npriv));
		free(npriv, M_DEVBUF);
	}
	return error;

unref_exit:
	nm_if_rele(ifp);
unlock_exit:
	NMG_UNLOCK();
	return error;
}
2132f18be576SLuigi Rizzo 
2133f18be576SLuigi Rizzo 
2134f18be576SLuigi Rizzo /* Initialize necessary fields of sw adapter located in right after hw's
2135f18be576SLuigi Rizzo  * one.  sw adapter attaches a pair of sw rings of the netmap-mode NIC.
 * It is always activated and deactivated at the same time as the hw's one.
2137f18be576SLuigi Rizzo  * Thus we don't need refcounting on the sw adapter.
2138f18be576SLuigi Rizzo  * Regardless of NIC's feature we use separate lock so that anybody can lock
2139f18be576SLuigi Rizzo  * me independently from the hw adapter.
2140f18be576SLuigi Rizzo  * Make sure nm_register is NULL to be handled as FALSE in nma_is_hw
2141f18be576SLuigi Rizzo  */
2142f18be576SLuigi Rizzo static void
2143f18be576SLuigi Rizzo netmap_attach_sw(struct ifnet *ifp)
2144f18be576SLuigi Rizzo {
2145f18be576SLuigi Rizzo 	struct netmap_adapter *hw_na = NA(ifp);
2146f18be576SLuigi Rizzo 	struct netmap_adapter *na = SWNA(ifp);
2147f18be576SLuigi Rizzo 
2148f18be576SLuigi Rizzo 	na->ifp = ifp;
2149f18be576SLuigi Rizzo 	na->num_rx_rings = na->num_tx_rings = 1;
2150f18be576SLuigi Rizzo 	na->num_tx_desc = hw_na->num_tx_desc;
2151f18be576SLuigi Rizzo 	na->num_rx_desc = hw_na->num_rx_desc;
2152f18be576SLuigi Rizzo 	na->nm_txsync = netmap_bdg_to_host;
2153ce3ee1e7SLuigi Rizzo 	/* we use the same memory allocator as the
2154ce3ee1e7SLuigi Rizzo 	 * the hw adapter */
2155ce3ee1e7SLuigi Rizzo 	na->nm_mem = hw_na->nm_mem;
2156f18be576SLuigi Rizzo }
2157f18be576SLuigi Rizzo 
2158f18be576SLuigi Rizzo 
2159ce3ee1e7SLuigi Rizzo /* exported to kernel callers, e.g. OVS ?
2160ce3ee1e7SLuigi Rizzo  * Entry point.
2161ce3ee1e7SLuigi Rizzo  * Called without NMG_LOCK.
2162ce3ee1e7SLuigi Rizzo  */
int
netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
{
	struct nm_bridge *b;
	struct netmap_adapter *na;
	struct ifnet *iter;
	char *name = nmr->nr_name;
	int cmd = nmr->nr_cmd, namelen = strlen(name);
	int error = 0, i, j;

	switch (cmd) {
	case NETMAP_BDG_ATTACH:
		error = nm_bdg_attach(nmr);
		break;

	case NETMAP_BDG_DETACH:
		error = nm_bdg_detach(nmr);
		break;

	case NETMAP_BDG_LIST:
		/* this is used to enumerate bridges and ports */
		if (namelen) { /* look up indexes of bridge and port */
			/* names must start with the NM_NAME prefix */
			if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
				error = EINVAL;
				break;
			}
			NMG_LOCK();
			b = nm_find_bridge(name, 0 /* don't create */);
			if (!b) {
				error = ENOENT;
				NMG_UNLOCK();
				break;
			}

			/* scan the active ports for a matching name */
			error = ENOENT;
			for (j = 0; j < b->bdg_active_ports; j++) {
				i = b->bdg_port_index[j];
				na = b->bdg_ports[i];
				if (na == NULL) {
					D("---AAAAAAAAARGH-------");
					continue;
				}
				iter = na->ifp;
				/* the former and the latter identify a
				 * virtual port and a NIC, respectively
				 */
				if (!strcmp(iter->if_xname, name) ||
				    (namelen > b->bdg_namelen &&
				    !strcmp(iter->if_xname,
				    name + b->bdg_namelen + 1))) {
					/* bridge index */
					nmr->nr_arg1 = b - nm_bridges;
					nmr->nr_arg2 = i; /* port index */
					error = 0;
					break;
				}
			}
			NMG_UNLOCK();
		} else {
			/* return the first non-empty entry starting from
			 * bridge nr_arg1 and port nr_arg2.
			 *
			 * Users can detect the end of the same bridge by
			 * seeing the new and old value of nr_arg1, and can
			 * detect the end of all the bridge by error != 0
			 */
			i = nmr->nr_arg1;
			j = nmr->nr_arg2;

			NMG_LOCK();
			for (error = ENOENT; i < NM_BRIDGES; i++) {
				b = nm_bridges + i;
				if (j >= b->bdg_active_ports) {
					j = 0; /* following bridges scan from 0 */
					continue;
				}
				nmr->nr_arg1 = i;
				nmr->nr_arg2 = j;
				/* j is reused below as the resolved port index */
				j = b->bdg_port_index[j];
				na = b->bdg_ports[j];
				iter = na->ifp;
				strncpy(name, iter->if_xname, (size_t)IFNAMSIZ);
				error = 0;
				break;
			}
			NMG_UNLOCK();
		}
		break;

	case NETMAP_BDG_LOOKUP_REG:
		/* register a lookup function to the given bridge.
		 * nmr->nr_name may be just bridge's name (including ':'
		 * if it is not just NM_NAME).
		 */
		if (!func) {
			error = EINVAL;
			break;
		}
		NMG_LOCK();
		b = nm_find_bridge(name, 0 /* don't create */);
		if (!b) {
			error = EINVAL;
		} else {
			b->nm_bdg_lookup = func;
		}
		NMG_UNLOCK();
		break;

	default:
		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
		error = EINVAL;
		break;
	}
	return error;
}
2278f18be576SLuigi Rizzo 
2279f18be576SLuigi Rizzo 
228068b8534bSLuigi Rizzo /*
228168b8534bSLuigi Rizzo  * ioctl(2) support for the "netmap" device.
228268b8534bSLuigi Rizzo  *
228368b8534bSLuigi Rizzo  * Following a list of accepted commands:
228468b8534bSLuigi Rizzo  * - NIOCGINFO
228568b8534bSLuigi Rizzo  * - SIOCGIFADDR	just for convenience
228668b8534bSLuigi Rizzo  * - NIOCREGIF
228768b8534bSLuigi Rizzo  * - NIOCUNREGIF
228868b8534bSLuigi Rizzo  * - NIOCTXSYNC
228968b8534bSLuigi Rizzo  * - NIOCRXSYNC
229068b8534bSLuigi Rizzo  *
229168b8534bSLuigi Rizzo  * Return 0 on success, errno otherwise.
229268b8534bSLuigi Rizzo  */
229368b8534bSLuigi Rizzo static int
22940b8ed8e0SLuigi Rizzo netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
22950b8ed8e0SLuigi Rizzo 	int fflag, struct thread *td)
229668b8534bSLuigi Rizzo {
229768b8534bSLuigi Rizzo 	struct netmap_priv_d *priv = NULL;
2298ce3ee1e7SLuigi Rizzo 	struct ifnet *ifp = NULL;
229968b8534bSLuigi Rizzo 	struct nmreq *nmr = (struct nmreq *) data;
2300ce3ee1e7SLuigi Rizzo 	struct netmap_adapter *na = NULL;
230168b8534bSLuigi Rizzo 	int error;
230264ae02c3SLuigi Rizzo 	u_int i, lim;
230368b8534bSLuigi Rizzo 	struct netmap_if *nifp;
2304ce3ee1e7SLuigi Rizzo 	struct netmap_kring *krings;
230568b8534bSLuigi Rizzo 
23060b8ed8e0SLuigi Rizzo 	(void)dev;	/* UNUSED */
23070b8ed8e0SLuigi Rizzo 	(void)fflag;	/* UNUSED */
2308f196ce38SLuigi Rizzo #ifdef linux
2309f196ce38SLuigi Rizzo #define devfs_get_cdevpriv(pp)				\
2310f196ce38SLuigi Rizzo 	({ *(struct netmap_priv_d **)pp = ((struct file *)td)->private_data; 	\
2311f196ce38SLuigi Rizzo 		(*pp ? 0 : ENOENT); })
2312f196ce38SLuigi Rizzo 
2313f196ce38SLuigi Rizzo /* devfs_set_cdevpriv cannot fail on linux */
2314f196ce38SLuigi Rizzo #define devfs_set_cdevpriv(p, fn)				\
2315f196ce38SLuigi Rizzo 	({ ((struct file *)td)->private_data = p; (p ? 0 : EINVAL); })
2316f196ce38SLuigi Rizzo 
2317f196ce38SLuigi Rizzo 
2318f196ce38SLuigi Rizzo #define devfs_clear_cdevpriv()	do {				\
2319f196ce38SLuigi Rizzo 		netmap_dtor(priv); ((struct file *)td)->private_data = 0;	\
2320f196ce38SLuigi Rizzo 	} while (0)
2321f196ce38SLuigi Rizzo #endif /* linux */
2322f196ce38SLuigi Rizzo 
2323506cc70cSLuigi Rizzo 	CURVNET_SET(TD_TO_VNET(td));
2324506cc70cSLuigi Rizzo 
232568b8534bSLuigi Rizzo 	error = devfs_get_cdevpriv((void **)&priv);
23268241616dSLuigi Rizzo 	if (error) {
2327506cc70cSLuigi Rizzo 		CURVNET_RESTORE();
23288241616dSLuigi Rizzo 		/* XXX ENOENT should be impossible, since the priv
23298241616dSLuigi Rizzo 		 * is now created in the open */
23308241616dSLuigi Rizzo 		return (error == ENOENT ? ENXIO : error);
2331506cc70cSLuigi Rizzo 	}
233268b8534bSLuigi Rizzo 
2333f196ce38SLuigi Rizzo 	nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0';	/* truncate name */
233468b8534bSLuigi Rizzo 	switch (cmd) {
233568b8534bSLuigi Rizzo 	case NIOCGINFO:		/* return capabilities etc */
233664ae02c3SLuigi Rizzo 		if (nmr->nr_version != NETMAP_API) {
233764ae02c3SLuigi Rizzo 			D("API mismatch got %d have %d",
233864ae02c3SLuigi Rizzo 				nmr->nr_version, NETMAP_API);
233964ae02c3SLuigi Rizzo 			nmr->nr_version = NETMAP_API;
234064ae02c3SLuigi Rizzo 			error = EINVAL;
234164ae02c3SLuigi Rizzo 			break;
234264ae02c3SLuigi Rizzo 		}
2343f18be576SLuigi Rizzo 		if (nmr->nr_cmd == NETMAP_BDG_LIST) {
2344f18be576SLuigi Rizzo 			error = netmap_bdg_ctl(nmr, NULL);
2345f18be576SLuigi Rizzo 			break;
2346f18be576SLuigi Rizzo 		}
2347ce3ee1e7SLuigi Rizzo 
2348ce3ee1e7SLuigi Rizzo 		NMG_LOCK();
2349ce3ee1e7SLuigi Rizzo 		do {
2350ce3ee1e7SLuigi Rizzo 			/* memsize is always valid */
2351ce3ee1e7SLuigi Rizzo 			struct netmap_mem_d *nmd = &nm_mem;
2352ce3ee1e7SLuigi Rizzo 			u_int memflags;
2353ce3ee1e7SLuigi Rizzo 
2354ce3ee1e7SLuigi Rizzo 			if (nmr->nr_name[0] != '\0') {
2355ce3ee1e7SLuigi Rizzo 				/* get a refcount */
2356ce3ee1e7SLuigi Rizzo 				error = get_ifp(nmr, &ifp, 1 /* create */);
23578241616dSLuigi Rizzo 				if (error)
23588241616dSLuigi Rizzo 					break;
2359ce3ee1e7SLuigi Rizzo 				na = NA(ifp);  /* retrieve the netmap adapter */
2360ce3ee1e7SLuigi Rizzo 				nmd = na->nm_mem; /* and its memory allocator */
2361ce3ee1e7SLuigi Rizzo 			}
2362ce3ee1e7SLuigi Rizzo 
2363ce3ee1e7SLuigi Rizzo 			error = netmap_mem_get_info(nmd, &nmr->nr_memsize, &memflags);
2364ce3ee1e7SLuigi Rizzo 			if (error)
2365ce3ee1e7SLuigi Rizzo 				break;
2366ce3ee1e7SLuigi Rizzo 			if (na == NULL) /* only memory info */
2367ce3ee1e7SLuigi Rizzo 				break;
23688241616dSLuigi Rizzo 			nmr->nr_offset = 0;
23698241616dSLuigi Rizzo 			nmr->nr_rx_slots = nmr->nr_tx_slots = 0;
2370ae10d1afSLuigi Rizzo 			netmap_update_config(na);
2371d76bf4ffSLuigi Rizzo 			nmr->nr_rx_rings = na->num_rx_rings;
2372d76bf4ffSLuigi Rizzo 			nmr->nr_tx_rings = na->num_tx_rings;
237364ae02c3SLuigi Rizzo 			nmr->nr_rx_slots = na->num_rx_desc;
237464ae02c3SLuigi Rizzo 			nmr->nr_tx_slots = na->num_tx_desc;
2375ce3ee1e7SLuigi Rizzo 			if (memflags & NETMAP_MEM_PRIVATE)
2376ce3ee1e7SLuigi Rizzo 				nmr->nr_ringid |= NETMAP_PRIV_MEM;
2377ce3ee1e7SLuigi Rizzo 		} while (0);
2378ce3ee1e7SLuigi Rizzo 		if (ifp)
2379f196ce38SLuigi Rizzo 			nm_if_rele(ifp);	/* return the refcount */
2380ce3ee1e7SLuigi Rizzo 		NMG_UNLOCK();
238168b8534bSLuigi Rizzo 		break;
238268b8534bSLuigi Rizzo 
238368b8534bSLuigi Rizzo 	case NIOCREGIF:
238464ae02c3SLuigi Rizzo 		if (nmr->nr_version != NETMAP_API) {
238564ae02c3SLuigi Rizzo 			nmr->nr_version = NETMAP_API;
238664ae02c3SLuigi Rizzo 			error = EINVAL;
238764ae02c3SLuigi Rizzo 			break;
238864ae02c3SLuigi Rizzo 		}
2389f18be576SLuigi Rizzo 		/* possibly attach/detach NIC and VALE switch */
2390f18be576SLuigi Rizzo 		i = nmr->nr_cmd;
2391f18be576SLuigi Rizzo 		if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH) {
2392f18be576SLuigi Rizzo 			error = netmap_bdg_ctl(nmr, NULL);
2393f18be576SLuigi Rizzo 			break;
2394f18be576SLuigi Rizzo 		} else if (i != 0) {
2395f18be576SLuigi Rizzo 			D("nr_cmd must be 0 not %d", i);
2396f18be576SLuigi Rizzo 			error = EINVAL;
2397f18be576SLuigi Rizzo 			break;
2398f18be576SLuigi Rizzo 		}
2399f18be576SLuigi Rizzo 
24008241616dSLuigi Rizzo 		/* protect access to priv from concurrent NIOCREGIF */
2401ce3ee1e7SLuigi Rizzo 		NMG_LOCK();
2402ce3ee1e7SLuigi Rizzo 		do {
2403ce3ee1e7SLuigi Rizzo 			u_int memflags;
2404ce3ee1e7SLuigi Rizzo 
24058241616dSLuigi Rizzo 			if (priv->np_ifp != NULL) {	/* thread already registered */
2406506cc70cSLuigi Rizzo 				error = netmap_set_ringid(priv, nmr->nr_ringid);
2407506cc70cSLuigi Rizzo 				break;
2408506cc70cSLuigi Rizzo 			}
240968b8534bSLuigi Rizzo 			/* find the interface and a reference */
2410ce3ee1e7SLuigi Rizzo 			error = get_ifp(nmr, &ifp, 1 /* create */); /* keep reference */
241168b8534bSLuigi Rizzo 			if (error)
2412ce3ee1e7SLuigi Rizzo 				break;
2413ce3ee1e7SLuigi Rizzo 			if (NETMAP_OWNED_BY_KERN(ifp)) {
2414f18be576SLuigi Rizzo 				nm_if_rele(ifp);
2415ce3ee1e7SLuigi Rizzo 				error = EBUSY;
2416ce3ee1e7SLuigi Rizzo 				break;
2417f196ce38SLuigi Rizzo 			}
2418f18be576SLuigi Rizzo 			nifp = netmap_do_regif(priv, ifp, nmr->nr_ringid, &error);
2419f18be576SLuigi Rizzo 			if (!nifp) {    /* reg. failed, release priv and ref */
2420f196ce38SLuigi Rizzo 				nm_if_rele(ifp);        /* return the refcount */
24218241616dSLuigi Rizzo 				priv->np_ifp = NULL;
24228241616dSLuigi Rizzo 				priv->np_nifp = NULL;
2423ce3ee1e7SLuigi Rizzo 				break;
242468b8534bSLuigi Rizzo 			}
242568b8534bSLuigi Rizzo 
242668b8534bSLuigi Rizzo 			/* return the offset of the netmap_if object */
2427f18be576SLuigi Rizzo 			na = NA(ifp); /* retrieve netmap adapter */
2428d76bf4ffSLuigi Rizzo 			nmr->nr_rx_rings = na->num_rx_rings;
2429d76bf4ffSLuigi Rizzo 			nmr->nr_tx_rings = na->num_tx_rings;
243064ae02c3SLuigi Rizzo 			nmr->nr_rx_slots = na->num_rx_desc;
243164ae02c3SLuigi Rizzo 			nmr->nr_tx_slots = na->num_tx_desc;
2432ce3ee1e7SLuigi Rizzo 			error = netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, &memflags);
2433ce3ee1e7SLuigi Rizzo 			if (error) {
2434ce3ee1e7SLuigi Rizzo 				nm_if_rele(ifp);
2435ce3ee1e7SLuigi Rizzo 				break;
2436ce3ee1e7SLuigi Rizzo 			}
2437ce3ee1e7SLuigi Rizzo 			if (memflags & NETMAP_MEM_PRIVATE) {
2438ce3ee1e7SLuigi Rizzo 				nmr->nr_ringid |= NETMAP_PRIV_MEM;
24393d819cb6SLuigi Rizzo 				*(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM;
2440ce3ee1e7SLuigi Rizzo 			}
2441ce3ee1e7SLuigi Rizzo 			nmr->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp);
2442ce3ee1e7SLuigi Rizzo 		} while (0);
2443ce3ee1e7SLuigi Rizzo 		NMG_UNLOCK();
244468b8534bSLuigi Rizzo 		break;
244568b8534bSLuigi Rizzo 
244668b8534bSLuigi Rizzo 	case NIOCUNREGIF:
24478241616dSLuigi Rizzo 		// XXX we have no data here ?
24488241616dSLuigi Rizzo 		D("deprecated, data is %p", nmr);
24498241616dSLuigi Rizzo 		error = EINVAL;
245068b8534bSLuigi Rizzo 		break;
245168b8534bSLuigi Rizzo 
245268b8534bSLuigi Rizzo 	case NIOCTXSYNC:
245368b8534bSLuigi Rizzo 	case NIOCRXSYNC:
24548241616dSLuigi Rizzo 		nifp = priv->np_nifp;
24558241616dSLuigi Rizzo 
24568241616dSLuigi Rizzo 		if (nifp == NULL) {
2457506cc70cSLuigi Rizzo 			error = ENXIO;
2458506cc70cSLuigi Rizzo 			break;
2459506cc70cSLuigi Rizzo 		}
24608241616dSLuigi Rizzo 		rmb(); /* make sure following reads are not from cache */
24618241616dSLuigi Rizzo 
246268b8534bSLuigi Rizzo 		ifp = priv->np_ifp;	/* we have a reference */
24638241616dSLuigi Rizzo 
24648241616dSLuigi Rizzo 		if (ifp == NULL) {
24658241616dSLuigi Rizzo 			D("Internal error: nifp != NULL && ifp == NULL");
24668241616dSLuigi Rizzo 			error = ENXIO;
24678241616dSLuigi Rizzo 			break;
24688241616dSLuigi Rizzo 		}
24698241616dSLuigi Rizzo 
247068b8534bSLuigi Rizzo 		na = NA(ifp); /* retrieve netmap adapter */
247164ae02c3SLuigi Rizzo 		if (priv->np_qfirst == NETMAP_SW_RING) { /* host rings */
247268b8534bSLuigi Rizzo 			if (cmd == NIOCTXSYNC)
2473ce3ee1e7SLuigi Rizzo 				netmap_txsync_to_host(na);
247468b8534bSLuigi Rizzo 			else
2475ce3ee1e7SLuigi Rizzo 				netmap_rxsync_from_host(na, NULL, NULL);
2476506cc70cSLuigi Rizzo 			break;
247768b8534bSLuigi Rizzo 		}
247864ae02c3SLuigi Rizzo 		/* find the last ring to scan */
247964ae02c3SLuigi Rizzo 		lim = priv->np_qlast;
248064ae02c3SLuigi Rizzo 		if (lim == NETMAP_HW_RING)
24813c0caf6cSLuigi Rizzo 			lim = (cmd == NIOCTXSYNC) ?
2482d76bf4ffSLuigi Rizzo 			    na->num_tx_rings : na->num_rx_rings;
248368b8534bSLuigi Rizzo 
2484ce3ee1e7SLuigi Rizzo 		krings = (cmd == NIOCTXSYNC) ? na->tx_rings : na->rx_rings;
248564ae02c3SLuigi Rizzo 		for (i = priv->np_qfirst; i < lim; i++) {
2486ce3ee1e7SLuigi Rizzo 			struct netmap_kring *kring = krings + i;
2487ce3ee1e7SLuigi Rizzo 			if (nm_kr_tryget(kring)) {
2488ce3ee1e7SLuigi Rizzo 				error = EBUSY;
2489ce3ee1e7SLuigi Rizzo 				goto out;
2490ce3ee1e7SLuigi Rizzo 			}
249168b8534bSLuigi Rizzo 			if (cmd == NIOCTXSYNC) {
249268b8534bSLuigi Rizzo 				if (netmap_verbose & NM_VERB_TXSYNC)
24933c0caf6cSLuigi Rizzo 					D("pre txsync ring %d cur %d hwcur %d",
249468b8534bSLuigi Rizzo 					    i, kring->ring->cur,
249568b8534bSLuigi Rizzo 					    kring->nr_hwcur);
2496ce3ee1e7SLuigi Rizzo 				na->nm_txsync(ifp, i, NAF_FORCE_RECLAIM);
249768b8534bSLuigi Rizzo 				if (netmap_verbose & NM_VERB_TXSYNC)
24983c0caf6cSLuigi Rizzo 					D("post txsync ring %d cur %d hwcur %d",
249968b8534bSLuigi Rizzo 					    i, kring->ring->cur,
250068b8534bSLuigi Rizzo 					    kring->nr_hwcur);
250168b8534bSLuigi Rizzo 			} else {
2502ce3ee1e7SLuigi Rizzo 				na->nm_rxsync(ifp, i, NAF_FORCE_READ);
250368b8534bSLuigi Rizzo 				microtime(&na->rx_rings[i].ring->ts);
250468b8534bSLuigi Rizzo 			}
2505ce3ee1e7SLuigi Rizzo 			nm_kr_put(kring);
250668b8534bSLuigi Rizzo 		}
250768b8534bSLuigi Rizzo 
250868b8534bSLuigi Rizzo 		break;
250968b8534bSLuigi Rizzo 
2510f196ce38SLuigi Rizzo #ifdef __FreeBSD__
251168b8534bSLuigi Rizzo 	case BIOCIMMEDIATE:
251268b8534bSLuigi Rizzo 	case BIOCGHDRCMPLT:
251368b8534bSLuigi Rizzo 	case BIOCSHDRCMPLT:
251468b8534bSLuigi Rizzo 	case BIOCSSEESENT:
251568b8534bSLuigi Rizzo 		D("ignore BIOCIMMEDIATE/BIOCSHDRCMPLT/BIOCSHDRCMPLT/BIOCSSEESENT");
251668b8534bSLuigi Rizzo 		break;
251768b8534bSLuigi Rizzo 
2518babc7c12SLuigi Rizzo 	default:	/* allow device-specific ioctls */
251968b8534bSLuigi Rizzo 	    {
252068b8534bSLuigi Rizzo 		struct socket so;
2521ce3ee1e7SLuigi Rizzo 
252268b8534bSLuigi Rizzo 		bzero(&so, sizeof(so));
2523ce3ee1e7SLuigi Rizzo 		NMG_LOCK();
2524ce3ee1e7SLuigi Rizzo 		error = get_ifp(nmr, &ifp, 0 /* don't create */); /* keep reference */
2525ce3ee1e7SLuigi Rizzo 		if (error) {
2526ce3ee1e7SLuigi Rizzo 			NMG_UNLOCK();
252768b8534bSLuigi Rizzo 			break;
2528ce3ee1e7SLuigi Rizzo 		}
252968b8534bSLuigi Rizzo 		so.so_vnet = ifp->if_vnet;
253068b8534bSLuigi Rizzo 		// so->so_proto not null.
253168b8534bSLuigi Rizzo 		error = ifioctl(&so, cmd, data, td);
2532f196ce38SLuigi Rizzo 		nm_if_rele(ifp);
2533ce3ee1e7SLuigi Rizzo 		NMG_UNLOCK();
2534babc7c12SLuigi Rizzo 		break;
253568b8534bSLuigi Rizzo 	    }
2536f196ce38SLuigi Rizzo 
2537f196ce38SLuigi Rizzo #else /* linux */
2538f196ce38SLuigi Rizzo 	default:
2539f196ce38SLuigi Rizzo 		error = EOPNOTSUPP;
2540f196ce38SLuigi Rizzo #endif /* linux */
254168b8534bSLuigi Rizzo 	}
2542ce3ee1e7SLuigi Rizzo out:
254368b8534bSLuigi Rizzo 
2544506cc70cSLuigi Rizzo 	CURVNET_RESTORE();
254568b8534bSLuigi Rizzo 	return (error);
254668b8534bSLuigi Rizzo }
254768b8534bSLuigi Rizzo 
254868b8534bSLuigi Rizzo 
254968b8534bSLuigi Rizzo /*
255068b8534bSLuigi Rizzo  * select(2) and poll(2) handlers for the "netmap" device.
255168b8534bSLuigi Rizzo  *
255268b8534bSLuigi Rizzo  * Can be called for one or more queues.
255368b8534bSLuigi Rizzo  * Return true the event mask corresponding to ready events.
255468b8534bSLuigi Rizzo  * If there are no ready events, do a selrecord on either individual
2555ce3ee1e7SLuigi Rizzo  * selinfo or on the global one.
255668b8534bSLuigi Rizzo  * Device-dependent parts (locking and sync of tx/rx rings)
255768b8534bSLuigi Rizzo  * are done through callbacks.
2558f196ce38SLuigi Rizzo  *
255901c7d25fSLuigi Rizzo  * On linux, arguments are really pwait, the poll table, and 'td' is struct file *
256001c7d25fSLuigi Rizzo  * The first one is remapped to pwait as selrecord() uses the name as an
256101c7d25fSLuigi Rizzo  * hidden argument.
256268b8534bSLuigi Rizzo  */
256368b8534bSLuigi Rizzo static int
256401c7d25fSLuigi Rizzo netmap_poll(struct cdev *dev, int events, struct thread *td)
256468b8534bSLuigi Rizzo {
256568b8534bSLuigi Rizzo 	struct netmap_priv_d *priv = NULL;
256668b8534bSLuigi Rizzo 	struct netmap_adapter *na;
256768b8534bSLuigi Rizzo 	struct ifnet *ifp;
256868b8534bSLuigi Rizzo 	struct netmap_kring *kring;
2570954dca4cSLuigi Rizzo 	u_int i, check_all_tx, check_all_rx, want_tx, want_rx, revents = 0;
2571091fd0abSLuigi Rizzo 	u_int lim_tx, lim_rx, host_forwarded = 0;
2572091fd0abSLuigi Rizzo 	struct mbq q = { NULL, NULL, 0 };
257301c7d25fSLuigi Rizzo 	void *pwait = dev;	/* linux compatibility */
257401c7d25fSLuigi Rizzo 
	/* retry_tx: allow exactly one selrecord+retry pass on the tx side */
2575ce3ee1e7SLuigi Rizzo 		int retry_tx = 1;
2576ce3ee1e7SLuigi Rizzo 
257701c7d25fSLuigi Rizzo 	(void)pwait;
257868b8534bSLuigi Rizzo 
	/* no per-fd private state means the fd was never set up for netmap */
257968b8534bSLuigi Rizzo 	if (devfs_get_cdevpriv((void **)&priv) != 0 || priv == NULL)
258068b8534bSLuigi Rizzo 		return POLLERR;
258168b8534bSLuigi Rizzo 
	/* np_nifp is set last in the regif path; NULL means regif incomplete */
25828241616dSLuigi Rizzo 	if (priv->np_nifp == NULL) {
25838241616dSLuigi Rizzo 		D("No if registered");
25848241616dSLuigi Rizzo 		return POLLERR;
25858241616dSLuigi Rizzo 	}
25868241616dSLuigi Rizzo 	rmb(); /* make sure following reads are not from cache */
25878241616dSLuigi Rizzo 
258868b8534bSLuigi Rizzo 	ifp = priv->np_ifp;
258968b8534bSLuigi Rizzo 	// XXX check for deleting() ?
259068b8534bSLuigi Rizzo 	if ( (ifp->if_capenable & IFCAP_NETMAP) == 0)
259168b8534bSLuigi Rizzo 		return POLLERR;
259268b8534bSLuigi Rizzo 
259368b8534bSLuigi Rizzo 	if (netmap_verbose & 0x8000)
259468b8534bSLuigi Rizzo 		D("device %s events 0x%x", ifp->if_xname, events);
259568b8534bSLuigi Rizzo 	want_tx = events & (POLLOUT | POLLWRNORM);
259668b8534bSLuigi Rizzo 	want_rx = events & (POLLIN | POLLRDNORM);
259768b8534bSLuigi Rizzo 
259868b8534bSLuigi Rizzo 	na = NA(ifp); /* retrieve netmap adapter */
259968b8534bSLuigi Rizzo 
2600d76bf4ffSLuigi Rizzo 	lim_tx = na->num_tx_rings;
2601d76bf4ffSLuigi Rizzo 	lim_rx = na->num_rx_rings;
2602ce3ee1e7SLuigi Rizzo 
	/*
	 * Fast path for a client bound only to the host (software) rings:
	 * sync with the host stack and return; no selrecord/retry protocol
	 * is needed here.
	 */
260364ae02c3SLuigi Rizzo 	if (priv->np_qfirst == NETMAP_SW_RING) {
2604ce3ee1e7SLuigi Rizzo 		/* handle the host stack ring */
260568b8534bSLuigi Rizzo 		if (priv->np_txpoll || want_tx) {
260668b8534bSLuigi Rizzo 			/* push any packets up, then we are always ready */
2607ce3ee1e7SLuigi Rizzo 			netmap_txsync_to_host(na);
260868b8534bSLuigi Rizzo 			revents |= want_tx;
260968b8534bSLuigi Rizzo 		}
261068b8534bSLuigi Rizzo 		if (want_rx) {
261164ae02c3SLuigi Rizzo 			kring = &na->rx_rings[lim_rx];
261268b8534bSLuigi Rizzo 			if (kring->ring->avail == 0)
2613ce3ee1e7SLuigi Rizzo 				netmap_rxsync_from_host(na, td, dev);
261468b8534bSLuigi Rizzo 			if (kring->ring->avail > 0) {
261568b8534bSLuigi Rizzo 				revents |= want_rx;
261668b8534bSLuigi Rizzo 			}
261768b8534bSLuigi Rizzo 		}
261868b8534bSLuigi Rizzo 		return (revents);
261968b8534bSLuigi Rizzo 	}
262068b8534bSLuigi Rizzo 
2621091fd0abSLuigi Rizzo 	/* if we are in transparent mode, check also the host rx ring */
2622091fd0abSLuigi Rizzo 	kring = &na->rx_rings[lim_rx];
2623091fd0abSLuigi Rizzo 	if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
2624091fd0abSLuigi Rizzo 			&& want_rx
2625091fd0abSLuigi Rizzo 			&& (netmap_fwd || kring->ring->flags & NR_FORWARD) ) {
2626091fd0abSLuigi Rizzo 		if (kring->ring->avail == 0)
2627ce3ee1e7SLuigi Rizzo 			netmap_rxsync_from_host(na, td, dev);
2628091fd0abSLuigi Rizzo 		if (kring->ring->avail > 0)
2629091fd0abSLuigi Rizzo 			revents |= want_rx;
2630091fd0abSLuigi Rizzo 	}
2631091fd0abSLuigi Rizzo 
263268b8534bSLuigi Rizzo 	/*
2633ce3ee1e7SLuigi Rizzo 	 * check_all is set if the card has more than one queue AND
263468b8534bSLuigi Rizzo 	 * the client is polling all of them. If true, we sleep on
2635ce3ee1e7SLuigi Rizzo 	 * the "global" selinfo, otherwise we sleep on individual selinfo
2636ce3ee1e7SLuigi Rizzo 	 * (FreeBSD only allows two selinfo's per file descriptor).
2637ce3ee1e7SLuigi Rizzo 	 * The interrupt routine in the driver wake one or the other
2638ce3ee1e7SLuigi Rizzo 	 * (or both) depending on which clients are active.
263968b8534bSLuigi Rizzo 	 *
264068b8534bSLuigi Rizzo 	 * rxsync() is only called if we run out of buffers on a POLLIN.
264168b8534bSLuigi Rizzo 	 * txsync() is called if we run out of buffers on POLLOUT, or
264268b8534bSLuigi Rizzo 	 * there are pending packets to send. The latter can be disabled
264368b8534bSLuigi Rizzo 	 * passing NETMAP_NO_TX_POLL in the NIOCREG call.
264468b8534bSLuigi Rizzo 	 */
2645954dca4cSLuigi Rizzo 	check_all_tx = (priv->np_qlast == NETMAP_HW_RING) && (lim_tx > 1);
2646954dca4cSLuigi Rizzo 	check_all_rx = (priv->np_qlast == NETMAP_HW_RING) && (lim_rx > 1);
264768b8534bSLuigi Rizzo 
	/* if bound to a subset of rings, clamp the scan limits */
264864ae02c3SLuigi Rizzo 	if (priv->np_qlast != NETMAP_HW_RING) {
264964ae02c3SLuigi Rizzo 		lim_tx = lim_rx = priv->np_qlast;
265064ae02c3SLuigi Rizzo 	}
265164ae02c3SLuigi Rizzo 
265268b8534bSLuigi Rizzo 	/*
265368b8534bSLuigi Rizzo 	 * We start with a lock free round which is good if we have
265468b8534bSLuigi Rizzo 	 * data available. If this fails, then lock and call the sync
265568b8534bSLuigi Rizzo 	 * routines.
265668b8534bSLuigi Rizzo 	 */
265764ae02c3SLuigi Rizzo 	for (i = priv->np_qfirst; want_rx && i < lim_rx; i++) {
265868b8534bSLuigi Rizzo 		kring = &na->rx_rings[i];
265968b8534bSLuigi Rizzo 		if (kring->ring->avail > 0) {
266068b8534bSLuigi Rizzo 			revents |= want_rx;
266168b8534bSLuigi Rizzo 			want_rx = 0;	/* also breaks the loop */
266268b8534bSLuigi Rizzo 		}
266368b8534bSLuigi Rizzo 	}
266464ae02c3SLuigi Rizzo 	for (i = priv->np_qfirst; want_tx && i < lim_tx; i++) {
266568b8534bSLuigi Rizzo 		kring = &na->tx_rings[i];
266668b8534bSLuigi Rizzo 		if (kring->ring->avail > 0) {
266768b8534bSLuigi Rizzo 			revents |= want_tx;
266868b8534bSLuigi Rizzo 			want_tx = 0;	/* also breaks the loop */
266968b8534bSLuigi Rizzo 		}
267068b8534bSLuigi Rizzo 	}
267168b8534bSLuigi Rizzo 
267268b8534bSLuigi Rizzo 	/*
267368b8534bSLuigi Rizzo 	 * If we to push packets out (priv->np_txpoll) or want_tx is
267468b8534bSLuigi Rizzo 	 * still set, we do need to run the txsync calls (on all rings,
267568b8534bSLuigi Rizzo 	 * to avoid that the tx rings stall).
267668b8534bSLuigi Rizzo 	 */
267768b8534bSLuigi Rizzo 	if (priv->np_txpoll || want_tx) {
2678ce3ee1e7SLuigi Rizzo 		/* If we really want to be woken up (want_tx),
2679ce3ee1e7SLuigi Rizzo 		 * do a selrecord, either on the global or on
2680ce3ee1e7SLuigi Rizzo 		 * the private structure.  Then issue the txsync
2681ce3ee1e7SLuigi Rizzo 		 * so there is no race in the selrecord/selwait
2682ce3ee1e7SLuigi Rizzo 		 */
2683091fd0abSLuigi Rizzo flush_tx:
268464ae02c3SLuigi Rizzo 		for (i = priv->np_qfirst; i < lim_tx; i++) {
268568b8534bSLuigi Rizzo 			kring = &na->tx_rings[i];
26865819da83SLuigi Rizzo 			/*
2687ce3ee1e7SLuigi Rizzo 			 * Skip this ring if want_tx == 0
26885819da83SLuigi Rizzo 			 * (we have already done a successful sync on
26895819da83SLuigi Rizzo 			 * a previous ring) AND kring->cur == kring->hwcur
26905819da83SLuigi Rizzo 			 * (there are no pending transmissions for this ring).
26915819da83SLuigi Rizzo 			 */
269268b8534bSLuigi Rizzo 			if (!want_tx && kring->ring->cur == kring->nr_hwcur)
269368b8534bSLuigi Rizzo 				continue;
2694ce3ee1e7SLuigi Rizzo 			/* make sure only one user thread is doing this */
2695ce3ee1e7SLuigi Rizzo 			if (nm_kr_tryget(kring)) {
2696ce3ee1e7SLuigi Rizzo 				ND("ring %p busy is %d", kring, (int)kring->nr_busy);
2697ce3ee1e7SLuigi Rizzo 				revents |= POLLERR;
2698ce3ee1e7SLuigi Rizzo 				goto out;
269968b8534bSLuigi Rizzo 			}
2700ce3ee1e7SLuigi Rizzo 
270168b8534bSLuigi Rizzo 			if (netmap_verbose & NM_VERB_TXSYNC)
270268b8534bSLuigi Rizzo 				D("send %d on %s %d",
2703ce3ee1e7SLuigi Rizzo 					kring->ring->cur, ifp->if_xname, i);
2704ce3ee1e7SLuigi Rizzo 			if (na->nm_txsync(ifp, i, 0))
270568b8534bSLuigi Rizzo 				revents |= POLLERR;
270668b8534bSLuigi Rizzo 
27075819da83SLuigi Rizzo 			/* Check avail/call selrecord only if called with POLLOUT */
270868b8534bSLuigi Rizzo 			if (want_tx) {
270968b8534bSLuigi Rizzo 				if (kring->ring->avail > 0) {
271068b8534bSLuigi Rizzo 					/* stop at the first ring. We don't risk
271168b8534bSLuigi Rizzo 					 * starvation.
271268b8534bSLuigi Rizzo 					 */
271368b8534bSLuigi Rizzo 					revents |= want_tx;
271468b8534bSLuigi Rizzo 					want_tx = 0;
271568b8534bSLuigi Rizzo 				}
2716ce3ee1e7SLuigi Rizzo 			}
2717ce3ee1e7SLuigi Rizzo 			nm_kr_put(kring);
2718ce3ee1e7SLuigi Rizzo 		}
		/*
		 * Nothing ready on tx: register for wakeup, then rescan once
		 * (selrecord first, sync after, to close the wakeup race).
		 */
2719ce3ee1e7SLuigi Rizzo 		if (want_tx && retry_tx) {
2720954dca4cSLuigi Rizzo 			selrecord(td, check_all_tx ?
2721ce3ee1e7SLuigi Rizzo 			    &na->tx_si : &na->tx_rings[priv->np_qfirst].si);
2722ce3ee1e7SLuigi Rizzo 			retry_tx = 0;
2723ce3ee1e7SLuigi Rizzo 			goto flush_tx;
272468b8534bSLuigi Rizzo 		}
272568b8534bSLuigi Rizzo 	}
272668b8534bSLuigi Rizzo 
272768b8534bSLuigi Rizzo 	/*
272868b8534bSLuigi Rizzo 	 * now if want_rx is still set we need to lock and rxsync.
272968b8534bSLuigi Rizzo 	 * Do it on all rings because otherwise we starve.
273068b8534bSLuigi Rizzo 	 */
273168b8534bSLuigi Rizzo 	if (want_rx) {
2732ce3ee1e7SLuigi Rizzo 		int retry_rx = 1;
2733ce3ee1e7SLuigi Rizzo do_retry_rx:
273464ae02c3SLuigi Rizzo 		for (i = priv->np_qfirst; i < lim_rx; i++) {
273568b8534bSLuigi Rizzo 			kring = &na->rx_rings[i];
2736ce3ee1e7SLuigi Rizzo 
2737ce3ee1e7SLuigi Rizzo 			if (nm_kr_tryget(kring)) {
2738ce3ee1e7SLuigi Rizzo 				revents |= POLLERR;
2739ce3ee1e7SLuigi Rizzo 				goto out;
274068b8534bSLuigi Rizzo 			}
2741ce3ee1e7SLuigi Rizzo 
2742ce3ee1e7SLuigi Rizzo 			/* XXX NR_FORWARD should only be read on
2743ce3ee1e7SLuigi Rizzo 			 * physical or NIC ports
2744ce3ee1e7SLuigi Rizzo 			 */
2745091fd0abSLuigi Rizzo 			if (netmap_fwd ||kring->ring->flags & NR_FORWARD) {
2746091fd0abSLuigi Rizzo 				ND(10, "forwarding some buffers up %d to %d",
2747091fd0abSLuigi Rizzo 				    kring->nr_hwcur, kring->ring->cur);
				/* collect mbufs destined to the host stack into q */
2748091fd0abSLuigi Rizzo 				netmap_grab_packets(kring, &q, netmap_fwd);
2749091fd0abSLuigi Rizzo 			}
275068b8534bSLuigi Rizzo 
2751ce3ee1e7SLuigi Rizzo 			if (na->nm_rxsync(ifp, i, 0))
275268b8534bSLuigi Rizzo 				revents |= POLLERR;
27535819da83SLuigi Rizzo 			if (netmap_no_timestamp == 0 ||
27545819da83SLuigi Rizzo 					kring->ring->flags & NR_TIMESTAMP) {
275568b8534bSLuigi Rizzo 				microtime(&kring->ring->ts);
27565819da83SLuigi Rizzo 			}
275768b8534bSLuigi Rizzo 
2758ce3ee1e7SLuigi Rizzo 			if (kring->ring->avail > 0) {
275968b8534bSLuigi Rizzo 				revents |= want_rx;
2760ce3ee1e7SLuigi Rizzo 				retry_rx = 0;
276168b8534bSLuigi Rizzo 			}
2762ce3ee1e7SLuigi Rizzo 			nm_kr_put(kring);
276368b8534bSLuigi Rizzo 		}
		/* same selrecord-then-retry protocol as the tx side */
2764ce3ee1e7SLuigi Rizzo 		if (retry_rx) {
2765ce3ee1e7SLuigi Rizzo 			retry_rx = 0;
2766954dca4cSLuigi Rizzo 			selrecord(td, check_all_rx ?
2767ce3ee1e7SLuigi Rizzo 			    &na->rx_si : &na->rx_rings[priv->np_qfirst].si);
2768ce3ee1e7SLuigi Rizzo 			goto do_retry_rx;
2769ce3ee1e7SLuigi Rizzo 		}
277068b8534bSLuigi Rizzo 	}
2771091fd0abSLuigi Rizzo 
2772ce3ee1e7SLuigi Rizzo 	/* forward host to the netmap ring.
2773ce3ee1e7SLuigi Rizzo 	 * I am accessing nr_hwavail without lock, but netmap_transmit
2774ce3ee1e7SLuigi Rizzo 	 * can only increment it, so the operation is safe.
2775ce3ee1e7SLuigi Rizzo 	 */
2776091fd0abSLuigi Rizzo 	kring = &na->rx_rings[lim_rx];
2777091fd0abSLuigi Rizzo 	if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
2778091fd0abSLuigi Rizzo 			&& (netmap_fwd || kring->ring->flags & NR_FORWARD)
2779091fd0abSLuigi Rizzo 			 && kring->nr_hwavail > 0 && !host_forwarded) {
2780091fd0abSLuigi Rizzo 		netmap_sw_to_nic(na);
2781091fd0abSLuigi Rizzo 		host_forwarded = 1; /* prevent another pass */
2782091fd0abSLuigi Rizzo 		want_rx = 0;
2783091fd0abSLuigi Rizzo 		goto flush_tx;
2784091fd0abSLuigi Rizzo 	}
2785091fd0abSLuigi Rizzo 
	/* deliver any packets grabbed for the host stack */
2786091fd0abSLuigi Rizzo 	if (q.head)
2787091fd0abSLuigi Rizzo 		netmap_send_up(na->ifp, q.head);
278868b8534bSLuigi Rizzo 
2789ce3ee1e7SLuigi Rizzo out:
2790ce3ee1e7SLuigi Rizzo 
279168b8534bSLuigi Rizzo 	return (revents);
279268b8534bSLuigi Rizzo }
279368b8534bSLuigi Rizzo 
279468b8534bSLuigi Rizzo /*------- driver support routines ------*/
279568b8534bSLuigi Rizzo 
2796f18be576SLuigi Rizzo 
279768b8534bSLuigi Rizzo /*
279868b8534bSLuigi Rizzo  * Initialize a ``netmap_adapter`` object created by driver on attach.
279968b8534bSLuigi Rizzo  * We allocate a block of memory with room for a struct netmap_adapter
280068b8534bSLuigi Rizzo  * plus two sets of N+2 struct netmap_kring (where N is the number
280168b8534bSLuigi Rizzo  * of hardware rings):
280268b8534bSLuigi Rizzo  * krings	0..N-1	are for the hardware queues.
280368b8534bSLuigi Rizzo  * kring	N	is for the host stack queue
280468b8534bSLuigi Rizzo  * kring	N+1	is only used for the selinfo for all queues.
280568b8534bSLuigi Rizzo  * Return 0 on success, ENOMEM otherwise.
280664ae02c3SLuigi Rizzo  *
28070bf88954SEd Maste  * By default the receive and transmit adapter ring counts are both initialized
28080bf88954SEd Maste  * to num_queues.  na->num_tx_rings can be set for cards with different tx/rx
280924e57ec9SEd Maste  * setups.
281068b8534bSLuigi Rizzo  */
281168b8534bSLuigi Rizzo int
2812ce3ee1e7SLuigi Rizzo netmap_attach(struct netmap_adapter *arg, u_int num_queues)
281368b8534bSLuigi Rizzo {
2814ae10d1afSLuigi Rizzo 	struct netmap_adapter *na = NULL;
2815ae10d1afSLuigi Rizzo 	struct ifnet *ifp = arg ? arg->ifp : NULL;
2816ce3ee1e7SLuigi Rizzo 	size_t len;
281768b8534bSLuigi Rizzo 
2818ae10d1afSLuigi Rizzo 	if (arg == NULL || ifp == NULL)
2819ae10d1afSLuigi Rizzo 		goto fail;
2820ce3ee1e7SLuigi Rizzo 	/* a VALE port uses two endpoints */
2821f18be576SLuigi Rizzo 	len = nma_is_vp(arg) ? sizeof(*na) : sizeof(*na) * 2;
2822f18be576SLuigi Rizzo 	na = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO);
2823ae10d1afSLuigi Rizzo 	if (na == NULL)
2824ae10d1afSLuigi Rizzo 		goto fail;
2825ae10d1afSLuigi Rizzo 	WNA(ifp) = na;
2826ae10d1afSLuigi Rizzo 	*na = *arg; /* copy everything, trust the driver to not pass junk */
2827ae10d1afSLuigi Rizzo 	NETMAP_SET_CAPABLE(ifp);
	/* driver may preset num_tx_rings for asymmetric tx/rx setups */
2828d76bf4ffSLuigi Rizzo 	if (na->num_tx_rings == 0)
2829d76bf4ffSLuigi Rizzo 		na->num_tx_rings = num_queues;
2830d76bf4ffSLuigi Rizzo 	na->num_rx_rings = num_queues;
2831ae10d1afSLuigi Rizzo 	na->refcount = na->na_single = na->na_multi = 0;
2832ae10d1afSLuigi Rizzo 	/* Core lock initialized here, others after netmap_if_new. */
2833ae10d1afSLuigi Rizzo 	mtx_init(&na->core_lock, "netmap core lock", MTX_NETWORK_LOCK, MTX_DEF);
283464ae02c3SLuigi Rizzo #ifdef linux
2835f18be576SLuigi Rizzo 	if (ifp->netdev_ops) {
2836f18be576SLuigi Rizzo 		ND("netdev_ops %p", ifp->netdev_ops);
2837f18be576SLuigi Rizzo 		/* prepare a clone of the netdev ops */
2838f18be576SLuigi Rizzo #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
2839f18be576SLuigi Rizzo 		na->nm_ndo.ndo_start_xmit = ifp->netdev_ops;
2840f18be576SLuigi Rizzo #else
2841849bec0eSLuigi Rizzo 		na->nm_ndo = *ifp->netdev_ops;
2842f18be576SLuigi Rizzo #endif
2843f18be576SLuigi Rizzo 	}
2844ce3ee1e7SLuigi Rizzo 	na->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit;
2845ce3ee1e7SLuigi Rizzo #endif /* linux */
	/* use the global allocator unless the driver supplied a private one */
2846ce3ee1e7SLuigi Rizzo 	na->nm_mem = arg->nm_mem ? arg->nm_mem : &nm_mem;
2847f18be576SLuigi Rizzo 	if (!nma_is_vp(arg))
2848f18be576SLuigi Rizzo 		netmap_attach_sw(ifp);
2849ae10d1afSLuigi Rizzo 	D("success for %s", ifp->if_xname);
2850ae10d1afSLuigi Rizzo 	return 0;
285168b8534bSLuigi Rizzo 
2852ae10d1afSLuigi Rizzo fail:
2853ae10d1afSLuigi Rizzo 	D("fail, arg %p ifp %p na %p", arg, ifp, na);
	/* NOTE(review): ifp may be NULL here (arg==NULL or arg->ifp==NULL);
	 * netmap_detach() evaluates NA(ifp) before its !na check, so this
	 * looks like a NULL dereference on that path -- verify NA() macro.
	 */
2854849bec0eSLuigi Rizzo 	netmap_detach(ifp);
2855ae10d1afSLuigi Rizzo 	return (na ? EINVAL : ENOMEM);
285668b8534bSLuigi Rizzo }
285768b8534bSLuigi Rizzo 
285868b8534bSLuigi Rizzo 
285968b8534bSLuigi Rizzo /*
286068b8534bSLuigi Rizzo  * Free the allocated memory linked to the given ``netmap_adapter``
286168b8534bSLuigi Rizzo  * object.
286268b8534bSLuigi Rizzo  */
286368b8534bSLuigi Rizzo void
286468b8534bSLuigi Rizzo netmap_detach(struct ifnet *ifp)
286568b8534bSLuigi Rizzo {
286668b8534bSLuigi Rizzo 	struct netmap_adapter *na = NA(ifp);
286768b8534bSLuigi Rizzo 
	/* interface was never attached (or already detached): nothing to do */
286868b8534bSLuigi Rizzo 	if (!na)
286968b8534bSLuigi Rizzo 		return;
287068b8534bSLuigi Rizzo 
28712f70fca5SEd Maste 	mtx_destroy(&na->core_lock);
28722f70fca5SEd Maste 
2873ae10d1afSLuigi Rizzo 	if (na->tx_rings) { /* XXX should not happen */
2874ae10d1afSLuigi Rizzo 		D("freeing leftover tx_rings");
2875ae10d1afSLuigi Rizzo 		free(na->tx_rings, M_DEVBUF);
2876ae10d1afSLuigi Rizzo 	}
	/* a private memory allocator is owned by this adapter: delete it */
2877ce3ee1e7SLuigi Rizzo 	if (na->na_flags & NAF_MEM_OWNER)
2878ce3ee1e7SLuigi Rizzo 		netmap_mem_private_delete(na->nm_mem);
	/* scrub before freeing, and clear the back pointer in the ifnet */
287968b8534bSLuigi Rizzo 	bzero(na, sizeof(*na));
2880d0c7b075SLuigi Rizzo 	WNA(ifp) = NULL;
288168b8534bSLuigi Rizzo 	free(na, M_DEVBUF);
288268b8534bSLuigi Rizzo }
288368b8534bSLuigi Rizzo 
288468b8534bSLuigi Rizzo 
2885f18be576SLuigi Rizzo int
2886ce3ee1e7SLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
2887ce3ee1e7SLuigi Rizzo 	struct netmap_adapter *na, u_int ring_nr);
2888f18be576SLuigi Rizzo 
2889f18be576SLuigi Rizzo 
289068b8534bSLuigi Rizzo /*
289102ad4083SLuigi Rizzo  * Intercept packets from the network stack and pass them
289202ad4083SLuigi Rizzo  * to netmap as incoming packets on the 'software' ring.
2893ce3ee1e7SLuigi Rizzo  * We rely on the OS to make sure that the ifp and na do not go
2894ce3ee1e7SLuigi Rizzo  * away (typically the caller checks for IFF_DRV_RUNNING or the like).
2895ce3ee1e7SLuigi Rizzo  * In nm_register() or whenever there is a reinitialization,
2896ce3ee1e7SLuigi Rizzo  * we make sure to access the core lock and per-ring locks
2897ce3ee1e7SLuigi Rizzo  * so that IFCAP_NETMAP is visible here.
289868b8534bSLuigi Rizzo  */
289968b8534bSLuigi Rizzo int
2900ce3ee1e7SLuigi Rizzo netmap_transmit(struct ifnet *ifp, struct mbuf *m)
290168b8534bSLuigi Rizzo {
290268b8534bSLuigi Rizzo 	struct netmap_adapter *na = NA(ifp);
2903ce3ee1e7SLuigi Rizzo 	struct netmap_kring *kring;
29041a26580eSLuigi Rizzo 	u_int i, len = MBUF_LEN(m);
2905ce3ee1e7SLuigi Rizzo 	u_int error = EBUSY, lim;
290668b8534bSLuigi Rizzo 	struct netmap_slot *slot;
290768b8534bSLuigi Rizzo 
2908ce3ee1e7SLuigi Rizzo 	// XXX [Linux] we do not need this lock
2909ce3ee1e7SLuigi Rizzo 	// if we follow the down/configure/up protocol -gl
2910ce3ee1e7SLuigi Rizzo 	// mtx_lock(&na->core_lock);
2911ce3ee1e7SLuigi Rizzo 	if ( (ifp->if_capenable & IFCAP_NETMAP) == 0) {
2912ce3ee1e7SLuigi Rizzo 		/* interface not in netmap mode anymore */
2913ce3ee1e7SLuigi Rizzo 		error = ENXIO;
2914ce3ee1e7SLuigi Rizzo 		goto done;
2915ce3ee1e7SLuigi Rizzo 	}
2916ce3ee1e7SLuigi Rizzo 
	/* the host (software) rx ring sits right after the hardware rings */
2917ce3ee1e7SLuigi Rizzo 	kring = &na->rx_rings[na->num_rx_rings];
2918ce3ee1e7SLuigi Rizzo 	lim = kring->nkr_num_slots - 1;
291968b8534bSLuigi Rizzo 	if (netmap_verbose & NM_VERB_HOST)
292068b8534bSLuigi Rizzo 		D("%s packet %d len %d from the stack", ifp->if_xname,
292168b8534bSLuigi Rizzo 			kring->nr_hwcur + kring->nr_hwavail, len);
2922ce3ee1e7SLuigi Rizzo 	// XXX reconsider long packets if we handle fragments
2923ce3ee1e7SLuigi Rizzo 	if (len > NETMAP_BDG_BUF_SIZE(na->nm_mem)) { /* too long for us */
2924849bec0eSLuigi Rizzo 		D("%s from_host, drop packet size %d > %d", ifp->if_xname,
2925ce3ee1e7SLuigi Rizzo 			len, NETMAP_BDG_BUF_SIZE(na->nm_mem));
2926ce3ee1e7SLuigi Rizzo 		goto done;
2927849bec0eSLuigi Rizzo 	}
	/*
	 * If the host port is attached to a VALE bridge, bypass the host
	 * ring entirely: copy the packet into a forwarding-table slot and
	 * push it through the bridge immediately.
	 */
2928ce3ee1e7SLuigi Rizzo 	if (SWNA(ifp)->na_bdg) {
2929ce3ee1e7SLuigi Rizzo 		struct nm_bdg_fwd *ft;
2930ce3ee1e7SLuigi Rizzo 		char *dst;
2931f18be576SLuigi Rizzo 
2932ce3ee1e7SLuigi Rizzo 		na = SWNA(ifp); /* we operate on the host port */
2933ce3ee1e7SLuigi Rizzo 		ft = na->rx_rings[0].nkr_ft;
2934ce3ee1e7SLuigi Rizzo 		dst = BDG_NMB(na->nm_mem, &na->rx_rings[0].ring->slot[0]);
2935ce3ee1e7SLuigi Rizzo 
2936ce3ee1e7SLuigi Rizzo 		/* use slot 0 in the ft, there is nothing queued here */
2937ce3ee1e7SLuigi Rizzo 		/* XXX we can save the copy calling m_copydata in nm_bdg_flush,
2938ce3ee1e7SLuigi Rizzo 		 * need a special flag for this.
2939ce3ee1e7SLuigi Rizzo 		 */
2940ce3ee1e7SLuigi Rizzo 		m_copydata(m, 0, (int)len, dst);
2941ce3ee1e7SLuigi Rizzo 		ft->ft_flags = 0;
2942ce3ee1e7SLuigi Rizzo 		ft->ft_len = len;
2943ce3ee1e7SLuigi Rizzo 		ft->ft_buf = dst;
2944ce3ee1e7SLuigi Rizzo 		ft->ft_next = NM_FT_NULL;
2945ce3ee1e7SLuigi Rizzo 		ft->ft_frags = 1;
2946ce3ee1e7SLuigi Rizzo 		if (netmap_verbose & NM_VERB_HOST)
2947ce3ee1e7SLuigi Rizzo 			RD(5, "pkt %p size %d to bridge port %d",
2948ce3ee1e7SLuigi Rizzo 				dst, len, na->bdg_port);
2949ce3ee1e7SLuigi Rizzo 		nm_bdg_flush(ft, 1, na, 0);
2950ce3ee1e7SLuigi Rizzo 		na = NA(ifp);	/* back to the regular object/lock */
2951ce3ee1e7SLuigi Rizzo 		error = 0;
2952ce3ee1e7SLuigi Rizzo 		goto done;
2953ce3ee1e7SLuigi Rizzo 	}
2954ce3ee1e7SLuigi Rizzo 
2955ce3ee1e7SLuigi Rizzo 	/* protect against other instances of netmap_transmit,
2956ce3ee1e7SLuigi Rizzo 	 * and userspace invocations of rxsync().
2957ce3ee1e7SLuigi Rizzo 	 * XXX could reuse core_lock
2958ce3ee1e7SLuigi Rizzo 	 */
2959ce3ee1e7SLuigi Rizzo 	// XXX [Linux] there can be no other instances of netmap_transmit
2960ce3ee1e7SLuigi Rizzo 	// on this same ring, but we still need this lock to protect
2961ce3ee1e7SLuigi Rizzo 	// concurrent access from netmap_sw_to_nic() -gl
2962ce3ee1e7SLuigi Rizzo 	mtx_lock(&kring->q_lock);
	/* drop if the host ring is full; else copy into the next free slot */
296302ad4083SLuigi Rizzo 	if (kring->nr_hwavail >= lim) {
29645b248374SLuigi Rizzo 		if (netmap_verbose)
296568b8534bSLuigi Rizzo 			D("stack ring %s full\n", ifp->if_xname);
2966ce3ee1e7SLuigi Rizzo 	} else {
296768b8534bSLuigi Rizzo 		/* compute the insert position */
2968ce3ee1e7SLuigi Rizzo 		i = nm_kr_rxpos(kring);
296968b8534bSLuigi Rizzo 		slot = &kring->ring->slot[i];
2970ce3ee1e7SLuigi Rizzo 		m_copydata(m, 0, (int)len, BDG_NMB(na->nm_mem, slot));
297168b8534bSLuigi Rizzo 		slot->len = len;
2972091fd0abSLuigi Rizzo 		slot->flags = kring->nkr_slot_flags;
297368b8534bSLuigi Rizzo 		kring->nr_hwavail++;
297468b8534bSLuigi Rizzo 		if (netmap_verbose  & NM_VERB_HOST)
2975d76bf4ffSLuigi Rizzo 			D("wake up host ring %s %d", na->ifp->if_xname, na->num_rx_rings);
		/* wake up any poller sleeping on the host ring */
297668b8534bSLuigi Rizzo 		selwakeuppri(&kring->si, PI_NET);
297768b8534bSLuigi Rizzo 		error = 0;
2978ce3ee1e7SLuigi Rizzo 	}
2979ce3ee1e7SLuigi Rizzo 	mtx_unlock(&kring->q_lock);
2980ce3ee1e7SLuigi Rizzo 
298168b8534bSLuigi Rizzo done:
2982ce3ee1e7SLuigi Rizzo 	// mtx_unlock(&na->core_lock);
298368b8534bSLuigi Rizzo 
298468b8534bSLuigi Rizzo 	/* release the mbuf in either cases of success or failure. As an
298568b8534bSLuigi Rizzo 	 * alternative, put the mbuf in a free list and free the list
298668b8534bSLuigi Rizzo 	 * only when really necessary.
298768b8534bSLuigi Rizzo 	 */
298868b8534bSLuigi Rizzo 	m_freem(m);
298968b8534bSLuigi Rizzo 
299068b8534bSLuigi Rizzo 	return (error);
299168b8534bSLuigi Rizzo }
299268b8534bSLuigi Rizzo 
299368b8534bSLuigi Rizzo 
299468b8534bSLuigi Rizzo /*
299568b8534bSLuigi Rizzo  * netmap_reset() is called by the driver routines when reinitializing
299668b8534bSLuigi Rizzo  * a ring. The driver is in charge of locking to protect the kring.
299768b8534bSLuigi Rizzo  * If netmap mode is not set just return NULL.
299868b8534bSLuigi Rizzo  */
299968b8534bSLuigi Rizzo struct netmap_slot *
3000ce3ee1e7SLuigi Rizzo netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
300168b8534bSLuigi Rizzo 	u_int new_cur)
300268b8534bSLuigi Rizzo {
300368b8534bSLuigi Rizzo 	struct netmap_kring *kring;
3004506cc70cSLuigi Rizzo 	int new_hwofs, lim;
300568b8534bSLuigi Rizzo 
3006ce3ee1e7SLuigi Rizzo 	if (na == NULL) {
3007ce3ee1e7SLuigi Rizzo 		D("NULL na, should not happen");
300868b8534bSLuigi Rizzo 		return NULL;	/* no netmap support here */
3009ce3ee1e7SLuigi Rizzo 	}
3010ce3ee1e7SLuigi Rizzo 	if (!(na->ifp->if_capenable & IFCAP_NETMAP)) {
3011*5864b3a5SLuigi Rizzo 		ND("interface not in netmap mode");
301268b8534bSLuigi Rizzo 		return NULL;	/* nothing to reinitialize */
3013ce3ee1e7SLuigi Rizzo 	}
301468b8534bSLuigi Rizzo 
3015ce3ee1e7SLuigi Rizzo 	/* XXX note- in the new scheme, we are not guaranteed to be
3016ce3ee1e7SLuigi Rizzo 	 * under lock (e.g. when called on a device reset).
3017ce3ee1e7SLuigi Rizzo 	 * In this case, we should set a flag and do not trust too
3018ce3ee1e7SLuigi Rizzo 	 * much the values. In practice: TODO
3019ce3ee1e7SLuigi Rizzo 	 * - set a RESET flag somewhere in the kring
3020ce3ee1e7SLuigi Rizzo 	 * - do the processing in a conservative way
3021ce3ee1e7SLuigi Rizzo 	 * - let the *sync() fixup at the end.
3022ce3ee1e7SLuigi Rizzo 	 */
	/*
	 * Select the kring, reject out-of-range indices, and compute the
	 * new hw offset so that the slot the driver calls new_cur keeps
	 * pointing at the data userspace expects.
	 */
302364ae02c3SLuigi Rizzo 	if (tx == NR_TX) {
30248241616dSLuigi Rizzo 		if (n >= na->num_tx_rings)
30258241616dSLuigi Rizzo 			return NULL;
302664ae02c3SLuigi Rizzo 		kring = na->tx_rings + n;
3027506cc70cSLuigi Rizzo 		new_hwofs = kring->nr_hwcur - new_cur;
302864ae02c3SLuigi Rizzo 	} else {
30298241616dSLuigi Rizzo 		if (n >= na->num_rx_rings)
30308241616dSLuigi Rizzo 			return NULL;
303164ae02c3SLuigi Rizzo 		kring = na->rx_rings + n;
3032506cc70cSLuigi Rizzo 		new_hwofs = kring->nr_hwcur + kring->nr_hwavail - new_cur;
303364ae02c3SLuigi Rizzo 	}
303464ae02c3SLuigi Rizzo 	lim = kring->nkr_num_slots - 1;
	/* wrap the offset back into [0, lim] */
3035506cc70cSLuigi Rizzo 	if (new_hwofs > lim)
3036506cc70cSLuigi Rizzo 		new_hwofs -= lim + 1;
3037506cc70cSLuigi Rizzo 
3038ce3ee1e7SLuigi Rizzo 	/* Always set the new offset value and realign the ring. */
3039ce3ee1e7SLuigi Rizzo 	D("%s hwofs %d -> %d, hwavail %d -> %d",
3040ce3ee1e7SLuigi Rizzo 		tx == NR_TX ? "TX" : "RX",
3041ce3ee1e7SLuigi Rizzo 		kring->nkr_hwofs, new_hwofs,
3042ce3ee1e7SLuigi Rizzo 		kring->nr_hwavail,
3043ce3ee1e7SLuigi Rizzo 		tx == NR_TX ? lim : kring->nr_hwavail);
3044506cc70cSLuigi Rizzo 	kring->nkr_hwofs = new_hwofs;
	/* after a reset every tx slot is again available */
3045506cc70cSLuigi Rizzo 	if (tx == NR_TX)
3046ce3ee1e7SLuigi Rizzo 		kring->nr_hwavail = lim;
3047506cc70cSLuigi Rizzo 
3048f196ce38SLuigi Rizzo #if 0 // def linux
3049f196ce38SLuigi Rizzo 	/* XXX check that the mappings are correct */
3050f196ce38SLuigi Rizzo 	/* need ring_nr, adapter->pdev, direction */
3051f196ce38SLuigi Rizzo 	buffer_info->dma = dma_map_single(&pdev->dev, addr, adapter->rx_buffer_len, DMA_FROM_DEVICE);
3052f196ce38SLuigi Rizzo 	if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) {
3053f196ce38SLuigi Rizzo 		D("error mapping rx netmap buffer %d", i);
3054f196ce38SLuigi Rizzo 		// XXX fix error handling
3055f196ce38SLuigi Rizzo 	}
3056f196ce38SLuigi Rizzo 
3057f196ce38SLuigi Rizzo #endif /* linux */
305868b8534bSLuigi Rizzo 	/*
3059ce3ee1e7SLuigi Rizzo 	 * Wakeup on the individual and global selwait
3060506cc70cSLuigi Rizzo 	 * We do the wakeup here, but the ring is not yet reconfigured.
3061506cc70cSLuigi Rizzo 	 * However, we are under lock so there are no races.
306268b8534bSLuigi Rizzo 	 */
306368b8534bSLuigi Rizzo 	selwakeuppri(&kring->si, PI_NET);
306464ae02c3SLuigi Rizzo 	selwakeuppri(tx == NR_TX ? &na->tx_si : &na->rx_si, PI_NET);
306568b8534bSLuigi Rizzo 	return kring->ring->slot;
306668b8534bSLuigi Rizzo }
306768b8534bSLuigi Rizzo 
306868b8534bSLuigi Rizzo 
3069ce3ee1e7SLuigi Rizzo /*
3070ce3ee1e7SLuigi Rizzo  * Grab packets from a kring, move them into the ft structure
3071ce3ee1e7SLuigi Rizzo  * associated to the tx (input) port. Max one instance per port,
3072ce3ee1e7SLuigi Rizzo  * filtered on input (ioctl, poll or XXX).
3073ce3ee1e7SLuigi Rizzo  * Returns the next position in the ring.
3074ce3ee1e7SLuigi Rizzo  */
3075f18be576SLuigi Rizzo static int
3076f18be576SLuigi Rizzo nm_bdg_preflush(struct netmap_adapter *na, u_int ring_nr,
3077f18be576SLuigi Rizzo 	struct netmap_kring *kring, u_int end)
3078f18be576SLuigi Rizzo {
3079f18be576SLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
3080ce3ee1e7SLuigi Rizzo 	struct nm_bdg_fwd *ft;
3081f18be576SLuigi Rizzo 	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
3082f18be576SLuigi Rizzo 	u_int ft_i = 0;	/* start from 0 */
3083ce3ee1e7SLuigi Rizzo 	u_int frags = 1; /* how many frags ? */
3084ce3ee1e7SLuigi Rizzo 	struct nm_bridge *b = na->na_bdg;
3085f18be576SLuigi Rizzo 
3086ce3ee1e7SLuigi Rizzo 	/* To protect against modifications to the bridge we acquire a
3087ce3ee1e7SLuigi Rizzo 	 * shared lock, waiting if we can sleep (if the source port is
3088ce3ee1e7SLuigi Rizzo 	 * attached to a user process) or with a trylock otherwise (NICs).
3089ce3ee1e7SLuigi Rizzo 	 */
3090ce3ee1e7SLuigi Rizzo 	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
3091ce3ee1e7SLuigi Rizzo 	if (na->na_flags & NAF_BDG_MAYSLEEP)
3092ce3ee1e7SLuigi Rizzo 		BDG_RLOCK(b);
3093ce3ee1e7SLuigi Rizzo 	else if (!BDG_RTRYLOCK(b))
3094ce3ee1e7SLuigi Rizzo 		return 0;
3095ce3ee1e7SLuigi Rizzo 	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
3096ce3ee1e7SLuigi Rizzo 	ft = kring->nkr_ft;
3097ce3ee1e7SLuigi Rizzo 
3098ce3ee1e7SLuigi Rizzo 	for (; likely(j != end); j = nm_next(j, lim)) {
3099f18be576SLuigi Rizzo 		struct netmap_slot *slot = &ring->slot[j];
3100ce3ee1e7SLuigi Rizzo 		char *buf;
3101f18be576SLuigi Rizzo 
3102ce3ee1e7SLuigi Rizzo 		ft[ft_i].ft_len = slot->len;
310385233a7dSLuigi Rizzo 		ft[ft_i].ft_flags = slot->flags;
310485233a7dSLuigi Rizzo 
310585233a7dSLuigi Rizzo 		ND("flags is 0x%x", slot->flags);
310685233a7dSLuigi Rizzo 		/* this slot goes into a list so initialize the link field */
3107ce3ee1e7SLuigi Rizzo 		ft[ft_i].ft_next = NM_FT_NULL;
310885233a7dSLuigi Rizzo 		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
31093d819cb6SLuigi Rizzo 			(void *)(uintptr_t)slot->ptr : BDG_NMB(na->nm_mem, slot);
311085233a7dSLuigi Rizzo 		prefetch(buf);
3111ce3ee1e7SLuigi Rizzo 		++ft_i;
3112ce3ee1e7SLuigi Rizzo 		if (slot->flags & NS_MOREFRAG) {
3113ce3ee1e7SLuigi Rizzo 			frags++;
3114ce3ee1e7SLuigi Rizzo 			continue;
3115ce3ee1e7SLuigi Rizzo 		}
3116ce3ee1e7SLuigi Rizzo 		if (unlikely(netmap_verbose && frags > 1))
3117ce3ee1e7SLuigi Rizzo 			RD(5, "%d frags at %d", frags, ft_i - frags);
3118ce3ee1e7SLuigi Rizzo 		ft[ft_i - frags].ft_frags = frags;
3119ce3ee1e7SLuigi Rizzo 		frags = 1;
3120ce3ee1e7SLuigi Rizzo 		if (unlikely((int)ft_i >= bridge_batch))
3121f18be576SLuigi Rizzo 			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
3122f18be576SLuigi Rizzo 	}
3123ce3ee1e7SLuigi Rizzo 	if (frags > 1) {
3124ce3ee1e7SLuigi Rizzo 		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
3125ce3ee1e7SLuigi Rizzo 		// ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
3126ce3ee1e7SLuigi Rizzo 		ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
3127ce3ee1e7SLuigi Rizzo 		ft[ft_i - frags].ft_frags = frags - 1;
3128ce3ee1e7SLuigi Rizzo 	}
3129f18be576SLuigi Rizzo 	if (ft_i)
3130f18be576SLuigi Rizzo 		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
3131ce3ee1e7SLuigi Rizzo 	BDG_RUNLOCK(b);
3132f18be576SLuigi Rizzo 	return j;
3133f18be576SLuigi Rizzo }
3134f18be576SLuigi Rizzo 
3135f18be576SLuigi Rizzo 
3136f18be576SLuigi Rizzo /*
3137ce3ee1e7SLuigi Rizzo  * Pass packets from nic to the bridge.
3138ce3ee1e7SLuigi Rizzo  * XXX TODO check locking: this is called from the interrupt
3139ce3ee1e7SLuigi Rizzo  * handler so we should make sure that the interface is not
3140ce3ee1e7SLuigi Rizzo  * disconnected while passing down an interrupt.
3141ce3ee1e7SLuigi Rizzo  *
3142f18be576SLuigi Rizzo  * Note, no user process can access this NIC so we can ignore
3143f18be576SLuigi Rizzo  * the info in the 'ring'.
3144f18be576SLuigi Rizzo  */
static void
netmap_nic_to_bdg(struct ifnet *ifp, u_int ring_nr)
{
	struct netmap_adapter *na = NA(ifp);
	struct netmap_kring *kring = &na->rx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	u_int j, k;

	/* make sure that only one thread is ever in here,
	 * after which we can unlock. Probably unnecessary XXX.
	 */
	if (nm_kr_tryget(kring))
		return;
	/* fetch packets that have arrived.
	 * XXX maybe do this in a loop ?
	 */
	if (na->nm_rxsync(ifp, ring_nr, 0))
		goto put_out;
	/* note: when nr_hwavail == 0 but netmap_verbose is clear we fall
	 * through; the preflush below is then a no-op since j == k.
	 */
	if (kring->nr_hwavail == 0 && netmap_verbose) {
		D("how strange, interrupt with no packets on %s",
			ifp->if_xname);
		goto put_out;
	}
	/* k: ring position past the last received slot (see nm_kr_rxpos) */
	k = nm_kr_rxpos(kring);

	/* forward slots in [nr_hwcur, k) to the bridge; j is where
	 * the preflush stopped.
	 */
	j = nm_bdg_preflush(na, ring_nr, kring, k);

	/* we consume everything, but we cannot update kring directly
	 * because the nic may have destroyed the info in the NIC ring.
	 * So we need to call rxsync again to restore it.
	 */
	ring->cur = j;
	ring->avail = 0;
	na->nm_rxsync(ifp, ring_nr, 0);

put_out:
	nm_kr_put(kring);
	return;
}
3184f18be576SLuigi Rizzo 
3185f18be576SLuigi Rizzo 
318668b8534bSLuigi Rizzo /*
3187ce3ee1e7SLuigi Rizzo  * Default functions to handle rx/tx interrupts from a physical device.
3188ce3ee1e7SLuigi Rizzo  * "work_done" is non-null on the RX path, NULL for the TX path.
3189ce3ee1e7SLuigi Rizzo  * We rely on the OS to make sure that there is only one active
3190ce3ee1e7SLuigi Rizzo  * instance per queue, and that there is appropriate locking.
3191849bec0eSLuigi Rizzo  *
3192ce3ee1e7SLuigi Rizzo  * If the card is not in netmap mode, simply return 0,
3193ce3ee1e7SLuigi Rizzo  * so that the caller proceeds with regular processing.
3194ce3ee1e7SLuigi Rizzo  *
3195ce3ee1e7SLuigi Rizzo  * If the card is connected to a netmap file descriptor,
3196ce3ee1e7SLuigi Rizzo  * do a selwakeup on the individual queue, plus one on the global one
3197ce3ee1e7SLuigi Rizzo  * if needed (multiqueue card _and_ there are multiqueue listeners),
3198ce3ee1e7SLuigi Rizzo  * and return 1.
3199ce3ee1e7SLuigi Rizzo  *
3200ce3ee1e7SLuigi Rizzo  * Finally, if called on rx from an interface connected to a switch,
3201ce3ee1e7SLuigi Rizzo  * calls the proper forwarding routine, and return 1.
32021a26580eSLuigi Rizzo  */
3203babc7c12SLuigi Rizzo int
3204ce3ee1e7SLuigi Rizzo netmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_done)
32051a26580eSLuigi Rizzo {
32061a26580eSLuigi Rizzo 	struct netmap_adapter *na;
3207ce3ee1e7SLuigi Rizzo 	struct netmap_kring *kring;
32081a26580eSLuigi Rizzo 
32091a26580eSLuigi Rizzo 	if (!(ifp->if_capenable & IFCAP_NETMAP))
32101a26580eSLuigi Rizzo 		return 0;
3211849bec0eSLuigi Rizzo 
3212ce3ee1e7SLuigi Rizzo 	q &= NETMAP_RING_MASK;
3213849bec0eSLuigi Rizzo 
3214ce3ee1e7SLuigi Rizzo 	if (netmap_verbose)
3215ce3ee1e7SLuigi Rizzo 		RD(5, "received %s queue %d", work_done ? "RX" : "TX" , q);
32161a26580eSLuigi Rizzo 	na = NA(ifp);
32178241616dSLuigi Rizzo 	if (na->na_flags & NAF_SKIP_INTR) {
32188241616dSLuigi Rizzo 		ND("use regular interrupt");
32198241616dSLuigi Rizzo 		return 0;
32208241616dSLuigi Rizzo 	}
32218241616dSLuigi Rizzo 
322264ae02c3SLuigi Rizzo 	if (work_done) { /* RX path */
32238241616dSLuigi Rizzo 		if (q >= na->num_rx_rings)
3224849bec0eSLuigi Rizzo 			return 0;	// not a physical queue
3225ce3ee1e7SLuigi Rizzo 		kring = na->rx_rings + q;
3226ce3ee1e7SLuigi Rizzo 		kring->nr_kflags |= NKR_PENDINTR;	// XXX atomic ?
3227ce3ee1e7SLuigi Rizzo 		if (na->na_bdg != NULL) {
3228ce3ee1e7SLuigi Rizzo 			netmap_nic_to_bdg(ifp, q);
3229ce3ee1e7SLuigi Rizzo 		} else {
3230ce3ee1e7SLuigi Rizzo 			selwakeuppri(&kring->si, PI_NET);
3231ce3ee1e7SLuigi Rizzo 			if (na->num_rx_rings > 1 /* or multiple listeners */ )
3232ce3ee1e7SLuigi Rizzo 				selwakeuppri(&na->rx_si, PI_NET);
3233ce3ee1e7SLuigi Rizzo 		}
3234ce3ee1e7SLuigi Rizzo 		*work_done = 1; /* do not fire napi again */
3235849bec0eSLuigi Rizzo 	} else { /* TX path */
32368241616dSLuigi Rizzo 		if (q >= na->num_tx_rings)
3237849bec0eSLuigi Rizzo 			return 0;	// not a physical queue
3238ce3ee1e7SLuigi Rizzo 		kring = na->tx_rings + q;
3239ce3ee1e7SLuigi Rizzo 		selwakeuppri(&kring->si, PI_NET);
3240ce3ee1e7SLuigi Rizzo 		if (na->num_tx_rings > 1 /* or multiple listeners */ )
3241ce3ee1e7SLuigi Rizzo 			selwakeuppri(&na->tx_si, PI_NET);
324264ae02c3SLuigi Rizzo 	}
32431a26580eSLuigi Rizzo 	return 1;
32441a26580eSLuigi Rizzo }
32451a26580eSLuigi Rizzo 
324664ae02c3SLuigi Rizzo 
324701c7d25fSLuigi Rizzo #ifdef linux	/* linux-specific routines */
324801c7d25fSLuigi Rizzo 
3249f18be576SLuigi Rizzo 
325001c7d25fSLuigi Rizzo /*
325101c7d25fSLuigi Rizzo  * Remap linux arguments into the FreeBSD call.
325201c7d25fSLuigi Rizzo  * - pwait is the poll table, passed as 'dev';
325301c7d25fSLuigi Rizzo  *   If pwait == NULL someone else already woke up before. We can report
325401c7d25fSLuigi Rizzo  *   events but they are filtered upstream.
325501c7d25fSLuigi Rizzo  *   If pwait != NULL, then pwait->key contains the list of events.
325601c7d25fSLuigi Rizzo  * - events is computed from pwait as above.
325701c7d25fSLuigi Rizzo  * - file is passed as 'td';
325801c7d25fSLuigi Rizzo  */
/* see the remapping notes above: pwait doubles as 'dev', file as 'td' */
static u_int
linux_netmap_poll(struct file * file, struct poll_table_struct *pwait)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
	int events = POLLIN | POLLOUT; /* XXX maybe... */
#elif LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0)
	int events = pwait ? pwait->key : POLLIN | POLLOUT;
#else /* in 3.4.0 field 'key' was renamed to '_key' */
	int events = pwait ? pwait->_key : POLLIN | POLLOUT;
#endif
	return netmap_poll((void *)pwait, events, (void *)file);
}
327101c7d25fSLuigi Rizzo 
3272f18be576SLuigi Rizzo 
/* linux mmap handler: map the netmap memory region into user space,
 * one page at a time, translating device offsets to physical addresses.
 */
static int
linux_netmap_mmap(struct file *f, struct vm_area_struct *vma)
{
	int error = 0;
	unsigned long off, va;
	vm_ooffset_t pa;
	struct netmap_priv_d *priv = f->private_data;
	/*
	 * vma->vm_start: start of mapping user address space
	 * vma->vm_end: end of the mapping user address space
	 * vma->vm_pgoff: offset of first page in the device
	 */

	// XXX security checks

	/* make sure the memory allocator is set up and referenced */
	error = netmap_get_memory(priv);
	ND("get_memory returned %d", error);
	if (error)
	    return -error;

	/* both ends of the mapping must be page aligned */
	if ((vma->vm_start & ~PAGE_MASK) || (vma->vm_end & ~PAGE_MASK)) {
		ND("vm_start = %lx vm_end = %lx", vma->vm_start, vma->vm_end);
		return -EINVAL;
	}

	for (va = vma->vm_start, off = vma->vm_pgoff;
	     va < vma->vm_end;
	     va += PAGE_SIZE, off++)
	{
		/* offset -> physical address; 0 means out of range */
		pa = netmap_mem_ofstophys(priv->np_mref, off << PAGE_SHIFT);
		if (pa == 0)
			return -EINVAL;

		ND("va %lx pa %p", va, pa);
		error = remap_pfn_range(vma, va, pa >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot);
		if (error)
			return error;
	}
	return 0;
}
331301c7d25fSLuigi Rizzo 
3314f18be576SLuigi Rizzo 
/*
 * This one is probably already protected by the netif lock XXX
 */
static netdev_tx_t
linux_netmap_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	/* divert the outgoing skb to netmap; netmap_transmit presumably
	 * takes ownership of the skb -- TODO confirm at its definition
	 */
	netmap_transmit(dev, skb);
	return (NETDEV_TX_OK);
}
332401c7d25fSLuigi Rizzo 
332501c7d25fSLuigi Rizzo 
/* linux ioctl entry point: copy the nmreq argument in from user space,
 * dispatch to the common netmap_ioctl(), and copy the result back.
 * Two prototypes are needed because newer kernels use .unlocked_ioctl
 * (no inode argument) instead of .ioctl; LIN_IOCTL_NAME selects the
 * matching file_operations field.
 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)	// XXX was 37
#define LIN_IOCTL_NAME	.ioctl
int
linux_netmap_ioctl(struct inode *inode, struct file *file, u_int cmd, u_long data /* arg */)
#else
#define LIN_IOCTL_NAME	.unlocked_ioctl
long
linux_netmap_ioctl(struct file *file, u_int cmd, u_long data /* arg */)
#endif
{
	int ret;
	struct nmreq nmr;
	bzero(&nmr, sizeof(nmr));

	if (cmd == NIOCTXSYNC || cmd == NIOCRXSYNC) {
		data = 0;	/* no argument required here */
	}
	if (data && copy_from_user(&nmr, (void *)data, sizeof(nmr) ) != 0)
		return -EFAULT;
	ret = netmap_ioctl(NULL, cmd, (caddr_t)&nmr, 0, (void *)file);
	if (data && copy_to_user((void*)data, &nmr, sizeof(nmr) ) != 0)
		return -EFAULT;
	/* netmap_ioctl returns a positive errno; linux wants it negative */
	return -ret;
}
335001c7d25fSLuigi Rizzo 
335101c7d25fSLuigi Rizzo 
/* linux device close: tear down the per-descriptor state, if any */
static int
netmap_release(struct inode *inode, struct file *file)
{
	(void)inode;	/* UNUSED */
	if (file->private_data)
		netmap_dtor(file->private_data);
	return (0);
}
336001c7d25fSLuigi Rizzo 
3361f18be576SLuigi Rizzo 
33628241616dSLuigi Rizzo static int
33638241616dSLuigi Rizzo linux_netmap_open(struct inode *inode, struct file *file)
33648241616dSLuigi Rizzo {
33658241616dSLuigi Rizzo 	struct netmap_priv_d *priv;
33668241616dSLuigi Rizzo 	(void)inode;	/* UNUSED */
33678241616dSLuigi Rizzo 
33688241616dSLuigi Rizzo 	priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
33698241616dSLuigi Rizzo 			      M_NOWAIT | M_ZERO);
33708241616dSLuigi Rizzo 	if (priv == NULL)
33718241616dSLuigi Rizzo 		return -ENOMEM;
33728241616dSLuigi Rizzo 
33738241616dSLuigi Rizzo 	file->private_data = priv;
33748241616dSLuigi Rizzo 
33758241616dSLuigi Rizzo 	return (0);
33768241616dSLuigi Rizzo }
337701c7d25fSLuigi Rizzo 
3378f18be576SLuigi Rizzo 
/* file operations for the linux /dev/netmap control device */
static struct file_operations netmap_fops = {
    .owner = THIS_MODULE,
    .open = linux_netmap_open,
    .mmap = linux_netmap_mmap,
    LIN_IOCTL_NAME = linux_netmap_ioctl,	/* .ioctl or .unlocked_ioctl */
    .poll = linux_netmap_poll,
    .release = netmap_release,
};
338701c7d25fSLuigi Rizzo 
3388f18be576SLuigi Rizzo 
/* register /dev/netmap as a misc device with a dynamic minor number */
static struct miscdevice netmap_cdevsw = {	/* same name as FreeBSD */
	MISC_DYNAMIC_MINOR,
	"netmap",
	&netmap_fops,
};
339401c7d25fSLuigi Rizzo 
339501c7d25fSLuigi Rizzo static int netmap_init(void);
339601c7d25fSLuigi Rizzo static void netmap_fini(void);
339701c7d25fSLuigi Rizzo 
3398f18be576SLuigi Rizzo 
/* Errors have negative values on linux */
static int linux_netmap_init(void)
{
	int err = netmap_init();	/* positive errno, FreeBSD style */

	return -err;			/* flip sign for linux */
}
340442a3a5bdSLuigi Rizzo 
/* module entry/exit points and symbols exported to netmap-aware drivers */
module_init(linux_netmap_init);
module_exit(netmap_fini);
/* export certain symbols to other modules */
EXPORT_SYMBOL(netmap_attach);		// driver attach routines
EXPORT_SYMBOL(netmap_detach);		// driver detach routines
EXPORT_SYMBOL(netmap_ring_reinit);	// ring init on error
EXPORT_SYMBOL(netmap_buffer_lut);
EXPORT_SYMBOL(netmap_total_buffers);	// index check
EXPORT_SYMBOL(netmap_buffer_base);
EXPORT_SYMBOL(netmap_reset);		// ring init routines
EXPORT_SYMBOL(netmap_buf_size);
EXPORT_SYMBOL(netmap_rx_irq);		// default irq handler
EXPORT_SYMBOL(netmap_no_pendintr);	// XXX mitigation - should go away
EXPORT_SYMBOL(netmap_bdg_ctl);		// bridge configuration routine
EXPORT_SYMBOL(netmap_bdg_learning);	// the default lookup function
EXPORT_SYMBOL(netmap_disable_all_rings);
EXPORT_SYMBOL(netmap_enable_all_rings);


MODULE_AUTHOR("http://info.iet.unipi.it/~luigi/netmap/");
MODULE_DESCRIPTION("The netmap packet I/O framework");
MODULE_LICENSE("Dual BSD/GPL"); /* the code here is all BSD. */
342701c7d25fSLuigi Rizzo 
342801c7d25fSLuigi Rizzo #else /* __FreeBSD__ */
342901c7d25fSLuigi Rizzo 
3430f18be576SLuigi Rizzo 
/* character device entry points for /dev/netmap on FreeBSD */
static struct cdevsw netmap_cdevsw = {
	.d_version = D_VERSION,
	.d_name = "netmap",
	.d_open = netmap_open,
	.d_mmap_single = netmap_mmap_single,
	.d_ioctl = netmap_ioctl,
	.d_poll = netmap_poll,
	.d_close = netmap_close,
};
344001c7d25fSLuigi Rizzo #endif /* __FreeBSD__ */
3441babc7c12SLuigi Rizzo 
3442f196ce38SLuigi Rizzo /*
3443f196ce38SLuigi Rizzo  *---- support for virtual bridge -----
3444f196ce38SLuigi Rizzo  */
3445f196ce38SLuigi Rizzo 
3446f196ce38SLuigi Rizzo /* ----- FreeBSD if_bridge hash function ------- */
3447f196ce38SLuigi Rizzo 
3448f196ce38SLuigi Rizzo /*
3449f196ce38SLuigi Rizzo  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
3450f196ce38SLuigi Rizzo  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
3451f196ce38SLuigi Rizzo  *
3452f196ce38SLuigi Rizzo  * http://www.burtleburtle.net/bob/hash/spooky.html
3453f196ce38SLuigi Rizzo  */
/* 96-bit reversible mixing step of the Jenkins hash (see comment above) */
#define mix(a, b, c)                                                    \
do {                                                                    \
        a -= b; a -= c; a ^= (c >> 13);                                 \
        b -= c; b -= a; b ^= (a << 8);                                  \
        c -= a; c -= b; c ^= (b >> 13);                                 \
        a -= b; a -= c; a ^= (c >> 12);                                 \
        b -= c; b -= a; b ^= (a << 16);                                 \
        c -= a; c -= b; c ^= (b >> 5);                                  \
        a -= b; a -= c; a ^= (c >> 3);                                  \
        b -= c; b -= a; b ^= (a << 10);                                 \
        c -= a; c -= b; c ^= (b >> 15);                                 \
} while (/*CONSTCOND*/0)
3466f196ce38SLuigi Rizzo 
3467f196ce38SLuigi Rizzo static __inline uint32_t
3468f196ce38SLuigi Rizzo nm_bridge_rthash(const uint8_t *addr)
3469f196ce38SLuigi Rizzo {
3470f196ce38SLuigi Rizzo         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key
3471f196ce38SLuigi Rizzo 
3472f196ce38SLuigi Rizzo         b += addr[5] << 8;
3473f196ce38SLuigi Rizzo         b += addr[4];
3474f196ce38SLuigi Rizzo         a += addr[3] << 24;
3475f196ce38SLuigi Rizzo         a += addr[2] << 16;
3476f196ce38SLuigi Rizzo         a += addr[1] << 8;
3477f196ce38SLuigi Rizzo         a += addr[0];
3478f196ce38SLuigi Rizzo 
3479f196ce38SLuigi Rizzo         mix(a, b, c);
3480f196ce38SLuigi Rizzo #define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
3481f196ce38SLuigi Rizzo         return (c & BRIDGE_RTHASH_MASK);
3482f196ce38SLuigi Rizzo }
3483f196ce38SLuigi Rizzo 
3484f196ce38SLuigi Rizzo #undef mix
3485f196ce38SLuigi Rizzo 
3486f196ce38SLuigi Rizzo 
3487f196ce38SLuigi Rizzo static int
3488f196ce38SLuigi Rizzo bdg_netmap_reg(struct ifnet *ifp, int onoff)
3489f196ce38SLuigi Rizzo {
3490f18be576SLuigi Rizzo 	/* the interface is already attached to the bridge,
3491f18be576SLuigi Rizzo 	 * so we only need to toggle IFCAP_NETMAP.
3492f196ce38SLuigi Rizzo 	 */
3493f18be576SLuigi Rizzo 	if (onoff) {
3494f196ce38SLuigi Rizzo 		ifp->if_capenable |= IFCAP_NETMAP;
3495f196ce38SLuigi Rizzo 	} else {
3496f196ce38SLuigi Rizzo 		ifp->if_capenable &= ~IFCAP_NETMAP;
3497f196ce38SLuigi Rizzo 	}
3498f18be576SLuigi Rizzo 	return 0;
3499f196ce38SLuigi Rizzo }
3500f196ce38SLuigi Rizzo 
3501f196ce38SLuigi Rizzo 
3502f18be576SLuigi Rizzo /*
3503f18be576SLuigi Rizzo  * Lookup function for a learning bridge.
3504f18be576SLuigi Rizzo  * Update the hash table with the source address,
3505f18be576SLuigi Rizzo  * and then returns the destination port index, and the
3506f18be576SLuigi Rizzo  * ring in *dst_ring (at the moment, always use ring 0)
3507f18be576SLuigi Rizzo  */
u_int
netmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring,
		struct netmap_adapter *na)
{
	struct nm_hash_ent *ht = na->na_bdg->ht;
	uint32_t sh, dh;
	u_int dst, mysrc = na->bdg_port;
	uint64_t smac, dmac;

	/* need at least a full ethernet header */
	if (buf_len < 14) {
		D("invalid buf length %d", buf_len);
		return NM_BDG_NOPORT;
	}
	/* NOTE(review): unaligned 64-bit loads covering bytes 0..11;
	 * assumes the platform tolerates misaligned access -- TODO
	 * confirm on strict-alignment architectures.
	 */
	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
	smac = le64toh(*(uint64_t *)(buf + 4));
	smac >>= 16;	/* drop the two low bytes that belong to dmac */

	/*
	 * The hash is somewhat expensive, there might be some
	 * worthwhile optimizations here.
	 */
	if ((buf[6] & 1) == 0) { /* valid src (not a group address) */
		uint8_t *s = buf+6;
		sh = nm_bridge_rthash(s); // XXX hash of source
		/* update source port forwarding entry */
		ht[sh].mac = smac;	/* XXX expire ? */
		ht[sh].ports = mysrc;
		if (netmap_verbose)
		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
	}
	dst = NM_BDG_BROADCAST;
	if ((buf[0] & 1) == 0) { /* unicast */
		dh = nm_bridge_rthash(buf); // XXX hash of dst
		if (ht[dh].mac == dmac) {	/* found dst */
			dst = ht[dh].ports;
		}
		/* XXX otherwise return NM_BDG_UNKNOWN ? */
	}
	*dst_ring = 0;
	return dst;
}
3550f196ce38SLuigi Rizzo 
3551f18be576SLuigi Rizzo 
3552f18be576SLuigi Rizzo /*
3553f18be576SLuigi Rizzo  * This flush routine supports only unicast and broadcast but a large
3554f18be576SLuigi Rizzo  * number of ports, and lets us replace the learn and dispatch functions.
3555f18be576SLuigi Rizzo  */
3556f18be576SLuigi Rizzo int
3557ce3ee1e7SLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_adapter *na,
3558f18be576SLuigi Rizzo 		u_int ring_nr)
3559f18be576SLuigi Rizzo {
3560f18be576SLuigi Rizzo 	struct nm_bdg_q *dst_ents, *brddst;
3561f18be576SLuigi Rizzo 	uint16_t num_dsts = 0, *dsts;
3562f18be576SLuigi Rizzo 	struct nm_bridge *b = na->na_bdg;
3563ce3ee1e7SLuigi Rizzo 	u_int i, j, me = na->bdg_port;
3564f18be576SLuigi Rizzo 
3565ce3ee1e7SLuigi Rizzo 	/*
3566ce3ee1e7SLuigi Rizzo 	 * The work area (pointed by ft) is followed by an array of
3567ce3ee1e7SLuigi Rizzo 	 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
3568ce3ee1e7SLuigi Rizzo 	 * queues per port plus one for the broadcast traffic.
3569ce3ee1e7SLuigi Rizzo 	 * Then we have an array of destination indexes.
3570ce3ee1e7SLuigi Rizzo 	 */
3571ce3ee1e7SLuigi Rizzo 	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
3572f18be576SLuigi Rizzo 	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
3573f18be576SLuigi Rizzo 
3574ce3ee1e7SLuigi Rizzo 	/* first pass: find a destination for each packet in the batch */
3575ce3ee1e7SLuigi Rizzo 	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
3576ce3ee1e7SLuigi Rizzo 		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
3577f18be576SLuigi Rizzo 		uint16_t dst_port, d_i;
3578f18be576SLuigi Rizzo 		struct nm_bdg_q *d;
3579f18be576SLuigi Rizzo 
3580ce3ee1e7SLuigi Rizzo 		ND("slot %d frags %d", i, ft[i].ft_frags);
3581ce3ee1e7SLuigi Rizzo 		dst_port = b->nm_bdg_lookup(ft[i].ft_buf, ft[i].ft_len,
3582ce3ee1e7SLuigi Rizzo 			&dst_ring, na);
3583ce3ee1e7SLuigi Rizzo 		if (netmap_verbose > 255)
3584ce3ee1e7SLuigi Rizzo 			RD(5, "slot %d port %d -> %d", i, me, dst_port);
3585ce3ee1e7SLuigi Rizzo 		if (dst_port == NM_BDG_NOPORT)
3586f18be576SLuigi Rizzo 			continue; /* this packet is identified to be dropped */
3587ce3ee1e7SLuigi Rizzo 		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
3588f18be576SLuigi Rizzo 			continue;
3589ce3ee1e7SLuigi Rizzo 		else if (dst_port == NM_BDG_BROADCAST)
3590f18be576SLuigi Rizzo 			dst_ring = 0; /* broadcasts always go to ring 0 */
3591ce3ee1e7SLuigi Rizzo 		else if (unlikely(dst_port == me ||
3592ce3ee1e7SLuigi Rizzo 		    !b->bdg_ports[dst_port]))
3593f18be576SLuigi Rizzo 			continue;
3594f18be576SLuigi Rizzo 
3595f18be576SLuigi Rizzo 		/* get a position in the scratch pad */
3596f18be576SLuigi Rizzo 		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
3597f18be576SLuigi Rizzo 		d = dst_ents + d_i;
3598ce3ee1e7SLuigi Rizzo 
3599ce3ee1e7SLuigi Rizzo 		/* append the first fragment to the list */
3600ce3ee1e7SLuigi Rizzo 		if (d->bq_head == NM_FT_NULL) { /* new destination */
3601f18be576SLuigi Rizzo 			d->bq_head = d->bq_tail = i;
3602f18be576SLuigi Rizzo 			/* remember this position to be scanned later */
3603f18be576SLuigi Rizzo 			if (dst_port != NM_BDG_BROADCAST)
3604f18be576SLuigi Rizzo 				dsts[num_dsts++] = d_i;
360585233a7dSLuigi Rizzo 		} else {
3606f18be576SLuigi Rizzo 			ft[d->bq_tail].ft_next = i;
3607f18be576SLuigi Rizzo 			d->bq_tail = i;
3608f18be576SLuigi Rizzo 		}
3609ce3ee1e7SLuigi Rizzo 		d->bq_len += ft[i].ft_frags;
361085233a7dSLuigi Rizzo 	}
3611f18be576SLuigi Rizzo 
3612ce3ee1e7SLuigi Rizzo 	/*
3613ce3ee1e7SLuigi Rizzo 	 * Broadcast traffic goes to ring 0 on all destinations.
3614ce3ee1e7SLuigi Rizzo 	 * So we need to add these rings to the list of ports to scan.
3615ce3ee1e7SLuigi Rizzo 	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
3616ce3ee1e7SLuigi Rizzo 	 * expensive. We should keep a compact list of active destinations
3617ce3ee1e7SLuigi Rizzo 	 * so we could shorten this loop.
3618f18be576SLuigi Rizzo 	 */
3619f18be576SLuigi Rizzo 	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
3620ce3ee1e7SLuigi Rizzo 	if (brddst->bq_head != NM_FT_NULL) {
3621ce3ee1e7SLuigi Rizzo 		for (j = 0; likely(j < b->bdg_active_ports); j++) {
3622ce3ee1e7SLuigi Rizzo 			uint16_t d_i;
3623ce3ee1e7SLuigi Rizzo 			i = b->bdg_port_index[j];
3624ce3ee1e7SLuigi Rizzo 			if (unlikely(i == me))
3625f18be576SLuigi Rizzo 				continue;
3626ce3ee1e7SLuigi Rizzo 			d_i = i * NM_BDG_MAXRINGS;
3627ce3ee1e7SLuigi Rizzo 			if (dst_ents[d_i].bq_head == NM_FT_NULL)
3628f18be576SLuigi Rizzo 				dsts[num_dsts++] = d_i;
3629f18be576SLuigi Rizzo 		}
3630f18be576SLuigi Rizzo 	}
3631f18be576SLuigi Rizzo 
3632ce3ee1e7SLuigi Rizzo 	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
3633f18be576SLuigi Rizzo 	/* second pass: scan destinations (XXX will be modular somehow) */
3634f18be576SLuigi Rizzo 	for (i = 0; i < num_dsts; i++) {
3635f18be576SLuigi Rizzo 		struct ifnet *dst_ifp;
3636f18be576SLuigi Rizzo 		struct netmap_adapter *dst_na;
3637f196ce38SLuigi Rizzo 		struct netmap_kring *kring;
3638f196ce38SLuigi Rizzo 		struct netmap_ring *ring;
3639f18be576SLuigi Rizzo 		u_int dst_nr, is_vp, lim, j, sent = 0, d_i, next, brd_next;
3640ce3ee1e7SLuigi Rizzo 		u_int needed, howmany;
3641ce3ee1e7SLuigi Rizzo 		int retry = netmap_txsync_retry;
3642f18be576SLuigi Rizzo 		struct nm_bdg_q *d;
3643ce3ee1e7SLuigi Rizzo 		uint32_t my_start = 0, lease_idx = 0;
3644ce3ee1e7SLuigi Rizzo 		int nrings;
3645f196ce38SLuigi Rizzo 
3646f18be576SLuigi Rizzo 		d_i = dsts[i];
3647ce3ee1e7SLuigi Rizzo 		ND("second pass %d port %d", i, d_i);
3648f18be576SLuigi Rizzo 		d = dst_ents + d_i;
3649ce3ee1e7SLuigi Rizzo 		// XXX fix the division
3650ce3ee1e7SLuigi Rizzo 		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
3651f18be576SLuigi Rizzo 		/* protect from the lookup function returning an inactive
3652f18be576SLuigi Rizzo 		 * destination port
3653f18be576SLuigi Rizzo 		 */
3654f18be576SLuigi Rizzo 		if (unlikely(dst_na == NULL))
3655ce3ee1e7SLuigi Rizzo 			goto cleanup;
3656ce3ee1e7SLuigi Rizzo 		if (dst_na->na_flags & NAF_SW_ONLY)
3657ce3ee1e7SLuigi Rizzo 			goto cleanup;
3658f18be576SLuigi Rizzo 		dst_ifp = dst_na->ifp;
3659f18be576SLuigi Rizzo 		/*
3660f18be576SLuigi Rizzo 		 * The interface may be in !netmap mode in two cases:
3661f18be576SLuigi Rizzo 		 * - when na is attached but not activated yet;
3662f18be576SLuigi Rizzo 		 * - when na is being deactivated but is still attached.
3663f18be576SLuigi Rizzo 		 */
3664ce3ee1e7SLuigi Rizzo 		if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) {
3665ce3ee1e7SLuigi Rizzo 			ND("not in netmap mode!");
3666ce3ee1e7SLuigi Rizzo 			goto cleanup;
3667ce3ee1e7SLuigi Rizzo 		}
3668f196ce38SLuigi Rizzo 
3669f18be576SLuigi Rizzo 		/* there is at least one either unicast or broadcast packet */
3670f18be576SLuigi Rizzo 		brd_next = brddst->bq_head;
3671f18be576SLuigi Rizzo 		next = d->bq_head;
3672ce3ee1e7SLuigi Rizzo 		/* we need to reserve this many slots. If fewer are
3673ce3ee1e7SLuigi Rizzo 		 * available, some packets will be dropped.
3674ce3ee1e7SLuigi Rizzo 		 * Packets may have multiple fragments, so
3675ce3ee1e7SLuigi Rizzo 		 * there is a chance that we may not use all of the slots
3676ce3ee1e7SLuigi Rizzo 		 * we have claimed, so we will need to handle the leftover
3677ce3ee1e7SLuigi Rizzo 		 * ones when we regain the lock.
3678ce3ee1e7SLuigi Rizzo 		 */
3679ce3ee1e7SLuigi Rizzo 		needed = d->bq_len + brddst->bq_len;
3680f18be576SLuigi Rizzo 
3681f18be576SLuigi Rizzo 		is_vp = nma_is_vp(dst_na);
3682ce3ee1e7SLuigi Rizzo 		ND(5, "pass 2 dst %d is %x %s",
3683ce3ee1e7SLuigi Rizzo 			i, d_i, is_vp ? "virtual" : "nic/host");
3684f18be576SLuigi Rizzo 		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
3685f18be576SLuigi Rizzo 		if (is_vp) { /* virtual port */
3686ce3ee1e7SLuigi Rizzo 			nrings = dst_na->num_rx_rings;
3687ce3ee1e7SLuigi Rizzo 		} else {
3688ce3ee1e7SLuigi Rizzo 			nrings = dst_na->num_tx_rings;
3689f18be576SLuigi Rizzo 		}
3690ce3ee1e7SLuigi Rizzo 		if (dst_nr >= nrings)
3691ce3ee1e7SLuigi Rizzo 			dst_nr = dst_nr % nrings;
3692ce3ee1e7SLuigi Rizzo 		kring = is_vp ?  &dst_na->rx_rings[dst_nr] :
3693ce3ee1e7SLuigi Rizzo 				&dst_na->tx_rings[dst_nr];
3694ce3ee1e7SLuigi Rizzo 		ring = kring->ring;
3695ce3ee1e7SLuigi Rizzo 		lim = kring->nkr_num_slots - 1;
3696f18be576SLuigi Rizzo 
3697ce3ee1e7SLuigi Rizzo retry:
3698ce3ee1e7SLuigi Rizzo 
3699ce3ee1e7SLuigi Rizzo 		/* reserve the buffers in the queue and an entry
3700ce3ee1e7SLuigi Rizzo 		 * to report completion, and drop lock.
3701ce3ee1e7SLuigi Rizzo 		 * XXX this might become a helper function.
3702ce3ee1e7SLuigi Rizzo 		 */
3703ce3ee1e7SLuigi Rizzo 		mtx_lock(&kring->q_lock);
3704ce3ee1e7SLuigi Rizzo 		if (kring->nkr_stopped) {
3705ce3ee1e7SLuigi Rizzo 			mtx_unlock(&kring->q_lock);
3706ce3ee1e7SLuigi Rizzo 			goto cleanup;
3707ce3ee1e7SLuigi Rizzo 		}
3708ce3ee1e7SLuigi Rizzo 		/* on physical interfaces, do a txsync to recover
3709ce3ee1e7SLuigi Rizzo 		 * slots for packets already transmitted.
3710ce3ee1e7SLuigi Rizzo 		 * XXX maybe we could be optimistic and rely on a retry
3711ce3ee1e7SLuigi Rizzo 		 * in case of failure.
3712ce3ee1e7SLuigi Rizzo 		 */
3713ce3ee1e7SLuigi Rizzo 		if (nma_is_hw(dst_na)) {
3714ce3ee1e7SLuigi Rizzo 			dst_na->nm_txsync(dst_ifp, dst_nr, 0);
3715ce3ee1e7SLuigi Rizzo 		}
3716ce3ee1e7SLuigi Rizzo 		my_start = j = kring->nkr_hwlease;
3717ce3ee1e7SLuigi Rizzo 		howmany = nm_kr_space(kring, is_vp);
3718ce3ee1e7SLuigi Rizzo 		if (needed < howmany)
3719ce3ee1e7SLuigi Rizzo 			howmany = needed;
3720ce3ee1e7SLuigi Rizzo 		lease_idx = nm_kr_lease(kring, howmany, is_vp);
3721ce3ee1e7SLuigi Rizzo 		mtx_unlock(&kring->q_lock);
3722ce3ee1e7SLuigi Rizzo 
3723ce3ee1e7SLuigi Rizzo 		/* only retry if we need more than available slots */
3724ce3ee1e7SLuigi Rizzo 		if (retry && needed <= howmany)
3725ce3ee1e7SLuigi Rizzo 			retry = 0;
3726ce3ee1e7SLuigi Rizzo 
3727ce3ee1e7SLuigi Rizzo 		/* copy to the destination queue */
3728ce3ee1e7SLuigi Rizzo 		while (howmany > 0) {
3729ce3ee1e7SLuigi Rizzo 			struct netmap_slot *slot;
3730ce3ee1e7SLuigi Rizzo 			struct nm_bdg_fwd *ft_p, *ft_end;
3731ce3ee1e7SLuigi Rizzo 			u_int cnt;
3732ce3ee1e7SLuigi Rizzo 
3733ce3ee1e7SLuigi Rizzo 			/* find the queue from which we pick next packet.
3734ce3ee1e7SLuigi Rizzo 			 * NM_FT_NULL is always higher than valid indexes
373585233a7dSLuigi Rizzo 			 * so we never dereference it if the other list
3736ce3ee1e7SLuigi Rizzo 			 * has packets (and if both are empty we never
373785233a7dSLuigi Rizzo 			 * get here).
373885233a7dSLuigi Rizzo 			 */
3739f18be576SLuigi Rizzo 			if (next < brd_next) {
3740f18be576SLuigi Rizzo 				ft_p = ft + next;
3741f18be576SLuigi Rizzo 				next = ft_p->ft_next;
3742f18be576SLuigi Rizzo 			} else { /* insert broadcast */
3743f18be576SLuigi Rizzo 				ft_p = ft + brd_next;
3744f18be576SLuigi Rizzo 				brd_next = ft_p->ft_next;
3745f18be576SLuigi Rizzo 			}
3746ce3ee1e7SLuigi Rizzo 			cnt = ft_p->ft_frags; // cnt > 0
3747ce3ee1e7SLuigi Rizzo 			if (unlikely(cnt > howmany))
3748ce3ee1e7SLuigi Rizzo 			    break; /* no more space */
3749ce3ee1e7SLuigi Rizzo 			howmany -= cnt;
3750ce3ee1e7SLuigi Rizzo 			if (netmap_verbose && cnt > 1)
3751ce3ee1e7SLuigi Rizzo 				RD(5, "rx %d frags to %d", cnt, j);
3752ce3ee1e7SLuigi Rizzo 			ft_end = ft_p + cnt;
3753ce3ee1e7SLuigi Rizzo 			do {
3754ce3ee1e7SLuigi Rizzo 			    void *dst, *src = ft_p->ft_buf;
3755ce3ee1e7SLuigi Rizzo 			    size_t len = (ft_p->ft_len + 63) & ~63;
3756ce3ee1e7SLuigi Rizzo 
3757f196ce38SLuigi Rizzo 			    slot = &ring->slot[j];
3758ce3ee1e7SLuigi Rizzo 			    dst = BDG_NMB(dst_na->nm_mem, slot);
3759ce3ee1e7SLuigi Rizzo 			    /* round to a multiple of 64 */
3760ce3ee1e7SLuigi Rizzo 
3761ce3ee1e7SLuigi Rizzo 			    ND("send %d %d bytes at %s:%d",
3762ce3ee1e7SLuigi Rizzo 				i, ft_p->ft_len, dst_ifp->if_xname, j);
376385233a7dSLuigi Rizzo 			    if (ft_p->ft_flags & NS_INDIRECT) {
3764ce3ee1e7SLuigi Rizzo 				if (copyin(src, dst, len)) {
3765ce3ee1e7SLuigi Rizzo 					// invalid user pointer, pretend len is 0
3766ce3ee1e7SLuigi Rizzo 					ft_p->ft_len = 0;
3767ce3ee1e7SLuigi Rizzo 				}
376885233a7dSLuigi Rizzo 			    } else {
3769ce3ee1e7SLuigi Rizzo 				//memcpy(dst, src, len);
3770ce3ee1e7SLuigi Rizzo 				pkt_copy(src, dst, (int)len);
377185233a7dSLuigi Rizzo 			    }
3772f18be576SLuigi Rizzo 			    slot->len = ft_p->ft_len;
3773ce3ee1e7SLuigi Rizzo 			    slot->flags = (cnt << 8)| NS_MOREFRAG;
3774ce3ee1e7SLuigi Rizzo 			    j = nm_next(j, lim);
3775ce3ee1e7SLuigi Rizzo 			    ft_p++;
3776f196ce38SLuigi Rizzo 			    sent++;
3777ce3ee1e7SLuigi Rizzo 			} while (ft_p != ft_end);
3778ce3ee1e7SLuigi Rizzo 			slot->flags = (cnt << 8); /* clear flag on last entry */
377985233a7dSLuigi Rizzo 			/* are we done ? */
3780ce3ee1e7SLuigi Rizzo 			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
3781f18be576SLuigi Rizzo 				break;
3782f196ce38SLuigi Rizzo 		}
3783ce3ee1e7SLuigi Rizzo 		{
3784ce3ee1e7SLuigi Rizzo 		    /* current position */
3785ce3ee1e7SLuigi Rizzo 		    uint32_t *p = kring->nkr_leases; /* shorthand */
3786ce3ee1e7SLuigi Rizzo 		    uint32_t update_pos;
3787ce3ee1e7SLuigi Rizzo 		    int still_locked = 1;
3788ce3ee1e7SLuigi Rizzo 
3789ce3ee1e7SLuigi Rizzo 		    mtx_lock(&kring->q_lock);
3790ce3ee1e7SLuigi Rizzo 		    if (unlikely(howmany > 0)) {
3791ce3ee1e7SLuigi Rizzo 			/* not used all bufs. If i am the last one
3792ce3ee1e7SLuigi Rizzo 			 * i can recover the slots, otherwise must
3793ce3ee1e7SLuigi Rizzo 			 * fill them with 0 to mark empty packets.
3794ce3ee1e7SLuigi Rizzo 			 */
3795ce3ee1e7SLuigi Rizzo 			ND("leftover %d bufs", howmany);
3796ce3ee1e7SLuigi Rizzo 			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
3797ce3ee1e7SLuigi Rizzo 			    /* yes i am the last one */
3798ce3ee1e7SLuigi Rizzo 			    ND("roll back nkr_hwlease to %d", j);
3799ce3ee1e7SLuigi Rizzo 			    kring->nkr_hwlease = j;
3800f18be576SLuigi Rizzo 			} else {
3801ce3ee1e7SLuigi Rizzo 			    while (howmany-- > 0) {
3802ce3ee1e7SLuigi Rizzo 				ring->slot[j].len = 0;
3803ce3ee1e7SLuigi Rizzo 				ring->slot[j].flags = 0;
3804ce3ee1e7SLuigi Rizzo 				j = nm_next(j, lim);
3805ce3ee1e7SLuigi Rizzo 			    }
3806ce3ee1e7SLuigi Rizzo 			}
3807ce3ee1e7SLuigi Rizzo 		    }
3808ce3ee1e7SLuigi Rizzo 		    p[lease_idx] = j; /* report I am done */
3809ce3ee1e7SLuigi Rizzo 
3810ce3ee1e7SLuigi Rizzo 		    update_pos = is_vp ? nm_kr_rxpos(kring) : ring->cur;
3811ce3ee1e7SLuigi Rizzo 
3812ce3ee1e7SLuigi Rizzo 		    if (my_start == update_pos) {
3813ce3ee1e7SLuigi Rizzo 			/* all slots before my_start have been reported,
3814ce3ee1e7SLuigi Rizzo 			 * so scan subsequent leases to see if other ranges
3815ce3ee1e7SLuigi Rizzo 			 * have been completed, and do a selwakeup or txsync.
3816ce3ee1e7SLuigi Rizzo 		         */
3817ce3ee1e7SLuigi Rizzo 			while (lease_idx != kring->nkr_lease_idx &&
3818ce3ee1e7SLuigi Rizzo 				p[lease_idx] != NR_NOSLOT) {
3819ce3ee1e7SLuigi Rizzo 			    j = p[lease_idx];
3820ce3ee1e7SLuigi Rizzo 			    p[lease_idx] = NR_NOSLOT;
3821ce3ee1e7SLuigi Rizzo 			    lease_idx = nm_next(lease_idx, lim);
3822ce3ee1e7SLuigi Rizzo 			}
3823ce3ee1e7SLuigi Rizzo 			/* j is the new 'write' position. j != my_start
3824ce3ee1e7SLuigi Rizzo 			 * means there are new buffers to report
3825ce3ee1e7SLuigi Rizzo 			 */
3826ce3ee1e7SLuigi Rizzo 			if (likely(j != my_start)) {
3827ce3ee1e7SLuigi Rizzo 			    if (is_vp) {
3828ce3ee1e7SLuigi Rizzo 				uint32_t old_avail = kring->nr_hwavail;
3829ce3ee1e7SLuigi Rizzo 
3830ce3ee1e7SLuigi Rizzo 				kring->nr_hwavail = (j >= kring->nr_hwcur) ?
3831ce3ee1e7SLuigi Rizzo 					j - kring->nr_hwcur :
3832ce3ee1e7SLuigi Rizzo 					j + lim + 1 - kring->nr_hwcur;
3833ce3ee1e7SLuigi Rizzo 				if (kring->nr_hwavail < old_avail) {
3834ce3ee1e7SLuigi Rizzo 					D("avail shrink %d -> %d",
3835ce3ee1e7SLuigi Rizzo 						old_avail, kring->nr_hwavail);
3836ce3ee1e7SLuigi Rizzo 				}
3837ce3ee1e7SLuigi Rizzo 				still_locked = 0;
3838ce3ee1e7SLuigi Rizzo 				mtx_unlock(&kring->q_lock);
3839ce3ee1e7SLuigi Rizzo 				selwakeuppri(&kring->si, PI_NET);
3840ce3ee1e7SLuigi Rizzo 			    } else {
3841f18be576SLuigi Rizzo 				ring->cur = j;
3842ce3ee1e7SLuigi Rizzo 				/* XXX update avail ? */
3843ce3ee1e7SLuigi Rizzo 				still_locked = 0;
3844f18be576SLuigi Rizzo 				dst_na->nm_txsync(dst_ifp, dst_nr, 0);
3845ce3ee1e7SLuigi Rizzo 				mtx_unlock(&kring->q_lock);
3846ce3ee1e7SLuigi Rizzo 
3847f18be576SLuigi Rizzo 				/* retry to send more packets */
3848ce3ee1e7SLuigi Rizzo 				if (nma_is_hw(dst_na) && retry--)
3849f18be576SLuigi Rizzo 					goto retry;
3850f18be576SLuigi Rizzo 			    }
3851f18be576SLuigi Rizzo 			}
3852ce3ee1e7SLuigi Rizzo 		    }
3853ce3ee1e7SLuigi Rizzo 		    if (still_locked)
3854ce3ee1e7SLuigi Rizzo 			mtx_unlock(&kring->q_lock);
3855ce3ee1e7SLuigi Rizzo 		}
3856ce3ee1e7SLuigi Rizzo cleanup:
3857ce3ee1e7SLuigi Rizzo 		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
3858ce3ee1e7SLuigi Rizzo 		d->bq_len = 0;
3859ce3ee1e7SLuigi Rizzo 	}
3860ce3ee1e7SLuigi Rizzo 	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
3861ce3ee1e7SLuigi Rizzo 	brddst->bq_len = 0;
3862f196ce38SLuigi Rizzo 	return 0;
3863f196ce38SLuigi Rizzo }
3864f196ce38SLuigi Rizzo 
3865f18be576SLuigi Rizzo 
3866f196ce38SLuigi Rizzo /*
3867ce3ee1e7SLuigi Rizzo  * main dispatch routine for the bridge.
3868ce3ee1e7SLuigi Rizzo  * We already know that only one thread is running this.
3869ce3ee1e7SLuigi Rizzo  * we must run nm_bdg_preflush without lock.
3870f196ce38SLuigi Rizzo  */
3871f196ce38SLuigi Rizzo static int
3872ce3ee1e7SLuigi Rizzo bdg_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
3873f196ce38SLuigi Rizzo {
3874f196ce38SLuigi Rizzo 	struct netmap_adapter *na = NA(ifp);
3875f196ce38SLuigi Rizzo 	struct netmap_kring *kring = &na->tx_rings[ring_nr];
3876f196ce38SLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
3877ce3ee1e7SLuigi Rizzo 	u_int j, k, lim = kring->nkr_num_slots - 1;
3878f196ce38SLuigi Rizzo 
3879f196ce38SLuigi Rizzo 	k = ring->cur;
3880f196ce38SLuigi Rizzo 	if (k > lim)
3881f196ce38SLuigi Rizzo 		return netmap_ring_reinit(kring);
3882f196ce38SLuigi Rizzo 
3883ce3ee1e7SLuigi Rizzo 	if (bridge_batch <= 0) { /* testing only */
3884f196ce38SLuigi Rizzo 		j = k; // used all
3885f196ce38SLuigi Rizzo 		goto done;
3886f196ce38SLuigi Rizzo 	}
3887ce3ee1e7SLuigi Rizzo 	if (bridge_batch > NM_BDG_BATCH)
3888ce3ee1e7SLuigi Rizzo 		bridge_batch = NM_BDG_BATCH;
3889f196ce38SLuigi Rizzo 
3890f18be576SLuigi Rizzo 	j = nm_bdg_preflush(na, ring_nr, kring, k);
3891f196ce38SLuigi Rizzo 	if (j != k)
3892f196ce38SLuigi Rizzo 		D("early break at %d/ %d, avail %d", j, k, kring->nr_hwavail);
3893ce3ee1e7SLuigi Rizzo 	/* k-j modulo ring size is the number of slots processed */
3894ce3ee1e7SLuigi Rizzo 	if (k < j)
3895ce3ee1e7SLuigi Rizzo 		k += kring->nkr_num_slots;
3896ce3ee1e7SLuigi Rizzo 	kring->nr_hwavail = lim - (k - j);
3897f196ce38SLuigi Rizzo 
3898f196ce38SLuigi Rizzo done:
3899f196ce38SLuigi Rizzo 	kring->nr_hwcur = j;
3900f196ce38SLuigi Rizzo 	ring->avail = kring->nr_hwavail;
3901f196ce38SLuigi Rizzo 	if (netmap_verbose)
3902ce3ee1e7SLuigi Rizzo 		D("%s ring %d flags %d", ifp->if_xname, ring_nr, flags);
3903f196ce38SLuigi Rizzo 	return 0;
3904f196ce38SLuigi Rizzo }
3905f196ce38SLuigi Rizzo 
3906f18be576SLuigi Rizzo 
3907ce3ee1e7SLuigi Rizzo /*
3908ce3ee1e7SLuigi Rizzo  * user process reading from a VALE switch.
3909ce3ee1e7SLuigi Rizzo  * Already protected against concurrent calls from userspace,
3910ce3ee1e7SLuigi Rizzo  * but we must acquire the queue's lock to protect against
3911ce3ee1e7SLuigi Rizzo  * writers on the same queue.
3912ce3ee1e7SLuigi Rizzo  */
/* returns 0 on success, or the netmap_ring_reinit() result on a bad cursor */
static int
bdg_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
{
	struct netmap_adapter *na = NA(ifp);
	struct netmap_kring *kring = &na->rx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	u_int j, lim = kring->nkr_num_slots - 1;
	u_int k = ring->cur, resvd = ring->reserved;
	int n;

	/* q_lock serializes us against bridge writers delivering
	 * into this same rx queue.
	 */
	mtx_lock(&kring->q_lock);
	if (k > lim) {	/* cursor out of range: userspace corrupted the ring */
		D("ouch dangerous reset!!!");
		n = netmap_ring_reinit(kring);
		goto done;
	}

	/* skip past packets that userspace has released */
	j = kring->nr_hwcur;    /* netmap ring index */
	if (resvd > 0) {
		/* reserve+avail must fit in the ring; otherwise clear the
		 * reservation and carry on (inconsistent userspace state).
		 */
		if (resvd + ring->avail >= lim + 1) {
			D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
			ring->reserved = resvd = 0; // XXX panic...
		}
		/* back k up by resvd slots so reserved buffers are kept */
		k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
	}

	if (j != k) { /* userspace has released some packets. */
		n = k - j;
		if (n < 0)
			n += kring->nkr_num_slots;
		ND("userspace releases %d packets", n);
                for (n = 0; likely(j != k); n++) {
                        struct netmap_slot *slot = &ring->slot[j];
                        void *addr = BDG_NMB(na->nm_mem, slot);

                        if (addr == netmap_buffer_base) { /* bad buf */
				D("bad buffer index %d, ignore ?",
					slot->buf_idx);
                        }
			slot->flags &= ~NS_BUF_CHANGED;
                        j = nm_next(j, lim);
                }
                kring->nr_hwavail -= n;
                kring->nr_hwcur = k;
        }
        /* tell userspace that there are new packets */
        ring->avail = kring->nr_hwavail - resvd;
	n = 0;
done:
	mtx_unlock(&kring->q_lock);
	return n;
}
3966f196ce38SLuigi Rizzo 
3967f18be576SLuigi Rizzo 
39685ab0d24dSLuigi Rizzo static int
3969f18be576SLuigi Rizzo bdg_netmap_attach(struct netmap_adapter *arg)
3970f196ce38SLuigi Rizzo {
3971f196ce38SLuigi Rizzo 	struct netmap_adapter na;
3972f196ce38SLuigi Rizzo 
3973f196ce38SLuigi Rizzo 	ND("attaching virtual bridge");
3974f196ce38SLuigi Rizzo 	bzero(&na, sizeof(na));
3975f196ce38SLuigi Rizzo 
3976f18be576SLuigi Rizzo 	na.ifp = arg->ifp;
3977ce3ee1e7SLuigi Rizzo 	na.na_flags = NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
3978f18be576SLuigi Rizzo 	na.num_tx_rings = arg->num_tx_rings;
3979f18be576SLuigi Rizzo 	na.num_rx_rings = arg->num_rx_rings;
3980ce3ee1e7SLuigi Rizzo 	na.num_tx_desc = arg->num_tx_desc;
3981ce3ee1e7SLuigi Rizzo 	na.num_rx_desc = arg->num_rx_desc;
3982f196ce38SLuigi Rizzo 	na.nm_txsync = bdg_netmap_txsync;
3983f196ce38SLuigi Rizzo 	na.nm_rxsync = bdg_netmap_rxsync;
3984f196ce38SLuigi Rizzo 	na.nm_register = bdg_netmap_reg;
3985ce3ee1e7SLuigi Rizzo 	na.nm_mem = netmap_mem_private_new(arg->ifp->if_xname,
3986ce3ee1e7SLuigi Rizzo 			na.num_tx_rings, na.num_tx_desc,
3987ce3ee1e7SLuigi Rizzo 			na.num_rx_rings, na.num_rx_desc);
39885ab0d24dSLuigi Rizzo 	return netmap_attach(&na, na.num_tx_rings);
3989f196ce38SLuigi Rizzo }
3990f196ce38SLuigi Rizzo 
3991babc7c12SLuigi Rizzo 
3992babc7c12SLuigi Rizzo static struct cdev *netmap_dev; /* /dev/netmap character device. */
3993babc7c12SLuigi Rizzo 
3994babc7c12SLuigi Rizzo 
39951a26580eSLuigi Rizzo /*
399668b8534bSLuigi Rizzo  * Module loader.
399768b8534bSLuigi Rizzo  *
399868b8534bSLuigi Rizzo  * Create the /dev/netmap device and initialize all global
399968b8534bSLuigi Rizzo  * variables.
400068b8534bSLuigi Rizzo  *
400168b8534bSLuigi Rizzo  * Return 0 on success, errno on failure.
400268b8534bSLuigi Rizzo  */
400368b8534bSLuigi Rizzo static int
400468b8534bSLuigi Rizzo netmap_init(void)
400568b8534bSLuigi Rizzo {
4006ce3ee1e7SLuigi Rizzo 	int i, error;
400768b8534bSLuigi Rizzo 
4008ce3ee1e7SLuigi Rizzo 	NMG_LOCK_INIT();
4009ce3ee1e7SLuigi Rizzo 
4010ce3ee1e7SLuigi Rizzo 	error = netmap_mem_init();
401168b8534bSLuigi Rizzo 	if (error != 0) {
401242a3a5bdSLuigi Rizzo 		printf("netmap: unable to initialize the memory allocator.\n");
401368b8534bSLuigi Rizzo 		return (error);
401468b8534bSLuigi Rizzo 	}
40158241616dSLuigi Rizzo 	printf("netmap: loaded module\n");
401668b8534bSLuigi Rizzo 	netmap_dev = make_dev(&netmap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0660,
401768b8534bSLuigi Rizzo 			      "netmap");
4018f196ce38SLuigi Rizzo 
4019f18be576SLuigi Rizzo 	bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
4020f196ce38SLuigi Rizzo 	for (i = 0; i < NM_BRIDGES; i++)
4021ce3ee1e7SLuigi Rizzo 		BDG_RWINIT(&nm_bridges[i]);
4022babc7c12SLuigi Rizzo 	return (error);
402368b8534bSLuigi Rizzo }
402468b8534bSLuigi Rizzo 
402568b8534bSLuigi Rizzo 
402668b8534bSLuigi Rizzo /*
402768b8534bSLuigi Rizzo  * Module unloader.
402868b8534bSLuigi Rizzo  *
402968b8534bSLuigi Rizzo  * Free all the memory, and destroy the ``/dev/netmap`` device.
403068b8534bSLuigi Rizzo  */
static void
netmap_fini(void)
{
	/* teardown order is the reverse of netmap_init() */
	destroy_dev(netmap_dev);	/* remove /dev/netmap */
	netmap_mem_fini();		/* release the memory allocator */
	NMG_LOCK_DESTROY();
	printf("netmap: unloaded module.\n");
}
403968b8534bSLuigi Rizzo 
404068b8534bSLuigi Rizzo 
4041f196ce38SLuigi Rizzo #ifdef __FreeBSD__
404268b8534bSLuigi Rizzo /*
404368b8534bSLuigi Rizzo  * Kernel entry point.
404468b8534bSLuigi Rizzo  *
404568b8534bSLuigi Rizzo  * Initialize/finalize the module and return.
404668b8534bSLuigi Rizzo  *
404768b8534bSLuigi Rizzo  * Return 0 on success, errno on failure.
404868b8534bSLuigi Rizzo  */
404968b8534bSLuigi Rizzo static int
405068b8534bSLuigi Rizzo netmap_loader(__unused struct module *module, int event, __unused void *arg)
405168b8534bSLuigi Rizzo {
405268b8534bSLuigi Rizzo 	int error = 0;
405368b8534bSLuigi Rizzo 
405468b8534bSLuigi Rizzo 	switch (event) {
405568b8534bSLuigi Rizzo 	case MOD_LOAD:
405668b8534bSLuigi Rizzo 		error = netmap_init();
405768b8534bSLuigi Rizzo 		break;
405868b8534bSLuigi Rizzo 
405968b8534bSLuigi Rizzo 	case MOD_UNLOAD:
406068b8534bSLuigi Rizzo 		netmap_fini();
406168b8534bSLuigi Rizzo 		break;
406268b8534bSLuigi Rizzo 
406368b8534bSLuigi Rizzo 	default:
406468b8534bSLuigi Rizzo 		error = EOPNOTSUPP;
406568b8534bSLuigi Rizzo 		break;
406668b8534bSLuigi Rizzo 	}
406768b8534bSLuigi Rizzo 
406868b8534bSLuigi Rizzo 	return (error);
406968b8534bSLuigi Rizzo }
407068b8534bSLuigi Rizzo 
407168b8534bSLuigi Rizzo 
407268b8534bSLuigi Rizzo DEV_MODULE(netmap, netmap_loader, NULL);
4073f196ce38SLuigi Rizzo #endif /* __FreeBSD__ */
4074