xref: /freebsd-14.2/sys/dev/netmap/netmap_vale.c (revision e31c6ec7)
1f9790aebSLuigi Rizzo /*
217885a7bSLuigi Rizzo  * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
3f9790aebSLuigi Rizzo  *
4f9790aebSLuigi Rizzo  * Redistribution and use in source and binary forms, with or without
5f9790aebSLuigi Rizzo  * modification, are permitted provided that the following conditions
6f9790aebSLuigi Rizzo  * are met:
7f9790aebSLuigi Rizzo  *   1. Redistributions of source code must retain the above copyright
8f9790aebSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer.
9f9790aebSLuigi Rizzo  *   2. Redistributions in binary form must reproduce the above copyright
10f9790aebSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer in the
11f9790aebSLuigi Rizzo  *      documentation and/or other materials provided with the distribution.
12f9790aebSLuigi Rizzo  *
13f9790aebSLuigi Rizzo  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14f9790aebSLuigi Rizzo  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15f9790aebSLuigi Rizzo  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16f9790aebSLuigi Rizzo  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17f9790aebSLuigi Rizzo  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18f9790aebSLuigi Rizzo  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19f9790aebSLuigi Rizzo  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20f9790aebSLuigi Rizzo  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21f9790aebSLuigi Rizzo  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22f9790aebSLuigi Rizzo  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23f9790aebSLuigi Rizzo  * SUCH DAMAGE.
24f9790aebSLuigi Rizzo  */
25f9790aebSLuigi Rizzo 
26f9790aebSLuigi Rizzo 
27f9790aebSLuigi Rizzo /*
28f9790aebSLuigi Rizzo  * This module implements the VALE switch for netmap
29f9790aebSLuigi Rizzo 
30f9790aebSLuigi Rizzo --- VALE SWITCH ---
31f9790aebSLuigi Rizzo 
32f9790aebSLuigi Rizzo NMG_LOCK() serializes all modifications to switches and ports.
33f9790aebSLuigi Rizzo A switch cannot be deleted until all ports are gone.
34f9790aebSLuigi Rizzo 
35f9790aebSLuigi Rizzo For each switch, an SX lock (RWlock on linux) protects
36f9790aebSLuigi Rizzo deletion of ports. When configuring or deleting a new port, the
37f9790aebSLuigi Rizzo lock is acquired in exclusive mode (after holding NMG_LOCK).
38f9790aebSLuigi Rizzo When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
41f9790aebSLuigi Rizzo Hence it is important that sleepable shared locks are used.
42f9790aebSLuigi Rizzo 
On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
45f9790aebSLuigi Rizzo packets are copied from source to destination, and then
46f9790aebSLuigi Rizzo the lock is acquired again and the receive ring is updated.
47f9790aebSLuigi Rizzo (A similar thing is done on the tx ring for NIC and host stack
48f9790aebSLuigi Rizzo ports attached to the switch)
49f9790aebSLuigi Rizzo 
50f9790aebSLuigi Rizzo  */
51f9790aebSLuigi Rizzo 
52f9790aebSLuigi Rizzo /*
53f9790aebSLuigi Rizzo  * OS-specific code that is used only within this file.
54f9790aebSLuigi Rizzo  * Other OS-specific code that must be accessed by drivers
55f9790aebSLuigi Rizzo  * is present in netmap_kern.h
56f9790aebSLuigi Rizzo  */
57f9790aebSLuigi Rizzo 
58f9790aebSLuigi Rizzo #if defined(__FreeBSD__)
59f9790aebSLuigi Rizzo #include <sys/cdefs.h> /* prerequisite */
60f9790aebSLuigi Rizzo __FBSDID("$FreeBSD$");
61f9790aebSLuigi Rizzo 
62f9790aebSLuigi Rizzo #include <sys/types.h>
63f9790aebSLuigi Rizzo #include <sys/errno.h>
64f9790aebSLuigi Rizzo #include <sys/param.h>	/* defines used in kernel.h */
65f9790aebSLuigi Rizzo #include <sys/kernel.h>	/* types used in module initialization */
66f9790aebSLuigi Rizzo #include <sys/conf.h>	/* cdevsw struct, UID, GID */
67f9790aebSLuigi Rizzo #include <sys/sockio.h>
68f9790aebSLuigi Rizzo #include <sys/socketvar.h>	/* struct socket */
69f9790aebSLuigi Rizzo #include <sys/malloc.h>
70f9790aebSLuigi Rizzo #include <sys/poll.h>
71f9790aebSLuigi Rizzo #include <sys/rwlock.h>
72f9790aebSLuigi Rizzo #include <sys/socket.h> /* sockaddrs */
73f9790aebSLuigi Rizzo #include <sys/selinfo.h>
74f9790aebSLuigi Rizzo #include <sys/sysctl.h>
75f9790aebSLuigi Rizzo #include <net/if.h>
76f9790aebSLuigi Rizzo #include <net/if_var.h>
77f9790aebSLuigi Rizzo #include <net/bpf.h>		/* BIOCIMMEDIATE */
78f9790aebSLuigi Rizzo #include <machine/bus.h>	/* bus_dmamap_* */
79f9790aebSLuigi Rizzo #include <sys/endian.h>
80f9790aebSLuigi Rizzo #include <sys/refcount.h>
81f9790aebSLuigi Rizzo 
82f9790aebSLuigi Rizzo 
/*
 * FreeBSD wrappers around the per-bridge lock (the bdg_lock field of
 * struct nm_bridge).  Each macro takes a pointer to the bridge.
 *
 * NOTE(review): the file header describes this as an SX (sleepable)
 * lock, while these wrappers are built on struct rwlock -- confirm
 * which one is intended.
 */
#define BDG_RWLOCK_T		struct rwlock

/* lifetime */
#define	BDG_RWINIT(b)		\
	rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
#define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)

/* exclusive (writer) side */
#define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)
#define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)

/* shared (reader) side */
#define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)
#define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)
#define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)
93f9790aebSLuigi Rizzo 
94f9790aebSLuigi Rizzo 
95f9790aebSLuigi Rizzo #elif defined(linux)
96f9790aebSLuigi Rizzo 
97f9790aebSLuigi Rizzo #include "bsd_glue.h"
98f9790aebSLuigi Rizzo 
99f9790aebSLuigi Rizzo #elif defined(__APPLE__)
100f9790aebSLuigi Rizzo 
101f9790aebSLuigi Rizzo #warning OSX support is only partial
102f9790aebSLuigi Rizzo #include "osx_glue.h"
103f9790aebSLuigi Rizzo 
104f9790aebSLuigi Rizzo #else
105f9790aebSLuigi Rizzo 
106f9790aebSLuigi Rizzo #error	Unsupported platform
107f9790aebSLuigi Rizzo 
108f9790aebSLuigi Rizzo #endif /* unsupported */
109f9790aebSLuigi Rizzo 
110f9790aebSLuigi Rizzo /*
111f9790aebSLuigi Rizzo  * common headers
112f9790aebSLuigi Rizzo  */
113f9790aebSLuigi Rizzo 
114f9790aebSLuigi Rizzo #include <net/netmap.h>
115f9790aebSLuigi Rizzo #include <dev/netmap/netmap_kern.h>
116f9790aebSLuigi Rizzo #include <dev/netmap/netmap_mem2.h>
117f9790aebSLuigi Rizzo 
118f9790aebSLuigi Rizzo #ifdef WITH_VALE
119f9790aebSLuigi Rizzo 
120f9790aebSLuigi Rizzo /*
121f9790aebSLuigi Rizzo  * system parameters (most of them in netmap_kern.h)
122f9790aebSLuigi Rizzo  * NM_NAME	prefix for switch port names, default "vale"
123f9790aebSLuigi Rizzo  * NM_BDG_MAXPORTS	number of ports
124f9790aebSLuigi Rizzo  * NM_BRIDGES	max number of switches in the system.
125f9790aebSLuigi Rizzo  *	XXX should become a sysctl or tunable
126f9790aebSLuigi Rizzo  *
127f9790aebSLuigi Rizzo  * Switch ports are named valeX:Y where X is the switch name and Y
128f9790aebSLuigi Rizzo  * is the port. If Y matches a physical interface name, the port is
129f9790aebSLuigi Rizzo  * connected to a physical device.
130f9790aebSLuigi Rizzo  *
131f9790aebSLuigi Rizzo  * Unlike physical interfaces, switch ports use their own memory region
132f9790aebSLuigi Rizzo  * for rings and buffers.
133f9790aebSLuigi Rizzo  * The virtual interfaces use per-queue lock instead of core lock.
134f9790aebSLuigi Rizzo  * In the tx loop, we aggregate traffic in batches to make all operations
135f9790aebSLuigi Rizzo  * faster. The batch size is bridge_batch.
136f9790aebSLuigi Rizzo  */
/* per-port limits; XXX unclear what the right values are */
#define NM_BDG_MAXRINGS		16
#define NM_BDG_MAXSLOTS		4096

/* ring size in the device */
#define NM_BRIDGE_RINGSIZE	1024

/* number of entries in the forwarding (MAC) table */
#define NM_BDG_HASH		1024

/* number of entries in the forwarding buffer */
#define NM_BDG_BATCH		1024

/* max size of a chain of bufs */
#define NM_MULTISEG		64

/* actual size of the tables: one full batch plus one full chain */
#define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)

/* NM_FT_NULL terminates a list of slots in the ft */
#define NM_FT_NULL		NM_BDG_BATCH_MAX

/* number of bridges */
#define	NM_BRIDGES		8
148f9790aebSLuigi Rizzo 
149f9790aebSLuigi Rizzo 
150f9790aebSLuigi Rizzo /*
151f9790aebSLuigi Rizzo  * bridge_batch is set via sysctl to the max batch size to be
152f9790aebSLuigi Rizzo  * used in the bridge. The actual value may be larger as the
153f9790aebSLuigi Rizzo  * last packet in the block may overflow the size.
154f9790aebSLuigi Rizzo  */
155f9790aebSLuigi Rizzo int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
156f9790aebSLuigi Rizzo SYSCTL_DECL(_dev_netmap);
157f9790aebSLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
158f9790aebSLuigi Rizzo 
159f9790aebSLuigi Rizzo 
160f9790aebSLuigi Rizzo static int bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp);
161f9790aebSLuigi Rizzo static int bdg_netmap_reg(struct netmap_adapter *na, int onoff);
162f9790aebSLuigi Rizzo static int netmap_bwrap_attach(struct ifnet *, struct ifnet *);
163f9790aebSLuigi Rizzo static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
164f9790aebSLuigi Rizzo int kern_netmap_regif(struct nmreq *nmr);
165f9790aebSLuigi Rizzo 
/*
 * Per-destination queue descriptor: one nm_bdg_q is used to build the
 * list of entries headed to each output interface.
 */
struct nm_bdg_q {
	uint16_t bq_head;	/* list head; NM_FT_NULL when empty */
	uint16_t bq_tail;	/* list tail; NM_FT_NULL when empty */
	/*
	 * Number of output buffers (not packets: we can have
	 * coalescing during the copy).
	 */
	uint32_t bq_len;
};
176f9790aebSLuigi Rizzo 
/*
 * One entry of the bridge forwarding table (struct nm_bridge, ht[]).
 * XXX revise this
 */
struct nm_hash_ent {
	/* MAC address; the top 2 bytes are the epoch */
	uint64_t	mac;
	/* NOTE(review): presumably the port(s) this MAC maps to -- confirm */
	uint64_t	ports;
};
182f9790aebSLuigi Rizzo 
183f9790aebSLuigi Rizzo /*
184f9790aebSLuigi Rizzo  * nm_bridge is a descriptor for a VALE switch.
185f9790aebSLuigi Rizzo  * Interfaces for a bridge are all in bdg_ports[].
186f9790aebSLuigi Rizzo  * The array has fixed size, an empty entry does not terminate
187f9790aebSLuigi Rizzo  * the search, but lookups only occur on attach/detach so we
188f9790aebSLuigi Rizzo  * don't mind if they are slow.
189f9790aebSLuigi Rizzo  *
190f9790aebSLuigi Rizzo  * The bridge is non blocking on the transmit ports: excess
191f9790aebSLuigi Rizzo  * packets are dropped if there is no room on the output port.
192f9790aebSLuigi Rizzo  *
193f9790aebSLuigi Rizzo  * bdg_lock protects accesses to the bdg_ports array.
194f9790aebSLuigi Rizzo  * This is a rw lock (or equivalent).
195f9790aebSLuigi Rizzo  */
196f9790aebSLuigi Rizzo struct nm_bridge {
197f9790aebSLuigi Rizzo 	/* XXX what is the proper alignment/layout ? */
198f9790aebSLuigi Rizzo 	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
199f9790aebSLuigi Rizzo 	int		bdg_namelen;
200f9790aebSLuigi Rizzo 	uint32_t	bdg_active_ports; /* 0 means free */
201f9790aebSLuigi Rizzo 	char		bdg_basename[IFNAMSIZ];
202f9790aebSLuigi Rizzo 
203f9790aebSLuigi Rizzo 	/* Indexes of active ports (up to active_ports)
204f9790aebSLuigi Rizzo 	 * and all other remaining ports.
205f9790aebSLuigi Rizzo 	 */
206f9790aebSLuigi Rizzo 	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];
207f9790aebSLuigi Rizzo 
208f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
209f9790aebSLuigi Rizzo 
210f9790aebSLuigi Rizzo 
211f9790aebSLuigi Rizzo 	/*
212f9790aebSLuigi Rizzo 	 * The function to decide the destination port.
213f9790aebSLuigi Rizzo 	 * It returns either of an index of the destination port,
214f9790aebSLuigi Rizzo 	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
215f9790aebSLuigi Rizzo 	 * forward this packet.  ring_nr is the source ring index, and the
216f9790aebSLuigi Rizzo 	 * function may overwrite this value to forward this packet to a
217f9790aebSLuigi Rizzo 	 * different ring index.
218f9790aebSLuigi Rizzo 	 * This function must be set by netmap_bdgctl().
219f9790aebSLuigi Rizzo 	 */
220f9790aebSLuigi Rizzo 	bdg_lookup_fn_t nm_bdg_lookup;
221f9790aebSLuigi Rizzo 
222f9790aebSLuigi Rizzo 	/* the forwarding table, MAC+ports.
223f9790aebSLuigi Rizzo 	 * XXX should be changed to an argument to be passed to
224f9790aebSLuigi Rizzo 	 * the lookup function, and allocated on attach
225f9790aebSLuigi Rizzo 	 */
226f9790aebSLuigi Rizzo 	struct nm_hash_ent ht[NM_BDG_HASH];
227f9790aebSLuigi Rizzo };
228f9790aebSLuigi Rizzo 
229f9790aebSLuigi Rizzo 
230f9790aebSLuigi Rizzo /*
231f9790aebSLuigi Rizzo  * XXX in principle nm_bridges could be created dynamically
232f9790aebSLuigi Rizzo  * Right now we have a static array and deletions are protected
233f9790aebSLuigi Rizzo  * by an exclusive lock.
234f9790aebSLuigi Rizzo  */
235f9790aebSLuigi Rizzo struct nm_bridge nm_bridges[NM_BRIDGES];
236f9790aebSLuigi Rizzo 
237f9790aebSLuigi Rizzo 
/*
 * Slightly optimized packet copy routine.
 *
 * _src	source buffer
 * _dst	destination buffer, must not overlap _src
 * l	number of bytes to copy; nothing is copied when l <= 0
 *
 * Lengths that are a multiple of 64 and shorter than 1024 bytes are
 * copied with an unrolled loop moving eight 64-bit words per
 * iteration, which is often faster than dealing with odd sizes.
 * The fast path accesses both buffers as uint64_t, so it assumes
 * they are suitably aligned for that.
 *
 * Every other length goes through memcpy(), so exactly l bytes are
 * read and written.  Callers that want the fast path must round the
 * length up to a multiple of 64 themselves, and are then responsible
 * for having enough room in both buffers.
 */
static inline void
pkt_copy(const void *_src, void *_dst, int l)
{
	const uint64_t *src = _src;
	uint64_t *dst = _dst;

	/* never hand a negative length to memcpy() */
	if (unlikely(l <= 0))
		return;
	/* large or odd sized copies: do not touch bytes beyond l */
	if (unlikely(l >= 1024 || (l & 63) != 0)) {
		memcpy(dst, src, l);
		return;
	}
	for (; likely(l > 0); l -= 64) {
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
	}
}
266f9790aebSLuigi Rizzo 
267f9790aebSLuigi Rizzo 
268f9790aebSLuigi Rizzo /*
269f9790aebSLuigi Rizzo  * locate a bridge among the existing ones.
270f9790aebSLuigi Rizzo  * MUST BE CALLED WITH NMG_LOCK()
271f9790aebSLuigi Rizzo  *
272f9790aebSLuigi Rizzo  * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
273f9790aebSLuigi Rizzo  * We assume that this is called with a name of at least NM_NAME chars.
274f9790aebSLuigi Rizzo  */
275f9790aebSLuigi Rizzo static struct nm_bridge *
276f9790aebSLuigi Rizzo nm_find_bridge(const char *name, int create)
277f9790aebSLuigi Rizzo {
278f9790aebSLuigi Rizzo 	int i, l, namelen;
279f9790aebSLuigi Rizzo 	struct nm_bridge *b = NULL;
280f9790aebSLuigi Rizzo 
281f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
282f9790aebSLuigi Rizzo 
283f9790aebSLuigi Rizzo 	namelen = strlen(NM_NAME);	/* base length */
284f9790aebSLuigi Rizzo 	l = name ? strlen(name) : 0;		/* actual length */
285f9790aebSLuigi Rizzo 	if (l < namelen) {
286f9790aebSLuigi Rizzo 		D("invalid bridge name %s", name ? name : NULL);
287f9790aebSLuigi Rizzo 		return NULL;
288f9790aebSLuigi Rizzo 	}
289f9790aebSLuigi Rizzo 	for (i = namelen + 1; i < l; i++) {
290f9790aebSLuigi Rizzo 		if (name[i] == ':') {
291f9790aebSLuigi Rizzo 			namelen = i;
292f9790aebSLuigi Rizzo 			break;
293f9790aebSLuigi Rizzo 		}
294f9790aebSLuigi Rizzo 	}
295f9790aebSLuigi Rizzo 	if (namelen >= IFNAMSIZ)
296f9790aebSLuigi Rizzo 		namelen = IFNAMSIZ;
297f9790aebSLuigi Rizzo 	ND("--- prefix is '%.*s' ---", namelen, name);
298f9790aebSLuigi Rizzo 
299f9790aebSLuigi Rizzo 	/* lookup the name, remember empty slot if there is one */
300f9790aebSLuigi Rizzo 	for (i = 0; i < NM_BRIDGES; i++) {
301f9790aebSLuigi Rizzo 		struct nm_bridge *x = nm_bridges + i;
302f9790aebSLuigi Rizzo 
303f9790aebSLuigi Rizzo 		if (x->bdg_active_ports == 0) {
304f9790aebSLuigi Rizzo 			if (create && b == NULL)
305f9790aebSLuigi Rizzo 				b = x;	/* record empty slot */
306f9790aebSLuigi Rizzo 		} else if (x->bdg_namelen != namelen) {
307f9790aebSLuigi Rizzo 			continue;
308f9790aebSLuigi Rizzo 		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
309f9790aebSLuigi Rizzo 			ND("found '%.*s' at %d", namelen, name, i);
310f9790aebSLuigi Rizzo 			b = x;
311f9790aebSLuigi Rizzo 			break;
312f9790aebSLuigi Rizzo 		}
313f9790aebSLuigi Rizzo 	}
314f9790aebSLuigi Rizzo 	if (i == NM_BRIDGES && b) { /* name not found, can create entry */
315f9790aebSLuigi Rizzo 		/* initialize the bridge */
316f9790aebSLuigi Rizzo 		strncpy(b->bdg_basename, name, namelen);
317f9790aebSLuigi Rizzo 		ND("create new bridge %s with ports %d", b->bdg_basename,
318f9790aebSLuigi Rizzo 			b->bdg_active_ports);
319f9790aebSLuigi Rizzo 		b->bdg_namelen = namelen;
320f9790aebSLuigi Rizzo 		b->bdg_active_ports = 0;
321f9790aebSLuigi Rizzo 		for (i = 0; i < NM_BDG_MAXPORTS; i++)
322f9790aebSLuigi Rizzo 			b->bdg_port_index[i] = i;
323f9790aebSLuigi Rizzo 		/* set the default function */
324f9790aebSLuigi Rizzo 		b->nm_bdg_lookup = netmap_bdg_learning;
325f9790aebSLuigi Rizzo 		/* reset the MAC address table */
326f9790aebSLuigi Rizzo 		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
327f9790aebSLuigi Rizzo 	}
328f9790aebSLuigi Rizzo 	return b;
329f9790aebSLuigi Rizzo }
330f9790aebSLuigi Rizzo 
331f9790aebSLuigi Rizzo 
332f9790aebSLuigi Rizzo /*
333f9790aebSLuigi Rizzo  * Free the forwarding tables for rings attached to switch ports.
334f9790aebSLuigi Rizzo  */
335f9790aebSLuigi Rizzo static void
336f9790aebSLuigi Rizzo nm_free_bdgfwd(struct netmap_adapter *na)
337f9790aebSLuigi Rizzo {
338f9790aebSLuigi Rizzo 	int nrings, i;
339f9790aebSLuigi Rizzo 	struct netmap_kring *kring;
340f9790aebSLuigi Rizzo 
341f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
34217885a7bSLuigi Rizzo 	nrings = na->num_tx_rings;
34317885a7bSLuigi Rizzo 	kring = na->tx_rings;
344f9790aebSLuigi Rizzo 	for (i = 0; i < nrings; i++) {
345f9790aebSLuigi Rizzo 		if (kring[i].nkr_ft) {
346f9790aebSLuigi Rizzo 			free(kring[i].nkr_ft, M_DEVBUF);
347f9790aebSLuigi Rizzo 			kring[i].nkr_ft = NULL; /* protect from freeing twice */
348f9790aebSLuigi Rizzo 		}
349f9790aebSLuigi Rizzo 	}
350f9790aebSLuigi Rizzo }
351f9790aebSLuigi Rizzo 
352f9790aebSLuigi Rizzo 
353f9790aebSLuigi Rizzo /*
354f9790aebSLuigi Rizzo  * Allocate the forwarding tables for the rings attached to the bridge ports.
355f9790aebSLuigi Rizzo  */
356f9790aebSLuigi Rizzo static int
357f9790aebSLuigi Rizzo nm_alloc_bdgfwd(struct netmap_adapter *na)
358f9790aebSLuigi Rizzo {
359f9790aebSLuigi Rizzo 	int nrings, l, i, num_dstq;
360f9790aebSLuigi Rizzo 	struct netmap_kring *kring;
361f9790aebSLuigi Rizzo 
362f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
363f9790aebSLuigi Rizzo 	/* all port:rings + broadcast */
364f9790aebSLuigi Rizzo 	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
365f9790aebSLuigi Rizzo 	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
366f9790aebSLuigi Rizzo 	l += sizeof(struct nm_bdg_q) * num_dstq;
367f9790aebSLuigi Rizzo 	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
368f9790aebSLuigi Rizzo 
369f0ea3689SLuigi Rizzo 	nrings = netmap_real_tx_rings(na);
370f9790aebSLuigi Rizzo 	kring = na->tx_rings;
371f9790aebSLuigi Rizzo 	for (i = 0; i < nrings; i++) {
372f9790aebSLuigi Rizzo 		struct nm_bdg_fwd *ft;
373f9790aebSLuigi Rizzo 		struct nm_bdg_q *dstq;
374f9790aebSLuigi Rizzo 		int j;
375f9790aebSLuigi Rizzo 
376f9790aebSLuigi Rizzo 		ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
377f9790aebSLuigi Rizzo 		if (!ft) {
378f9790aebSLuigi Rizzo 			nm_free_bdgfwd(na);
379f9790aebSLuigi Rizzo 			return ENOMEM;
380f9790aebSLuigi Rizzo 		}
381f9790aebSLuigi Rizzo 		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
382f9790aebSLuigi Rizzo 		for (j = 0; j < num_dstq; j++) {
383f9790aebSLuigi Rizzo 			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
384f9790aebSLuigi Rizzo 			dstq[j].bq_len = 0;
385f9790aebSLuigi Rizzo 		}
386f9790aebSLuigi Rizzo 		kring[i].nkr_ft = ft;
387f9790aebSLuigi Rizzo 	}
388f9790aebSLuigi Rizzo 	return 0;
389f9790aebSLuigi Rizzo }
390f9790aebSLuigi Rizzo 
391f9790aebSLuigi Rizzo 
392f9790aebSLuigi Rizzo static void
393f9790aebSLuigi Rizzo netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
394f9790aebSLuigi Rizzo {
395f9790aebSLuigi Rizzo 	int s_hw = hw, s_sw = sw;
396f9790aebSLuigi Rizzo 	int i, lim =b->bdg_active_ports;
397f9790aebSLuigi Rizzo 	uint8_t tmp[NM_BDG_MAXPORTS];
398f9790aebSLuigi Rizzo 
399f9790aebSLuigi Rizzo 	/*
400f9790aebSLuigi Rizzo 	New algorithm:
401f9790aebSLuigi Rizzo 	make a copy of bdg_port_index;
402f9790aebSLuigi Rizzo 	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
403f9790aebSLuigi Rizzo 	in the array of bdg_port_index, replacing them with
404f9790aebSLuigi Rizzo 	entries from the bottom of the array;
405f9790aebSLuigi Rizzo 	decrement bdg_active_ports;
406f9790aebSLuigi Rizzo 	acquire BDG_WLOCK() and copy back the array.
407f9790aebSLuigi Rizzo 	 */
408f9790aebSLuigi Rizzo 
409f0ea3689SLuigi Rizzo 	if (netmap_verbose)
410f9790aebSLuigi Rizzo 		D("detach %d and %d (lim %d)", hw, sw, lim);
411f9790aebSLuigi Rizzo 	/* make a copy of the list of active ports, update it,
412f9790aebSLuigi Rizzo 	 * and then copy back within BDG_WLOCK().
413f9790aebSLuigi Rizzo 	 */
414f9790aebSLuigi Rizzo 	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
415f9790aebSLuigi Rizzo 	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
416f9790aebSLuigi Rizzo 		if (hw >= 0 && tmp[i] == hw) {
417f9790aebSLuigi Rizzo 			ND("detach hw %d at %d", hw, i);
418f9790aebSLuigi Rizzo 			lim--; /* point to last active port */
419f9790aebSLuigi Rizzo 			tmp[i] = tmp[lim]; /* swap with i */
420f9790aebSLuigi Rizzo 			tmp[lim] = hw;	/* now this is inactive */
421f9790aebSLuigi Rizzo 			hw = -1;
422f9790aebSLuigi Rizzo 		} else if (sw >= 0 && tmp[i] == sw) {
423f9790aebSLuigi Rizzo 			ND("detach sw %d at %d", sw, i);
424f9790aebSLuigi Rizzo 			lim--;
425f9790aebSLuigi Rizzo 			tmp[i] = tmp[lim];
426f9790aebSLuigi Rizzo 			tmp[lim] = sw;
427f9790aebSLuigi Rizzo 			sw = -1;
428f9790aebSLuigi Rizzo 		} else {
429f9790aebSLuigi Rizzo 			i++;
430f9790aebSLuigi Rizzo 		}
431f9790aebSLuigi Rizzo 	}
432f9790aebSLuigi Rizzo 	if (hw >= 0 || sw >= 0) {
433f9790aebSLuigi Rizzo 		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
434f9790aebSLuigi Rizzo 	}
435f9790aebSLuigi Rizzo 
436f9790aebSLuigi Rizzo 	BDG_WLOCK(b);
437f9790aebSLuigi Rizzo 	b->bdg_ports[s_hw] = NULL;
438f9790aebSLuigi Rizzo 	if (s_sw >= 0) {
439f9790aebSLuigi Rizzo 		b->bdg_ports[s_sw] = NULL;
440f9790aebSLuigi Rizzo 	}
441f9790aebSLuigi Rizzo 	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
442f9790aebSLuigi Rizzo 	b->bdg_active_ports = lim;
443f9790aebSLuigi Rizzo 	BDG_WUNLOCK(b);
444f9790aebSLuigi Rizzo 
445f9790aebSLuigi Rizzo 	ND("now %d active ports", lim);
446f9790aebSLuigi Rizzo 	if (lim == 0) {
447f9790aebSLuigi Rizzo 		ND("marking bridge %s as free", b->bdg_basename);
448f9790aebSLuigi Rizzo 		b->nm_bdg_lookup = NULL;
449f9790aebSLuigi Rizzo 	}
450f9790aebSLuigi Rizzo }
451f9790aebSLuigi Rizzo 
45217885a7bSLuigi Rizzo 
453f9790aebSLuigi Rizzo static void
454f9790aebSLuigi Rizzo netmap_adapter_vp_dtor(struct netmap_adapter *na)
455f9790aebSLuigi Rizzo {
456f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
457f9790aebSLuigi Rizzo 	struct nm_bridge *b = vpna->na_bdg;
458f9790aebSLuigi Rizzo 	struct ifnet *ifp = na->ifp;
459f9790aebSLuigi Rizzo 
460f9790aebSLuigi Rizzo 	ND("%s has %d references", NM_IFPNAME(ifp), na->na_refcount);
461f9790aebSLuigi Rizzo 
462f9790aebSLuigi Rizzo 	if (b) {
463f9790aebSLuigi Rizzo 		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
464f9790aebSLuigi Rizzo 	}
465f9790aebSLuigi Rizzo 
466f9790aebSLuigi Rizzo 	bzero(ifp, sizeof(*ifp));
467f9790aebSLuigi Rizzo 	free(ifp, M_DEVBUF);
468f9790aebSLuigi Rizzo 	na->ifp = NULL;
469f9790aebSLuigi Rizzo }
470f9790aebSLuigi Rizzo 
47117885a7bSLuigi Rizzo 
47217885a7bSLuigi Rizzo /* Try to get a reference to a netmap adapter attached to a VALE switch.
47317885a7bSLuigi Rizzo  * If the adapter is found (or is created), this function returns 0, a
47417885a7bSLuigi Rizzo  * non NULL pointer is returned into *na, and the caller holds a
47517885a7bSLuigi Rizzo  * reference to the adapter.
47617885a7bSLuigi Rizzo  * If an adapter is not found, then no reference is grabbed and the
47717885a7bSLuigi Rizzo  * function returns an error code, or 0 if there is just a VALE prefix
47817885a7bSLuigi Rizzo  * mismatch. Therefore the caller holds a reference when
47917885a7bSLuigi Rizzo  * (*na != NULL && return == 0).
48017885a7bSLuigi Rizzo  */
481f9790aebSLuigi Rizzo int
482f9790aebSLuigi Rizzo netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
483f9790aebSLuigi Rizzo {
484f9790aebSLuigi Rizzo 	const char *name = nmr->nr_name;
485f9790aebSLuigi Rizzo 	struct ifnet *ifp;
486f9790aebSLuigi Rizzo 	int error = 0;
487f9790aebSLuigi Rizzo 	struct netmap_adapter *ret;
488f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna;
489f9790aebSLuigi Rizzo 	struct nm_bridge *b;
490f9790aebSLuigi Rizzo 	int i, j, cand = -1, cand2 = -1;
491f9790aebSLuigi Rizzo 	int needed;
492f9790aebSLuigi Rizzo 
493f9790aebSLuigi Rizzo 	*na = NULL;     /* default return value */
494f9790aebSLuigi Rizzo 
495f9790aebSLuigi Rizzo 	/* first try to see if this is a bridge port. */
496f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
497f9790aebSLuigi Rizzo 	if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) {
498f9790aebSLuigi Rizzo 		return 0;  /* no error, but no VALE prefix */
499f9790aebSLuigi Rizzo 	}
500f9790aebSLuigi Rizzo 
501f9790aebSLuigi Rizzo 	b = nm_find_bridge(name, create);
502f9790aebSLuigi Rizzo 	if (b == NULL) {
503f9790aebSLuigi Rizzo 		D("no bridges available for '%s'", name);
504f2637526SLuigi Rizzo 		return (create ? ENOMEM : ENXIO);
505f9790aebSLuigi Rizzo 	}
506f9790aebSLuigi Rizzo 
507f9790aebSLuigi Rizzo 	/* Now we are sure that name starts with the bridge's name,
508f9790aebSLuigi Rizzo 	 * lookup the port in the bridge. We need to scan the entire
509f9790aebSLuigi Rizzo 	 * list. It is not important to hold a WLOCK on the bridge
510f9790aebSLuigi Rizzo 	 * during the search because NMG_LOCK already guarantees
511f9790aebSLuigi Rizzo 	 * that there are no other possible writers.
512f9790aebSLuigi Rizzo 	 */
513f9790aebSLuigi Rizzo 
514f9790aebSLuigi Rizzo 	/* lookup in the local list of ports */
515f9790aebSLuigi Rizzo 	for (j = 0; j < b->bdg_active_ports; j++) {
516f9790aebSLuigi Rizzo 		i = b->bdg_port_index[j];
517f9790aebSLuigi Rizzo 		vpna = b->bdg_ports[i];
518f9790aebSLuigi Rizzo 		// KASSERT(na != NULL);
519f9790aebSLuigi Rizzo 		ifp = vpna->up.ifp;
520f9790aebSLuigi Rizzo 		/* XXX make sure the name only contains one : */
521f9790aebSLuigi Rizzo 		if (!strcmp(NM_IFPNAME(ifp), name)) {
522f9790aebSLuigi Rizzo 			netmap_adapter_get(&vpna->up);
523f9790aebSLuigi Rizzo 			ND("found existing if %s refs %d", name,
524f9790aebSLuigi Rizzo 				vpna->na_bdg_refcount);
525f9790aebSLuigi Rizzo 			*na = (struct netmap_adapter *)vpna;
526f9790aebSLuigi Rizzo 			return 0;
527f9790aebSLuigi Rizzo 		}
528f9790aebSLuigi Rizzo 	}
529f9790aebSLuigi Rizzo 	/* not found, should we create it? */
530f9790aebSLuigi Rizzo 	if (!create)
531f9790aebSLuigi Rizzo 		return ENXIO;
532f9790aebSLuigi Rizzo 	/* yes we should, see if we have space to attach entries */
533f9790aebSLuigi Rizzo 	needed = 2; /* in some cases we only need 1 */
534f9790aebSLuigi Rizzo 	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
535f9790aebSLuigi Rizzo 		D("bridge full %d, cannot create new port", b->bdg_active_ports);
536f2637526SLuigi Rizzo 		return ENOMEM;
537f9790aebSLuigi Rizzo 	}
538f9790aebSLuigi Rizzo 	/* record the next two ports available, but do not allocate yet */
539f9790aebSLuigi Rizzo 	cand = b->bdg_port_index[b->bdg_active_ports];
540f9790aebSLuigi Rizzo 	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
541f9790aebSLuigi Rizzo 	ND("+++ bridge %s port %s used %d avail %d %d",
542f9790aebSLuigi Rizzo 		b->bdg_basename, name, b->bdg_active_ports, cand, cand2);
543f9790aebSLuigi Rizzo 
544f9790aebSLuigi Rizzo 	/*
545f9790aebSLuigi Rizzo 	 * try see if there is a matching NIC with this name
546f9790aebSLuigi Rizzo 	 * (after the bridge's name)
547f9790aebSLuigi Rizzo 	 */
548f9790aebSLuigi Rizzo 	ifp = ifunit_ref(name + b->bdg_namelen + 1);
549f9790aebSLuigi Rizzo 	if (!ifp) { /* this is a virtual port */
550f9790aebSLuigi Rizzo 		if (nmr->nr_cmd) {
551f9790aebSLuigi Rizzo 			/* nr_cmd must be 0 for a virtual port */
552f9790aebSLuigi Rizzo 			return EINVAL;
553f9790aebSLuigi Rizzo 		}
554f9790aebSLuigi Rizzo 
555f9790aebSLuigi Rizzo 	 	/* create a struct ifnet for the new port.
556f9790aebSLuigi Rizzo 		 * need M_NOWAIT as we are under nma_lock
557f9790aebSLuigi Rizzo 		 */
558f9790aebSLuigi Rizzo 		ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
559f9790aebSLuigi Rizzo 		if (!ifp)
560f9790aebSLuigi Rizzo 			return ENOMEM;
561f9790aebSLuigi Rizzo 
562f9790aebSLuigi Rizzo 		strcpy(ifp->if_xname, name);
563f9790aebSLuigi Rizzo 		/* bdg_netmap_attach creates a struct netmap_adapter */
564f9790aebSLuigi Rizzo 		error = bdg_netmap_attach(nmr, ifp);
565f9790aebSLuigi Rizzo 		if (error) {
566f9790aebSLuigi Rizzo 			D("error %d", error);
567f9790aebSLuigi Rizzo 			free(ifp, M_DEVBUF);
568f9790aebSLuigi Rizzo 			return error;
569f9790aebSLuigi Rizzo 		}
570f9790aebSLuigi Rizzo 		ret = NA(ifp);
571f9790aebSLuigi Rizzo 		cand2 = -1;	/* only need one port */
572f9790aebSLuigi Rizzo 	} else {  /* this is a NIC */
573f9790aebSLuigi Rizzo 		struct ifnet *fake_ifp;
574f9790aebSLuigi Rizzo 
575f9790aebSLuigi Rizzo 		error = netmap_get_hw_na(ifp, &ret);
576f9790aebSLuigi Rizzo 		if (error || ret == NULL)
577f9790aebSLuigi Rizzo 			goto out;
578f9790aebSLuigi Rizzo 
579f9790aebSLuigi Rizzo 		/* make sure the NIC is not already in use */
580f9790aebSLuigi Rizzo 		if (NETMAP_OWNED_BY_ANY(ret)) {
581f9790aebSLuigi Rizzo 			D("NIC %s busy, cannot attach to bridge",
582f9790aebSLuigi Rizzo 				NM_IFPNAME(ifp));
583f2637526SLuigi Rizzo 			error = EBUSY;
584f9790aebSLuigi Rizzo 			goto out;
585f9790aebSLuigi Rizzo 		}
586f9790aebSLuigi Rizzo 		/* create a fake interface */
587f9790aebSLuigi Rizzo 		fake_ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
588f9790aebSLuigi Rizzo 		if (!fake_ifp) {
589f9790aebSLuigi Rizzo 			error = ENOMEM;
590f9790aebSLuigi Rizzo 			goto out;
591f9790aebSLuigi Rizzo 		}
592f9790aebSLuigi Rizzo 		strcpy(fake_ifp->if_xname, name);
593f9790aebSLuigi Rizzo 		error = netmap_bwrap_attach(fake_ifp, ifp);
594f9790aebSLuigi Rizzo 		if (error) {
595f9790aebSLuigi Rizzo 			free(fake_ifp, M_DEVBUF);
596f9790aebSLuigi Rizzo 			goto out;
597f9790aebSLuigi Rizzo 		}
598f9790aebSLuigi Rizzo 		ret = NA(fake_ifp);
599f9790aebSLuigi Rizzo 		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
600f9790aebSLuigi Rizzo 			cand2 = -1; /* only need one port */
601f9790aebSLuigi Rizzo 		if_rele(ifp);
602f9790aebSLuigi Rizzo 	}
603f9790aebSLuigi Rizzo 	vpna = (struct netmap_vp_adapter *)ret;
604f9790aebSLuigi Rizzo 
605f9790aebSLuigi Rizzo 	BDG_WLOCK(b);
606f9790aebSLuigi Rizzo 	vpna->bdg_port = cand;
607f9790aebSLuigi Rizzo 	ND("NIC  %p to bridge port %d", vpna, cand);
608f9790aebSLuigi Rizzo 	/* bind the port to the bridge (virtual ports are not active) */
609f9790aebSLuigi Rizzo 	b->bdg_ports[cand] = vpna;
610f9790aebSLuigi Rizzo 	vpna->na_bdg = b;
611f9790aebSLuigi Rizzo 	b->bdg_active_ports++;
612f9790aebSLuigi Rizzo 	if (cand2 >= 0) {
613f9790aebSLuigi Rizzo 		struct netmap_vp_adapter *hostna = vpna + 1;
614f9790aebSLuigi Rizzo 		/* also bind the host stack to the bridge */
615f9790aebSLuigi Rizzo 		b->bdg_ports[cand2] = hostna;
616f9790aebSLuigi Rizzo 		hostna->bdg_port = cand2;
617f9790aebSLuigi Rizzo 		hostna->na_bdg = b;
618f9790aebSLuigi Rizzo 		b->bdg_active_ports++;
619f9790aebSLuigi Rizzo 		ND("host %p to bridge port %d", hostna, cand2);
620f9790aebSLuigi Rizzo 	}
621f9790aebSLuigi Rizzo 	ND("if %s refs %d", name, vpna->up.na_refcount);
622f9790aebSLuigi Rizzo 	BDG_WUNLOCK(b);
623f9790aebSLuigi Rizzo 	*na = ret;
624f9790aebSLuigi Rizzo 	netmap_adapter_get(ret);
625f9790aebSLuigi Rizzo 	return 0;
626f9790aebSLuigi Rizzo 
627f9790aebSLuigi Rizzo out:
628f9790aebSLuigi Rizzo 	if_rele(ifp);
629f9790aebSLuigi Rizzo 
630f9790aebSLuigi Rizzo 	return error;
631f9790aebSLuigi Rizzo }
632f9790aebSLuigi Rizzo 
633f9790aebSLuigi Rizzo 
634f9790aebSLuigi Rizzo /* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */
635f9790aebSLuigi Rizzo static int
636f9790aebSLuigi Rizzo nm_bdg_attach(struct nmreq *nmr)
637f9790aebSLuigi Rizzo {
638f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
639f9790aebSLuigi Rizzo 	struct netmap_if *nifp;
640f9790aebSLuigi Rizzo 	struct netmap_priv_d *npriv;
641f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna;
642f9790aebSLuigi Rizzo 	int error;
643f9790aebSLuigi Rizzo 
644f9790aebSLuigi Rizzo 	npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
645f9790aebSLuigi Rizzo 	if (npriv == NULL)
646f9790aebSLuigi Rizzo 		return ENOMEM;
647f2637526SLuigi Rizzo 
648f9790aebSLuigi Rizzo 	NMG_LOCK();
649f2637526SLuigi Rizzo 
65017885a7bSLuigi Rizzo 	error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */);
651f9790aebSLuigi Rizzo 	if (error) /* no device, or another bridge or user owns the device */
652f9790aebSLuigi Rizzo 		goto unlock_exit;
653f2637526SLuigi Rizzo 
65417885a7bSLuigi Rizzo 	if (na == NULL) { /* VALE prefix missing */
655f9790aebSLuigi Rizzo 		error = EINVAL;
65617885a7bSLuigi Rizzo 		goto unlock_exit;
657f9790aebSLuigi Rizzo 	}
658f9790aebSLuigi Rizzo 
659f9790aebSLuigi Rizzo 	if (na->active_fds > 0) { /* already registered */
660f9790aebSLuigi Rizzo 		error = EBUSY;
661f9790aebSLuigi Rizzo 		goto unref_exit;
662f9790aebSLuigi Rizzo 	}
663f9790aebSLuigi Rizzo 
664f0ea3689SLuigi Rizzo 	nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags, &error);
665f9790aebSLuigi Rizzo 	if (!nifp) {
666f9790aebSLuigi Rizzo 		goto unref_exit;
667f9790aebSLuigi Rizzo 	}
668f9790aebSLuigi Rizzo 
669f9790aebSLuigi Rizzo 	bna = (struct netmap_bwrap_adapter*)na;
670f9790aebSLuigi Rizzo 	bna->na_kpriv = npriv;
671f9790aebSLuigi Rizzo 	NMG_UNLOCK();
672f9790aebSLuigi Rizzo 	ND("registered %s to netmap-mode", NM_IFPNAME(na->ifp));
673f9790aebSLuigi Rizzo 	return 0;
674f9790aebSLuigi Rizzo 
675f9790aebSLuigi Rizzo unref_exit:
676f9790aebSLuigi Rizzo 	netmap_adapter_put(na);
677f9790aebSLuigi Rizzo unlock_exit:
678f9790aebSLuigi Rizzo 	NMG_UNLOCK();
679f9790aebSLuigi Rizzo 	bzero(npriv, sizeof(*npriv));
680f9790aebSLuigi Rizzo 	free(npriv, M_DEVBUF);
681f9790aebSLuigi Rizzo 	return error;
682f9790aebSLuigi Rizzo }
683f9790aebSLuigi Rizzo 
68417885a7bSLuigi Rizzo 
685f9790aebSLuigi Rizzo static int
686f9790aebSLuigi Rizzo nm_bdg_detach(struct nmreq *nmr)
687f9790aebSLuigi Rizzo {
688f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
689f9790aebSLuigi Rizzo 	int error;
690f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna;
691f9790aebSLuigi Rizzo 	int last_instance;
692f9790aebSLuigi Rizzo 
693f9790aebSLuigi Rizzo 	NMG_LOCK();
69417885a7bSLuigi Rizzo 	error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */);
695f9790aebSLuigi Rizzo 	if (error) { /* no device, or another bridge or user owns the device */
696f9790aebSLuigi Rizzo 		goto unlock_exit;
697f9790aebSLuigi Rizzo 	}
698f2637526SLuigi Rizzo 
69917885a7bSLuigi Rizzo 	if (na == NULL) { /* VALE prefix missing */
700f9790aebSLuigi Rizzo 		error = EINVAL;
70117885a7bSLuigi Rizzo 		goto unlock_exit;
702f9790aebSLuigi Rizzo 	}
70317885a7bSLuigi Rizzo 
704f9790aebSLuigi Rizzo 	bna = (struct netmap_bwrap_adapter *)na;
705f9790aebSLuigi Rizzo 
706f9790aebSLuigi Rizzo 	if (na->active_fds == 0) { /* not registered */
707f9790aebSLuigi Rizzo 		error = EINVAL;
708f9790aebSLuigi Rizzo 		goto unref_exit;
709f9790aebSLuigi Rizzo 	}
710f9790aebSLuigi Rizzo 
711f9790aebSLuigi Rizzo 	last_instance = netmap_dtor_locked(bna->na_kpriv); /* unregister */
712f9790aebSLuigi Rizzo 	if (!last_instance) {
713f9790aebSLuigi Rizzo 		D("--- error, trying to detach an entry with active mmaps");
714f9790aebSLuigi Rizzo 		error = EINVAL;
715f9790aebSLuigi Rizzo 	} else {
716f9790aebSLuigi Rizzo 		struct netmap_priv_d *npriv = bna->na_kpriv;
717f9790aebSLuigi Rizzo 
718f9790aebSLuigi Rizzo 		bna->na_kpriv = NULL;
719f9790aebSLuigi Rizzo 		D("deleting priv");
720f9790aebSLuigi Rizzo 
721f9790aebSLuigi Rizzo 		bzero(npriv, sizeof(*npriv));
722f9790aebSLuigi Rizzo 		free(npriv, M_DEVBUF);
723f9790aebSLuigi Rizzo 	}
724f9790aebSLuigi Rizzo 
725f9790aebSLuigi Rizzo unref_exit:
726f9790aebSLuigi Rizzo 	netmap_adapter_put(na);
727f9790aebSLuigi Rizzo unlock_exit:
728f9790aebSLuigi Rizzo 	NMG_UNLOCK();
729f9790aebSLuigi Rizzo 	return error;
730f9790aebSLuigi Rizzo 
731f9790aebSLuigi Rizzo }
732f9790aebSLuigi Rizzo 
733f9790aebSLuigi Rizzo 
734f9790aebSLuigi Rizzo /* exported to kernel callers, e.g. OVS ?
735f9790aebSLuigi Rizzo  * Entry point.
736f9790aebSLuigi Rizzo  * Called without NMG_LOCK.
737f9790aebSLuigi Rizzo  */
738f9790aebSLuigi Rizzo int
739f9790aebSLuigi Rizzo netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
740f9790aebSLuigi Rizzo {
741f9790aebSLuigi Rizzo 	struct nm_bridge *b;
742f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
743f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna;
744f9790aebSLuigi Rizzo 	struct ifnet *iter;
745f9790aebSLuigi Rizzo 	char *name = nmr->nr_name;
746f9790aebSLuigi Rizzo 	int cmd = nmr->nr_cmd, namelen = strlen(name);
747f9790aebSLuigi Rizzo 	int error = 0, i, j;
748f9790aebSLuigi Rizzo 
749f9790aebSLuigi Rizzo 	switch (cmd) {
750f9790aebSLuigi Rizzo 	case NETMAP_BDG_ATTACH:
751f9790aebSLuigi Rizzo 		error = nm_bdg_attach(nmr);
752f9790aebSLuigi Rizzo 		break;
753f9790aebSLuigi Rizzo 
754f9790aebSLuigi Rizzo 	case NETMAP_BDG_DETACH:
755f9790aebSLuigi Rizzo 		error = nm_bdg_detach(nmr);
756f9790aebSLuigi Rizzo 		break;
757f9790aebSLuigi Rizzo 
758f9790aebSLuigi Rizzo 	case NETMAP_BDG_LIST:
759f9790aebSLuigi Rizzo 		/* this is used to enumerate bridges and ports */
760f9790aebSLuigi Rizzo 		if (namelen) { /* look up indexes of bridge and port */
761f9790aebSLuigi Rizzo 			if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
762f9790aebSLuigi Rizzo 				error = EINVAL;
763f9790aebSLuigi Rizzo 				break;
764f9790aebSLuigi Rizzo 			}
765f9790aebSLuigi Rizzo 			NMG_LOCK();
766f9790aebSLuigi Rizzo 			b = nm_find_bridge(name, 0 /* don't create */);
767f9790aebSLuigi Rizzo 			if (!b) {
768f9790aebSLuigi Rizzo 				error = ENOENT;
769f9790aebSLuigi Rizzo 				NMG_UNLOCK();
770f9790aebSLuigi Rizzo 				break;
771f9790aebSLuigi Rizzo 			}
772f9790aebSLuigi Rizzo 
773f9790aebSLuigi Rizzo 			error = ENOENT;
774f9790aebSLuigi Rizzo 			for (j = 0; j < b->bdg_active_ports; j++) {
775f9790aebSLuigi Rizzo 				i = b->bdg_port_index[j];
776f9790aebSLuigi Rizzo 				vpna = b->bdg_ports[i];
777f9790aebSLuigi Rizzo 				if (vpna == NULL) {
778f9790aebSLuigi Rizzo 					D("---AAAAAAAAARGH-------");
779f9790aebSLuigi Rizzo 					continue;
780f9790aebSLuigi Rizzo 				}
781f9790aebSLuigi Rizzo 				iter = vpna->up.ifp;
782f9790aebSLuigi Rizzo 				/* the former and the latter identify a
783f9790aebSLuigi Rizzo 				 * virtual port and a NIC, respectively
784f9790aebSLuigi Rizzo 				 */
785f9790aebSLuigi Rizzo 				if (!strcmp(iter->if_xname, name)) {
786f9790aebSLuigi Rizzo 					/* bridge index */
787f9790aebSLuigi Rizzo 					nmr->nr_arg1 = b - nm_bridges;
788f9790aebSLuigi Rizzo 					nmr->nr_arg2 = i; /* port index */
789f9790aebSLuigi Rizzo 					error = 0;
790f9790aebSLuigi Rizzo 					break;
791f9790aebSLuigi Rizzo 				}
792f9790aebSLuigi Rizzo 			}
793f9790aebSLuigi Rizzo 			NMG_UNLOCK();
794f9790aebSLuigi Rizzo 		} else {
795f9790aebSLuigi Rizzo 			/* return the first non-empty entry starting from
796f9790aebSLuigi Rizzo 			 * bridge nr_arg1 and port nr_arg2.
797f9790aebSLuigi Rizzo 			 *
798f9790aebSLuigi Rizzo 			 * Users can detect the end of the same bridge by
799f9790aebSLuigi Rizzo 			 * seeing the new and old value of nr_arg1, and can
800f9790aebSLuigi Rizzo 			 * detect the end of all the bridge by error != 0
801f9790aebSLuigi Rizzo 			 */
802f9790aebSLuigi Rizzo 			i = nmr->nr_arg1;
803f9790aebSLuigi Rizzo 			j = nmr->nr_arg2;
804f9790aebSLuigi Rizzo 
805f9790aebSLuigi Rizzo 			NMG_LOCK();
806f9790aebSLuigi Rizzo 			for (error = ENOENT; i < NM_BRIDGES; i++) {
807f9790aebSLuigi Rizzo 				b = nm_bridges + i;
808f9790aebSLuigi Rizzo 				if (j >= b->bdg_active_ports) {
809f9790aebSLuigi Rizzo 					j = 0; /* following bridges scan from 0 */
810f9790aebSLuigi Rizzo 					continue;
811f9790aebSLuigi Rizzo 				}
812f9790aebSLuigi Rizzo 				nmr->nr_arg1 = i;
813f9790aebSLuigi Rizzo 				nmr->nr_arg2 = j;
814f9790aebSLuigi Rizzo 				j = b->bdg_port_index[j];
815f9790aebSLuigi Rizzo 				vpna = b->bdg_ports[j];
816f9790aebSLuigi Rizzo 				iter = vpna->up.ifp;
817f9790aebSLuigi Rizzo 				strncpy(name, iter->if_xname, (size_t)IFNAMSIZ);
818f9790aebSLuigi Rizzo 				error = 0;
819f9790aebSLuigi Rizzo 				break;
820f9790aebSLuigi Rizzo 			}
821f9790aebSLuigi Rizzo 			NMG_UNLOCK();
822f9790aebSLuigi Rizzo 		}
823f9790aebSLuigi Rizzo 		break;
824f9790aebSLuigi Rizzo 
825f9790aebSLuigi Rizzo 	case NETMAP_BDG_LOOKUP_REG:
826f9790aebSLuigi Rizzo 		/* register a lookup function to the given bridge.
827f9790aebSLuigi Rizzo 		 * nmr->nr_name may be just bridge's name (including ':'
828f9790aebSLuigi Rizzo 		 * if it is not just NM_NAME).
829f9790aebSLuigi Rizzo 		 */
830f9790aebSLuigi Rizzo 		if (!func) {
831f9790aebSLuigi Rizzo 			error = EINVAL;
832f9790aebSLuigi Rizzo 			break;
833f9790aebSLuigi Rizzo 		}
834f9790aebSLuigi Rizzo 		NMG_LOCK();
835f9790aebSLuigi Rizzo 		b = nm_find_bridge(name, 0 /* don't create */);
836f9790aebSLuigi Rizzo 		if (!b) {
837f9790aebSLuigi Rizzo 			error = EINVAL;
838f9790aebSLuigi Rizzo 		} else {
839f9790aebSLuigi Rizzo 			b->nm_bdg_lookup = func;
840f9790aebSLuigi Rizzo 		}
841f9790aebSLuigi Rizzo 		NMG_UNLOCK();
842f9790aebSLuigi Rizzo 		break;
843f9790aebSLuigi Rizzo 
844f0ea3689SLuigi Rizzo 	case NETMAP_BDG_VNET_HDR:
845f0ea3689SLuigi Rizzo 		/* Valid lengths for the virtio-net header are 0 (no header),
846f0ea3689SLuigi Rizzo 		   10 and 12. */
847f0ea3689SLuigi Rizzo 		if (nmr->nr_arg1 != 0 &&
848f0ea3689SLuigi Rizzo 			nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) &&
849f0ea3689SLuigi Rizzo 				nmr->nr_arg1 != 12) {
850f0ea3689SLuigi Rizzo 			error = EINVAL;
851f0ea3689SLuigi Rizzo 			break;
852f0ea3689SLuigi Rizzo 		}
853f9790aebSLuigi Rizzo 		NMG_LOCK();
854f9790aebSLuigi Rizzo 		error = netmap_get_bdg_na(nmr, &na, 0);
85517885a7bSLuigi Rizzo 		if (na && !error) {
856f9790aebSLuigi Rizzo 			vpna = (struct netmap_vp_adapter *)na;
857f0ea3689SLuigi Rizzo 			vpna->virt_hdr_len = nmr->nr_arg1;
858f0ea3689SLuigi Rizzo 			if (vpna->virt_hdr_len)
859f0ea3689SLuigi Rizzo 				vpna->mfs = NETMAP_BDG_BUF_SIZE(na->nm_mem);
860f0ea3689SLuigi Rizzo 			D("Using vnet_hdr_len %d for %p", vpna->virt_hdr_len, vpna);
86117885a7bSLuigi Rizzo 			netmap_adapter_put(na);
862f9790aebSLuigi Rizzo 		}
863f9790aebSLuigi Rizzo 		NMG_UNLOCK();
864f9790aebSLuigi Rizzo 		break;
865f9790aebSLuigi Rizzo 
866f9790aebSLuigi Rizzo 	default:
867f9790aebSLuigi Rizzo 		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
868f9790aebSLuigi Rizzo 		error = EINVAL;
869f9790aebSLuigi Rizzo 		break;
870f9790aebSLuigi Rizzo 	}
871f9790aebSLuigi Rizzo 	return error;
872f9790aebSLuigi Rizzo }
873f9790aebSLuigi Rizzo 
874f9790aebSLuigi Rizzo static int
875f9790aebSLuigi Rizzo netmap_vp_krings_create(struct netmap_adapter *na)
876f9790aebSLuigi Rizzo {
877f0ea3689SLuigi Rizzo 	u_int tailroom;
878f9790aebSLuigi Rizzo 	int error, i;
879f9790aebSLuigi Rizzo 	uint32_t *leases;
880f0ea3689SLuigi Rizzo 	u_int nrx = netmap_real_rx_rings(na);
881f9790aebSLuigi Rizzo 
882f9790aebSLuigi Rizzo 	/*
883f9790aebSLuigi Rizzo 	 * Leases are attached to RX rings on vale ports
884f9790aebSLuigi Rizzo 	 */
885f9790aebSLuigi Rizzo 	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
886f9790aebSLuigi Rizzo 
887f0ea3689SLuigi Rizzo 	error = netmap_krings_create(na, tailroom);
888f9790aebSLuigi Rizzo 	if (error)
889f9790aebSLuigi Rizzo 		return error;
890f9790aebSLuigi Rizzo 
891f9790aebSLuigi Rizzo 	leases = na->tailroom;
892f9790aebSLuigi Rizzo 
893f9790aebSLuigi Rizzo 	for (i = 0; i < nrx; i++) { /* Receive rings */
894f9790aebSLuigi Rizzo 		na->rx_rings[i].nkr_leases = leases;
895f9790aebSLuigi Rizzo 		leases += na->num_rx_desc;
896f9790aebSLuigi Rizzo 	}
897f9790aebSLuigi Rizzo 
898f9790aebSLuigi Rizzo 	error = nm_alloc_bdgfwd(na);
899f9790aebSLuigi Rizzo 	if (error) {
900f9790aebSLuigi Rizzo 		netmap_krings_delete(na);
901f9790aebSLuigi Rizzo 		return error;
902f9790aebSLuigi Rizzo 	}
903f9790aebSLuigi Rizzo 
904f9790aebSLuigi Rizzo 	return 0;
905f9790aebSLuigi Rizzo }
906f9790aebSLuigi Rizzo 
90717885a7bSLuigi Rizzo 
/*
 * Counterpart of netmap_vp_krings_create(): release the forwarding
 * tables first, then the krings.
 */
static void
netmap_vp_krings_delete(struct netmap_adapter *na)
{
	nm_free_bdgfwd(na);

	netmap_krings_delete(na);
}
914f9790aebSLuigi Rizzo 
915f9790aebSLuigi Rizzo 
916f9790aebSLuigi Rizzo static int
917f9790aebSLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
918f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *na, u_int ring_nr);
919f9790aebSLuigi Rizzo 
920f9790aebSLuigi Rizzo 
921f9790aebSLuigi Rizzo /*
922f9790aebSLuigi Rizzo  * Grab packets from a kring, move them into the ft structure
923f9790aebSLuigi Rizzo  * associated to the tx (input) port. Max one instance per port,
924f9790aebSLuigi Rizzo  * filtered on input (ioctl, poll or XXX).
925f9790aebSLuigi Rizzo  * Returns the next position in the ring.
926f9790aebSLuigi Rizzo  */
927f9790aebSLuigi Rizzo static int
928f9790aebSLuigi Rizzo nm_bdg_preflush(struct netmap_vp_adapter *na, u_int ring_nr,
929f9790aebSLuigi Rizzo 	struct netmap_kring *kring, u_int end)
930f9790aebSLuigi Rizzo {
931f9790aebSLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
932f9790aebSLuigi Rizzo 	struct nm_bdg_fwd *ft;
933f9790aebSLuigi Rizzo 	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
934f9790aebSLuigi Rizzo 	u_int ft_i = 0;	/* start from 0 */
935f9790aebSLuigi Rizzo 	u_int frags = 1; /* how many frags ? */
936f9790aebSLuigi Rizzo 	struct nm_bridge *b = na->na_bdg;
937f9790aebSLuigi Rizzo 
938f9790aebSLuigi Rizzo 	/* To protect against modifications to the bridge we acquire a
939f9790aebSLuigi Rizzo 	 * shared lock, waiting if we can sleep (if the source port is
940f9790aebSLuigi Rizzo 	 * attached to a user process) or with a trylock otherwise (NICs).
941f9790aebSLuigi Rizzo 	 */
942f9790aebSLuigi Rizzo 	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
943f9790aebSLuigi Rizzo 	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
944f9790aebSLuigi Rizzo 		BDG_RLOCK(b);
945f9790aebSLuigi Rizzo 	else if (!BDG_RTRYLOCK(b))
946f9790aebSLuigi Rizzo 		return 0;
947f9790aebSLuigi Rizzo 	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
948f9790aebSLuigi Rizzo 	ft = kring->nkr_ft;
949f9790aebSLuigi Rizzo 
950f9790aebSLuigi Rizzo 	for (; likely(j != end); j = nm_next(j, lim)) {
951f9790aebSLuigi Rizzo 		struct netmap_slot *slot = &ring->slot[j];
952f9790aebSLuigi Rizzo 		char *buf;
953f9790aebSLuigi Rizzo 
954f9790aebSLuigi Rizzo 		ft[ft_i].ft_len = slot->len;
955f9790aebSLuigi Rizzo 		ft[ft_i].ft_flags = slot->flags;
956f9790aebSLuigi Rizzo 
957f9790aebSLuigi Rizzo 		ND("flags is 0x%x", slot->flags);
958f9790aebSLuigi Rizzo 		/* this slot goes into a list so initialize the link field */
959f9790aebSLuigi Rizzo 		ft[ft_i].ft_next = NM_FT_NULL;
960f9790aebSLuigi Rizzo 		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
961f9790aebSLuigi Rizzo 			(void *)(uintptr_t)slot->ptr : BDG_NMB(&na->up, slot);
962*e31c6ec7SLuigi Rizzo 		if (unlikely(buf == NULL)) {
963*e31c6ec7SLuigi Rizzo 			RD(5, "NULL %s buffer pointer from %s slot %d len %d",
964*e31c6ec7SLuigi Rizzo 				(slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT",
965*e31c6ec7SLuigi Rizzo 				kring->name, j, ft[ft_i].ft_len);
966*e31c6ec7SLuigi Rizzo 			buf = ft[ft_i].ft_buf = NMB_VA(0); /* the 'null' buffer */
967*e31c6ec7SLuigi Rizzo 			ft[ft_i].ft_len = 0;
968*e31c6ec7SLuigi Rizzo 			ft[ft_i].ft_flags = 0;
969*e31c6ec7SLuigi Rizzo 		}
9702e159ef0SLuigi Rizzo 		__builtin_prefetch(buf);
971f9790aebSLuigi Rizzo 		++ft_i;
972f9790aebSLuigi Rizzo 		if (slot->flags & NS_MOREFRAG) {
973f9790aebSLuigi Rizzo 			frags++;
974f9790aebSLuigi Rizzo 			continue;
975f9790aebSLuigi Rizzo 		}
976f9790aebSLuigi Rizzo 		if (unlikely(netmap_verbose && frags > 1))
977f9790aebSLuigi Rizzo 			RD(5, "%d frags at %d", frags, ft_i - frags);
978f9790aebSLuigi Rizzo 		ft[ft_i - frags].ft_frags = frags;
979f9790aebSLuigi Rizzo 		frags = 1;
980f9790aebSLuigi Rizzo 		if (unlikely((int)ft_i >= bridge_batch))
981f9790aebSLuigi Rizzo 			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
982f9790aebSLuigi Rizzo 	}
983f9790aebSLuigi Rizzo 	if (frags > 1) {
984f9790aebSLuigi Rizzo 		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
985f9790aebSLuigi Rizzo 		// ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
986f9790aebSLuigi Rizzo 		ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
987f9790aebSLuigi Rizzo 		ft[ft_i - frags].ft_frags = frags - 1;
988f9790aebSLuigi Rizzo 	}
989f9790aebSLuigi Rizzo 	if (ft_i)
990f9790aebSLuigi Rizzo 		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
991f9790aebSLuigi Rizzo 	BDG_RUNLOCK(b);
992f9790aebSLuigi Rizzo 	return j;
993f9790aebSLuigi Rizzo }
994f9790aebSLuigi Rizzo 
995f9790aebSLuigi Rizzo 
996f9790aebSLuigi Rizzo /* ----- FreeBSD if_bridge hash function ------- */
997f9790aebSLuigi Rizzo 
998f9790aebSLuigi Rizzo /*
999f9790aebSLuigi Rizzo  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
1000f9790aebSLuigi Rizzo  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
1001f9790aebSLuigi Rizzo  *
1002f9790aebSLuigi Rizzo  * http://www.burtleburtle.net/bob/hash/spooky.html
1003f9790aebSLuigi Rizzo  */
1004f9790aebSLuigi Rizzo #define mix(a, b, c)                                                    \
1005f9790aebSLuigi Rizzo do {                                                                    \
1006f9790aebSLuigi Rizzo         a -= b; a -= c; a ^= (c >> 13);                                 \
1007f9790aebSLuigi Rizzo         b -= c; b -= a; b ^= (a << 8);                                  \
1008f9790aebSLuigi Rizzo         c -= a; c -= b; c ^= (b >> 13);                                 \
1009f9790aebSLuigi Rizzo         a -= b; a -= c; a ^= (c >> 12);                                 \
1010f9790aebSLuigi Rizzo         b -= c; b -= a; b ^= (a << 16);                                 \
1011f9790aebSLuigi Rizzo         c -= a; c -= b; c ^= (b >> 5);                                  \
1012f9790aebSLuigi Rizzo         a -= b; a -= c; a ^= (c >> 3);                                  \
1013f9790aebSLuigi Rizzo         b -= c; b -= a; b ^= (a << 10);                                 \
1014f9790aebSLuigi Rizzo         c -= a; c -= b; c ^= (b >> 15);                                 \
1015f9790aebSLuigi Rizzo } while (/*CONSTCOND*/0)
1016f9790aebSLuigi Rizzo 
101717885a7bSLuigi Rizzo 
1018f9790aebSLuigi Rizzo static __inline uint32_t
1019f9790aebSLuigi Rizzo nm_bridge_rthash(const uint8_t *addr)
1020f9790aebSLuigi Rizzo {
1021f9790aebSLuigi Rizzo         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key
1022f9790aebSLuigi Rizzo 
1023f9790aebSLuigi Rizzo         b += addr[5] << 8;
1024f9790aebSLuigi Rizzo         b += addr[4];
1025f9790aebSLuigi Rizzo         a += addr[3] << 24;
1026f9790aebSLuigi Rizzo         a += addr[2] << 16;
1027f9790aebSLuigi Rizzo         a += addr[1] << 8;
1028f9790aebSLuigi Rizzo         a += addr[0];
1029f9790aebSLuigi Rizzo 
1030f9790aebSLuigi Rizzo         mix(a, b, c);
1031f9790aebSLuigi Rizzo #define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
1032f9790aebSLuigi Rizzo         return (c & BRIDGE_RTHASH_MASK);
1033f9790aebSLuigi Rizzo }
1034f9790aebSLuigi Rizzo 
1035f9790aebSLuigi Rizzo #undef mix
1036f9790aebSLuigi Rizzo 
1037f9790aebSLuigi Rizzo 
1038f9790aebSLuigi Rizzo static int
1039f9790aebSLuigi Rizzo bdg_netmap_reg(struct netmap_adapter *na, int onoff)
1040f9790aebSLuigi Rizzo {
1041f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna =
1042f9790aebSLuigi Rizzo 		(struct netmap_vp_adapter*)na;
1043f9790aebSLuigi Rizzo 	struct ifnet *ifp = na->ifp;
1044f9790aebSLuigi Rizzo 
1045f9790aebSLuigi Rizzo 	/* the interface is already attached to the bridge,
1046f9790aebSLuigi Rizzo 	 * so we only need to toggle IFCAP_NETMAP.
1047f9790aebSLuigi Rizzo 	 */
1048f9790aebSLuigi Rizzo 	BDG_WLOCK(vpna->na_bdg);
1049f9790aebSLuigi Rizzo 	if (onoff) {
1050f9790aebSLuigi Rizzo 		ifp->if_capenable |= IFCAP_NETMAP;
1051f9790aebSLuigi Rizzo 	} else {
1052f9790aebSLuigi Rizzo 		ifp->if_capenable &= ~IFCAP_NETMAP;
1053f9790aebSLuigi Rizzo 	}
1054f9790aebSLuigi Rizzo 	BDG_WUNLOCK(vpna->na_bdg);
1055f9790aebSLuigi Rizzo 	return 0;
1056f9790aebSLuigi Rizzo }
1057f9790aebSLuigi Rizzo 
1058f9790aebSLuigi Rizzo 
1059f9790aebSLuigi Rizzo /*
1060f9790aebSLuigi Rizzo  * Lookup function for a learning bridge.
1061f9790aebSLuigi Rizzo  * Update the hash table with the source address,
1062f9790aebSLuigi Rizzo  * and then returns the destination port index, and the
1063f9790aebSLuigi Rizzo  * ring in *dst_ring (at the moment, always use ring 0)
1064f9790aebSLuigi Rizzo  */
1065f9790aebSLuigi Rizzo u_int
1066f9790aebSLuigi Rizzo netmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring,
1067f9790aebSLuigi Rizzo 		struct netmap_vp_adapter *na)
1068f9790aebSLuigi Rizzo {
1069f9790aebSLuigi Rizzo 	struct nm_hash_ent *ht = na->na_bdg->ht;
1070f9790aebSLuigi Rizzo 	uint32_t sh, dh;
1071f9790aebSLuigi Rizzo 	u_int dst, mysrc = na->bdg_port;
1072f9790aebSLuigi Rizzo 	uint64_t smac, dmac;
1073f9790aebSLuigi Rizzo 
1074f9790aebSLuigi Rizzo 	if (buf_len < 14) {
1075f9790aebSLuigi Rizzo 		D("invalid buf length %d", buf_len);
1076f9790aebSLuigi Rizzo 		return NM_BDG_NOPORT;
1077f9790aebSLuigi Rizzo 	}
1078f9790aebSLuigi Rizzo 	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
1079f9790aebSLuigi Rizzo 	smac = le64toh(*(uint64_t *)(buf + 4));
1080f9790aebSLuigi Rizzo 	smac >>= 16;
1081f9790aebSLuigi Rizzo 
1082f9790aebSLuigi Rizzo 	/*
1083f9790aebSLuigi Rizzo 	 * The hash is somewhat expensive, there might be some
1084f9790aebSLuigi Rizzo 	 * worthwhile optimizations here.
1085f9790aebSLuigi Rizzo 	 */
1086f9790aebSLuigi Rizzo 	if ((buf[6] & 1) == 0) { /* valid src */
1087f9790aebSLuigi Rizzo 		uint8_t *s = buf+6;
1088f9790aebSLuigi Rizzo 		sh = nm_bridge_rthash(s); // XXX hash of source
1089f9790aebSLuigi Rizzo 		/* update source port forwarding entry */
1090f9790aebSLuigi Rizzo 		ht[sh].mac = smac;	/* XXX expire ? */
1091f9790aebSLuigi Rizzo 		ht[sh].ports = mysrc;
1092f9790aebSLuigi Rizzo 		if (netmap_verbose)
1093f9790aebSLuigi Rizzo 		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
1094f9790aebSLuigi Rizzo 			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
1095f9790aebSLuigi Rizzo 	}
1096f9790aebSLuigi Rizzo 	dst = NM_BDG_BROADCAST;
1097f9790aebSLuigi Rizzo 	if ((buf[0] & 1) == 0) { /* unicast */
1098f9790aebSLuigi Rizzo 		dh = nm_bridge_rthash(buf); // XXX hash of dst
1099f9790aebSLuigi Rizzo 		if (ht[dh].mac == dmac) {	/* found dst */
1100f9790aebSLuigi Rizzo 			dst = ht[dh].ports;
1101f9790aebSLuigi Rizzo 		}
1102f9790aebSLuigi Rizzo 		/* XXX otherwise return NM_BDG_UNKNOWN ? */
1103f9790aebSLuigi Rizzo 	}
1104f9790aebSLuigi Rizzo 	*dst_ring = 0;
1105f9790aebSLuigi Rizzo 	return dst;
1106f9790aebSLuigi Rizzo }
1107f9790aebSLuigi Rizzo 
1108f9790aebSLuigi Rizzo 
1109f9790aebSLuigi Rizzo /*
111017885a7bSLuigi Rizzo  * Available space in the ring. Only used in VALE code
111117885a7bSLuigi Rizzo  * and only with is_rx = 1
111217885a7bSLuigi Rizzo  */
111317885a7bSLuigi Rizzo static inline uint32_t
111417885a7bSLuigi Rizzo nm_kr_space(struct netmap_kring *k, int is_rx)
111517885a7bSLuigi Rizzo {
111617885a7bSLuigi Rizzo 	int space;
111717885a7bSLuigi Rizzo 
111817885a7bSLuigi Rizzo 	if (is_rx) {
111917885a7bSLuigi Rizzo 		int busy = k->nkr_hwlease - k->nr_hwcur;
112017885a7bSLuigi Rizzo 		if (busy < 0)
112117885a7bSLuigi Rizzo 			busy += k->nkr_num_slots;
112217885a7bSLuigi Rizzo 		space = k->nkr_num_slots - 1 - busy;
112317885a7bSLuigi Rizzo 	} else {
112417885a7bSLuigi Rizzo 		/* XXX never used in this branch */
112517885a7bSLuigi Rizzo 		space = k->nr_hwtail - k->nkr_hwlease;
112617885a7bSLuigi Rizzo 		if (space < 0)
112717885a7bSLuigi Rizzo 			space += k->nkr_num_slots;
112817885a7bSLuigi Rizzo 	}
112917885a7bSLuigi Rizzo #if 0
113017885a7bSLuigi Rizzo 	// sanity check
113117885a7bSLuigi Rizzo 	if (k->nkr_hwlease >= k->nkr_num_slots ||
113217885a7bSLuigi Rizzo 		k->nr_hwcur >= k->nkr_num_slots ||
113317885a7bSLuigi Rizzo 		k->nr_tail >= k->nkr_num_slots ||
113417885a7bSLuigi Rizzo 		busy < 0 ||
113517885a7bSLuigi Rizzo 		busy >= k->nkr_num_slots) {
113617885a7bSLuigi Rizzo 		D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d",			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
113717885a7bSLuigi Rizzo 			k->nkr_lease_idx, k->nkr_num_slots);
113817885a7bSLuigi Rizzo 	}
113917885a7bSLuigi Rizzo #endif
114017885a7bSLuigi Rizzo 	return space;
114117885a7bSLuigi Rizzo }
114217885a7bSLuigi Rizzo 
114317885a7bSLuigi Rizzo 
114417885a7bSLuigi Rizzo 
114517885a7bSLuigi Rizzo 
114617885a7bSLuigi Rizzo /* make a lease on the kring for N positions. return the
114717885a7bSLuigi Rizzo  * lease index
114817885a7bSLuigi Rizzo  * XXX only used in VALE code and with is_rx = 1
114917885a7bSLuigi Rizzo  */
115017885a7bSLuigi Rizzo static inline uint32_t
115117885a7bSLuigi Rizzo nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
115217885a7bSLuigi Rizzo {
115317885a7bSLuigi Rizzo 	uint32_t lim = k->nkr_num_slots - 1;
115417885a7bSLuigi Rizzo 	uint32_t lease_idx = k->nkr_lease_idx;
115517885a7bSLuigi Rizzo 
115617885a7bSLuigi Rizzo 	k->nkr_leases[lease_idx] = NR_NOSLOT;
115717885a7bSLuigi Rizzo 	k->nkr_lease_idx = nm_next(lease_idx, lim);
115817885a7bSLuigi Rizzo 
115917885a7bSLuigi Rizzo 	if (n > nm_kr_space(k, is_rx)) {
116017885a7bSLuigi Rizzo 		D("invalid request for %d slots", n);
116117885a7bSLuigi Rizzo 		panic("x");
116217885a7bSLuigi Rizzo 	}
116317885a7bSLuigi Rizzo 	/* XXX verify that there are n slots */
116417885a7bSLuigi Rizzo 	k->nkr_hwlease += n;
116517885a7bSLuigi Rizzo 	if (k->nkr_hwlease > lim)
116617885a7bSLuigi Rizzo 		k->nkr_hwlease -= lim + 1;
116717885a7bSLuigi Rizzo 
116817885a7bSLuigi Rizzo 	if (k->nkr_hwlease >= k->nkr_num_slots ||
116917885a7bSLuigi Rizzo 		k->nr_hwcur >= k->nkr_num_slots ||
117017885a7bSLuigi Rizzo 		k->nr_hwtail >= k->nkr_num_slots ||
117117885a7bSLuigi Rizzo 		k->nkr_lease_idx >= k->nkr_num_slots) {
117217885a7bSLuigi Rizzo 		D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
117317885a7bSLuigi Rizzo 			k->na->ifp->if_xname,
117417885a7bSLuigi Rizzo 			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
117517885a7bSLuigi Rizzo 			k->nkr_lease_idx, k->nkr_num_slots);
117617885a7bSLuigi Rizzo 	}
117717885a7bSLuigi Rizzo 	return lease_idx;
117817885a7bSLuigi Rizzo }
117917885a7bSLuigi Rizzo 
118017885a7bSLuigi Rizzo /*
1181f9790aebSLuigi Rizzo  * This flush routine supports only unicast and broadcast but a large
1182f9790aebSLuigi Rizzo  * number of ports, and lets us replace the learn and dispatch functions.
1183f9790aebSLuigi Rizzo  */
1184f9790aebSLuigi Rizzo int
1185f9790aebSLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
1186f9790aebSLuigi Rizzo 		u_int ring_nr)
1187f9790aebSLuigi Rizzo {
1188f9790aebSLuigi Rizzo 	struct nm_bdg_q *dst_ents, *brddst;
1189f9790aebSLuigi Rizzo 	uint16_t num_dsts = 0, *dsts;
1190f9790aebSLuigi Rizzo 	struct nm_bridge *b = na->na_bdg;
1191f9790aebSLuigi Rizzo 	u_int i, j, me = na->bdg_port;
1192f9790aebSLuigi Rizzo 
1193f9790aebSLuigi Rizzo 	/*
1194f9790aebSLuigi Rizzo 	 * The work area (pointed by ft) is followed by an array of
1195f9790aebSLuigi Rizzo 	 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
1196f9790aebSLuigi Rizzo 	 * queues per port plus one for the broadcast traffic.
1197f9790aebSLuigi Rizzo 	 * Then we have an array of destination indexes.
1198f9790aebSLuigi Rizzo 	 */
1199f9790aebSLuigi Rizzo 	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
1200f9790aebSLuigi Rizzo 	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
1201f9790aebSLuigi Rizzo 
1202f9790aebSLuigi Rizzo 	/* first pass: find a destination for each packet in the batch */
1203f9790aebSLuigi Rizzo 	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
1204f9790aebSLuigi Rizzo 		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
1205f9790aebSLuigi Rizzo 		uint16_t dst_port, d_i;
1206f9790aebSLuigi Rizzo 		struct nm_bdg_q *d;
1207f9790aebSLuigi Rizzo 		uint8_t *buf = ft[i].ft_buf;
1208f9790aebSLuigi Rizzo 		u_int len = ft[i].ft_len;
1209f9790aebSLuigi Rizzo 
1210f9790aebSLuigi Rizzo 		ND("slot %d frags %d", i, ft[i].ft_frags);
1211f0ea3689SLuigi Rizzo 		/* Drop the packet if the virtio-net header is not into the first
1212f9790aebSLuigi Rizzo 		   fragment nor at the very beginning of the second. */
1213f0ea3689SLuigi Rizzo 		if (unlikely(na->virt_hdr_len > len))
1214f9790aebSLuigi Rizzo 			continue;
1215f0ea3689SLuigi Rizzo 		if (len == na->virt_hdr_len) {
1216f9790aebSLuigi Rizzo 			buf = ft[i+1].ft_buf;
1217f9790aebSLuigi Rizzo 			len = ft[i+1].ft_len;
1218f9790aebSLuigi Rizzo 		} else {
1219f0ea3689SLuigi Rizzo 			buf += na->virt_hdr_len;
1220f0ea3689SLuigi Rizzo 			len -= na->virt_hdr_len;
1221f9790aebSLuigi Rizzo 		}
1222f9790aebSLuigi Rizzo 		dst_port = b->nm_bdg_lookup(buf, len, &dst_ring, na);
1223f9790aebSLuigi Rizzo 		if (netmap_verbose > 255)
1224f9790aebSLuigi Rizzo 			RD(5, "slot %d port %d -> %d", i, me, dst_port);
1225f9790aebSLuigi Rizzo 		if (dst_port == NM_BDG_NOPORT)
1226f9790aebSLuigi Rizzo 			continue; /* this packet is identified to be dropped */
1227f9790aebSLuigi Rizzo 		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
1228f9790aebSLuigi Rizzo 			continue;
1229f9790aebSLuigi Rizzo 		else if (dst_port == NM_BDG_BROADCAST)
1230f9790aebSLuigi Rizzo 			dst_ring = 0; /* broadcasts always go to ring 0 */
1231f9790aebSLuigi Rizzo 		else if (unlikely(dst_port == me ||
1232f9790aebSLuigi Rizzo 		    !b->bdg_ports[dst_port]))
1233f9790aebSLuigi Rizzo 			continue;
1234f9790aebSLuigi Rizzo 
1235f9790aebSLuigi Rizzo 		/* get a position in the scratch pad */
1236f9790aebSLuigi Rizzo 		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
1237f9790aebSLuigi Rizzo 		d = dst_ents + d_i;
1238f9790aebSLuigi Rizzo 
1239f9790aebSLuigi Rizzo 		/* append the first fragment to the list */
1240f9790aebSLuigi Rizzo 		if (d->bq_head == NM_FT_NULL) { /* new destination */
1241f9790aebSLuigi Rizzo 			d->bq_head = d->bq_tail = i;
1242f9790aebSLuigi Rizzo 			/* remember this position to be scanned later */
1243f9790aebSLuigi Rizzo 			if (dst_port != NM_BDG_BROADCAST)
1244f9790aebSLuigi Rizzo 				dsts[num_dsts++] = d_i;
1245f9790aebSLuigi Rizzo 		} else {
1246f9790aebSLuigi Rizzo 			ft[d->bq_tail].ft_next = i;
1247f9790aebSLuigi Rizzo 			d->bq_tail = i;
1248f9790aebSLuigi Rizzo 		}
1249f9790aebSLuigi Rizzo 		d->bq_len += ft[i].ft_frags;
1250f9790aebSLuigi Rizzo 	}
1251f9790aebSLuigi Rizzo 
1252f9790aebSLuigi Rizzo 	/*
1253f9790aebSLuigi Rizzo 	 * Broadcast traffic goes to ring 0 on all destinations.
1254f9790aebSLuigi Rizzo 	 * So we need to add these rings to the list of ports to scan.
1255f9790aebSLuigi Rizzo 	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
1256f9790aebSLuigi Rizzo 	 * expensive. We should keep a compact list of active destinations
1257f9790aebSLuigi Rizzo 	 * so we could shorten this loop.
1258f9790aebSLuigi Rizzo 	 */
1259f9790aebSLuigi Rizzo 	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
1260f9790aebSLuigi Rizzo 	if (brddst->bq_head != NM_FT_NULL) {
1261f9790aebSLuigi Rizzo 		for (j = 0; likely(j < b->bdg_active_ports); j++) {
1262f9790aebSLuigi Rizzo 			uint16_t d_i;
1263f9790aebSLuigi Rizzo 			i = b->bdg_port_index[j];
1264f9790aebSLuigi Rizzo 			if (unlikely(i == me))
1265f9790aebSLuigi Rizzo 				continue;
1266f9790aebSLuigi Rizzo 			d_i = i * NM_BDG_MAXRINGS;
1267f9790aebSLuigi Rizzo 			if (dst_ents[d_i].bq_head == NM_FT_NULL)
1268f9790aebSLuigi Rizzo 				dsts[num_dsts++] = d_i;
1269f9790aebSLuigi Rizzo 		}
1270f9790aebSLuigi Rizzo 	}
1271f9790aebSLuigi Rizzo 
1272f9790aebSLuigi Rizzo 	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
1273f9790aebSLuigi Rizzo 	/* second pass: scan destinations (XXX will be modular somehow) */
1274f9790aebSLuigi Rizzo 	for (i = 0; i < num_dsts; i++) {
1275f9790aebSLuigi Rizzo 		struct ifnet *dst_ifp;
1276f9790aebSLuigi Rizzo 		struct netmap_vp_adapter *dst_na;
1277f9790aebSLuigi Rizzo 		struct netmap_kring *kring;
1278f9790aebSLuigi Rizzo 		struct netmap_ring *ring;
1279f0ea3689SLuigi Rizzo 		u_int dst_nr, lim, j, d_i, next, brd_next;
1280f9790aebSLuigi Rizzo 		u_int needed, howmany;
1281f9790aebSLuigi Rizzo 		int retry = netmap_txsync_retry;
1282f9790aebSLuigi Rizzo 		struct nm_bdg_q *d;
1283f9790aebSLuigi Rizzo 		uint32_t my_start = 0, lease_idx = 0;
1284f9790aebSLuigi Rizzo 		int nrings;
1285f0ea3689SLuigi Rizzo 		int virt_hdr_mismatch = 0;
1286f9790aebSLuigi Rizzo 
1287f9790aebSLuigi Rizzo 		d_i = dsts[i];
1288f9790aebSLuigi Rizzo 		ND("second pass %d port %d", i, d_i);
1289f9790aebSLuigi Rizzo 		d = dst_ents + d_i;
1290f9790aebSLuigi Rizzo 		// XXX fix the division
1291f9790aebSLuigi Rizzo 		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
1292f9790aebSLuigi Rizzo 		/* protect from the lookup function returning an inactive
1293f9790aebSLuigi Rizzo 		 * destination port
1294f9790aebSLuigi Rizzo 		 */
1295f9790aebSLuigi Rizzo 		if (unlikely(dst_na == NULL))
1296f9790aebSLuigi Rizzo 			goto cleanup;
1297f9790aebSLuigi Rizzo 		if (dst_na->up.na_flags & NAF_SW_ONLY)
1298f9790aebSLuigi Rizzo 			goto cleanup;
1299f9790aebSLuigi Rizzo 		dst_ifp = dst_na->up.ifp;
1300f9790aebSLuigi Rizzo 		/*
1301f9790aebSLuigi Rizzo 		 * The interface may be in !netmap mode in two cases:
1302f9790aebSLuigi Rizzo 		 * - when na is attached but not activated yet;
1303f9790aebSLuigi Rizzo 		 * - when na is being deactivated but is still attached.
1304f9790aebSLuigi Rizzo 		 */
1305f9790aebSLuigi Rizzo 		if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) {
1306f9790aebSLuigi Rizzo 			ND("not in netmap mode!");
1307f9790aebSLuigi Rizzo 			goto cleanup;
1308f9790aebSLuigi Rizzo 		}
1309f9790aebSLuigi Rizzo 
1310f9790aebSLuigi Rizzo 		/* there is at least one either unicast or broadcast packet */
1311f9790aebSLuigi Rizzo 		brd_next = brddst->bq_head;
1312f9790aebSLuigi Rizzo 		next = d->bq_head;
1313f9790aebSLuigi Rizzo 		/* we need to reserve this many slots. If fewer are
1314f9790aebSLuigi Rizzo 		 * available, some packets will be dropped.
1315f9790aebSLuigi Rizzo 		 * Packets may have multiple fragments, so
1316f9790aebSLuigi Rizzo 		 * there is a chance that we may not use all of the slots
1317f9790aebSLuigi Rizzo 		 * we have claimed, so we will need to handle the leftover
1318f9790aebSLuigi Rizzo 		 * ones when we regain the lock.
1319f9790aebSLuigi Rizzo 		 */
1320f9790aebSLuigi Rizzo 		needed = d->bq_len + brddst->bq_len;
1321f9790aebSLuigi Rizzo 
1322f0ea3689SLuigi Rizzo 		if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) {
1323*e31c6ec7SLuigi Rizzo 			RD(3, "virt_hdr_mismatch, src %d len %d", na->virt_hdr_len, dst_na->virt_hdr_len);
1324f0ea3689SLuigi Rizzo 			/* There is a virtio-net header/offloadings mismatch between
1325f0ea3689SLuigi Rizzo 			 * source and destination. The slower mismatch datapath will
1326f0ea3689SLuigi Rizzo 			 * be used to cope with all the mismatches.
1327f0ea3689SLuigi Rizzo 			 */
1328f0ea3689SLuigi Rizzo 			virt_hdr_mismatch = 1;
1329f0ea3689SLuigi Rizzo 			if (dst_na->mfs < na->mfs) {
1330f0ea3689SLuigi Rizzo 				/* We may need to do segmentation offloadings, and so
1331f0ea3689SLuigi Rizzo 				 * we may need a number of destination slots greater
1332f0ea3689SLuigi Rizzo 				 * than the number of input slots ('needed').
1333f0ea3689SLuigi Rizzo 				 * We look for the smallest integer 'x' which satisfies:
1334f0ea3689SLuigi Rizzo 				 *	needed * na->mfs + x * H <= x * na->mfs
1335f0ea3689SLuigi Rizzo 				 * where 'H' is the length of the longest header that may
1336f0ea3689SLuigi Rizzo 				 * be replicated in the segmentation process (e.g. for
1337f0ea3689SLuigi Rizzo 				 * TCPv4 we must account for ethernet header, IP header
1338f0ea3689SLuigi Rizzo 				 * and TCPv4 header).
1339f0ea3689SLuigi Rizzo 				 */
1340f0ea3689SLuigi Rizzo 				needed = (needed * na->mfs) /
1341f0ea3689SLuigi Rizzo 						(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
1342f0ea3689SLuigi Rizzo 				ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
1343f0ea3689SLuigi Rizzo 			}
1344f0ea3689SLuigi Rizzo 		}
1345f0ea3689SLuigi Rizzo 
1346f9790aebSLuigi Rizzo 		ND(5, "pass 2 dst %d is %x %s",
1347f9790aebSLuigi Rizzo 			i, d_i, is_vp ? "virtual" : "nic/host");
1348f9790aebSLuigi Rizzo 		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
1349f9790aebSLuigi Rizzo 		nrings = dst_na->up.num_rx_rings;
1350f9790aebSLuigi Rizzo 		if (dst_nr >= nrings)
1351f9790aebSLuigi Rizzo 			dst_nr = dst_nr % nrings;
1352f9790aebSLuigi Rizzo 		kring = &dst_na->up.rx_rings[dst_nr];
1353f9790aebSLuigi Rizzo 		ring = kring->ring;
1354f9790aebSLuigi Rizzo 		lim = kring->nkr_num_slots - 1;
1355f9790aebSLuigi Rizzo 
1356f9790aebSLuigi Rizzo retry:
1357f9790aebSLuigi Rizzo 
1358f0ea3689SLuigi Rizzo 		if (dst_na->retry && retry) {
1359f0ea3689SLuigi Rizzo 			/* try to get some free slot from the previous run */
1360f0ea3689SLuigi Rizzo 			dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1361f0ea3689SLuigi Rizzo 		}
1362f9790aebSLuigi Rizzo 		/* reserve the buffers in the queue and an entry
1363f9790aebSLuigi Rizzo 		 * to report completion, and drop lock.
1364f9790aebSLuigi Rizzo 		 * XXX this might become a helper function.
1365f9790aebSLuigi Rizzo 		 */
1366f9790aebSLuigi Rizzo 		mtx_lock(&kring->q_lock);
1367f9790aebSLuigi Rizzo 		if (kring->nkr_stopped) {
1368f9790aebSLuigi Rizzo 			mtx_unlock(&kring->q_lock);
1369f9790aebSLuigi Rizzo 			goto cleanup;
1370f9790aebSLuigi Rizzo 		}
1371f9790aebSLuigi Rizzo 		my_start = j = kring->nkr_hwlease;
1372f9790aebSLuigi Rizzo 		howmany = nm_kr_space(kring, 1);
1373f9790aebSLuigi Rizzo 		if (needed < howmany)
1374f9790aebSLuigi Rizzo 			howmany = needed;
1375f9790aebSLuigi Rizzo 		lease_idx = nm_kr_lease(kring, howmany, 1);
1376f9790aebSLuigi Rizzo 		mtx_unlock(&kring->q_lock);
1377f9790aebSLuigi Rizzo 
1378f9790aebSLuigi Rizzo 		/* only retry if we need more than available slots */
1379f9790aebSLuigi Rizzo 		if (retry && needed <= howmany)
1380f9790aebSLuigi Rizzo 			retry = 0;
1381f9790aebSLuigi Rizzo 
1382f9790aebSLuigi Rizzo 		/* copy to the destination queue */
1383f9790aebSLuigi Rizzo 		while (howmany > 0) {
1384f9790aebSLuigi Rizzo 			struct netmap_slot *slot;
1385f9790aebSLuigi Rizzo 			struct nm_bdg_fwd *ft_p, *ft_end;
1386f9790aebSLuigi Rizzo 			u_int cnt;
1387f9790aebSLuigi Rizzo 
1388f9790aebSLuigi Rizzo 			/* find the queue from which we pick next packet.
1389f9790aebSLuigi Rizzo 			 * NM_FT_NULL is always higher than valid indexes
1390f9790aebSLuigi Rizzo 			 * so we never dereference it if the other list
1391f9790aebSLuigi Rizzo 			 * has packets (and if both are empty we never
1392f9790aebSLuigi Rizzo 			 * get here).
1393f9790aebSLuigi Rizzo 			 */
1394f9790aebSLuigi Rizzo 			if (next < brd_next) {
1395f9790aebSLuigi Rizzo 				ft_p = ft + next;
1396f9790aebSLuigi Rizzo 				next = ft_p->ft_next;
1397f9790aebSLuigi Rizzo 			} else { /* insert broadcast */
1398f9790aebSLuigi Rizzo 				ft_p = ft + brd_next;
1399f9790aebSLuigi Rizzo 				brd_next = ft_p->ft_next;
1400f9790aebSLuigi Rizzo 			}
1401f9790aebSLuigi Rizzo 			cnt = ft_p->ft_frags; // cnt > 0
1402f9790aebSLuigi Rizzo 			if (unlikely(cnt > howmany))
1403f9790aebSLuigi Rizzo 			    break; /* no more space */
1404f9790aebSLuigi Rizzo 			if (netmap_verbose && cnt > 1)
1405f9790aebSLuigi Rizzo 				RD(5, "rx %d frags to %d", cnt, j);
1406f9790aebSLuigi Rizzo 			ft_end = ft_p + cnt;
1407f0ea3689SLuigi Rizzo 			if (unlikely(virt_hdr_mismatch)) {
1408f0ea3689SLuigi Rizzo 				bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
1409f0ea3689SLuigi Rizzo 			} else {
1410f0ea3689SLuigi Rizzo 				howmany -= cnt;
1411f9790aebSLuigi Rizzo 				do {
1412f9790aebSLuigi Rizzo 					char *dst, *src = ft_p->ft_buf;
1413f9790aebSLuigi Rizzo 					size_t copy_len = ft_p->ft_len, dst_len = copy_len;
1414f9790aebSLuigi Rizzo 
1415f9790aebSLuigi Rizzo 					slot = &ring->slot[j];
1416f9790aebSLuigi Rizzo 					dst = BDG_NMB(&dst_na->up, slot);
1417f9790aebSLuigi Rizzo 
141817885a7bSLuigi Rizzo 					ND("send [%d] %d(%d) bytes at %s:%d",
141917885a7bSLuigi Rizzo 							i, (int)copy_len, (int)dst_len,
142017885a7bSLuigi Rizzo 							NM_IFPNAME(dst_ifp), j);
1421f9790aebSLuigi Rizzo 					/* round to a multiple of 64 */
1422f9790aebSLuigi Rizzo 					copy_len = (copy_len + 63) & ~63;
1423f9790aebSLuigi Rizzo 
1424*e31c6ec7SLuigi Rizzo 					if (unlikely(copy_len > NETMAP_BUF_SIZE ||
1425*e31c6ec7SLuigi Rizzo 							copy_len > NETMAP_BUF_SIZE)) {
1426*e31c6ec7SLuigi Rizzo 						RD(5, "invalid len %d, down to 64", (int)copy_len);
1427*e31c6ec7SLuigi Rizzo 						copy_len = dst_len = 64; // XXX
1428*e31c6ec7SLuigi Rizzo 					}
1429f9790aebSLuigi Rizzo 					if (ft_p->ft_flags & NS_INDIRECT) {
1430f9790aebSLuigi Rizzo 						if (copyin(src, dst, copy_len)) {
1431f9790aebSLuigi Rizzo 							// invalid user pointer, pretend len is 0
1432f9790aebSLuigi Rizzo 							dst_len = 0;
1433f9790aebSLuigi Rizzo 						}
1434f9790aebSLuigi Rizzo 					} else {
1435f9790aebSLuigi Rizzo 						//memcpy(dst, src, copy_len);
1436f9790aebSLuigi Rizzo 						pkt_copy(src, dst, (int)copy_len);
1437f9790aebSLuigi Rizzo 					}
1438f9790aebSLuigi Rizzo 					slot->len = dst_len;
1439f9790aebSLuigi Rizzo 					slot->flags = (cnt << 8)| NS_MOREFRAG;
1440f9790aebSLuigi Rizzo 					j = nm_next(j, lim);
1441f0ea3689SLuigi Rizzo 					needed--;
1442f9790aebSLuigi Rizzo 					ft_p++;
1443f9790aebSLuigi Rizzo 				} while (ft_p != ft_end);
1444f9790aebSLuigi Rizzo 				slot->flags = (cnt << 8); /* clear flag on last entry */
1445f0ea3689SLuigi Rizzo 			}
1446f9790aebSLuigi Rizzo 			/* are we done ? */
1447f9790aebSLuigi Rizzo 			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
1448f9790aebSLuigi Rizzo 				break;
1449f9790aebSLuigi Rizzo 		}
1450f9790aebSLuigi Rizzo 		{
1451f9790aebSLuigi Rizzo 		    /* current position */
1452f9790aebSLuigi Rizzo 		    uint32_t *p = kring->nkr_leases; /* shorthand */
1453f9790aebSLuigi Rizzo 		    uint32_t update_pos;
1454f9790aebSLuigi Rizzo 		    int still_locked = 1;
1455f9790aebSLuigi Rizzo 
1456f9790aebSLuigi Rizzo 		    mtx_lock(&kring->q_lock);
1457f9790aebSLuigi Rizzo 		    if (unlikely(howmany > 0)) {
1458f9790aebSLuigi Rizzo 			/* not used all bufs. If i am the last one
1459f9790aebSLuigi Rizzo 			 * i can recover the slots, otherwise must
1460f9790aebSLuigi Rizzo 			 * fill them with 0 to mark empty packets.
1461f9790aebSLuigi Rizzo 			 */
1462f9790aebSLuigi Rizzo 			ND("leftover %d bufs", howmany);
1463f9790aebSLuigi Rizzo 			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
1464f9790aebSLuigi Rizzo 			    /* yes i am the last one */
1465f9790aebSLuigi Rizzo 			    ND("roll back nkr_hwlease to %d", j);
1466f9790aebSLuigi Rizzo 			    kring->nkr_hwlease = j;
1467f9790aebSLuigi Rizzo 			} else {
1468f9790aebSLuigi Rizzo 			    while (howmany-- > 0) {
1469f9790aebSLuigi Rizzo 				ring->slot[j].len = 0;
1470f9790aebSLuigi Rizzo 				ring->slot[j].flags = 0;
1471f9790aebSLuigi Rizzo 				j = nm_next(j, lim);
1472f9790aebSLuigi Rizzo 			    }
1473f9790aebSLuigi Rizzo 			}
1474f9790aebSLuigi Rizzo 		    }
1475f9790aebSLuigi Rizzo 		    p[lease_idx] = j; /* report I am done */
1476f9790aebSLuigi Rizzo 
147717885a7bSLuigi Rizzo 		    update_pos = kring->nr_hwtail;
1478f9790aebSLuigi Rizzo 
1479f9790aebSLuigi Rizzo 		    if (my_start == update_pos) {
1480f9790aebSLuigi Rizzo 			/* all slots before my_start have been reported,
1481f9790aebSLuigi Rizzo 			 * so scan subsequent leases to see if other ranges
1482f9790aebSLuigi Rizzo 			 * have been completed, and do a selwakeup or txsync.
1483f9790aebSLuigi Rizzo 		         */
1484f9790aebSLuigi Rizzo 			while (lease_idx != kring->nkr_lease_idx &&
1485f9790aebSLuigi Rizzo 				p[lease_idx] != NR_NOSLOT) {
1486f9790aebSLuigi Rizzo 			    j = p[lease_idx];
1487f9790aebSLuigi Rizzo 			    p[lease_idx] = NR_NOSLOT;
1488f9790aebSLuigi Rizzo 			    lease_idx = nm_next(lease_idx, lim);
1489f9790aebSLuigi Rizzo 			}
1490f9790aebSLuigi Rizzo 			/* j is the new 'write' position. j != my_start
1491f9790aebSLuigi Rizzo 			 * means there are new buffers to report
1492f9790aebSLuigi Rizzo 			 */
1493f9790aebSLuigi Rizzo 			if (likely(j != my_start)) {
149417885a7bSLuigi Rizzo 				kring->nr_hwtail = j;
1495f9790aebSLuigi Rizzo 				still_locked = 0;
1496f9790aebSLuigi Rizzo 				mtx_unlock(&kring->q_lock);
1497f0ea3689SLuigi Rizzo 				dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1498f9790aebSLuigi Rizzo 				if (dst_na->retry && retry--)
1499f9790aebSLuigi Rizzo 					goto retry;
1500f9790aebSLuigi Rizzo 			}
1501f9790aebSLuigi Rizzo 		    }
1502f9790aebSLuigi Rizzo 		    if (still_locked)
1503f9790aebSLuigi Rizzo 			mtx_unlock(&kring->q_lock);
1504f9790aebSLuigi Rizzo 		}
1505f9790aebSLuigi Rizzo cleanup:
1506f9790aebSLuigi Rizzo 		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
1507f9790aebSLuigi Rizzo 		d->bq_len = 0;
1508f9790aebSLuigi Rizzo 	}
1509f9790aebSLuigi Rizzo 	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
1510f9790aebSLuigi Rizzo 	brddst->bq_len = 0;
1511f9790aebSLuigi Rizzo 	return 0;
1512f9790aebSLuigi Rizzo }
1513f9790aebSLuigi Rizzo 
151417885a7bSLuigi Rizzo 
1515f9790aebSLuigi Rizzo static int
1516f9790aebSLuigi Rizzo netmap_vp_txsync(struct netmap_vp_adapter *na, u_int ring_nr, int flags)
1517f9790aebSLuigi Rizzo {
1518f9790aebSLuigi Rizzo 	struct netmap_kring *kring = &na->up.tx_rings[ring_nr];
151917885a7bSLuigi Rizzo 	u_int done;
152017885a7bSLuigi Rizzo 	u_int const lim = kring->nkr_num_slots - 1;
152117885a7bSLuigi Rizzo 	u_int const cur = kring->rcur;
1522f9790aebSLuigi Rizzo 
1523f9790aebSLuigi Rizzo 	if (bridge_batch <= 0) { /* testing only */
152417885a7bSLuigi Rizzo 		done = cur; // used all
1525f9790aebSLuigi Rizzo 		goto done;
1526f9790aebSLuigi Rizzo 	}
1527f9790aebSLuigi Rizzo 	if (bridge_batch > NM_BDG_BATCH)
1528f9790aebSLuigi Rizzo 		bridge_batch = NM_BDG_BATCH;
1529f9790aebSLuigi Rizzo 
153017885a7bSLuigi Rizzo 	done = nm_bdg_preflush(na, ring_nr, kring, cur);
1531f9790aebSLuigi Rizzo done:
153217885a7bSLuigi Rizzo 	if (done != cur)
153317885a7bSLuigi Rizzo 		D("early break at %d/ %d, tail %d", done, cur, kring->nr_hwtail);
153417885a7bSLuigi Rizzo 	/*
153517885a7bSLuigi Rizzo 	 * packets between 'done' and 'cur' are left unsent.
153617885a7bSLuigi Rizzo 	 */
153717885a7bSLuigi Rizzo 	kring->nr_hwcur = done;
153817885a7bSLuigi Rizzo 	kring->nr_hwtail = nm_prev(done, lim);
153917885a7bSLuigi Rizzo 	nm_txsync_finalize(kring);
1540f9790aebSLuigi Rizzo 	if (netmap_verbose)
1541f9790aebSLuigi Rizzo 		D("%s ring %d flags %d", NM_IFPNAME(na->up.ifp), ring_nr, flags);
1542f9790aebSLuigi Rizzo 	return 0;
1543f9790aebSLuigi Rizzo }
1544f9790aebSLuigi Rizzo 
1545f9790aebSLuigi Rizzo 
1546f9790aebSLuigi Rizzo /*
1547f9790aebSLuigi Rizzo  * main dispatch routine for the bridge.
1548f9790aebSLuigi Rizzo  * We already know that only one thread is running this.
1549f9790aebSLuigi Rizzo  * we must run nm_bdg_preflush without lock.
1550f9790aebSLuigi Rizzo  */
1551f9790aebSLuigi Rizzo static int
1552f9790aebSLuigi Rizzo bdg_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
1553f9790aebSLuigi Rizzo {
1554f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
1555f9790aebSLuigi Rizzo 	return netmap_vp_txsync(vpna, ring_nr, flags);
1556f9790aebSLuigi Rizzo }
1557f9790aebSLuigi Rizzo 
155817885a7bSLuigi Rizzo static int
155917885a7bSLuigi Rizzo netmap_vp_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
156017885a7bSLuigi Rizzo {
156117885a7bSLuigi Rizzo 	struct netmap_kring *kring = &na->rx_rings[ring_nr];
156217885a7bSLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
156317885a7bSLuigi Rizzo 	u_int nm_i, lim = kring->nkr_num_slots - 1;
156417885a7bSLuigi Rizzo 	u_int head = nm_rxsync_prologue(kring);
156517885a7bSLuigi Rizzo 	int n;
156617885a7bSLuigi Rizzo 
156717885a7bSLuigi Rizzo 	if (head > lim) {
156817885a7bSLuigi Rizzo 		D("ouch dangerous reset!!!");
156917885a7bSLuigi Rizzo 		n = netmap_ring_reinit(kring);
157017885a7bSLuigi Rizzo 		goto done;
157117885a7bSLuigi Rizzo 	}
157217885a7bSLuigi Rizzo 
157317885a7bSLuigi Rizzo 	/* First part, import newly received packets. */
157417885a7bSLuigi Rizzo 	/* actually nothing to do here, they are already in the kring */
157517885a7bSLuigi Rizzo 
157617885a7bSLuigi Rizzo 	/* Second part, skip past packets that userspace has released. */
157717885a7bSLuigi Rizzo 	nm_i = kring->nr_hwcur;
157817885a7bSLuigi Rizzo 	if (nm_i != head) {
157917885a7bSLuigi Rizzo 		/* consistency check, but nothing really important here */
158017885a7bSLuigi Rizzo 		for (n = 0; likely(nm_i != head); n++) {
158117885a7bSLuigi Rizzo 			struct netmap_slot *slot = &ring->slot[nm_i];
158217885a7bSLuigi Rizzo 			void *addr = BDG_NMB(na, slot);
158317885a7bSLuigi Rizzo 
158417885a7bSLuigi Rizzo 			if (addr == netmap_buffer_base) { /* bad buf */
158517885a7bSLuigi Rizzo 				D("bad buffer index %d, ignore ?",
158617885a7bSLuigi Rizzo 					slot->buf_idx);
158717885a7bSLuigi Rizzo 			}
158817885a7bSLuigi Rizzo 			slot->flags &= ~NS_BUF_CHANGED;
158917885a7bSLuigi Rizzo 			nm_i = nm_next(nm_i, lim);
159017885a7bSLuigi Rizzo 		}
159117885a7bSLuigi Rizzo 		kring->nr_hwcur = head;
159217885a7bSLuigi Rizzo 	}
159317885a7bSLuigi Rizzo 
159417885a7bSLuigi Rizzo 	/* tell userspace that there are new packets */
159517885a7bSLuigi Rizzo 	nm_rxsync_finalize(kring);
159617885a7bSLuigi Rizzo 	n = 0;
159717885a7bSLuigi Rizzo done:
159817885a7bSLuigi Rizzo 	return n;
159917885a7bSLuigi Rizzo }
1600f9790aebSLuigi Rizzo 
1601f9790aebSLuigi Rizzo /*
1602f9790aebSLuigi Rizzo  * user process reading from a VALE switch.
1603f9790aebSLuigi Rizzo  * Already protected against concurrent calls from userspace,
1604f9790aebSLuigi Rizzo  * but we must acquire the queue's lock to protect against
1605f9790aebSLuigi Rizzo  * writers on the same queue.
1606f9790aebSLuigi Rizzo  */
1607f9790aebSLuigi Rizzo static int
1608f9790aebSLuigi Rizzo bdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
1609f9790aebSLuigi Rizzo {
1610f9790aebSLuigi Rizzo 	struct netmap_kring *kring = &na->rx_rings[ring_nr];
1611f9790aebSLuigi Rizzo 	int n;
1612f9790aebSLuigi Rizzo 
1613f9790aebSLuigi Rizzo 	mtx_lock(&kring->q_lock);
161417885a7bSLuigi Rizzo 	n = netmap_vp_rxsync(na, ring_nr, flags);
1615f9790aebSLuigi Rizzo 	mtx_unlock(&kring->q_lock);
1616f9790aebSLuigi Rizzo 	return n;
1617f9790aebSLuigi Rizzo }
1618f9790aebSLuigi Rizzo 
161917885a7bSLuigi Rizzo 
1620f9790aebSLuigi Rizzo static int
1621f9790aebSLuigi Rizzo bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp)
1622f9790aebSLuigi Rizzo {
1623f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna;
1624f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
1625f9790aebSLuigi Rizzo 	int error;
1626f0ea3689SLuigi Rizzo 	u_int npipes = 0;
1627f9790aebSLuigi Rizzo 
1628f9790aebSLuigi Rizzo 	vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
1629f9790aebSLuigi Rizzo 	if (vpna == NULL)
1630f9790aebSLuigi Rizzo 		return ENOMEM;
1631f9790aebSLuigi Rizzo 
1632f9790aebSLuigi Rizzo  	na = &vpna->up;
1633f9790aebSLuigi Rizzo 
1634f9790aebSLuigi Rizzo 	na->ifp = ifp;
1635f9790aebSLuigi Rizzo 
1636f9790aebSLuigi Rizzo 	/* bound checking */
1637f9790aebSLuigi Rizzo 	na->num_tx_rings = nmr->nr_tx_rings;
1638f9790aebSLuigi Rizzo 	nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1639f9790aebSLuigi Rizzo 	nmr->nr_tx_rings = na->num_tx_rings; // write back
1640f9790aebSLuigi Rizzo 	na->num_rx_rings = nmr->nr_rx_rings;
1641f9790aebSLuigi Rizzo 	nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1642f9790aebSLuigi Rizzo 	nmr->nr_rx_rings = na->num_rx_rings; // write back
1643f9790aebSLuigi Rizzo 	nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
1644f9790aebSLuigi Rizzo 			1, NM_BDG_MAXSLOTS, NULL);
1645f9790aebSLuigi Rizzo 	na->num_tx_desc = nmr->nr_tx_slots;
1646f9790aebSLuigi Rizzo 	nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
1647f9790aebSLuigi Rizzo 			1, NM_BDG_MAXSLOTS, NULL);
1648f0ea3689SLuigi Rizzo 	/* validate number of pipes. We want at least 1,
1649f0ea3689SLuigi Rizzo 	 * but probably can do with some more.
1650f0ea3689SLuigi Rizzo 	 * So let's use 2 as default (when 0 is supplied)
1651f0ea3689SLuigi Rizzo 	 */
1652f0ea3689SLuigi Rizzo 	npipes = nmr->nr_arg1;
1653f0ea3689SLuigi Rizzo 	nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
1654f0ea3689SLuigi Rizzo 	nmr->nr_arg1 = npipes;	/* write back */
1655f0ea3689SLuigi Rizzo 	/* validate extra bufs */
1656f0ea3689SLuigi Rizzo 	nm_bound_var(&nmr->nr_arg3, 0, 0,
1657f0ea3689SLuigi Rizzo 			128*NM_BDG_MAXSLOTS, NULL);
1658f9790aebSLuigi Rizzo 	na->num_rx_desc = nmr->nr_rx_slots;
1659f0ea3689SLuigi Rizzo 	vpna->virt_hdr_len = 0;
1660f0ea3689SLuigi Rizzo 	vpna->mfs = 1514;
1661f0ea3689SLuigi Rizzo 	/*if (vpna->mfs > netmap_buf_size)  TODO netmap_buf_size is zero??
1662f0ea3689SLuigi Rizzo 		vpna->mfs = netmap_buf_size; */
1663f0ea3689SLuigi Rizzo         if (netmap_verbose)
1664f0ea3689SLuigi Rizzo 		D("max frame size %u", vpna->mfs);
1665f9790aebSLuigi Rizzo 
1666f9790aebSLuigi Rizzo 	na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
1667f9790aebSLuigi Rizzo 	na->nm_txsync = bdg_netmap_txsync;
1668f9790aebSLuigi Rizzo 	na->nm_rxsync = bdg_netmap_rxsync;
1669f9790aebSLuigi Rizzo 	na->nm_register = bdg_netmap_reg;
1670f9790aebSLuigi Rizzo 	na->nm_dtor = netmap_adapter_vp_dtor;
1671f9790aebSLuigi Rizzo 	na->nm_krings_create = netmap_vp_krings_create;
1672f9790aebSLuigi Rizzo 	na->nm_krings_delete = netmap_vp_krings_delete;
1673f9790aebSLuigi Rizzo 	na->nm_mem = netmap_mem_private_new(NM_IFPNAME(na->ifp),
1674f9790aebSLuigi Rizzo 			na->num_tx_rings, na->num_tx_desc,
1675f0ea3689SLuigi Rizzo 			na->num_rx_rings, na->num_rx_desc,
1676f0ea3689SLuigi Rizzo 			nmr->nr_arg3, npipes, &error);
1677f0ea3689SLuigi Rizzo 	if (na->nm_mem == NULL)
1678f0ea3689SLuigi Rizzo 		goto err;
1679f9790aebSLuigi Rizzo 	/* other nmd fields are set in the common routine */
1680f9790aebSLuigi Rizzo 	error = netmap_attach_common(na);
1681f0ea3689SLuigi Rizzo 	if (error)
1682f0ea3689SLuigi Rizzo 		goto err;
1683f0ea3689SLuigi Rizzo 	return 0;
1684f0ea3689SLuigi Rizzo 
1685f0ea3689SLuigi Rizzo err:
1686f0ea3689SLuigi Rizzo 	if (na->nm_mem != NULL)
1687f0ea3689SLuigi Rizzo 		netmap_mem_private_delete(na->nm_mem);
1688f9790aebSLuigi Rizzo 	free(vpna, M_DEVBUF);
1689f9790aebSLuigi Rizzo 	return error;
1690f9790aebSLuigi Rizzo }
1691f9790aebSLuigi Rizzo 
169217885a7bSLuigi Rizzo 
1693f9790aebSLuigi Rizzo static void
1694f9790aebSLuigi Rizzo netmap_bwrap_dtor(struct netmap_adapter *na)
1695f9790aebSLuigi Rizzo {
1696f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1697f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
1698f9790aebSLuigi Rizzo 	struct nm_bridge *b = bna->up.na_bdg,
1699f9790aebSLuigi Rizzo 		*bh = bna->host.na_bdg;
1700f9790aebSLuigi Rizzo 	struct ifnet *ifp = na->ifp;
1701f9790aebSLuigi Rizzo 
1702f9790aebSLuigi Rizzo 	ND("na %p", na);
1703f9790aebSLuigi Rizzo 
1704f9790aebSLuigi Rizzo 	if (b) {
1705f9790aebSLuigi Rizzo 		netmap_bdg_detach_common(b, bna->up.bdg_port,
1706f9790aebSLuigi Rizzo 			(bh ? bna->host.bdg_port : -1));
1707f9790aebSLuigi Rizzo 	}
1708f9790aebSLuigi Rizzo 
1709f9790aebSLuigi Rizzo 	hwna->na_private = NULL;
1710f9790aebSLuigi Rizzo 	netmap_adapter_put(hwna);
1711f9790aebSLuigi Rizzo 
1712f9790aebSLuigi Rizzo 	bzero(ifp, sizeof(*ifp));
1713f9790aebSLuigi Rizzo 	free(ifp, M_DEVBUF);
1714f9790aebSLuigi Rizzo 	na->ifp = NULL;
1715f9790aebSLuigi Rizzo 
1716f9790aebSLuigi Rizzo }
1717f9790aebSLuigi Rizzo 
171817885a7bSLuigi Rizzo 
1719f9790aebSLuigi Rizzo /*
172017885a7bSLuigi Rizzo  * Intr callback for NICs connected to a bridge.
172117885a7bSLuigi Rizzo  * Simply ignore tx interrupts (maybe we could try to recover space ?)
172217885a7bSLuigi Rizzo  * and pass received packets from nic to the bridge.
172317885a7bSLuigi Rizzo  *
1724f9790aebSLuigi Rizzo  * XXX TODO check locking: this is called from the interrupt
1725f9790aebSLuigi Rizzo  * handler so we should make sure that the interface is not
1726f9790aebSLuigi Rizzo  * disconnected while passing down an interrupt.
1727f9790aebSLuigi Rizzo  *
172817885a7bSLuigi Rizzo  * Note, no user process can access this NIC or the host stack.
172917885a7bSLuigi Rizzo  * The only part of the ring that is significant are the slots,
173017885a7bSLuigi Rizzo  * and head/cur/tail are set from the kring as needed
173117885a7bSLuigi Rizzo  * (part as a receive ring, part as a transmit ring).
173217885a7bSLuigi Rizzo  *
173317885a7bSLuigi Rizzo  * callback that overwrites the hwna notify callback.
1734f9790aebSLuigi Rizzo  * Packets come from the outside or from the host stack and are put on an hwna rx ring.
1735f9790aebSLuigi Rizzo  * The bridge wrapper then sends the packets through the bridge.
1736f9790aebSLuigi Rizzo  */
1737f9790aebSLuigi Rizzo static int
1738f9790aebSLuigi Rizzo netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
1739f9790aebSLuigi Rizzo {
1740f9790aebSLuigi Rizzo 	struct ifnet *ifp = na->ifp;
1741f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna = na->na_private;
1742f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *hostna = &bna->host;
1743f9790aebSLuigi Rizzo 	struct netmap_kring *kring, *bkring;
1744f9790aebSLuigi Rizzo 	struct netmap_ring *ring;
1745f9790aebSLuigi Rizzo 	int is_host_ring = ring_nr == na->num_rx_rings;
1746f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna = &bna->up;
1747f9790aebSLuigi Rizzo 	int error = 0;
1748f9790aebSLuigi Rizzo 
174917885a7bSLuigi Rizzo 	if (netmap_verbose)
175017885a7bSLuigi Rizzo 	    D("%s %s%d 0x%x", NM_IFPNAME(ifp),
175117885a7bSLuigi Rizzo 		(tx == NR_TX ? "TX" : "RX"), ring_nr, flags);
1752f9790aebSLuigi Rizzo 
1753f9790aebSLuigi Rizzo 	if (flags & NAF_DISABLE_NOTIFY) {
1754f9790aebSLuigi Rizzo 		kring = tx == NR_TX ? na->tx_rings : na->rx_rings;
1755f9790aebSLuigi Rizzo 		bkring = tx == NR_TX ? vpna->up.rx_rings : vpna->up.tx_rings;
175617885a7bSLuigi Rizzo 		if (kring[ring_nr].nkr_stopped)
175717885a7bSLuigi Rizzo 			netmap_disable_ring(&bkring[ring_nr]);
1758f9790aebSLuigi Rizzo 		else
175917885a7bSLuigi Rizzo 			bkring[ring_nr].nkr_stopped = 0;
1760f9790aebSLuigi Rizzo 		return 0;
1761f9790aebSLuigi Rizzo 	}
1762f9790aebSLuigi Rizzo 
1763f9790aebSLuigi Rizzo 	if (ifp == NULL || !(ifp->if_capenable & IFCAP_NETMAP))
1764f9790aebSLuigi Rizzo 		return 0;
1765f9790aebSLuigi Rizzo 
176617885a7bSLuigi Rizzo 	/* we only care about receive interrupts */
1767f9790aebSLuigi Rizzo 	if (tx == NR_TX)
1768f9790aebSLuigi Rizzo 		return 0;
1769f9790aebSLuigi Rizzo 
1770f9790aebSLuigi Rizzo 	kring = &na->rx_rings[ring_nr];
1771f9790aebSLuigi Rizzo 	ring = kring->ring;
1772f9790aebSLuigi Rizzo 
1773f9790aebSLuigi Rizzo 	/* make sure the ring is not disabled */
1774f9790aebSLuigi Rizzo 	if (nm_kr_tryget(kring))
1775f9790aebSLuigi Rizzo 		return 0;
1776f9790aebSLuigi Rizzo 
1777f9790aebSLuigi Rizzo 	if (is_host_ring && hostna->na_bdg == NULL) {
1778f9790aebSLuigi Rizzo 		error = bna->save_notify(na, ring_nr, tx, flags);
1779f9790aebSLuigi Rizzo 		goto put_out;
1780f9790aebSLuigi Rizzo 	}
1781f9790aebSLuigi Rizzo 
178217885a7bSLuigi Rizzo 	/* Here we expect ring->head = ring->cur = ring->tail
178317885a7bSLuigi Rizzo 	 * because everything has been released from the previous round.
178417885a7bSLuigi Rizzo 	 * However the ring is shared and we might have info from
178517885a7bSLuigi Rizzo 	 * the wrong side (the tx ring). Hence we overwrite with
178617885a7bSLuigi Rizzo 	 * the info from the rx kring.
178717885a7bSLuigi Rizzo 	 */
178817885a7bSLuigi Rizzo 	if (netmap_verbose)
178917885a7bSLuigi Rizzo 	    D("%s head %d cur %d tail %d (kring %d %d %d)",  NM_IFPNAME(ifp),
179017885a7bSLuigi Rizzo 		ring->head, ring->cur, ring->tail,
179117885a7bSLuigi Rizzo 		kring->rhead, kring->rcur, kring->rtail);
179217885a7bSLuigi Rizzo 
179317885a7bSLuigi Rizzo 	ring->head = kring->rhead;
179417885a7bSLuigi Rizzo 	ring->cur = kring->rcur;
179517885a7bSLuigi Rizzo 	ring->tail = kring->rtail;
179617885a7bSLuigi Rizzo 
1797f9790aebSLuigi Rizzo 	if (is_host_ring) {
1798f9790aebSLuigi Rizzo 		vpna = hostna;
1799f9790aebSLuigi Rizzo 		ring_nr = 0;
1800f0ea3689SLuigi Rizzo 	}
1801f0ea3689SLuigi Rizzo 	/* simulate a user wakeup on the rx ring */
1802f9790aebSLuigi Rizzo 	/* fetch packets that have arrived.
1803f9790aebSLuigi Rizzo 	 * XXX maybe do this in a loop ?
1804f9790aebSLuigi Rizzo 	 */
1805f0ea3689SLuigi Rizzo 	error = kring->nm_sync(kring, 0);
1806f9790aebSLuigi Rizzo 	if (error)
1807f9790aebSLuigi Rizzo 		goto put_out;
180817885a7bSLuigi Rizzo 	if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) {
1809f9790aebSLuigi Rizzo 		D("how strange, interrupt with no packets on %s",
1810f9790aebSLuigi Rizzo 			NM_IFPNAME(ifp));
1811f9790aebSLuigi Rizzo 		goto put_out;
1812f9790aebSLuigi Rizzo 	}
181317885a7bSLuigi Rizzo 
181417885a7bSLuigi Rizzo 	/* new packets are ring->cur to ring->tail, and the bkring
181517885a7bSLuigi Rizzo 	 * had hwcur == ring->cur. So advance ring->cur to ring->tail
181617885a7bSLuigi Rizzo 	 * to push all packets out.
181717885a7bSLuigi Rizzo 	 */
181817885a7bSLuigi Rizzo 	ring->head = ring->cur = ring->tail;
181917885a7bSLuigi Rizzo 
182017885a7bSLuigi Rizzo 	/* also set tail to what the bwrap expects */
182117885a7bSLuigi Rizzo 	bkring = &vpna->up.tx_rings[ring_nr];
182217885a7bSLuigi Rizzo 	ring->tail = bkring->nr_hwtail; // rtail too ?
182317885a7bSLuigi Rizzo 
182417885a7bSLuigi Rizzo 	/* pass packets to the switch */
182517885a7bSLuigi Rizzo 	nm_txsync_prologue(bkring); // XXX error checking ?
1826f9790aebSLuigi Rizzo 	netmap_vp_txsync(vpna, ring_nr, flags);
1827f9790aebSLuigi Rizzo 
182817885a7bSLuigi Rizzo 	/* mark all buffers as released on this ring */
182917885a7bSLuigi Rizzo 	ring->head = ring->cur = kring->nr_hwtail;
183017885a7bSLuigi Rizzo 	ring->tail = kring->rtail;
183117885a7bSLuigi Rizzo 	/* another call to actually release the buffers */
183217885a7bSLuigi Rizzo 	if (!is_host_ring) {
1833f0ea3689SLuigi Rizzo 		error = kring->nm_sync(kring, 0);
183417885a7bSLuigi Rizzo 	} else {
183517885a7bSLuigi Rizzo 		/* mark all packets as released, as in the
183617885a7bSLuigi Rizzo 		 * second part of netmap_rxsync_from_host()
183717885a7bSLuigi Rizzo 		 */
183817885a7bSLuigi Rizzo 		kring->nr_hwcur = kring->nr_hwtail;
183917885a7bSLuigi Rizzo 		nm_rxsync_finalize(kring);
184017885a7bSLuigi Rizzo 	}
1841f9790aebSLuigi Rizzo 
1842f9790aebSLuigi Rizzo put_out:
1843f9790aebSLuigi Rizzo 	nm_kr_put(kring);
1844f9790aebSLuigi Rizzo 	return error;
1845f9790aebSLuigi Rizzo }
1846f9790aebSLuigi Rizzo 
184717885a7bSLuigi Rizzo 
1848f9790aebSLuigi Rizzo static int
1849f9790aebSLuigi Rizzo netmap_bwrap_register(struct netmap_adapter *na, int onoff)
1850f9790aebSLuigi Rizzo {
1851f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
1852f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
1853f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
1854f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *hostna = &bna->host;
1855f9790aebSLuigi Rizzo 	int error;
1856f9790aebSLuigi Rizzo 
185717885a7bSLuigi Rizzo 	ND("%s %s", NM_IFPNAME(na->ifp), onoff ? "on" : "off");
1858f9790aebSLuigi Rizzo 
1859f9790aebSLuigi Rizzo 	if (onoff) {
1860f9790aebSLuigi Rizzo 		int i;
1861f9790aebSLuigi Rizzo 
1862f9790aebSLuigi Rizzo 		hwna->na_lut = na->na_lut;
1863f9790aebSLuigi Rizzo 		hwna->na_lut_objtotal = na->na_lut_objtotal;
1864f9790aebSLuigi Rizzo 
1865f9790aebSLuigi Rizzo 		if (hostna->na_bdg) {
1866f9790aebSLuigi Rizzo 			hostna->up.na_lut = na->na_lut;
1867f9790aebSLuigi Rizzo 			hostna->up.na_lut_objtotal = na->na_lut_objtotal;
1868f9790aebSLuigi Rizzo 		}
1869f9790aebSLuigi Rizzo 
18700c7ba37eSLuigi Rizzo 		/* cross-link the netmap rings
18710c7ba37eSLuigi Rizzo 		 * The original number of rings comes from hwna,
18720c7ba37eSLuigi Rizzo 		 * rx rings on one side equals tx rings on the other.
18730c7ba37eSLuigi Rizzo 		 */
1874f0ea3689SLuigi Rizzo 		for (i = 0; i < na->num_rx_rings + 1; i++) {
1875f9790aebSLuigi Rizzo 			hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
1876f9790aebSLuigi Rizzo 			hwna->tx_rings[i].ring = na->rx_rings[i].ring;
1877f9790aebSLuigi Rizzo 		}
1878f0ea3689SLuigi Rizzo 		for (i = 0; i < na->num_tx_rings + 1; i++) {
1879f9790aebSLuigi Rizzo 			hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
1880f9790aebSLuigi Rizzo 			hwna->rx_rings[i].ring = na->tx_rings[i].ring;
1881f9790aebSLuigi Rizzo 		}
1882f9790aebSLuigi Rizzo 	}
1883f9790aebSLuigi Rizzo 
1884f9790aebSLuigi Rizzo 	if (hwna->ifp) {
1885f9790aebSLuigi Rizzo 		error = hwna->nm_register(hwna, onoff);
1886f9790aebSLuigi Rizzo 		if (error)
1887f9790aebSLuigi Rizzo 			return error;
1888f9790aebSLuigi Rizzo 	}
1889f9790aebSLuigi Rizzo 
1890f9790aebSLuigi Rizzo 	bdg_netmap_reg(na, onoff);
1891f9790aebSLuigi Rizzo 
1892f9790aebSLuigi Rizzo 	if (onoff) {
1893f9790aebSLuigi Rizzo 		bna->save_notify = hwna->nm_notify;
1894f9790aebSLuigi Rizzo 		hwna->nm_notify = netmap_bwrap_intr_notify;
1895f9790aebSLuigi Rizzo 	} else {
1896f9790aebSLuigi Rizzo 		hwna->nm_notify = bna->save_notify;
1897f9790aebSLuigi Rizzo 		hwna->na_lut = NULL;
1898f9790aebSLuigi Rizzo 		hwna->na_lut_objtotal = 0;
1899f9790aebSLuigi Rizzo 	}
1900f9790aebSLuigi Rizzo 
1901f9790aebSLuigi Rizzo 	return 0;
1902f9790aebSLuigi Rizzo }
1903f9790aebSLuigi Rizzo 
190417885a7bSLuigi Rizzo 
1905f9790aebSLuigi Rizzo static int
1906f9790aebSLuigi Rizzo netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
1907f9790aebSLuigi Rizzo 				    u_int *rxr, u_int *rxd)
1908f9790aebSLuigi Rizzo {
1909f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
1910f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
1911f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
1912f9790aebSLuigi Rizzo 
1913f9790aebSLuigi Rizzo 	/* forward the request */
1914f9790aebSLuigi Rizzo 	netmap_update_config(hwna);
1915f9790aebSLuigi Rizzo 	/* swap the results */
1916f9790aebSLuigi Rizzo 	*txr = hwna->num_rx_rings;
1917f9790aebSLuigi Rizzo 	*txd = hwna->num_rx_desc;
1918f9790aebSLuigi Rizzo 	*rxr = hwna->num_tx_rings;
1919f9790aebSLuigi Rizzo 	*rxd = hwna->num_rx_desc;
1920f9790aebSLuigi Rizzo 
1921f9790aebSLuigi Rizzo 	return 0;
1922f9790aebSLuigi Rizzo }
1923f9790aebSLuigi Rizzo 
192417885a7bSLuigi Rizzo 
1925f9790aebSLuigi Rizzo static int
1926f9790aebSLuigi Rizzo netmap_bwrap_krings_create(struct netmap_adapter *na)
1927f9790aebSLuigi Rizzo {
1928f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
1929f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
1930f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
1931f9790aebSLuigi Rizzo 	struct netmap_adapter *hostna = &bna->host.up;
1932f9790aebSLuigi Rizzo 	int error;
1933f9790aebSLuigi Rizzo 
1934f9790aebSLuigi Rizzo 	ND("%s", NM_IFPNAME(na->ifp));
1935f9790aebSLuigi Rizzo 
1936f9790aebSLuigi Rizzo 	error = netmap_vp_krings_create(na);
1937f9790aebSLuigi Rizzo 	if (error)
1938f9790aebSLuigi Rizzo 		return error;
1939f9790aebSLuigi Rizzo 
1940f9790aebSLuigi Rizzo 	error = hwna->nm_krings_create(hwna);
1941f9790aebSLuigi Rizzo 	if (error) {
1942f9790aebSLuigi Rizzo 		netmap_vp_krings_delete(na);
1943f9790aebSLuigi Rizzo 		return error;
1944f9790aebSLuigi Rizzo 	}
1945f9790aebSLuigi Rizzo 
1946f0ea3689SLuigi Rizzo 	if (na->na_flags & NAF_HOST_RINGS) {
1947f9790aebSLuigi Rizzo 		hostna->tx_rings = na->tx_rings + na->num_tx_rings;
1948f9790aebSLuigi Rizzo 		hostna->rx_rings = na->rx_rings + na->num_rx_rings;
1949f0ea3689SLuigi Rizzo 	}
1950f9790aebSLuigi Rizzo 
1951f9790aebSLuigi Rizzo 	return 0;
1952f9790aebSLuigi Rizzo }
1953f9790aebSLuigi Rizzo 
195417885a7bSLuigi Rizzo 
1955f9790aebSLuigi Rizzo static void
1956f9790aebSLuigi Rizzo netmap_bwrap_krings_delete(struct netmap_adapter *na)
1957f9790aebSLuigi Rizzo {
1958f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
1959f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
1960f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
1961f9790aebSLuigi Rizzo 
1962f9790aebSLuigi Rizzo 	ND("%s", NM_IFPNAME(na->ifp));
1963f9790aebSLuigi Rizzo 
1964f9790aebSLuigi Rizzo 	hwna->nm_krings_delete(hwna);
1965f9790aebSLuigi Rizzo 	netmap_vp_krings_delete(na);
1966f9790aebSLuigi Rizzo }
1967f9790aebSLuigi Rizzo 
196817885a7bSLuigi Rizzo 
1969f9790aebSLuigi Rizzo /* notify method for the bridge-->hwna direction */
1970f9790aebSLuigi Rizzo static int
1971f9790aebSLuigi Rizzo netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
1972f9790aebSLuigi Rizzo {
1973f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
1974f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
1975f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
1976f9790aebSLuigi Rizzo 	struct netmap_kring *kring, *hw_kring;
1977f9790aebSLuigi Rizzo 	struct netmap_ring *ring;
197817885a7bSLuigi Rizzo 	u_int lim;
1979f9790aebSLuigi Rizzo 	int error = 0;
1980f9790aebSLuigi Rizzo 
1981f9790aebSLuigi Rizzo 	if (tx == NR_TX)
1982f2637526SLuigi Rizzo 	        return EINVAL;
1983f9790aebSLuigi Rizzo 
1984f9790aebSLuigi Rizzo 	kring = &na->rx_rings[ring_n];
1985f9790aebSLuigi Rizzo 	hw_kring = &hwna->tx_rings[ring_n];
1986f9790aebSLuigi Rizzo 	ring = kring->ring;
1987f9790aebSLuigi Rizzo 	lim = kring->nkr_num_slots - 1;
1988f9790aebSLuigi Rizzo 
1989f9790aebSLuigi Rizzo 	if (hwna->ifp == NULL || !(hwna->ifp->if_capenable & IFCAP_NETMAP))
1990f9790aebSLuigi Rizzo 		return 0;
1991f0ea3689SLuigi Rizzo 	mtx_lock(&kring->q_lock);
199217885a7bSLuigi Rizzo 	/* first step: simulate a user wakeup on the rx ring */
199317885a7bSLuigi Rizzo 	netmap_vp_rxsync(na, ring_n, flags);
199417885a7bSLuigi Rizzo 	ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
1995f9790aebSLuigi Rizzo 		NM_IFPNAME(na->ifp), ring_n,
199617885a7bSLuigi Rizzo 		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
199717885a7bSLuigi Rizzo 		ring->head, ring->cur, ring->tail,
199817885a7bSLuigi Rizzo 		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail);
199917885a7bSLuigi Rizzo 	/* second step: the simulated user consumes all new packets */
200017885a7bSLuigi Rizzo 	ring->head = ring->cur = ring->tail;
200117885a7bSLuigi Rizzo 
200217885a7bSLuigi Rizzo 	/* third step: the new packets are sent on the tx ring
200317885a7bSLuigi Rizzo 	 * (which is actually the same ring)
200417885a7bSLuigi Rizzo 	 */
200517885a7bSLuigi Rizzo 	/* set tail to what the hw expects */
200617885a7bSLuigi Rizzo 	ring->tail = hw_kring->rtail;
200717885a7bSLuigi Rizzo 	nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ?
2008f0ea3689SLuigi Rizzo 	error = hw_kring->nm_sync(hw_kring, flags);
200917885a7bSLuigi Rizzo 
201017885a7bSLuigi Rizzo 	/* fourth step: now we are back the rx ring */
201117885a7bSLuigi Rizzo 	/* claim ownership on all hw owned bufs */
201217885a7bSLuigi Rizzo 	ring->head = nm_next(ring->tail, lim); /* skip past reserved slot */
201317885a7bSLuigi Rizzo 	ring->tail = kring->rtail; /* restore saved value of tail, for safety */
201417885a7bSLuigi Rizzo 
201517885a7bSLuigi Rizzo 	/* fifth step: the user goes to sleep again, causing another rxsync */
201617885a7bSLuigi Rizzo 	netmap_vp_rxsync(na, ring_n, flags);
201717885a7bSLuigi Rizzo 	ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
2018f9790aebSLuigi Rizzo 		NM_IFPNAME(na->ifp), ring_n,
201917885a7bSLuigi Rizzo 		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
202017885a7bSLuigi Rizzo 		ring->head, ring->cur, ring->tail,
202117885a7bSLuigi Rizzo 		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
2022f0ea3689SLuigi Rizzo 	mtx_unlock(&kring->q_lock);
2023f9790aebSLuigi Rizzo 	return error;
2024f9790aebSLuigi Rizzo }
2025f9790aebSLuigi Rizzo 
202617885a7bSLuigi Rizzo 
2027f9790aebSLuigi Rizzo static int
2028f9790aebSLuigi Rizzo netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
2029f9790aebSLuigi Rizzo {
2030f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna = na->na_private;
2031f9790aebSLuigi Rizzo 	struct netmap_adapter *port_na = &bna->up.up;
2032f9790aebSLuigi Rizzo 	if (tx == NR_TX || ring_n != 0)
2033f2637526SLuigi Rizzo 		return EINVAL;
2034f9790aebSLuigi Rizzo 	return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
2035f9790aebSLuigi Rizzo }
2036f9790aebSLuigi Rizzo 
203717885a7bSLuigi Rizzo 
2038f9790aebSLuigi Rizzo /* attach a bridge wrapper to the 'real' device */
2039f9790aebSLuigi Rizzo static int
2040f9790aebSLuigi Rizzo netmap_bwrap_attach(struct ifnet *fake, struct ifnet *real)
2041f9790aebSLuigi Rizzo {
2042f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna;
2043f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
2044f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = NA(real);
2045f9790aebSLuigi Rizzo 	struct netmap_adapter *hostna;
2046f9790aebSLuigi Rizzo 	int error;
2047f9790aebSLuigi Rizzo 
2048f9790aebSLuigi Rizzo 
2049f9790aebSLuigi Rizzo 	bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
2050f9790aebSLuigi Rizzo 	if (bna == NULL)
2051f9790aebSLuigi Rizzo 		return ENOMEM;
2052f9790aebSLuigi Rizzo 
2053f9790aebSLuigi Rizzo 	na = &bna->up.up;
2054f9790aebSLuigi Rizzo 	na->ifp = fake;
2055f9790aebSLuigi Rizzo 	/* fill the ring data for the bwrap adapter with rx/tx meanings
2056f9790aebSLuigi Rizzo 	 * swapped. The real cross-linking will be done during register,
2057f9790aebSLuigi Rizzo 	 * when all the krings will have been created.
2058f9790aebSLuigi Rizzo 	 */
2059f9790aebSLuigi Rizzo 	na->num_rx_rings = hwna->num_tx_rings;
2060f9790aebSLuigi Rizzo 	na->num_tx_rings = hwna->num_rx_rings;
2061f9790aebSLuigi Rizzo 	na->num_tx_desc = hwna->num_rx_desc;
2062f9790aebSLuigi Rizzo 	na->num_rx_desc = hwna->num_tx_desc;
2063f9790aebSLuigi Rizzo 	na->nm_dtor = netmap_bwrap_dtor;
2064f9790aebSLuigi Rizzo 	na->nm_register = netmap_bwrap_register;
2065f9790aebSLuigi Rizzo 	// na->nm_txsync = netmap_bwrap_txsync;
2066f9790aebSLuigi Rizzo 	// na->nm_rxsync = netmap_bwrap_rxsync;
2067f9790aebSLuigi Rizzo 	na->nm_config = netmap_bwrap_config;
2068f9790aebSLuigi Rizzo 	na->nm_krings_create = netmap_bwrap_krings_create;
2069f9790aebSLuigi Rizzo 	na->nm_krings_delete = netmap_bwrap_krings_delete;
2070f9790aebSLuigi Rizzo 	na->nm_notify = netmap_bwrap_notify;
2071f9790aebSLuigi Rizzo 	na->nm_mem = hwna->nm_mem;
2072f9790aebSLuigi Rizzo 	na->na_private = na; /* prevent NIOCREGIF */
2073f9790aebSLuigi Rizzo 	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
2074f9790aebSLuigi Rizzo 
2075f9790aebSLuigi Rizzo 	bna->hwna = hwna;
2076f9790aebSLuigi Rizzo 	netmap_adapter_get(hwna);
2077f9790aebSLuigi Rizzo 	hwna->na_private = bna; /* weak reference */
2078f9790aebSLuigi Rizzo 
2079f0ea3689SLuigi Rizzo 	if (hwna->na_flags & NAF_HOST_RINGS) {
2080f0ea3689SLuigi Rizzo 		na->na_flags |= NAF_HOST_RINGS;
2081f9790aebSLuigi Rizzo 		hostna = &bna->host.up;
2082f9790aebSLuigi Rizzo 		hostna->ifp = hwna->ifp;
2083f9790aebSLuigi Rizzo 		hostna->num_tx_rings = 1;
2084f9790aebSLuigi Rizzo 		hostna->num_tx_desc = hwna->num_rx_desc;
2085f9790aebSLuigi Rizzo 		hostna->num_rx_rings = 1;
2086f9790aebSLuigi Rizzo 		hostna->num_rx_desc = hwna->num_tx_desc;
2087f9790aebSLuigi Rizzo 		// hostna->nm_txsync = netmap_bwrap_host_txsync;
2088f9790aebSLuigi Rizzo 		// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
2089f9790aebSLuigi Rizzo 		hostna->nm_notify = netmap_bwrap_host_notify;
2090f9790aebSLuigi Rizzo 		hostna->nm_mem = na->nm_mem;
2091f9790aebSLuigi Rizzo 		hostna->na_private = bna;
2092f0ea3689SLuigi Rizzo 	}
2093f9790aebSLuigi Rizzo 
209417885a7bSLuigi Rizzo 	ND("%s<->%s txr %d txd %d rxr %d rxd %d",
209517885a7bSLuigi Rizzo 		fake->if_xname, real->if_xname,
2096f9790aebSLuigi Rizzo 		na->num_tx_rings, na->num_tx_desc,
2097f9790aebSLuigi Rizzo 		na->num_rx_rings, na->num_rx_desc);
2098f9790aebSLuigi Rizzo 
2099f9790aebSLuigi Rizzo 	error = netmap_attach_common(na);
2100f9790aebSLuigi Rizzo 	if (error) {
2101f9790aebSLuigi Rizzo 		netmap_adapter_put(hwna);
2102f9790aebSLuigi Rizzo 		free(bna, M_DEVBUF);
2103f9790aebSLuigi Rizzo 		return error;
2104f9790aebSLuigi Rizzo 	}
2105f9790aebSLuigi Rizzo 	return 0;
2106f9790aebSLuigi Rizzo }
2107f9790aebSLuigi Rizzo 
210817885a7bSLuigi Rizzo 
2109f9790aebSLuigi Rizzo void
2110f9790aebSLuigi Rizzo netmap_init_bridges(void)
2111f9790aebSLuigi Rizzo {
2112f9790aebSLuigi Rizzo 	int i;
2113f9790aebSLuigi Rizzo 	bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
2114f9790aebSLuigi Rizzo 	for (i = 0; i < NM_BRIDGES; i++)
2115f9790aebSLuigi Rizzo 		BDG_RWINIT(&nm_bridges[i]);
2116f9790aebSLuigi Rizzo }
2117f9790aebSLuigi Rizzo #endif /* WITH_VALE */
2118