xref: /freebsd-14.2/sys/dev/netmap/netmap_vale.c (revision 4bf50f18)
1f9790aebSLuigi Rizzo /*
217885a7bSLuigi Rizzo  * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
3f9790aebSLuigi Rizzo  *
4f9790aebSLuigi Rizzo  * Redistribution and use in source and binary forms, with or without
5f9790aebSLuigi Rizzo  * modification, are permitted provided that the following conditions
6f9790aebSLuigi Rizzo  * are met:
7f9790aebSLuigi Rizzo  *   1. Redistributions of source code must retain the above copyright
8f9790aebSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer.
9f9790aebSLuigi Rizzo  *   2. Redistributions in binary form must reproduce the above copyright
10f9790aebSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer in the
11f9790aebSLuigi Rizzo  *      documentation and/or other materials provided with the distribution.
12f9790aebSLuigi Rizzo  *
13f9790aebSLuigi Rizzo  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14f9790aebSLuigi Rizzo  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15f9790aebSLuigi Rizzo  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16f9790aebSLuigi Rizzo  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17f9790aebSLuigi Rizzo  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18f9790aebSLuigi Rizzo  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19f9790aebSLuigi Rizzo  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20f9790aebSLuigi Rizzo  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21f9790aebSLuigi Rizzo  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22f9790aebSLuigi Rizzo  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23f9790aebSLuigi Rizzo  * SUCH DAMAGE.
24f9790aebSLuigi Rizzo  */
25f9790aebSLuigi Rizzo 
26f9790aebSLuigi Rizzo 
27f9790aebSLuigi Rizzo /*
28f9790aebSLuigi Rizzo  * This module implements the VALE switch for netmap
29f9790aebSLuigi Rizzo 
30f9790aebSLuigi Rizzo --- VALE SWITCH ---
31f9790aebSLuigi Rizzo 
32f9790aebSLuigi Rizzo NMG_LOCK() serializes all modifications to switches and ports.
33f9790aebSLuigi Rizzo A switch cannot be deleted until all ports are gone.
34f9790aebSLuigi Rizzo 
For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring a new port or deleting an existing
one, the lock is acquired in exclusive mode (after holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.
42f9790aebSLuigi Rizzo 
On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch.)
49f9790aebSLuigi Rizzo 
50f9790aebSLuigi Rizzo  */
51f9790aebSLuigi Rizzo 
52f9790aebSLuigi Rizzo /*
53f9790aebSLuigi Rizzo  * OS-specific code that is used only within this file.
54f9790aebSLuigi Rizzo  * Other OS-specific code that must be accessed by drivers
55f9790aebSLuigi Rizzo  * is present in netmap_kern.h
56f9790aebSLuigi Rizzo  */
57f9790aebSLuigi Rizzo 
58f9790aebSLuigi Rizzo #if defined(__FreeBSD__)
59f9790aebSLuigi Rizzo #include <sys/cdefs.h> /* prerequisite */
60f9790aebSLuigi Rizzo __FBSDID("$FreeBSD$");
61f9790aebSLuigi Rizzo 
62f9790aebSLuigi Rizzo #include <sys/types.h>
63f9790aebSLuigi Rizzo #include <sys/errno.h>
64f9790aebSLuigi Rizzo #include <sys/param.h>	/* defines used in kernel.h */
65f9790aebSLuigi Rizzo #include <sys/kernel.h>	/* types used in module initialization */
66f9790aebSLuigi Rizzo #include <sys/conf.h>	/* cdevsw struct, UID, GID */
67f9790aebSLuigi Rizzo #include <sys/sockio.h>
68f9790aebSLuigi Rizzo #include <sys/socketvar.h>	/* struct socket */
69f9790aebSLuigi Rizzo #include <sys/malloc.h>
70f9790aebSLuigi Rizzo #include <sys/poll.h>
71f9790aebSLuigi Rizzo #include <sys/rwlock.h>
72f9790aebSLuigi Rizzo #include <sys/socket.h> /* sockaddrs */
73f9790aebSLuigi Rizzo #include <sys/selinfo.h>
74f9790aebSLuigi Rizzo #include <sys/sysctl.h>
75f9790aebSLuigi Rizzo #include <net/if.h>
76f9790aebSLuigi Rizzo #include <net/if_var.h>
77f9790aebSLuigi Rizzo #include <net/bpf.h>		/* BIOCIMMEDIATE */
78f9790aebSLuigi Rizzo #include <machine/bus.h>	/* bus_dmamap_* */
79f9790aebSLuigi Rizzo #include <sys/endian.h>
80f9790aebSLuigi Rizzo #include <sys/refcount.h>
81f9790aebSLuigi Rizzo 
82f9790aebSLuigi Rizzo 
/*
 * Per-bridge lock wrappers (FreeBSD flavour).
 * Every macro takes a pointer to the bridge and operates on its
 * bdg_lock member, which is declared with type BDG_RWLOCK_T.
 *
 * NOTE(review): the overview comment at the top of this file describes a
 * sleepable SX lock, while the type used here is struct rwlock -- confirm
 * which one is intended.
 */
#define BDG_RWLOCK_T		struct rwlock

/* lifetime */
#define	BDG_RWINIT(b)		\
	rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
#define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)

/* exclusive (writer) side */
#define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)
#define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)

/* shared (reader) side */
#define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)
#define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)
#define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)
93f9790aebSLuigi Rizzo 
94f9790aebSLuigi Rizzo 
95f9790aebSLuigi Rizzo #elif defined(linux)
96f9790aebSLuigi Rizzo 
97f9790aebSLuigi Rizzo #include "bsd_glue.h"
98f9790aebSLuigi Rizzo 
99f9790aebSLuigi Rizzo #elif defined(__APPLE__)
100f9790aebSLuigi Rizzo 
101f9790aebSLuigi Rizzo #warning OSX support is only partial
102f9790aebSLuigi Rizzo #include "osx_glue.h"
103f9790aebSLuigi Rizzo 
104f9790aebSLuigi Rizzo #else
105f9790aebSLuigi Rizzo 
106f9790aebSLuigi Rizzo #error	Unsupported platform
107f9790aebSLuigi Rizzo 
108f9790aebSLuigi Rizzo #endif /* unsupported */
109f9790aebSLuigi Rizzo 
110f9790aebSLuigi Rizzo /*
111f9790aebSLuigi Rizzo  * common headers
112f9790aebSLuigi Rizzo  */
113f9790aebSLuigi Rizzo 
114f9790aebSLuigi Rizzo #include <net/netmap.h>
115f9790aebSLuigi Rizzo #include <dev/netmap/netmap_kern.h>
116f9790aebSLuigi Rizzo #include <dev/netmap/netmap_mem2.h>
117f9790aebSLuigi Rizzo 
118f9790aebSLuigi Rizzo #ifdef WITH_VALE
119f9790aebSLuigi Rizzo 
120f9790aebSLuigi Rizzo /*
121f9790aebSLuigi Rizzo  * system parameters (most of them in netmap_kern.h)
122f9790aebSLuigi Rizzo  * NM_NAME	prefix for switch port names, default "vale"
123f9790aebSLuigi Rizzo  * NM_BDG_MAXPORTS	number of ports
124f9790aebSLuigi Rizzo  * NM_BRIDGES	max number of switches in the system.
125f9790aebSLuigi Rizzo  *	XXX should become a sysctl or tunable
126f9790aebSLuigi Rizzo  *
127f9790aebSLuigi Rizzo  * Switch ports are named valeX:Y where X is the switch name and Y
128f9790aebSLuigi Rizzo  * is the port. If Y matches a physical interface name, the port is
129f9790aebSLuigi Rizzo  * connected to a physical device.
130f9790aebSLuigi Rizzo  *
131f9790aebSLuigi Rizzo  * Unlike physical interfaces, switch ports use their own memory region
132f9790aebSLuigi Rizzo  * for rings and buffers.
133f9790aebSLuigi Rizzo  * The virtual interfaces use per-queue lock instead of core lock.
134f9790aebSLuigi Rizzo  * In the tx loop, we aggregate traffic in batches to make all operations
135f9790aebSLuigi Rizzo  * faster. The batch size is bridge_batch.
136f9790aebSLuigi Rizzo  */
/* per-port limits */
#define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
#define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
#define NM_BRIDGE_RINGSIZE	1024	/* in the device */

/* per-switch tables */
#define NM_BDG_HASH		1024	/* forwarding table entries */
#define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
#define NM_MULTISEG		64	/* max size of a chain of bufs */

/*
 * Actual size of the tables: a full batch plus room for one more
 * chain of buffers.
 */
#define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)

/* NM_FT_NULL terminates a list of slots in the ft */
#define NM_FT_NULL		NM_BDG_BATCH_MAX

/* system-wide limit */
#define	NM_BRIDGES		8	/* number of bridges */
148f9790aebSLuigi Rizzo 
149f9790aebSLuigi Rizzo 
150f9790aebSLuigi Rizzo /*
151f9790aebSLuigi Rizzo  * bridge_batch is set via sysctl to the max batch size to be
152f9790aebSLuigi Rizzo  * used in the bridge. The actual value may be larger as the
153f9790aebSLuigi Rizzo  * last packet in the block may overflow the size.
154f9790aebSLuigi Rizzo  */
155f9790aebSLuigi Rizzo int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
156f9790aebSLuigi Rizzo SYSCTL_DECL(_dev_netmap);
157f9790aebSLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
158f9790aebSLuigi Rizzo 
159f9790aebSLuigi Rizzo 
160*4bf50f18SLuigi Rizzo static int netmap_vp_create(struct nmreq *, struct ifnet *, struct netmap_vp_adapter **);
161*4bf50f18SLuigi Rizzo static int netmap_vp_reg(struct netmap_adapter *na, int onoff);
162f9790aebSLuigi Rizzo static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
163f9790aebSLuigi Rizzo 
/*
 * Per-destination queue descriptor: for each output interface an
 * nm_bdg_q is used to construct a list of entries to deliver.
 * Empty queues have bq_head and bq_tail set to NM_FT_NULL.
 */
struct nm_bdg_q {
	uint16_t bq_head;	/* first entry of the list */
	uint16_t bq_tail;	/* last entry of the list */
	uint32_t bq_len;	/* number of output buffers (we can have
				 * coalescing during the copy) */
};
174f9790aebSLuigi Rizzo 
/*
 * One entry of the bridge forwarding table (see ht[] in struct nm_bridge).
 * XXX revise this
 */
struct nm_hash_ent {
	uint64_t	mac;	/* the top 2 bytes are the epoch */
	uint64_t	ports;
};
180f9790aebSLuigi Rizzo 
181f9790aebSLuigi Rizzo /*
182f9790aebSLuigi Rizzo  * nm_bridge is a descriptor for a VALE switch.
183f9790aebSLuigi Rizzo  * Interfaces for a bridge are all in bdg_ports[].
184f9790aebSLuigi Rizzo  * The array has fixed size, an empty entry does not terminate
185f9790aebSLuigi Rizzo  * the search, but lookups only occur on attach/detach so we
186f9790aebSLuigi Rizzo  * don't mind if they are slow.
187f9790aebSLuigi Rizzo  *
188f9790aebSLuigi Rizzo  * The bridge is non blocking on the transmit ports: excess
189f9790aebSLuigi Rizzo  * packets are dropped if there is no room on the output port.
190f9790aebSLuigi Rizzo  *
191f9790aebSLuigi Rizzo  * bdg_lock protects accesses to the bdg_ports array.
192f9790aebSLuigi Rizzo  * This is a rw lock (or equivalent).
193f9790aebSLuigi Rizzo  */
194f9790aebSLuigi Rizzo struct nm_bridge {
195f9790aebSLuigi Rizzo 	/* XXX what is the proper alignment/layout ? */
196f9790aebSLuigi Rizzo 	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
197f9790aebSLuigi Rizzo 	int		bdg_namelen;
198f9790aebSLuigi Rizzo 	uint32_t	bdg_active_ports; /* 0 means free */
199f9790aebSLuigi Rizzo 	char		bdg_basename[IFNAMSIZ];
200f9790aebSLuigi Rizzo 
201f9790aebSLuigi Rizzo 	/* Indexes of active ports (up to active_ports)
202f9790aebSLuigi Rizzo 	 * and all other remaining ports.
203f9790aebSLuigi Rizzo 	 */
204f9790aebSLuigi Rizzo 	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];
205f9790aebSLuigi Rizzo 
206f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
207f9790aebSLuigi Rizzo 
208f9790aebSLuigi Rizzo 
209f9790aebSLuigi Rizzo 	/*
210f9790aebSLuigi Rizzo 	 * The function to decide the destination port.
211f9790aebSLuigi Rizzo 	 * It returns either of an index of the destination port,
212f9790aebSLuigi Rizzo 	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
213f9790aebSLuigi Rizzo 	 * forward this packet.  ring_nr is the source ring index, and the
214f9790aebSLuigi Rizzo 	 * function may overwrite this value to forward this packet to a
215f9790aebSLuigi Rizzo 	 * different ring index.
216f9790aebSLuigi Rizzo 	 * This function must be set by netmap_bdgctl().
217f9790aebSLuigi Rizzo 	 */
218*4bf50f18SLuigi Rizzo 	struct netmap_bdg_ops bdg_ops;
219f9790aebSLuigi Rizzo 
220f9790aebSLuigi Rizzo 	/* the forwarding table, MAC+ports.
221f9790aebSLuigi Rizzo 	 * XXX should be changed to an argument to be passed to
222f9790aebSLuigi Rizzo 	 * the lookup function, and allocated on attach
223f9790aebSLuigi Rizzo 	 */
224f9790aebSLuigi Rizzo 	struct nm_hash_ent ht[NM_BDG_HASH];
225f9790aebSLuigi Rizzo };
226f9790aebSLuigi Rizzo 
227*4bf50f18SLuigi Rizzo const char*
228*4bf50f18SLuigi Rizzo netmap_bdg_name(struct netmap_vp_adapter *vp)
229*4bf50f18SLuigi Rizzo {
230*4bf50f18SLuigi Rizzo 	struct nm_bridge *b = vp->na_bdg;
231*4bf50f18SLuigi Rizzo 	if (b == NULL)
232*4bf50f18SLuigi Rizzo 		return NULL;
233*4bf50f18SLuigi Rizzo 	return b->bdg_basename;
234*4bf50f18SLuigi Rizzo }
235*4bf50f18SLuigi Rizzo 
236f9790aebSLuigi Rizzo 
237f9790aebSLuigi Rizzo /*
238f9790aebSLuigi Rizzo  * XXX in principle nm_bridges could be created dynamically
239f9790aebSLuigi Rizzo  * Right now we have a static array and deletions are protected
240f9790aebSLuigi Rizzo  * by an exclusive lock.
241f9790aebSLuigi Rizzo  */
242f9790aebSLuigi Rizzo struct nm_bridge nm_bridges[NM_BRIDGES];
243f9790aebSLuigi Rizzo 
244f9790aebSLuigi Rizzo 
/*
 * Slightly optimized packet copy. Lengths below 1024 are rounded up to
 * a multiple of 64 bytes and copied as 64-bit words, which is often
 * faster than dealing with odd sizes; lengths of 1024 or more are
 * handed to memcpy() and copied exactly. Nothing is copied when l <= 0.
 *
 * Because of the rounding, the caller must guarantee that both buffers
 * have enough room past l.
 *
 * XXX only for multiples of 64 bytes, non overlapped.
 *
 * _src	source buffer
 * _dst	destination buffer
 * l	number of bytes to copy
 */
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
	uint64_t *from = _src;
	uint64_t *to = _dst;

	if (unlikely(l >= 1024)) {
		memcpy(to, from, l);
		return;
	}
	while (likely(l > 0)) {
		/* one 64-byte chunk, i.e. eight 64-bit words */
		to[0] = from[0];
		to[1] = from[1];
		to[2] = from[2];
		to[3] = from[3];
		to[4] = from[4];
		to[5] = from[5];
		to[6] = from[6];
		to[7] = from[7];
		from += 8;
		to += 8;
		l -= 64;
	}
}
273f9790aebSLuigi Rizzo 
274f9790aebSLuigi Rizzo 
275f9790aebSLuigi Rizzo /*
276f9790aebSLuigi Rizzo  * locate a bridge among the existing ones.
277f9790aebSLuigi Rizzo  * MUST BE CALLED WITH NMG_LOCK()
278f9790aebSLuigi Rizzo  *
279f9790aebSLuigi Rizzo  * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
280f9790aebSLuigi Rizzo  * We assume that this is called with a name of at least NM_NAME chars.
281f9790aebSLuigi Rizzo  */
282f9790aebSLuigi Rizzo static struct nm_bridge *
283f9790aebSLuigi Rizzo nm_find_bridge(const char *name, int create)
284f9790aebSLuigi Rizzo {
285f9790aebSLuigi Rizzo 	int i, l, namelen;
286f9790aebSLuigi Rizzo 	struct nm_bridge *b = NULL;
287f9790aebSLuigi Rizzo 
288f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
289f9790aebSLuigi Rizzo 
290f9790aebSLuigi Rizzo 	namelen = strlen(NM_NAME);	/* base length */
291f9790aebSLuigi Rizzo 	l = name ? strlen(name) : 0;		/* actual length */
292f9790aebSLuigi Rizzo 	if (l < namelen) {
293f9790aebSLuigi Rizzo 		D("invalid bridge name %s", name ? name : NULL);
294f9790aebSLuigi Rizzo 		return NULL;
295f9790aebSLuigi Rizzo 	}
296f9790aebSLuigi Rizzo 	for (i = namelen + 1; i < l; i++) {
297f9790aebSLuigi Rizzo 		if (name[i] == ':') {
298f9790aebSLuigi Rizzo 			namelen = i;
299f9790aebSLuigi Rizzo 			break;
300f9790aebSLuigi Rizzo 		}
301f9790aebSLuigi Rizzo 	}
302f9790aebSLuigi Rizzo 	if (namelen >= IFNAMSIZ)
303f9790aebSLuigi Rizzo 		namelen = IFNAMSIZ;
304f9790aebSLuigi Rizzo 	ND("--- prefix is '%.*s' ---", namelen, name);
305f9790aebSLuigi Rizzo 
306f9790aebSLuigi Rizzo 	/* lookup the name, remember empty slot if there is one */
307f9790aebSLuigi Rizzo 	for (i = 0; i < NM_BRIDGES; i++) {
308f9790aebSLuigi Rizzo 		struct nm_bridge *x = nm_bridges + i;
309f9790aebSLuigi Rizzo 
310f9790aebSLuigi Rizzo 		if (x->bdg_active_ports == 0) {
311f9790aebSLuigi Rizzo 			if (create && b == NULL)
312f9790aebSLuigi Rizzo 				b = x;	/* record empty slot */
313f9790aebSLuigi Rizzo 		} else if (x->bdg_namelen != namelen) {
314f9790aebSLuigi Rizzo 			continue;
315f9790aebSLuigi Rizzo 		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
316f9790aebSLuigi Rizzo 			ND("found '%.*s' at %d", namelen, name, i);
317f9790aebSLuigi Rizzo 			b = x;
318f9790aebSLuigi Rizzo 			break;
319f9790aebSLuigi Rizzo 		}
320f9790aebSLuigi Rizzo 	}
321f9790aebSLuigi Rizzo 	if (i == NM_BRIDGES && b) { /* name not found, can create entry */
322f9790aebSLuigi Rizzo 		/* initialize the bridge */
323f9790aebSLuigi Rizzo 		strncpy(b->bdg_basename, name, namelen);
324f9790aebSLuigi Rizzo 		ND("create new bridge %s with ports %d", b->bdg_basename,
325f9790aebSLuigi Rizzo 			b->bdg_active_ports);
326f9790aebSLuigi Rizzo 		b->bdg_namelen = namelen;
327f9790aebSLuigi Rizzo 		b->bdg_active_ports = 0;
328f9790aebSLuigi Rizzo 		for (i = 0; i < NM_BDG_MAXPORTS; i++)
329f9790aebSLuigi Rizzo 			b->bdg_port_index[i] = i;
330f9790aebSLuigi Rizzo 		/* set the default function */
331*4bf50f18SLuigi Rizzo 		b->bdg_ops.lookup = netmap_bdg_learning;
332f9790aebSLuigi Rizzo 		/* reset the MAC address table */
333f9790aebSLuigi Rizzo 		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
334f9790aebSLuigi Rizzo 	}
335f9790aebSLuigi Rizzo 	return b;
336f9790aebSLuigi Rizzo }
337f9790aebSLuigi Rizzo 
338f9790aebSLuigi Rizzo 
339f9790aebSLuigi Rizzo /*
340f9790aebSLuigi Rizzo  * Free the forwarding tables for rings attached to switch ports.
341f9790aebSLuigi Rizzo  */
342f9790aebSLuigi Rizzo static void
343f9790aebSLuigi Rizzo nm_free_bdgfwd(struct netmap_adapter *na)
344f9790aebSLuigi Rizzo {
345f9790aebSLuigi Rizzo 	int nrings, i;
346f9790aebSLuigi Rizzo 	struct netmap_kring *kring;
347f9790aebSLuigi Rizzo 
348f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
34917885a7bSLuigi Rizzo 	nrings = na->num_tx_rings;
35017885a7bSLuigi Rizzo 	kring = na->tx_rings;
351f9790aebSLuigi Rizzo 	for (i = 0; i < nrings; i++) {
352f9790aebSLuigi Rizzo 		if (kring[i].nkr_ft) {
353f9790aebSLuigi Rizzo 			free(kring[i].nkr_ft, M_DEVBUF);
354f9790aebSLuigi Rizzo 			kring[i].nkr_ft = NULL; /* protect from freeing twice */
355f9790aebSLuigi Rizzo 		}
356f9790aebSLuigi Rizzo 	}
357f9790aebSLuigi Rizzo }
358f9790aebSLuigi Rizzo 
359f9790aebSLuigi Rizzo 
360f9790aebSLuigi Rizzo /*
361f9790aebSLuigi Rizzo  * Allocate the forwarding tables for the rings attached to the bridge ports.
362f9790aebSLuigi Rizzo  */
363f9790aebSLuigi Rizzo static int
364f9790aebSLuigi Rizzo nm_alloc_bdgfwd(struct netmap_adapter *na)
365f9790aebSLuigi Rizzo {
366f9790aebSLuigi Rizzo 	int nrings, l, i, num_dstq;
367f9790aebSLuigi Rizzo 	struct netmap_kring *kring;
368f9790aebSLuigi Rizzo 
369f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
370f9790aebSLuigi Rizzo 	/* all port:rings + broadcast */
371f9790aebSLuigi Rizzo 	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
372f9790aebSLuigi Rizzo 	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
373f9790aebSLuigi Rizzo 	l += sizeof(struct nm_bdg_q) * num_dstq;
374f9790aebSLuigi Rizzo 	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
375f9790aebSLuigi Rizzo 
376f0ea3689SLuigi Rizzo 	nrings = netmap_real_tx_rings(na);
377f9790aebSLuigi Rizzo 	kring = na->tx_rings;
378f9790aebSLuigi Rizzo 	for (i = 0; i < nrings; i++) {
379f9790aebSLuigi Rizzo 		struct nm_bdg_fwd *ft;
380f9790aebSLuigi Rizzo 		struct nm_bdg_q *dstq;
381f9790aebSLuigi Rizzo 		int j;
382f9790aebSLuigi Rizzo 
383f9790aebSLuigi Rizzo 		ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
384f9790aebSLuigi Rizzo 		if (!ft) {
385f9790aebSLuigi Rizzo 			nm_free_bdgfwd(na);
386f9790aebSLuigi Rizzo 			return ENOMEM;
387f9790aebSLuigi Rizzo 		}
388f9790aebSLuigi Rizzo 		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
389f9790aebSLuigi Rizzo 		for (j = 0; j < num_dstq; j++) {
390f9790aebSLuigi Rizzo 			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
391f9790aebSLuigi Rizzo 			dstq[j].bq_len = 0;
392f9790aebSLuigi Rizzo 		}
393f9790aebSLuigi Rizzo 		kring[i].nkr_ft = ft;
394f9790aebSLuigi Rizzo 	}
395f9790aebSLuigi Rizzo 	return 0;
396f9790aebSLuigi Rizzo }
397f9790aebSLuigi Rizzo 
398f9790aebSLuigi Rizzo 
399*4bf50f18SLuigi Rizzo /* remove from bridge b the ports in slots hw and sw
400*4bf50f18SLuigi Rizzo  * (sw can be -1 if not needed)
401*4bf50f18SLuigi Rizzo  */
402f9790aebSLuigi Rizzo static void
403f9790aebSLuigi Rizzo netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
404f9790aebSLuigi Rizzo {
405f9790aebSLuigi Rizzo 	int s_hw = hw, s_sw = sw;
406f9790aebSLuigi Rizzo 	int i, lim =b->bdg_active_ports;
407f9790aebSLuigi Rizzo 	uint8_t tmp[NM_BDG_MAXPORTS];
408f9790aebSLuigi Rizzo 
409f9790aebSLuigi Rizzo 	/*
410f9790aebSLuigi Rizzo 	New algorithm:
411f9790aebSLuigi Rizzo 	make a copy of bdg_port_index;
412f9790aebSLuigi Rizzo 	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
413f9790aebSLuigi Rizzo 	in the array of bdg_port_index, replacing them with
414f9790aebSLuigi Rizzo 	entries from the bottom of the array;
415f9790aebSLuigi Rizzo 	decrement bdg_active_ports;
416f9790aebSLuigi Rizzo 	acquire BDG_WLOCK() and copy back the array.
417f9790aebSLuigi Rizzo 	 */
418f9790aebSLuigi Rizzo 
419f0ea3689SLuigi Rizzo 	if (netmap_verbose)
420f9790aebSLuigi Rizzo 		D("detach %d and %d (lim %d)", hw, sw, lim);
421f9790aebSLuigi Rizzo 	/* make a copy of the list of active ports, update it,
422f9790aebSLuigi Rizzo 	 * and then copy back within BDG_WLOCK().
423f9790aebSLuigi Rizzo 	 */
424f9790aebSLuigi Rizzo 	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
425f9790aebSLuigi Rizzo 	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
426f9790aebSLuigi Rizzo 		if (hw >= 0 && tmp[i] == hw) {
427f9790aebSLuigi Rizzo 			ND("detach hw %d at %d", hw, i);
428f9790aebSLuigi Rizzo 			lim--; /* point to last active port */
429f9790aebSLuigi Rizzo 			tmp[i] = tmp[lim]; /* swap with i */
430f9790aebSLuigi Rizzo 			tmp[lim] = hw;	/* now this is inactive */
431f9790aebSLuigi Rizzo 			hw = -1;
432f9790aebSLuigi Rizzo 		} else if (sw >= 0 && tmp[i] == sw) {
433f9790aebSLuigi Rizzo 			ND("detach sw %d at %d", sw, i);
434f9790aebSLuigi Rizzo 			lim--;
435f9790aebSLuigi Rizzo 			tmp[i] = tmp[lim];
436f9790aebSLuigi Rizzo 			tmp[lim] = sw;
437f9790aebSLuigi Rizzo 			sw = -1;
438f9790aebSLuigi Rizzo 		} else {
439f9790aebSLuigi Rizzo 			i++;
440f9790aebSLuigi Rizzo 		}
441f9790aebSLuigi Rizzo 	}
442f9790aebSLuigi Rizzo 	if (hw >= 0 || sw >= 0) {
443f9790aebSLuigi Rizzo 		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
444f9790aebSLuigi Rizzo 	}
445f9790aebSLuigi Rizzo 
446f9790aebSLuigi Rizzo 	BDG_WLOCK(b);
447*4bf50f18SLuigi Rizzo 	if (b->bdg_ops.dtor)
448*4bf50f18SLuigi Rizzo 		b->bdg_ops.dtor(b->bdg_ports[s_hw]);
449f9790aebSLuigi Rizzo 	b->bdg_ports[s_hw] = NULL;
450f9790aebSLuigi Rizzo 	if (s_sw >= 0) {
451f9790aebSLuigi Rizzo 		b->bdg_ports[s_sw] = NULL;
452f9790aebSLuigi Rizzo 	}
453f9790aebSLuigi Rizzo 	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
454f9790aebSLuigi Rizzo 	b->bdg_active_ports = lim;
455f9790aebSLuigi Rizzo 	BDG_WUNLOCK(b);
456f9790aebSLuigi Rizzo 
457f9790aebSLuigi Rizzo 	ND("now %d active ports", lim);
458f9790aebSLuigi Rizzo 	if (lim == 0) {
459f9790aebSLuigi Rizzo 		ND("marking bridge %s as free", b->bdg_basename);
460*4bf50f18SLuigi Rizzo 		bzero(&b->bdg_ops, sizeof(b->bdg_ops));
461f9790aebSLuigi Rizzo 	}
462f9790aebSLuigi Rizzo }
463f9790aebSLuigi Rizzo 
464*4bf50f18SLuigi Rizzo /* nm_bdg_ctl callback for VALE ports */
465*4bf50f18SLuigi Rizzo static int
466*4bf50f18SLuigi Rizzo netmap_vp_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
467f9790aebSLuigi Rizzo {
468f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
469f9790aebSLuigi Rizzo 	struct nm_bridge *b = vpna->na_bdg;
470f9790aebSLuigi Rizzo 
471*4bf50f18SLuigi Rizzo 	if (attach)
472*4bf50f18SLuigi Rizzo 		return 0; /* nothing to do */
473*4bf50f18SLuigi Rizzo 	if (b) {
474*4bf50f18SLuigi Rizzo 		netmap_set_all_rings(na, 0 /* disable */);
475*4bf50f18SLuigi Rizzo 		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
476*4bf50f18SLuigi Rizzo 		vpna->na_bdg = NULL;
477*4bf50f18SLuigi Rizzo 		netmap_set_all_rings(na, 1 /* enable */);
478*4bf50f18SLuigi Rizzo 	}
479*4bf50f18SLuigi Rizzo 	/* I have took reference just for attach */
480*4bf50f18SLuigi Rizzo 	netmap_adapter_put(na);
481*4bf50f18SLuigi Rizzo 	return 0;
482*4bf50f18SLuigi Rizzo }
483*4bf50f18SLuigi Rizzo 
484*4bf50f18SLuigi Rizzo /* nm_dtor callback for ephemeral VALE ports */
485*4bf50f18SLuigi Rizzo static void
486*4bf50f18SLuigi Rizzo netmap_vp_dtor(struct netmap_adapter *na)
487*4bf50f18SLuigi Rizzo {
488*4bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
489*4bf50f18SLuigi Rizzo 	struct nm_bridge *b = vpna->na_bdg;
490*4bf50f18SLuigi Rizzo 
491*4bf50f18SLuigi Rizzo 	ND("%s has %d references", na->name, na->na_refcount);
492f9790aebSLuigi Rizzo 
493f9790aebSLuigi Rizzo 	if (b) {
494f9790aebSLuigi Rizzo 		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
495f9790aebSLuigi Rizzo 	}
496f9790aebSLuigi Rizzo }
497f9790aebSLuigi Rizzo 
498*4bf50f18SLuigi Rizzo /* nm_dtor callback for persistent VALE ports */
499*4bf50f18SLuigi Rizzo static void
500*4bf50f18SLuigi Rizzo netmap_persist_vp_dtor(struct netmap_adapter *na)
501*4bf50f18SLuigi Rizzo {
502*4bf50f18SLuigi Rizzo 	struct ifnet *ifp = na->ifp;
503*4bf50f18SLuigi Rizzo 
504*4bf50f18SLuigi Rizzo 	netmap_vp_dtor(na);
505*4bf50f18SLuigi Rizzo 	na->ifp = NULL;
506*4bf50f18SLuigi Rizzo 	nm_vi_detach(ifp);
507*4bf50f18SLuigi Rizzo }
508*4bf50f18SLuigi Rizzo 
509*4bf50f18SLuigi Rizzo /* remove a persistent VALE port from the system */
510*4bf50f18SLuigi Rizzo static int
511*4bf50f18SLuigi Rizzo nm_vi_destroy(const char *name)
512*4bf50f18SLuigi Rizzo {
513*4bf50f18SLuigi Rizzo 	struct ifnet *ifp;
514*4bf50f18SLuigi Rizzo 	int error;
515*4bf50f18SLuigi Rizzo 
516*4bf50f18SLuigi Rizzo 	ifp = ifunit_ref(name);
517*4bf50f18SLuigi Rizzo 	if (!ifp)
518*4bf50f18SLuigi Rizzo 		return ENXIO;
519*4bf50f18SLuigi Rizzo 	NMG_LOCK();
520*4bf50f18SLuigi Rizzo 	/* make sure this is actually a VALE port */
521*4bf50f18SLuigi Rizzo 	if (!NETMAP_CAPABLE(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
522*4bf50f18SLuigi Rizzo 		error = EINVAL;
523*4bf50f18SLuigi Rizzo 		goto err;
524*4bf50f18SLuigi Rizzo 	}
525*4bf50f18SLuigi Rizzo 
526*4bf50f18SLuigi Rizzo 	if (NA(ifp)->na_refcount > 1) {
527*4bf50f18SLuigi Rizzo 		error = EBUSY;
528*4bf50f18SLuigi Rizzo 		goto err;
529*4bf50f18SLuigi Rizzo 	}
530*4bf50f18SLuigi Rizzo 	NMG_UNLOCK();
531*4bf50f18SLuigi Rizzo 
532*4bf50f18SLuigi Rizzo 	D("destroying a persistent vale interface %s", ifp->if_xname);
533*4bf50f18SLuigi Rizzo 	/* Linux requires all the references are released
534*4bf50f18SLuigi Rizzo 	 * before unregister
535*4bf50f18SLuigi Rizzo 	 */
536*4bf50f18SLuigi Rizzo 	if_rele(ifp);
537*4bf50f18SLuigi Rizzo 	netmap_detach(ifp);
538*4bf50f18SLuigi Rizzo 	return 0;
539*4bf50f18SLuigi Rizzo 
540*4bf50f18SLuigi Rizzo err:
541*4bf50f18SLuigi Rizzo 	NMG_UNLOCK();
542*4bf50f18SLuigi Rizzo 	if_rele(ifp);
543*4bf50f18SLuigi Rizzo 	return error;
544*4bf50f18SLuigi Rizzo }
545*4bf50f18SLuigi Rizzo 
546*4bf50f18SLuigi Rizzo /*
547*4bf50f18SLuigi Rizzo  * Create a virtual interface registered to the system.
548*4bf50f18SLuigi Rizzo  * The interface will be attached to a bridge later.
549*4bf50f18SLuigi Rizzo  */
550*4bf50f18SLuigi Rizzo static int
551*4bf50f18SLuigi Rizzo nm_vi_create(struct nmreq *nmr)
552*4bf50f18SLuigi Rizzo {
553*4bf50f18SLuigi Rizzo 	struct ifnet *ifp;
554*4bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *vpna;
555*4bf50f18SLuigi Rizzo 	int error;
556*4bf50f18SLuigi Rizzo 
557*4bf50f18SLuigi Rizzo 	/* don't include VALE prefix */
558*4bf50f18SLuigi Rizzo 	if (!strncmp(nmr->nr_name, NM_NAME, strlen(NM_NAME)))
559*4bf50f18SLuigi Rizzo 		return EINVAL;
560*4bf50f18SLuigi Rizzo 	ifp = ifunit_ref(nmr->nr_name);
561*4bf50f18SLuigi Rizzo 	if (ifp) { /* already exist, cannot create new one */
562*4bf50f18SLuigi Rizzo 		if_rele(ifp);
563*4bf50f18SLuigi Rizzo 		return EEXIST;
564*4bf50f18SLuigi Rizzo 	}
565*4bf50f18SLuigi Rizzo 	error = nm_vi_persist(nmr->nr_name, &ifp);
566*4bf50f18SLuigi Rizzo 	if (error)
567*4bf50f18SLuigi Rizzo 		return error;
568*4bf50f18SLuigi Rizzo 
569*4bf50f18SLuigi Rizzo 	NMG_LOCK();
570*4bf50f18SLuigi Rizzo 	/* netmap_vp_create creates a struct netmap_vp_adapter */
571*4bf50f18SLuigi Rizzo 	error = netmap_vp_create(nmr, ifp, &vpna);
572*4bf50f18SLuigi Rizzo 	if (error) {
573*4bf50f18SLuigi Rizzo 		D("error %d", error);
574*4bf50f18SLuigi Rizzo 		nm_vi_detach(ifp);
575*4bf50f18SLuigi Rizzo 		return error;
576*4bf50f18SLuigi Rizzo 	}
577*4bf50f18SLuigi Rizzo 	/* persist-specific routines */
578*4bf50f18SLuigi Rizzo 	vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
579*4bf50f18SLuigi Rizzo 	vpna->up.nm_dtor = netmap_persist_vp_dtor;
580*4bf50f18SLuigi Rizzo 	netmap_adapter_get(&vpna->up);
581*4bf50f18SLuigi Rizzo 	NMG_UNLOCK();
582*4bf50f18SLuigi Rizzo 	D("created %s", ifp->if_xname);
583*4bf50f18SLuigi Rizzo 	return 0;
584*4bf50f18SLuigi Rizzo }
58517885a7bSLuigi Rizzo 
58617885a7bSLuigi Rizzo /* Try to get a reference to a netmap adapter attached to a VALE switch.
58717885a7bSLuigi Rizzo  * If the adapter is found (or is created), this function returns 0, a
58817885a7bSLuigi Rizzo  * non NULL pointer is returned into *na, and the caller holds a
58917885a7bSLuigi Rizzo  * reference to the adapter.
59017885a7bSLuigi Rizzo  * If an adapter is not found, then no reference is grabbed and the
59117885a7bSLuigi Rizzo  * function returns an error code, or 0 if there is just a VALE prefix
59217885a7bSLuigi Rizzo  * mismatch. Therefore the caller holds a reference when
59317885a7bSLuigi Rizzo  * (*na != NULL && return == 0).
59417885a7bSLuigi Rizzo  */
595f9790aebSLuigi Rizzo int
596f9790aebSLuigi Rizzo netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
597f9790aebSLuigi Rizzo {
598*4bf50f18SLuigi Rizzo 	char *nr_name = nmr->nr_name;
599*4bf50f18SLuigi Rizzo 	const char *ifname;
600f9790aebSLuigi Rizzo 	struct ifnet *ifp;
601f9790aebSLuigi Rizzo 	int error = 0;
602*4bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *vpna, *hostna = NULL;
603f9790aebSLuigi Rizzo 	struct nm_bridge *b;
604f9790aebSLuigi Rizzo 	int i, j, cand = -1, cand2 = -1;
605f9790aebSLuigi Rizzo 	int needed;
606f9790aebSLuigi Rizzo 
607f9790aebSLuigi Rizzo 	*na = NULL;     /* default return value */
608f9790aebSLuigi Rizzo 
609f9790aebSLuigi Rizzo 	/* first try to see if this is a bridge port. */
610f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
611*4bf50f18SLuigi Rizzo 	if (strncmp(nr_name, NM_NAME, sizeof(NM_NAME) - 1)) {
612f9790aebSLuigi Rizzo 		return 0;  /* no error, but no VALE prefix */
613f9790aebSLuigi Rizzo 	}
614f9790aebSLuigi Rizzo 
615*4bf50f18SLuigi Rizzo 	b = nm_find_bridge(nr_name, create);
616f9790aebSLuigi Rizzo 	if (b == NULL) {
617*4bf50f18SLuigi Rizzo 		D("no bridges available for '%s'", nr_name);
618f2637526SLuigi Rizzo 		return (create ? ENOMEM : ENXIO);
619f9790aebSLuigi Rizzo 	}
620*4bf50f18SLuigi Rizzo 	if (strlen(nr_name) < b->bdg_namelen) /* impossible */
621*4bf50f18SLuigi Rizzo 		panic("x");
622f9790aebSLuigi Rizzo 
623f9790aebSLuigi Rizzo 	/* Now we are sure that name starts with the bridge's name,
624f9790aebSLuigi Rizzo 	 * lookup the port in the bridge. We need to scan the entire
625f9790aebSLuigi Rizzo 	 * list. It is not important to hold a WLOCK on the bridge
626f9790aebSLuigi Rizzo 	 * during the search because NMG_LOCK already guarantees
627f9790aebSLuigi Rizzo 	 * that there are no other possible writers.
628f9790aebSLuigi Rizzo 	 */
629f9790aebSLuigi Rizzo 
630f9790aebSLuigi Rizzo 	/* lookup in the local list of ports */
631f9790aebSLuigi Rizzo 	for (j = 0; j < b->bdg_active_ports; j++) {
632f9790aebSLuigi Rizzo 		i = b->bdg_port_index[j];
633f9790aebSLuigi Rizzo 		vpna = b->bdg_ports[i];
634f9790aebSLuigi Rizzo 		// KASSERT(na != NULL);
635*4bf50f18SLuigi Rizzo 		D("checking %s", vpna->up.name);
636*4bf50f18SLuigi Rizzo 		if (!strcmp(vpna->up.name, nr_name)) {
637f9790aebSLuigi Rizzo 			netmap_adapter_get(&vpna->up);
638*4bf50f18SLuigi Rizzo 			ND("found existing if %s refs %d", nr_name)
639*4bf50f18SLuigi Rizzo 			*na = &vpna->up;
640f9790aebSLuigi Rizzo 			return 0;
641f9790aebSLuigi Rizzo 		}
642f9790aebSLuigi Rizzo 	}
643f9790aebSLuigi Rizzo 	/* not found, should we create it? */
644f9790aebSLuigi Rizzo 	if (!create)
645f9790aebSLuigi Rizzo 		return ENXIO;
646f9790aebSLuigi Rizzo 	/* yes we should, see if we have space to attach entries */
647f9790aebSLuigi Rizzo 	needed = 2; /* in some cases we only need 1 */
648f9790aebSLuigi Rizzo 	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
649f9790aebSLuigi Rizzo 		D("bridge full %d, cannot create new port", b->bdg_active_ports);
650f2637526SLuigi Rizzo 		return ENOMEM;
651f9790aebSLuigi Rizzo 	}
652f9790aebSLuigi Rizzo 	/* record the next two ports available, but do not allocate yet */
653f9790aebSLuigi Rizzo 	cand = b->bdg_port_index[b->bdg_active_ports];
654f9790aebSLuigi Rizzo 	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
655f9790aebSLuigi Rizzo 	ND("+++ bridge %s port %s used %d avail %d %d",
656*4bf50f18SLuigi Rizzo 		b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);
657f9790aebSLuigi Rizzo 
658f9790aebSLuigi Rizzo 	/*
659f9790aebSLuigi Rizzo 	 * try see if there is a matching NIC with this name
660f9790aebSLuigi Rizzo 	 * (after the bridge's name)
661f9790aebSLuigi Rizzo 	 */
662*4bf50f18SLuigi Rizzo 	ifname = nr_name + b->bdg_namelen + 1;
663*4bf50f18SLuigi Rizzo 	ifp = ifunit_ref(ifname);
664*4bf50f18SLuigi Rizzo 	if (!ifp) {
665*4bf50f18SLuigi Rizzo 		/* Create an ephemeral virtual port
666*4bf50f18SLuigi Rizzo 		 * This block contains all the ephemeral-specific logics
667*4bf50f18SLuigi Rizzo 		 */
668f9790aebSLuigi Rizzo 		if (nmr->nr_cmd) {
669f9790aebSLuigi Rizzo 			/* nr_cmd must be 0 for a virtual port */
670f9790aebSLuigi Rizzo 			return EINVAL;
671f9790aebSLuigi Rizzo 		}
672f9790aebSLuigi Rizzo 
673f9790aebSLuigi Rizzo 		/* bdg_netmap_attach creates a struct netmap_adapter */
674*4bf50f18SLuigi Rizzo 		error = netmap_vp_create(nmr, NULL, &vpna);
675f9790aebSLuigi Rizzo 		if (error) {
676f9790aebSLuigi Rizzo 			D("error %d", error);
677f9790aebSLuigi Rizzo 			free(ifp, M_DEVBUF);
678f9790aebSLuigi Rizzo 			return error;
679f9790aebSLuigi Rizzo 		}
680*4bf50f18SLuigi Rizzo 		/* shortcut - we can skip get_hw_na(),
681*4bf50f18SLuigi Rizzo 		 * ownership check and nm_bdg_attach()
682*4bf50f18SLuigi Rizzo 		 */
683*4bf50f18SLuigi Rizzo 	} else {
684*4bf50f18SLuigi Rizzo 		struct netmap_adapter *hw;
685f9790aebSLuigi Rizzo 
686*4bf50f18SLuigi Rizzo 		error = netmap_get_hw_na(ifp, &hw);
687*4bf50f18SLuigi Rizzo 		if (error || hw == NULL)
688f9790aebSLuigi Rizzo 			goto out;
689f9790aebSLuigi Rizzo 
690*4bf50f18SLuigi Rizzo 		/* host adapter might not be created */
691*4bf50f18SLuigi Rizzo 		error = hw->nm_bdg_attach(nr_name, hw);
692*4bf50f18SLuigi Rizzo 		if (error)
693f9790aebSLuigi Rizzo 			goto out;
694*4bf50f18SLuigi Rizzo 		vpna = hw->na_vp;
695*4bf50f18SLuigi Rizzo 		hostna = hw->na_hostvp;
696f9790aebSLuigi Rizzo 		if_rele(ifp);
697*4bf50f18SLuigi Rizzo 		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
698*4bf50f18SLuigi Rizzo 			hostna = NULL;
699f9790aebSLuigi Rizzo 	}
700f9790aebSLuigi Rizzo 
701f9790aebSLuigi Rizzo 	BDG_WLOCK(b);
702f9790aebSLuigi Rizzo 	vpna->bdg_port = cand;
703f9790aebSLuigi Rizzo 	ND("NIC  %p to bridge port %d", vpna, cand);
704f9790aebSLuigi Rizzo 	/* bind the port to the bridge (virtual ports are not active) */
705f9790aebSLuigi Rizzo 	b->bdg_ports[cand] = vpna;
706f9790aebSLuigi Rizzo 	vpna->na_bdg = b;
707f9790aebSLuigi Rizzo 	b->bdg_active_ports++;
708*4bf50f18SLuigi Rizzo 	if (hostna != NULL) {
709f9790aebSLuigi Rizzo 		/* also bind the host stack to the bridge */
710f9790aebSLuigi Rizzo 		b->bdg_ports[cand2] = hostna;
711f9790aebSLuigi Rizzo 		hostna->bdg_port = cand2;
712f9790aebSLuigi Rizzo 		hostna->na_bdg = b;
713f9790aebSLuigi Rizzo 		b->bdg_active_ports++;
714f9790aebSLuigi Rizzo 		ND("host %p to bridge port %d", hostna, cand2);
715f9790aebSLuigi Rizzo 	}
716*4bf50f18SLuigi Rizzo 	ND("if %s refs %d", ifname, vpna->up.na_refcount);
717f9790aebSLuigi Rizzo 	BDG_WUNLOCK(b);
718*4bf50f18SLuigi Rizzo 	*na = &vpna->up;
719*4bf50f18SLuigi Rizzo 	netmap_adapter_get(*na);
720f9790aebSLuigi Rizzo 	return 0;
721f9790aebSLuigi Rizzo 
722f9790aebSLuigi Rizzo out:
723f9790aebSLuigi Rizzo 	if_rele(ifp);
724f9790aebSLuigi Rizzo 
725f9790aebSLuigi Rizzo 	return error;
726f9790aebSLuigi Rizzo }
727f9790aebSLuigi Rizzo 
728f9790aebSLuigi Rizzo 
729*4bf50f18SLuigi Rizzo /* Process NETMAP_BDG_ATTACH */
730f9790aebSLuigi Rizzo static int
731*4bf50f18SLuigi Rizzo nm_bdg_ctl_attach(struct nmreq *nmr)
732f9790aebSLuigi Rizzo {
733f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
734f9790aebSLuigi Rizzo 	int error;
735f9790aebSLuigi Rizzo 
736f9790aebSLuigi Rizzo 	NMG_LOCK();
737f2637526SLuigi Rizzo 
73817885a7bSLuigi Rizzo 	error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */);
739*4bf50f18SLuigi Rizzo 	if (error) /* no device */
740f9790aebSLuigi Rizzo 		goto unlock_exit;
741f2637526SLuigi Rizzo 
74217885a7bSLuigi Rizzo 	if (na == NULL) { /* VALE prefix missing */
743f9790aebSLuigi Rizzo 		error = EINVAL;
74417885a7bSLuigi Rizzo 		goto unlock_exit;
745f9790aebSLuigi Rizzo 	}
746f9790aebSLuigi Rizzo 
747*4bf50f18SLuigi Rizzo 	if (NETMAP_OWNED_BY_ANY(na)) {
748f9790aebSLuigi Rizzo 		error = EBUSY;
749f9790aebSLuigi Rizzo 		goto unref_exit;
750f9790aebSLuigi Rizzo 	}
751f9790aebSLuigi Rizzo 
752*4bf50f18SLuigi Rizzo 	if (na->nm_bdg_ctl) {
753*4bf50f18SLuigi Rizzo 		/* nop for VALE ports. The bwrap needs to put the hwna
754*4bf50f18SLuigi Rizzo 		 * in netmap mode (see netmap_bwrap_bdg_ctl)
755*4bf50f18SLuigi Rizzo 		 */
756*4bf50f18SLuigi Rizzo 		error = na->nm_bdg_ctl(na, nmr, 1);
757*4bf50f18SLuigi Rizzo 		if (error)
758f9790aebSLuigi Rizzo 			goto unref_exit;
759*4bf50f18SLuigi Rizzo 		ND("registered %s to netmap-mode", na->name);
760f9790aebSLuigi Rizzo 	}
761f9790aebSLuigi Rizzo 	NMG_UNLOCK();
762f9790aebSLuigi Rizzo 	return 0;
763f9790aebSLuigi Rizzo 
764f9790aebSLuigi Rizzo unref_exit:
765f9790aebSLuigi Rizzo 	netmap_adapter_put(na);
766f9790aebSLuigi Rizzo unlock_exit:
767f9790aebSLuigi Rizzo 	NMG_UNLOCK();
768f9790aebSLuigi Rizzo 	return error;
769f9790aebSLuigi Rizzo }
770f9790aebSLuigi Rizzo 
77117885a7bSLuigi Rizzo 
772*4bf50f18SLuigi Rizzo /* process NETMAP_BDG_DETACH */
773f9790aebSLuigi Rizzo static int
774*4bf50f18SLuigi Rizzo nm_bdg_ctl_detach(struct nmreq *nmr)
775f9790aebSLuigi Rizzo {
776f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
777f9790aebSLuigi Rizzo 	int error;
778f9790aebSLuigi Rizzo 
779f9790aebSLuigi Rizzo 	NMG_LOCK();
78017885a7bSLuigi Rizzo 	error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */);
781f9790aebSLuigi Rizzo 	if (error) { /* no device, or another bridge or user owns the device */
782f9790aebSLuigi Rizzo 		goto unlock_exit;
783f9790aebSLuigi Rizzo 	}
784f2637526SLuigi Rizzo 
78517885a7bSLuigi Rizzo 	if (na == NULL) { /* VALE prefix missing */
786f9790aebSLuigi Rizzo 		error = EINVAL;
78717885a7bSLuigi Rizzo 		goto unlock_exit;
788f9790aebSLuigi Rizzo 	}
78917885a7bSLuigi Rizzo 
790*4bf50f18SLuigi Rizzo 	if (na->nm_bdg_ctl) {
791*4bf50f18SLuigi Rizzo 		/* remove the port from bridge. The bwrap
792*4bf50f18SLuigi Rizzo 		 * also needs to put the hwna in normal mode
793*4bf50f18SLuigi Rizzo 		 */
794*4bf50f18SLuigi Rizzo 		error = na->nm_bdg_ctl(na, nmr, 0);
795f9790aebSLuigi Rizzo 	}
796f9790aebSLuigi Rizzo 
797f9790aebSLuigi Rizzo 	netmap_adapter_put(na);
798f9790aebSLuigi Rizzo unlock_exit:
799f9790aebSLuigi Rizzo 	NMG_UNLOCK();
800f9790aebSLuigi Rizzo 	return error;
801f9790aebSLuigi Rizzo 
802f9790aebSLuigi Rizzo }
803f9790aebSLuigi Rizzo 
804f9790aebSLuigi Rizzo 
805*4bf50f18SLuigi Rizzo /* Called by either user's context (netmap_ioctl())
806*4bf50f18SLuigi Rizzo  * or external kernel modules (e.g., Openvswitch).
807*4bf50f18SLuigi Rizzo  * Operation is indicated in nmr->nr_cmd.
808*4bf50f18SLuigi Rizzo  * NETMAP_BDG_OPS that sets configure/lookup/dtor functions to the bridge
809*4bf50f18SLuigi Rizzo  * requires bdg_ops argument; the other commands ignore this argument.
810*4bf50f18SLuigi Rizzo  *
811f9790aebSLuigi Rizzo  * Called without NMG_LOCK.
812f9790aebSLuigi Rizzo  */
813f9790aebSLuigi Rizzo int
814*4bf50f18SLuigi Rizzo netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
815f9790aebSLuigi Rizzo {
816f9790aebSLuigi Rizzo 	struct nm_bridge *b;
817f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
818f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna;
819f9790aebSLuigi Rizzo 	char *name = nmr->nr_name;
820f9790aebSLuigi Rizzo 	int cmd = nmr->nr_cmd, namelen = strlen(name);
821f9790aebSLuigi Rizzo 	int error = 0, i, j;
822f9790aebSLuigi Rizzo 
823f9790aebSLuigi Rizzo 	switch (cmd) {
824*4bf50f18SLuigi Rizzo 	case NETMAP_BDG_NEWIF:
825*4bf50f18SLuigi Rizzo 		error = nm_vi_create(nmr);
826*4bf50f18SLuigi Rizzo 		break;
827*4bf50f18SLuigi Rizzo 
828*4bf50f18SLuigi Rizzo 	case NETMAP_BDG_DELIF:
829*4bf50f18SLuigi Rizzo 		error = nm_vi_destroy(nmr->nr_name);
830*4bf50f18SLuigi Rizzo 		break;
831*4bf50f18SLuigi Rizzo 
832f9790aebSLuigi Rizzo 	case NETMAP_BDG_ATTACH:
833*4bf50f18SLuigi Rizzo 		error = nm_bdg_ctl_attach(nmr);
834f9790aebSLuigi Rizzo 		break;
835f9790aebSLuigi Rizzo 
836f9790aebSLuigi Rizzo 	case NETMAP_BDG_DETACH:
837*4bf50f18SLuigi Rizzo 		error = nm_bdg_ctl_detach(nmr);
838f9790aebSLuigi Rizzo 		break;
839f9790aebSLuigi Rizzo 
840f9790aebSLuigi Rizzo 	case NETMAP_BDG_LIST:
841f9790aebSLuigi Rizzo 		/* this is used to enumerate bridges and ports */
842f9790aebSLuigi Rizzo 		if (namelen) { /* look up indexes of bridge and port */
843f9790aebSLuigi Rizzo 			if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
844f9790aebSLuigi Rizzo 				error = EINVAL;
845f9790aebSLuigi Rizzo 				break;
846f9790aebSLuigi Rizzo 			}
847f9790aebSLuigi Rizzo 			NMG_LOCK();
848f9790aebSLuigi Rizzo 			b = nm_find_bridge(name, 0 /* don't create */);
849f9790aebSLuigi Rizzo 			if (!b) {
850f9790aebSLuigi Rizzo 				error = ENOENT;
851f9790aebSLuigi Rizzo 				NMG_UNLOCK();
852f9790aebSLuigi Rizzo 				break;
853f9790aebSLuigi Rizzo 			}
854f9790aebSLuigi Rizzo 
855*4bf50f18SLuigi Rizzo 			name = name + b->bdg_namelen + 1;
856f9790aebSLuigi Rizzo 			error = ENOENT;
857f9790aebSLuigi Rizzo 			for (j = 0; j < b->bdg_active_ports; j++) {
858f9790aebSLuigi Rizzo 				i = b->bdg_port_index[j];
859f9790aebSLuigi Rizzo 				vpna = b->bdg_ports[i];
860f9790aebSLuigi Rizzo 				if (vpna == NULL) {
861f9790aebSLuigi Rizzo 					D("---AAAAAAAAARGH-------");
862f9790aebSLuigi Rizzo 					continue;
863f9790aebSLuigi Rizzo 				}
864f9790aebSLuigi Rizzo 				/* the former and the latter identify a
865f9790aebSLuigi Rizzo 				 * virtual port and a NIC, respectively
866f9790aebSLuigi Rizzo 				 */
867*4bf50f18SLuigi Rizzo 				if (!strcmp(vpna->up.name, name)) {
868f9790aebSLuigi Rizzo 					/* bridge index */
869f9790aebSLuigi Rizzo 					nmr->nr_arg1 = b - nm_bridges;
870f9790aebSLuigi Rizzo 					nmr->nr_arg2 = i; /* port index */
871f9790aebSLuigi Rizzo 					error = 0;
872f9790aebSLuigi Rizzo 					break;
873f9790aebSLuigi Rizzo 				}
874f9790aebSLuigi Rizzo 			}
875f9790aebSLuigi Rizzo 			NMG_UNLOCK();
876f9790aebSLuigi Rizzo 		} else {
877f9790aebSLuigi Rizzo 			/* return the first non-empty entry starting from
878f9790aebSLuigi Rizzo 			 * bridge nr_arg1 and port nr_arg2.
879f9790aebSLuigi Rizzo 			 *
880f9790aebSLuigi Rizzo 			 * Users can detect the end of the same bridge by
881f9790aebSLuigi Rizzo 			 * seeing the new and old value of nr_arg1, and can
882f9790aebSLuigi Rizzo 			 * detect the end of all the bridge by error != 0
883f9790aebSLuigi Rizzo 			 */
884f9790aebSLuigi Rizzo 			i = nmr->nr_arg1;
885f9790aebSLuigi Rizzo 			j = nmr->nr_arg2;
886f9790aebSLuigi Rizzo 
887f9790aebSLuigi Rizzo 			NMG_LOCK();
888f9790aebSLuigi Rizzo 			for (error = ENOENT; i < NM_BRIDGES; i++) {
889f9790aebSLuigi Rizzo 				b = nm_bridges + i;
890f9790aebSLuigi Rizzo 				if (j >= b->bdg_active_ports) {
891f9790aebSLuigi Rizzo 					j = 0; /* following bridges scan from 0 */
892f9790aebSLuigi Rizzo 					continue;
893f9790aebSLuigi Rizzo 				}
894f9790aebSLuigi Rizzo 				nmr->nr_arg1 = i;
895f9790aebSLuigi Rizzo 				nmr->nr_arg2 = j;
896f9790aebSLuigi Rizzo 				j = b->bdg_port_index[j];
897f9790aebSLuigi Rizzo 				vpna = b->bdg_ports[j];
898*4bf50f18SLuigi Rizzo 				strncpy(name, vpna->up.name, (size_t)IFNAMSIZ);
899f9790aebSLuigi Rizzo 				error = 0;
900f9790aebSLuigi Rizzo 				break;
901f9790aebSLuigi Rizzo 			}
902f9790aebSLuigi Rizzo 			NMG_UNLOCK();
903f9790aebSLuigi Rizzo 		}
904f9790aebSLuigi Rizzo 		break;
905f9790aebSLuigi Rizzo 
906*4bf50f18SLuigi Rizzo 	case NETMAP_BDG_REGOPS: /* XXX this should not be available from userspace */
907*4bf50f18SLuigi Rizzo 		/* register callbacks to the given bridge.
908f9790aebSLuigi Rizzo 		 * nmr->nr_name may be just bridge's name (including ':'
909f9790aebSLuigi Rizzo 		 * if it is not just NM_NAME).
910f9790aebSLuigi Rizzo 		 */
911*4bf50f18SLuigi Rizzo 		if (!bdg_ops) {
912f9790aebSLuigi Rizzo 			error = EINVAL;
913f9790aebSLuigi Rizzo 			break;
914f9790aebSLuigi Rizzo 		}
915f9790aebSLuigi Rizzo 		NMG_LOCK();
916f9790aebSLuigi Rizzo 		b = nm_find_bridge(name, 0 /* don't create */);
917f9790aebSLuigi Rizzo 		if (!b) {
918f9790aebSLuigi Rizzo 			error = EINVAL;
919f9790aebSLuigi Rizzo 		} else {
920*4bf50f18SLuigi Rizzo 			b->bdg_ops = *bdg_ops;
921f9790aebSLuigi Rizzo 		}
922f9790aebSLuigi Rizzo 		NMG_UNLOCK();
923f9790aebSLuigi Rizzo 		break;
924f9790aebSLuigi Rizzo 
925f0ea3689SLuigi Rizzo 	case NETMAP_BDG_VNET_HDR:
926f0ea3689SLuigi Rizzo 		/* Valid lengths for the virtio-net header are 0 (no header),
927f0ea3689SLuigi Rizzo 		   10 and 12. */
928f0ea3689SLuigi Rizzo 		if (nmr->nr_arg1 != 0 &&
929f0ea3689SLuigi Rizzo 			nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) &&
930f0ea3689SLuigi Rizzo 				nmr->nr_arg1 != 12) {
931f0ea3689SLuigi Rizzo 			error = EINVAL;
932f0ea3689SLuigi Rizzo 			break;
933f0ea3689SLuigi Rizzo 		}
934f9790aebSLuigi Rizzo 		NMG_LOCK();
935f9790aebSLuigi Rizzo 		error = netmap_get_bdg_na(nmr, &na, 0);
93617885a7bSLuigi Rizzo 		if (na && !error) {
937f9790aebSLuigi Rizzo 			vpna = (struct netmap_vp_adapter *)na;
938f0ea3689SLuigi Rizzo 			vpna->virt_hdr_len = nmr->nr_arg1;
939f0ea3689SLuigi Rizzo 			if (vpna->virt_hdr_len)
940*4bf50f18SLuigi Rizzo 				vpna->mfs = NETMAP_BUF_SIZE(na);
941f0ea3689SLuigi Rizzo 			D("Using vnet_hdr_len %d for %p", vpna->virt_hdr_len, vpna);
94217885a7bSLuigi Rizzo 			netmap_adapter_put(na);
943f9790aebSLuigi Rizzo 		}
944f9790aebSLuigi Rizzo 		NMG_UNLOCK();
945f9790aebSLuigi Rizzo 		break;
946f9790aebSLuigi Rizzo 
947f9790aebSLuigi Rizzo 	default:
948f9790aebSLuigi Rizzo 		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
949f9790aebSLuigi Rizzo 		error = EINVAL;
950f9790aebSLuigi Rizzo 		break;
951f9790aebSLuigi Rizzo 	}
952f9790aebSLuigi Rizzo 	return error;
953f9790aebSLuigi Rizzo }
954f9790aebSLuigi Rizzo 
955*4bf50f18SLuigi Rizzo int
956*4bf50f18SLuigi Rizzo netmap_bdg_config(struct nmreq *nmr)
957*4bf50f18SLuigi Rizzo {
958*4bf50f18SLuigi Rizzo 	struct nm_bridge *b;
959*4bf50f18SLuigi Rizzo 	int error = EINVAL;
960*4bf50f18SLuigi Rizzo 
961*4bf50f18SLuigi Rizzo 	NMG_LOCK();
962*4bf50f18SLuigi Rizzo 	b = nm_find_bridge(nmr->nr_name, 0);
963*4bf50f18SLuigi Rizzo 	if (!b) {
964*4bf50f18SLuigi Rizzo 		NMG_UNLOCK();
965*4bf50f18SLuigi Rizzo 		return error;
966*4bf50f18SLuigi Rizzo 	}
967*4bf50f18SLuigi Rizzo 	NMG_UNLOCK();
968*4bf50f18SLuigi Rizzo 	/* Don't call config() with NMG_LOCK() held */
969*4bf50f18SLuigi Rizzo 	BDG_RLOCK(b);
970*4bf50f18SLuigi Rizzo 	if (b->bdg_ops.config != NULL)
971*4bf50f18SLuigi Rizzo 		error = b->bdg_ops.config((struct nm_ifreq *)nmr);
972*4bf50f18SLuigi Rizzo 	BDG_RUNLOCK(b);
973*4bf50f18SLuigi Rizzo 	return error;
974*4bf50f18SLuigi Rizzo }
975*4bf50f18SLuigi Rizzo 
976*4bf50f18SLuigi Rizzo 
977*4bf50f18SLuigi Rizzo /* nm_krings_create callback for VALE ports.
978*4bf50f18SLuigi Rizzo  * Calls the standard netmap_krings_create, then adds leases on rx
979*4bf50f18SLuigi Rizzo  * rings and bdgfwd on tx rings.
980*4bf50f18SLuigi Rizzo  */
981f9790aebSLuigi Rizzo static int
982f9790aebSLuigi Rizzo netmap_vp_krings_create(struct netmap_adapter *na)
983f9790aebSLuigi Rizzo {
984f0ea3689SLuigi Rizzo 	u_int tailroom;
985f9790aebSLuigi Rizzo 	int error, i;
986f9790aebSLuigi Rizzo 	uint32_t *leases;
987f0ea3689SLuigi Rizzo 	u_int nrx = netmap_real_rx_rings(na);
988f9790aebSLuigi Rizzo 
989f9790aebSLuigi Rizzo 	/*
990f9790aebSLuigi Rizzo 	 * Leases are attached to RX rings on vale ports
991f9790aebSLuigi Rizzo 	 */
992f9790aebSLuigi Rizzo 	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
993f9790aebSLuigi Rizzo 
994f0ea3689SLuigi Rizzo 	error = netmap_krings_create(na, tailroom);
995f9790aebSLuigi Rizzo 	if (error)
996f9790aebSLuigi Rizzo 		return error;
997f9790aebSLuigi Rizzo 
998f9790aebSLuigi Rizzo 	leases = na->tailroom;
999f9790aebSLuigi Rizzo 
1000f9790aebSLuigi Rizzo 	for (i = 0; i < nrx; i++) { /* Receive rings */
1001f9790aebSLuigi Rizzo 		na->rx_rings[i].nkr_leases = leases;
1002f9790aebSLuigi Rizzo 		leases += na->num_rx_desc;
1003f9790aebSLuigi Rizzo 	}
1004f9790aebSLuigi Rizzo 
1005f9790aebSLuigi Rizzo 	error = nm_alloc_bdgfwd(na);
1006f9790aebSLuigi Rizzo 	if (error) {
1007f9790aebSLuigi Rizzo 		netmap_krings_delete(na);
1008f9790aebSLuigi Rizzo 		return error;
1009f9790aebSLuigi Rizzo 	}
1010f9790aebSLuigi Rizzo 
1011f9790aebSLuigi Rizzo 	return 0;
1012f9790aebSLuigi Rizzo }
1013f9790aebSLuigi Rizzo 
101417885a7bSLuigi Rizzo 
/*
 * nm_krings_delete callback for VALE ports: release the forwarding
 * tables first, then the krings themselves.
 */
static void
netmap_vp_krings_delete(struct netmap_adapter *na)
{
	nm_free_bdgfwd(na);		/* counterpart of nm_alloc_bdgfwd() */
	netmap_krings_delete(na);	/* counterpart of netmap_krings_create() */
}
1022f9790aebSLuigi Rizzo 
1023f9790aebSLuigi Rizzo 
1024f9790aebSLuigi Rizzo static int
1025f9790aebSLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
1026f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *na, u_int ring_nr);
1027f9790aebSLuigi Rizzo 
1028f9790aebSLuigi Rizzo 
1029f9790aebSLuigi Rizzo /*
1030*4bf50f18SLuigi Rizzo  * main dispatch routine for the bridge.
1031f9790aebSLuigi Rizzo  * Grab packets from a kring, move them into the ft structure
1032f9790aebSLuigi Rizzo  * associated to the tx (input) port. Max one instance per port,
1033f9790aebSLuigi Rizzo  * filtered on input (ioctl, poll or XXX).
1034f9790aebSLuigi Rizzo  * Returns the next position in the ring.
1035f9790aebSLuigi Rizzo  */
1036f9790aebSLuigi Rizzo static int
1037*4bf50f18SLuigi Rizzo nm_bdg_preflush(struct netmap_kring *kring, u_int end)
1038f9790aebSLuigi Rizzo {
1039*4bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *na =
1040*4bf50f18SLuigi Rizzo 		(struct netmap_vp_adapter*)kring->na;
1041f9790aebSLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
1042f9790aebSLuigi Rizzo 	struct nm_bdg_fwd *ft;
1043*4bf50f18SLuigi Rizzo 	u_int ring_nr = kring->ring_id;
1044f9790aebSLuigi Rizzo 	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
1045f9790aebSLuigi Rizzo 	u_int ft_i = 0;	/* start from 0 */
1046f9790aebSLuigi Rizzo 	u_int frags = 1; /* how many frags ? */
1047f9790aebSLuigi Rizzo 	struct nm_bridge *b = na->na_bdg;
1048f9790aebSLuigi Rizzo 
1049f9790aebSLuigi Rizzo 	/* To protect against modifications to the bridge we acquire a
1050f9790aebSLuigi Rizzo 	 * shared lock, waiting if we can sleep (if the source port is
1051f9790aebSLuigi Rizzo 	 * attached to a user process) or with a trylock otherwise (NICs).
1052f9790aebSLuigi Rizzo 	 */
1053f9790aebSLuigi Rizzo 	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
1054f9790aebSLuigi Rizzo 	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
1055f9790aebSLuigi Rizzo 		BDG_RLOCK(b);
1056f9790aebSLuigi Rizzo 	else if (!BDG_RTRYLOCK(b))
1057f9790aebSLuigi Rizzo 		return 0;
1058f9790aebSLuigi Rizzo 	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
1059f9790aebSLuigi Rizzo 	ft = kring->nkr_ft;
1060f9790aebSLuigi Rizzo 
1061f9790aebSLuigi Rizzo 	for (; likely(j != end); j = nm_next(j, lim)) {
1062f9790aebSLuigi Rizzo 		struct netmap_slot *slot = &ring->slot[j];
1063f9790aebSLuigi Rizzo 		char *buf;
1064f9790aebSLuigi Rizzo 
1065f9790aebSLuigi Rizzo 		ft[ft_i].ft_len = slot->len;
1066f9790aebSLuigi Rizzo 		ft[ft_i].ft_flags = slot->flags;
1067f9790aebSLuigi Rizzo 
1068f9790aebSLuigi Rizzo 		ND("flags is 0x%x", slot->flags);
1069f9790aebSLuigi Rizzo 		/* this slot goes into a list so initialize the link field */
1070f9790aebSLuigi Rizzo 		ft[ft_i].ft_next = NM_FT_NULL;
1071f9790aebSLuigi Rizzo 		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
1072*4bf50f18SLuigi Rizzo 			(void *)(uintptr_t)slot->ptr : NMB(&na->up, slot);
1073e31c6ec7SLuigi Rizzo 		if (unlikely(buf == NULL)) {
1074e31c6ec7SLuigi Rizzo 			RD(5, "NULL %s buffer pointer from %s slot %d len %d",
1075e31c6ec7SLuigi Rizzo 				(slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT",
1076e31c6ec7SLuigi Rizzo 				kring->name, j, ft[ft_i].ft_len);
1077*4bf50f18SLuigi Rizzo 			buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up);
1078e31c6ec7SLuigi Rizzo 			ft[ft_i].ft_len = 0;
1079e31c6ec7SLuigi Rizzo 			ft[ft_i].ft_flags = 0;
1080e31c6ec7SLuigi Rizzo 		}
10812e159ef0SLuigi Rizzo 		__builtin_prefetch(buf);
1082f9790aebSLuigi Rizzo 		++ft_i;
1083f9790aebSLuigi Rizzo 		if (slot->flags & NS_MOREFRAG) {
1084f9790aebSLuigi Rizzo 			frags++;
1085f9790aebSLuigi Rizzo 			continue;
1086f9790aebSLuigi Rizzo 		}
1087f9790aebSLuigi Rizzo 		if (unlikely(netmap_verbose && frags > 1))
1088f9790aebSLuigi Rizzo 			RD(5, "%d frags at %d", frags, ft_i - frags);
1089f9790aebSLuigi Rizzo 		ft[ft_i - frags].ft_frags = frags;
1090f9790aebSLuigi Rizzo 		frags = 1;
1091f9790aebSLuigi Rizzo 		if (unlikely((int)ft_i >= bridge_batch))
1092f9790aebSLuigi Rizzo 			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1093f9790aebSLuigi Rizzo 	}
1094f9790aebSLuigi Rizzo 	if (frags > 1) {
1095f9790aebSLuigi Rizzo 		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
1096f9790aebSLuigi Rizzo 		// ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
1097f9790aebSLuigi Rizzo 		ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
1098f9790aebSLuigi Rizzo 		ft[ft_i - frags].ft_frags = frags - 1;
1099f9790aebSLuigi Rizzo 	}
1100f9790aebSLuigi Rizzo 	if (ft_i)
1101f9790aebSLuigi Rizzo 		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1102f9790aebSLuigi Rizzo 	BDG_RUNLOCK(b);
1103f9790aebSLuigi Rizzo 	return j;
1104f9790aebSLuigi Rizzo }
1105f9790aebSLuigi Rizzo 
1106f9790aebSLuigi Rizzo 
1107f9790aebSLuigi Rizzo /* ----- FreeBSD if_bridge hash function ------- */
1108f9790aebSLuigi Rizzo 
1109f9790aebSLuigi Rizzo /*
1110f9790aebSLuigi Rizzo  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
1111f9790aebSLuigi Rizzo  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
1112f9790aebSLuigi Rizzo  *
1113f9790aebSLuigi Rizzo  * http://www.burtleburtle.net/bob/hash/spooky.html
1114f9790aebSLuigi Rizzo  */
1115f9790aebSLuigi Rizzo #define mix(a, b, c)                                                    \
1116f9790aebSLuigi Rizzo do {                                                                    \
1117f9790aebSLuigi Rizzo         a -= b; a -= c; a ^= (c >> 13);                                 \
1118f9790aebSLuigi Rizzo         b -= c; b -= a; b ^= (a << 8);                                  \
1119f9790aebSLuigi Rizzo         c -= a; c -= b; c ^= (b >> 13);                                 \
1120f9790aebSLuigi Rizzo         a -= b; a -= c; a ^= (c >> 12);                                 \
1121f9790aebSLuigi Rizzo         b -= c; b -= a; b ^= (a << 16);                                 \
1122f9790aebSLuigi Rizzo         c -= a; c -= b; c ^= (b >> 5);                                  \
1123f9790aebSLuigi Rizzo         a -= b; a -= c; a ^= (c >> 3);                                  \
1124f9790aebSLuigi Rizzo         b -= c; b -= a; b ^= (a << 10);                                 \
1125f9790aebSLuigi Rizzo         c -= a; c -= b; c ^= (b >> 15);                                 \
1126f9790aebSLuigi Rizzo } while (/*CONSTCOND*/0)
1127f9790aebSLuigi Rizzo 
112817885a7bSLuigi Rizzo 
1129f9790aebSLuigi Rizzo static __inline uint32_t
1130f9790aebSLuigi Rizzo nm_bridge_rthash(const uint8_t *addr)
1131f9790aebSLuigi Rizzo {
1132f9790aebSLuigi Rizzo         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key
1133f9790aebSLuigi Rizzo 
1134f9790aebSLuigi Rizzo         b += addr[5] << 8;
1135f9790aebSLuigi Rizzo         b += addr[4];
1136f9790aebSLuigi Rizzo         a += addr[3] << 24;
1137f9790aebSLuigi Rizzo         a += addr[2] << 16;
1138f9790aebSLuigi Rizzo         a += addr[1] << 8;
1139f9790aebSLuigi Rizzo         a += addr[0];
1140f9790aebSLuigi Rizzo 
1141f9790aebSLuigi Rizzo         mix(a, b, c);
1142f9790aebSLuigi Rizzo #define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
1143f9790aebSLuigi Rizzo         return (c & BRIDGE_RTHASH_MASK);
1144f9790aebSLuigi Rizzo }
1145f9790aebSLuigi Rizzo 
1146f9790aebSLuigi Rizzo #undef mix
1147f9790aebSLuigi Rizzo 
1148f9790aebSLuigi Rizzo 
1149*4bf50f18SLuigi Rizzo /* nm_register callback for VALE ports */
1150f9790aebSLuigi Rizzo static int
1151*4bf50f18SLuigi Rizzo netmap_vp_reg(struct netmap_adapter *na, int onoff)
1152f9790aebSLuigi Rizzo {
1153f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna =
1154f9790aebSLuigi Rizzo 		(struct netmap_vp_adapter*)na;
1155f9790aebSLuigi Rizzo 
1156*4bf50f18SLuigi Rizzo 	/* persistent ports may be put in netmap mode
1157*4bf50f18SLuigi Rizzo 	 * before being attached to a bridge
1158f9790aebSLuigi Rizzo 	 */
1159*4bf50f18SLuigi Rizzo 	if (vpna->na_bdg)
1160f9790aebSLuigi Rizzo 		BDG_WLOCK(vpna->na_bdg);
1161f9790aebSLuigi Rizzo 	if (onoff) {
1162*4bf50f18SLuigi Rizzo 		na->na_flags |= NAF_NETMAP_ON;
1163*4bf50f18SLuigi Rizzo 		 /* XXX on FreeBSD, persistent VALE ports should also
1164*4bf50f18SLuigi Rizzo 		 * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
1165*4bf50f18SLuigi Rizzo 		 */
1166f9790aebSLuigi Rizzo 	} else {
1167*4bf50f18SLuigi Rizzo 		na->na_flags &= ~NAF_NETMAP_ON;
1168f9790aebSLuigi Rizzo 	}
1169*4bf50f18SLuigi Rizzo 	if (vpna->na_bdg)
1170f9790aebSLuigi Rizzo 		BDG_WUNLOCK(vpna->na_bdg);
1171f9790aebSLuigi Rizzo 	return 0;
1172f9790aebSLuigi Rizzo }
1173f9790aebSLuigi Rizzo 
1174f9790aebSLuigi Rizzo 
1175f9790aebSLuigi Rizzo /*
1176f9790aebSLuigi Rizzo  * Lookup function for a learning bridge.
1177f9790aebSLuigi Rizzo  * Update the hash table with the source address,
1178f9790aebSLuigi Rizzo  * and then returns the destination port index, and the
1179f9790aebSLuigi Rizzo  * ring in *dst_ring (at the moment, always use ring 0)
1180f9790aebSLuigi Rizzo  */
1181f9790aebSLuigi Rizzo u_int
1182*4bf50f18SLuigi Rizzo netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
1183*4bf50f18SLuigi Rizzo 		const struct netmap_vp_adapter *na)
1184f9790aebSLuigi Rizzo {
1185*4bf50f18SLuigi Rizzo 	uint8_t *buf = ft->ft_buf;
1186*4bf50f18SLuigi Rizzo 	u_int buf_len = ft->ft_len;
1187f9790aebSLuigi Rizzo 	struct nm_hash_ent *ht = na->na_bdg->ht;
1188f9790aebSLuigi Rizzo 	uint32_t sh, dh;
1189f9790aebSLuigi Rizzo 	u_int dst, mysrc = na->bdg_port;
1190f9790aebSLuigi Rizzo 	uint64_t smac, dmac;
1191f9790aebSLuigi Rizzo 
1192*4bf50f18SLuigi Rizzo 	/* safety check, unfortunately we have many cases */
1193*4bf50f18SLuigi Rizzo 	if (buf_len >= 14 + na->virt_hdr_len) {
1194*4bf50f18SLuigi Rizzo 		/* virthdr + mac_hdr in the same slot */
1195*4bf50f18SLuigi Rizzo 		buf += na->virt_hdr_len;
1196*4bf50f18SLuigi Rizzo 		buf_len -= na->virt_hdr_len;
1197*4bf50f18SLuigi Rizzo 	} else if (buf_len == na->virt_hdr_len && ft->ft_flags & NS_MOREFRAG) {
1198*4bf50f18SLuigi Rizzo 		/* only header in first fragment */
1199*4bf50f18SLuigi Rizzo 		ft++;
1200*4bf50f18SLuigi Rizzo 		buf = ft->ft_buf;
1201*4bf50f18SLuigi Rizzo 		buf_len = ft->ft_len;
1202*4bf50f18SLuigi Rizzo 	} else {
1203*4bf50f18SLuigi Rizzo 		RD(5, "invalid buf format, length %d", buf_len);
1204f9790aebSLuigi Rizzo 		return NM_BDG_NOPORT;
1205f9790aebSLuigi Rizzo 	}
1206f9790aebSLuigi Rizzo 	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
1207f9790aebSLuigi Rizzo 	smac = le64toh(*(uint64_t *)(buf + 4));
1208f9790aebSLuigi Rizzo 	smac >>= 16;
1209f9790aebSLuigi Rizzo 
1210f9790aebSLuigi Rizzo 	/*
1211f9790aebSLuigi Rizzo 	 * The hash is somewhat expensive, there might be some
1212f9790aebSLuigi Rizzo 	 * worthwhile optimizations here.
1213f9790aebSLuigi Rizzo 	 */
1214f9790aebSLuigi Rizzo 	if ((buf[6] & 1) == 0) { /* valid src */
1215f9790aebSLuigi Rizzo 		uint8_t *s = buf+6;
1216f9790aebSLuigi Rizzo 		sh = nm_bridge_rthash(s); // XXX hash of source
1217f9790aebSLuigi Rizzo 		/* update source port forwarding entry */
1218f9790aebSLuigi Rizzo 		ht[sh].mac = smac;	/* XXX expire ? */
1219f9790aebSLuigi Rizzo 		ht[sh].ports = mysrc;
1220f9790aebSLuigi Rizzo 		if (netmap_verbose)
1221f9790aebSLuigi Rizzo 		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
1222f9790aebSLuigi Rizzo 			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
1223f9790aebSLuigi Rizzo 	}
1224f9790aebSLuigi Rizzo 	dst = NM_BDG_BROADCAST;
1225f9790aebSLuigi Rizzo 	if ((buf[0] & 1) == 0) { /* unicast */
1226f9790aebSLuigi Rizzo 		dh = nm_bridge_rthash(buf); // XXX hash of dst
1227f9790aebSLuigi Rizzo 		if (ht[dh].mac == dmac) {	/* found dst */
1228f9790aebSLuigi Rizzo 			dst = ht[dh].ports;
1229f9790aebSLuigi Rizzo 		}
1230f9790aebSLuigi Rizzo 		/* XXX otherwise return NM_BDG_UNKNOWN ? */
1231f9790aebSLuigi Rizzo 	}
1232f9790aebSLuigi Rizzo 	*dst_ring = 0;
1233f9790aebSLuigi Rizzo 	return dst;
1234f9790aebSLuigi Rizzo }
1235f9790aebSLuigi Rizzo 
1236f9790aebSLuigi Rizzo 
1237f9790aebSLuigi Rizzo /*
123817885a7bSLuigi Rizzo  * Available space in the ring. Only used in VALE code
123917885a7bSLuigi Rizzo  * and only with is_rx = 1
124017885a7bSLuigi Rizzo  */
124117885a7bSLuigi Rizzo static inline uint32_t
124217885a7bSLuigi Rizzo nm_kr_space(struct netmap_kring *k, int is_rx)
124317885a7bSLuigi Rizzo {
124417885a7bSLuigi Rizzo 	int space;
124517885a7bSLuigi Rizzo 
124617885a7bSLuigi Rizzo 	if (is_rx) {
124717885a7bSLuigi Rizzo 		int busy = k->nkr_hwlease - k->nr_hwcur;
124817885a7bSLuigi Rizzo 		if (busy < 0)
124917885a7bSLuigi Rizzo 			busy += k->nkr_num_slots;
125017885a7bSLuigi Rizzo 		space = k->nkr_num_slots - 1 - busy;
125117885a7bSLuigi Rizzo 	} else {
125217885a7bSLuigi Rizzo 		/* XXX never used in this branch */
125317885a7bSLuigi Rizzo 		space = k->nr_hwtail - k->nkr_hwlease;
125417885a7bSLuigi Rizzo 		if (space < 0)
125517885a7bSLuigi Rizzo 			space += k->nkr_num_slots;
125617885a7bSLuigi Rizzo 	}
125717885a7bSLuigi Rizzo #if 0
125817885a7bSLuigi Rizzo 	// sanity check
125917885a7bSLuigi Rizzo 	if (k->nkr_hwlease >= k->nkr_num_slots ||
126017885a7bSLuigi Rizzo 		k->nr_hwcur >= k->nkr_num_slots ||
126117885a7bSLuigi Rizzo 		k->nr_tail >= k->nkr_num_slots ||
126217885a7bSLuigi Rizzo 		busy < 0 ||
126317885a7bSLuigi Rizzo 		busy >= k->nkr_num_slots) {
126417885a7bSLuigi Rizzo 		D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d",			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
126517885a7bSLuigi Rizzo 			k->nkr_lease_idx, k->nkr_num_slots);
126617885a7bSLuigi Rizzo 	}
126717885a7bSLuigi Rizzo #endif
126817885a7bSLuigi Rizzo 	return space;
126917885a7bSLuigi Rizzo }
127017885a7bSLuigi Rizzo 
127117885a7bSLuigi Rizzo 
127217885a7bSLuigi Rizzo 
127317885a7bSLuigi Rizzo 
127417885a7bSLuigi Rizzo /* make a lease on the kring for N positions. return the
127517885a7bSLuigi Rizzo  * lease index
127617885a7bSLuigi Rizzo  * XXX only used in VALE code and with is_rx = 1
127717885a7bSLuigi Rizzo  */
127817885a7bSLuigi Rizzo static inline uint32_t
127917885a7bSLuigi Rizzo nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
128017885a7bSLuigi Rizzo {
128117885a7bSLuigi Rizzo 	uint32_t lim = k->nkr_num_slots - 1;
128217885a7bSLuigi Rizzo 	uint32_t lease_idx = k->nkr_lease_idx;
128317885a7bSLuigi Rizzo 
128417885a7bSLuigi Rizzo 	k->nkr_leases[lease_idx] = NR_NOSLOT;
128517885a7bSLuigi Rizzo 	k->nkr_lease_idx = nm_next(lease_idx, lim);
128617885a7bSLuigi Rizzo 
128717885a7bSLuigi Rizzo 	if (n > nm_kr_space(k, is_rx)) {
128817885a7bSLuigi Rizzo 		D("invalid request for %d slots", n);
128917885a7bSLuigi Rizzo 		panic("x");
129017885a7bSLuigi Rizzo 	}
129117885a7bSLuigi Rizzo 	/* XXX verify that there are n slots */
129217885a7bSLuigi Rizzo 	k->nkr_hwlease += n;
129317885a7bSLuigi Rizzo 	if (k->nkr_hwlease > lim)
129417885a7bSLuigi Rizzo 		k->nkr_hwlease -= lim + 1;
129517885a7bSLuigi Rizzo 
129617885a7bSLuigi Rizzo 	if (k->nkr_hwlease >= k->nkr_num_slots ||
129717885a7bSLuigi Rizzo 		k->nr_hwcur >= k->nkr_num_slots ||
129817885a7bSLuigi Rizzo 		k->nr_hwtail >= k->nkr_num_slots ||
129917885a7bSLuigi Rizzo 		k->nkr_lease_idx >= k->nkr_num_slots) {
130017885a7bSLuigi Rizzo 		D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
1301*4bf50f18SLuigi Rizzo 			k->na->name,
130217885a7bSLuigi Rizzo 			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
130317885a7bSLuigi Rizzo 			k->nkr_lease_idx, k->nkr_num_slots);
130417885a7bSLuigi Rizzo 	}
130517885a7bSLuigi Rizzo 	return lease_idx;
130617885a7bSLuigi Rizzo }
130717885a7bSLuigi Rizzo 
130817885a7bSLuigi Rizzo /*
1309*4bf50f18SLuigi Rizzo  *
1310f9790aebSLuigi Rizzo  * This flush routine supports only unicast and broadcast but a large
1311f9790aebSLuigi Rizzo  * number of ports, and lets us replace the learn and dispatch functions.
1312f9790aebSLuigi Rizzo  */
1313f9790aebSLuigi Rizzo int
1314f9790aebSLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
1315f9790aebSLuigi Rizzo 		u_int ring_nr)
1316f9790aebSLuigi Rizzo {
1317f9790aebSLuigi Rizzo 	struct nm_bdg_q *dst_ents, *brddst;
1318f9790aebSLuigi Rizzo 	uint16_t num_dsts = 0, *dsts;
1319f9790aebSLuigi Rizzo 	struct nm_bridge *b = na->na_bdg;
1320f9790aebSLuigi Rizzo 	u_int i, j, me = na->bdg_port;
1321f9790aebSLuigi Rizzo 
1322f9790aebSLuigi Rizzo 	/*
1323f9790aebSLuigi Rizzo 	 * The work area (pointed by ft) is followed by an array of
1324f9790aebSLuigi Rizzo 	 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
1325f9790aebSLuigi Rizzo 	 * queues per port plus one for the broadcast traffic.
1326f9790aebSLuigi Rizzo 	 * Then we have an array of destination indexes.
1327f9790aebSLuigi Rizzo 	 */
1328f9790aebSLuigi Rizzo 	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
1329f9790aebSLuigi Rizzo 	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
1330f9790aebSLuigi Rizzo 
1331f9790aebSLuigi Rizzo 	/* first pass: find a destination for each packet in the batch */
1332f9790aebSLuigi Rizzo 	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
1333f9790aebSLuigi Rizzo 		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
1334f9790aebSLuigi Rizzo 		uint16_t dst_port, d_i;
1335f9790aebSLuigi Rizzo 		struct nm_bdg_q *d;
1336f9790aebSLuigi Rizzo 
1337f9790aebSLuigi Rizzo 		ND("slot %d frags %d", i, ft[i].ft_frags);
1338f0ea3689SLuigi Rizzo 		/* Drop the packet if the virtio-net header is not into the first
1339f9790aebSLuigi Rizzo 		   fragment nor at the very beginning of the second. */
1340*4bf50f18SLuigi Rizzo 		if (unlikely(na->virt_hdr_len > ft[i].ft_len))
1341f9790aebSLuigi Rizzo 			continue;
1342*4bf50f18SLuigi Rizzo 		dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na);
1343f9790aebSLuigi Rizzo 		if (netmap_verbose > 255)
1344f9790aebSLuigi Rizzo 			RD(5, "slot %d port %d -> %d", i, me, dst_port);
1345f9790aebSLuigi Rizzo 		if (dst_port == NM_BDG_NOPORT)
1346f9790aebSLuigi Rizzo 			continue; /* this packet is identified to be dropped */
1347f9790aebSLuigi Rizzo 		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
1348f9790aebSLuigi Rizzo 			continue;
1349f9790aebSLuigi Rizzo 		else if (dst_port == NM_BDG_BROADCAST)
1350f9790aebSLuigi Rizzo 			dst_ring = 0; /* broadcasts always go to ring 0 */
1351f9790aebSLuigi Rizzo 		else if (unlikely(dst_port == me ||
1352f9790aebSLuigi Rizzo 		    !b->bdg_ports[dst_port]))
1353f9790aebSLuigi Rizzo 			continue;
1354f9790aebSLuigi Rizzo 
1355f9790aebSLuigi Rizzo 		/* get a position in the scratch pad */
1356f9790aebSLuigi Rizzo 		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
1357f9790aebSLuigi Rizzo 		d = dst_ents + d_i;
1358f9790aebSLuigi Rizzo 
1359f9790aebSLuigi Rizzo 		/* append the first fragment to the list */
1360f9790aebSLuigi Rizzo 		if (d->bq_head == NM_FT_NULL) { /* new destination */
1361f9790aebSLuigi Rizzo 			d->bq_head = d->bq_tail = i;
1362f9790aebSLuigi Rizzo 			/* remember this position to be scanned later */
1363f9790aebSLuigi Rizzo 			if (dst_port != NM_BDG_BROADCAST)
1364f9790aebSLuigi Rizzo 				dsts[num_dsts++] = d_i;
1365f9790aebSLuigi Rizzo 		} else {
1366f9790aebSLuigi Rizzo 			ft[d->bq_tail].ft_next = i;
1367f9790aebSLuigi Rizzo 			d->bq_tail = i;
1368f9790aebSLuigi Rizzo 		}
1369f9790aebSLuigi Rizzo 		d->bq_len += ft[i].ft_frags;
1370f9790aebSLuigi Rizzo 	}
1371f9790aebSLuigi Rizzo 
1372f9790aebSLuigi Rizzo 	/*
1373f9790aebSLuigi Rizzo 	 * Broadcast traffic goes to ring 0 on all destinations.
1374f9790aebSLuigi Rizzo 	 * So we need to add these rings to the list of ports to scan.
1375f9790aebSLuigi Rizzo 	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
1376f9790aebSLuigi Rizzo 	 * expensive. We should keep a compact list of active destinations
1377f9790aebSLuigi Rizzo 	 * so we could shorten this loop.
1378f9790aebSLuigi Rizzo 	 */
1379f9790aebSLuigi Rizzo 	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
1380f9790aebSLuigi Rizzo 	if (brddst->bq_head != NM_FT_NULL) {
1381f9790aebSLuigi Rizzo 		for (j = 0; likely(j < b->bdg_active_ports); j++) {
1382f9790aebSLuigi Rizzo 			uint16_t d_i;
1383f9790aebSLuigi Rizzo 			i = b->bdg_port_index[j];
1384f9790aebSLuigi Rizzo 			if (unlikely(i == me))
1385f9790aebSLuigi Rizzo 				continue;
1386f9790aebSLuigi Rizzo 			d_i = i * NM_BDG_MAXRINGS;
1387f9790aebSLuigi Rizzo 			if (dst_ents[d_i].bq_head == NM_FT_NULL)
1388f9790aebSLuigi Rizzo 				dsts[num_dsts++] = d_i;
1389f9790aebSLuigi Rizzo 		}
1390f9790aebSLuigi Rizzo 	}
1391f9790aebSLuigi Rizzo 
1392f9790aebSLuigi Rizzo 	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
1393*4bf50f18SLuigi Rizzo 	/* second pass: scan destinations */
1394f9790aebSLuigi Rizzo 	for (i = 0; i < num_dsts; i++) {
1395f9790aebSLuigi Rizzo 		struct netmap_vp_adapter *dst_na;
1396f9790aebSLuigi Rizzo 		struct netmap_kring *kring;
1397f9790aebSLuigi Rizzo 		struct netmap_ring *ring;
1398f0ea3689SLuigi Rizzo 		u_int dst_nr, lim, j, d_i, next, brd_next;
1399f9790aebSLuigi Rizzo 		u_int needed, howmany;
1400f9790aebSLuigi Rizzo 		int retry = netmap_txsync_retry;
1401f9790aebSLuigi Rizzo 		struct nm_bdg_q *d;
1402f9790aebSLuigi Rizzo 		uint32_t my_start = 0, lease_idx = 0;
1403f9790aebSLuigi Rizzo 		int nrings;
1404f0ea3689SLuigi Rizzo 		int virt_hdr_mismatch = 0;
1405f9790aebSLuigi Rizzo 
1406f9790aebSLuigi Rizzo 		d_i = dsts[i];
1407f9790aebSLuigi Rizzo 		ND("second pass %d port %d", i, d_i);
1408f9790aebSLuigi Rizzo 		d = dst_ents + d_i;
1409f9790aebSLuigi Rizzo 		// XXX fix the division
1410f9790aebSLuigi Rizzo 		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
1411f9790aebSLuigi Rizzo 		/* protect from the lookup function returning an inactive
1412f9790aebSLuigi Rizzo 		 * destination port
1413f9790aebSLuigi Rizzo 		 */
1414f9790aebSLuigi Rizzo 		if (unlikely(dst_na == NULL))
1415f9790aebSLuigi Rizzo 			goto cleanup;
1416f9790aebSLuigi Rizzo 		if (dst_na->up.na_flags & NAF_SW_ONLY)
1417f9790aebSLuigi Rizzo 			goto cleanup;
1418f9790aebSLuigi Rizzo 		/*
1419f9790aebSLuigi Rizzo 		 * The interface may be in !netmap mode in two cases:
1420f9790aebSLuigi Rizzo 		 * - when na is attached but not activated yet;
1421f9790aebSLuigi Rizzo 		 * - when na is being deactivated but is still attached.
1422f9790aebSLuigi Rizzo 		 */
1423*4bf50f18SLuigi Rizzo 		if (unlikely(!nm_netmap_on(&dst_na->up))) {
1424f9790aebSLuigi Rizzo 			ND("not in netmap mode!");
1425f9790aebSLuigi Rizzo 			goto cleanup;
1426f9790aebSLuigi Rizzo 		}
1427f9790aebSLuigi Rizzo 
1428f9790aebSLuigi Rizzo 		/* there is at least one either unicast or broadcast packet */
1429f9790aebSLuigi Rizzo 		brd_next = brddst->bq_head;
1430f9790aebSLuigi Rizzo 		next = d->bq_head;
1431f9790aebSLuigi Rizzo 		/* we need to reserve this many slots. If fewer are
1432f9790aebSLuigi Rizzo 		 * available, some packets will be dropped.
1433f9790aebSLuigi Rizzo 		 * Packets may have multiple fragments, so we may not use
1434f9790aebSLuigi Rizzo 		 * there is a chance that we may not use all of the slots
1435f9790aebSLuigi Rizzo 		 * we have claimed, so we will need to handle the leftover
1436f9790aebSLuigi Rizzo 		 * ones when we regain the lock.
1437f9790aebSLuigi Rizzo 		 */
1438f9790aebSLuigi Rizzo 		needed = d->bq_len + brddst->bq_len;
1439f9790aebSLuigi Rizzo 
1440f0ea3689SLuigi Rizzo 		if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) {
1441*4bf50f18SLuigi Rizzo 			RD(3, "virt_hdr_mismatch, src %d dst %d", na->virt_hdr_len, dst_na->virt_hdr_len);
1442f0ea3689SLuigi Rizzo 			/* There is a virtio-net header/offloadings mismatch between
1443f0ea3689SLuigi Rizzo 			 * source and destination. The slower mismatch datapath will
1444f0ea3689SLuigi Rizzo 			 * be used to cope with all the mismatches.
1445f0ea3689SLuigi Rizzo 			 */
1446f0ea3689SLuigi Rizzo 			virt_hdr_mismatch = 1;
1447f0ea3689SLuigi Rizzo 			if (dst_na->mfs < na->mfs) {
1448f0ea3689SLuigi Rizzo 				/* We may need to do segmentation offloadings, and so
1449f0ea3689SLuigi Rizzo 				 * we may need a number of destination slots greater
1450f0ea3689SLuigi Rizzo 				 * than the number of input slots ('needed').
1451f0ea3689SLuigi Rizzo 				 * We look for the smallest integer 'x' which satisfies:
1452f0ea3689SLuigi Rizzo 				 *	needed * na->mfs + x * H <= x * na->mfs
1453f0ea3689SLuigi Rizzo 				 * where 'H' is the length of the longest header that may
1454f0ea3689SLuigi Rizzo 				 * be replicated in the segmentation process (e.g. for
1455f0ea3689SLuigi Rizzo 				 * TCPv4 we must account for ethernet header, IP header
1456f0ea3689SLuigi Rizzo 				 * and TCPv4 header).
1457f0ea3689SLuigi Rizzo 				 */
1458f0ea3689SLuigi Rizzo 				needed = (needed * na->mfs) /
1459f0ea3689SLuigi Rizzo 						(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
1460f0ea3689SLuigi Rizzo 				ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
1461f0ea3689SLuigi Rizzo 			}
1462f0ea3689SLuigi Rizzo 		}
1463f0ea3689SLuigi Rizzo 
1464f9790aebSLuigi Rizzo 		ND(5, "pass 2 dst %d is %x %s",
1465f9790aebSLuigi Rizzo 			i, d_i, is_vp ? "virtual" : "nic/host");
1466f9790aebSLuigi Rizzo 		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
1467f9790aebSLuigi Rizzo 		nrings = dst_na->up.num_rx_rings;
1468f9790aebSLuigi Rizzo 		if (dst_nr >= nrings)
1469f9790aebSLuigi Rizzo 			dst_nr = dst_nr % nrings;
1470f9790aebSLuigi Rizzo 		kring = &dst_na->up.rx_rings[dst_nr];
1471f9790aebSLuigi Rizzo 		ring = kring->ring;
1472f9790aebSLuigi Rizzo 		lim = kring->nkr_num_slots - 1;
1473f9790aebSLuigi Rizzo 
1474f9790aebSLuigi Rizzo retry:
1475f9790aebSLuigi Rizzo 
1476f0ea3689SLuigi Rizzo 		if (dst_na->retry && retry) {
1477f0ea3689SLuigi Rizzo 			/* try to get some free slot from the previous run */
1478f0ea3689SLuigi Rizzo 			dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1479*4bf50f18SLuigi Rizzo 			/* actually useful only for bwraps, since there
1480*4bf50f18SLuigi Rizzo 			 * the notify will trigger a txsync on the hwna. VALE ports
1481*4bf50f18SLuigi Rizzo 			 * have dst_na->retry == 0
1482*4bf50f18SLuigi Rizzo 			 */
1483f0ea3689SLuigi Rizzo 		}
1484f9790aebSLuigi Rizzo 		/* reserve the buffers in the queue and an entry
1485f9790aebSLuigi Rizzo 		 * to report completion, and drop lock.
1486f9790aebSLuigi Rizzo 		 * XXX this might become a helper function.
1487f9790aebSLuigi Rizzo 		 */
1488f9790aebSLuigi Rizzo 		mtx_lock(&kring->q_lock);
1489f9790aebSLuigi Rizzo 		if (kring->nkr_stopped) {
1490f9790aebSLuigi Rizzo 			mtx_unlock(&kring->q_lock);
1491f9790aebSLuigi Rizzo 			goto cleanup;
1492f9790aebSLuigi Rizzo 		}
1493f9790aebSLuigi Rizzo 		my_start = j = kring->nkr_hwlease;
1494f9790aebSLuigi Rizzo 		howmany = nm_kr_space(kring, 1);
1495f9790aebSLuigi Rizzo 		if (needed < howmany)
1496f9790aebSLuigi Rizzo 			howmany = needed;
1497f9790aebSLuigi Rizzo 		lease_idx = nm_kr_lease(kring, howmany, 1);
1498f9790aebSLuigi Rizzo 		mtx_unlock(&kring->q_lock);
1499f9790aebSLuigi Rizzo 
1500f9790aebSLuigi Rizzo 		/* only retry if we need more than available slots */
1501f9790aebSLuigi Rizzo 		if (retry && needed <= howmany)
1502f9790aebSLuigi Rizzo 			retry = 0;
1503f9790aebSLuigi Rizzo 
1504f9790aebSLuigi Rizzo 		/* copy to the destination queue */
1505f9790aebSLuigi Rizzo 		while (howmany > 0) {
1506f9790aebSLuigi Rizzo 			struct netmap_slot *slot;
1507f9790aebSLuigi Rizzo 			struct nm_bdg_fwd *ft_p, *ft_end;
1508f9790aebSLuigi Rizzo 			u_int cnt;
1509f9790aebSLuigi Rizzo 
1510f9790aebSLuigi Rizzo 			/* find the queue from which we pick next packet.
1511f9790aebSLuigi Rizzo 			 * NM_FT_NULL is always higher than valid indexes
1512f9790aebSLuigi Rizzo 			 * so we never dereference it if the other list
1513f9790aebSLuigi Rizzo 			 * has packets (and if both are empty we never
1514f9790aebSLuigi Rizzo 			 * get here).
1515f9790aebSLuigi Rizzo 			 */
1516f9790aebSLuigi Rizzo 			if (next < brd_next) {
1517f9790aebSLuigi Rizzo 				ft_p = ft + next;
1518f9790aebSLuigi Rizzo 				next = ft_p->ft_next;
1519f9790aebSLuigi Rizzo 			} else { /* insert broadcast */
1520f9790aebSLuigi Rizzo 				ft_p = ft + brd_next;
1521f9790aebSLuigi Rizzo 				brd_next = ft_p->ft_next;
1522f9790aebSLuigi Rizzo 			}
1523f9790aebSLuigi Rizzo 			cnt = ft_p->ft_frags; // cnt > 0
1524f9790aebSLuigi Rizzo 			if (unlikely(cnt > howmany))
1525f9790aebSLuigi Rizzo 			    break; /* no more space */
1526f9790aebSLuigi Rizzo 			if (netmap_verbose && cnt > 1)
1527f9790aebSLuigi Rizzo 				RD(5, "rx %d frags to %d", cnt, j);
1528f9790aebSLuigi Rizzo 			ft_end = ft_p + cnt;
1529f0ea3689SLuigi Rizzo 			if (unlikely(virt_hdr_mismatch)) {
1530f0ea3689SLuigi Rizzo 				bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
1531f0ea3689SLuigi Rizzo 			} else {
1532f0ea3689SLuigi Rizzo 				howmany -= cnt;
1533f9790aebSLuigi Rizzo 				do {
1534f9790aebSLuigi Rizzo 					char *dst, *src = ft_p->ft_buf;
1535f9790aebSLuigi Rizzo 					size_t copy_len = ft_p->ft_len, dst_len = copy_len;
1536f9790aebSLuigi Rizzo 
1537f9790aebSLuigi Rizzo 					slot = &ring->slot[j];
1538*4bf50f18SLuigi Rizzo 					dst = NMB(&dst_na->up, slot);
1539f9790aebSLuigi Rizzo 
154017885a7bSLuigi Rizzo 					ND("send [%d] %d(%d) bytes at %s:%d",
154117885a7bSLuigi Rizzo 							i, (int)copy_len, (int)dst_len,
154217885a7bSLuigi Rizzo 							NM_IFPNAME(dst_ifp), j);
1543f9790aebSLuigi Rizzo 					/* round to a multiple of 64 */
1544f9790aebSLuigi Rizzo 					copy_len = (copy_len + 63) & ~63;
1545f9790aebSLuigi Rizzo 
1546*4bf50f18SLuigi Rizzo 					if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) ||
1547*4bf50f18SLuigi Rizzo 						     copy_len > NETMAP_BUF_SIZE(&na->up))) {
1548e31c6ec7SLuigi Rizzo 						RD(5, "invalid len %d, down to 64", (int)copy_len);
1549e31c6ec7SLuigi Rizzo 						copy_len = dst_len = 64; // XXX
1550e31c6ec7SLuigi Rizzo 					}
1551f9790aebSLuigi Rizzo 					if (ft_p->ft_flags & NS_INDIRECT) {
1552f9790aebSLuigi Rizzo 						if (copyin(src, dst, copy_len)) {
1553f9790aebSLuigi Rizzo 							// invalid user pointer, pretend len is 0
1554f9790aebSLuigi Rizzo 							dst_len = 0;
1555f9790aebSLuigi Rizzo 						}
1556f9790aebSLuigi Rizzo 					} else {
1557f9790aebSLuigi Rizzo 						//memcpy(dst, src, copy_len);
1558f9790aebSLuigi Rizzo 						pkt_copy(src, dst, (int)copy_len);
1559f9790aebSLuigi Rizzo 					}
1560f9790aebSLuigi Rizzo 					slot->len = dst_len;
1561f9790aebSLuigi Rizzo 					slot->flags = (cnt << 8)| NS_MOREFRAG;
1562f9790aebSLuigi Rizzo 					j = nm_next(j, lim);
1563f0ea3689SLuigi Rizzo 					needed--;
1564f9790aebSLuigi Rizzo 					ft_p++;
1565f9790aebSLuigi Rizzo 				} while (ft_p != ft_end);
1566f9790aebSLuigi Rizzo 				slot->flags = (cnt << 8); /* clear flag on last entry */
1567f0ea3689SLuigi Rizzo 			}
1568f9790aebSLuigi Rizzo 			/* are we done ? */
1569f9790aebSLuigi Rizzo 			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
1570f9790aebSLuigi Rizzo 				break;
1571f9790aebSLuigi Rizzo 		}
1572f9790aebSLuigi Rizzo 		{
1573f9790aebSLuigi Rizzo 		    /* current position */
1574f9790aebSLuigi Rizzo 		    uint32_t *p = kring->nkr_leases; /* shorthand */
1575f9790aebSLuigi Rizzo 		    uint32_t update_pos;
1576f9790aebSLuigi Rizzo 		    int still_locked = 1;
1577f9790aebSLuigi Rizzo 
1578f9790aebSLuigi Rizzo 		    mtx_lock(&kring->q_lock);
1579f9790aebSLuigi Rizzo 		    if (unlikely(howmany > 0)) {
1580f9790aebSLuigi Rizzo 			/* not used all bufs. If i am the last one
1581f9790aebSLuigi Rizzo 			 * i can recover the slots, otherwise must
1582f9790aebSLuigi Rizzo 			 * fill them with 0 to mark empty packets.
1583f9790aebSLuigi Rizzo 			 */
1584f9790aebSLuigi Rizzo 			ND("leftover %d bufs", howmany);
1585f9790aebSLuigi Rizzo 			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
1586f9790aebSLuigi Rizzo 			    /* yes i am the last one */
1587f9790aebSLuigi Rizzo 			    ND("roll back nkr_hwlease to %d", j);
1588f9790aebSLuigi Rizzo 			    kring->nkr_hwlease = j;
1589f9790aebSLuigi Rizzo 			} else {
1590f9790aebSLuigi Rizzo 			    while (howmany-- > 0) {
1591f9790aebSLuigi Rizzo 				ring->slot[j].len = 0;
1592f9790aebSLuigi Rizzo 				ring->slot[j].flags = 0;
1593f9790aebSLuigi Rizzo 				j = nm_next(j, lim);
1594f9790aebSLuigi Rizzo 			    }
1595f9790aebSLuigi Rizzo 			}
1596f9790aebSLuigi Rizzo 		    }
1597f9790aebSLuigi Rizzo 		    p[lease_idx] = j; /* report I am done */
1598f9790aebSLuigi Rizzo 
159917885a7bSLuigi Rizzo 		    update_pos = kring->nr_hwtail;
1600f9790aebSLuigi Rizzo 
1601f9790aebSLuigi Rizzo 		    if (my_start == update_pos) {
1602f9790aebSLuigi Rizzo 			/* all slots before my_start have been reported,
1603f9790aebSLuigi Rizzo 			 * so scan subsequent leases to see if other ranges
1604f9790aebSLuigi Rizzo 			 * have been completed, and to a selwakeup or txsync.
1605f9790aebSLuigi Rizzo 		         */
1606f9790aebSLuigi Rizzo 			while (lease_idx != kring->nkr_lease_idx &&
1607f9790aebSLuigi Rizzo 				p[lease_idx] != NR_NOSLOT) {
1608f9790aebSLuigi Rizzo 			    j = p[lease_idx];
1609f9790aebSLuigi Rizzo 			    p[lease_idx] = NR_NOSLOT;
1610f9790aebSLuigi Rizzo 			    lease_idx = nm_next(lease_idx, lim);
1611f9790aebSLuigi Rizzo 			}
1612f9790aebSLuigi Rizzo 			/* j is the new 'write' position. j != my_start
1613f9790aebSLuigi Rizzo 			 * means there are new buffers to report
1614f9790aebSLuigi Rizzo 			 */
1615f9790aebSLuigi Rizzo 			if (likely(j != my_start)) {
161617885a7bSLuigi Rizzo 				kring->nr_hwtail = j;
1617f9790aebSLuigi Rizzo 				still_locked = 0;
1618f9790aebSLuigi Rizzo 				mtx_unlock(&kring->q_lock);
1619f0ea3689SLuigi Rizzo 				dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1620*4bf50f18SLuigi Rizzo 				/* this is netmap_notify for VALE ports and
1621*4bf50f18SLuigi Rizzo 				 * netmap_bwrap_notify for bwrap. The latter will
1622*4bf50f18SLuigi Rizzo 				 * trigger a txsync on the underlying hwna
1623*4bf50f18SLuigi Rizzo 				 */
1624*4bf50f18SLuigi Rizzo 				if (dst_na->retry && retry--) {
1625*4bf50f18SLuigi Rizzo 					/* XXX this is going to call nm_notify again.
1626*4bf50f18SLuigi Rizzo 					 * Only useful for bwrap in virtual machines
1627*4bf50f18SLuigi Rizzo 					 */
1628f9790aebSLuigi Rizzo 					goto retry;
1629f9790aebSLuigi Rizzo 				}
1630f9790aebSLuigi Rizzo 			}
1631*4bf50f18SLuigi Rizzo 		    }
1632f9790aebSLuigi Rizzo 		    if (still_locked)
1633f9790aebSLuigi Rizzo 			mtx_unlock(&kring->q_lock);
1634f9790aebSLuigi Rizzo 		}
1635f9790aebSLuigi Rizzo cleanup:
1636f9790aebSLuigi Rizzo 		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
1637f9790aebSLuigi Rizzo 		d->bq_len = 0;
1638f9790aebSLuigi Rizzo 	}
1639f9790aebSLuigi Rizzo 	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
1640f9790aebSLuigi Rizzo 	brddst->bq_len = 0;
1641f9790aebSLuigi Rizzo 	return 0;
1642f9790aebSLuigi Rizzo }
1643f9790aebSLuigi Rizzo 
1644*4bf50f18SLuigi Rizzo /* nm_txsync callback for VALE ports */
1645f9790aebSLuigi Rizzo static int
1646*4bf50f18SLuigi Rizzo netmap_vp_txsync(struct netmap_kring *kring, int flags)
1647f9790aebSLuigi Rizzo {
1648*4bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *na =
1649*4bf50f18SLuigi Rizzo 		(struct netmap_vp_adapter *)kring->na;
165017885a7bSLuigi Rizzo 	u_int done;
165117885a7bSLuigi Rizzo 	u_int const lim = kring->nkr_num_slots - 1;
165217885a7bSLuigi Rizzo 	u_int const cur = kring->rcur;
1653f9790aebSLuigi Rizzo 
1654f9790aebSLuigi Rizzo 	if (bridge_batch <= 0) { /* testing only */
165517885a7bSLuigi Rizzo 		done = cur; // used all
1656f9790aebSLuigi Rizzo 		goto done;
1657f9790aebSLuigi Rizzo 	}
1658*4bf50f18SLuigi Rizzo 	if (!na->na_bdg) {
1659*4bf50f18SLuigi Rizzo 		done = cur;
1660*4bf50f18SLuigi Rizzo 		goto done;
1661*4bf50f18SLuigi Rizzo 	}
1662f9790aebSLuigi Rizzo 	if (bridge_batch > NM_BDG_BATCH)
1663f9790aebSLuigi Rizzo 		bridge_batch = NM_BDG_BATCH;
1664f9790aebSLuigi Rizzo 
1665*4bf50f18SLuigi Rizzo 	done = nm_bdg_preflush(kring, cur);
1666f9790aebSLuigi Rizzo done:
166717885a7bSLuigi Rizzo 	if (done != cur)
166817885a7bSLuigi Rizzo 		D("early break at %d/ %d, tail %d", done, cur, kring->nr_hwtail);
166917885a7bSLuigi Rizzo 	/*
167017885a7bSLuigi Rizzo 	 * packets between 'done' and 'cur' are left unsent.
167117885a7bSLuigi Rizzo 	 */
167217885a7bSLuigi Rizzo 	kring->nr_hwcur = done;
167317885a7bSLuigi Rizzo 	kring->nr_hwtail = nm_prev(done, lim);
167417885a7bSLuigi Rizzo 	nm_txsync_finalize(kring);
1675f9790aebSLuigi Rizzo 	if (netmap_verbose)
1676*4bf50f18SLuigi Rizzo 		D("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
1677f9790aebSLuigi Rizzo 	return 0;
1678f9790aebSLuigi Rizzo }
1679f9790aebSLuigi Rizzo 
1680f9790aebSLuigi Rizzo 
1681*4bf50f18SLuigi Rizzo /* rxsync code used by VALE ports nm_rxsync callback and also
1682*4bf50f18SLuigi Rizzo  * internally by the brwap
1683f9790aebSLuigi Rizzo  */
1684f9790aebSLuigi Rizzo static int
1685*4bf50f18SLuigi Rizzo netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
1686f9790aebSLuigi Rizzo {
1687*4bf50f18SLuigi Rizzo 	struct netmap_adapter *na = kring->na;
168817885a7bSLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
168917885a7bSLuigi Rizzo 	u_int nm_i, lim = kring->nkr_num_slots - 1;
169017885a7bSLuigi Rizzo 	u_int head = nm_rxsync_prologue(kring);
169117885a7bSLuigi Rizzo 	int n;
169217885a7bSLuigi Rizzo 
169317885a7bSLuigi Rizzo 	if (head > lim) {
169417885a7bSLuigi Rizzo 		D("ouch dangerous reset!!!");
169517885a7bSLuigi Rizzo 		n = netmap_ring_reinit(kring);
169617885a7bSLuigi Rizzo 		goto done;
169717885a7bSLuigi Rizzo 	}
169817885a7bSLuigi Rizzo 
169917885a7bSLuigi Rizzo 	/* First part, import newly received packets. */
170017885a7bSLuigi Rizzo 	/* actually nothing to do here, they are already in the kring */
170117885a7bSLuigi Rizzo 
170217885a7bSLuigi Rizzo 	/* Second part, skip past packets that userspace has released. */
170317885a7bSLuigi Rizzo 	nm_i = kring->nr_hwcur;
170417885a7bSLuigi Rizzo 	if (nm_i != head) {
170517885a7bSLuigi Rizzo 		/* consistency check, but nothing really important here */
170617885a7bSLuigi Rizzo 		for (n = 0; likely(nm_i != head); n++) {
170717885a7bSLuigi Rizzo 			struct netmap_slot *slot = &ring->slot[nm_i];
1708*4bf50f18SLuigi Rizzo 			void *addr = NMB(na, slot);
170917885a7bSLuigi Rizzo 
1710*4bf50f18SLuigi Rizzo 			if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
171117885a7bSLuigi Rizzo 				D("bad buffer index %d, ignore ?",
171217885a7bSLuigi Rizzo 					slot->buf_idx);
171317885a7bSLuigi Rizzo 			}
171417885a7bSLuigi Rizzo 			slot->flags &= ~NS_BUF_CHANGED;
171517885a7bSLuigi Rizzo 			nm_i = nm_next(nm_i, lim);
171617885a7bSLuigi Rizzo 		}
171717885a7bSLuigi Rizzo 		kring->nr_hwcur = head;
171817885a7bSLuigi Rizzo 	}
171917885a7bSLuigi Rizzo 
172017885a7bSLuigi Rizzo 	/* tell userspace that there are new packets */
172117885a7bSLuigi Rizzo 	nm_rxsync_finalize(kring);
172217885a7bSLuigi Rizzo 	n = 0;
172317885a7bSLuigi Rizzo done:
172417885a7bSLuigi Rizzo 	return n;
172517885a7bSLuigi Rizzo }
1726f9790aebSLuigi Rizzo 
1727f9790aebSLuigi Rizzo /*
1728*4bf50f18SLuigi Rizzo  * nm_rxsync callback for VALE ports
1729f9790aebSLuigi Rizzo  * user process reading from a VALE switch.
1730f9790aebSLuigi Rizzo  * Already protected against concurrent calls from userspace,
1731f9790aebSLuigi Rizzo  * but we must acquire the queue's lock to protect against
1732f9790aebSLuigi Rizzo  * writers on the same queue.
1733f9790aebSLuigi Rizzo  */
1734f9790aebSLuigi Rizzo static int
1735*4bf50f18SLuigi Rizzo netmap_vp_rxsync(struct netmap_kring *kring, int flags)
1736f9790aebSLuigi Rizzo {
1737f9790aebSLuigi Rizzo 	int n;
1738f9790aebSLuigi Rizzo 
1739f9790aebSLuigi Rizzo 	mtx_lock(&kring->q_lock);
1740*4bf50f18SLuigi Rizzo 	n = netmap_vp_rxsync_locked(kring, flags);
1741f9790aebSLuigi Rizzo 	mtx_unlock(&kring->q_lock);
1742f9790aebSLuigi Rizzo 	return n;
1743f9790aebSLuigi Rizzo }
1744f9790aebSLuigi Rizzo 
174517885a7bSLuigi Rizzo 
1746*4bf50f18SLuigi Rizzo /* nm_bdg_attach callback for VALE ports
1747*4bf50f18SLuigi Rizzo  * The na_vp port is this same netmap_adapter. There is no host port.
1748*4bf50f18SLuigi Rizzo  */
1749f9790aebSLuigi Rizzo static int
1750*4bf50f18SLuigi Rizzo netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na)
1751*4bf50f18SLuigi Rizzo {
1752*4bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
1753*4bf50f18SLuigi Rizzo 
1754*4bf50f18SLuigi Rizzo 	if (vpna->na_bdg)
1755*4bf50f18SLuigi Rizzo 		return EBUSY;
1756*4bf50f18SLuigi Rizzo 	na->na_vp = vpna;
1757*4bf50f18SLuigi Rizzo 	strncpy(na->name, name, sizeof(na->name));
1758*4bf50f18SLuigi Rizzo 	na->na_hostvp = NULL;
1759*4bf50f18SLuigi Rizzo 	return 0;
1760*4bf50f18SLuigi Rizzo }
1761*4bf50f18SLuigi Rizzo 
1762*4bf50f18SLuigi Rizzo /* create a netmap_vp_adapter that describes a VALE port.
1763*4bf50f18SLuigi Rizzo  * Only persistent VALE ports have a non-null ifp.
1764*4bf50f18SLuigi Rizzo  */
1765*4bf50f18SLuigi Rizzo static int
1766*4bf50f18SLuigi Rizzo netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter **ret)
1767f9790aebSLuigi Rizzo {
1768f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna;
1769f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
1770f9790aebSLuigi Rizzo 	int error;
1771f0ea3689SLuigi Rizzo 	u_int npipes = 0;
1772f9790aebSLuigi Rizzo 
1773f9790aebSLuigi Rizzo 	vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
1774f9790aebSLuigi Rizzo 	if (vpna == NULL)
1775f9790aebSLuigi Rizzo 		return ENOMEM;
1776f9790aebSLuigi Rizzo 
1777f9790aebSLuigi Rizzo  	na = &vpna->up;
1778f9790aebSLuigi Rizzo 
1779f9790aebSLuigi Rizzo 	na->ifp = ifp;
1780*4bf50f18SLuigi Rizzo 	strncpy(na->name, nmr->nr_name, sizeof(na->name));
1781f9790aebSLuigi Rizzo 
1782f9790aebSLuigi Rizzo 	/* bound checking */
1783f9790aebSLuigi Rizzo 	na->num_tx_rings = nmr->nr_tx_rings;
1784f9790aebSLuigi Rizzo 	nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1785f9790aebSLuigi Rizzo 	nmr->nr_tx_rings = na->num_tx_rings; // write back
1786f9790aebSLuigi Rizzo 	na->num_rx_rings = nmr->nr_rx_rings;
1787f9790aebSLuigi Rizzo 	nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1788f9790aebSLuigi Rizzo 	nmr->nr_rx_rings = na->num_rx_rings; // write back
1789f9790aebSLuigi Rizzo 	nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
1790f9790aebSLuigi Rizzo 			1, NM_BDG_MAXSLOTS, NULL);
1791f9790aebSLuigi Rizzo 	na->num_tx_desc = nmr->nr_tx_slots;
1792f9790aebSLuigi Rizzo 	nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
1793f9790aebSLuigi Rizzo 			1, NM_BDG_MAXSLOTS, NULL);
1794f0ea3689SLuigi Rizzo 	/* validate number of pipes. We want at least 1,
1795f0ea3689SLuigi Rizzo 	 * but probably can do with some more.
1796f0ea3689SLuigi Rizzo 	 * So let's use 2 as default (when 0 is supplied)
1797f0ea3689SLuigi Rizzo 	 */
1798f0ea3689SLuigi Rizzo 	npipes = nmr->nr_arg1;
1799f0ea3689SLuigi Rizzo 	nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
1800f0ea3689SLuigi Rizzo 	nmr->nr_arg1 = npipes;	/* write back */
1801f0ea3689SLuigi Rizzo 	/* validate extra bufs */
1802f0ea3689SLuigi Rizzo 	nm_bound_var(&nmr->nr_arg3, 0, 0,
1803f0ea3689SLuigi Rizzo 			128*NM_BDG_MAXSLOTS, NULL);
1804f9790aebSLuigi Rizzo 	na->num_rx_desc = nmr->nr_rx_slots;
1805f0ea3689SLuigi Rizzo 	vpna->virt_hdr_len = 0;
1806f0ea3689SLuigi Rizzo 	vpna->mfs = 1514;
1807f0ea3689SLuigi Rizzo 	/*if (vpna->mfs > netmap_buf_size)  TODO netmap_buf_size is zero??
1808f0ea3689SLuigi Rizzo 		vpna->mfs = netmap_buf_size; */
1809f0ea3689SLuigi Rizzo         if (netmap_verbose)
1810f0ea3689SLuigi Rizzo 		D("max frame size %u", vpna->mfs);
1811f9790aebSLuigi Rizzo 
1812f9790aebSLuigi Rizzo 	na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
1813*4bf50f18SLuigi Rizzo 	na->nm_txsync = netmap_vp_txsync;
1814*4bf50f18SLuigi Rizzo 	na->nm_rxsync = netmap_vp_rxsync;
1815*4bf50f18SLuigi Rizzo 	na->nm_register = netmap_vp_reg;
1816f9790aebSLuigi Rizzo 	na->nm_krings_create = netmap_vp_krings_create;
1817f9790aebSLuigi Rizzo 	na->nm_krings_delete = netmap_vp_krings_delete;
1818*4bf50f18SLuigi Rizzo 	na->nm_dtor = netmap_vp_dtor;
1819*4bf50f18SLuigi Rizzo 	na->nm_mem = netmap_mem_private_new(na->name,
1820f9790aebSLuigi Rizzo 			na->num_tx_rings, na->num_tx_desc,
1821f0ea3689SLuigi Rizzo 			na->num_rx_rings, na->num_rx_desc,
1822f0ea3689SLuigi Rizzo 			nmr->nr_arg3, npipes, &error);
1823f0ea3689SLuigi Rizzo 	if (na->nm_mem == NULL)
1824f0ea3689SLuigi Rizzo 		goto err;
1825*4bf50f18SLuigi Rizzo 	na->nm_bdg_attach = netmap_vp_bdg_attach;
1826f9790aebSLuigi Rizzo 	/* other nmd fields are set in the common routine */
1827f9790aebSLuigi Rizzo 	error = netmap_attach_common(na);
1828f0ea3689SLuigi Rizzo 	if (error)
1829f0ea3689SLuigi Rizzo 		goto err;
1830*4bf50f18SLuigi Rizzo 	*ret = vpna;
1831f0ea3689SLuigi Rizzo 	return 0;
1832f0ea3689SLuigi Rizzo 
1833f0ea3689SLuigi Rizzo err:
1834f0ea3689SLuigi Rizzo 	if (na->nm_mem != NULL)
1835f0ea3689SLuigi Rizzo 		netmap_mem_private_delete(na->nm_mem);
1836f9790aebSLuigi Rizzo 	free(vpna, M_DEVBUF);
1837f9790aebSLuigi Rizzo 	return error;
1838f9790aebSLuigi Rizzo }
1839f9790aebSLuigi Rizzo 
1840*4bf50f18SLuigi Rizzo /* Bridge wrapper code (bwrap).
1841*4bf50f18SLuigi Rizzo  * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
1842*4bf50f18SLuigi Rizzo  * VALE switch.
1843*4bf50f18SLuigi Rizzo  * The main task is to swap the meaning of tx and rx rings to match the
1844*4bf50f18SLuigi Rizzo  * expectations of the VALE switch code (see nm_bdg_flush).
1845*4bf50f18SLuigi Rizzo  *
1846*4bf50f18SLuigi Rizzo  * The bwrap works by interposing a netmap_bwrap_adapter between the
1847*4bf50f18SLuigi Rizzo  * rest of the system and the hwna. The netmap_bwrap_adapter looks like
1848*4bf50f18SLuigi Rizzo  * a netmap_vp_adapter to the rest of the system, but, internally, it
1849*4bf50f18SLuigi Rizzo  * translates all callbacks to what the hwna expects.
1850*4bf50f18SLuigi Rizzo  *
1851*4bf50f18SLuigi Rizzo  * Note that we have to intercept callbacks coming from two sides:
1852*4bf50f18SLuigi Rizzo  *
1853*4bf50f18SLuigi Rizzo  *  - callbacks coming from the netmap module are intercepted by
1854*4bf50f18SLuigi Rizzo  *    passing around the netmap_bwrap_adapter instead of the hwna
1855*4bf50f18SLuigi Rizzo  *
1856*4bf50f18SLuigi Rizzo  *  - callbacks coming from outside of the netmap module only know
1857*4bf50f18SLuigi Rizzo  *    about the hwna. This, however, only happens in interrupt
1858*4bf50f18SLuigi Rizzo  *    handlers, where only the hwna->nm_notify callback is called.
1859*4bf50f18SLuigi Rizzo  *    What the bwrap does is to overwrite the hwna->nm_notify callback
1860*4bf50f18SLuigi Rizzo  *    with its own netmap_bwrap_intr_notify.
1861*4bf50f18SLuigi Rizzo  *    XXX This assumes that the hwna->nm_notify callback was the
1862*4bf50f18SLuigi Rizzo  *    standard netmap_notify(), as it is the case for nic adapters.
1863*4bf50f18SLuigi Rizzo  *    Any additional action performed by hwna->nm_notify will not be
1864*4bf50f18SLuigi Rizzo  *    performed by netmap_bwrap_intr_notify.
1865*4bf50f18SLuigi Rizzo  *
1866*4bf50f18SLuigi Rizzo  * Additionally, the bwrap can optionally attach the host rings pair
1867*4bf50f18SLuigi Rizzo  * of the wrapped adapter to a different port of the switch.
1868*4bf50f18SLuigi Rizzo  */
1869*4bf50f18SLuigi Rizzo 
187017885a7bSLuigi Rizzo 
1871f9790aebSLuigi Rizzo static void
1872f9790aebSLuigi Rizzo netmap_bwrap_dtor(struct netmap_adapter *na)
1873f9790aebSLuigi Rizzo {
1874f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1875f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
1876f9790aebSLuigi Rizzo 
1877f9790aebSLuigi Rizzo 	ND("na %p", na);
1878*4bf50f18SLuigi Rizzo 	/* drop reference to hwna->ifp.
1879*4bf50f18SLuigi Rizzo 	 * If we don't do this, netmap_detach_common(na)
1880*4bf50f18SLuigi Rizzo 	 * will think it has set NA(na->ifp) to NULL
1881*4bf50f18SLuigi Rizzo 	 */
1882f9790aebSLuigi Rizzo 	na->ifp = NULL;
1883*4bf50f18SLuigi Rizzo 	/* for safety, also drop the possible reference
1884*4bf50f18SLuigi Rizzo 	 * in the hostna
1885*4bf50f18SLuigi Rizzo 	 */
1886*4bf50f18SLuigi Rizzo 	bna->host.up.ifp = NULL;
1887*4bf50f18SLuigi Rizzo 
1888*4bf50f18SLuigi Rizzo 	hwna->nm_mem = bna->save_nmd;
1889*4bf50f18SLuigi Rizzo 	hwna->na_private = NULL;
1890*4bf50f18SLuigi Rizzo 	hwna->na_vp = hwna->na_hostvp = NULL;
1891*4bf50f18SLuigi Rizzo 	hwna->na_flags &= ~NAF_BUSY;
1892*4bf50f18SLuigi Rizzo 	netmap_adapter_put(hwna);
1893f9790aebSLuigi Rizzo 
1894f9790aebSLuigi Rizzo }
1895f9790aebSLuigi Rizzo 
189617885a7bSLuigi Rizzo 
1897f9790aebSLuigi Rizzo /*
189817885a7bSLuigi Rizzo  * Intr callback for NICs connected to a bridge.
189917885a7bSLuigi Rizzo  * Simply ignore tx interrupts (maybe we could try to recover space ?)
190017885a7bSLuigi Rizzo  * and pass received packets from nic to the bridge.
190117885a7bSLuigi Rizzo  *
1902f9790aebSLuigi Rizzo  * XXX TODO check locking: this is called from the interrupt
1903f9790aebSLuigi Rizzo  * handler so we should make sure that the interface is not
1904f9790aebSLuigi Rizzo  * disconnected while passing down an interrupt.
1905f9790aebSLuigi Rizzo  *
190617885a7bSLuigi Rizzo  * Note, no user process can access this NIC or the host stack.
190717885a7bSLuigi Rizzo  * The only part of the ring that is significant are the slots,
190817885a7bSLuigi Rizzo  * and head/cur/tail are set from the kring as needed
190917885a7bSLuigi Rizzo  * (part as a receive ring, part as a transmit ring).
191017885a7bSLuigi Rizzo  *
191117885a7bSLuigi Rizzo  * callback that overwrites the hwna notify callback.
1912f9790aebSLuigi Rizzo  * Packets come from the outside or from the host stack and are put on an hwna rx ring.
1913f9790aebSLuigi Rizzo  * The bridge wrapper then sends the packets through the bridge.
1914f9790aebSLuigi Rizzo  */
1915f9790aebSLuigi Rizzo static int
1916f9790aebSLuigi Rizzo netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
1917f9790aebSLuigi Rizzo {
1918f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna = na->na_private;
1919f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *hostna = &bna->host;
1920f9790aebSLuigi Rizzo 	struct netmap_kring *kring, *bkring;
1921f9790aebSLuigi Rizzo 	struct netmap_ring *ring;
1922f9790aebSLuigi Rizzo 	int is_host_ring = ring_nr == na->num_rx_rings;
1923f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna = &bna->up;
1924f9790aebSLuigi Rizzo 	int error = 0;
1925f9790aebSLuigi Rizzo 
192617885a7bSLuigi Rizzo 	if (netmap_verbose)
1927*4bf50f18SLuigi Rizzo 	    D("%s %s%d 0x%x", na->name,
192817885a7bSLuigi Rizzo 		(tx == NR_TX ? "TX" : "RX"), ring_nr, flags);
1929f9790aebSLuigi Rizzo 
1930f9790aebSLuigi Rizzo 	if (flags & NAF_DISABLE_NOTIFY) {
1931*4bf50f18SLuigi Rizzo 		/* the enabled/disabled state of the ring has changed,
1932*4bf50f18SLuigi Rizzo 		 * propagate the info to the wrapper (with tx/rx swapped)
1933*4bf50f18SLuigi Rizzo 		 */
1934*4bf50f18SLuigi Rizzo 		if (tx == NR_TX) {
1935*4bf50f18SLuigi Rizzo 			netmap_set_rxring(&vpna->up, ring_nr,
1936*4bf50f18SLuigi Rizzo 					na->tx_rings[ring_nr].nkr_stopped);
1937*4bf50f18SLuigi Rizzo 		} else {
1938*4bf50f18SLuigi Rizzo 			netmap_set_txring(&vpna->up, ring_nr,
1939*4bf50f18SLuigi Rizzo 					na->rx_rings[ring_nr].nkr_stopped);
1940*4bf50f18SLuigi Rizzo 		}
1941f9790aebSLuigi Rizzo 		return 0;
1942f9790aebSLuigi Rizzo 	}
1943f9790aebSLuigi Rizzo 
1944*4bf50f18SLuigi Rizzo 	if (!nm_netmap_on(na))
1945f9790aebSLuigi Rizzo 		return 0;
1946f9790aebSLuigi Rizzo 
194717885a7bSLuigi Rizzo 	/* we only care about receive interrupts */
1948f9790aebSLuigi Rizzo 	if (tx == NR_TX)
1949f9790aebSLuigi Rizzo 		return 0;
1950f9790aebSLuigi Rizzo 
1951f9790aebSLuigi Rizzo 	kring = &na->rx_rings[ring_nr];
1952f9790aebSLuigi Rizzo 	ring = kring->ring;
1953f9790aebSLuigi Rizzo 
1954f9790aebSLuigi Rizzo 	/* make sure the ring is not disabled */
1955f9790aebSLuigi Rizzo 	if (nm_kr_tryget(kring))
1956f9790aebSLuigi Rizzo 		return 0;
1957f9790aebSLuigi Rizzo 
1958f9790aebSLuigi Rizzo 	if (is_host_ring && hostna->na_bdg == NULL) {
1959f9790aebSLuigi Rizzo 		error = bna->save_notify(na, ring_nr, tx, flags);
1960f9790aebSLuigi Rizzo 		goto put_out;
1961f9790aebSLuigi Rizzo 	}
1962f9790aebSLuigi Rizzo 
196317885a7bSLuigi Rizzo 	/* Here we expect ring->head = ring->cur = ring->tail
196417885a7bSLuigi Rizzo 	 * because everything has been released from the previous round.
196517885a7bSLuigi Rizzo 	 * However the ring is shared and we might have info from
196617885a7bSLuigi Rizzo 	 * the wrong side (the tx ring). Hence we overwrite with
196717885a7bSLuigi Rizzo 	 * the info from the rx kring.
196817885a7bSLuigi Rizzo 	 */
196917885a7bSLuigi Rizzo 	if (netmap_verbose)
1970*4bf50f18SLuigi Rizzo 	    D("%s head %d cur %d tail %d (kring %d %d %d)",  na->name,
197117885a7bSLuigi Rizzo 		ring->head, ring->cur, ring->tail,
197217885a7bSLuigi Rizzo 		kring->rhead, kring->rcur, kring->rtail);
197317885a7bSLuigi Rizzo 
197417885a7bSLuigi Rizzo 	ring->head = kring->rhead;
197517885a7bSLuigi Rizzo 	ring->cur = kring->rcur;
197617885a7bSLuigi Rizzo 	ring->tail = kring->rtail;
197717885a7bSLuigi Rizzo 
1978f9790aebSLuigi Rizzo 	if (is_host_ring) {
1979f9790aebSLuigi Rizzo 		vpna = hostna;
1980f9790aebSLuigi Rizzo 		ring_nr = 0;
1981f0ea3689SLuigi Rizzo 	}
1982f0ea3689SLuigi Rizzo 	/* simulate a user wakeup on the rx ring */
1983f9790aebSLuigi Rizzo 	/* fetch packets that have arrived.
1984f9790aebSLuigi Rizzo 	 * XXX maybe do this in a loop ?
1985f9790aebSLuigi Rizzo 	 */
1986f0ea3689SLuigi Rizzo 	error = kring->nm_sync(kring, 0);
1987f9790aebSLuigi Rizzo 	if (error)
1988f9790aebSLuigi Rizzo 		goto put_out;
198917885a7bSLuigi Rizzo 	if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) {
1990f9790aebSLuigi Rizzo 		D("how strange, interrupt with no packets on %s",
1991*4bf50f18SLuigi Rizzo 			na->name);
1992f9790aebSLuigi Rizzo 		goto put_out;
1993f9790aebSLuigi Rizzo 	}
199417885a7bSLuigi Rizzo 
199517885a7bSLuigi Rizzo 	/* new packets are ring->cur to ring->tail, and the bkring
199617885a7bSLuigi Rizzo 	 * had hwcur == ring->cur. So advance ring->cur to ring->tail
199717885a7bSLuigi Rizzo 	 * to push all packets out.
199817885a7bSLuigi Rizzo 	 */
199917885a7bSLuigi Rizzo 	ring->head = ring->cur = ring->tail;
200017885a7bSLuigi Rizzo 
200117885a7bSLuigi Rizzo 	/* also set tail to what the bwrap expects */
200217885a7bSLuigi Rizzo 	bkring = &vpna->up.tx_rings[ring_nr];
200317885a7bSLuigi Rizzo 	ring->tail = bkring->nr_hwtail; // rtail too ?
200417885a7bSLuigi Rizzo 
200517885a7bSLuigi Rizzo 	/* pass packets to the switch */
200617885a7bSLuigi Rizzo 	nm_txsync_prologue(bkring); // XXX error checking ?
2007*4bf50f18SLuigi Rizzo 	netmap_vp_txsync(bkring, flags);
2008f9790aebSLuigi Rizzo 
200917885a7bSLuigi Rizzo 	/* mark all buffers as released on this ring */
201017885a7bSLuigi Rizzo 	ring->head = ring->cur = kring->nr_hwtail;
201117885a7bSLuigi Rizzo 	ring->tail = kring->rtail;
201217885a7bSLuigi Rizzo 	/* another call to actually release the buffers */
201317885a7bSLuigi Rizzo 	if (!is_host_ring) {
2014f0ea3689SLuigi Rizzo 		error = kring->nm_sync(kring, 0);
201517885a7bSLuigi Rizzo 	} else {
201617885a7bSLuigi Rizzo 		/* mark all packets as released, as in the
201717885a7bSLuigi Rizzo 		 * second part of netmap_rxsync_from_host()
201817885a7bSLuigi Rizzo 		 */
201917885a7bSLuigi Rizzo 		kring->nr_hwcur = kring->nr_hwtail;
202017885a7bSLuigi Rizzo 		nm_rxsync_finalize(kring);
202117885a7bSLuigi Rizzo 	}
2022f9790aebSLuigi Rizzo 
2023f9790aebSLuigi Rizzo put_out:
2024f9790aebSLuigi Rizzo 	nm_kr_put(kring);
2025f9790aebSLuigi Rizzo 	return error;
2026f9790aebSLuigi Rizzo }
2027f9790aebSLuigi Rizzo 
202817885a7bSLuigi Rizzo 
2029*4bf50f18SLuigi Rizzo /* nm_register callback for bwrap */
2030f9790aebSLuigi Rizzo static int
2031f9790aebSLuigi Rizzo netmap_bwrap_register(struct netmap_adapter *na, int onoff)
2032f9790aebSLuigi Rizzo {
2033f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
2034f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
2035f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
2036f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *hostna = &bna->host;
2037f9790aebSLuigi Rizzo 	int error;
2038f9790aebSLuigi Rizzo 
2039*4bf50f18SLuigi Rizzo 	ND("%s %s", na->name, onoff ? "on" : "off");
2040f9790aebSLuigi Rizzo 
2041f9790aebSLuigi Rizzo 	if (onoff) {
2042f9790aebSLuigi Rizzo 		int i;
2043f9790aebSLuigi Rizzo 
2044*4bf50f18SLuigi Rizzo 		/* netmap_do_regif has been called on the bwrap na.
2045*4bf50f18SLuigi Rizzo 		 * We need to pass the information about the
2046*4bf50f18SLuigi Rizzo 		 * memory allocator down to the hwna before
2047*4bf50f18SLuigi Rizzo 		 * putting it in netmap mode
2048*4bf50f18SLuigi Rizzo 		 */
2049f9790aebSLuigi Rizzo 		hwna->na_lut = na->na_lut;
2050f9790aebSLuigi Rizzo 		hwna->na_lut_objtotal = na->na_lut_objtotal;
2051*4bf50f18SLuigi Rizzo 		hwna->na_lut_objsize = na->na_lut_objsize;
2052f9790aebSLuigi Rizzo 
2053f9790aebSLuigi Rizzo 		if (hostna->na_bdg) {
2054*4bf50f18SLuigi Rizzo 			/* if the host rings have been attached to switch,
2055*4bf50f18SLuigi Rizzo 			 * we need to copy the memory allocator information
2056*4bf50f18SLuigi Rizzo 			 * in the hostna also
2057*4bf50f18SLuigi Rizzo 			 */
2058f9790aebSLuigi Rizzo 			hostna->up.na_lut = na->na_lut;
2059f9790aebSLuigi Rizzo 			hostna->up.na_lut_objtotal = na->na_lut_objtotal;
2060*4bf50f18SLuigi Rizzo 			hostna->up.na_lut_objsize = na->na_lut_objsize;
2061f9790aebSLuigi Rizzo 		}
2062f9790aebSLuigi Rizzo 
20630c7ba37eSLuigi Rizzo 		/* cross-link the netmap rings
20640c7ba37eSLuigi Rizzo 		 * The original number of rings comes from hwna,
20650c7ba37eSLuigi Rizzo 		 * rx rings on one side equals tx rings on the other.
2066*4bf50f18SLuigi Rizzo 		 * We need to do this now, after the initialization
2067*4bf50f18SLuigi Rizzo 		 * of the kring->ring pointers
20680c7ba37eSLuigi Rizzo 		 */
2069f0ea3689SLuigi Rizzo 		for (i = 0; i < na->num_rx_rings + 1; i++) {
2070f9790aebSLuigi Rizzo 			hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
2071f9790aebSLuigi Rizzo 			hwna->tx_rings[i].ring = na->rx_rings[i].ring;
2072f9790aebSLuigi Rizzo 		}
2073f0ea3689SLuigi Rizzo 		for (i = 0; i < na->num_tx_rings + 1; i++) {
2074f9790aebSLuigi Rizzo 			hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
2075f9790aebSLuigi Rizzo 			hwna->rx_rings[i].ring = na->tx_rings[i].ring;
2076f9790aebSLuigi Rizzo 		}
2077f9790aebSLuigi Rizzo 	}
2078f9790aebSLuigi Rizzo 
2079*4bf50f18SLuigi Rizzo 	/* forward the request to the hwna */
2080f9790aebSLuigi Rizzo 	error = hwna->nm_register(hwna, onoff);
2081f9790aebSLuigi Rizzo 	if (error)
2082f9790aebSLuigi Rizzo 		return error;
2083f9790aebSLuigi Rizzo 
2084*4bf50f18SLuigi Rizzo 	/* impersonate a netmap_vp_adapter */
2085*4bf50f18SLuigi Rizzo 	netmap_vp_reg(na, onoff);
2086*4bf50f18SLuigi Rizzo 	if (hostna->na_bdg)
2087*4bf50f18SLuigi Rizzo 		netmap_vp_reg(&hostna->up, onoff);
2088f9790aebSLuigi Rizzo 
2089f9790aebSLuigi Rizzo 	if (onoff) {
2090*4bf50f18SLuigi Rizzo 		/* intercept the hwna nm_nofify callback */
2091f9790aebSLuigi Rizzo 		bna->save_notify = hwna->nm_notify;
2092f9790aebSLuigi Rizzo 		hwna->nm_notify = netmap_bwrap_intr_notify;
2093f9790aebSLuigi Rizzo 	} else {
2094f9790aebSLuigi Rizzo 		hwna->nm_notify = bna->save_notify;
2095f9790aebSLuigi Rizzo 		hwna->na_lut = NULL;
2096f9790aebSLuigi Rizzo 		hwna->na_lut_objtotal = 0;
2097*4bf50f18SLuigi Rizzo 		hwna->na_lut_objsize = 0;
2098f9790aebSLuigi Rizzo 	}
2099f9790aebSLuigi Rizzo 
2100f9790aebSLuigi Rizzo 	return 0;
2101f9790aebSLuigi Rizzo }
2102f9790aebSLuigi Rizzo 
2103*4bf50f18SLuigi Rizzo /* nm_config callback for bwrap */
2104f9790aebSLuigi Rizzo static int
2105f9790aebSLuigi Rizzo netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
2106f9790aebSLuigi Rizzo 				    u_int *rxr, u_int *rxd)
2107f9790aebSLuigi Rizzo {
2108f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
2109f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
2110f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
2111f9790aebSLuigi Rizzo 
2112f9790aebSLuigi Rizzo 	/* forward the request */
2113f9790aebSLuigi Rizzo 	netmap_update_config(hwna);
2114f9790aebSLuigi Rizzo 	/* swap the results */
2115f9790aebSLuigi Rizzo 	*txr = hwna->num_rx_rings;
2116f9790aebSLuigi Rizzo 	*txd = hwna->num_rx_desc;
2117f9790aebSLuigi Rizzo 	*rxr = hwna->num_tx_rings;
2118f9790aebSLuigi Rizzo 	*rxd = hwna->num_rx_desc;
2119f9790aebSLuigi Rizzo 
2120f9790aebSLuigi Rizzo 	return 0;
2121f9790aebSLuigi Rizzo }
2122f9790aebSLuigi Rizzo 
212317885a7bSLuigi Rizzo 
2124*4bf50f18SLuigi Rizzo /* nm_krings_create callback for bwrap */
2125f9790aebSLuigi Rizzo static int
2126f9790aebSLuigi Rizzo netmap_bwrap_krings_create(struct netmap_adapter *na)
2127f9790aebSLuigi Rizzo {
2128f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
2129f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
2130f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
2131f9790aebSLuigi Rizzo 	struct netmap_adapter *hostna = &bna->host.up;
2132f9790aebSLuigi Rizzo 	int error;
2133f9790aebSLuigi Rizzo 
2134*4bf50f18SLuigi Rizzo 	ND("%s", na->name);
2135f9790aebSLuigi Rizzo 
2136*4bf50f18SLuigi Rizzo 	/* impersonate a netmap_vp_adapter */
2137f9790aebSLuigi Rizzo 	error = netmap_vp_krings_create(na);
2138f9790aebSLuigi Rizzo 	if (error)
2139f9790aebSLuigi Rizzo 		return error;
2140f9790aebSLuigi Rizzo 
2141*4bf50f18SLuigi Rizzo 	/* also create the hwna krings */
2142f9790aebSLuigi Rizzo 	error = hwna->nm_krings_create(hwna);
2143f9790aebSLuigi Rizzo 	if (error) {
2144f9790aebSLuigi Rizzo 		netmap_vp_krings_delete(na);
2145f9790aebSLuigi Rizzo 		return error;
2146f9790aebSLuigi Rizzo 	}
2147*4bf50f18SLuigi Rizzo 	/* the connection between the bwrap krings and the hwna krings
2148*4bf50f18SLuigi Rizzo 	 * will be perfomed later, in the nm_register callback, since
2149*4bf50f18SLuigi Rizzo 	 * now the kring->ring pointers have not been initialized yet
2150*4bf50f18SLuigi Rizzo 	 */
2151f9790aebSLuigi Rizzo 
2152f0ea3689SLuigi Rizzo 	if (na->na_flags & NAF_HOST_RINGS) {
2153*4bf50f18SLuigi Rizzo 		/* the hostna rings are the host rings of the bwrap.
2154*4bf50f18SLuigi Rizzo 		 * The corresponding krings must point back to the
2155*4bf50f18SLuigi Rizzo 		 * hostna
2156*4bf50f18SLuigi Rizzo 		 */
2157f9790aebSLuigi Rizzo 		hostna->tx_rings = na->tx_rings + na->num_tx_rings;
2158*4bf50f18SLuigi Rizzo 		hostna->tx_rings[0].na = hostna;
2159f9790aebSLuigi Rizzo 		hostna->rx_rings = na->rx_rings + na->num_rx_rings;
2160*4bf50f18SLuigi Rizzo 		hostna->rx_rings[0].na = hostna;
2161f0ea3689SLuigi Rizzo 	}
2162f9790aebSLuigi Rizzo 
2163f9790aebSLuigi Rizzo 	return 0;
2164f9790aebSLuigi Rizzo }
2165f9790aebSLuigi Rizzo 
216617885a7bSLuigi Rizzo 
2167f9790aebSLuigi Rizzo static void
2168f9790aebSLuigi Rizzo netmap_bwrap_krings_delete(struct netmap_adapter *na)
2169f9790aebSLuigi Rizzo {
2170f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
2171f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
2172f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
2173f9790aebSLuigi Rizzo 
2174*4bf50f18SLuigi Rizzo 	ND("%s", na->name);
2175f9790aebSLuigi Rizzo 
2176f9790aebSLuigi Rizzo 	hwna->nm_krings_delete(hwna);
2177f9790aebSLuigi Rizzo 	netmap_vp_krings_delete(na);
2178f9790aebSLuigi Rizzo }
2179f9790aebSLuigi Rizzo 
218017885a7bSLuigi Rizzo 
2181f9790aebSLuigi Rizzo /* notify method for the bridge-->hwna direction */
2182f9790aebSLuigi Rizzo static int
2183f9790aebSLuigi Rizzo netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
2184f9790aebSLuigi Rizzo {
2185f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
2186f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
2187f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
2188f9790aebSLuigi Rizzo 	struct netmap_kring *kring, *hw_kring;
2189f9790aebSLuigi Rizzo 	struct netmap_ring *ring;
219017885a7bSLuigi Rizzo 	u_int lim;
2191f9790aebSLuigi Rizzo 	int error = 0;
2192f9790aebSLuigi Rizzo 
2193f9790aebSLuigi Rizzo 	if (tx == NR_TX)
2194f2637526SLuigi Rizzo 	        return EINVAL;
2195f9790aebSLuigi Rizzo 
2196f9790aebSLuigi Rizzo 	kring = &na->rx_rings[ring_n];
2197f9790aebSLuigi Rizzo 	hw_kring = &hwna->tx_rings[ring_n];
2198f9790aebSLuigi Rizzo 	ring = kring->ring;
2199f9790aebSLuigi Rizzo 	lim = kring->nkr_num_slots - 1;
2200f9790aebSLuigi Rizzo 
2201*4bf50f18SLuigi Rizzo 	if (!nm_netmap_on(hwna))
2202f9790aebSLuigi Rizzo 		return 0;
2203f0ea3689SLuigi Rizzo 	mtx_lock(&kring->q_lock);
220417885a7bSLuigi Rizzo 	/* first step: simulate a user wakeup on the rx ring */
2205*4bf50f18SLuigi Rizzo 	netmap_vp_rxsync_locked(kring, flags);
220617885a7bSLuigi Rizzo 	ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
2207*4bf50f18SLuigi Rizzo 		na->name, ring_n,
220817885a7bSLuigi Rizzo 		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
220917885a7bSLuigi Rizzo 		ring->head, ring->cur, ring->tail,
221017885a7bSLuigi Rizzo 		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail);
221117885a7bSLuigi Rizzo 	/* second step: the simulated user consumes all new packets */
221217885a7bSLuigi Rizzo 	ring->head = ring->cur = ring->tail;
221317885a7bSLuigi Rizzo 
221417885a7bSLuigi Rizzo 	/* third step: the new packets are sent on the tx ring
221517885a7bSLuigi Rizzo 	 * (which is actually the same ring)
221617885a7bSLuigi Rizzo 	 */
221717885a7bSLuigi Rizzo 	/* set tail to what the hw expects */
221817885a7bSLuigi Rizzo 	ring->tail = hw_kring->rtail;
221917885a7bSLuigi Rizzo 	nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ?
2220f0ea3689SLuigi Rizzo 	error = hw_kring->nm_sync(hw_kring, flags);
222117885a7bSLuigi Rizzo 
222217885a7bSLuigi Rizzo 	/* fourth step: now we are back the rx ring */
222317885a7bSLuigi Rizzo 	/* claim ownership on all hw owned bufs */
222417885a7bSLuigi Rizzo 	ring->head = nm_next(ring->tail, lim); /* skip past reserved slot */
222517885a7bSLuigi Rizzo 	ring->tail = kring->rtail; /* restore saved value of tail, for safety */
222617885a7bSLuigi Rizzo 
222717885a7bSLuigi Rizzo 	/* fifth step: the user goes to sleep again, causing another rxsync */
2228*4bf50f18SLuigi Rizzo 	netmap_vp_rxsync_locked(kring, flags);
222917885a7bSLuigi Rizzo 	ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
2230*4bf50f18SLuigi Rizzo 		na->name, ring_n,
223117885a7bSLuigi Rizzo 		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
223217885a7bSLuigi Rizzo 		ring->head, ring->cur, ring->tail,
223317885a7bSLuigi Rizzo 		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
2234f0ea3689SLuigi Rizzo 	mtx_unlock(&kring->q_lock);
2235f9790aebSLuigi Rizzo 	return error;
2236f9790aebSLuigi Rizzo }
2237f9790aebSLuigi Rizzo 
223817885a7bSLuigi Rizzo 
2239*4bf50f18SLuigi Rizzo /* notify method for the bridge-->host-rings path */
2240f9790aebSLuigi Rizzo static int
2241f9790aebSLuigi Rizzo netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
2242f9790aebSLuigi Rizzo {
2243f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna = na->na_private;
2244f9790aebSLuigi Rizzo 	struct netmap_adapter *port_na = &bna->up.up;
2245f9790aebSLuigi Rizzo 	if (tx == NR_TX || ring_n != 0)
2246f2637526SLuigi Rizzo 		return EINVAL;
2247f9790aebSLuigi Rizzo 	return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
2248f9790aebSLuigi Rizzo }
2249f9790aebSLuigi Rizzo 
225017885a7bSLuigi Rizzo 
2251*4bf50f18SLuigi Rizzo /* nm_bdg_ctl callback for the bwrap.
2252*4bf50f18SLuigi Rizzo  * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
2253*4bf50f18SLuigi Rizzo  * On attach, it needs to provide a fake netmap_priv_d structure and
2254*4bf50f18SLuigi Rizzo  * perform a netmap_do_regif() on the bwrap. This will put both the
2255*4bf50f18SLuigi Rizzo  * bwrap and the hwna in netmap mode, with the netmap rings shared
2256*4bf50f18SLuigi Rizzo  * and cross linked. Moroever, it will start intercepting interrupts
2257*4bf50f18SLuigi Rizzo  * directed to hwna.
2258*4bf50f18SLuigi Rizzo  */
2259f9790aebSLuigi Rizzo static int
2260*4bf50f18SLuigi Rizzo netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
2261*4bf50f18SLuigi Rizzo {
2262*4bf50f18SLuigi Rizzo 	struct netmap_priv_d *npriv;
2263*4bf50f18SLuigi Rizzo 	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
2264*4bf50f18SLuigi Rizzo 	struct netmap_if *nifp;
2265*4bf50f18SLuigi Rizzo 	int error = 0;
2266*4bf50f18SLuigi Rizzo 
2267*4bf50f18SLuigi Rizzo 	if (attach) {
2268*4bf50f18SLuigi Rizzo 		if (NETMAP_OWNED_BY_ANY(na)) {
2269*4bf50f18SLuigi Rizzo 			return EBUSY;
2270*4bf50f18SLuigi Rizzo 		}
2271*4bf50f18SLuigi Rizzo 		if (bna->na_kpriv) {
2272*4bf50f18SLuigi Rizzo 			/* nothing to do */
2273*4bf50f18SLuigi Rizzo 			return 0;
2274*4bf50f18SLuigi Rizzo 		}
2275*4bf50f18SLuigi Rizzo 		npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
2276*4bf50f18SLuigi Rizzo 		if (npriv == NULL)
2277*4bf50f18SLuigi Rizzo 			return ENOMEM;
2278*4bf50f18SLuigi Rizzo 		nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags, &error);
2279*4bf50f18SLuigi Rizzo 		if (!nifp) {
2280*4bf50f18SLuigi Rizzo 			bzero(npriv, sizeof(*npriv));
2281*4bf50f18SLuigi Rizzo 			free(npriv, M_DEVBUF);
2282*4bf50f18SLuigi Rizzo 			return error;
2283*4bf50f18SLuigi Rizzo 		}
2284*4bf50f18SLuigi Rizzo 		bna->na_kpriv = npriv;
2285*4bf50f18SLuigi Rizzo 		na->na_flags |= NAF_BUSY;
2286*4bf50f18SLuigi Rizzo 	} else {
2287*4bf50f18SLuigi Rizzo 		int last_instance;
2288*4bf50f18SLuigi Rizzo 
2289*4bf50f18SLuigi Rizzo 		if (na->active_fds == 0) /* not registered */
2290*4bf50f18SLuigi Rizzo 			return EINVAL;
2291*4bf50f18SLuigi Rizzo 		last_instance = netmap_dtor_locked(bna->na_kpriv);
2292*4bf50f18SLuigi Rizzo 		if (!last_instance) {
2293*4bf50f18SLuigi Rizzo 			D("--- error, trying to detach an entry with active mmaps");
2294*4bf50f18SLuigi Rizzo 			error = EINVAL;
2295*4bf50f18SLuigi Rizzo 		} else {
2296*4bf50f18SLuigi Rizzo 			struct nm_bridge *b = bna->up.na_bdg,
2297*4bf50f18SLuigi Rizzo 				*bh = bna->host.na_bdg;
2298*4bf50f18SLuigi Rizzo 			npriv = bna->na_kpriv;
2299*4bf50f18SLuigi Rizzo 			bna->na_kpriv = NULL;
2300*4bf50f18SLuigi Rizzo 			D("deleting priv");
2301*4bf50f18SLuigi Rizzo 
2302*4bf50f18SLuigi Rizzo 			bzero(npriv, sizeof(*npriv));
2303*4bf50f18SLuigi Rizzo 			free(npriv, M_DEVBUF);
2304*4bf50f18SLuigi Rizzo 			if (b) {
2305*4bf50f18SLuigi Rizzo 				/* XXX the bwrap dtor should take care
2306*4bf50f18SLuigi Rizzo 				 * of this (2014-06-16)
2307*4bf50f18SLuigi Rizzo 				 */
2308*4bf50f18SLuigi Rizzo 				netmap_bdg_detach_common(b, bna->up.bdg_port,
2309*4bf50f18SLuigi Rizzo 				    (bh ? bna->host.bdg_port : -1));
2310*4bf50f18SLuigi Rizzo 			}
2311*4bf50f18SLuigi Rizzo 			na->na_flags &= ~NAF_BUSY;
2312*4bf50f18SLuigi Rizzo 		}
2313*4bf50f18SLuigi Rizzo 	}
2314*4bf50f18SLuigi Rizzo 	return error;
2315*4bf50f18SLuigi Rizzo 
2316*4bf50f18SLuigi Rizzo }
2317*4bf50f18SLuigi Rizzo 
2318*4bf50f18SLuigi Rizzo /* attach a bridge wrapper to the 'real' device */
2319*4bf50f18SLuigi Rizzo int
2320*4bf50f18SLuigi Rizzo netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
2321f9790aebSLuigi Rizzo {
2322f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna;
2323*4bf50f18SLuigi Rizzo 	struct netmap_adapter *na = NULL;
2324*4bf50f18SLuigi Rizzo 	struct netmap_adapter *hostna = NULL;
2325*4bf50f18SLuigi Rizzo 	int error = 0;
2326f9790aebSLuigi Rizzo 
2327*4bf50f18SLuigi Rizzo 	/* make sure the NIC is not already in use */
2328*4bf50f18SLuigi Rizzo 	if (NETMAP_OWNED_BY_ANY(hwna)) {
2329*4bf50f18SLuigi Rizzo 		D("NIC %s busy, cannot attach to bridge", hwna->name);
2330*4bf50f18SLuigi Rizzo 		return EBUSY;
2331*4bf50f18SLuigi Rizzo 	}
2332f9790aebSLuigi Rizzo 
2333f9790aebSLuigi Rizzo 	bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
2334*4bf50f18SLuigi Rizzo 	if (bna == NULL) {
2335f9790aebSLuigi Rizzo 		return ENOMEM;
2336*4bf50f18SLuigi Rizzo 	}
2337f9790aebSLuigi Rizzo 
2338f9790aebSLuigi Rizzo 	na = &bna->up.up;
2339*4bf50f18SLuigi Rizzo 	strncpy(na->name, nr_name, sizeof(na->name));
2340f9790aebSLuigi Rizzo 	/* fill the ring data for the bwrap adapter with rx/tx meanings
2341f9790aebSLuigi Rizzo 	 * swapped. The real cross-linking will be done during register,
2342f9790aebSLuigi Rizzo 	 * when all the krings will have been created.
2343f9790aebSLuigi Rizzo 	 */
2344f9790aebSLuigi Rizzo 	na->num_rx_rings = hwna->num_tx_rings;
2345f9790aebSLuigi Rizzo 	na->num_tx_rings = hwna->num_rx_rings;
2346f9790aebSLuigi Rizzo 	na->num_tx_desc = hwna->num_rx_desc;
2347f9790aebSLuigi Rizzo 	na->num_rx_desc = hwna->num_tx_desc;
2348f9790aebSLuigi Rizzo 	na->nm_dtor = netmap_bwrap_dtor;
2349f9790aebSLuigi Rizzo 	na->nm_register = netmap_bwrap_register;
2350f9790aebSLuigi Rizzo 	// na->nm_txsync = netmap_bwrap_txsync;
2351f9790aebSLuigi Rizzo 	// na->nm_rxsync = netmap_bwrap_rxsync;
2352f9790aebSLuigi Rizzo 	na->nm_config = netmap_bwrap_config;
2353f9790aebSLuigi Rizzo 	na->nm_krings_create = netmap_bwrap_krings_create;
2354f9790aebSLuigi Rizzo 	na->nm_krings_delete = netmap_bwrap_krings_delete;
2355f9790aebSLuigi Rizzo 	na->nm_notify = netmap_bwrap_notify;
2356*4bf50f18SLuigi Rizzo 	na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
2357*4bf50f18SLuigi Rizzo 	na->pdev = hwna->pdev;
2358*4bf50f18SLuigi Rizzo 	na->nm_mem = netmap_mem_private_new(na->name,
2359*4bf50f18SLuigi Rizzo 			na->num_tx_rings, na->num_tx_desc,
2360*4bf50f18SLuigi Rizzo 			na->num_rx_rings, na->num_rx_desc,
2361*4bf50f18SLuigi Rizzo 			0, 0, &error);
2362*4bf50f18SLuigi Rizzo 	na->na_flags |= NAF_MEM_OWNER;
2363*4bf50f18SLuigi Rizzo 	if (na->nm_mem == NULL)
2364*4bf50f18SLuigi Rizzo 		goto err_put;
2365f9790aebSLuigi Rizzo 	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
2366f9790aebSLuigi Rizzo 
2367f9790aebSLuigi Rizzo 	bna->hwna = hwna;
2368f9790aebSLuigi Rizzo 	netmap_adapter_get(hwna);
2369f9790aebSLuigi Rizzo 	hwna->na_private = bna; /* weak reference */
2370*4bf50f18SLuigi Rizzo 	hwna->na_vp = &bna->up;
2371f9790aebSLuigi Rizzo 
2372f0ea3689SLuigi Rizzo 	if (hwna->na_flags & NAF_HOST_RINGS) {
2373*4bf50f18SLuigi Rizzo 		if (hwna->na_flags & NAF_SW_ONLY)
2374*4bf50f18SLuigi Rizzo 			na->na_flags |= NAF_SW_ONLY;
2375f0ea3689SLuigi Rizzo 		na->na_flags |= NAF_HOST_RINGS;
2376f9790aebSLuigi Rizzo 		hostna = &bna->host.up;
2377*4bf50f18SLuigi Rizzo 		snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name);
2378f9790aebSLuigi Rizzo 		hostna->ifp = hwna->ifp;
2379f9790aebSLuigi Rizzo 		hostna->num_tx_rings = 1;
2380f9790aebSLuigi Rizzo 		hostna->num_tx_desc = hwna->num_rx_desc;
2381f9790aebSLuigi Rizzo 		hostna->num_rx_rings = 1;
2382f9790aebSLuigi Rizzo 		hostna->num_rx_desc = hwna->num_tx_desc;
2383f9790aebSLuigi Rizzo 		// hostna->nm_txsync = netmap_bwrap_host_txsync;
2384f9790aebSLuigi Rizzo 		// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
2385f9790aebSLuigi Rizzo 		hostna->nm_notify = netmap_bwrap_host_notify;
2386f9790aebSLuigi Rizzo 		hostna->nm_mem = na->nm_mem;
2387f9790aebSLuigi Rizzo 		hostna->na_private = bna;
2388*4bf50f18SLuigi Rizzo 		hostna->na_vp = &bna->up;
2389*4bf50f18SLuigi Rizzo 		na->na_hostvp = hwna->na_hostvp =
2390*4bf50f18SLuigi Rizzo 			hostna->na_hostvp = &bna->host;
2391*4bf50f18SLuigi Rizzo 		hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
2392f0ea3689SLuigi Rizzo 	}
2393f9790aebSLuigi Rizzo 
239417885a7bSLuigi Rizzo 	ND("%s<->%s txr %d txd %d rxr %d rxd %d",
2395*4bf50f18SLuigi Rizzo 		na->name, ifp->if_xname,
2396f9790aebSLuigi Rizzo 		na->num_tx_rings, na->num_tx_desc,
2397f9790aebSLuigi Rizzo 		na->num_rx_rings, na->num_rx_desc);
2398f9790aebSLuigi Rizzo 
2399f9790aebSLuigi Rizzo 	error = netmap_attach_common(na);
2400f9790aebSLuigi Rizzo 	if (error) {
2401*4bf50f18SLuigi Rizzo 		goto err_free;
2402*4bf50f18SLuigi Rizzo 	}
2403*4bf50f18SLuigi Rizzo 	/* make bwrap ifp point to the real ifp
2404*4bf50f18SLuigi Rizzo 	 * NOTE: netmap_attach_common() interprets a non-NULL na->ifp
2405*4bf50f18SLuigi Rizzo 	 * as a request to make the ifp point to the na. Since we
2406*4bf50f18SLuigi Rizzo 	 * do not want to change the na already pointed to by hwna->ifp,
2407*4bf50f18SLuigi Rizzo 	 * the following assignment has to be delayed until now
2408*4bf50f18SLuigi Rizzo 	 */
2409*4bf50f18SLuigi Rizzo 	na->ifp = hwna->ifp;
2410*4bf50f18SLuigi Rizzo 	hwna->na_flags |= NAF_BUSY;
2411*4bf50f18SLuigi Rizzo 	/* make hwna point to the allocator we are actually using,
2412*4bf50f18SLuigi Rizzo 	 * so that monitors will be able to find it
2413*4bf50f18SLuigi Rizzo 	 */
2414*4bf50f18SLuigi Rizzo 	bna->save_nmd = hwna->nm_mem;
2415*4bf50f18SLuigi Rizzo 	hwna->nm_mem = na->nm_mem;
2416*4bf50f18SLuigi Rizzo 	return 0;
2417*4bf50f18SLuigi Rizzo 
2418*4bf50f18SLuigi Rizzo err_free:
2419*4bf50f18SLuigi Rizzo 	netmap_mem_private_delete(na->nm_mem);
2420*4bf50f18SLuigi Rizzo err_put:
2421*4bf50f18SLuigi Rizzo 	hwna->na_vp = hwna->na_hostvp = NULL;
2422f9790aebSLuigi Rizzo 	netmap_adapter_put(hwna);
2423f9790aebSLuigi Rizzo 	free(bna, M_DEVBUF);
2424f9790aebSLuigi Rizzo 	return error;
2425*4bf50f18SLuigi Rizzo 
2426f9790aebSLuigi Rizzo }
2427f9790aebSLuigi Rizzo 
242817885a7bSLuigi Rizzo 
2429f9790aebSLuigi Rizzo void
2430f9790aebSLuigi Rizzo netmap_init_bridges(void)
2431f9790aebSLuigi Rizzo {
2432f9790aebSLuigi Rizzo 	int i;
2433f9790aebSLuigi Rizzo 	bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
2434f9790aebSLuigi Rizzo 	for (i = 0; i < NM_BRIDGES; i++)
2435f9790aebSLuigi Rizzo 		BDG_RWINIT(&nm_bridges[i]);
2436f9790aebSLuigi Rizzo }
2437f9790aebSLuigi Rizzo #endif /* WITH_VALE */
2438