xref: /freebsd-14.2/sys/dev/netmap/netmap_vale.c (revision 847bf383)
1f9790aebSLuigi Rizzo /*
217885a7bSLuigi Rizzo  * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
3f9790aebSLuigi Rizzo  *
4f9790aebSLuigi Rizzo  * Redistribution and use in source and binary forms, with or without
5f9790aebSLuigi Rizzo  * modification, are permitted provided that the following conditions
6f9790aebSLuigi Rizzo  * are met:
7f9790aebSLuigi Rizzo  *   1. Redistributions of source code must retain the above copyright
8f9790aebSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer.
9f9790aebSLuigi Rizzo  *   2. Redistributions in binary form must reproduce the above copyright
10f9790aebSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer in the
11f9790aebSLuigi Rizzo  *      documentation and/or other materials provided with the distribution.
12f9790aebSLuigi Rizzo  *
13f9790aebSLuigi Rizzo  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14f9790aebSLuigi Rizzo  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15f9790aebSLuigi Rizzo  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16f9790aebSLuigi Rizzo  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17f9790aebSLuigi Rizzo  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18f9790aebSLuigi Rizzo  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19f9790aebSLuigi Rizzo  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20f9790aebSLuigi Rizzo  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21f9790aebSLuigi Rizzo  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22f9790aebSLuigi Rizzo  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23f9790aebSLuigi Rizzo  * SUCH DAMAGE.
24f9790aebSLuigi Rizzo  */
25f9790aebSLuigi Rizzo 
26f9790aebSLuigi Rizzo 
/*
 * This module implements the VALE switch for netmap

--- VALE SWITCH ---

NMG_LOCK() serializes all modifications to switches and ports.
A switch cannot be deleted until all ports are gone.

For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring a new port or deleting one, the
lock is acquired in exclusive mode (after holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.

On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch.)

 */
51f9790aebSLuigi Rizzo 
52f9790aebSLuigi Rizzo /*
53f9790aebSLuigi Rizzo  * OS-specific code that is used only within this file.
54f9790aebSLuigi Rizzo  * Other OS-specific code that must be accessed by drivers
55f9790aebSLuigi Rizzo  * is present in netmap_kern.h
56f9790aebSLuigi Rizzo  */
57f9790aebSLuigi Rizzo 
58f9790aebSLuigi Rizzo #if defined(__FreeBSD__)
59f9790aebSLuigi Rizzo #include <sys/cdefs.h> /* prerequisite */
60f9790aebSLuigi Rizzo __FBSDID("$FreeBSD$");
61f9790aebSLuigi Rizzo 
62f9790aebSLuigi Rizzo #include <sys/types.h>
63f9790aebSLuigi Rizzo #include <sys/errno.h>
64f9790aebSLuigi Rizzo #include <sys/param.h>	/* defines used in kernel.h */
65f9790aebSLuigi Rizzo #include <sys/kernel.h>	/* types used in module initialization */
66f9790aebSLuigi Rizzo #include <sys/conf.h>	/* cdevsw struct, UID, GID */
67f9790aebSLuigi Rizzo #include <sys/sockio.h>
68f9790aebSLuigi Rizzo #include <sys/socketvar.h>	/* struct socket */
69f9790aebSLuigi Rizzo #include <sys/malloc.h>
70f9790aebSLuigi Rizzo #include <sys/poll.h>
71f9790aebSLuigi Rizzo #include <sys/rwlock.h>
72f9790aebSLuigi Rizzo #include <sys/socket.h> /* sockaddrs */
73f9790aebSLuigi Rizzo #include <sys/selinfo.h>
74f9790aebSLuigi Rizzo #include <sys/sysctl.h>
75f9790aebSLuigi Rizzo #include <net/if.h>
76f9790aebSLuigi Rizzo #include <net/if_var.h>
77f9790aebSLuigi Rizzo #include <net/bpf.h>		/* BIOCIMMEDIATE */
78f9790aebSLuigi Rizzo #include <machine/bus.h>	/* bus_dmamap_* */
79f9790aebSLuigi Rizzo #include <sys/endian.h>
80f9790aebSLuigi Rizzo #include <sys/refcount.h>
81f9790aebSLuigi Rizzo 
82f9790aebSLuigi Rizzo 
/*
 * FreeBSD flavour of the per-bridge lock.  Every macro below takes a
 * struct nm_bridge pointer and applies the matching rw_*() primitive
 * to its bdg_lock member.
 */
#define BDG_RWLOCK_T		struct rwlock // struct rwlock

/* initialize the lock (named "bdg lock", created with RW_NOWITNESS) */
#define	BDG_RWINIT(b)		\
	rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
/* exclusive (writer) acquire / release */
#define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)
#define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)
/* shared (reader) acquire, non-blocking attempt, and release */
#define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)
#define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)
#define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)
/* tear the lock down */
#define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)
93f9790aebSLuigi Rizzo 
94f9790aebSLuigi Rizzo 
95f9790aebSLuigi Rizzo #elif defined(linux)
96f9790aebSLuigi Rizzo 
97f9790aebSLuigi Rizzo #include "bsd_glue.h"
98f9790aebSLuigi Rizzo 
99f9790aebSLuigi Rizzo #elif defined(__APPLE__)
100f9790aebSLuigi Rizzo 
101f9790aebSLuigi Rizzo #warning OSX support is only partial
102f9790aebSLuigi Rizzo #include "osx_glue.h"
103f9790aebSLuigi Rizzo 
104f9790aebSLuigi Rizzo #else
105f9790aebSLuigi Rizzo 
106f9790aebSLuigi Rizzo #error	Unsupported platform
107f9790aebSLuigi Rizzo 
108f9790aebSLuigi Rizzo #endif /* unsupported */
109f9790aebSLuigi Rizzo 
110f9790aebSLuigi Rizzo /*
111f9790aebSLuigi Rizzo  * common headers
112f9790aebSLuigi Rizzo  */
113f9790aebSLuigi Rizzo 
114f9790aebSLuigi Rizzo #include <net/netmap.h>
115f9790aebSLuigi Rizzo #include <dev/netmap/netmap_kern.h>
116f9790aebSLuigi Rizzo #include <dev/netmap/netmap_mem2.h>
117f9790aebSLuigi Rizzo 
118f9790aebSLuigi Rizzo #ifdef WITH_VALE
119f9790aebSLuigi Rizzo 
120f9790aebSLuigi Rizzo /*
121f9790aebSLuigi Rizzo  * system parameters (most of them in netmap_kern.h)
122f9790aebSLuigi Rizzo  * NM_NAME	prefix for switch port names, default "vale"
123f9790aebSLuigi Rizzo  * NM_BDG_MAXPORTS	number of ports
124f9790aebSLuigi Rizzo  * NM_BRIDGES	max number of switches in the system.
125f9790aebSLuigi Rizzo  *	XXX should become a sysctl or tunable
126f9790aebSLuigi Rizzo  *
127f9790aebSLuigi Rizzo  * Switch ports are named valeX:Y where X is the switch name and Y
128f9790aebSLuigi Rizzo  * is the port. If Y matches a physical interface name, the port is
129f9790aebSLuigi Rizzo  * connected to a physical device.
130f9790aebSLuigi Rizzo  *
131f9790aebSLuigi Rizzo  * Unlike physical interfaces, switch ports use their own memory region
132f9790aebSLuigi Rizzo  * for rings and buffers.
133f9790aebSLuigi Rizzo  * The virtual interfaces use per-queue lock instead of core lock.
134f9790aebSLuigi Rizzo  * In the tx loop, we aggregate traffic in batches to make all operations
135f9790aebSLuigi Rizzo  * faster. The batch size is bridge_batch.
136f9790aebSLuigi Rizzo  */
/* rings per port assumed when sizing the per-ring destination queues */
#define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
#define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
#define NM_BRIDGE_RINGSIZE	1024	/* in the device */
#define NM_BDG_HASH		1024	/* forwarding table entries */
#define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
#define NM_MULTISEG		64	/* max size of a chain of bufs */
/* actual size of the tables */
#define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
/*
 * NM_FT_NULL terminates a list of slots in the ft; it is one past the
 * largest valid ft index, so it can never name a real entry.
 */
#define NM_FT_NULL		NM_BDG_BATCH_MAX
#define	NM_BRIDGES		8	/* number of bridges */
148f9790aebSLuigi Rizzo 
149f9790aebSLuigi Rizzo 
/*
 * bridge_batch is set via sysctl to the max batch size to be
 * used in the bridge. The actual value may be larger as the
 * last packet in the block may overflow the size.
 *
 * The variable is registered below as a read-write integer named
 * "bridge_batch" under the _dev_netmap sysctl node, with an empty
 * description string.
 */
int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
SYSCTL_DECL(_dev_netmap);
SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
158f9790aebSLuigi Rizzo 
159f9790aebSLuigi Rizzo 
/*
 * Forward declarations of file-local functions that are referenced
 * before their definitions (e.g. nm_vi_create() calls netmap_vp_create()
 * and nm_vi_destroy() compares against netmap_vp_reg).
 */
static int netmap_vp_create(struct nmreq *, struct ifnet *, struct netmap_vp_adapter **);
static int netmap_vp_reg(struct netmap_adapter *na, int onoff);
static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
163f9790aebSLuigi Rizzo 
/*
 * For each output interface, nm_bdg_q is used to construct a list.
 * bq_len is the number of output buffers (we can have coalescing
 * during the copy).
 *
 * nm_alloc_bdgfwd() initializes every queue as empty, i.e. with both
 * bq_head and bq_tail set to NM_FT_NULL and bq_len set to 0.
 */
struct nm_bdg_q {
	uint16_t bq_head;	/* first entry of the list, NM_FT_NULL if empty */
	uint16_t bq_tail;	/* last entry of the list, NM_FT_NULL if empty */
	uint32_t bq_len;	/* number of buffers */
};
174f9790aebSLuigi Rizzo 
/*
 * One entry of the bridge forwarding table (see ht[] in struct nm_bridge).
 * XXX revise this
 */
struct nm_hash_ent {
	uint64_t	mac;	/* the top 2 bytes are the epoch */
	uint64_t	ports;	/* NOTE(review): presumably the destination port(s) for mac - confirm */
};
180f9790aebSLuigi Rizzo 
/*
 * nm_bridge is a descriptor for a VALE switch.
 * Interfaces for a bridge are all in bdg_ports[].
 * The array has fixed size, an empty entry does not terminate
 * the search, but lookups only occur on attach/detach so we
 * don't mind if they are slow.
 *
 * The bridge is non blocking on the transmit ports: excess
 * packets are dropped if there is no room on the output port.
 *
 * bdg_lock protects accesses to the bdg_ports array.
 * This is a rw lock (or equivalent).
 */
struct nm_bridge {
	/* XXX what is the proper alignment/layout ? */
	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
	int		bdg_namelen;	/* number of characters of bdg_basename that are compared on lookup */
	uint32_t	bdg_active_ports; /* 0 means free */
	char		bdg_basename[IFNAMSIZ];	/* bridge name: the port name up to (excluding) the ':' */

	/* Indexes of active ports (up to active_ports)
	 * and all other remaining ports.
	 * The first bdg_active_ports entries name the slots of bdg_ports[]
	 * that are in use; the entries after them name the unused slots.
	 */
	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];

	/* adapter attached to each slot; set to NULL when a port is detached */
	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];


	/*
	 * Bridge callbacks.
	 *
	 * The lookup function decides the destination port.
	 * It returns either the index of the destination port,
	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
	 * forward this packet.  ring_nr is the source ring index, and the
	 * function may overwrite this value to forward this packet to a
	 * different ring index.
	 * This function must be set by netmap_bdgctl().
	 *
	 * The dtor callback, when set, is invoked on a port while it is
	 * being detached (see netmap_bdg_detach_common()).
	 */
	struct netmap_bdg_ops bdg_ops;

	/* the forwarding table, MAC+ports.
	 * XXX should be changed to an argument to be passed to
	 * the lookup function, and allocated on attach
	 */
	struct nm_hash_ent ht[NM_BDG_HASH];

#ifdef CONFIG_NET_NS
	/* NOTE(review): presumably the network namespace this bridge lives in - confirm */
	struct net *ns;
#endif /* CONFIG_NET_NS */
};
230f9790aebSLuigi Rizzo 
2314bf50f18SLuigi Rizzo const char*
2324bf50f18SLuigi Rizzo netmap_bdg_name(struct netmap_vp_adapter *vp)
2334bf50f18SLuigi Rizzo {
2344bf50f18SLuigi Rizzo 	struct nm_bridge *b = vp->na_bdg;
2354bf50f18SLuigi Rizzo 	if (b == NULL)
2364bf50f18SLuigi Rizzo 		return NULL;
2374bf50f18SLuigi Rizzo 	return b->bdg_basename;
2384bf50f18SLuigi Rizzo }
2394bf50f18SLuigi Rizzo 
240f9790aebSLuigi Rizzo 
#ifndef CONFIG_NET_NS
/*
 * XXX in principle nm_bridges could be created dynamically
 * Right now we have a static array and deletions are protected
 * by an exclusive lock.
 *
 * This global only exists when CONFIG_NET_NS is not defined.
 * nm_find_bridge() does not read it directly: it obtains the bridge
 * table and its size through netmap_bns_getbridges().
 * NOTE(review): declared as a pointer; where the table is allocated
 * is not visible from this part of the file.
 */
struct nm_bridge *nm_bridges;
#endif /* !CONFIG_NET_NS */
249f9790aebSLuigi Rizzo 
250f9790aebSLuigi Rizzo 
/*
 * Slightly optimized packet copy.
 *
 * For l < 1024 the data is moved in 64-byte chunks (eight 64-bit words
 * per iteration), so the number of bytes actually copied is l rounded
 * up to the next multiple of 64; nothing is copied when l <= 0.
 * For l >= 1024 the routine falls back to memcpy() of exactly l bytes.
 *
 * The caller must guarantee that both buffers have room for the
 * rounded-up length and that they do not overlap.
 */
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
	uint64_t *from = _src;
	uint64_t *to = _dst;
	int word;

	if (unlikely(l >= 1024)) {
		memcpy(to, from, l);
		return;
	}
	while (likely(l > 0)) {
		/* one 64-byte chunk */
		for (word = 0; word < 8; word++)
			to[word] = from[word];
		from += 8;
		to += 8;
		l -= 64;
	}
}
279f9790aebSLuigi Rizzo 
280f9790aebSLuigi Rizzo 
281f9790aebSLuigi Rizzo /*
282f9790aebSLuigi Rizzo  * locate a bridge among the existing ones.
283f9790aebSLuigi Rizzo  * MUST BE CALLED WITH NMG_LOCK()
284f9790aebSLuigi Rizzo  *
285f9790aebSLuigi Rizzo  * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
286f9790aebSLuigi Rizzo  * We assume that this is called with a name of at least NM_NAME chars.
287f9790aebSLuigi Rizzo  */
288f9790aebSLuigi Rizzo static struct nm_bridge *
289f9790aebSLuigi Rizzo nm_find_bridge(const char *name, int create)
290f9790aebSLuigi Rizzo {
291f9790aebSLuigi Rizzo 	int i, l, namelen;
292*847bf383SLuigi Rizzo 	struct nm_bridge *b = NULL, *bridges;
293*847bf383SLuigi Rizzo 	u_int num_bridges;
294f9790aebSLuigi Rizzo 
295f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
296f9790aebSLuigi Rizzo 
297*847bf383SLuigi Rizzo 	netmap_bns_getbridges(&bridges, &num_bridges);
298*847bf383SLuigi Rizzo 
299f9790aebSLuigi Rizzo 	namelen = strlen(NM_NAME);	/* base length */
300f9790aebSLuigi Rizzo 	l = name ? strlen(name) : 0;		/* actual length */
301f9790aebSLuigi Rizzo 	if (l < namelen) {
302f9790aebSLuigi Rizzo 		D("invalid bridge name %s", name ? name : NULL);
303f9790aebSLuigi Rizzo 		return NULL;
304f9790aebSLuigi Rizzo 	}
305f9790aebSLuigi Rizzo 	for (i = namelen + 1; i < l; i++) {
306f9790aebSLuigi Rizzo 		if (name[i] == ':') {
307f9790aebSLuigi Rizzo 			namelen = i;
308f9790aebSLuigi Rizzo 			break;
309f9790aebSLuigi Rizzo 		}
310f9790aebSLuigi Rizzo 	}
311f9790aebSLuigi Rizzo 	if (namelen >= IFNAMSIZ)
312f9790aebSLuigi Rizzo 		namelen = IFNAMSIZ;
313f9790aebSLuigi Rizzo 	ND("--- prefix is '%.*s' ---", namelen, name);
314f9790aebSLuigi Rizzo 
315f9790aebSLuigi Rizzo 	/* lookup the name, remember empty slot if there is one */
316*847bf383SLuigi Rizzo 	for (i = 0; i < num_bridges; i++) {
317*847bf383SLuigi Rizzo 		struct nm_bridge *x = bridges + i;
318f9790aebSLuigi Rizzo 
319f9790aebSLuigi Rizzo 		if (x->bdg_active_ports == 0) {
320f9790aebSLuigi Rizzo 			if (create && b == NULL)
321f9790aebSLuigi Rizzo 				b = x;	/* record empty slot */
322f9790aebSLuigi Rizzo 		} else if (x->bdg_namelen != namelen) {
323f9790aebSLuigi Rizzo 			continue;
324f9790aebSLuigi Rizzo 		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
325f9790aebSLuigi Rizzo 			ND("found '%.*s' at %d", namelen, name, i);
326f9790aebSLuigi Rizzo 			b = x;
327f9790aebSLuigi Rizzo 			break;
328f9790aebSLuigi Rizzo 		}
329f9790aebSLuigi Rizzo 	}
330*847bf383SLuigi Rizzo 	if (i == num_bridges && b) { /* name not found, can create entry */
331f9790aebSLuigi Rizzo 		/* initialize the bridge */
332f9790aebSLuigi Rizzo 		strncpy(b->bdg_basename, name, namelen);
333f9790aebSLuigi Rizzo 		ND("create new bridge %s with ports %d", b->bdg_basename,
334f9790aebSLuigi Rizzo 			b->bdg_active_ports);
335f9790aebSLuigi Rizzo 		b->bdg_namelen = namelen;
336f9790aebSLuigi Rizzo 		b->bdg_active_ports = 0;
337f9790aebSLuigi Rizzo 		for (i = 0; i < NM_BDG_MAXPORTS; i++)
338f9790aebSLuigi Rizzo 			b->bdg_port_index[i] = i;
339f9790aebSLuigi Rizzo 		/* set the default function */
3404bf50f18SLuigi Rizzo 		b->bdg_ops.lookup = netmap_bdg_learning;
341f9790aebSLuigi Rizzo 		/* reset the MAC address table */
342f9790aebSLuigi Rizzo 		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
343*847bf383SLuigi Rizzo 		NM_BNS_GET(b);
344f9790aebSLuigi Rizzo 	}
345f9790aebSLuigi Rizzo 	return b;
346f9790aebSLuigi Rizzo }
347f9790aebSLuigi Rizzo 
348f9790aebSLuigi Rizzo 
349f9790aebSLuigi Rizzo /*
350f9790aebSLuigi Rizzo  * Free the forwarding tables for rings attached to switch ports.
351f9790aebSLuigi Rizzo  */
352f9790aebSLuigi Rizzo static void
353f9790aebSLuigi Rizzo nm_free_bdgfwd(struct netmap_adapter *na)
354f9790aebSLuigi Rizzo {
355f9790aebSLuigi Rizzo 	int nrings, i;
356f9790aebSLuigi Rizzo 	struct netmap_kring *kring;
357f9790aebSLuigi Rizzo 
358f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
35917885a7bSLuigi Rizzo 	nrings = na->num_tx_rings;
36017885a7bSLuigi Rizzo 	kring = na->tx_rings;
361f9790aebSLuigi Rizzo 	for (i = 0; i < nrings; i++) {
362f9790aebSLuigi Rizzo 		if (kring[i].nkr_ft) {
363f9790aebSLuigi Rizzo 			free(kring[i].nkr_ft, M_DEVBUF);
364f9790aebSLuigi Rizzo 			kring[i].nkr_ft = NULL; /* protect from freeing twice */
365f9790aebSLuigi Rizzo 		}
366f9790aebSLuigi Rizzo 	}
367f9790aebSLuigi Rizzo }
368f9790aebSLuigi Rizzo 
369f9790aebSLuigi Rizzo 
370f9790aebSLuigi Rizzo /*
371f9790aebSLuigi Rizzo  * Allocate the forwarding tables for the rings attached to the bridge ports.
372f9790aebSLuigi Rizzo  */
373f9790aebSLuigi Rizzo static int
374f9790aebSLuigi Rizzo nm_alloc_bdgfwd(struct netmap_adapter *na)
375f9790aebSLuigi Rizzo {
376f9790aebSLuigi Rizzo 	int nrings, l, i, num_dstq;
377f9790aebSLuigi Rizzo 	struct netmap_kring *kring;
378f9790aebSLuigi Rizzo 
379f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
380f9790aebSLuigi Rizzo 	/* all port:rings + broadcast */
381f9790aebSLuigi Rizzo 	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
382f9790aebSLuigi Rizzo 	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
383f9790aebSLuigi Rizzo 	l += sizeof(struct nm_bdg_q) * num_dstq;
384f9790aebSLuigi Rizzo 	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
385f9790aebSLuigi Rizzo 
386*847bf383SLuigi Rizzo 	nrings = netmap_real_rings(na, NR_TX);
387f9790aebSLuigi Rizzo 	kring = na->tx_rings;
388f9790aebSLuigi Rizzo 	for (i = 0; i < nrings; i++) {
389f9790aebSLuigi Rizzo 		struct nm_bdg_fwd *ft;
390f9790aebSLuigi Rizzo 		struct nm_bdg_q *dstq;
391f9790aebSLuigi Rizzo 		int j;
392f9790aebSLuigi Rizzo 
393f9790aebSLuigi Rizzo 		ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
394f9790aebSLuigi Rizzo 		if (!ft) {
395f9790aebSLuigi Rizzo 			nm_free_bdgfwd(na);
396f9790aebSLuigi Rizzo 			return ENOMEM;
397f9790aebSLuigi Rizzo 		}
398f9790aebSLuigi Rizzo 		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
399f9790aebSLuigi Rizzo 		for (j = 0; j < num_dstq; j++) {
400f9790aebSLuigi Rizzo 			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
401f9790aebSLuigi Rizzo 			dstq[j].bq_len = 0;
402f9790aebSLuigi Rizzo 		}
403f9790aebSLuigi Rizzo 		kring[i].nkr_ft = ft;
404f9790aebSLuigi Rizzo 	}
405f9790aebSLuigi Rizzo 	return 0;
406f9790aebSLuigi Rizzo }
407f9790aebSLuigi Rizzo 
408f9790aebSLuigi Rizzo 
4094bf50f18SLuigi Rizzo /* remove from bridge b the ports in slots hw and sw
4104bf50f18SLuigi Rizzo  * (sw can be -1 if not needed)
4114bf50f18SLuigi Rizzo  */
412f9790aebSLuigi Rizzo static void
413f9790aebSLuigi Rizzo netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
414f9790aebSLuigi Rizzo {
415f9790aebSLuigi Rizzo 	int s_hw = hw, s_sw = sw;
416f9790aebSLuigi Rizzo 	int i, lim =b->bdg_active_ports;
417f9790aebSLuigi Rizzo 	uint8_t tmp[NM_BDG_MAXPORTS];
418f9790aebSLuigi Rizzo 
419f9790aebSLuigi Rizzo 	/*
420f9790aebSLuigi Rizzo 	New algorithm:
421f9790aebSLuigi Rizzo 	make a copy of bdg_port_index;
422f9790aebSLuigi Rizzo 	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
423f9790aebSLuigi Rizzo 	in the array of bdg_port_index, replacing them with
424f9790aebSLuigi Rizzo 	entries from the bottom of the array;
425f9790aebSLuigi Rizzo 	decrement bdg_active_ports;
426f9790aebSLuigi Rizzo 	acquire BDG_WLOCK() and copy back the array.
427f9790aebSLuigi Rizzo 	 */
428f9790aebSLuigi Rizzo 
429f0ea3689SLuigi Rizzo 	if (netmap_verbose)
430f9790aebSLuigi Rizzo 		D("detach %d and %d (lim %d)", hw, sw, lim);
431f9790aebSLuigi Rizzo 	/* make a copy of the list of active ports, update it,
432f9790aebSLuigi Rizzo 	 * and then copy back within BDG_WLOCK().
433f9790aebSLuigi Rizzo 	 */
434f9790aebSLuigi Rizzo 	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
435f9790aebSLuigi Rizzo 	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
436f9790aebSLuigi Rizzo 		if (hw >= 0 && tmp[i] == hw) {
437f9790aebSLuigi Rizzo 			ND("detach hw %d at %d", hw, i);
438f9790aebSLuigi Rizzo 			lim--; /* point to last active port */
439f9790aebSLuigi Rizzo 			tmp[i] = tmp[lim]; /* swap with i */
440f9790aebSLuigi Rizzo 			tmp[lim] = hw;	/* now this is inactive */
441f9790aebSLuigi Rizzo 			hw = -1;
442f9790aebSLuigi Rizzo 		} else if (sw >= 0 && tmp[i] == sw) {
443f9790aebSLuigi Rizzo 			ND("detach sw %d at %d", sw, i);
444f9790aebSLuigi Rizzo 			lim--;
445f9790aebSLuigi Rizzo 			tmp[i] = tmp[lim];
446f9790aebSLuigi Rizzo 			tmp[lim] = sw;
447f9790aebSLuigi Rizzo 			sw = -1;
448f9790aebSLuigi Rizzo 		} else {
449f9790aebSLuigi Rizzo 			i++;
450f9790aebSLuigi Rizzo 		}
451f9790aebSLuigi Rizzo 	}
452f9790aebSLuigi Rizzo 	if (hw >= 0 || sw >= 0) {
453f9790aebSLuigi Rizzo 		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
454f9790aebSLuigi Rizzo 	}
455f9790aebSLuigi Rizzo 
456f9790aebSLuigi Rizzo 	BDG_WLOCK(b);
4574bf50f18SLuigi Rizzo 	if (b->bdg_ops.dtor)
4584bf50f18SLuigi Rizzo 		b->bdg_ops.dtor(b->bdg_ports[s_hw]);
459f9790aebSLuigi Rizzo 	b->bdg_ports[s_hw] = NULL;
460f9790aebSLuigi Rizzo 	if (s_sw >= 0) {
461f9790aebSLuigi Rizzo 		b->bdg_ports[s_sw] = NULL;
462f9790aebSLuigi Rizzo 	}
463f9790aebSLuigi Rizzo 	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
464f9790aebSLuigi Rizzo 	b->bdg_active_ports = lim;
465f9790aebSLuigi Rizzo 	BDG_WUNLOCK(b);
466f9790aebSLuigi Rizzo 
467f9790aebSLuigi Rizzo 	ND("now %d active ports", lim);
468f9790aebSLuigi Rizzo 	if (lim == 0) {
469f9790aebSLuigi Rizzo 		ND("marking bridge %s as free", b->bdg_basename);
4704bf50f18SLuigi Rizzo 		bzero(&b->bdg_ops, sizeof(b->bdg_ops));
471*847bf383SLuigi Rizzo 		NM_BNS_PUT(b);
472f9790aebSLuigi Rizzo 	}
473f9790aebSLuigi Rizzo }
474f9790aebSLuigi Rizzo 
4754bf50f18SLuigi Rizzo /* nm_bdg_ctl callback for VALE ports */
4764bf50f18SLuigi Rizzo static int
4774bf50f18SLuigi Rizzo netmap_vp_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
478f9790aebSLuigi Rizzo {
479f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
480f9790aebSLuigi Rizzo 	struct nm_bridge *b = vpna->na_bdg;
481f9790aebSLuigi Rizzo 
4824bf50f18SLuigi Rizzo 	if (attach)
4834bf50f18SLuigi Rizzo 		return 0; /* nothing to do */
4844bf50f18SLuigi Rizzo 	if (b) {
4854bf50f18SLuigi Rizzo 		netmap_set_all_rings(na, 0 /* disable */);
4864bf50f18SLuigi Rizzo 		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
4874bf50f18SLuigi Rizzo 		vpna->na_bdg = NULL;
4884bf50f18SLuigi Rizzo 		netmap_set_all_rings(na, 1 /* enable */);
4894bf50f18SLuigi Rizzo 	}
4904bf50f18SLuigi Rizzo 	/* I have took reference just for attach */
4914bf50f18SLuigi Rizzo 	netmap_adapter_put(na);
4924bf50f18SLuigi Rizzo 	return 0;
4934bf50f18SLuigi Rizzo }
4944bf50f18SLuigi Rizzo 
4954bf50f18SLuigi Rizzo /* nm_dtor callback for ephemeral VALE ports */
4964bf50f18SLuigi Rizzo static void
4974bf50f18SLuigi Rizzo netmap_vp_dtor(struct netmap_adapter *na)
4984bf50f18SLuigi Rizzo {
4994bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
5004bf50f18SLuigi Rizzo 	struct nm_bridge *b = vpna->na_bdg;
5014bf50f18SLuigi Rizzo 
5024bf50f18SLuigi Rizzo 	ND("%s has %d references", na->name, na->na_refcount);
503f9790aebSLuigi Rizzo 
504f9790aebSLuigi Rizzo 	if (b) {
505f9790aebSLuigi Rizzo 		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
506f9790aebSLuigi Rizzo 	}
507f9790aebSLuigi Rizzo }
508f9790aebSLuigi Rizzo 
5094bf50f18SLuigi Rizzo /* nm_dtor callback for persistent VALE ports */
5104bf50f18SLuigi Rizzo static void
5114bf50f18SLuigi Rizzo netmap_persist_vp_dtor(struct netmap_adapter *na)
5124bf50f18SLuigi Rizzo {
5134bf50f18SLuigi Rizzo 	struct ifnet *ifp = na->ifp;
5144bf50f18SLuigi Rizzo 
5154bf50f18SLuigi Rizzo 	netmap_vp_dtor(na);
5164bf50f18SLuigi Rizzo 	na->ifp = NULL;
5174bf50f18SLuigi Rizzo 	nm_vi_detach(ifp);
5184bf50f18SLuigi Rizzo }
5194bf50f18SLuigi Rizzo 
5204bf50f18SLuigi Rizzo /* remove a persistent VALE port from the system */
5214bf50f18SLuigi Rizzo static int
5224bf50f18SLuigi Rizzo nm_vi_destroy(const char *name)
5234bf50f18SLuigi Rizzo {
5244bf50f18SLuigi Rizzo 	struct ifnet *ifp;
5254bf50f18SLuigi Rizzo 	int error;
5264bf50f18SLuigi Rizzo 
5274bf50f18SLuigi Rizzo 	ifp = ifunit_ref(name);
5284bf50f18SLuigi Rizzo 	if (!ifp)
5294bf50f18SLuigi Rizzo 		return ENXIO;
5304bf50f18SLuigi Rizzo 	NMG_LOCK();
5314bf50f18SLuigi Rizzo 	/* make sure this is actually a VALE port */
5324bf50f18SLuigi Rizzo 	if (!NETMAP_CAPABLE(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
5334bf50f18SLuigi Rizzo 		error = EINVAL;
5344bf50f18SLuigi Rizzo 		goto err;
5354bf50f18SLuigi Rizzo 	}
5364bf50f18SLuigi Rizzo 
5374bf50f18SLuigi Rizzo 	if (NA(ifp)->na_refcount > 1) {
5384bf50f18SLuigi Rizzo 		error = EBUSY;
5394bf50f18SLuigi Rizzo 		goto err;
5404bf50f18SLuigi Rizzo 	}
5414bf50f18SLuigi Rizzo 	NMG_UNLOCK();
5424bf50f18SLuigi Rizzo 
5434bf50f18SLuigi Rizzo 	D("destroying a persistent vale interface %s", ifp->if_xname);
5444bf50f18SLuigi Rizzo 	/* Linux requires all the references are released
5454bf50f18SLuigi Rizzo 	 * before unregister
5464bf50f18SLuigi Rizzo 	 */
5474bf50f18SLuigi Rizzo 	if_rele(ifp);
5484bf50f18SLuigi Rizzo 	netmap_detach(ifp);
5494bf50f18SLuigi Rizzo 	return 0;
5504bf50f18SLuigi Rizzo 
5514bf50f18SLuigi Rizzo err:
5524bf50f18SLuigi Rizzo 	NMG_UNLOCK();
5534bf50f18SLuigi Rizzo 	if_rele(ifp);
5544bf50f18SLuigi Rizzo 	return error;
5554bf50f18SLuigi Rizzo }
5564bf50f18SLuigi Rizzo 
5574bf50f18SLuigi Rizzo /*
5584bf50f18SLuigi Rizzo  * Create a virtual interface registered to the system.
5594bf50f18SLuigi Rizzo  * The interface will be attached to a bridge later.
5604bf50f18SLuigi Rizzo  */
5614bf50f18SLuigi Rizzo static int
5624bf50f18SLuigi Rizzo nm_vi_create(struct nmreq *nmr)
5634bf50f18SLuigi Rizzo {
5644bf50f18SLuigi Rizzo 	struct ifnet *ifp;
5654bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *vpna;
5664bf50f18SLuigi Rizzo 	int error;
5674bf50f18SLuigi Rizzo 
5684bf50f18SLuigi Rizzo 	/* don't include VALE prefix */
5694bf50f18SLuigi Rizzo 	if (!strncmp(nmr->nr_name, NM_NAME, strlen(NM_NAME)))
5704bf50f18SLuigi Rizzo 		return EINVAL;
5714bf50f18SLuigi Rizzo 	ifp = ifunit_ref(nmr->nr_name);
5724bf50f18SLuigi Rizzo 	if (ifp) { /* already exist, cannot create new one */
5734bf50f18SLuigi Rizzo 		if_rele(ifp);
5744bf50f18SLuigi Rizzo 		return EEXIST;
5754bf50f18SLuigi Rizzo 	}
5764bf50f18SLuigi Rizzo 	error = nm_vi_persist(nmr->nr_name, &ifp);
5774bf50f18SLuigi Rizzo 	if (error)
5784bf50f18SLuigi Rizzo 		return error;
5794bf50f18SLuigi Rizzo 
5804bf50f18SLuigi Rizzo 	NMG_LOCK();
5814bf50f18SLuigi Rizzo 	/* netmap_vp_create creates a struct netmap_vp_adapter */
5824bf50f18SLuigi Rizzo 	error = netmap_vp_create(nmr, ifp, &vpna);
5834bf50f18SLuigi Rizzo 	if (error) {
5844bf50f18SLuigi Rizzo 		D("error %d", error);
5854bf50f18SLuigi Rizzo 		nm_vi_detach(ifp);
5864bf50f18SLuigi Rizzo 		return error;
5874bf50f18SLuigi Rizzo 	}
5884bf50f18SLuigi Rizzo 	/* persist-specific routines */
5894bf50f18SLuigi Rizzo 	vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
5904bf50f18SLuigi Rizzo 	vpna->up.nm_dtor = netmap_persist_vp_dtor;
5914bf50f18SLuigi Rizzo 	netmap_adapter_get(&vpna->up);
5924bf50f18SLuigi Rizzo 	NMG_UNLOCK();
5934bf50f18SLuigi Rizzo 	D("created %s", ifp->if_xname);
5944bf50f18SLuigi Rizzo 	return 0;
5954bf50f18SLuigi Rizzo }
59617885a7bSLuigi Rizzo 
59717885a7bSLuigi Rizzo /* Try to get a reference to a netmap adapter attached to a VALE switch.
59817885a7bSLuigi Rizzo  * If the adapter is found (or is created), this function returns 0, a
59917885a7bSLuigi Rizzo  * non NULL pointer is returned into *na, and the caller holds a
60017885a7bSLuigi Rizzo  * reference to the adapter.
60117885a7bSLuigi Rizzo  * If an adapter is not found, then no reference is grabbed and the
60217885a7bSLuigi Rizzo  * function returns an error code, or 0 if there is just a VALE prefix
60317885a7bSLuigi Rizzo  * mismatch. Therefore the caller holds a reference when
60417885a7bSLuigi Rizzo  * (*na != NULL && return == 0).
60517885a7bSLuigi Rizzo  */
606f9790aebSLuigi Rizzo int
607f9790aebSLuigi Rizzo netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
608f9790aebSLuigi Rizzo {
6094bf50f18SLuigi Rizzo 	char *nr_name = nmr->nr_name;
6104bf50f18SLuigi Rizzo 	const char *ifname;
611f9790aebSLuigi Rizzo 	struct ifnet *ifp;
612f9790aebSLuigi Rizzo 	int error = 0;
6134bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *vpna, *hostna = NULL;
614f9790aebSLuigi Rizzo 	struct nm_bridge *b;
615f9790aebSLuigi Rizzo 	int i, j, cand = -1, cand2 = -1;
616f9790aebSLuigi Rizzo 	int needed;
617f9790aebSLuigi Rizzo 
618f9790aebSLuigi Rizzo 	*na = NULL;     /* default return value */
619f9790aebSLuigi Rizzo 
620f9790aebSLuigi Rizzo 	/* first try to see if this is a bridge port. */
621f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
6224bf50f18SLuigi Rizzo 	if (strncmp(nr_name, NM_NAME, sizeof(NM_NAME) - 1)) {
623f9790aebSLuigi Rizzo 		return 0;  /* no error, but no VALE prefix */
624f9790aebSLuigi Rizzo 	}
625f9790aebSLuigi Rizzo 
6264bf50f18SLuigi Rizzo 	b = nm_find_bridge(nr_name, create);
627f9790aebSLuigi Rizzo 	if (b == NULL) {
6284bf50f18SLuigi Rizzo 		D("no bridges available for '%s'", nr_name);
629f2637526SLuigi Rizzo 		return (create ? ENOMEM : ENXIO);
630f9790aebSLuigi Rizzo 	}
6314bf50f18SLuigi Rizzo 	if (strlen(nr_name) < b->bdg_namelen) /* impossible */
6324bf50f18SLuigi Rizzo 		panic("x");
633f9790aebSLuigi Rizzo 
634f9790aebSLuigi Rizzo 	/* Now we are sure that name starts with the bridge's name,
635f9790aebSLuigi Rizzo 	 * lookup the port in the bridge. We need to scan the entire
636f9790aebSLuigi Rizzo 	 * list. It is not important to hold a WLOCK on the bridge
637f9790aebSLuigi Rizzo 	 * during the search because NMG_LOCK already guarantees
638f9790aebSLuigi Rizzo 	 * that there are no other possible writers.
639f9790aebSLuigi Rizzo 	 */
640f9790aebSLuigi Rizzo 
641f9790aebSLuigi Rizzo 	/* lookup in the local list of ports */
642f9790aebSLuigi Rizzo 	for (j = 0; j < b->bdg_active_ports; j++) {
643f9790aebSLuigi Rizzo 		i = b->bdg_port_index[j];
644f9790aebSLuigi Rizzo 		vpna = b->bdg_ports[i];
645f9790aebSLuigi Rizzo 		// KASSERT(na != NULL);
646*847bf383SLuigi Rizzo 		ND("checking %s", vpna->up.name);
6474bf50f18SLuigi Rizzo 		if (!strcmp(vpna->up.name, nr_name)) {
648f9790aebSLuigi Rizzo 			netmap_adapter_get(&vpna->up);
6494bf50f18SLuigi Rizzo 			ND("found existing if %s refs %d", nr_name)
6504bf50f18SLuigi Rizzo 			*na = &vpna->up;
651f9790aebSLuigi Rizzo 			return 0;
652f9790aebSLuigi Rizzo 		}
653f9790aebSLuigi Rizzo 	}
654f9790aebSLuigi Rizzo 	/* not found, should we create it? */
655f9790aebSLuigi Rizzo 	if (!create)
656f9790aebSLuigi Rizzo 		return ENXIO;
657f9790aebSLuigi Rizzo 	/* yes we should, see if we have space to attach entries */
658f9790aebSLuigi Rizzo 	needed = 2; /* in some cases we only need 1 */
659f9790aebSLuigi Rizzo 	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
660f9790aebSLuigi Rizzo 		D("bridge full %d, cannot create new port", b->bdg_active_ports);
661f2637526SLuigi Rizzo 		return ENOMEM;
662f9790aebSLuigi Rizzo 	}
663f9790aebSLuigi Rizzo 	/* record the next two ports available, but do not allocate yet */
664f9790aebSLuigi Rizzo 	cand = b->bdg_port_index[b->bdg_active_ports];
665f9790aebSLuigi Rizzo 	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
666f9790aebSLuigi Rizzo 	ND("+++ bridge %s port %s used %d avail %d %d",
6674bf50f18SLuigi Rizzo 		b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);
668f9790aebSLuigi Rizzo 
669f9790aebSLuigi Rizzo 	/*
670f9790aebSLuigi Rizzo 	 * try see if there is a matching NIC with this name
671f9790aebSLuigi Rizzo 	 * (after the bridge's name)
672f9790aebSLuigi Rizzo 	 */
6734bf50f18SLuigi Rizzo 	ifname = nr_name + b->bdg_namelen + 1;
6744bf50f18SLuigi Rizzo 	ifp = ifunit_ref(ifname);
6754bf50f18SLuigi Rizzo 	if (!ifp) {
6764bf50f18SLuigi Rizzo 		/* Create an ephemeral virtual port
6774bf50f18SLuigi Rizzo 		 * This block contains all the ephemeral-specific logics
6784bf50f18SLuigi Rizzo 		 */
679f9790aebSLuigi Rizzo 		if (nmr->nr_cmd) {
680f9790aebSLuigi Rizzo 			/* nr_cmd must be 0 for a virtual port */
681f9790aebSLuigi Rizzo 			return EINVAL;
682f9790aebSLuigi Rizzo 		}
683f9790aebSLuigi Rizzo 
684f9790aebSLuigi Rizzo 		/* bdg_netmap_attach creates a struct netmap_adapter */
6854bf50f18SLuigi Rizzo 		error = netmap_vp_create(nmr, NULL, &vpna);
686f9790aebSLuigi Rizzo 		if (error) {
687f9790aebSLuigi Rizzo 			D("error %d", error);
688f9790aebSLuigi Rizzo 			free(ifp, M_DEVBUF);
689f9790aebSLuigi Rizzo 			return error;
690f9790aebSLuigi Rizzo 		}
6914bf50f18SLuigi Rizzo 		/* shortcut - we can skip get_hw_na(),
6924bf50f18SLuigi Rizzo 		 * ownership check and nm_bdg_attach()
6934bf50f18SLuigi Rizzo 		 */
6944bf50f18SLuigi Rizzo 	} else {
6954bf50f18SLuigi Rizzo 		struct netmap_adapter *hw;
696f9790aebSLuigi Rizzo 
6974bf50f18SLuigi Rizzo 		error = netmap_get_hw_na(ifp, &hw);
6984bf50f18SLuigi Rizzo 		if (error || hw == NULL)
699f9790aebSLuigi Rizzo 			goto out;
700f9790aebSLuigi Rizzo 
7014bf50f18SLuigi Rizzo 		/* host adapter might not be created */
7024bf50f18SLuigi Rizzo 		error = hw->nm_bdg_attach(nr_name, hw);
7034bf50f18SLuigi Rizzo 		if (error)
704f9790aebSLuigi Rizzo 			goto out;
7054bf50f18SLuigi Rizzo 		vpna = hw->na_vp;
7064bf50f18SLuigi Rizzo 		hostna = hw->na_hostvp;
707f9790aebSLuigi Rizzo 		if_rele(ifp);
7084bf50f18SLuigi Rizzo 		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
7094bf50f18SLuigi Rizzo 			hostna = NULL;
710f9790aebSLuigi Rizzo 	}
711f9790aebSLuigi Rizzo 
712f9790aebSLuigi Rizzo 	BDG_WLOCK(b);
713f9790aebSLuigi Rizzo 	vpna->bdg_port = cand;
714f9790aebSLuigi Rizzo 	ND("NIC  %p to bridge port %d", vpna, cand);
715f9790aebSLuigi Rizzo 	/* bind the port to the bridge (virtual ports are not active) */
716f9790aebSLuigi Rizzo 	b->bdg_ports[cand] = vpna;
717f9790aebSLuigi Rizzo 	vpna->na_bdg = b;
718f9790aebSLuigi Rizzo 	b->bdg_active_ports++;
7194bf50f18SLuigi Rizzo 	if (hostna != NULL) {
720f9790aebSLuigi Rizzo 		/* also bind the host stack to the bridge */
721f9790aebSLuigi Rizzo 		b->bdg_ports[cand2] = hostna;
722f9790aebSLuigi Rizzo 		hostna->bdg_port = cand2;
723f9790aebSLuigi Rizzo 		hostna->na_bdg = b;
724f9790aebSLuigi Rizzo 		b->bdg_active_ports++;
725f9790aebSLuigi Rizzo 		ND("host %p to bridge port %d", hostna, cand2);
726f9790aebSLuigi Rizzo 	}
7274bf50f18SLuigi Rizzo 	ND("if %s refs %d", ifname, vpna->up.na_refcount);
728f9790aebSLuigi Rizzo 	BDG_WUNLOCK(b);
7294bf50f18SLuigi Rizzo 	*na = &vpna->up;
7304bf50f18SLuigi Rizzo 	netmap_adapter_get(*na);
731f9790aebSLuigi Rizzo 	return 0;
732f9790aebSLuigi Rizzo 
733f9790aebSLuigi Rizzo out:
734f9790aebSLuigi Rizzo 	if_rele(ifp);
735f9790aebSLuigi Rizzo 
736f9790aebSLuigi Rizzo 	return error;
737f9790aebSLuigi Rizzo }
738f9790aebSLuigi Rizzo 
739f9790aebSLuigi Rizzo 
7404bf50f18SLuigi Rizzo /* Process NETMAP_BDG_ATTACH */
741f9790aebSLuigi Rizzo static int
7424bf50f18SLuigi Rizzo nm_bdg_ctl_attach(struct nmreq *nmr)
743f9790aebSLuigi Rizzo {
744f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
745f9790aebSLuigi Rizzo 	int error;
746f9790aebSLuigi Rizzo 
747f9790aebSLuigi Rizzo 	NMG_LOCK();
748f2637526SLuigi Rizzo 
74917885a7bSLuigi Rizzo 	error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */);
7504bf50f18SLuigi Rizzo 	if (error) /* no device */
751f9790aebSLuigi Rizzo 		goto unlock_exit;
752f2637526SLuigi Rizzo 
75317885a7bSLuigi Rizzo 	if (na == NULL) { /* VALE prefix missing */
754f9790aebSLuigi Rizzo 		error = EINVAL;
75517885a7bSLuigi Rizzo 		goto unlock_exit;
756f9790aebSLuigi Rizzo 	}
757f9790aebSLuigi Rizzo 
7584bf50f18SLuigi Rizzo 	if (NETMAP_OWNED_BY_ANY(na)) {
759f9790aebSLuigi Rizzo 		error = EBUSY;
760f9790aebSLuigi Rizzo 		goto unref_exit;
761f9790aebSLuigi Rizzo 	}
762f9790aebSLuigi Rizzo 
7634bf50f18SLuigi Rizzo 	if (na->nm_bdg_ctl) {
7644bf50f18SLuigi Rizzo 		/* nop for VALE ports. The bwrap needs to put the hwna
7654bf50f18SLuigi Rizzo 		 * in netmap mode (see netmap_bwrap_bdg_ctl)
7664bf50f18SLuigi Rizzo 		 */
7674bf50f18SLuigi Rizzo 		error = na->nm_bdg_ctl(na, nmr, 1);
7684bf50f18SLuigi Rizzo 		if (error)
769f9790aebSLuigi Rizzo 			goto unref_exit;
7704bf50f18SLuigi Rizzo 		ND("registered %s to netmap-mode", na->name);
771f9790aebSLuigi Rizzo 	}
772f9790aebSLuigi Rizzo 	NMG_UNLOCK();
773f9790aebSLuigi Rizzo 	return 0;
774f9790aebSLuigi Rizzo 
775f9790aebSLuigi Rizzo unref_exit:
776f9790aebSLuigi Rizzo 	netmap_adapter_put(na);
777f9790aebSLuigi Rizzo unlock_exit:
778f9790aebSLuigi Rizzo 	NMG_UNLOCK();
779f9790aebSLuigi Rizzo 	return error;
780f9790aebSLuigi Rizzo }
781f9790aebSLuigi Rizzo 
78217885a7bSLuigi Rizzo 
7834bf50f18SLuigi Rizzo /* process NETMAP_BDG_DETACH */
784f9790aebSLuigi Rizzo static int
7854bf50f18SLuigi Rizzo nm_bdg_ctl_detach(struct nmreq *nmr)
786f9790aebSLuigi Rizzo {
787f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
788f9790aebSLuigi Rizzo 	int error;
789f9790aebSLuigi Rizzo 
790f9790aebSLuigi Rizzo 	NMG_LOCK();
79117885a7bSLuigi Rizzo 	error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */);
792f9790aebSLuigi Rizzo 	if (error) { /* no device, or another bridge or user owns the device */
793f9790aebSLuigi Rizzo 		goto unlock_exit;
794f9790aebSLuigi Rizzo 	}
795f2637526SLuigi Rizzo 
79617885a7bSLuigi Rizzo 	if (na == NULL) { /* VALE prefix missing */
797f9790aebSLuigi Rizzo 		error = EINVAL;
79817885a7bSLuigi Rizzo 		goto unlock_exit;
799f9790aebSLuigi Rizzo 	}
80017885a7bSLuigi Rizzo 
8014bf50f18SLuigi Rizzo 	if (na->nm_bdg_ctl) {
8024bf50f18SLuigi Rizzo 		/* remove the port from bridge. The bwrap
8034bf50f18SLuigi Rizzo 		 * also needs to put the hwna in normal mode
8044bf50f18SLuigi Rizzo 		 */
8054bf50f18SLuigi Rizzo 		error = na->nm_bdg_ctl(na, nmr, 0);
806f9790aebSLuigi Rizzo 	}
807f9790aebSLuigi Rizzo 
808f9790aebSLuigi Rizzo 	netmap_adapter_put(na);
809f9790aebSLuigi Rizzo unlock_exit:
810f9790aebSLuigi Rizzo 	NMG_UNLOCK();
811f9790aebSLuigi Rizzo 	return error;
812f9790aebSLuigi Rizzo 
813f9790aebSLuigi Rizzo }
814f9790aebSLuigi Rizzo 
815f9790aebSLuigi Rizzo 
8164bf50f18SLuigi Rizzo /* Called by either user's context (netmap_ioctl())
8174bf50f18SLuigi Rizzo  * or external kernel modules (e.g., Openvswitch).
8184bf50f18SLuigi Rizzo  * Operation is indicated in nmr->nr_cmd.
8194bf50f18SLuigi Rizzo  * NETMAP_BDG_OPS that sets configure/lookup/dtor functions to the bridge
8204bf50f18SLuigi Rizzo  * requires bdg_ops argument; the other commands ignore this argument.
8214bf50f18SLuigi Rizzo  *
822f9790aebSLuigi Rizzo  * Called without NMG_LOCK.
823f9790aebSLuigi Rizzo  */
824f9790aebSLuigi Rizzo int
8254bf50f18SLuigi Rizzo netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
826f9790aebSLuigi Rizzo {
827*847bf383SLuigi Rizzo 	struct nm_bridge *b, *bridges;
828f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
829f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna;
830f9790aebSLuigi Rizzo 	char *name = nmr->nr_name;
831f9790aebSLuigi Rizzo 	int cmd = nmr->nr_cmd, namelen = strlen(name);
832f9790aebSLuigi Rizzo 	int error = 0, i, j;
833*847bf383SLuigi Rizzo 	u_int num_bridges;
834*847bf383SLuigi Rizzo 
835*847bf383SLuigi Rizzo 	netmap_bns_getbridges(&bridges, &num_bridges);
836f9790aebSLuigi Rizzo 
837f9790aebSLuigi Rizzo 	switch (cmd) {
8384bf50f18SLuigi Rizzo 	case NETMAP_BDG_NEWIF:
8394bf50f18SLuigi Rizzo 		error = nm_vi_create(nmr);
8404bf50f18SLuigi Rizzo 		break;
8414bf50f18SLuigi Rizzo 
8424bf50f18SLuigi Rizzo 	case NETMAP_BDG_DELIF:
8434bf50f18SLuigi Rizzo 		error = nm_vi_destroy(nmr->nr_name);
8444bf50f18SLuigi Rizzo 		break;
8454bf50f18SLuigi Rizzo 
846f9790aebSLuigi Rizzo 	case NETMAP_BDG_ATTACH:
8474bf50f18SLuigi Rizzo 		error = nm_bdg_ctl_attach(nmr);
848f9790aebSLuigi Rizzo 		break;
849f9790aebSLuigi Rizzo 
850f9790aebSLuigi Rizzo 	case NETMAP_BDG_DETACH:
8514bf50f18SLuigi Rizzo 		error = nm_bdg_ctl_detach(nmr);
852f9790aebSLuigi Rizzo 		break;
853f9790aebSLuigi Rizzo 
854f9790aebSLuigi Rizzo 	case NETMAP_BDG_LIST:
855f9790aebSLuigi Rizzo 		/* this is used to enumerate bridges and ports */
856f9790aebSLuigi Rizzo 		if (namelen) { /* look up indexes of bridge and port */
857f9790aebSLuigi Rizzo 			if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
858f9790aebSLuigi Rizzo 				error = EINVAL;
859f9790aebSLuigi Rizzo 				break;
860f9790aebSLuigi Rizzo 			}
861f9790aebSLuigi Rizzo 			NMG_LOCK();
862f9790aebSLuigi Rizzo 			b = nm_find_bridge(name, 0 /* don't create */);
863f9790aebSLuigi Rizzo 			if (!b) {
864f9790aebSLuigi Rizzo 				error = ENOENT;
865f9790aebSLuigi Rizzo 				NMG_UNLOCK();
866f9790aebSLuigi Rizzo 				break;
867f9790aebSLuigi Rizzo 			}
868f9790aebSLuigi Rizzo 
869f9790aebSLuigi Rizzo 			error = ENOENT;
870f9790aebSLuigi Rizzo 			for (j = 0; j < b->bdg_active_ports; j++) {
871f9790aebSLuigi Rizzo 				i = b->bdg_port_index[j];
872f9790aebSLuigi Rizzo 				vpna = b->bdg_ports[i];
873f9790aebSLuigi Rizzo 				if (vpna == NULL) {
874f9790aebSLuigi Rizzo 					D("---AAAAAAAAARGH-------");
875f9790aebSLuigi Rizzo 					continue;
876f9790aebSLuigi Rizzo 				}
877f9790aebSLuigi Rizzo 				/* the former and the latter identify a
878f9790aebSLuigi Rizzo 				 * virtual port and a NIC, respectively
879f9790aebSLuigi Rizzo 				 */
8804bf50f18SLuigi Rizzo 				if (!strcmp(vpna->up.name, name)) {
881f9790aebSLuigi Rizzo 					/* bridge index */
882*847bf383SLuigi Rizzo 					nmr->nr_arg1 = b - bridges;
883f9790aebSLuigi Rizzo 					nmr->nr_arg2 = i; /* port index */
884f9790aebSLuigi Rizzo 					error = 0;
885f9790aebSLuigi Rizzo 					break;
886f9790aebSLuigi Rizzo 				}
887f9790aebSLuigi Rizzo 			}
888f9790aebSLuigi Rizzo 			NMG_UNLOCK();
889f9790aebSLuigi Rizzo 		} else {
890f9790aebSLuigi Rizzo 			/* return the first non-empty entry starting from
891f9790aebSLuigi Rizzo 			 * bridge nr_arg1 and port nr_arg2.
892f9790aebSLuigi Rizzo 			 *
893f9790aebSLuigi Rizzo 			 * Users can detect the end of the same bridge by
894f9790aebSLuigi Rizzo 			 * seeing the new and old value of nr_arg1, and can
895f9790aebSLuigi Rizzo 			 * detect the end of all the bridge by error != 0
896f9790aebSLuigi Rizzo 			 */
897f9790aebSLuigi Rizzo 			i = nmr->nr_arg1;
898f9790aebSLuigi Rizzo 			j = nmr->nr_arg2;
899f9790aebSLuigi Rizzo 
900f9790aebSLuigi Rizzo 			NMG_LOCK();
901f9790aebSLuigi Rizzo 			for (error = ENOENT; i < NM_BRIDGES; i++) {
902*847bf383SLuigi Rizzo 				b = bridges + i;
903f9790aebSLuigi Rizzo 				if (j >= b->bdg_active_ports) {
904f9790aebSLuigi Rizzo 					j = 0; /* following bridges scan from 0 */
905f9790aebSLuigi Rizzo 					continue;
906f9790aebSLuigi Rizzo 				}
907f9790aebSLuigi Rizzo 				nmr->nr_arg1 = i;
908f9790aebSLuigi Rizzo 				nmr->nr_arg2 = j;
909f9790aebSLuigi Rizzo 				j = b->bdg_port_index[j];
910f9790aebSLuigi Rizzo 				vpna = b->bdg_ports[j];
9114bf50f18SLuigi Rizzo 				strncpy(name, vpna->up.name, (size_t)IFNAMSIZ);
912f9790aebSLuigi Rizzo 				error = 0;
913f9790aebSLuigi Rizzo 				break;
914f9790aebSLuigi Rizzo 			}
915f9790aebSLuigi Rizzo 			NMG_UNLOCK();
916f9790aebSLuigi Rizzo 		}
917f9790aebSLuigi Rizzo 		break;
918f9790aebSLuigi Rizzo 
9194bf50f18SLuigi Rizzo 	case NETMAP_BDG_REGOPS: /* XXX this should not be available from userspace */
9204bf50f18SLuigi Rizzo 		/* register callbacks to the given bridge.
921f9790aebSLuigi Rizzo 		 * nmr->nr_name may be just bridge's name (including ':'
922f9790aebSLuigi Rizzo 		 * if it is not just NM_NAME).
923f9790aebSLuigi Rizzo 		 */
9244bf50f18SLuigi Rizzo 		if (!bdg_ops) {
925f9790aebSLuigi Rizzo 			error = EINVAL;
926f9790aebSLuigi Rizzo 			break;
927f9790aebSLuigi Rizzo 		}
928f9790aebSLuigi Rizzo 		NMG_LOCK();
929f9790aebSLuigi Rizzo 		b = nm_find_bridge(name, 0 /* don't create */);
930f9790aebSLuigi Rizzo 		if (!b) {
931f9790aebSLuigi Rizzo 			error = EINVAL;
932f9790aebSLuigi Rizzo 		} else {
9334bf50f18SLuigi Rizzo 			b->bdg_ops = *bdg_ops;
934f9790aebSLuigi Rizzo 		}
935f9790aebSLuigi Rizzo 		NMG_UNLOCK();
936f9790aebSLuigi Rizzo 		break;
937f9790aebSLuigi Rizzo 
938f0ea3689SLuigi Rizzo 	case NETMAP_BDG_VNET_HDR:
939f0ea3689SLuigi Rizzo 		/* Valid lengths for the virtio-net header are 0 (no header),
940f0ea3689SLuigi Rizzo 		   10 and 12. */
941f0ea3689SLuigi Rizzo 		if (nmr->nr_arg1 != 0 &&
942f0ea3689SLuigi Rizzo 			nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) &&
943f0ea3689SLuigi Rizzo 				nmr->nr_arg1 != 12) {
944f0ea3689SLuigi Rizzo 			error = EINVAL;
945f0ea3689SLuigi Rizzo 			break;
946f0ea3689SLuigi Rizzo 		}
947f9790aebSLuigi Rizzo 		NMG_LOCK();
948f9790aebSLuigi Rizzo 		error = netmap_get_bdg_na(nmr, &na, 0);
94917885a7bSLuigi Rizzo 		if (na && !error) {
950f9790aebSLuigi Rizzo 			vpna = (struct netmap_vp_adapter *)na;
951f0ea3689SLuigi Rizzo 			vpna->virt_hdr_len = nmr->nr_arg1;
952f0ea3689SLuigi Rizzo 			if (vpna->virt_hdr_len)
9534bf50f18SLuigi Rizzo 				vpna->mfs = NETMAP_BUF_SIZE(na);
954f0ea3689SLuigi Rizzo 			D("Using vnet_hdr_len %d for %p", vpna->virt_hdr_len, vpna);
95517885a7bSLuigi Rizzo 			netmap_adapter_put(na);
956f9790aebSLuigi Rizzo 		}
957f9790aebSLuigi Rizzo 		NMG_UNLOCK();
958f9790aebSLuigi Rizzo 		break;
959f9790aebSLuigi Rizzo 
960f9790aebSLuigi Rizzo 	default:
961f9790aebSLuigi Rizzo 		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
962f9790aebSLuigi Rizzo 		error = EINVAL;
963f9790aebSLuigi Rizzo 		break;
964f9790aebSLuigi Rizzo 	}
965f9790aebSLuigi Rizzo 	return error;
966f9790aebSLuigi Rizzo }
967f9790aebSLuigi Rizzo 
9684bf50f18SLuigi Rizzo int
9694bf50f18SLuigi Rizzo netmap_bdg_config(struct nmreq *nmr)
9704bf50f18SLuigi Rizzo {
9714bf50f18SLuigi Rizzo 	struct nm_bridge *b;
9724bf50f18SLuigi Rizzo 	int error = EINVAL;
9734bf50f18SLuigi Rizzo 
9744bf50f18SLuigi Rizzo 	NMG_LOCK();
9754bf50f18SLuigi Rizzo 	b = nm_find_bridge(nmr->nr_name, 0);
9764bf50f18SLuigi Rizzo 	if (!b) {
9774bf50f18SLuigi Rizzo 		NMG_UNLOCK();
9784bf50f18SLuigi Rizzo 		return error;
9794bf50f18SLuigi Rizzo 	}
9804bf50f18SLuigi Rizzo 	NMG_UNLOCK();
9814bf50f18SLuigi Rizzo 	/* Don't call config() with NMG_LOCK() held */
9824bf50f18SLuigi Rizzo 	BDG_RLOCK(b);
9834bf50f18SLuigi Rizzo 	if (b->bdg_ops.config != NULL)
9844bf50f18SLuigi Rizzo 		error = b->bdg_ops.config((struct nm_ifreq *)nmr);
9854bf50f18SLuigi Rizzo 	BDG_RUNLOCK(b);
9864bf50f18SLuigi Rizzo 	return error;
9874bf50f18SLuigi Rizzo }
9884bf50f18SLuigi Rizzo 
9894bf50f18SLuigi Rizzo 
9904bf50f18SLuigi Rizzo /* nm_krings_create callback for VALE ports.
9914bf50f18SLuigi Rizzo  * Calls the standard netmap_krings_create, then adds leases on rx
9924bf50f18SLuigi Rizzo  * rings and bdgfwd on tx rings.
9934bf50f18SLuigi Rizzo  */
994f9790aebSLuigi Rizzo static int
995f9790aebSLuigi Rizzo netmap_vp_krings_create(struct netmap_adapter *na)
996f9790aebSLuigi Rizzo {
997f0ea3689SLuigi Rizzo 	u_int tailroom;
998f9790aebSLuigi Rizzo 	int error, i;
999f9790aebSLuigi Rizzo 	uint32_t *leases;
1000*847bf383SLuigi Rizzo 	u_int nrx = netmap_real_rings(na, NR_RX);
1001f9790aebSLuigi Rizzo 
1002f9790aebSLuigi Rizzo 	/*
1003f9790aebSLuigi Rizzo 	 * Leases are attached to RX rings on vale ports
1004f9790aebSLuigi Rizzo 	 */
1005f9790aebSLuigi Rizzo 	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
1006f9790aebSLuigi Rizzo 
1007f0ea3689SLuigi Rizzo 	error = netmap_krings_create(na, tailroom);
1008f9790aebSLuigi Rizzo 	if (error)
1009f9790aebSLuigi Rizzo 		return error;
1010f9790aebSLuigi Rizzo 
1011f9790aebSLuigi Rizzo 	leases = na->tailroom;
1012f9790aebSLuigi Rizzo 
1013f9790aebSLuigi Rizzo 	for (i = 0; i < nrx; i++) { /* Receive rings */
1014f9790aebSLuigi Rizzo 		na->rx_rings[i].nkr_leases = leases;
1015f9790aebSLuigi Rizzo 		leases += na->num_rx_desc;
1016f9790aebSLuigi Rizzo 	}
1017f9790aebSLuigi Rizzo 
1018f9790aebSLuigi Rizzo 	error = nm_alloc_bdgfwd(na);
1019f9790aebSLuigi Rizzo 	if (error) {
1020f9790aebSLuigi Rizzo 		netmap_krings_delete(na);
1021f9790aebSLuigi Rizzo 		return error;
1022f9790aebSLuigi Rizzo 	}
1023f9790aebSLuigi Rizzo 
1024f9790aebSLuigi Rizzo 	return 0;
1025f9790aebSLuigi Rizzo }
1026f9790aebSLuigi Rizzo 
102717885a7bSLuigi Rizzo 
/*
 * nm_krings_delete callback for VALE ports: releases the forwarding
 * tables first, then the krings themselves (the reverse of the order
 * used by netmap_vp_krings_create).
 */
static void
netmap_vp_krings_delete(struct netmap_adapter *na)
{
	nm_free_bdgfwd(na);		/* forwarding tables */
	netmap_krings_delete(na);	/* krings and their tailroom */
}
1035f9790aebSLuigi Rizzo 
1036f9790aebSLuigi Rizzo 
1037f9790aebSLuigi Rizzo static int
1038f9790aebSLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
1039f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *na, u_int ring_nr);
1040f9790aebSLuigi Rizzo 
1041f9790aebSLuigi Rizzo 
1042f9790aebSLuigi Rizzo /*
10434bf50f18SLuigi Rizzo  * main dispatch routine for the bridge.
1044f9790aebSLuigi Rizzo  * Grab packets from a kring, move them into the ft structure
1045f9790aebSLuigi Rizzo  * associated to the tx (input) port. Max one instance per port,
1046f9790aebSLuigi Rizzo  * filtered on input (ioctl, poll or XXX).
1047f9790aebSLuigi Rizzo  * Returns the next position in the ring.
1048f9790aebSLuigi Rizzo  */
1049f9790aebSLuigi Rizzo static int
10504bf50f18SLuigi Rizzo nm_bdg_preflush(struct netmap_kring *kring, u_int end)
1051f9790aebSLuigi Rizzo {
10524bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *na =
10534bf50f18SLuigi Rizzo 		(struct netmap_vp_adapter*)kring->na;
1054f9790aebSLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
1055f9790aebSLuigi Rizzo 	struct nm_bdg_fwd *ft;
10564bf50f18SLuigi Rizzo 	u_int ring_nr = kring->ring_id;
1057f9790aebSLuigi Rizzo 	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
1058f9790aebSLuigi Rizzo 	u_int ft_i = 0;	/* start from 0 */
1059f9790aebSLuigi Rizzo 	u_int frags = 1; /* how many frags ? */
1060f9790aebSLuigi Rizzo 	struct nm_bridge *b = na->na_bdg;
1061f9790aebSLuigi Rizzo 
1062f9790aebSLuigi Rizzo 	/* To protect against modifications to the bridge we acquire a
1063f9790aebSLuigi Rizzo 	 * shared lock, waiting if we can sleep (if the source port is
1064f9790aebSLuigi Rizzo 	 * attached to a user process) or with a trylock otherwise (NICs).
1065f9790aebSLuigi Rizzo 	 */
1066f9790aebSLuigi Rizzo 	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
1067f9790aebSLuigi Rizzo 	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
1068f9790aebSLuigi Rizzo 		BDG_RLOCK(b);
1069f9790aebSLuigi Rizzo 	else if (!BDG_RTRYLOCK(b))
1070f9790aebSLuigi Rizzo 		return 0;
1071f9790aebSLuigi Rizzo 	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
1072f9790aebSLuigi Rizzo 	ft = kring->nkr_ft;
1073f9790aebSLuigi Rizzo 
1074f9790aebSLuigi Rizzo 	for (; likely(j != end); j = nm_next(j, lim)) {
1075f9790aebSLuigi Rizzo 		struct netmap_slot *slot = &ring->slot[j];
1076f9790aebSLuigi Rizzo 		char *buf;
1077f9790aebSLuigi Rizzo 
1078f9790aebSLuigi Rizzo 		ft[ft_i].ft_len = slot->len;
1079f9790aebSLuigi Rizzo 		ft[ft_i].ft_flags = slot->flags;
1080f9790aebSLuigi Rizzo 
1081f9790aebSLuigi Rizzo 		ND("flags is 0x%x", slot->flags);
1082*847bf383SLuigi Rizzo 		/* we do not use the buf changed flag, but we still need to reset it */
1083*847bf383SLuigi Rizzo 		slot->flags &= ~NS_BUF_CHANGED;
1084*847bf383SLuigi Rizzo 
1085f9790aebSLuigi Rizzo 		/* this slot goes into a list so initialize the link field */
1086f9790aebSLuigi Rizzo 		ft[ft_i].ft_next = NM_FT_NULL;
1087f9790aebSLuigi Rizzo 		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
10884bf50f18SLuigi Rizzo 			(void *)(uintptr_t)slot->ptr : NMB(&na->up, slot);
1089e31c6ec7SLuigi Rizzo 		if (unlikely(buf == NULL)) {
1090e31c6ec7SLuigi Rizzo 			RD(5, "NULL %s buffer pointer from %s slot %d len %d",
1091e31c6ec7SLuigi Rizzo 				(slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT",
1092e31c6ec7SLuigi Rizzo 				kring->name, j, ft[ft_i].ft_len);
10934bf50f18SLuigi Rizzo 			buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up);
1094e31c6ec7SLuigi Rizzo 			ft[ft_i].ft_len = 0;
1095e31c6ec7SLuigi Rizzo 			ft[ft_i].ft_flags = 0;
1096e31c6ec7SLuigi Rizzo 		}
10972e159ef0SLuigi Rizzo 		__builtin_prefetch(buf);
1098f9790aebSLuigi Rizzo 		++ft_i;
1099f9790aebSLuigi Rizzo 		if (slot->flags & NS_MOREFRAG) {
1100f9790aebSLuigi Rizzo 			frags++;
1101f9790aebSLuigi Rizzo 			continue;
1102f9790aebSLuigi Rizzo 		}
1103f9790aebSLuigi Rizzo 		if (unlikely(netmap_verbose && frags > 1))
1104f9790aebSLuigi Rizzo 			RD(5, "%d frags at %d", frags, ft_i - frags);
1105f9790aebSLuigi Rizzo 		ft[ft_i - frags].ft_frags = frags;
1106f9790aebSLuigi Rizzo 		frags = 1;
1107f9790aebSLuigi Rizzo 		if (unlikely((int)ft_i >= bridge_batch))
1108f9790aebSLuigi Rizzo 			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1109f9790aebSLuigi Rizzo 	}
1110f9790aebSLuigi Rizzo 	if (frags > 1) {
1111f9790aebSLuigi Rizzo 		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
1112f9790aebSLuigi Rizzo 		// ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
1113f9790aebSLuigi Rizzo 		ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
1114f9790aebSLuigi Rizzo 		ft[ft_i - frags].ft_frags = frags - 1;
1115f9790aebSLuigi Rizzo 	}
1116f9790aebSLuigi Rizzo 	if (ft_i)
1117f9790aebSLuigi Rizzo 		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1118f9790aebSLuigi Rizzo 	BDG_RUNLOCK(b);
1119f9790aebSLuigi Rizzo 	return j;
1120f9790aebSLuigi Rizzo }
1121f9790aebSLuigi Rizzo 
1122f9790aebSLuigi Rizzo 
1123f9790aebSLuigi Rizzo /* ----- FreeBSD if_bridge hash function ------- */
1124f9790aebSLuigi Rizzo 
1125f9790aebSLuigi Rizzo /*
1126f9790aebSLuigi Rizzo  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
1127f9790aebSLuigi Rizzo  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
1128f9790aebSLuigi Rizzo  *
1129f9790aebSLuigi Rizzo  * http://www.burtleburtle.net/bob/hash/spooky.html
1130f9790aebSLuigi Rizzo  */
1131f9790aebSLuigi Rizzo #define mix(a, b, c)                                                    \
1132f9790aebSLuigi Rizzo do {                                                                    \
1133f9790aebSLuigi Rizzo         a -= b; a -= c; a ^= (c >> 13);                                 \
1134f9790aebSLuigi Rizzo         b -= c; b -= a; b ^= (a << 8);                                  \
1135f9790aebSLuigi Rizzo         c -= a; c -= b; c ^= (b >> 13);                                 \
1136f9790aebSLuigi Rizzo         a -= b; a -= c; a ^= (c >> 12);                                 \
1137f9790aebSLuigi Rizzo         b -= c; b -= a; b ^= (a << 16);                                 \
1138f9790aebSLuigi Rizzo         c -= a; c -= b; c ^= (b >> 5);                                  \
1139f9790aebSLuigi Rizzo         a -= b; a -= c; a ^= (c >> 3);                                  \
1140f9790aebSLuigi Rizzo         b -= c; b -= a; b ^= (a << 10);                                 \
1141f9790aebSLuigi Rizzo         c -= a; c -= b; c ^= (b >> 15);                                 \
1142f9790aebSLuigi Rizzo } while (/*CONSTCOND*/0)
1143f9790aebSLuigi Rizzo 
114417885a7bSLuigi Rizzo 
1145f9790aebSLuigi Rizzo static __inline uint32_t
1146f9790aebSLuigi Rizzo nm_bridge_rthash(const uint8_t *addr)
1147f9790aebSLuigi Rizzo {
1148f9790aebSLuigi Rizzo         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key
1149f9790aebSLuigi Rizzo 
1150f9790aebSLuigi Rizzo         b += addr[5] << 8;
1151f9790aebSLuigi Rizzo         b += addr[4];
1152f9790aebSLuigi Rizzo         a += addr[3] << 24;
1153f9790aebSLuigi Rizzo         a += addr[2] << 16;
1154f9790aebSLuigi Rizzo         a += addr[1] << 8;
1155f9790aebSLuigi Rizzo         a += addr[0];
1156f9790aebSLuigi Rizzo 
1157f9790aebSLuigi Rizzo         mix(a, b, c);
1158f9790aebSLuigi Rizzo #define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
1159f9790aebSLuigi Rizzo         return (c & BRIDGE_RTHASH_MASK);
1160f9790aebSLuigi Rizzo }
1161f9790aebSLuigi Rizzo 
1162f9790aebSLuigi Rizzo #undef mix
1163f9790aebSLuigi Rizzo 
1164f9790aebSLuigi Rizzo 
11654bf50f18SLuigi Rizzo /* nm_register callback for VALE ports */
1166f9790aebSLuigi Rizzo static int
11674bf50f18SLuigi Rizzo netmap_vp_reg(struct netmap_adapter *na, int onoff)
1168f9790aebSLuigi Rizzo {
1169f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna =
1170f9790aebSLuigi Rizzo 		(struct netmap_vp_adapter*)na;
1171f9790aebSLuigi Rizzo 
11724bf50f18SLuigi Rizzo 	/* persistent ports may be put in netmap mode
11734bf50f18SLuigi Rizzo 	 * before being attached to a bridge
1174f9790aebSLuigi Rizzo 	 */
11754bf50f18SLuigi Rizzo 	if (vpna->na_bdg)
1176f9790aebSLuigi Rizzo 		BDG_WLOCK(vpna->na_bdg);
1177f9790aebSLuigi Rizzo 	if (onoff) {
11784bf50f18SLuigi Rizzo 		na->na_flags |= NAF_NETMAP_ON;
11794bf50f18SLuigi Rizzo 		 /* XXX on FreeBSD, persistent VALE ports should also
11804bf50f18SLuigi Rizzo 		 * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
11814bf50f18SLuigi Rizzo 		 */
1182f9790aebSLuigi Rizzo 	} else {
11834bf50f18SLuigi Rizzo 		na->na_flags &= ~NAF_NETMAP_ON;
1184f9790aebSLuigi Rizzo 	}
11854bf50f18SLuigi Rizzo 	if (vpna->na_bdg)
1186f9790aebSLuigi Rizzo 		BDG_WUNLOCK(vpna->na_bdg);
1187f9790aebSLuigi Rizzo 	return 0;
1188f9790aebSLuigi Rizzo }
1189f9790aebSLuigi Rizzo 
1190f9790aebSLuigi Rizzo 
1191f9790aebSLuigi Rizzo /*
1192f9790aebSLuigi Rizzo  * Lookup function for a learning bridge.
1193f9790aebSLuigi Rizzo  * Update the hash table with the source address,
1194f9790aebSLuigi Rizzo  * and then returns the destination port index, and the
1195f9790aebSLuigi Rizzo  * ring in *dst_ring (at the moment, always use ring 0)
1196f9790aebSLuigi Rizzo  */
1197f9790aebSLuigi Rizzo u_int
11984bf50f18SLuigi Rizzo netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
1199*847bf383SLuigi Rizzo 		struct netmap_vp_adapter *na)
1200f9790aebSLuigi Rizzo {
12014bf50f18SLuigi Rizzo 	uint8_t *buf = ft->ft_buf;
12024bf50f18SLuigi Rizzo 	u_int buf_len = ft->ft_len;
1203f9790aebSLuigi Rizzo 	struct nm_hash_ent *ht = na->na_bdg->ht;
1204f9790aebSLuigi Rizzo 	uint32_t sh, dh;
1205f9790aebSLuigi Rizzo 	u_int dst, mysrc = na->bdg_port;
1206f9790aebSLuigi Rizzo 	uint64_t smac, dmac;
1207f9790aebSLuigi Rizzo 
12084bf50f18SLuigi Rizzo 	/* safety check, unfortunately we have many cases */
12094bf50f18SLuigi Rizzo 	if (buf_len >= 14 + na->virt_hdr_len) {
12104bf50f18SLuigi Rizzo 		/* virthdr + mac_hdr in the same slot */
12114bf50f18SLuigi Rizzo 		buf += na->virt_hdr_len;
12124bf50f18SLuigi Rizzo 		buf_len -= na->virt_hdr_len;
12134bf50f18SLuigi Rizzo 	} else if (buf_len == na->virt_hdr_len && ft->ft_flags & NS_MOREFRAG) {
12144bf50f18SLuigi Rizzo 		/* only header in first fragment */
12154bf50f18SLuigi Rizzo 		ft++;
12164bf50f18SLuigi Rizzo 		buf = ft->ft_buf;
12174bf50f18SLuigi Rizzo 		buf_len = ft->ft_len;
12184bf50f18SLuigi Rizzo 	} else {
12194bf50f18SLuigi Rizzo 		RD(5, "invalid buf format, length %d", buf_len);
1220f9790aebSLuigi Rizzo 		return NM_BDG_NOPORT;
1221f9790aebSLuigi Rizzo 	}
1222f9790aebSLuigi Rizzo 	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
1223f9790aebSLuigi Rizzo 	smac = le64toh(*(uint64_t *)(buf + 4));
1224f9790aebSLuigi Rizzo 	smac >>= 16;
1225f9790aebSLuigi Rizzo 
1226f9790aebSLuigi Rizzo 	/*
1227f9790aebSLuigi Rizzo 	 * The hash is somewhat expensive, there might be some
1228f9790aebSLuigi Rizzo 	 * worthwhile optimizations here.
1229f9790aebSLuigi Rizzo 	 */
1230*847bf383SLuigi Rizzo 	if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */
1231f9790aebSLuigi Rizzo 		uint8_t *s = buf+6;
1232f9790aebSLuigi Rizzo 		sh = nm_bridge_rthash(s); // XXX hash of source
1233f9790aebSLuigi Rizzo 		/* update source port forwarding entry */
1234*847bf383SLuigi Rizzo 		na->last_smac = ht[sh].mac = smac;	/* XXX expire ? */
1235f9790aebSLuigi Rizzo 		ht[sh].ports = mysrc;
1236f9790aebSLuigi Rizzo 		if (netmap_verbose)
1237f9790aebSLuigi Rizzo 		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
1238f9790aebSLuigi Rizzo 			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
1239f9790aebSLuigi Rizzo 	}
1240f9790aebSLuigi Rizzo 	dst = NM_BDG_BROADCAST;
1241f9790aebSLuigi Rizzo 	if ((buf[0] & 1) == 0) { /* unicast */
1242f9790aebSLuigi Rizzo 		dh = nm_bridge_rthash(buf); // XXX hash of dst
1243f9790aebSLuigi Rizzo 		if (ht[dh].mac == dmac) {	/* found dst */
1244f9790aebSLuigi Rizzo 			dst = ht[dh].ports;
1245f9790aebSLuigi Rizzo 		}
1246f9790aebSLuigi Rizzo 		/* XXX otherwise return NM_BDG_UNKNOWN ? */
1247f9790aebSLuigi Rizzo 	}
1248f9790aebSLuigi Rizzo 	return dst;
1249f9790aebSLuigi Rizzo }
1250f9790aebSLuigi Rizzo 
1251f9790aebSLuigi Rizzo 
1252f9790aebSLuigi Rizzo /*
125317885a7bSLuigi Rizzo  * Available space in the ring. Only used in VALE code
125417885a7bSLuigi Rizzo  * and only with is_rx = 1
125517885a7bSLuigi Rizzo  */
125617885a7bSLuigi Rizzo static inline uint32_t
125717885a7bSLuigi Rizzo nm_kr_space(struct netmap_kring *k, int is_rx)
125817885a7bSLuigi Rizzo {
125917885a7bSLuigi Rizzo 	int space;
126017885a7bSLuigi Rizzo 
126117885a7bSLuigi Rizzo 	if (is_rx) {
126217885a7bSLuigi Rizzo 		int busy = k->nkr_hwlease - k->nr_hwcur;
126317885a7bSLuigi Rizzo 		if (busy < 0)
126417885a7bSLuigi Rizzo 			busy += k->nkr_num_slots;
126517885a7bSLuigi Rizzo 		space = k->nkr_num_slots - 1 - busy;
126617885a7bSLuigi Rizzo 	} else {
126717885a7bSLuigi Rizzo 		/* XXX never used in this branch */
126817885a7bSLuigi Rizzo 		space = k->nr_hwtail - k->nkr_hwlease;
126917885a7bSLuigi Rizzo 		if (space < 0)
127017885a7bSLuigi Rizzo 			space += k->nkr_num_slots;
127117885a7bSLuigi Rizzo 	}
127217885a7bSLuigi Rizzo #if 0
127317885a7bSLuigi Rizzo 	// sanity check
127417885a7bSLuigi Rizzo 	if (k->nkr_hwlease >= k->nkr_num_slots ||
127517885a7bSLuigi Rizzo 		k->nr_hwcur >= k->nkr_num_slots ||
127617885a7bSLuigi Rizzo 		k->nr_tail >= k->nkr_num_slots ||
127717885a7bSLuigi Rizzo 		busy < 0 ||
127817885a7bSLuigi Rizzo 		busy >= k->nkr_num_slots) {
127917885a7bSLuigi Rizzo 		D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d",			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
128017885a7bSLuigi Rizzo 			k->nkr_lease_idx, k->nkr_num_slots);
128117885a7bSLuigi Rizzo 	}
128217885a7bSLuigi Rizzo #endif
128317885a7bSLuigi Rizzo 	return space;
128417885a7bSLuigi Rizzo }
128517885a7bSLuigi Rizzo 
128617885a7bSLuigi Rizzo 
128717885a7bSLuigi Rizzo 
128817885a7bSLuigi Rizzo 
128917885a7bSLuigi Rizzo /* make a lease on the kring for N positions. return the
129017885a7bSLuigi Rizzo  * lease index
129117885a7bSLuigi Rizzo  * XXX only used in VALE code and with is_rx = 1
129217885a7bSLuigi Rizzo  */
129317885a7bSLuigi Rizzo static inline uint32_t
129417885a7bSLuigi Rizzo nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
129517885a7bSLuigi Rizzo {
129617885a7bSLuigi Rizzo 	uint32_t lim = k->nkr_num_slots - 1;
129717885a7bSLuigi Rizzo 	uint32_t lease_idx = k->nkr_lease_idx;
129817885a7bSLuigi Rizzo 
129917885a7bSLuigi Rizzo 	k->nkr_leases[lease_idx] = NR_NOSLOT;
130017885a7bSLuigi Rizzo 	k->nkr_lease_idx = nm_next(lease_idx, lim);
130117885a7bSLuigi Rizzo 
130217885a7bSLuigi Rizzo 	if (n > nm_kr_space(k, is_rx)) {
130317885a7bSLuigi Rizzo 		D("invalid request for %d slots", n);
130417885a7bSLuigi Rizzo 		panic("x");
130517885a7bSLuigi Rizzo 	}
130617885a7bSLuigi Rizzo 	/* XXX verify that there are n slots */
130717885a7bSLuigi Rizzo 	k->nkr_hwlease += n;
130817885a7bSLuigi Rizzo 	if (k->nkr_hwlease > lim)
130917885a7bSLuigi Rizzo 		k->nkr_hwlease -= lim + 1;
131017885a7bSLuigi Rizzo 
131117885a7bSLuigi Rizzo 	if (k->nkr_hwlease >= k->nkr_num_slots ||
131217885a7bSLuigi Rizzo 		k->nr_hwcur >= k->nkr_num_slots ||
131317885a7bSLuigi Rizzo 		k->nr_hwtail >= k->nkr_num_slots ||
131417885a7bSLuigi Rizzo 		k->nkr_lease_idx >= k->nkr_num_slots) {
131517885a7bSLuigi Rizzo 		D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
13164bf50f18SLuigi Rizzo 			k->na->name,
131717885a7bSLuigi Rizzo 			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
131817885a7bSLuigi Rizzo 			k->nkr_lease_idx, k->nkr_num_slots);
131917885a7bSLuigi Rizzo 	}
132017885a7bSLuigi Rizzo 	return lease_idx;
132117885a7bSLuigi Rizzo }
132217885a7bSLuigi Rizzo 
132317885a7bSLuigi Rizzo /*
13244bf50f18SLuigi Rizzo  *
1325f9790aebSLuigi Rizzo  * This flush routine supports only unicast and broadcast but a large
1326f9790aebSLuigi Rizzo  * number of ports, and lets us replace the learn and dispatch functions.
1327f9790aebSLuigi Rizzo  */
1328f9790aebSLuigi Rizzo int
1329f9790aebSLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
1330f9790aebSLuigi Rizzo 		u_int ring_nr)
1331f9790aebSLuigi Rizzo {
1332f9790aebSLuigi Rizzo 	struct nm_bdg_q *dst_ents, *brddst;
1333f9790aebSLuigi Rizzo 	uint16_t num_dsts = 0, *dsts;
1334f9790aebSLuigi Rizzo 	struct nm_bridge *b = na->na_bdg;
1335f9790aebSLuigi Rizzo 	u_int i, j, me = na->bdg_port;
1336f9790aebSLuigi Rizzo 
1337f9790aebSLuigi Rizzo 	/*
1338f9790aebSLuigi Rizzo 	 * The work area (pointed by ft) is followed by an array of
1339f9790aebSLuigi Rizzo 	 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
1340f9790aebSLuigi Rizzo 	 * queues per port plus one for the broadcast traffic.
1341f9790aebSLuigi Rizzo 	 * Then we have an array of destination indexes.
1342f9790aebSLuigi Rizzo 	 */
1343f9790aebSLuigi Rizzo 	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
1344f9790aebSLuigi Rizzo 	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
1345f9790aebSLuigi Rizzo 
1346f9790aebSLuigi Rizzo 	/* first pass: find a destination for each packet in the batch */
1347f9790aebSLuigi Rizzo 	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
1348f9790aebSLuigi Rizzo 		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
1349f9790aebSLuigi Rizzo 		uint16_t dst_port, d_i;
1350f9790aebSLuigi Rizzo 		struct nm_bdg_q *d;
1351f9790aebSLuigi Rizzo 
1352f9790aebSLuigi Rizzo 		ND("slot %d frags %d", i, ft[i].ft_frags);
1353f0ea3689SLuigi Rizzo 		/* Drop the packet if the virtio-net header is not into the first
1354f9790aebSLuigi Rizzo 		   fragment nor at the very beginning of the second. */
13554bf50f18SLuigi Rizzo 		if (unlikely(na->virt_hdr_len > ft[i].ft_len))
1356f9790aebSLuigi Rizzo 			continue;
13574bf50f18SLuigi Rizzo 		dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na);
1358f9790aebSLuigi Rizzo 		if (netmap_verbose > 255)
1359f9790aebSLuigi Rizzo 			RD(5, "slot %d port %d -> %d", i, me, dst_port);
1360f9790aebSLuigi Rizzo 		if (dst_port == NM_BDG_NOPORT)
1361f9790aebSLuigi Rizzo 			continue; /* this packet is identified to be dropped */
1362f9790aebSLuigi Rizzo 		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
1363f9790aebSLuigi Rizzo 			continue;
1364f9790aebSLuigi Rizzo 		else if (dst_port == NM_BDG_BROADCAST)
1365f9790aebSLuigi Rizzo 			dst_ring = 0; /* broadcasts always go to ring 0 */
1366f9790aebSLuigi Rizzo 		else if (unlikely(dst_port == me ||
1367f9790aebSLuigi Rizzo 		    !b->bdg_ports[dst_port]))
1368f9790aebSLuigi Rizzo 			continue;
1369f9790aebSLuigi Rizzo 
1370f9790aebSLuigi Rizzo 		/* get a position in the scratch pad */
1371f9790aebSLuigi Rizzo 		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
1372f9790aebSLuigi Rizzo 		d = dst_ents + d_i;
1373f9790aebSLuigi Rizzo 
1374f9790aebSLuigi Rizzo 		/* append the first fragment to the list */
1375f9790aebSLuigi Rizzo 		if (d->bq_head == NM_FT_NULL) { /* new destination */
1376f9790aebSLuigi Rizzo 			d->bq_head = d->bq_tail = i;
1377f9790aebSLuigi Rizzo 			/* remember this position to be scanned later */
1378f9790aebSLuigi Rizzo 			if (dst_port != NM_BDG_BROADCAST)
1379f9790aebSLuigi Rizzo 				dsts[num_dsts++] = d_i;
1380f9790aebSLuigi Rizzo 		} else {
1381f9790aebSLuigi Rizzo 			ft[d->bq_tail].ft_next = i;
1382f9790aebSLuigi Rizzo 			d->bq_tail = i;
1383f9790aebSLuigi Rizzo 		}
1384f9790aebSLuigi Rizzo 		d->bq_len += ft[i].ft_frags;
1385f9790aebSLuigi Rizzo 	}
1386f9790aebSLuigi Rizzo 
1387f9790aebSLuigi Rizzo 	/*
1388f9790aebSLuigi Rizzo 	 * Broadcast traffic goes to ring 0 on all destinations.
1389f9790aebSLuigi Rizzo 	 * So we need to add these rings to the list of ports to scan.
1390f9790aebSLuigi Rizzo 	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
1391f9790aebSLuigi Rizzo 	 * expensive. We should keep a compact list of active destinations
1392f9790aebSLuigi Rizzo 	 * so we could shorten this loop.
1393f9790aebSLuigi Rizzo 	 */
1394f9790aebSLuigi Rizzo 	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
1395f9790aebSLuigi Rizzo 	if (brddst->bq_head != NM_FT_NULL) {
1396f9790aebSLuigi Rizzo 		for (j = 0; likely(j < b->bdg_active_ports); j++) {
1397f9790aebSLuigi Rizzo 			uint16_t d_i;
1398f9790aebSLuigi Rizzo 			i = b->bdg_port_index[j];
1399f9790aebSLuigi Rizzo 			if (unlikely(i == me))
1400f9790aebSLuigi Rizzo 				continue;
1401f9790aebSLuigi Rizzo 			d_i = i * NM_BDG_MAXRINGS;
1402f9790aebSLuigi Rizzo 			if (dst_ents[d_i].bq_head == NM_FT_NULL)
1403f9790aebSLuigi Rizzo 				dsts[num_dsts++] = d_i;
1404f9790aebSLuigi Rizzo 		}
1405f9790aebSLuigi Rizzo 	}
1406f9790aebSLuigi Rizzo 
1407f9790aebSLuigi Rizzo 	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
14084bf50f18SLuigi Rizzo 	/* second pass: scan destinations */
1409f9790aebSLuigi Rizzo 	for (i = 0; i < num_dsts; i++) {
1410f9790aebSLuigi Rizzo 		struct netmap_vp_adapter *dst_na;
1411f9790aebSLuigi Rizzo 		struct netmap_kring *kring;
1412f9790aebSLuigi Rizzo 		struct netmap_ring *ring;
1413f0ea3689SLuigi Rizzo 		u_int dst_nr, lim, j, d_i, next, brd_next;
1414f9790aebSLuigi Rizzo 		u_int needed, howmany;
1415f9790aebSLuigi Rizzo 		int retry = netmap_txsync_retry;
1416f9790aebSLuigi Rizzo 		struct nm_bdg_q *d;
1417f9790aebSLuigi Rizzo 		uint32_t my_start = 0, lease_idx = 0;
1418f9790aebSLuigi Rizzo 		int nrings;
1419f0ea3689SLuigi Rizzo 		int virt_hdr_mismatch = 0;
1420f9790aebSLuigi Rizzo 
1421f9790aebSLuigi Rizzo 		d_i = dsts[i];
1422f9790aebSLuigi Rizzo 		ND("second pass %d port %d", i, d_i);
1423f9790aebSLuigi Rizzo 		d = dst_ents + d_i;
1424f9790aebSLuigi Rizzo 		// XXX fix the division
1425f9790aebSLuigi Rizzo 		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
1426f9790aebSLuigi Rizzo 		/* protect from the lookup function returning an inactive
1427f9790aebSLuigi Rizzo 		 * destination port
1428f9790aebSLuigi Rizzo 		 */
1429f9790aebSLuigi Rizzo 		if (unlikely(dst_na == NULL))
1430f9790aebSLuigi Rizzo 			goto cleanup;
1431f9790aebSLuigi Rizzo 		if (dst_na->up.na_flags & NAF_SW_ONLY)
1432f9790aebSLuigi Rizzo 			goto cleanup;
1433f9790aebSLuigi Rizzo 		/*
1434f9790aebSLuigi Rizzo 		 * The interface may be in !netmap mode in two cases:
1435f9790aebSLuigi Rizzo 		 * - when na is attached but not activated yet;
1436f9790aebSLuigi Rizzo 		 * - when na is being deactivated but is still attached.
1437f9790aebSLuigi Rizzo 		 */
14384bf50f18SLuigi Rizzo 		if (unlikely(!nm_netmap_on(&dst_na->up))) {
1439f9790aebSLuigi Rizzo 			ND("not in netmap mode!");
1440f9790aebSLuigi Rizzo 			goto cleanup;
1441f9790aebSLuigi Rizzo 		}
1442f9790aebSLuigi Rizzo 
1443f9790aebSLuigi Rizzo 		/* there is at least one either unicast or broadcast packet */
1444f9790aebSLuigi Rizzo 		brd_next = brddst->bq_head;
1445f9790aebSLuigi Rizzo 		next = d->bq_head;
1446f9790aebSLuigi Rizzo 		/* we need to reserve this many slots. If fewer are
1447f9790aebSLuigi Rizzo 		 * available, some packets will be dropped.
1448f9790aebSLuigi Rizzo 		 * Packets may have multiple fragments, so we may not use
1449f9790aebSLuigi Rizzo 		 * there is a chance that we may not use all of the slots
1450f9790aebSLuigi Rizzo 		 * we have claimed, so we will need to handle the leftover
1451f9790aebSLuigi Rizzo 		 * ones when we regain the lock.
1452f9790aebSLuigi Rizzo 		 */
1453f9790aebSLuigi Rizzo 		needed = d->bq_len + brddst->bq_len;
1454f9790aebSLuigi Rizzo 
1455f0ea3689SLuigi Rizzo 		if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) {
14564bf50f18SLuigi Rizzo 			RD(3, "virt_hdr_mismatch, src %d dst %d", na->virt_hdr_len, dst_na->virt_hdr_len);
1457f0ea3689SLuigi Rizzo 			/* There is a virtio-net header/offloadings mismatch between
1458f0ea3689SLuigi Rizzo 			 * source and destination. The slower mismatch datapath will
1459f0ea3689SLuigi Rizzo 			 * be used to cope with all the mismatches.
1460f0ea3689SLuigi Rizzo 			 */
1461f0ea3689SLuigi Rizzo 			virt_hdr_mismatch = 1;
1462f0ea3689SLuigi Rizzo 			if (dst_na->mfs < na->mfs) {
1463f0ea3689SLuigi Rizzo 				/* We may need to do segmentation offloadings, and so
1464f0ea3689SLuigi Rizzo 				 * we may need a number of destination slots greater
1465f0ea3689SLuigi Rizzo 				 * than the number of input slots ('needed').
1466f0ea3689SLuigi Rizzo 				 * We look for the smallest integer 'x' which satisfies:
1467f0ea3689SLuigi Rizzo 				 *	needed * na->mfs + x * H <= x * na->mfs
1468f0ea3689SLuigi Rizzo 				 * where 'H' is the length of the longest header that may
1469f0ea3689SLuigi Rizzo 				 * be replicated in the segmentation process (e.g. for
1470f0ea3689SLuigi Rizzo 				 * TCPv4 we must account for ethernet header, IP header
1471f0ea3689SLuigi Rizzo 				 * and TCPv4 header).
1472f0ea3689SLuigi Rizzo 				 */
1473f0ea3689SLuigi Rizzo 				needed = (needed * na->mfs) /
1474f0ea3689SLuigi Rizzo 						(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
1475f0ea3689SLuigi Rizzo 				ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
1476f0ea3689SLuigi Rizzo 			}
1477f0ea3689SLuigi Rizzo 		}
1478f0ea3689SLuigi Rizzo 
1479f9790aebSLuigi Rizzo 		ND(5, "pass 2 dst %d is %x %s",
1480f9790aebSLuigi Rizzo 			i, d_i, is_vp ? "virtual" : "nic/host");
1481f9790aebSLuigi Rizzo 		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
1482f9790aebSLuigi Rizzo 		nrings = dst_na->up.num_rx_rings;
1483f9790aebSLuigi Rizzo 		if (dst_nr >= nrings)
1484f9790aebSLuigi Rizzo 			dst_nr = dst_nr % nrings;
1485f9790aebSLuigi Rizzo 		kring = &dst_na->up.rx_rings[dst_nr];
1486f9790aebSLuigi Rizzo 		ring = kring->ring;
1487f9790aebSLuigi Rizzo 		lim = kring->nkr_num_slots - 1;
1488f9790aebSLuigi Rizzo 
1489f9790aebSLuigi Rizzo retry:
1490f9790aebSLuigi Rizzo 
1491f0ea3689SLuigi Rizzo 		if (dst_na->retry && retry) {
1492f0ea3689SLuigi Rizzo 			/* try to get some free slot from the previous run */
1493*847bf383SLuigi Rizzo 			kring->nm_notify(kring, 0);
14944bf50f18SLuigi Rizzo 			/* actually useful only for bwraps, since there
14954bf50f18SLuigi Rizzo 			 * the notify will trigger a txsync on the hwna. VALE ports
14964bf50f18SLuigi Rizzo 			 * have dst_na->retry == 0
14974bf50f18SLuigi Rizzo 			 */
1498f0ea3689SLuigi Rizzo 		}
1499f9790aebSLuigi Rizzo 		/* reserve the buffers in the queue and an entry
1500f9790aebSLuigi Rizzo 		 * to report completion, and drop lock.
1501f9790aebSLuigi Rizzo 		 * XXX this might become a helper function.
1502f9790aebSLuigi Rizzo 		 */
1503f9790aebSLuigi Rizzo 		mtx_lock(&kring->q_lock);
1504f9790aebSLuigi Rizzo 		if (kring->nkr_stopped) {
1505f9790aebSLuigi Rizzo 			mtx_unlock(&kring->q_lock);
1506f9790aebSLuigi Rizzo 			goto cleanup;
1507f9790aebSLuigi Rizzo 		}
1508f9790aebSLuigi Rizzo 		my_start = j = kring->nkr_hwlease;
1509f9790aebSLuigi Rizzo 		howmany = nm_kr_space(kring, 1);
1510f9790aebSLuigi Rizzo 		if (needed < howmany)
1511f9790aebSLuigi Rizzo 			howmany = needed;
1512f9790aebSLuigi Rizzo 		lease_idx = nm_kr_lease(kring, howmany, 1);
1513f9790aebSLuigi Rizzo 		mtx_unlock(&kring->q_lock);
1514f9790aebSLuigi Rizzo 
1515f9790aebSLuigi Rizzo 		/* only retry if we need more than available slots */
1516f9790aebSLuigi Rizzo 		if (retry && needed <= howmany)
1517f9790aebSLuigi Rizzo 			retry = 0;
1518f9790aebSLuigi Rizzo 
1519f9790aebSLuigi Rizzo 		/* copy to the destination queue */
1520f9790aebSLuigi Rizzo 		while (howmany > 0) {
1521f9790aebSLuigi Rizzo 			struct netmap_slot *slot;
1522f9790aebSLuigi Rizzo 			struct nm_bdg_fwd *ft_p, *ft_end;
1523f9790aebSLuigi Rizzo 			u_int cnt;
1524f9790aebSLuigi Rizzo 
1525f9790aebSLuigi Rizzo 			/* find the queue from which we pick next packet.
1526f9790aebSLuigi Rizzo 			 * NM_FT_NULL is always higher than valid indexes
1527f9790aebSLuigi Rizzo 			 * so we never dereference it if the other list
1528f9790aebSLuigi Rizzo 			 * has packets (and if both are empty we never
1529f9790aebSLuigi Rizzo 			 * get here).
1530f9790aebSLuigi Rizzo 			 */
1531f9790aebSLuigi Rizzo 			if (next < brd_next) {
1532f9790aebSLuigi Rizzo 				ft_p = ft + next;
1533f9790aebSLuigi Rizzo 				next = ft_p->ft_next;
1534f9790aebSLuigi Rizzo 			} else { /* insert broadcast */
1535f9790aebSLuigi Rizzo 				ft_p = ft + brd_next;
1536f9790aebSLuigi Rizzo 				brd_next = ft_p->ft_next;
1537f9790aebSLuigi Rizzo 			}
1538f9790aebSLuigi Rizzo 			cnt = ft_p->ft_frags; // cnt > 0
1539f9790aebSLuigi Rizzo 			if (unlikely(cnt > howmany))
1540f9790aebSLuigi Rizzo 			    break; /* no more space */
1541f9790aebSLuigi Rizzo 			if (netmap_verbose && cnt > 1)
1542f9790aebSLuigi Rizzo 				RD(5, "rx %d frags to %d", cnt, j);
1543f9790aebSLuigi Rizzo 			ft_end = ft_p + cnt;
1544f0ea3689SLuigi Rizzo 			if (unlikely(virt_hdr_mismatch)) {
1545f0ea3689SLuigi Rizzo 				bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
1546f0ea3689SLuigi Rizzo 			} else {
1547f0ea3689SLuigi Rizzo 				howmany -= cnt;
1548f9790aebSLuigi Rizzo 				do {
1549f9790aebSLuigi Rizzo 					char *dst, *src = ft_p->ft_buf;
1550f9790aebSLuigi Rizzo 					size_t copy_len = ft_p->ft_len, dst_len = copy_len;
1551f9790aebSLuigi Rizzo 
1552f9790aebSLuigi Rizzo 					slot = &ring->slot[j];
15534bf50f18SLuigi Rizzo 					dst = NMB(&dst_na->up, slot);
1554f9790aebSLuigi Rizzo 
155517885a7bSLuigi Rizzo 					ND("send [%d] %d(%d) bytes at %s:%d",
155617885a7bSLuigi Rizzo 							i, (int)copy_len, (int)dst_len,
155717885a7bSLuigi Rizzo 							NM_IFPNAME(dst_ifp), j);
1558f9790aebSLuigi Rizzo 					/* round to a multiple of 64 */
1559f9790aebSLuigi Rizzo 					copy_len = (copy_len + 63) & ~63;
1560f9790aebSLuigi Rizzo 
15614bf50f18SLuigi Rizzo 					if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) ||
15624bf50f18SLuigi Rizzo 						     copy_len > NETMAP_BUF_SIZE(&na->up))) {
1563e31c6ec7SLuigi Rizzo 						RD(5, "invalid len %d, down to 64", (int)copy_len);
1564e31c6ec7SLuigi Rizzo 						copy_len = dst_len = 64; // XXX
1565e31c6ec7SLuigi Rizzo 					}
1566f9790aebSLuigi Rizzo 					if (ft_p->ft_flags & NS_INDIRECT) {
1567f9790aebSLuigi Rizzo 						if (copyin(src, dst, copy_len)) {
1568f9790aebSLuigi Rizzo 							// invalid user pointer, pretend len is 0
1569f9790aebSLuigi Rizzo 							dst_len = 0;
1570f9790aebSLuigi Rizzo 						}
1571f9790aebSLuigi Rizzo 					} else {
1572f9790aebSLuigi Rizzo 						//memcpy(dst, src, copy_len);
1573f9790aebSLuigi Rizzo 						pkt_copy(src, dst, (int)copy_len);
1574f9790aebSLuigi Rizzo 					}
1575f9790aebSLuigi Rizzo 					slot->len = dst_len;
1576f9790aebSLuigi Rizzo 					slot->flags = (cnt << 8)| NS_MOREFRAG;
1577f9790aebSLuigi Rizzo 					j = nm_next(j, lim);
1578f0ea3689SLuigi Rizzo 					needed--;
1579f9790aebSLuigi Rizzo 					ft_p++;
1580f9790aebSLuigi Rizzo 				} while (ft_p != ft_end);
1581f9790aebSLuigi Rizzo 				slot->flags = (cnt << 8); /* clear flag on last entry */
1582f0ea3689SLuigi Rizzo 			}
1583f9790aebSLuigi Rizzo 			/* are we done ? */
1584f9790aebSLuigi Rizzo 			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
1585f9790aebSLuigi Rizzo 				break;
1586f9790aebSLuigi Rizzo 		}
1587f9790aebSLuigi Rizzo 		{
1588f9790aebSLuigi Rizzo 		    /* current position */
1589f9790aebSLuigi Rizzo 		    uint32_t *p = kring->nkr_leases; /* shorthand */
1590f9790aebSLuigi Rizzo 		    uint32_t update_pos;
1591f9790aebSLuigi Rizzo 		    int still_locked = 1;
1592f9790aebSLuigi Rizzo 
1593f9790aebSLuigi Rizzo 		    mtx_lock(&kring->q_lock);
1594f9790aebSLuigi Rizzo 		    if (unlikely(howmany > 0)) {
1595f9790aebSLuigi Rizzo 			/* not used all bufs. If i am the last one
1596f9790aebSLuigi Rizzo 			 * i can recover the slots, otherwise must
1597f9790aebSLuigi Rizzo 			 * fill them with 0 to mark empty packets.
1598f9790aebSLuigi Rizzo 			 */
1599f9790aebSLuigi Rizzo 			ND("leftover %d bufs", howmany);
1600f9790aebSLuigi Rizzo 			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
1601f9790aebSLuigi Rizzo 			    /* yes i am the last one */
1602f9790aebSLuigi Rizzo 			    ND("roll back nkr_hwlease to %d", j);
1603f9790aebSLuigi Rizzo 			    kring->nkr_hwlease = j;
1604f9790aebSLuigi Rizzo 			} else {
1605f9790aebSLuigi Rizzo 			    while (howmany-- > 0) {
1606f9790aebSLuigi Rizzo 				ring->slot[j].len = 0;
1607f9790aebSLuigi Rizzo 				ring->slot[j].flags = 0;
1608f9790aebSLuigi Rizzo 				j = nm_next(j, lim);
1609f9790aebSLuigi Rizzo 			    }
1610f9790aebSLuigi Rizzo 			}
1611f9790aebSLuigi Rizzo 		    }
1612f9790aebSLuigi Rizzo 		    p[lease_idx] = j; /* report I am done */
1613f9790aebSLuigi Rizzo 
161417885a7bSLuigi Rizzo 		    update_pos = kring->nr_hwtail;
1615f9790aebSLuigi Rizzo 
1616f9790aebSLuigi Rizzo 		    if (my_start == update_pos) {
1617f9790aebSLuigi Rizzo 			/* all slots before my_start have been reported,
1618f9790aebSLuigi Rizzo 			 * so scan subsequent leases to see if other ranges
1619f9790aebSLuigi Rizzo 			 * have been completed, and to a selwakeup or txsync.
1620f9790aebSLuigi Rizzo 		         */
1621f9790aebSLuigi Rizzo 			while (lease_idx != kring->nkr_lease_idx &&
1622f9790aebSLuigi Rizzo 				p[lease_idx] != NR_NOSLOT) {
1623f9790aebSLuigi Rizzo 			    j = p[lease_idx];
1624f9790aebSLuigi Rizzo 			    p[lease_idx] = NR_NOSLOT;
1625f9790aebSLuigi Rizzo 			    lease_idx = nm_next(lease_idx, lim);
1626f9790aebSLuigi Rizzo 			}
1627f9790aebSLuigi Rizzo 			/* j is the new 'write' position. j != my_start
1628f9790aebSLuigi Rizzo 			 * means there are new buffers to report
1629f9790aebSLuigi Rizzo 			 */
1630f9790aebSLuigi Rizzo 			if (likely(j != my_start)) {
163117885a7bSLuigi Rizzo 				kring->nr_hwtail = j;
1632f9790aebSLuigi Rizzo 				still_locked = 0;
1633f9790aebSLuigi Rizzo 				mtx_unlock(&kring->q_lock);
1634*847bf383SLuigi Rizzo 				kring->nm_notify(kring, 0);
16354bf50f18SLuigi Rizzo 				/* this is netmap_notify for VALE ports and
16364bf50f18SLuigi Rizzo 				 * netmap_bwrap_notify for bwrap. The latter will
16374bf50f18SLuigi Rizzo 				 * trigger a txsync on the underlying hwna
16384bf50f18SLuigi Rizzo 				 */
16394bf50f18SLuigi Rizzo 				if (dst_na->retry && retry--) {
16404bf50f18SLuigi Rizzo 					/* XXX this is going to call nm_notify again.
16414bf50f18SLuigi Rizzo 					 * Only useful for bwrap in virtual machines
16424bf50f18SLuigi Rizzo 					 */
1643f9790aebSLuigi Rizzo 					goto retry;
1644f9790aebSLuigi Rizzo 				}
1645f9790aebSLuigi Rizzo 			}
16464bf50f18SLuigi Rizzo 		    }
1647f9790aebSLuigi Rizzo 		    if (still_locked)
1648f9790aebSLuigi Rizzo 			mtx_unlock(&kring->q_lock);
1649f9790aebSLuigi Rizzo 		}
1650f9790aebSLuigi Rizzo cleanup:
1651f9790aebSLuigi Rizzo 		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
1652f9790aebSLuigi Rizzo 		d->bq_len = 0;
1653f9790aebSLuigi Rizzo 	}
1654f9790aebSLuigi Rizzo 	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
1655f9790aebSLuigi Rizzo 	brddst->bq_len = 0;
1656f9790aebSLuigi Rizzo 	return 0;
1657f9790aebSLuigi Rizzo }
1658f9790aebSLuigi Rizzo 
16594bf50f18SLuigi Rizzo /* nm_txsync callback for VALE ports */
1660f9790aebSLuigi Rizzo static int
16614bf50f18SLuigi Rizzo netmap_vp_txsync(struct netmap_kring *kring, int flags)
1662f9790aebSLuigi Rizzo {
16634bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *na =
16644bf50f18SLuigi Rizzo 		(struct netmap_vp_adapter *)kring->na;
166517885a7bSLuigi Rizzo 	u_int done;
166617885a7bSLuigi Rizzo 	u_int const lim = kring->nkr_num_slots - 1;
1667*847bf383SLuigi Rizzo 	u_int const head = kring->rhead;
1668f9790aebSLuigi Rizzo 
1669f9790aebSLuigi Rizzo 	if (bridge_batch <= 0) { /* testing only */
1670*847bf383SLuigi Rizzo 		done = head; // used all
1671f9790aebSLuigi Rizzo 		goto done;
1672f9790aebSLuigi Rizzo 	}
16734bf50f18SLuigi Rizzo 	if (!na->na_bdg) {
1674*847bf383SLuigi Rizzo 		done = head;
16754bf50f18SLuigi Rizzo 		goto done;
16764bf50f18SLuigi Rizzo 	}
1677f9790aebSLuigi Rizzo 	if (bridge_batch > NM_BDG_BATCH)
1678f9790aebSLuigi Rizzo 		bridge_batch = NM_BDG_BATCH;
1679f9790aebSLuigi Rizzo 
1680*847bf383SLuigi Rizzo 	done = nm_bdg_preflush(kring, head);
1681f9790aebSLuigi Rizzo done:
1682*847bf383SLuigi Rizzo 	if (done != head)
1683*847bf383SLuigi Rizzo 		D("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail);
168417885a7bSLuigi Rizzo 	/*
168517885a7bSLuigi Rizzo 	 * packets between 'done' and 'cur' are left unsent.
168617885a7bSLuigi Rizzo 	 */
168717885a7bSLuigi Rizzo 	kring->nr_hwcur = done;
168817885a7bSLuigi Rizzo 	kring->nr_hwtail = nm_prev(done, lim);
1689f9790aebSLuigi Rizzo 	if (netmap_verbose)
16904bf50f18SLuigi Rizzo 		D("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
1691f9790aebSLuigi Rizzo 	return 0;
1692f9790aebSLuigi Rizzo }
1693f9790aebSLuigi Rizzo 
1694f9790aebSLuigi Rizzo 
16954bf50f18SLuigi Rizzo /* rxsync code used by VALE ports nm_rxsync callback and also
16964bf50f18SLuigi Rizzo  * internally by the brwap
1697f9790aebSLuigi Rizzo  */
1698f9790aebSLuigi Rizzo static int
16994bf50f18SLuigi Rizzo netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
1700f9790aebSLuigi Rizzo {
17014bf50f18SLuigi Rizzo 	struct netmap_adapter *na = kring->na;
170217885a7bSLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
170317885a7bSLuigi Rizzo 	u_int nm_i, lim = kring->nkr_num_slots - 1;
1704*847bf383SLuigi Rizzo 	u_int head = kring->rhead;
170517885a7bSLuigi Rizzo 	int n;
170617885a7bSLuigi Rizzo 
170717885a7bSLuigi Rizzo 	if (head > lim) {
170817885a7bSLuigi Rizzo 		D("ouch dangerous reset!!!");
170917885a7bSLuigi Rizzo 		n = netmap_ring_reinit(kring);
171017885a7bSLuigi Rizzo 		goto done;
171117885a7bSLuigi Rizzo 	}
171217885a7bSLuigi Rizzo 
171317885a7bSLuigi Rizzo 	/* First part, import newly received packets. */
171417885a7bSLuigi Rizzo 	/* actually nothing to do here, they are already in the kring */
171517885a7bSLuigi Rizzo 
171617885a7bSLuigi Rizzo 	/* Second part, skip past packets that userspace has released. */
171717885a7bSLuigi Rizzo 	nm_i = kring->nr_hwcur;
171817885a7bSLuigi Rizzo 	if (nm_i != head) {
171917885a7bSLuigi Rizzo 		/* consistency check, but nothing really important here */
172017885a7bSLuigi Rizzo 		for (n = 0; likely(nm_i != head); n++) {
172117885a7bSLuigi Rizzo 			struct netmap_slot *slot = &ring->slot[nm_i];
17224bf50f18SLuigi Rizzo 			void *addr = NMB(na, slot);
172317885a7bSLuigi Rizzo 
17244bf50f18SLuigi Rizzo 			if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
172517885a7bSLuigi Rizzo 				D("bad buffer index %d, ignore ?",
172617885a7bSLuigi Rizzo 					slot->buf_idx);
172717885a7bSLuigi Rizzo 			}
172817885a7bSLuigi Rizzo 			slot->flags &= ~NS_BUF_CHANGED;
172917885a7bSLuigi Rizzo 			nm_i = nm_next(nm_i, lim);
173017885a7bSLuigi Rizzo 		}
173117885a7bSLuigi Rizzo 		kring->nr_hwcur = head;
173217885a7bSLuigi Rizzo 	}
173317885a7bSLuigi Rizzo 
173417885a7bSLuigi Rizzo 	n = 0;
173517885a7bSLuigi Rizzo done:
173617885a7bSLuigi Rizzo 	return n;
173717885a7bSLuigi Rizzo }
1738f9790aebSLuigi Rizzo 
1739f9790aebSLuigi Rizzo /*
17404bf50f18SLuigi Rizzo  * nm_rxsync callback for VALE ports
1741f9790aebSLuigi Rizzo  * user process reading from a VALE switch.
1742f9790aebSLuigi Rizzo  * Already protected against concurrent calls from userspace,
1743f9790aebSLuigi Rizzo  * but we must acquire the queue's lock to protect against
1744f9790aebSLuigi Rizzo  * writers on the same queue.
1745f9790aebSLuigi Rizzo  */
1746f9790aebSLuigi Rizzo static int
17474bf50f18SLuigi Rizzo netmap_vp_rxsync(struct netmap_kring *kring, int flags)
1748f9790aebSLuigi Rizzo {
1749f9790aebSLuigi Rizzo 	int n;
1750f9790aebSLuigi Rizzo 
1751f9790aebSLuigi Rizzo 	mtx_lock(&kring->q_lock);
17524bf50f18SLuigi Rizzo 	n = netmap_vp_rxsync_locked(kring, flags);
1753f9790aebSLuigi Rizzo 	mtx_unlock(&kring->q_lock);
1754f9790aebSLuigi Rizzo 	return n;
1755f9790aebSLuigi Rizzo }
1756f9790aebSLuigi Rizzo 
175717885a7bSLuigi Rizzo 
17584bf50f18SLuigi Rizzo /* nm_bdg_attach callback for VALE ports
17594bf50f18SLuigi Rizzo  * The na_vp port is this same netmap_adapter. There is no host port.
17604bf50f18SLuigi Rizzo  */
1761f9790aebSLuigi Rizzo static int
17624bf50f18SLuigi Rizzo netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na)
17634bf50f18SLuigi Rizzo {
17644bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
17654bf50f18SLuigi Rizzo 
17664bf50f18SLuigi Rizzo 	if (vpna->na_bdg)
17674bf50f18SLuigi Rizzo 		return EBUSY;
17684bf50f18SLuigi Rizzo 	na->na_vp = vpna;
17694bf50f18SLuigi Rizzo 	strncpy(na->name, name, sizeof(na->name));
17704bf50f18SLuigi Rizzo 	na->na_hostvp = NULL;
17714bf50f18SLuigi Rizzo 	return 0;
17724bf50f18SLuigi Rizzo }
17734bf50f18SLuigi Rizzo 
17744bf50f18SLuigi Rizzo /* create a netmap_vp_adapter that describes a VALE port.
17754bf50f18SLuigi Rizzo  * Only persistent VALE ports have a non-null ifp.
17764bf50f18SLuigi Rizzo  */
17774bf50f18SLuigi Rizzo static int
17784bf50f18SLuigi Rizzo netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter **ret)
1779f9790aebSLuigi Rizzo {
1780f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna;
1781f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
1782f9790aebSLuigi Rizzo 	int error;
1783f0ea3689SLuigi Rizzo 	u_int npipes = 0;
1784f9790aebSLuigi Rizzo 
1785f9790aebSLuigi Rizzo 	vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
1786f9790aebSLuigi Rizzo 	if (vpna == NULL)
1787f9790aebSLuigi Rizzo 		return ENOMEM;
1788f9790aebSLuigi Rizzo 
1789f9790aebSLuigi Rizzo  	na = &vpna->up;
1790f9790aebSLuigi Rizzo 
1791f9790aebSLuigi Rizzo 	na->ifp = ifp;
17924bf50f18SLuigi Rizzo 	strncpy(na->name, nmr->nr_name, sizeof(na->name));
1793f9790aebSLuigi Rizzo 
1794f9790aebSLuigi Rizzo 	/* bound checking */
1795f9790aebSLuigi Rizzo 	na->num_tx_rings = nmr->nr_tx_rings;
1796f9790aebSLuigi Rizzo 	nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1797f9790aebSLuigi Rizzo 	nmr->nr_tx_rings = na->num_tx_rings; // write back
1798f9790aebSLuigi Rizzo 	na->num_rx_rings = nmr->nr_rx_rings;
1799f9790aebSLuigi Rizzo 	nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1800f9790aebSLuigi Rizzo 	nmr->nr_rx_rings = na->num_rx_rings; // write back
1801f9790aebSLuigi Rizzo 	nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
1802f9790aebSLuigi Rizzo 			1, NM_BDG_MAXSLOTS, NULL);
1803f9790aebSLuigi Rizzo 	na->num_tx_desc = nmr->nr_tx_slots;
1804f9790aebSLuigi Rizzo 	nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
1805f9790aebSLuigi Rizzo 			1, NM_BDG_MAXSLOTS, NULL);
1806f0ea3689SLuigi Rizzo 	/* validate number of pipes. We want at least 1,
1807f0ea3689SLuigi Rizzo 	 * but probably can do with some more.
1808f0ea3689SLuigi Rizzo 	 * So let's use 2 as default (when 0 is supplied)
1809f0ea3689SLuigi Rizzo 	 */
1810f0ea3689SLuigi Rizzo 	npipes = nmr->nr_arg1;
1811f0ea3689SLuigi Rizzo 	nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
1812f0ea3689SLuigi Rizzo 	nmr->nr_arg1 = npipes;	/* write back */
1813f0ea3689SLuigi Rizzo 	/* validate extra bufs */
1814f0ea3689SLuigi Rizzo 	nm_bound_var(&nmr->nr_arg3, 0, 0,
1815f0ea3689SLuigi Rizzo 			128*NM_BDG_MAXSLOTS, NULL);
1816f9790aebSLuigi Rizzo 	na->num_rx_desc = nmr->nr_rx_slots;
1817f0ea3689SLuigi Rizzo 	vpna->virt_hdr_len = 0;
1818f0ea3689SLuigi Rizzo 	vpna->mfs = 1514;
1819*847bf383SLuigi Rizzo 	vpna->last_smac = ~0llu;
1820f0ea3689SLuigi Rizzo 	/*if (vpna->mfs > netmap_buf_size)  TODO netmap_buf_size is zero??
1821f0ea3689SLuigi Rizzo 		vpna->mfs = netmap_buf_size; */
1822f0ea3689SLuigi Rizzo         if (netmap_verbose)
1823f0ea3689SLuigi Rizzo 		D("max frame size %u", vpna->mfs);
1824f9790aebSLuigi Rizzo 
1825*847bf383SLuigi Rizzo 	na->na_flags |= NAF_BDG_MAYSLEEP;
18264bf50f18SLuigi Rizzo 	na->nm_txsync = netmap_vp_txsync;
18274bf50f18SLuigi Rizzo 	na->nm_rxsync = netmap_vp_rxsync;
18284bf50f18SLuigi Rizzo 	na->nm_register = netmap_vp_reg;
1829f9790aebSLuigi Rizzo 	na->nm_krings_create = netmap_vp_krings_create;
1830f9790aebSLuigi Rizzo 	na->nm_krings_delete = netmap_vp_krings_delete;
18314bf50f18SLuigi Rizzo 	na->nm_dtor = netmap_vp_dtor;
18324bf50f18SLuigi Rizzo 	na->nm_mem = netmap_mem_private_new(na->name,
1833f9790aebSLuigi Rizzo 			na->num_tx_rings, na->num_tx_desc,
1834f0ea3689SLuigi Rizzo 			na->num_rx_rings, na->num_rx_desc,
1835f0ea3689SLuigi Rizzo 			nmr->nr_arg3, npipes, &error);
1836f0ea3689SLuigi Rizzo 	if (na->nm_mem == NULL)
1837f0ea3689SLuigi Rizzo 		goto err;
18384bf50f18SLuigi Rizzo 	na->nm_bdg_attach = netmap_vp_bdg_attach;
1839f9790aebSLuigi Rizzo 	/* other nmd fields are set in the common routine */
1840f9790aebSLuigi Rizzo 	error = netmap_attach_common(na);
1841f0ea3689SLuigi Rizzo 	if (error)
1842f0ea3689SLuigi Rizzo 		goto err;
18434bf50f18SLuigi Rizzo 	*ret = vpna;
1844f0ea3689SLuigi Rizzo 	return 0;
1845f0ea3689SLuigi Rizzo 
1846f0ea3689SLuigi Rizzo err:
1847f0ea3689SLuigi Rizzo 	if (na->nm_mem != NULL)
1848*847bf383SLuigi Rizzo 		netmap_mem_delete(na->nm_mem);
1849f9790aebSLuigi Rizzo 	free(vpna, M_DEVBUF);
1850f9790aebSLuigi Rizzo 	return error;
1851f9790aebSLuigi Rizzo }
1852f9790aebSLuigi Rizzo 
/* Bridge wrapper code (bwrap).
 * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
 * VALE switch.
 * The main task is to swap the meaning of tx and rx rings to match the
 * expectations of the VALE switch code (see nm_bdg_flush).
 *
 * The bwrap works by interposing a netmap_bwrap_adapter between the
 * rest of the system and the hwna. The netmap_bwrap_adapter looks like
 * a netmap_vp_adapter to the rest of the system, but, internally, it
 * translates all callbacks to what the hwna expects.
 *
 * Note that we have to intercept callbacks coming from two sides:
 *
 *  - callbacks coming from the netmap module are intercepted by
 *    passing around the netmap_bwrap_adapter instead of the hwna
 *
 *  - callbacks coming from outside of the netmap module only know
 *    about the hwna. This, however, only happens in interrupt
 *    handlers, where only the hwna->nm_notify callback is called.
 *    What the bwrap does is to overwrite the hwna->nm_notify callback
 *    with its own netmap_bwrap_intr_notify.
 *    XXX This assumes that the hwna->nm_notify callback was the
 *    standard netmap_notify(), as is the case for nic adapters.
 *    Any additional action performed by hwna->nm_notify will not be
 *    performed by netmap_bwrap_intr_notify.
 *
 * Additionally, the bwrap can optionally attach the host rings pair
 * of the wrapped adapter to a different port of the switch.
 */
18824bf50f18SLuigi Rizzo 
188317885a7bSLuigi Rizzo 
1884f9790aebSLuigi Rizzo static void
1885f9790aebSLuigi Rizzo netmap_bwrap_dtor(struct netmap_adapter *na)
1886f9790aebSLuigi Rizzo {
1887f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1888f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
1889f9790aebSLuigi Rizzo 
1890f9790aebSLuigi Rizzo 	ND("na %p", na);
18914bf50f18SLuigi Rizzo 	/* drop reference to hwna->ifp.
18924bf50f18SLuigi Rizzo 	 * If we don't do this, netmap_detach_common(na)
18934bf50f18SLuigi Rizzo 	 * will think it has set NA(na->ifp) to NULL
18944bf50f18SLuigi Rizzo 	 */
1895f9790aebSLuigi Rizzo 	na->ifp = NULL;
18964bf50f18SLuigi Rizzo 	/* for safety, also drop the possible reference
18974bf50f18SLuigi Rizzo 	 * in the hostna
18984bf50f18SLuigi Rizzo 	 */
18994bf50f18SLuigi Rizzo 	bna->host.up.ifp = NULL;
19004bf50f18SLuigi Rizzo 
19014bf50f18SLuigi Rizzo 	hwna->nm_mem = bna->save_nmd;
19024bf50f18SLuigi Rizzo 	hwna->na_private = NULL;
19034bf50f18SLuigi Rizzo 	hwna->na_vp = hwna->na_hostvp = NULL;
19044bf50f18SLuigi Rizzo 	hwna->na_flags &= ~NAF_BUSY;
19054bf50f18SLuigi Rizzo 	netmap_adapter_put(hwna);
1906f9790aebSLuigi Rizzo 
1907f9790aebSLuigi Rizzo }
1908f9790aebSLuigi Rizzo 
190917885a7bSLuigi Rizzo 
1910f9790aebSLuigi Rizzo /*
191117885a7bSLuigi Rizzo  * Intr callback for NICs connected to a bridge.
191217885a7bSLuigi Rizzo  * Simply ignore tx interrupts (maybe we could try to recover space ?)
191317885a7bSLuigi Rizzo  * and pass received packets from nic to the bridge.
191417885a7bSLuigi Rizzo  *
1915f9790aebSLuigi Rizzo  * XXX TODO check locking: this is called from the interrupt
1916f9790aebSLuigi Rizzo  * handler so we should make sure that the interface is not
1917f9790aebSLuigi Rizzo  * disconnected while passing down an interrupt.
1918f9790aebSLuigi Rizzo  *
191917885a7bSLuigi Rizzo  * Note, no user process can access this NIC or the host stack.
192017885a7bSLuigi Rizzo  * The only part of the ring that is significant are the slots,
192117885a7bSLuigi Rizzo  * and head/cur/tail are set from the kring as needed
192217885a7bSLuigi Rizzo  * (part as a receive ring, part as a transmit ring).
192317885a7bSLuigi Rizzo  *
192417885a7bSLuigi Rizzo  * callback that overwrites the hwna notify callback.
1925f9790aebSLuigi Rizzo  * Packets come from the outside or from the host stack and are put on an hwna rx ring.
1926f9790aebSLuigi Rizzo  * The bridge wrapper then sends the packets through the bridge.
1927f9790aebSLuigi Rizzo  */
1928f9790aebSLuigi Rizzo static int
1929*847bf383SLuigi Rizzo netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
1930f9790aebSLuigi Rizzo {
1931*847bf383SLuigi Rizzo 	struct netmap_adapter *na = kring->na;
1932f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna = na->na_private;
1933*847bf383SLuigi Rizzo 	struct netmap_kring *bkring;
1934f9790aebSLuigi Rizzo 	struct netmap_ring *ring;
1935f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna = &bna->up;
1936*847bf383SLuigi Rizzo 	u_int ring_nr = kring->ring_id;
1937f9790aebSLuigi Rizzo 	int error = 0;
1938f9790aebSLuigi Rizzo 
193917885a7bSLuigi Rizzo 	if (netmap_verbose)
1940*847bf383SLuigi Rizzo 	    D("%s %s 0x%x", na->name, kring->name, flags);
1941f9790aebSLuigi Rizzo 
19424bf50f18SLuigi Rizzo 	if (!nm_netmap_on(na))
1943f9790aebSLuigi Rizzo 		return 0;
1944f9790aebSLuigi Rizzo 
1945*847bf383SLuigi Rizzo 	bkring = &vpna->up.tx_rings[ring_nr];
1946*847bf383SLuigi Rizzo 	ring = kring->ring; /* == kbkring->ring */
1947f9790aebSLuigi Rizzo 
1948f9790aebSLuigi Rizzo 	/* make sure the ring is not disabled */
1949f9790aebSLuigi Rizzo 	if (nm_kr_tryget(kring))
1950f9790aebSLuigi Rizzo 		return 0;
1951f9790aebSLuigi Rizzo 
195217885a7bSLuigi Rizzo 	if (netmap_verbose)
1953*847bf383SLuigi Rizzo 	    D("%s head %d cur %d tail %d",  na->name,
195417885a7bSLuigi Rizzo 		kring->rhead, kring->rcur, kring->rtail);
195517885a7bSLuigi Rizzo 
1956*847bf383SLuigi Rizzo 	/* simulate a user wakeup on the rx ring
1957*847bf383SLuigi Rizzo 	 * fetch packets that have arrived.
1958f9790aebSLuigi Rizzo 	 */
1959f0ea3689SLuigi Rizzo 	error = kring->nm_sync(kring, 0);
1960f9790aebSLuigi Rizzo 	if (error)
1961f9790aebSLuigi Rizzo 		goto put_out;
196217885a7bSLuigi Rizzo 	if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) {
1963f9790aebSLuigi Rizzo 		D("how strange, interrupt with no packets on %s",
19644bf50f18SLuigi Rizzo 			na->name);
1965f9790aebSLuigi Rizzo 		goto put_out;
1966f9790aebSLuigi Rizzo 	}
196717885a7bSLuigi Rizzo 
1968*847bf383SLuigi Rizzo 	/* new packets are kring->rcur to kring->nr_hwtail, and the bkring
1969*847bf383SLuigi Rizzo 	 * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail
197017885a7bSLuigi Rizzo 	 * to push all packets out.
197117885a7bSLuigi Rizzo 	 */
1972*847bf383SLuigi Rizzo 	bkring->rhead = bkring->rcur = kring->nr_hwtail;
197317885a7bSLuigi Rizzo 
19744bf50f18SLuigi Rizzo 	netmap_vp_txsync(bkring, flags);
1975f9790aebSLuigi Rizzo 
197617885a7bSLuigi Rizzo 	/* mark all buffers as released on this ring */
1977*847bf383SLuigi Rizzo 	kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail;
197817885a7bSLuigi Rizzo 	/* another call to actually release the buffers */
1979f0ea3689SLuigi Rizzo 	error = kring->nm_sync(kring, 0);
1980f9790aebSLuigi Rizzo 
1981f9790aebSLuigi Rizzo put_out:
1982f9790aebSLuigi Rizzo 	nm_kr_put(kring);
1983f9790aebSLuigi Rizzo 	return error;
1984f9790aebSLuigi Rizzo }
1985f9790aebSLuigi Rizzo 
198617885a7bSLuigi Rizzo 
19874bf50f18SLuigi Rizzo /* nm_register callback for bwrap */
1988f9790aebSLuigi Rizzo static int
1989f9790aebSLuigi Rizzo netmap_bwrap_register(struct netmap_adapter *na, int onoff)
1990f9790aebSLuigi Rizzo {
1991f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
1992f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
1993f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
1994f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *hostna = &bna->host;
1995f9790aebSLuigi Rizzo 	int error;
1996*847bf383SLuigi Rizzo 	enum txrx t;
1997f9790aebSLuigi Rizzo 
19984bf50f18SLuigi Rizzo 	ND("%s %s", na->name, onoff ? "on" : "off");
1999f9790aebSLuigi Rizzo 
2000f9790aebSLuigi Rizzo 	if (onoff) {
2001f9790aebSLuigi Rizzo 		int i;
2002f9790aebSLuigi Rizzo 
20034bf50f18SLuigi Rizzo 		/* netmap_do_regif has been called on the bwrap na.
20044bf50f18SLuigi Rizzo 		 * We need to pass the information about the
20054bf50f18SLuigi Rizzo 		 * memory allocator down to the hwna before
20064bf50f18SLuigi Rizzo 		 * putting it in netmap mode
20074bf50f18SLuigi Rizzo 		 */
2008f9790aebSLuigi Rizzo 		hwna->na_lut = na->na_lut;
2009f9790aebSLuigi Rizzo 
2010f9790aebSLuigi Rizzo 		if (hostna->na_bdg) {
20114bf50f18SLuigi Rizzo 			/* if the host rings have been attached to switch,
20124bf50f18SLuigi Rizzo 			 * we need to copy the memory allocator information
20134bf50f18SLuigi Rizzo 			 * in the hostna also
20144bf50f18SLuigi Rizzo 			 */
2015f9790aebSLuigi Rizzo 			hostna->up.na_lut = na->na_lut;
2016f9790aebSLuigi Rizzo 		}
2017f9790aebSLuigi Rizzo 
20180c7ba37eSLuigi Rizzo 		/* cross-link the netmap rings
20190c7ba37eSLuigi Rizzo 		 * The original number of rings comes from hwna,
20200c7ba37eSLuigi Rizzo 		 * rx rings on one side equals tx rings on the other.
20214bf50f18SLuigi Rizzo 		 * We need to do this now, after the initialization
20224bf50f18SLuigi Rizzo 		 * of the kring->ring pointers
20230c7ba37eSLuigi Rizzo 		 */
2024*847bf383SLuigi Rizzo 		for_rx_tx(t) {
2025*847bf383SLuigi Rizzo 			enum txrx r= nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
2026*847bf383SLuigi Rizzo 			for (i = 0; i < nma_get_nrings(na, r) + 1; i++) {
2027*847bf383SLuigi Rizzo 				NMR(hwna, t)[i].nkr_num_slots = NMR(na, r)[i].nkr_num_slots;
2028*847bf383SLuigi Rizzo 				NMR(hwna, t)[i].ring = NMR(na, r)[i].ring;
2029f9790aebSLuigi Rizzo 			}
2030f9790aebSLuigi Rizzo 		}
2031f9790aebSLuigi Rizzo 	}
2032f9790aebSLuigi Rizzo 
20334bf50f18SLuigi Rizzo 	/* forward the request to the hwna */
2034f9790aebSLuigi Rizzo 	error = hwna->nm_register(hwna, onoff);
2035f9790aebSLuigi Rizzo 	if (error)
2036f9790aebSLuigi Rizzo 		return error;
2037f9790aebSLuigi Rizzo 
20384bf50f18SLuigi Rizzo 	/* impersonate a netmap_vp_adapter */
20394bf50f18SLuigi Rizzo 	netmap_vp_reg(na, onoff);
20404bf50f18SLuigi Rizzo 	if (hostna->na_bdg)
20414bf50f18SLuigi Rizzo 		netmap_vp_reg(&hostna->up, onoff);
2042f9790aebSLuigi Rizzo 
2043f9790aebSLuigi Rizzo 	if (onoff) {
2044*847bf383SLuigi Rizzo 		u_int i;
2045*847bf383SLuigi Rizzo 		/* intercept the hwna nm_nofify callback on the hw rings */
2046*847bf383SLuigi Rizzo 		for (i = 0; i < hwna->num_rx_rings; i++) {
2047*847bf383SLuigi Rizzo 			hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify;
2048*847bf383SLuigi Rizzo 			hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify;
2049*847bf383SLuigi Rizzo 		}
2050*847bf383SLuigi Rizzo 		i = hwna->num_rx_rings; /* for safety */
2051*847bf383SLuigi Rizzo 		/* save the host ring notify unconditionally */
2052*847bf383SLuigi Rizzo 		hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify;
2053*847bf383SLuigi Rizzo 		if (hostna->na_bdg) {
2054*847bf383SLuigi Rizzo 			/* also intercept the host ring notify */
2055*847bf383SLuigi Rizzo 			hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify;
2056*847bf383SLuigi Rizzo 		}
2057f9790aebSLuigi Rizzo 	} else {
2058*847bf383SLuigi Rizzo 		u_int i;
2059*847bf383SLuigi Rizzo 		/* reset all notify callbacks (including host ring) */
2060*847bf383SLuigi Rizzo 		for (i = 0; i <= hwna->num_rx_rings; i++) {
2061*847bf383SLuigi Rizzo 			hwna->rx_rings[i].nm_notify = hwna->rx_rings[i].save_notify;
2062*847bf383SLuigi Rizzo 			hwna->rx_rings[i].save_notify = NULL;
2063*847bf383SLuigi Rizzo 		}
2064*847bf383SLuigi Rizzo 		hwna->na_lut.lut = NULL;
2065*847bf383SLuigi Rizzo 		hwna->na_lut.objtotal = 0;
2066*847bf383SLuigi Rizzo 		hwna->na_lut.objsize = 0;
2067f9790aebSLuigi Rizzo 	}
2068f9790aebSLuigi Rizzo 
2069f9790aebSLuigi Rizzo 	return 0;
2070f9790aebSLuigi Rizzo }
2071f9790aebSLuigi Rizzo 
20724bf50f18SLuigi Rizzo /* nm_config callback for bwrap */
2073f9790aebSLuigi Rizzo static int
2074f9790aebSLuigi Rizzo netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
2075f9790aebSLuigi Rizzo 				    u_int *rxr, u_int *rxd)
2076f9790aebSLuigi Rizzo {
2077f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
2078f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
2079f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
2080f9790aebSLuigi Rizzo 
2081f9790aebSLuigi Rizzo 	/* forward the request */
2082f9790aebSLuigi Rizzo 	netmap_update_config(hwna);
2083f9790aebSLuigi Rizzo 	/* swap the results */
2084f9790aebSLuigi Rizzo 	*txr = hwna->num_rx_rings;
2085f9790aebSLuigi Rizzo 	*txd = hwna->num_rx_desc;
2086f9790aebSLuigi Rizzo 	*rxr = hwna->num_tx_rings;
2087f9790aebSLuigi Rizzo 	*rxd = hwna->num_rx_desc;
2088f9790aebSLuigi Rizzo 
2089f9790aebSLuigi Rizzo 	return 0;
2090f9790aebSLuigi Rizzo }
2091f9790aebSLuigi Rizzo 
209217885a7bSLuigi Rizzo 
20934bf50f18SLuigi Rizzo /* nm_krings_create callback for bwrap */
2094f9790aebSLuigi Rizzo static int
2095f9790aebSLuigi Rizzo netmap_bwrap_krings_create(struct netmap_adapter *na)
2096f9790aebSLuigi Rizzo {
2097f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
2098f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
2099f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
2100f9790aebSLuigi Rizzo 	struct netmap_adapter *hostna = &bna->host.up;
2101f9790aebSLuigi Rizzo 	int error;
2102f9790aebSLuigi Rizzo 
21034bf50f18SLuigi Rizzo 	ND("%s", na->name);
2104f9790aebSLuigi Rizzo 
21054bf50f18SLuigi Rizzo 	/* impersonate a netmap_vp_adapter */
2106f9790aebSLuigi Rizzo 	error = netmap_vp_krings_create(na);
2107f9790aebSLuigi Rizzo 	if (error)
2108f9790aebSLuigi Rizzo 		return error;
2109f9790aebSLuigi Rizzo 
21104bf50f18SLuigi Rizzo 	/* also create the hwna krings */
2111f9790aebSLuigi Rizzo 	error = hwna->nm_krings_create(hwna);
2112f9790aebSLuigi Rizzo 	if (error) {
2113f9790aebSLuigi Rizzo 		netmap_vp_krings_delete(na);
2114f9790aebSLuigi Rizzo 		return error;
2115f9790aebSLuigi Rizzo 	}
21164bf50f18SLuigi Rizzo 	/* the connection between the bwrap krings and the hwna krings
21174bf50f18SLuigi Rizzo 	 * will be perfomed later, in the nm_register callback, since
21184bf50f18SLuigi Rizzo 	 * now the kring->ring pointers have not been initialized yet
21194bf50f18SLuigi Rizzo 	 */
2120f9790aebSLuigi Rizzo 
2121f0ea3689SLuigi Rizzo 	if (na->na_flags & NAF_HOST_RINGS) {
21224bf50f18SLuigi Rizzo 		/* the hostna rings are the host rings of the bwrap.
21234bf50f18SLuigi Rizzo 		 * The corresponding krings must point back to the
21244bf50f18SLuigi Rizzo 		 * hostna
21254bf50f18SLuigi Rizzo 		 */
2126*847bf383SLuigi Rizzo 		hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
21274bf50f18SLuigi Rizzo 		hostna->tx_rings[0].na = hostna;
2128*847bf383SLuigi Rizzo 		hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
21294bf50f18SLuigi Rizzo 		hostna->rx_rings[0].na = hostna;
2130f0ea3689SLuigi Rizzo 	}
2131f9790aebSLuigi Rizzo 
2132f9790aebSLuigi Rizzo 	return 0;
2133f9790aebSLuigi Rizzo }
2134f9790aebSLuigi Rizzo 
213517885a7bSLuigi Rizzo 
2136f9790aebSLuigi Rizzo static void
2137f9790aebSLuigi Rizzo netmap_bwrap_krings_delete(struct netmap_adapter *na)
2138f9790aebSLuigi Rizzo {
2139f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
2140f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
2141f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
2142f9790aebSLuigi Rizzo 
21434bf50f18SLuigi Rizzo 	ND("%s", na->name);
2144f9790aebSLuigi Rizzo 
2145f9790aebSLuigi Rizzo 	hwna->nm_krings_delete(hwna);
2146f9790aebSLuigi Rizzo 	netmap_vp_krings_delete(na);
2147f9790aebSLuigi Rizzo }
2148f9790aebSLuigi Rizzo 
214917885a7bSLuigi Rizzo 
2150f9790aebSLuigi Rizzo /* notify method for the bridge-->hwna direction */
2151f9790aebSLuigi Rizzo static int
2152*847bf383SLuigi Rizzo netmap_bwrap_notify(struct netmap_kring *kring, int flags)
2153f9790aebSLuigi Rizzo {
2154*847bf383SLuigi Rizzo 	struct netmap_adapter *na = kring->na;
2155*847bf383SLuigi Rizzo 	struct netmap_bwrap_adapter *bna = na->na_private;
2156f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
2157*847bf383SLuigi Rizzo 	u_int ring_n = kring->ring_id;
2158*847bf383SLuigi Rizzo 	u_int lim = kring->nkr_num_slots - 1;
2159*847bf383SLuigi Rizzo 	struct netmap_kring *hw_kring;
2160f9790aebSLuigi Rizzo 	int error = 0;
2161f9790aebSLuigi Rizzo 
2162*847bf383SLuigi Rizzo 	ND("%s: na %s hwna %s",
2163*847bf383SLuigi Rizzo 			(kring ? kring->name : "NULL!"),
2164*847bf383SLuigi Rizzo 			(na ? na->name : "NULL!"),
2165*847bf383SLuigi Rizzo 			(hwna ? hwna->name : "NULL!"));
2166f9790aebSLuigi Rizzo 	hw_kring = &hwna->tx_rings[ring_n];
2167*847bf383SLuigi Rizzo 
2168*847bf383SLuigi Rizzo 	if (nm_kr_tryget(hw_kring))
2169*847bf383SLuigi Rizzo 		return 0;
2170f9790aebSLuigi Rizzo 
21714bf50f18SLuigi Rizzo 	if (!nm_netmap_on(hwna))
2172f9790aebSLuigi Rizzo 		return 0;
217317885a7bSLuigi Rizzo 	/* first step: simulate a user wakeup on the rx ring */
2174*847bf383SLuigi Rizzo 	netmap_vp_rxsync(kring, flags);
217517885a7bSLuigi Rizzo 	ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
21764bf50f18SLuigi Rizzo 		na->name, ring_n,
217717885a7bSLuigi Rizzo 		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
217817885a7bSLuigi Rizzo 		ring->head, ring->cur, ring->tail,
217917885a7bSLuigi Rizzo 		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail);
2180*847bf383SLuigi Rizzo 	/* second step: the new packets are sent on the tx ring
218117885a7bSLuigi Rizzo 	 * (which is actually the same ring)
218217885a7bSLuigi Rizzo 	 */
2183*847bf383SLuigi Rizzo 	hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
2184f0ea3689SLuigi Rizzo 	error = hw_kring->nm_sync(hw_kring, flags);
2185*847bf383SLuigi Rizzo 	if (error)
2186*847bf383SLuigi Rizzo 		goto out;
218717885a7bSLuigi Rizzo 
2188*847bf383SLuigi Rizzo 	/* third step: now we are back the rx ring */
218917885a7bSLuigi Rizzo 	/* claim ownership on all hw owned bufs */
2190*847bf383SLuigi Rizzo 	kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */
219117885a7bSLuigi Rizzo 
2192*847bf383SLuigi Rizzo 	/* fourth step: the user goes to sleep again, causing another rxsync */
2193*847bf383SLuigi Rizzo 	netmap_vp_rxsync(kring, flags);
219417885a7bSLuigi Rizzo 	ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
21954bf50f18SLuigi Rizzo 		na->name, ring_n,
219617885a7bSLuigi Rizzo 		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
219717885a7bSLuigi Rizzo 		ring->head, ring->cur, ring->tail,
219817885a7bSLuigi Rizzo 		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
2199*847bf383SLuigi Rizzo out:
2200*847bf383SLuigi Rizzo 	nm_kr_put(hw_kring);
2201f9790aebSLuigi Rizzo 	return error;
2202f9790aebSLuigi Rizzo }
2203f9790aebSLuigi Rizzo 
220417885a7bSLuigi Rizzo 
22054bf50f18SLuigi Rizzo /* nm_bdg_ctl callback for the bwrap.
22064bf50f18SLuigi Rizzo  * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
22074bf50f18SLuigi Rizzo  * On attach, it needs to provide a fake netmap_priv_d structure and
22084bf50f18SLuigi Rizzo  * perform a netmap_do_regif() on the bwrap. This will put both the
22094bf50f18SLuigi Rizzo  * bwrap and the hwna in netmap mode, with the netmap rings shared
22104bf50f18SLuigi Rizzo  * and cross linked. Moroever, it will start intercepting interrupts
22114bf50f18SLuigi Rizzo  * directed to hwna.
22124bf50f18SLuigi Rizzo  */
2213f9790aebSLuigi Rizzo static int
22144bf50f18SLuigi Rizzo netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
22154bf50f18SLuigi Rizzo {
22164bf50f18SLuigi Rizzo 	struct netmap_priv_d *npriv;
22174bf50f18SLuigi Rizzo 	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
22184bf50f18SLuigi Rizzo 	int error = 0;
22194bf50f18SLuigi Rizzo 
22204bf50f18SLuigi Rizzo 	if (attach) {
22214bf50f18SLuigi Rizzo 		if (NETMAP_OWNED_BY_ANY(na)) {
22224bf50f18SLuigi Rizzo 			return EBUSY;
22234bf50f18SLuigi Rizzo 		}
22244bf50f18SLuigi Rizzo 		if (bna->na_kpriv) {
22254bf50f18SLuigi Rizzo 			/* nothing to do */
22264bf50f18SLuigi Rizzo 			return 0;
22274bf50f18SLuigi Rizzo 		}
22284bf50f18SLuigi Rizzo 		npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
22294bf50f18SLuigi Rizzo 		if (npriv == NULL)
22304bf50f18SLuigi Rizzo 			return ENOMEM;
2231*847bf383SLuigi Rizzo 		error = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags);
2232*847bf383SLuigi Rizzo 		if (error) {
22334bf50f18SLuigi Rizzo 			bzero(npriv, sizeof(*npriv));
22344bf50f18SLuigi Rizzo 			free(npriv, M_DEVBUF);
22354bf50f18SLuigi Rizzo 			return error;
22364bf50f18SLuigi Rizzo 		}
22374bf50f18SLuigi Rizzo 		bna->na_kpriv = npriv;
22384bf50f18SLuigi Rizzo 		na->na_flags |= NAF_BUSY;
22394bf50f18SLuigi Rizzo 	} else {
22404bf50f18SLuigi Rizzo 		int last_instance;
22414bf50f18SLuigi Rizzo 
22424bf50f18SLuigi Rizzo 		if (na->active_fds == 0) /* not registered */
22434bf50f18SLuigi Rizzo 			return EINVAL;
22444bf50f18SLuigi Rizzo 		last_instance = netmap_dtor_locked(bna->na_kpriv);
22454bf50f18SLuigi Rizzo 		if (!last_instance) {
22464bf50f18SLuigi Rizzo 			D("--- error, trying to detach an entry with active mmaps");
22474bf50f18SLuigi Rizzo 			error = EINVAL;
22484bf50f18SLuigi Rizzo 		} else {
22494bf50f18SLuigi Rizzo 			struct nm_bridge *b = bna->up.na_bdg,
22504bf50f18SLuigi Rizzo 				*bh = bna->host.na_bdg;
22514bf50f18SLuigi Rizzo 			npriv = bna->na_kpriv;
22524bf50f18SLuigi Rizzo 			bna->na_kpriv = NULL;
22534bf50f18SLuigi Rizzo 			D("deleting priv");
22544bf50f18SLuigi Rizzo 
22554bf50f18SLuigi Rizzo 			bzero(npriv, sizeof(*npriv));
22564bf50f18SLuigi Rizzo 			free(npriv, M_DEVBUF);
22574bf50f18SLuigi Rizzo 			if (b) {
22584bf50f18SLuigi Rizzo 				/* XXX the bwrap dtor should take care
22594bf50f18SLuigi Rizzo 				 * of this (2014-06-16)
22604bf50f18SLuigi Rizzo 				 */
22614bf50f18SLuigi Rizzo 				netmap_bdg_detach_common(b, bna->up.bdg_port,
22624bf50f18SLuigi Rizzo 				    (bh ? bna->host.bdg_port : -1));
22634bf50f18SLuigi Rizzo 			}
22644bf50f18SLuigi Rizzo 			na->na_flags &= ~NAF_BUSY;
22654bf50f18SLuigi Rizzo 		}
22664bf50f18SLuigi Rizzo 	}
22674bf50f18SLuigi Rizzo 	return error;
22684bf50f18SLuigi Rizzo 
22694bf50f18SLuigi Rizzo }
22704bf50f18SLuigi Rizzo 
22714bf50f18SLuigi Rizzo /* attach a bridge wrapper to the 'real' device */
22724bf50f18SLuigi Rizzo int
22734bf50f18SLuigi Rizzo netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
2274f9790aebSLuigi Rizzo {
2275f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna;
22764bf50f18SLuigi Rizzo 	struct netmap_adapter *na = NULL;
22774bf50f18SLuigi Rizzo 	struct netmap_adapter *hostna = NULL;
22784bf50f18SLuigi Rizzo 	int error = 0;
2279*847bf383SLuigi Rizzo 	enum txrx t;
2280f9790aebSLuigi Rizzo 
22814bf50f18SLuigi Rizzo 	/* make sure the NIC is not already in use */
22824bf50f18SLuigi Rizzo 	if (NETMAP_OWNED_BY_ANY(hwna)) {
22834bf50f18SLuigi Rizzo 		D("NIC %s busy, cannot attach to bridge", hwna->name);
22844bf50f18SLuigi Rizzo 		return EBUSY;
22854bf50f18SLuigi Rizzo 	}
2286f9790aebSLuigi Rizzo 
2287f9790aebSLuigi Rizzo 	bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
22884bf50f18SLuigi Rizzo 	if (bna == NULL) {
2289f9790aebSLuigi Rizzo 		return ENOMEM;
22904bf50f18SLuigi Rizzo 	}
2291f9790aebSLuigi Rizzo 
2292f9790aebSLuigi Rizzo 	na = &bna->up.up;
2293*847bf383SLuigi Rizzo 	na->na_private = bna;
22944bf50f18SLuigi Rizzo 	strncpy(na->name, nr_name, sizeof(na->name));
2295f9790aebSLuigi Rizzo 	/* fill the ring data for the bwrap adapter with rx/tx meanings
2296f9790aebSLuigi Rizzo 	 * swapped. The real cross-linking will be done during register,
2297f9790aebSLuigi Rizzo 	 * when all the krings will have been created.
2298f9790aebSLuigi Rizzo 	 */
2299*847bf383SLuigi Rizzo 	for_rx_tx(t) {
2300*847bf383SLuigi Rizzo 		enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
2301*847bf383SLuigi Rizzo 		nma_set_nrings(na, t, nma_get_nrings(hwna, r));
2302*847bf383SLuigi Rizzo 		nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
2303*847bf383SLuigi Rizzo 	}
2304f9790aebSLuigi Rizzo 	na->nm_dtor = netmap_bwrap_dtor;
2305f9790aebSLuigi Rizzo 	na->nm_register = netmap_bwrap_register;
2306f9790aebSLuigi Rizzo 	// na->nm_txsync = netmap_bwrap_txsync;
2307f9790aebSLuigi Rizzo 	// na->nm_rxsync = netmap_bwrap_rxsync;
2308f9790aebSLuigi Rizzo 	na->nm_config = netmap_bwrap_config;
2309f9790aebSLuigi Rizzo 	na->nm_krings_create = netmap_bwrap_krings_create;
2310f9790aebSLuigi Rizzo 	na->nm_krings_delete = netmap_bwrap_krings_delete;
2311f9790aebSLuigi Rizzo 	na->nm_notify = netmap_bwrap_notify;
23124bf50f18SLuigi Rizzo 	na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
23134bf50f18SLuigi Rizzo 	na->pdev = hwna->pdev;
23144bf50f18SLuigi Rizzo 	na->nm_mem = netmap_mem_private_new(na->name,
23154bf50f18SLuigi Rizzo 			na->num_tx_rings, na->num_tx_desc,
23164bf50f18SLuigi Rizzo 			na->num_rx_rings, na->num_rx_desc,
23174bf50f18SLuigi Rizzo 			0, 0, &error);
23184bf50f18SLuigi Rizzo 	na->na_flags |= NAF_MEM_OWNER;
23194bf50f18SLuigi Rizzo 	if (na->nm_mem == NULL)
23204bf50f18SLuigi Rizzo 		goto err_put;
2321f9790aebSLuigi Rizzo 	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
2322f9790aebSLuigi Rizzo 
2323f9790aebSLuigi Rizzo 	bna->hwna = hwna;
2324f9790aebSLuigi Rizzo 	netmap_adapter_get(hwna);
2325f9790aebSLuigi Rizzo 	hwna->na_private = bna; /* weak reference */
23264bf50f18SLuigi Rizzo 	hwna->na_vp = &bna->up;
2327f9790aebSLuigi Rizzo 
2328f0ea3689SLuigi Rizzo 	if (hwna->na_flags & NAF_HOST_RINGS) {
23294bf50f18SLuigi Rizzo 		if (hwna->na_flags & NAF_SW_ONLY)
23304bf50f18SLuigi Rizzo 			na->na_flags |= NAF_SW_ONLY;
2331f0ea3689SLuigi Rizzo 		na->na_flags |= NAF_HOST_RINGS;
2332f9790aebSLuigi Rizzo 		hostna = &bna->host.up;
23334bf50f18SLuigi Rizzo 		snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name);
2334f9790aebSLuigi Rizzo 		hostna->ifp = hwna->ifp;
2335*847bf383SLuigi Rizzo 		for_rx_tx(t) {
2336*847bf383SLuigi Rizzo 			enum txrx r = nm_txrx_swap(t);
2337*847bf383SLuigi Rizzo 			nma_set_nrings(hostna, t, 1);
2338*847bf383SLuigi Rizzo 			nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
2339*847bf383SLuigi Rizzo 		}
2340f9790aebSLuigi Rizzo 		// hostna->nm_txsync = netmap_bwrap_host_txsync;
2341f9790aebSLuigi Rizzo 		// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
2342*847bf383SLuigi Rizzo 		hostna->nm_notify = netmap_bwrap_notify;
2343f9790aebSLuigi Rizzo 		hostna->nm_mem = na->nm_mem;
2344f9790aebSLuigi Rizzo 		hostna->na_private = bna;
23454bf50f18SLuigi Rizzo 		hostna->na_vp = &bna->up;
23464bf50f18SLuigi Rizzo 		na->na_hostvp = hwna->na_hostvp =
23474bf50f18SLuigi Rizzo 			hostna->na_hostvp = &bna->host;
23484bf50f18SLuigi Rizzo 		hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
2349f0ea3689SLuigi Rizzo 	}
2350f9790aebSLuigi Rizzo 
235117885a7bSLuigi Rizzo 	ND("%s<->%s txr %d txd %d rxr %d rxd %d",
23524bf50f18SLuigi Rizzo 		na->name, ifp->if_xname,
2353f9790aebSLuigi Rizzo 		na->num_tx_rings, na->num_tx_desc,
2354f9790aebSLuigi Rizzo 		na->num_rx_rings, na->num_rx_desc);
2355f9790aebSLuigi Rizzo 
2356f9790aebSLuigi Rizzo 	error = netmap_attach_common(na);
2357f9790aebSLuigi Rizzo 	if (error) {
23584bf50f18SLuigi Rizzo 		goto err_free;
23594bf50f18SLuigi Rizzo 	}
23604bf50f18SLuigi Rizzo 	/* make bwrap ifp point to the real ifp
23614bf50f18SLuigi Rizzo 	 * NOTE: netmap_attach_common() interprets a non-NULL na->ifp
23624bf50f18SLuigi Rizzo 	 * as a request to make the ifp point to the na. Since we
23634bf50f18SLuigi Rizzo 	 * do not want to change the na already pointed to by hwna->ifp,
23644bf50f18SLuigi Rizzo 	 * the following assignment has to be delayed until now
23654bf50f18SLuigi Rizzo 	 */
23664bf50f18SLuigi Rizzo 	na->ifp = hwna->ifp;
23674bf50f18SLuigi Rizzo 	hwna->na_flags |= NAF_BUSY;
23684bf50f18SLuigi Rizzo 	/* make hwna point to the allocator we are actually using,
23694bf50f18SLuigi Rizzo 	 * so that monitors will be able to find it
23704bf50f18SLuigi Rizzo 	 */
23714bf50f18SLuigi Rizzo 	bna->save_nmd = hwna->nm_mem;
23724bf50f18SLuigi Rizzo 	hwna->nm_mem = na->nm_mem;
23734bf50f18SLuigi Rizzo 	return 0;
23744bf50f18SLuigi Rizzo 
23754bf50f18SLuigi Rizzo err_free:
2376*847bf383SLuigi Rizzo 	netmap_mem_delete(na->nm_mem);
23774bf50f18SLuigi Rizzo err_put:
23784bf50f18SLuigi Rizzo 	hwna->na_vp = hwna->na_hostvp = NULL;
2379f9790aebSLuigi Rizzo 	netmap_adapter_put(hwna);
2380f9790aebSLuigi Rizzo 	free(bna, M_DEVBUF);
2381f9790aebSLuigi Rizzo 	return error;
23824bf50f18SLuigi Rizzo 
2383f9790aebSLuigi Rizzo }
2384f9790aebSLuigi Rizzo 
2385*847bf383SLuigi Rizzo struct nm_bridge *
2386*847bf383SLuigi Rizzo netmap_init_bridges2(u_int n)
2387f9790aebSLuigi Rizzo {
2388f9790aebSLuigi Rizzo 	int i;
2389*847bf383SLuigi Rizzo 	struct nm_bridge *b;
2390*847bf383SLuigi Rizzo 
2391*847bf383SLuigi Rizzo 	b = malloc(sizeof(struct nm_bridge) * n, M_DEVBUF,
2392*847bf383SLuigi Rizzo 		M_NOWAIT | M_ZERO);
2393*847bf383SLuigi Rizzo 	if (b == NULL)
2394*847bf383SLuigi Rizzo 		return NULL;
2395*847bf383SLuigi Rizzo 	for (i = 0; i < n; i++)
2396*847bf383SLuigi Rizzo 		BDG_RWINIT(&b[i]);
2397*847bf383SLuigi Rizzo 	return b;
2398*847bf383SLuigi Rizzo }
2399*847bf383SLuigi Rizzo 
2400*847bf383SLuigi Rizzo void
2401*847bf383SLuigi Rizzo netmap_uninit_bridges2(struct nm_bridge *b, u_int n)
2402*847bf383SLuigi Rizzo {
2403*847bf383SLuigi Rizzo 	int i;
2404*847bf383SLuigi Rizzo 
2405*847bf383SLuigi Rizzo 	if (b == NULL)
2406*847bf383SLuigi Rizzo 		return;
2407*847bf383SLuigi Rizzo 
2408*847bf383SLuigi Rizzo 	for (i = 0; i < n; i++)
2409*847bf383SLuigi Rizzo 		BDG_RWDESTROY(&b[i]);
2410*847bf383SLuigi Rizzo 	free(b, M_DEVBUF);
2411*847bf383SLuigi Rizzo }
2412*847bf383SLuigi Rizzo 
2413*847bf383SLuigi Rizzo int
2414*847bf383SLuigi Rizzo netmap_init_bridges(void)
2415*847bf383SLuigi Rizzo {
2416*847bf383SLuigi Rizzo #ifdef CONFIG_NET_NS
2417*847bf383SLuigi Rizzo 	return netmap_bns_register();
2418*847bf383SLuigi Rizzo #else
2419*847bf383SLuigi Rizzo 	nm_bridges = netmap_init_bridges2(NM_BRIDGES);
2420*847bf383SLuigi Rizzo 	if (nm_bridges == NULL)
2421*847bf383SLuigi Rizzo 		return ENOMEM;
2422*847bf383SLuigi Rizzo 	return 0;
2423*847bf383SLuigi Rizzo #endif
2424*847bf383SLuigi Rizzo }
2425*847bf383SLuigi Rizzo 
2426*847bf383SLuigi Rizzo void
2427*847bf383SLuigi Rizzo netmap_uninit_bridges(void)
2428*847bf383SLuigi Rizzo {
2429*847bf383SLuigi Rizzo #ifdef CONFIG_NET_NS
2430*847bf383SLuigi Rizzo 	netmap_bns_unregister();
2431*847bf383SLuigi Rizzo #else
2432*847bf383SLuigi Rizzo 	netmap_uninit_bridges2(nm_bridges, NM_BRIDGES);
2433*847bf383SLuigi Rizzo #endif
2434f9790aebSLuigi Rizzo }
2435f9790aebSLuigi Rizzo #endif /* WITH_VALE */
2436