xref: /freebsd-14.2/sys/dev/netmap/netmap_vale.c (revision 37e3a6d3)
1f9790aebSLuigi Rizzo /*
2*37e3a6d3SLuigi Rizzo  * Copyright (C) 2013-2016 Universita` di Pisa
3*37e3a6d3SLuigi Rizzo  * All rights reserved.
4f9790aebSLuigi Rizzo  *
5f9790aebSLuigi Rizzo  * Redistribution and use in source and binary forms, with or without
6f9790aebSLuigi Rizzo  * modification, are permitted provided that the following conditions
7f9790aebSLuigi Rizzo  * are met:
8f9790aebSLuigi Rizzo  *   1. Redistributions of source code must retain the above copyright
9f9790aebSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer.
10f9790aebSLuigi Rizzo  *   2. Redistributions in binary form must reproduce the above copyright
11f9790aebSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer in the
12f9790aebSLuigi Rizzo  *      documentation and/or other materials provided with the distribution.
13f9790aebSLuigi Rizzo  *
14f9790aebSLuigi Rizzo  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15f9790aebSLuigi Rizzo  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16f9790aebSLuigi Rizzo  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17f9790aebSLuigi Rizzo  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18f9790aebSLuigi Rizzo  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19f9790aebSLuigi Rizzo  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20f9790aebSLuigi Rizzo  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21f9790aebSLuigi Rizzo  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22f9790aebSLuigi Rizzo  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23f9790aebSLuigi Rizzo  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24f9790aebSLuigi Rizzo  * SUCH DAMAGE.
25f9790aebSLuigi Rizzo  */
26f9790aebSLuigi Rizzo 
27f9790aebSLuigi Rizzo 
28f9790aebSLuigi Rizzo /*
29f9790aebSLuigi Rizzo  * This module implements the VALE switch for netmap
30f9790aebSLuigi Rizzo 
31f9790aebSLuigi Rizzo --- VALE SWITCH ---
32f9790aebSLuigi Rizzo 
33f9790aebSLuigi Rizzo NMG_LOCK() serializes all modifications to switches and ports.
34f9790aebSLuigi Rizzo A switch cannot be deleted until all ports are gone.
35f9790aebSLuigi Rizzo 
36f9790aebSLuigi Rizzo For each switch, an SX lock (RWlock on linux) protects
37f9790aebSLuigi Rizzo deletion of ports. When configuring or deleting a new port, the
38f9790aebSLuigi Rizzo lock is acquired in exclusive mode (after holding NMG_LOCK).
39f9790aebSLuigi Rizzo When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
40f9790aebSLuigi Rizzo The lock is held throughout the entire forwarding cycle,
41f9790aebSLuigi Rizzo during which the thread may incur a page fault.
42f9790aebSLuigi Rizzo Hence it is important that sleepable shared locks are used.
43f9790aebSLuigi Rizzo 
44f9790aebSLuigi Rizzo On the rx ring, the per-port lock is grabbed initially to reserve
45f9790aebSLuigi Rizzo a number of slots in the ring, then the lock is released,
46f9790aebSLuigi Rizzo packets are copied from source to destination, and then
47f9790aebSLuigi Rizzo the lock is acquired again and the receive ring is updated.
48f9790aebSLuigi Rizzo (A similar thing is done on the tx ring for NIC and host stack
49f9790aebSLuigi Rizzo ports attached to the switch)
50f9790aebSLuigi Rizzo 
51f9790aebSLuigi Rizzo  */
52f9790aebSLuigi Rizzo 
53f9790aebSLuigi Rizzo /*
54f9790aebSLuigi Rizzo  * OS-specific code that is used only within this file.
55f9790aebSLuigi Rizzo  * Other OS-specific code that must be accessed by drivers
56f9790aebSLuigi Rizzo  * is present in netmap_kern.h
57f9790aebSLuigi Rizzo  */
58f9790aebSLuigi Rizzo 
59f9790aebSLuigi Rizzo #if defined(__FreeBSD__)
60f9790aebSLuigi Rizzo #include <sys/cdefs.h> /* prerequisite */
61f9790aebSLuigi Rizzo __FBSDID("$FreeBSD$");
62f9790aebSLuigi Rizzo 
63f9790aebSLuigi Rizzo #include <sys/types.h>
64f9790aebSLuigi Rizzo #include <sys/errno.h>
65f9790aebSLuigi Rizzo #include <sys/param.h>	/* defines used in kernel.h */
66f9790aebSLuigi Rizzo #include <sys/kernel.h>	/* types used in module initialization */
67f9790aebSLuigi Rizzo #include <sys/conf.h>	/* cdevsw struct, UID, GID */
68f9790aebSLuigi Rizzo #include <sys/sockio.h>
69f9790aebSLuigi Rizzo #include <sys/socketvar.h>	/* struct socket */
70f9790aebSLuigi Rizzo #include <sys/malloc.h>
71f9790aebSLuigi Rizzo #include <sys/poll.h>
72f9790aebSLuigi Rizzo #include <sys/rwlock.h>
73f9790aebSLuigi Rizzo #include <sys/socket.h> /* sockaddrs */
74f9790aebSLuigi Rizzo #include <sys/selinfo.h>
75f9790aebSLuigi Rizzo #include <sys/sysctl.h>
76f9790aebSLuigi Rizzo #include <net/if.h>
77f9790aebSLuigi Rizzo #include <net/if_var.h>
78f9790aebSLuigi Rizzo #include <net/bpf.h>		/* BIOCIMMEDIATE */
79f9790aebSLuigi Rizzo #include <machine/bus.h>	/* bus_dmamap_* */
80f9790aebSLuigi Rizzo #include <sys/endian.h>
81f9790aebSLuigi Rizzo #include <sys/refcount.h>
82f9790aebSLuigi Rizzo 
83f9790aebSLuigi Rizzo 
84f9790aebSLuigi Rizzo #define BDG_RWLOCK_T		struct rwlock // struct rwlock
85f9790aebSLuigi Rizzo 
86f9790aebSLuigi Rizzo #define	BDG_RWINIT(b)		\
87f9790aebSLuigi Rizzo 	rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
88f9790aebSLuigi Rizzo #define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)
89f9790aebSLuigi Rizzo #define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)
90f9790aebSLuigi Rizzo #define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)
91f9790aebSLuigi Rizzo #define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)
92f9790aebSLuigi Rizzo #define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)
93f9790aebSLuigi Rizzo #define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)
94f9790aebSLuigi Rizzo 
95f9790aebSLuigi Rizzo 
96f9790aebSLuigi Rizzo #elif defined(linux)
97f9790aebSLuigi Rizzo 
98f9790aebSLuigi Rizzo #include "bsd_glue.h"
99f9790aebSLuigi Rizzo 
100f9790aebSLuigi Rizzo #elif defined(__APPLE__)
101f9790aebSLuigi Rizzo 
102f9790aebSLuigi Rizzo #warning OSX support is only partial
103f9790aebSLuigi Rizzo #include "osx_glue.h"
104f9790aebSLuigi Rizzo 
105*37e3a6d3SLuigi Rizzo #elif defined(_WIN32)
106*37e3a6d3SLuigi Rizzo #include "win_glue.h"
107*37e3a6d3SLuigi Rizzo 
108f9790aebSLuigi Rizzo #else
109f9790aebSLuigi Rizzo 
110f9790aebSLuigi Rizzo #error	Unsupported platform
111f9790aebSLuigi Rizzo 
112f9790aebSLuigi Rizzo #endif /* unsupported */
113f9790aebSLuigi Rizzo 
114f9790aebSLuigi Rizzo /*
115f9790aebSLuigi Rizzo  * common headers
116f9790aebSLuigi Rizzo  */
117f9790aebSLuigi Rizzo 
118f9790aebSLuigi Rizzo #include <net/netmap.h>
119f9790aebSLuigi Rizzo #include <dev/netmap/netmap_kern.h>
120f9790aebSLuigi Rizzo #include <dev/netmap/netmap_mem2.h>
121f9790aebSLuigi Rizzo 
122f9790aebSLuigi Rizzo #ifdef WITH_VALE
123f9790aebSLuigi Rizzo 
124f9790aebSLuigi Rizzo /*
125f9790aebSLuigi Rizzo  * system parameters (most of them in netmap_kern.h)
126*37e3a6d3SLuigi Rizzo  * NM_BDG_NAME	prefix for switch port names, default "vale"
127f9790aebSLuigi Rizzo  * NM_BDG_MAXPORTS	number of ports
128f9790aebSLuigi Rizzo  * NM_BRIDGES	max number of switches in the system.
129f9790aebSLuigi Rizzo  *	XXX should become a sysctl or tunable
130f9790aebSLuigi Rizzo  *
131f9790aebSLuigi Rizzo  * Switch ports are named valeX:Y where X is the switch name and Y
132f9790aebSLuigi Rizzo  * is the port. If Y matches a physical interface name, the port is
133f9790aebSLuigi Rizzo  * connected to a physical device.
134f9790aebSLuigi Rizzo  *
135f9790aebSLuigi Rizzo  * Unlike physical interfaces, switch ports use their own memory region
136f9790aebSLuigi Rizzo  * for rings and buffers.
137f9790aebSLuigi Rizzo  * The virtual interfaces use per-queue lock instead of core lock.
138f9790aebSLuigi Rizzo  * In the tx loop, we aggregate traffic in batches to make all operations
139f9790aebSLuigi Rizzo  * faster. The batch size is bridge_batch.
140f9790aebSLuigi Rizzo  */
/* Compile-time sizing of the switch (XXX several values are guesses). */
#define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
#define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
#define NM_BRIDGE_RINGSIZE	1024	/* in the device */
#define NM_BDG_HASH		1024	/* forwarding table entries */
#define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
#define NM_MULTISEG		64	/* max size of a chain of bufs */
/*
 * Real number of entries in the per-ring tables: a full batch plus
 * room for one more maximum-length chain of buffers.
 */
#define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
/*
 * NM_FT_NULL terminates a list of slots in the ft; it is the first
 * index past the last valid table entry.
 */
#define NM_FT_NULL		NM_BDG_BATCH_MAX
151f9790aebSLuigi Rizzo 
152f9790aebSLuigi Rizzo 
153f9790aebSLuigi Rizzo /*
154f9790aebSLuigi Rizzo  * bridge_batch is set via sysctl to the max batch size to be
155f9790aebSLuigi Rizzo  * used in the bridge. The actual value may be larger as the
156f9790aebSLuigi Rizzo  * last packet in the block may overflow the size.
157f9790aebSLuigi Rizzo  */
158*37e3a6d3SLuigi Rizzo static int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
159*37e3a6d3SLuigi Rizzo SYSBEGIN(vars_vale);
160f9790aebSLuigi Rizzo SYSCTL_DECL(_dev_netmap);
161f9790aebSLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
162*37e3a6d3SLuigi Rizzo SYSEND;
163f9790aebSLuigi Rizzo 
1644bf50f18SLuigi Rizzo static int netmap_vp_create(struct nmreq *, struct ifnet *, struct netmap_vp_adapter **);
1654bf50f18SLuigi Rizzo static int netmap_vp_reg(struct netmap_adapter *na, int onoff);
166*37e3a6d3SLuigi Rizzo static int netmap_bwrap_reg(struct netmap_adapter *, int onoff);
167f9790aebSLuigi Rizzo 
/*
 * Per-destination queue descriptor: for each output interface an
 * nm_bdg_q is used to construct a list of slots, identified by its
 * first (bq_head) and last (bq_tail) index. bq_len counts output
 * buffers rather than list entries, because buffers can be coalesced
 * during the copy.
 */
struct nm_bdg_q {
	uint16_t bq_head;
	uint16_t bq_tail;
	uint32_t bq_len;	/* number of buffers */
};
178f9790aebSLuigi Rizzo 
/*
 * One entry of the bridge forwarding table (XXX revise this).
 */
struct nm_hash_ent {
	uint64_t	mac;	/* the top 2 bytes are the epoch */
	uint64_t	ports;
};
184f9790aebSLuigi Rizzo 
185f9790aebSLuigi Rizzo /*
186f9790aebSLuigi Rizzo  * nm_bridge is a descriptor for a VALE switch.
187f9790aebSLuigi Rizzo  * Interfaces for a bridge are all in bdg_ports[].
188f9790aebSLuigi Rizzo  * The array has fixed size, an empty entry does not terminate
189f9790aebSLuigi Rizzo  * the search, but lookups only occur on attach/detach so we
190f9790aebSLuigi Rizzo  * don't mind if they are slow.
191f9790aebSLuigi Rizzo  *
192f9790aebSLuigi Rizzo  * The bridge is non blocking on the transmit ports: excess
193f9790aebSLuigi Rizzo  * packets are dropped if there is no room on the output port.
194f9790aebSLuigi Rizzo  *
195f9790aebSLuigi Rizzo  * bdg_lock protects accesses to the bdg_ports array.
196f9790aebSLuigi Rizzo  * This is a rw lock (or equivalent).
197f9790aebSLuigi Rizzo  */
198f9790aebSLuigi Rizzo struct nm_bridge {
199f9790aebSLuigi Rizzo 	/* XXX what is the proper alignment/layout ? */
200f9790aebSLuigi Rizzo 	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
201f9790aebSLuigi Rizzo 	int		bdg_namelen;
202f9790aebSLuigi Rizzo 	uint32_t	bdg_active_ports; /* 0 means free */
203f9790aebSLuigi Rizzo 	char		bdg_basename[IFNAMSIZ];
204f9790aebSLuigi Rizzo 
205f9790aebSLuigi Rizzo 	/* Indexes of active ports (up to active_ports)
206f9790aebSLuigi Rizzo 	 * and all other remaining ports.
207f9790aebSLuigi Rizzo 	 */
208f9790aebSLuigi Rizzo 	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];
209f9790aebSLuigi Rizzo 
210f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
211f9790aebSLuigi Rizzo 
212f9790aebSLuigi Rizzo 
213f9790aebSLuigi Rizzo 	/*
214f9790aebSLuigi Rizzo 	 * The function to decide the destination port.
215f9790aebSLuigi Rizzo 	 * It returns either of an index of the destination port,
216f9790aebSLuigi Rizzo 	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
217f9790aebSLuigi Rizzo 	 * forward this packet.  ring_nr is the source ring index, and the
218f9790aebSLuigi Rizzo 	 * function may overwrite this value to forward this packet to a
219f9790aebSLuigi Rizzo 	 * different ring index.
220*37e3a6d3SLuigi Rizzo 	 * This function must be set by netmap_bdg_ctl().
221f9790aebSLuigi Rizzo 	 */
2224bf50f18SLuigi Rizzo 	struct netmap_bdg_ops bdg_ops;
223f9790aebSLuigi Rizzo 
224f9790aebSLuigi Rizzo 	/* the forwarding table, MAC+ports.
225f9790aebSLuigi Rizzo 	 * XXX should be changed to an argument to be passed to
226f9790aebSLuigi Rizzo 	 * the lookup function, and allocated on attach
227f9790aebSLuigi Rizzo 	 */
228f9790aebSLuigi Rizzo 	struct nm_hash_ent ht[NM_BDG_HASH];
229847bf383SLuigi Rizzo 
230847bf383SLuigi Rizzo #ifdef CONFIG_NET_NS
231847bf383SLuigi Rizzo 	struct net *ns;
232847bf383SLuigi Rizzo #endif /* CONFIG_NET_NS */
233f9790aebSLuigi Rizzo };
234f9790aebSLuigi Rizzo 
2354bf50f18SLuigi Rizzo const char*
2364bf50f18SLuigi Rizzo netmap_bdg_name(struct netmap_vp_adapter *vp)
2374bf50f18SLuigi Rizzo {
2384bf50f18SLuigi Rizzo 	struct nm_bridge *b = vp->na_bdg;
2394bf50f18SLuigi Rizzo 	if (b == NULL)
2404bf50f18SLuigi Rizzo 		return NULL;
2414bf50f18SLuigi Rizzo 	return b->bdg_basename;
2424bf50f18SLuigi Rizzo }
2434bf50f18SLuigi Rizzo 
244f9790aebSLuigi Rizzo 
#ifndef CONFIG_NET_NS
/*
 * Table of bridges, used when network namespaces are not configured.
 * XXX in principle nm_bridges could be created dynamically; for now
 * the table is static and deletions are protected by an exclusive
 * lock.
 */
static struct nm_bridge *nm_bridges;
#endif /* !CONFIG_NET_NS */
253f9790aebSLuigi Rizzo 
254f9790aebSLuigi Rizzo 
/*
 * Slightly optimized packet copy. Note the argument order: source
 * first, destination second.
 *
 * Lengths of 1024 bytes or more are handed to memcpy(). Shorter
 * lengths are copied in chunks of 64 bytes (eight 64-bit words), so
 * the length is effectively rounded up to a multiple of 64: the
 * caller must guarantee that both buffers have room for that.
 * A length <= 0 copies nothing.
 *
 * XXX only for multiples of 64 bytes, non overlapped.
 */
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
	const uint64_t *from = _src;
	uint64_t *to = _dst;
	int w;

	if (unlikely(l >= 1024)) {
		memcpy(to, from, l);
		return;
	}
	while (likely(l > 0)) {
		/* one 64-byte chunk, word by word in ascending order */
		for (w = 0; w < 8; w++)
			to[w] = from[w];
		from += 8;
		to += 8;
		l -= 64;
	}
}
283f9790aebSLuigi Rizzo 
284f9790aebSLuigi Rizzo 
/*
 * Return 1 if 'c' may appear in a VALE identifier (an ASCII letter,
 * a decimal digit or '_'), 0 otherwise.
 */
static int
nm_is_id_char(const char c)
{
	if (c == '_')
		return 1;
	if (c >= '0' && c <= '9')
		return 1;
	if (c >= 'A' && c <= 'Z')
		return 1;
	return (c >= 'a' && c <= 'z');
}
293*37e3a6d3SLuigi Rizzo 
294*37e3a6d3SLuigi Rizzo /* Validate the name of a VALE bridge port and return the
295*37e3a6d3SLuigi Rizzo  * position of the ":" character. */
296*37e3a6d3SLuigi Rizzo static int
297*37e3a6d3SLuigi Rizzo nm_vale_name_validate(const char *name)
298*37e3a6d3SLuigi Rizzo {
299*37e3a6d3SLuigi Rizzo 	int colon_pos = -1;
300*37e3a6d3SLuigi Rizzo 	int i;
301*37e3a6d3SLuigi Rizzo 
302*37e3a6d3SLuigi Rizzo 	if (!name || strlen(name) < strlen(NM_BDG_NAME)) {
303*37e3a6d3SLuigi Rizzo 		return -1;
304*37e3a6d3SLuigi Rizzo 	}
305*37e3a6d3SLuigi Rizzo 
306*37e3a6d3SLuigi Rizzo 	for (i = 0; name[i]; i++) {
307*37e3a6d3SLuigi Rizzo 		if (name[i] == ':') {
308*37e3a6d3SLuigi Rizzo 			if (colon_pos != -1) {
309*37e3a6d3SLuigi Rizzo 				return -1;
310*37e3a6d3SLuigi Rizzo 			}
311*37e3a6d3SLuigi Rizzo 			colon_pos = i;
312*37e3a6d3SLuigi Rizzo 		} else if (!nm_is_id_char(name[i])) {
313*37e3a6d3SLuigi Rizzo 			return -1;
314*37e3a6d3SLuigi Rizzo 		}
315*37e3a6d3SLuigi Rizzo 	}
316*37e3a6d3SLuigi Rizzo 
317*37e3a6d3SLuigi Rizzo 	if (i >= IFNAMSIZ) {
318*37e3a6d3SLuigi Rizzo 		return -1;
319*37e3a6d3SLuigi Rizzo 	}
320*37e3a6d3SLuigi Rizzo 
321*37e3a6d3SLuigi Rizzo 	return colon_pos;
322*37e3a6d3SLuigi Rizzo }
323*37e3a6d3SLuigi Rizzo 
324f9790aebSLuigi Rizzo /*
325f9790aebSLuigi Rizzo  * locate a bridge among the existing ones.
326f9790aebSLuigi Rizzo  * MUST BE CALLED WITH NMG_LOCK()
327f9790aebSLuigi Rizzo  *
328f9790aebSLuigi Rizzo  * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
329f9790aebSLuigi Rizzo  * We assume that this is called with a name of at least NM_NAME chars.
330f9790aebSLuigi Rizzo  */
331f9790aebSLuigi Rizzo static struct nm_bridge *
332f9790aebSLuigi Rizzo nm_find_bridge(const char *name, int create)
333f9790aebSLuigi Rizzo {
334*37e3a6d3SLuigi Rizzo 	int i, namelen;
335847bf383SLuigi Rizzo 	struct nm_bridge *b = NULL, *bridges;
336847bf383SLuigi Rizzo 	u_int num_bridges;
337f9790aebSLuigi Rizzo 
338f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
339f9790aebSLuigi Rizzo 
340847bf383SLuigi Rizzo 	netmap_bns_getbridges(&bridges, &num_bridges);
341847bf383SLuigi Rizzo 
342*37e3a6d3SLuigi Rizzo 	namelen = nm_vale_name_validate(name);
343*37e3a6d3SLuigi Rizzo 	if (namelen < 0) {
344f9790aebSLuigi Rizzo 		D("invalid bridge name %s", name ? name : NULL);
345f9790aebSLuigi Rizzo 		return NULL;
346f9790aebSLuigi Rizzo 	}
347f9790aebSLuigi Rizzo 
348f9790aebSLuigi Rizzo 	/* lookup the name, remember empty slot if there is one */
349847bf383SLuigi Rizzo 	for (i = 0; i < num_bridges; i++) {
350847bf383SLuigi Rizzo 		struct nm_bridge *x = bridges + i;
351f9790aebSLuigi Rizzo 
352f9790aebSLuigi Rizzo 		if (x->bdg_active_ports == 0) {
353f9790aebSLuigi Rizzo 			if (create && b == NULL)
354f9790aebSLuigi Rizzo 				b = x;	/* record empty slot */
355f9790aebSLuigi Rizzo 		} else if (x->bdg_namelen != namelen) {
356f9790aebSLuigi Rizzo 			continue;
357f9790aebSLuigi Rizzo 		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
358f9790aebSLuigi Rizzo 			ND("found '%.*s' at %d", namelen, name, i);
359f9790aebSLuigi Rizzo 			b = x;
360f9790aebSLuigi Rizzo 			break;
361f9790aebSLuigi Rizzo 		}
362f9790aebSLuigi Rizzo 	}
363847bf383SLuigi Rizzo 	if (i == num_bridges && b) { /* name not found, can create entry */
364f9790aebSLuigi Rizzo 		/* initialize the bridge */
365f9790aebSLuigi Rizzo 		strncpy(b->bdg_basename, name, namelen);
366f9790aebSLuigi Rizzo 		ND("create new bridge %s with ports %d", b->bdg_basename,
367f9790aebSLuigi Rizzo 			b->bdg_active_ports);
368f9790aebSLuigi Rizzo 		b->bdg_namelen = namelen;
369f9790aebSLuigi Rizzo 		b->bdg_active_ports = 0;
370f9790aebSLuigi Rizzo 		for (i = 0; i < NM_BDG_MAXPORTS; i++)
371f9790aebSLuigi Rizzo 			b->bdg_port_index[i] = i;
372f9790aebSLuigi Rizzo 		/* set the default function */
3734bf50f18SLuigi Rizzo 		b->bdg_ops.lookup = netmap_bdg_learning;
374f9790aebSLuigi Rizzo 		/* reset the MAC address table */
375f9790aebSLuigi Rizzo 		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
376847bf383SLuigi Rizzo 		NM_BNS_GET(b);
377f9790aebSLuigi Rizzo 	}
378f9790aebSLuigi Rizzo 	return b;
379f9790aebSLuigi Rizzo }
380f9790aebSLuigi Rizzo 
381f9790aebSLuigi Rizzo 
382f9790aebSLuigi Rizzo /*
383f9790aebSLuigi Rizzo  * Free the forwarding tables for rings attached to switch ports.
384f9790aebSLuigi Rizzo  */
385f9790aebSLuigi Rizzo static void
386f9790aebSLuigi Rizzo nm_free_bdgfwd(struct netmap_adapter *na)
387f9790aebSLuigi Rizzo {
388f9790aebSLuigi Rizzo 	int nrings, i;
389f9790aebSLuigi Rizzo 	struct netmap_kring *kring;
390f9790aebSLuigi Rizzo 
391f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
39217885a7bSLuigi Rizzo 	nrings = na->num_tx_rings;
39317885a7bSLuigi Rizzo 	kring = na->tx_rings;
394f9790aebSLuigi Rizzo 	for (i = 0; i < nrings; i++) {
395f9790aebSLuigi Rizzo 		if (kring[i].nkr_ft) {
396f9790aebSLuigi Rizzo 			free(kring[i].nkr_ft, M_DEVBUF);
397f9790aebSLuigi Rizzo 			kring[i].nkr_ft = NULL; /* protect from freeing twice */
398f9790aebSLuigi Rizzo 		}
399f9790aebSLuigi Rizzo 	}
400f9790aebSLuigi Rizzo }
401f9790aebSLuigi Rizzo 
402f9790aebSLuigi Rizzo 
403f9790aebSLuigi Rizzo /*
404f9790aebSLuigi Rizzo  * Allocate the forwarding tables for the rings attached to the bridge ports.
405f9790aebSLuigi Rizzo  */
406f9790aebSLuigi Rizzo static int
407f9790aebSLuigi Rizzo nm_alloc_bdgfwd(struct netmap_adapter *na)
408f9790aebSLuigi Rizzo {
409f9790aebSLuigi Rizzo 	int nrings, l, i, num_dstq;
410f9790aebSLuigi Rizzo 	struct netmap_kring *kring;
411f9790aebSLuigi Rizzo 
412f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
413f9790aebSLuigi Rizzo 	/* all port:rings + broadcast */
414f9790aebSLuigi Rizzo 	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
415f9790aebSLuigi Rizzo 	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
416f9790aebSLuigi Rizzo 	l += sizeof(struct nm_bdg_q) * num_dstq;
417f9790aebSLuigi Rizzo 	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
418f9790aebSLuigi Rizzo 
419847bf383SLuigi Rizzo 	nrings = netmap_real_rings(na, NR_TX);
420f9790aebSLuigi Rizzo 	kring = na->tx_rings;
421f9790aebSLuigi Rizzo 	for (i = 0; i < nrings; i++) {
422f9790aebSLuigi Rizzo 		struct nm_bdg_fwd *ft;
423f9790aebSLuigi Rizzo 		struct nm_bdg_q *dstq;
424f9790aebSLuigi Rizzo 		int j;
425f9790aebSLuigi Rizzo 
426f9790aebSLuigi Rizzo 		ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
427f9790aebSLuigi Rizzo 		if (!ft) {
428f9790aebSLuigi Rizzo 			nm_free_bdgfwd(na);
429f9790aebSLuigi Rizzo 			return ENOMEM;
430f9790aebSLuigi Rizzo 		}
431f9790aebSLuigi Rizzo 		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
432f9790aebSLuigi Rizzo 		for (j = 0; j < num_dstq; j++) {
433f9790aebSLuigi Rizzo 			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
434f9790aebSLuigi Rizzo 			dstq[j].bq_len = 0;
435f9790aebSLuigi Rizzo 		}
436f9790aebSLuigi Rizzo 		kring[i].nkr_ft = ft;
437f9790aebSLuigi Rizzo 	}
438f9790aebSLuigi Rizzo 	return 0;
439f9790aebSLuigi Rizzo }
440f9790aebSLuigi Rizzo 
441f9790aebSLuigi Rizzo 
4424bf50f18SLuigi Rizzo /* remove from bridge b the ports in slots hw and sw
4434bf50f18SLuigi Rizzo  * (sw can be -1 if not needed)
4444bf50f18SLuigi Rizzo  */
445f9790aebSLuigi Rizzo static void
446f9790aebSLuigi Rizzo netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
447f9790aebSLuigi Rizzo {
448f9790aebSLuigi Rizzo 	int s_hw = hw, s_sw = sw;
449f9790aebSLuigi Rizzo 	int i, lim =b->bdg_active_ports;
450f9790aebSLuigi Rizzo 	uint8_t tmp[NM_BDG_MAXPORTS];
451f9790aebSLuigi Rizzo 
452f9790aebSLuigi Rizzo 	/*
453f9790aebSLuigi Rizzo 	New algorithm:
454f9790aebSLuigi Rizzo 	make a copy of bdg_port_index;
455f9790aebSLuigi Rizzo 	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
456f9790aebSLuigi Rizzo 	in the array of bdg_port_index, replacing them with
457f9790aebSLuigi Rizzo 	entries from the bottom of the array;
458f9790aebSLuigi Rizzo 	decrement bdg_active_ports;
459f9790aebSLuigi Rizzo 	acquire BDG_WLOCK() and copy back the array.
460f9790aebSLuigi Rizzo 	 */
461f9790aebSLuigi Rizzo 
462f0ea3689SLuigi Rizzo 	if (netmap_verbose)
463f9790aebSLuigi Rizzo 		D("detach %d and %d (lim %d)", hw, sw, lim);
464f9790aebSLuigi Rizzo 	/* make a copy of the list of active ports, update it,
465f9790aebSLuigi Rizzo 	 * and then copy back within BDG_WLOCK().
466f9790aebSLuigi Rizzo 	 */
467f9790aebSLuigi Rizzo 	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
468f9790aebSLuigi Rizzo 	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
469f9790aebSLuigi Rizzo 		if (hw >= 0 && tmp[i] == hw) {
470f9790aebSLuigi Rizzo 			ND("detach hw %d at %d", hw, i);
471f9790aebSLuigi Rizzo 			lim--; /* point to last active port */
472f9790aebSLuigi Rizzo 			tmp[i] = tmp[lim]; /* swap with i */
473f9790aebSLuigi Rizzo 			tmp[lim] = hw;	/* now this is inactive */
474f9790aebSLuigi Rizzo 			hw = -1;
475f9790aebSLuigi Rizzo 		} else if (sw >= 0 && tmp[i] == sw) {
476f9790aebSLuigi Rizzo 			ND("detach sw %d at %d", sw, i);
477f9790aebSLuigi Rizzo 			lim--;
478f9790aebSLuigi Rizzo 			tmp[i] = tmp[lim];
479f9790aebSLuigi Rizzo 			tmp[lim] = sw;
480f9790aebSLuigi Rizzo 			sw = -1;
481f9790aebSLuigi Rizzo 		} else {
482f9790aebSLuigi Rizzo 			i++;
483f9790aebSLuigi Rizzo 		}
484f9790aebSLuigi Rizzo 	}
485f9790aebSLuigi Rizzo 	if (hw >= 0 || sw >= 0) {
486f9790aebSLuigi Rizzo 		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
487f9790aebSLuigi Rizzo 	}
488f9790aebSLuigi Rizzo 
489f9790aebSLuigi Rizzo 	BDG_WLOCK(b);
4904bf50f18SLuigi Rizzo 	if (b->bdg_ops.dtor)
4914bf50f18SLuigi Rizzo 		b->bdg_ops.dtor(b->bdg_ports[s_hw]);
492f9790aebSLuigi Rizzo 	b->bdg_ports[s_hw] = NULL;
493f9790aebSLuigi Rizzo 	if (s_sw >= 0) {
494f9790aebSLuigi Rizzo 		b->bdg_ports[s_sw] = NULL;
495f9790aebSLuigi Rizzo 	}
496f9790aebSLuigi Rizzo 	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
497f9790aebSLuigi Rizzo 	b->bdg_active_ports = lim;
498f9790aebSLuigi Rizzo 	BDG_WUNLOCK(b);
499f9790aebSLuigi Rizzo 
500f9790aebSLuigi Rizzo 	ND("now %d active ports", lim);
501f9790aebSLuigi Rizzo 	if (lim == 0) {
502f9790aebSLuigi Rizzo 		ND("marking bridge %s as free", b->bdg_basename);
5034bf50f18SLuigi Rizzo 		bzero(&b->bdg_ops, sizeof(b->bdg_ops));
504847bf383SLuigi Rizzo 		NM_BNS_PUT(b);
505f9790aebSLuigi Rizzo 	}
506f9790aebSLuigi Rizzo }
507f9790aebSLuigi Rizzo 
5084bf50f18SLuigi Rizzo /* nm_bdg_ctl callback for VALE ports */
5094bf50f18SLuigi Rizzo static int
5104bf50f18SLuigi Rizzo netmap_vp_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
511f9790aebSLuigi Rizzo {
512f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
513f9790aebSLuigi Rizzo 	struct nm_bridge *b = vpna->na_bdg;
514f9790aebSLuigi Rizzo 
515*37e3a6d3SLuigi Rizzo 	(void)nmr;	// XXX merge ?
5164bf50f18SLuigi Rizzo 	if (attach)
5174bf50f18SLuigi Rizzo 		return 0; /* nothing to do */
5184bf50f18SLuigi Rizzo 	if (b) {
5194bf50f18SLuigi Rizzo 		netmap_set_all_rings(na, 0 /* disable */);
5204bf50f18SLuigi Rizzo 		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
5214bf50f18SLuigi Rizzo 		vpna->na_bdg = NULL;
5224bf50f18SLuigi Rizzo 		netmap_set_all_rings(na, 1 /* enable */);
5234bf50f18SLuigi Rizzo 	}
5244bf50f18SLuigi Rizzo 	/* I have took reference just for attach */
5254bf50f18SLuigi Rizzo 	netmap_adapter_put(na);
5264bf50f18SLuigi Rizzo 	return 0;
5274bf50f18SLuigi Rizzo }
5284bf50f18SLuigi Rizzo 
5294bf50f18SLuigi Rizzo /* nm_dtor callback for ephemeral VALE ports */
5304bf50f18SLuigi Rizzo static void
5314bf50f18SLuigi Rizzo netmap_vp_dtor(struct netmap_adapter *na)
5324bf50f18SLuigi Rizzo {
5334bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
5344bf50f18SLuigi Rizzo 	struct nm_bridge *b = vpna->na_bdg;
5354bf50f18SLuigi Rizzo 
5364bf50f18SLuigi Rizzo 	ND("%s has %d references", na->name, na->na_refcount);
537f9790aebSLuigi Rizzo 
538f9790aebSLuigi Rizzo 	if (b) {
539f9790aebSLuigi Rizzo 		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
540f9790aebSLuigi Rizzo 	}
541f9790aebSLuigi Rizzo }
542f9790aebSLuigi Rizzo 
5434bf50f18SLuigi Rizzo /* remove a persistent VALE port from the system */
5444bf50f18SLuigi Rizzo static int
5454bf50f18SLuigi Rizzo nm_vi_destroy(const char *name)
5464bf50f18SLuigi Rizzo {
5474bf50f18SLuigi Rizzo 	struct ifnet *ifp;
5484bf50f18SLuigi Rizzo 	int error;
5494bf50f18SLuigi Rizzo 
5504bf50f18SLuigi Rizzo 	ifp = ifunit_ref(name);
5514bf50f18SLuigi Rizzo 	if (!ifp)
5524bf50f18SLuigi Rizzo 		return ENXIO;
5534bf50f18SLuigi Rizzo 	NMG_LOCK();
5544bf50f18SLuigi Rizzo 	/* make sure this is actually a VALE port */
555*37e3a6d3SLuigi Rizzo 	if (!NM_NA_VALID(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
5564bf50f18SLuigi Rizzo 		error = EINVAL;
5574bf50f18SLuigi Rizzo 		goto err;
5584bf50f18SLuigi Rizzo 	}
5594bf50f18SLuigi Rizzo 
5604bf50f18SLuigi Rizzo 	if (NA(ifp)->na_refcount > 1) {
5614bf50f18SLuigi Rizzo 		error = EBUSY;
5624bf50f18SLuigi Rizzo 		goto err;
5634bf50f18SLuigi Rizzo 	}
5644bf50f18SLuigi Rizzo 	NMG_UNLOCK();
5654bf50f18SLuigi Rizzo 
5664bf50f18SLuigi Rizzo 	D("destroying a persistent vale interface %s", ifp->if_xname);
5674bf50f18SLuigi Rizzo 	/* Linux requires all the references are released
5684bf50f18SLuigi Rizzo 	 * before unregister
5694bf50f18SLuigi Rizzo 	 */
5704bf50f18SLuigi Rizzo 	if_rele(ifp);
5714bf50f18SLuigi Rizzo 	netmap_detach(ifp);
572*37e3a6d3SLuigi Rizzo 	nm_os_vi_detach(ifp);
5734bf50f18SLuigi Rizzo 	return 0;
5744bf50f18SLuigi Rizzo 
5754bf50f18SLuigi Rizzo err:
5764bf50f18SLuigi Rizzo 	NMG_UNLOCK();
5774bf50f18SLuigi Rizzo 	if_rele(ifp);
5784bf50f18SLuigi Rizzo 	return error;
5794bf50f18SLuigi Rizzo }
5804bf50f18SLuigi Rizzo 
5814bf50f18SLuigi Rizzo /*
5824bf50f18SLuigi Rizzo  * Create a virtual interface registered to the system.
5834bf50f18SLuigi Rizzo  * The interface will be attached to a bridge later.
5844bf50f18SLuigi Rizzo  */
5854bf50f18SLuigi Rizzo static int
5864bf50f18SLuigi Rizzo nm_vi_create(struct nmreq *nmr)
5874bf50f18SLuigi Rizzo {
5884bf50f18SLuigi Rizzo 	struct ifnet *ifp;
5894bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *vpna;
5904bf50f18SLuigi Rizzo 	int error;
5914bf50f18SLuigi Rizzo 
5924bf50f18SLuigi Rizzo 	/* don't include VALE prefix */
593*37e3a6d3SLuigi Rizzo 	if (!strncmp(nmr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME)))
5944bf50f18SLuigi Rizzo 		return EINVAL;
5954bf50f18SLuigi Rizzo 	ifp = ifunit_ref(nmr->nr_name);
5964bf50f18SLuigi Rizzo 	if (ifp) { /* already exist, cannot create new one */
5974bf50f18SLuigi Rizzo 		if_rele(ifp);
5984bf50f18SLuigi Rizzo 		return EEXIST;
5994bf50f18SLuigi Rizzo 	}
600*37e3a6d3SLuigi Rizzo 	error = nm_os_vi_persist(nmr->nr_name, &ifp);
6014bf50f18SLuigi Rizzo 	if (error)
6024bf50f18SLuigi Rizzo 		return error;
6034bf50f18SLuigi Rizzo 
6044bf50f18SLuigi Rizzo 	NMG_LOCK();
6054bf50f18SLuigi Rizzo 	/* netmap_vp_create creates a struct netmap_vp_adapter */
6064bf50f18SLuigi Rizzo 	error = netmap_vp_create(nmr, ifp, &vpna);
6074bf50f18SLuigi Rizzo 	if (error) {
6084bf50f18SLuigi Rizzo 		D("error %d", error);
609*37e3a6d3SLuigi Rizzo 		nm_os_vi_detach(ifp);
6104bf50f18SLuigi Rizzo 		return error;
6114bf50f18SLuigi Rizzo 	}
6124bf50f18SLuigi Rizzo 	/* persist-specific routines */
6134bf50f18SLuigi Rizzo 	vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
6144bf50f18SLuigi Rizzo 	netmap_adapter_get(&vpna->up);
615*37e3a6d3SLuigi Rizzo 	NM_ATTACH_NA(ifp, &vpna->up);
6164bf50f18SLuigi Rizzo 	NMG_UNLOCK();
6174bf50f18SLuigi Rizzo 	D("created %s", ifp->if_xname);
6184bf50f18SLuigi Rizzo 	return 0;
6194bf50f18SLuigi Rizzo }
62017885a7bSLuigi Rizzo 
62117885a7bSLuigi Rizzo /* Try to get a reference to a netmap adapter attached to a VALE switch.
62217885a7bSLuigi Rizzo  * If the adapter is found (or is created), this function returns 0, a
62317885a7bSLuigi Rizzo  * non NULL pointer is returned into *na, and the caller holds a
62417885a7bSLuigi Rizzo  * reference to the adapter.
62517885a7bSLuigi Rizzo  * If an adapter is not found, then no reference is grabbed and the
62617885a7bSLuigi Rizzo  * function returns an error code, or 0 if there is just a VALE prefix
62717885a7bSLuigi Rizzo  * mismatch. Therefore the caller holds a reference when
62817885a7bSLuigi Rizzo  * (*na != NULL && return == 0).
62917885a7bSLuigi Rizzo  */
630f9790aebSLuigi Rizzo int
631f9790aebSLuigi Rizzo netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
632f9790aebSLuigi Rizzo {
6334bf50f18SLuigi Rizzo 	char *nr_name = nmr->nr_name;
6344bf50f18SLuigi Rizzo 	const char *ifname;
635f9790aebSLuigi Rizzo 	struct ifnet *ifp;
636f9790aebSLuigi Rizzo 	int error = 0;
6374bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *vpna, *hostna = NULL;
638f9790aebSLuigi Rizzo 	struct nm_bridge *b;
639f9790aebSLuigi Rizzo 	int i, j, cand = -1, cand2 = -1;
640f9790aebSLuigi Rizzo 	int needed;
641f9790aebSLuigi Rizzo 
642f9790aebSLuigi Rizzo 	*na = NULL;     /* default return value */
643f9790aebSLuigi Rizzo 
644f9790aebSLuigi Rizzo 	/* first try to see if this is a bridge port. */
645f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
646*37e3a6d3SLuigi Rizzo 	if (strncmp(nr_name, NM_BDG_NAME, sizeof(NM_BDG_NAME) - 1)) {
647f9790aebSLuigi Rizzo 		return 0;  /* no error, but no VALE prefix */
648f9790aebSLuigi Rizzo 	}
649f9790aebSLuigi Rizzo 
6504bf50f18SLuigi Rizzo 	b = nm_find_bridge(nr_name, create);
651f9790aebSLuigi Rizzo 	if (b == NULL) {
6524bf50f18SLuigi Rizzo 		D("no bridges available for '%s'", nr_name);
653f2637526SLuigi Rizzo 		return (create ? ENOMEM : ENXIO);
654f9790aebSLuigi Rizzo 	}
6554bf50f18SLuigi Rizzo 	if (strlen(nr_name) < b->bdg_namelen) /* impossible */
6564bf50f18SLuigi Rizzo 		panic("x");
657f9790aebSLuigi Rizzo 
658f9790aebSLuigi Rizzo 	/* Now we are sure that name starts with the bridge's name,
659f9790aebSLuigi Rizzo 	 * lookup the port in the bridge. We need to scan the entire
660f9790aebSLuigi Rizzo 	 * list. It is not important to hold a WLOCK on the bridge
661f9790aebSLuigi Rizzo 	 * during the search because NMG_LOCK already guarantees
662f9790aebSLuigi Rizzo 	 * that there are no other possible writers.
663f9790aebSLuigi Rizzo 	 */
664f9790aebSLuigi Rizzo 
665f9790aebSLuigi Rizzo 	/* lookup in the local list of ports */
666f9790aebSLuigi Rizzo 	for (j = 0; j < b->bdg_active_ports; j++) {
667f9790aebSLuigi Rizzo 		i = b->bdg_port_index[j];
668f9790aebSLuigi Rizzo 		vpna = b->bdg_ports[i];
669f9790aebSLuigi Rizzo 		// KASSERT(na != NULL);
670847bf383SLuigi Rizzo 		ND("checking %s", vpna->up.name);
6714bf50f18SLuigi Rizzo 		if (!strcmp(vpna->up.name, nr_name)) {
672f9790aebSLuigi Rizzo 			netmap_adapter_get(&vpna->up);
6734bf50f18SLuigi Rizzo 			ND("found existing if %s refs %d", nr_name)
6744bf50f18SLuigi Rizzo 			*na = &vpna->up;
675f9790aebSLuigi Rizzo 			return 0;
676f9790aebSLuigi Rizzo 		}
677f9790aebSLuigi Rizzo 	}
678f9790aebSLuigi Rizzo 	/* not found, should we create it? */
679f9790aebSLuigi Rizzo 	if (!create)
680f9790aebSLuigi Rizzo 		return ENXIO;
681f9790aebSLuigi Rizzo 	/* yes we should, see if we have space to attach entries */
682f9790aebSLuigi Rizzo 	needed = 2; /* in some cases we only need 1 */
683f9790aebSLuigi Rizzo 	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
684f9790aebSLuigi Rizzo 		D("bridge full %d, cannot create new port", b->bdg_active_ports);
685f2637526SLuigi Rizzo 		return ENOMEM;
686f9790aebSLuigi Rizzo 	}
687f9790aebSLuigi Rizzo 	/* record the next two ports available, but do not allocate yet */
688f9790aebSLuigi Rizzo 	cand = b->bdg_port_index[b->bdg_active_ports];
689f9790aebSLuigi Rizzo 	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
690f9790aebSLuigi Rizzo 	ND("+++ bridge %s port %s used %d avail %d %d",
6914bf50f18SLuigi Rizzo 		b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);
692f9790aebSLuigi Rizzo 
693f9790aebSLuigi Rizzo 	/*
694f9790aebSLuigi Rizzo 	 * try see if there is a matching NIC with this name
695f9790aebSLuigi Rizzo 	 * (after the bridge's name)
696f9790aebSLuigi Rizzo 	 */
6974bf50f18SLuigi Rizzo 	ifname = nr_name + b->bdg_namelen + 1;
6984bf50f18SLuigi Rizzo 	ifp = ifunit_ref(ifname);
6994bf50f18SLuigi Rizzo 	if (!ifp) {
7004bf50f18SLuigi Rizzo 		/* Create an ephemeral virtual port
7014bf50f18SLuigi Rizzo 		 * This block contains all the ephemeral-specific logics
7024bf50f18SLuigi Rizzo 		 */
703f9790aebSLuigi Rizzo 		if (nmr->nr_cmd) {
704f9790aebSLuigi Rizzo 			/* nr_cmd must be 0 for a virtual port */
705f9790aebSLuigi Rizzo 			return EINVAL;
706f9790aebSLuigi Rizzo 		}
707f9790aebSLuigi Rizzo 
708f9790aebSLuigi Rizzo 		/* bdg_netmap_attach creates a struct netmap_adapter */
7094bf50f18SLuigi Rizzo 		error = netmap_vp_create(nmr, NULL, &vpna);
710f9790aebSLuigi Rizzo 		if (error) {
711f9790aebSLuigi Rizzo 			D("error %d", error);
712f9790aebSLuigi Rizzo 			free(ifp, M_DEVBUF);
713f9790aebSLuigi Rizzo 			return error;
714f9790aebSLuigi Rizzo 		}
7154bf50f18SLuigi Rizzo 		/* shortcut - we can skip get_hw_na(),
7164bf50f18SLuigi Rizzo 		 * ownership check and nm_bdg_attach()
7174bf50f18SLuigi Rizzo 		 */
7184bf50f18SLuigi Rizzo 	} else {
7194bf50f18SLuigi Rizzo 		struct netmap_adapter *hw;
720f9790aebSLuigi Rizzo 
7214bf50f18SLuigi Rizzo 		error = netmap_get_hw_na(ifp, &hw);
7224bf50f18SLuigi Rizzo 		if (error || hw == NULL)
723f9790aebSLuigi Rizzo 			goto out;
724f9790aebSLuigi Rizzo 
7254bf50f18SLuigi Rizzo 		/* host adapter might not be created */
7264bf50f18SLuigi Rizzo 		error = hw->nm_bdg_attach(nr_name, hw);
7274bf50f18SLuigi Rizzo 		if (error)
728f9790aebSLuigi Rizzo 			goto out;
7294bf50f18SLuigi Rizzo 		vpna = hw->na_vp;
7304bf50f18SLuigi Rizzo 		hostna = hw->na_hostvp;
7314bf50f18SLuigi Rizzo 		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
7324bf50f18SLuigi Rizzo 			hostna = NULL;
733f9790aebSLuigi Rizzo 	}
734f9790aebSLuigi Rizzo 
735f9790aebSLuigi Rizzo 	BDG_WLOCK(b);
736f9790aebSLuigi Rizzo 	vpna->bdg_port = cand;
737f9790aebSLuigi Rizzo 	ND("NIC  %p to bridge port %d", vpna, cand);
738f9790aebSLuigi Rizzo 	/* bind the port to the bridge (virtual ports are not active) */
739f9790aebSLuigi Rizzo 	b->bdg_ports[cand] = vpna;
740f9790aebSLuigi Rizzo 	vpna->na_bdg = b;
741f9790aebSLuigi Rizzo 	b->bdg_active_ports++;
7424bf50f18SLuigi Rizzo 	if (hostna != NULL) {
743f9790aebSLuigi Rizzo 		/* also bind the host stack to the bridge */
744f9790aebSLuigi Rizzo 		b->bdg_ports[cand2] = hostna;
745f9790aebSLuigi Rizzo 		hostna->bdg_port = cand2;
746f9790aebSLuigi Rizzo 		hostna->na_bdg = b;
747f9790aebSLuigi Rizzo 		b->bdg_active_ports++;
748f9790aebSLuigi Rizzo 		ND("host %p to bridge port %d", hostna, cand2);
749f9790aebSLuigi Rizzo 	}
7504bf50f18SLuigi Rizzo 	ND("if %s refs %d", ifname, vpna->up.na_refcount);
751f9790aebSLuigi Rizzo 	BDG_WUNLOCK(b);
7524bf50f18SLuigi Rizzo 	*na = &vpna->up;
7534bf50f18SLuigi Rizzo 	netmap_adapter_get(*na);
754f9790aebSLuigi Rizzo 	return 0;
755f9790aebSLuigi Rizzo 
756f9790aebSLuigi Rizzo out:
757f9790aebSLuigi Rizzo 	if_rele(ifp);
758f9790aebSLuigi Rizzo 
759f9790aebSLuigi Rizzo 	return error;
760f9790aebSLuigi Rizzo }
761f9790aebSLuigi Rizzo 
762f9790aebSLuigi Rizzo 
7634bf50f18SLuigi Rizzo /* Process NETMAP_BDG_ATTACH */
764f9790aebSLuigi Rizzo static int
7654bf50f18SLuigi Rizzo nm_bdg_ctl_attach(struct nmreq *nmr)
766f9790aebSLuigi Rizzo {
767f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
768f9790aebSLuigi Rizzo 	int error;
769f9790aebSLuigi Rizzo 
770f9790aebSLuigi Rizzo 	NMG_LOCK();
771f2637526SLuigi Rizzo 
77217885a7bSLuigi Rizzo 	error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */);
7734bf50f18SLuigi Rizzo 	if (error) /* no device */
774f9790aebSLuigi Rizzo 		goto unlock_exit;
775f2637526SLuigi Rizzo 
77617885a7bSLuigi Rizzo 	if (na == NULL) { /* VALE prefix missing */
777f9790aebSLuigi Rizzo 		error = EINVAL;
77817885a7bSLuigi Rizzo 		goto unlock_exit;
779f9790aebSLuigi Rizzo 	}
780f9790aebSLuigi Rizzo 
7814bf50f18SLuigi Rizzo 	if (NETMAP_OWNED_BY_ANY(na)) {
782f9790aebSLuigi Rizzo 		error = EBUSY;
783f9790aebSLuigi Rizzo 		goto unref_exit;
784f9790aebSLuigi Rizzo 	}
785f9790aebSLuigi Rizzo 
7864bf50f18SLuigi Rizzo 	if (na->nm_bdg_ctl) {
7874bf50f18SLuigi Rizzo 		/* nop for VALE ports. The bwrap needs to put the hwna
7884bf50f18SLuigi Rizzo 		 * in netmap mode (see netmap_bwrap_bdg_ctl)
7894bf50f18SLuigi Rizzo 		 */
7904bf50f18SLuigi Rizzo 		error = na->nm_bdg_ctl(na, nmr, 1);
7914bf50f18SLuigi Rizzo 		if (error)
792f9790aebSLuigi Rizzo 			goto unref_exit;
7934bf50f18SLuigi Rizzo 		ND("registered %s to netmap-mode", na->name);
794f9790aebSLuigi Rizzo 	}
795f9790aebSLuigi Rizzo 	NMG_UNLOCK();
796f9790aebSLuigi Rizzo 	return 0;
797f9790aebSLuigi Rizzo 
798f9790aebSLuigi Rizzo unref_exit:
799f9790aebSLuigi Rizzo 	netmap_adapter_put(na);
800f9790aebSLuigi Rizzo unlock_exit:
801f9790aebSLuigi Rizzo 	NMG_UNLOCK();
802f9790aebSLuigi Rizzo 	return error;
803f9790aebSLuigi Rizzo }
804f9790aebSLuigi Rizzo 
805*37e3a6d3SLuigi Rizzo static inline int
806*37e3a6d3SLuigi Rizzo nm_is_bwrap(struct netmap_adapter *na)
807*37e3a6d3SLuigi Rizzo {
808*37e3a6d3SLuigi Rizzo 	return na->nm_register == netmap_bwrap_reg;
809*37e3a6d3SLuigi Rizzo }
81017885a7bSLuigi Rizzo 
8114bf50f18SLuigi Rizzo /* process NETMAP_BDG_DETACH */
812f9790aebSLuigi Rizzo static int
8134bf50f18SLuigi Rizzo nm_bdg_ctl_detach(struct nmreq *nmr)
814f9790aebSLuigi Rizzo {
815f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
816f9790aebSLuigi Rizzo 	int error;
817f9790aebSLuigi Rizzo 
818f9790aebSLuigi Rizzo 	NMG_LOCK();
81917885a7bSLuigi Rizzo 	error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */);
820f9790aebSLuigi Rizzo 	if (error) { /* no device, or another bridge or user owns the device */
821f9790aebSLuigi Rizzo 		goto unlock_exit;
822f9790aebSLuigi Rizzo 	}
823f2637526SLuigi Rizzo 
82417885a7bSLuigi Rizzo 	if (na == NULL) { /* VALE prefix missing */
825f9790aebSLuigi Rizzo 		error = EINVAL;
82617885a7bSLuigi Rizzo 		goto unlock_exit;
827*37e3a6d3SLuigi Rizzo 	} else if (nm_is_bwrap(na) &&
828*37e3a6d3SLuigi Rizzo 		   ((struct netmap_bwrap_adapter *)na)->na_polling_state) {
829*37e3a6d3SLuigi Rizzo 		/* Don't detach a NIC with polling */
830*37e3a6d3SLuigi Rizzo 		error = EBUSY;
831*37e3a6d3SLuigi Rizzo 		netmap_adapter_put(na);
832*37e3a6d3SLuigi Rizzo 		goto unlock_exit;
833f9790aebSLuigi Rizzo 	}
8344bf50f18SLuigi Rizzo 	if (na->nm_bdg_ctl) {
8354bf50f18SLuigi Rizzo 		/* remove the port from bridge. The bwrap
8364bf50f18SLuigi Rizzo 		 * also needs to put the hwna in normal mode
8374bf50f18SLuigi Rizzo 		 */
8384bf50f18SLuigi Rizzo 		error = na->nm_bdg_ctl(na, nmr, 0);
839f9790aebSLuigi Rizzo 	}
840f9790aebSLuigi Rizzo 
841f9790aebSLuigi Rizzo 	netmap_adapter_put(na);
842f9790aebSLuigi Rizzo unlock_exit:
843f9790aebSLuigi Rizzo 	NMG_UNLOCK();
844f9790aebSLuigi Rizzo 	return error;
845f9790aebSLuigi Rizzo 
846f9790aebSLuigi Rizzo }
847f9790aebSLuigi Rizzo 
848*37e3a6d3SLuigi Rizzo struct nm_bdg_polling_state;
849*37e3a6d3SLuigi Rizzo struct
850*37e3a6d3SLuigi Rizzo nm_bdg_kthread {
851*37e3a6d3SLuigi Rizzo 	struct nm_kthread *nmk;
852*37e3a6d3SLuigi Rizzo 	u_int qfirst;
853*37e3a6d3SLuigi Rizzo 	u_int qlast;
854*37e3a6d3SLuigi Rizzo 	struct nm_bdg_polling_state *bps;
855*37e3a6d3SLuigi Rizzo };
856*37e3a6d3SLuigi Rizzo 
857*37e3a6d3SLuigi Rizzo struct nm_bdg_polling_state {
858*37e3a6d3SLuigi Rizzo 	bool configured;
859*37e3a6d3SLuigi Rizzo 	bool stopped;
860*37e3a6d3SLuigi Rizzo 	struct netmap_bwrap_adapter *bna;
861*37e3a6d3SLuigi Rizzo 	u_int reg;
862*37e3a6d3SLuigi Rizzo 	u_int qfirst;
863*37e3a6d3SLuigi Rizzo 	u_int qlast;
864*37e3a6d3SLuigi Rizzo 	u_int cpu_from;
865*37e3a6d3SLuigi Rizzo 	u_int ncpus;
866*37e3a6d3SLuigi Rizzo 	struct nm_bdg_kthread *kthreads;
867*37e3a6d3SLuigi Rizzo };
868*37e3a6d3SLuigi Rizzo 
869*37e3a6d3SLuigi Rizzo static void
870*37e3a6d3SLuigi Rizzo netmap_bwrap_polling(void *data)
871*37e3a6d3SLuigi Rizzo {
872*37e3a6d3SLuigi Rizzo 	struct nm_bdg_kthread *nbk = data;
873*37e3a6d3SLuigi Rizzo 	struct netmap_bwrap_adapter *bna;
874*37e3a6d3SLuigi Rizzo 	u_int qfirst, qlast, i;
875*37e3a6d3SLuigi Rizzo 	struct netmap_kring *kring0, *kring;
876*37e3a6d3SLuigi Rizzo 
877*37e3a6d3SLuigi Rizzo 	if (!nbk)
878*37e3a6d3SLuigi Rizzo 		return;
879*37e3a6d3SLuigi Rizzo 	qfirst = nbk->qfirst;
880*37e3a6d3SLuigi Rizzo 	qlast = nbk->qlast;
881*37e3a6d3SLuigi Rizzo 	bna = nbk->bps->bna;
882*37e3a6d3SLuigi Rizzo 	kring0 = NMR(bna->hwna, NR_RX);
883*37e3a6d3SLuigi Rizzo 
884*37e3a6d3SLuigi Rizzo 	for (i = qfirst; i < qlast; i++) {
885*37e3a6d3SLuigi Rizzo 		kring = kring0 + i;
886*37e3a6d3SLuigi Rizzo 		kring->nm_notify(kring, 0);
887*37e3a6d3SLuigi Rizzo 	}
888*37e3a6d3SLuigi Rizzo }
889*37e3a6d3SLuigi Rizzo 
890*37e3a6d3SLuigi Rizzo static int
891*37e3a6d3SLuigi Rizzo nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
892*37e3a6d3SLuigi Rizzo {
893*37e3a6d3SLuigi Rizzo 	struct nm_kthread_cfg kcfg;
894*37e3a6d3SLuigi Rizzo 	int i, j;
895*37e3a6d3SLuigi Rizzo 
896*37e3a6d3SLuigi Rizzo 	bps->kthreads = malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus,
897*37e3a6d3SLuigi Rizzo 				M_DEVBUF, M_NOWAIT | M_ZERO);
898*37e3a6d3SLuigi Rizzo 	if (bps->kthreads == NULL)
899*37e3a6d3SLuigi Rizzo 		return ENOMEM;
900*37e3a6d3SLuigi Rizzo 
901*37e3a6d3SLuigi Rizzo 	bzero(&kcfg, sizeof(kcfg));
902*37e3a6d3SLuigi Rizzo 	kcfg.worker_fn = netmap_bwrap_polling;
903*37e3a6d3SLuigi Rizzo 	for (i = 0; i < bps->ncpus; i++) {
904*37e3a6d3SLuigi Rizzo 		struct nm_bdg_kthread *t = bps->kthreads + i;
905*37e3a6d3SLuigi Rizzo 		int all = (bps->ncpus == 1 && bps->reg == NR_REG_ALL_NIC);
906*37e3a6d3SLuigi Rizzo 		int affinity = bps->cpu_from + i;
907*37e3a6d3SLuigi Rizzo 
908*37e3a6d3SLuigi Rizzo 		t->bps = bps;
909*37e3a6d3SLuigi Rizzo 		t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
910*37e3a6d3SLuigi Rizzo 		t->qlast = all ? bps->qlast : t->qfirst + 1;
911*37e3a6d3SLuigi Rizzo 		D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
912*37e3a6d3SLuigi Rizzo 			t->qlast);
913*37e3a6d3SLuigi Rizzo 
914*37e3a6d3SLuigi Rizzo 		kcfg.type = i;
915*37e3a6d3SLuigi Rizzo 		kcfg.worker_private = t;
916*37e3a6d3SLuigi Rizzo 		t->nmk = nm_os_kthread_create(&kcfg);
917*37e3a6d3SLuigi Rizzo 		if (t->nmk == NULL) {
918*37e3a6d3SLuigi Rizzo 			goto cleanup;
919*37e3a6d3SLuigi Rizzo 		}
920*37e3a6d3SLuigi Rizzo 		nm_os_kthread_set_affinity(t->nmk, affinity);
921*37e3a6d3SLuigi Rizzo 	}
922*37e3a6d3SLuigi Rizzo 	return 0;
923*37e3a6d3SLuigi Rizzo 
924*37e3a6d3SLuigi Rizzo cleanup:
925*37e3a6d3SLuigi Rizzo 	for (j = 0; j < i; j++) {
926*37e3a6d3SLuigi Rizzo 		struct nm_bdg_kthread *t = bps->kthreads + i;
927*37e3a6d3SLuigi Rizzo 		nm_os_kthread_delete(t->nmk);
928*37e3a6d3SLuigi Rizzo 	}
929*37e3a6d3SLuigi Rizzo 	free(bps->kthreads, M_DEVBUF);
930*37e3a6d3SLuigi Rizzo 	return EFAULT;
931*37e3a6d3SLuigi Rizzo }
932*37e3a6d3SLuigi Rizzo 
933*37e3a6d3SLuigi Rizzo /* a version of ptnetmap_start_kthreads() */
934*37e3a6d3SLuigi Rizzo static int
935*37e3a6d3SLuigi Rizzo nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
936*37e3a6d3SLuigi Rizzo {
937*37e3a6d3SLuigi Rizzo 	int error, i, j;
938*37e3a6d3SLuigi Rizzo 
939*37e3a6d3SLuigi Rizzo 	if (!bps) {
940*37e3a6d3SLuigi Rizzo 		D("polling is not configured");
941*37e3a6d3SLuigi Rizzo 		return EFAULT;
942*37e3a6d3SLuigi Rizzo 	}
943*37e3a6d3SLuigi Rizzo 	bps->stopped = false;
944*37e3a6d3SLuigi Rizzo 
945*37e3a6d3SLuigi Rizzo 	for (i = 0; i < bps->ncpus; i++) {
946*37e3a6d3SLuigi Rizzo 		struct nm_bdg_kthread *t = bps->kthreads + i;
947*37e3a6d3SLuigi Rizzo 		error = nm_os_kthread_start(t->nmk);
948*37e3a6d3SLuigi Rizzo 		if (error) {
949*37e3a6d3SLuigi Rizzo 			D("error in nm_kthread_start()");
950*37e3a6d3SLuigi Rizzo 			goto cleanup;
951*37e3a6d3SLuigi Rizzo 		}
952*37e3a6d3SLuigi Rizzo 	}
953*37e3a6d3SLuigi Rizzo 	return 0;
954*37e3a6d3SLuigi Rizzo 
955*37e3a6d3SLuigi Rizzo cleanup:
956*37e3a6d3SLuigi Rizzo 	for (j = 0; j < i; j++) {
957*37e3a6d3SLuigi Rizzo 		struct nm_bdg_kthread *t = bps->kthreads + i;
958*37e3a6d3SLuigi Rizzo 		nm_os_kthread_stop(t->nmk);
959*37e3a6d3SLuigi Rizzo 	}
960*37e3a6d3SLuigi Rizzo 	bps->stopped = true;
961*37e3a6d3SLuigi Rizzo 	return error;
962*37e3a6d3SLuigi Rizzo }
963*37e3a6d3SLuigi Rizzo 
964*37e3a6d3SLuigi Rizzo static void
965*37e3a6d3SLuigi Rizzo nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps)
966*37e3a6d3SLuigi Rizzo {
967*37e3a6d3SLuigi Rizzo 	int i;
968*37e3a6d3SLuigi Rizzo 
969*37e3a6d3SLuigi Rizzo 	if (!bps)
970*37e3a6d3SLuigi Rizzo 		return;
971*37e3a6d3SLuigi Rizzo 
972*37e3a6d3SLuigi Rizzo 	for (i = 0; i < bps->ncpus; i++) {
973*37e3a6d3SLuigi Rizzo 		struct nm_bdg_kthread *t = bps->kthreads + i;
974*37e3a6d3SLuigi Rizzo 		nm_os_kthread_stop(t->nmk);
975*37e3a6d3SLuigi Rizzo 		nm_os_kthread_delete(t->nmk);
976*37e3a6d3SLuigi Rizzo 	}
977*37e3a6d3SLuigi Rizzo 	bps->stopped = true;
978*37e3a6d3SLuigi Rizzo }
979*37e3a6d3SLuigi Rizzo 
980*37e3a6d3SLuigi Rizzo static int
981*37e3a6d3SLuigi Rizzo get_polling_cfg(struct nmreq *nmr, struct netmap_adapter *na,
982*37e3a6d3SLuigi Rizzo 			struct nm_bdg_polling_state *bps)
983*37e3a6d3SLuigi Rizzo {
984*37e3a6d3SLuigi Rizzo 	int req_cpus, avail_cpus, core_from;
985*37e3a6d3SLuigi Rizzo 	u_int reg, i, qfirst, qlast;
986*37e3a6d3SLuigi Rizzo 
987*37e3a6d3SLuigi Rizzo 	avail_cpus = nm_os_ncpus();
988*37e3a6d3SLuigi Rizzo 	req_cpus = nmr->nr_arg1;
989*37e3a6d3SLuigi Rizzo 
990*37e3a6d3SLuigi Rizzo 	if (req_cpus == 0) {
991*37e3a6d3SLuigi Rizzo 		D("req_cpus must be > 0");
992*37e3a6d3SLuigi Rizzo 		return EINVAL;
993*37e3a6d3SLuigi Rizzo 	} else if (req_cpus >= avail_cpus) {
994*37e3a6d3SLuigi Rizzo 		D("for safety, we need at least one core left in the system");
995*37e3a6d3SLuigi Rizzo 		return EINVAL;
996*37e3a6d3SLuigi Rizzo 	}
997*37e3a6d3SLuigi Rizzo 	reg = nmr->nr_flags & NR_REG_MASK;
998*37e3a6d3SLuigi Rizzo 	i = nmr->nr_ringid & NETMAP_RING_MASK;
999*37e3a6d3SLuigi Rizzo 	/*
1000*37e3a6d3SLuigi Rizzo 	 * ONE_NIC: dedicate one core to one ring. If multiple cores
1001*37e3a6d3SLuigi Rizzo 	 *          are specified, consecutive rings are also polled.
1002*37e3a6d3SLuigi Rizzo 	 *          For example, if ringid=2 and 2 cores are given,
1003*37e3a6d3SLuigi Rizzo 	 *          ring 2 and 3 are polled by core 2 and 3, respectively.
1004*37e3a6d3SLuigi Rizzo 	 * ALL_NIC: poll all the rings using a core specified by ringid.
1005*37e3a6d3SLuigi Rizzo 	 *          the number of cores must be 1.
1006*37e3a6d3SLuigi Rizzo 	 */
1007*37e3a6d3SLuigi Rizzo 	if (reg == NR_REG_ONE_NIC) {
1008*37e3a6d3SLuigi Rizzo 		if (i + req_cpus > nma_get_nrings(na, NR_RX)) {
1009*37e3a6d3SLuigi Rizzo 			D("only %d rings exist (ring %u-%u is given)",
1010*37e3a6d3SLuigi Rizzo 				nma_get_nrings(na, NR_RX), i, i+req_cpus);
1011*37e3a6d3SLuigi Rizzo 			return EINVAL;
1012*37e3a6d3SLuigi Rizzo 		}
1013*37e3a6d3SLuigi Rizzo 		qfirst = i;
1014*37e3a6d3SLuigi Rizzo 		qlast = qfirst + req_cpus;
1015*37e3a6d3SLuigi Rizzo 		core_from = qfirst;
1016*37e3a6d3SLuigi Rizzo 	} else if (reg == NR_REG_ALL_NIC) {
1017*37e3a6d3SLuigi Rizzo 		if (req_cpus != 1) {
1018*37e3a6d3SLuigi Rizzo 			D("ncpus must be 1 not %d for REG_ALL_NIC", req_cpus);
1019*37e3a6d3SLuigi Rizzo 			return EINVAL;
1020*37e3a6d3SLuigi Rizzo 		}
1021*37e3a6d3SLuigi Rizzo 		qfirst = 0;
1022*37e3a6d3SLuigi Rizzo 		qlast = nma_get_nrings(na, NR_RX);
1023*37e3a6d3SLuigi Rizzo 		core_from = i;
1024*37e3a6d3SLuigi Rizzo 	} else {
1025*37e3a6d3SLuigi Rizzo 		D("reg must be ALL_NIC or ONE_NIC");
1026*37e3a6d3SLuigi Rizzo 		return EINVAL;
1027*37e3a6d3SLuigi Rizzo 	}
1028*37e3a6d3SLuigi Rizzo 
1029*37e3a6d3SLuigi Rizzo 	bps->reg = reg;
1030*37e3a6d3SLuigi Rizzo 	bps->qfirst = qfirst;
1031*37e3a6d3SLuigi Rizzo 	bps->qlast = qlast;
1032*37e3a6d3SLuigi Rizzo 	bps->cpu_from = core_from;
1033*37e3a6d3SLuigi Rizzo 	bps->ncpus = req_cpus;
1034*37e3a6d3SLuigi Rizzo 	D("%s qfirst %u qlast %u cpu_from %u ncpus %u",
1035*37e3a6d3SLuigi Rizzo 		reg == NR_REG_ALL_NIC ? "REG_ALL_NIC" : "REG_ONE_NIC",
1036*37e3a6d3SLuigi Rizzo 		qfirst, qlast, core_from, req_cpus);
1037*37e3a6d3SLuigi Rizzo 	return 0;
1038*37e3a6d3SLuigi Rizzo }
1039*37e3a6d3SLuigi Rizzo 
1040*37e3a6d3SLuigi Rizzo static int
1041*37e3a6d3SLuigi Rizzo nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na)
1042*37e3a6d3SLuigi Rizzo {
1043*37e3a6d3SLuigi Rizzo 	struct nm_bdg_polling_state *bps;
1044*37e3a6d3SLuigi Rizzo 	struct netmap_bwrap_adapter *bna;
1045*37e3a6d3SLuigi Rizzo 	int error;
1046*37e3a6d3SLuigi Rizzo 
1047*37e3a6d3SLuigi Rizzo 	bna = (struct netmap_bwrap_adapter *)na;
1048*37e3a6d3SLuigi Rizzo 	if (bna->na_polling_state) {
1049*37e3a6d3SLuigi Rizzo 		D("ERROR adapter already in polling mode");
1050*37e3a6d3SLuigi Rizzo 		return EFAULT;
1051*37e3a6d3SLuigi Rizzo 	}
1052*37e3a6d3SLuigi Rizzo 
1053*37e3a6d3SLuigi Rizzo 	bps = malloc(sizeof(*bps), M_DEVBUF, M_NOWAIT | M_ZERO);
1054*37e3a6d3SLuigi Rizzo 	if (!bps)
1055*37e3a6d3SLuigi Rizzo 		return ENOMEM;
1056*37e3a6d3SLuigi Rizzo 	bps->configured = false;
1057*37e3a6d3SLuigi Rizzo 	bps->stopped = true;
1058*37e3a6d3SLuigi Rizzo 
1059*37e3a6d3SLuigi Rizzo 	if (get_polling_cfg(nmr, na, bps)) {
1060*37e3a6d3SLuigi Rizzo 		free(bps, M_DEVBUF);
1061*37e3a6d3SLuigi Rizzo 		return EINVAL;
1062*37e3a6d3SLuigi Rizzo 	}
1063*37e3a6d3SLuigi Rizzo 
1064*37e3a6d3SLuigi Rizzo 	if (nm_bdg_create_kthreads(bps)) {
1065*37e3a6d3SLuigi Rizzo 		free(bps, M_DEVBUF);
1066*37e3a6d3SLuigi Rizzo 		return EFAULT;
1067*37e3a6d3SLuigi Rizzo 	}
1068*37e3a6d3SLuigi Rizzo 
1069*37e3a6d3SLuigi Rizzo 	bps->configured = true;
1070*37e3a6d3SLuigi Rizzo 	bna->na_polling_state = bps;
1071*37e3a6d3SLuigi Rizzo 	bps->bna = bna;
1072*37e3a6d3SLuigi Rizzo 
1073*37e3a6d3SLuigi Rizzo 	/* disable interrupt if possible */
1074*37e3a6d3SLuigi Rizzo 	if (bna->hwna->nm_intr)
1075*37e3a6d3SLuigi Rizzo 		bna->hwna->nm_intr(bna->hwna, 0);
1076*37e3a6d3SLuigi Rizzo 	/* start kthread now */
1077*37e3a6d3SLuigi Rizzo 	error = nm_bdg_polling_start_kthreads(bps);
1078*37e3a6d3SLuigi Rizzo 	if (error) {
1079*37e3a6d3SLuigi Rizzo 		D("ERROR nm_bdg_polling_start_kthread()");
1080*37e3a6d3SLuigi Rizzo 		free(bps->kthreads, M_DEVBUF);
1081*37e3a6d3SLuigi Rizzo 		free(bps, M_DEVBUF);
1082*37e3a6d3SLuigi Rizzo 		bna->na_polling_state = NULL;
1083*37e3a6d3SLuigi Rizzo 		if (bna->hwna->nm_intr)
1084*37e3a6d3SLuigi Rizzo 			bna->hwna->nm_intr(bna->hwna, 1);
1085*37e3a6d3SLuigi Rizzo 	}
1086*37e3a6d3SLuigi Rizzo 	return error;
1087*37e3a6d3SLuigi Rizzo }
1088*37e3a6d3SLuigi Rizzo 
1089*37e3a6d3SLuigi Rizzo static int
1090*37e3a6d3SLuigi Rizzo nm_bdg_ctl_polling_stop(struct nmreq *nmr, struct netmap_adapter *na)
1091*37e3a6d3SLuigi Rizzo {
1092*37e3a6d3SLuigi Rizzo 	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na;
1093*37e3a6d3SLuigi Rizzo 	struct nm_bdg_polling_state *bps;
1094*37e3a6d3SLuigi Rizzo 
1095*37e3a6d3SLuigi Rizzo 	if (!bna->na_polling_state) {
1096*37e3a6d3SLuigi Rizzo 		D("ERROR adapter is not in polling mode");
1097*37e3a6d3SLuigi Rizzo 		return EFAULT;
1098*37e3a6d3SLuigi Rizzo 	}
1099*37e3a6d3SLuigi Rizzo 	bps = bna->na_polling_state;
1100*37e3a6d3SLuigi Rizzo 	nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state);
1101*37e3a6d3SLuigi Rizzo 	bps->configured = false;
1102*37e3a6d3SLuigi Rizzo 	free(bps, M_DEVBUF);
1103*37e3a6d3SLuigi Rizzo 	bna->na_polling_state = NULL;
1104*37e3a6d3SLuigi Rizzo 	/* reenable interrupt */
1105*37e3a6d3SLuigi Rizzo 	if (bna->hwna->nm_intr)
1106*37e3a6d3SLuigi Rizzo 		bna->hwna->nm_intr(bna->hwna, 1);
1107*37e3a6d3SLuigi Rizzo 	return 0;
1108*37e3a6d3SLuigi Rizzo }
1109f9790aebSLuigi Rizzo 
11104bf50f18SLuigi Rizzo /* Called by either user's context (netmap_ioctl())
11114bf50f18SLuigi Rizzo  * or external kernel modules (e.g., Openvswitch).
11124bf50f18SLuigi Rizzo  * Operation is indicated in nmr->nr_cmd.
11134bf50f18SLuigi Rizzo  * NETMAP_BDG_OPS that sets configure/lookup/dtor functions to the bridge
11144bf50f18SLuigi Rizzo  * requires bdg_ops argument; the other commands ignore this argument.
11154bf50f18SLuigi Rizzo  *
1116f9790aebSLuigi Rizzo  * Called without NMG_LOCK.
1117f9790aebSLuigi Rizzo  */
1118f9790aebSLuigi Rizzo int
11194bf50f18SLuigi Rizzo netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
1120f9790aebSLuigi Rizzo {
1121847bf383SLuigi Rizzo 	struct nm_bridge *b, *bridges;
1122f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
1123f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna;
1124f9790aebSLuigi Rizzo 	char *name = nmr->nr_name;
1125f9790aebSLuigi Rizzo 	int cmd = nmr->nr_cmd, namelen = strlen(name);
1126f9790aebSLuigi Rizzo 	int error = 0, i, j;
1127847bf383SLuigi Rizzo 	u_int num_bridges;
1128847bf383SLuigi Rizzo 
1129847bf383SLuigi Rizzo 	netmap_bns_getbridges(&bridges, &num_bridges);
1130f9790aebSLuigi Rizzo 
1131f9790aebSLuigi Rizzo 	switch (cmd) {
11324bf50f18SLuigi Rizzo 	case NETMAP_BDG_NEWIF:
11334bf50f18SLuigi Rizzo 		error = nm_vi_create(nmr);
11344bf50f18SLuigi Rizzo 		break;
11354bf50f18SLuigi Rizzo 
11364bf50f18SLuigi Rizzo 	case NETMAP_BDG_DELIF:
11374bf50f18SLuigi Rizzo 		error = nm_vi_destroy(nmr->nr_name);
11384bf50f18SLuigi Rizzo 		break;
11394bf50f18SLuigi Rizzo 
1140f9790aebSLuigi Rizzo 	case NETMAP_BDG_ATTACH:
11414bf50f18SLuigi Rizzo 		error = nm_bdg_ctl_attach(nmr);
1142f9790aebSLuigi Rizzo 		break;
1143f9790aebSLuigi Rizzo 
1144f9790aebSLuigi Rizzo 	case NETMAP_BDG_DETACH:
11454bf50f18SLuigi Rizzo 		error = nm_bdg_ctl_detach(nmr);
1146f9790aebSLuigi Rizzo 		break;
1147f9790aebSLuigi Rizzo 
1148f9790aebSLuigi Rizzo 	case NETMAP_BDG_LIST:
1149f9790aebSLuigi Rizzo 		/* this is used to enumerate bridges and ports */
1150f9790aebSLuigi Rizzo 		if (namelen) { /* look up indexes of bridge and port */
1151*37e3a6d3SLuigi Rizzo 			if (strncmp(name, NM_BDG_NAME, strlen(NM_BDG_NAME))) {
1152f9790aebSLuigi Rizzo 				error = EINVAL;
1153f9790aebSLuigi Rizzo 				break;
1154f9790aebSLuigi Rizzo 			}
1155f9790aebSLuigi Rizzo 			NMG_LOCK();
1156f9790aebSLuigi Rizzo 			b = nm_find_bridge(name, 0 /* don't create */);
1157f9790aebSLuigi Rizzo 			if (!b) {
1158f9790aebSLuigi Rizzo 				error = ENOENT;
1159f9790aebSLuigi Rizzo 				NMG_UNLOCK();
1160f9790aebSLuigi Rizzo 				break;
1161f9790aebSLuigi Rizzo 			}
1162f9790aebSLuigi Rizzo 
1163*37e3a6d3SLuigi Rizzo 			error = 0;
1164*37e3a6d3SLuigi Rizzo 			nmr->nr_arg1 = b - bridges; /* bridge index */
1165*37e3a6d3SLuigi Rizzo 			nmr->nr_arg2 = NM_BDG_NOPORT;
1166f9790aebSLuigi Rizzo 			for (j = 0; j < b->bdg_active_ports; j++) {
1167f9790aebSLuigi Rizzo 				i = b->bdg_port_index[j];
1168f9790aebSLuigi Rizzo 				vpna = b->bdg_ports[i];
1169f9790aebSLuigi Rizzo 				if (vpna == NULL) {
1170f9790aebSLuigi Rizzo 					D("---AAAAAAAAARGH-------");
1171f9790aebSLuigi Rizzo 					continue;
1172f9790aebSLuigi Rizzo 				}
1173f9790aebSLuigi Rizzo 				/* the former and the latter identify a
1174f9790aebSLuigi Rizzo 				 * virtual port and a NIC, respectively
1175f9790aebSLuigi Rizzo 				 */
11764bf50f18SLuigi Rizzo 				if (!strcmp(vpna->up.name, name)) {
1177f9790aebSLuigi Rizzo 					nmr->nr_arg2 = i; /* port index */
1178f9790aebSLuigi Rizzo 					break;
1179f9790aebSLuigi Rizzo 				}
1180f9790aebSLuigi Rizzo 			}
1181f9790aebSLuigi Rizzo 			NMG_UNLOCK();
1182f9790aebSLuigi Rizzo 		} else {
1183f9790aebSLuigi Rizzo 			/* return the first non-empty entry starting from
1184f9790aebSLuigi Rizzo 			 * bridge nr_arg1 and port nr_arg2.
1185f9790aebSLuigi Rizzo 			 *
1186f9790aebSLuigi Rizzo 			 * Users can detect the end of the same bridge by
1187f9790aebSLuigi Rizzo 			 * seeing the new and old value of nr_arg1, and can
1188f9790aebSLuigi Rizzo 			 * detect the end of all the bridge by error != 0
1189f9790aebSLuigi Rizzo 			 */
1190f9790aebSLuigi Rizzo 			i = nmr->nr_arg1;
1191f9790aebSLuigi Rizzo 			j = nmr->nr_arg2;
1192f9790aebSLuigi Rizzo 
1193f9790aebSLuigi Rizzo 			NMG_LOCK();
1194f9790aebSLuigi Rizzo 			for (error = ENOENT; i < NM_BRIDGES; i++) {
1195847bf383SLuigi Rizzo 				b = bridges + i;
1196f9790aebSLuigi Rizzo 				if (j >= b->bdg_active_ports) {
1197f9790aebSLuigi Rizzo 					j = 0; /* following bridges scan from 0 */
1198f9790aebSLuigi Rizzo 					continue;
1199f9790aebSLuigi Rizzo 				}
1200f9790aebSLuigi Rizzo 				nmr->nr_arg1 = i;
1201f9790aebSLuigi Rizzo 				nmr->nr_arg2 = j;
1202f9790aebSLuigi Rizzo 				j = b->bdg_port_index[j];
1203f9790aebSLuigi Rizzo 				vpna = b->bdg_ports[j];
12044bf50f18SLuigi Rizzo 				strncpy(name, vpna->up.name, (size_t)IFNAMSIZ);
1205f9790aebSLuigi Rizzo 				error = 0;
1206f9790aebSLuigi Rizzo 				break;
1207f9790aebSLuigi Rizzo 			}
1208f9790aebSLuigi Rizzo 			NMG_UNLOCK();
1209f9790aebSLuigi Rizzo 		}
1210f9790aebSLuigi Rizzo 		break;
1211f9790aebSLuigi Rizzo 
12124bf50f18SLuigi Rizzo 	case NETMAP_BDG_REGOPS: /* XXX this should not be available from userspace */
12134bf50f18SLuigi Rizzo 		/* register callbacks to the given bridge.
1214f9790aebSLuigi Rizzo 		 * nmr->nr_name may be just bridge's name (including ':'
1215f9790aebSLuigi Rizzo 		 * if it is not just NM_NAME).
1216f9790aebSLuigi Rizzo 		 */
12174bf50f18SLuigi Rizzo 		if (!bdg_ops) {
1218f9790aebSLuigi Rizzo 			error = EINVAL;
1219f9790aebSLuigi Rizzo 			break;
1220f9790aebSLuigi Rizzo 		}
1221f9790aebSLuigi Rizzo 		NMG_LOCK();
1222f9790aebSLuigi Rizzo 		b = nm_find_bridge(name, 0 /* don't create */);
1223f9790aebSLuigi Rizzo 		if (!b) {
1224f9790aebSLuigi Rizzo 			error = EINVAL;
1225f9790aebSLuigi Rizzo 		} else {
12264bf50f18SLuigi Rizzo 			b->bdg_ops = *bdg_ops;
1227f9790aebSLuigi Rizzo 		}
1228f9790aebSLuigi Rizzo 		NMG_UNLOCK();
1229f9790aebSLuigi Rizzo 		break;
1230f9790aebSLuigi Rizzo 
1231f0ea3689SLuigi Rizzo 	case NETMAP_BDG_VNET_HDR:
1232f0ea3689SLuigi Rizzo 		/* Valid lengths for the virtio-net header are 0 (no header),
1233f0ea3689SLuigi Rizzo 		   10 and 12. */
1234f0ea3689SLuigi Rizzo 		if (nmr->nr_arg1 != 0 &&
1235f0ea3689SLuigi Rizzo 			nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) &&
1236f0ea3689SLuigi Rizzo 				nmr->nr_arg1 != 12) {
1237f0ea3689SLuigi Rizzo 			error = EINVAL;
1238f0ea3689SLuigi Rizzo 			break;
1239f0ea3689SLuigi Rizzo 		}
1240f9790aebSLuigi Rizzo 		NMG_LOCK();
1241f9790aebSLuigi Rizzo 		error = netmap_get_bdg_na(nmr, &na, 0);
124217885a7bSLuigi Rizzo 		if (na && !error) {
1243f9790aebSLuigi Rizzo 			vpna = (struct netmap_vp_adapter *)na;
1244*37e3a6d3SLuigi Rizzo 			na->virt_hdr_len = nmr->nr_arg1;
1245*37e3a6d3SLuigi Rizzo 			if (na->virt_hdr_len) {
12464bf50f18SLuigi Rizzo 				vpna->mfs = NETMAP_BUF_SIZE(na);
1247*37e3a6d3SLuigi Rizzo 			}
1248*37e3a6d3SLuigi Rizzo 			D("Using vnet_hdr_len %d for %p", na->virt_hdr_len, na);
1249*37e3a6d3SLuigi Rizzo 			netmap_adapter_put(na);
1250*37e3a6d3SLuigi Rizzo 		} else if (!na) {
1251*37e3a6d3SLuigi Rizzo 			error = ENXIO;
1252*37e3a6d3SLuigi Rizzo 		}
1253*37e3a6d3SLuigi Rizzo 		NMG_UNLOCK();
1254*37e3a6d3SLuigi Rizzo 		break;
1255*37e3a6d3SLuigi Rizzo 
1256*37e3a6d3SLuigi Rizzo 	case NETMAP_BDG_POLLING_ON:
1257*37e3a6d3SLuigi Rizzo 	case NETMAP_BDG_POLLING_OFF:
1258*37e3a6d3SLuigi Rizzo 		NMG_LOCK();
1259*37e3a6d3SLuigi Rizzo 		error = netmap_get_bdg_na(nmr, &na, 0);
1260*37e3a6d3SLuigi Rizzo 		if (na && !error) {
1261*37e3a6d3SLuigi Rizzo 			if (!nm_is_bwrap(na)) {
1262*37e3a6d3SLuigi Rizzo 				error = EOPNOTSUPP;
1263*37e3a6d3SLuigi Rizzo 			} else if (cmd == NETMAP_BDG_POLLING_ON) {
1264*37e3a6d3SLuigi Rizzo 				error = nm_bdg_ctl_polling_start(nmr, na);
1265*37e3a6d3SLuigi Rizzo 				if (!error)
1266*37e3a6d3SLuigi Rizzo 					netmap_adapter_get(na);
1267*37e3a6d3SLuigi Rizzo 			} else {
1268*37e3a6d3SLuigi Rizzo 				error = nm_bdg_ctl_polling_stop(nmr, na);
1269*37e3a6d3SLuigi Rizzo 				if (!error)
1270*37e3a6d3SLuigi Rizzo 					netmap_adapter_put(na);
1271*37e3a6d3SLuigi Rizzo 			}
127217885a7bSLuigi Rizzo 			netmap_adapter_put(na);
1273f9790aebSLuigi Rizzo 		}
1274f9790aebSLuigi Rizzo 		NMG_UNLOCK();
1275f9790aebSLuigi Rizzo 		break;
1276f9790aebSLuigi Rizzo 
1277f9790aebSLuigi Rizzo 	default:
1278f9790aebSLuigi Rizzo 		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
1279f9790aebSLuigi Rizzo 		error = EINVAL;
1280f9790aebSLuigi Rizzo 		break;
1281f9790aebSLuigi Rizzo 	}
1282f9790aebSLuigi Rizzo 	return error;
1283f9790aebSLuigi Rizzo }
1284f9790aebSLuigi Rizzo 
12854bf50f18SLuigi Rizzo int
12864bf50f18SLuigi Rizzo netmap_bdg_config(struct nmreq *nmr)
12874bf50f18SLuigi Rizzo {
12884bf50f18SLuigi Rizzo 	struct nm_bridge *b;
12894bf50f18SLuigi Rizzo 	int error = EINVAL;
12904bf50f18SLuigi Rizzo 
12914bf50f18SLuigi Rizzo 	NMG_LOCK();
12924bf50f18SLuigi Rizzo 	b = nm_find_bridge(nmr->nr_name, 0);
12934bf50f18SLuigi Rizzo 	if (!b) {
12944bf50f18SLuigi Rizzo 		NMG_UNLOCK();
12954bf50f18SLuigi Rizzo 		return error;
12964bf50f18SLuigi Rizzo 	}
12974bf50f18SLuigi Rizzo 	NMG_UNLOCK();
12984bf50f18SLuigi Rizzo 	/* Don't call config() with NMG_LOCK() held */
12994bf50f18SLuigi Rizzo 	BDG_RLOCK(b);
13004bf50f18SLuigi Rizzo 	if (b->bdg_ops.config != NULL)
13014bf50f18SLuigi Rizzo 		error = b->bdg_ops.config((struct nm_ifreq *)nmr);
13024bf50f18SLuigi Rizzo 	BDG_RUNLOCK(b);
13034bf50f18SLuigi Rizzo 	return error;
13044bf50f18SLuigi Rizzo }
13054bf50f18SLuigi Rizzo 
13064bf50f18SLuigi Rizzo 
13074bf50f18SLuigi Rizzo /* nm_krings_create callback for VALE ports.
13084bf50f18SLuigi Rizzo  * Calls the standard netmap_krings_create, then adds leases on rx
13094bf50f18SLuigi Rizzo  * rings and bdgfwd on tx rings.
13104bf50f18SLuigi Rizzo  */
1311f9790aebSLuigi Rizzo static int
1312f9790aebSLuigi Rizzo netmap_vp_krings_create(struct netmap_adapter *na)
1313f9790aebSLuigi Rizzo {
1314f0ea3689SLuigi Rizzo 	u_int tailroom;
1315f9790aebSLuigi Rizzo 	int error, i;
1316f9790aebSLuigi Rizzo 	uint32_t *leases;
1317847bf383SLuigi Rizzo 	u_int nrx = netmap_real_rings(na, NR_RX);
1318f9790aebSLuigi Rizzo 
1319f9790aebSLuigi Rizzo 	/*
1320f9790aebSLuigi Rizzo 	 * Leases are attached to RX rings on vale ports
1321f9790aebSLuigi Rizzo 	 */
1322f9790aebSLuigi Rizzo 	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
1323f9790aebSLuigi Rizzo 
1324f0ea3689SLuigi Rizzo 	error = netmap_krings_create(na, tailroom);
1325f9790aebSLuigi Rizzo 	if (error)
1326f9790aebSLuigi Rizzo 		return error;
1327f9790aebSLuigi Rizzo 
1328f9790aebSLuigi Rizzo 	leases = na->tailroom;
1329f9790aebSLuigi Rizzo 
1330f9790aebSLuigi Rizzo 	for (i = 0; i < nrx; i++) { /* Receive rings */
1331f9790aebSLuigi Rizzo 		na->rx_rings[i].nkr_leases = leases;
1332f9790aebSLuigi Rizzo 		leases += na->num_rx_desc;
1333f9790aebSLuigi Rizzo 	}
1334f9790aebSLuigi Rizzo 
1335f9790aebSLuigi Rizzo 	error = nm_alloc_bdgfwd(na);
1336f9790aebSLuigi Rizzo 	if (error) {
1337f9790aebSLuigi Rizzo 		netmap_krings_delete(na);
1338f9790aebSLuigi Rizzo 		return error;
1339f9790aebSLuigi Rizzo 	}
1340f9790aebSLuigi Rizzo 
1341f9790aebSLuigi Rizzo 	return 0;
1342f9790aebSLuigi Rizzo }
1343f9790aebSLuigi Rizzo 
134417885a7bSLuigi Rizzo 
/*
 * nm_krings_delete callback for VALE ports.
 * Releases the bdgfwd structures first, then the krings themselves.
 */
static void
netmap_vp_krings_delete(struct netmap_adapter *na)
{
	nm_free_bdgfwd(na);
	netmap_krings_delete(na);
}
1352f9790aebSLuigi Rizzo 
1353f9790aebSLuigi Rizzo 
1354f9790aebSLuigi Rizzo static int
1355f9790aebSLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
1356f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *na, u_int ring_nr);
1357f9790aebSLuigi Rizzo 
1358f9790aebSLuigi Rizzo 
1359f9790aebSLuigi Rizzo /*
13604bf50f18SLuigi Rizzo  * main dispatch routine for the bridge.
1361f9790aebSLuigi Rizzo  * Grab packets from a kring, move them into the ft structure
1362f9790aebSLuigi Rizzo  * associated to the tx (input) port. Max one instance per port,
1363f9790aebSLuigi Rizzo  * filtered on input (ioctl, poll or XXX).
1364f9790aebSLuigi Rizzo  * Returns the next position in the ring.
1365f9790aebSLuigi Rizzo  */
1366f9790aebSLuigi Rizzo static int
13674bf50f18SLuigi Rizzo nm_bdg_preflush(struct netmap_kring *kring, u_int end)
1368f9790aebSLuigi Rizzo {
13694bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *na =
13704bf50f18SLuigi Rizzo 		(struct netmap_vp_adapter*)kring->na;
1371f9790aebSLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
1372f9790aebSLuigi Rizzo 	struct nm_bdg_fwd *ft;
13734bf50f18SLuigi Rizzo 	u_int ring_nr = kring->ring_id;
1374f9790aebSLuigi Rizzo 	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
1375f9790aebSLuigi Rizzo 	u_int ft_i = 0;	/* start from 0 */
1376f9790aebSLuigi Rizzo 	u_int frags = 1; /* how many frags ? */
1377f9790aebSLuigi Rizzo 	struct nm_bridge *b = na->na_bdg;
1378f9790aebSLuigi Rizzo 
1379f9790aebSLuigi Rizzo 	/* To protect against modifications to the bridge we acquire a
1380f9790aebSLuigi Rizzo 	 * shared lock, waiting if we can sleep (if the source port is
1381f9790aebSLuigi Rizzo 	 * attached to a user process) or with a trylock otherwise (NICs).
1382f9790aebSLuigi Rizzo 	 */
1383f9790aebSLuigi Rizzo 	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
1384f9790aebSLuigi Rizzo 	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
1385f9790aebSLuigi Rizzo 		BDG_RLOCK(b);
1386f9790aebSLuigi Rizzo 	else if (!BDG_RTRYLOCK(b))
1387f9790aebSLuigi Rizzo 		return 0;
1388f9790aebSLuigi Rizzo 	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
1389f9790aebSLuigi Rizzo 	ft = kring->nkr_ft;
1390f9790aebSLuigi Rizzo 
1391f9790aebSLuigi Rizzo 	for (; likely(j != end); j = nm_next(j, lim)) {
1392f9790aebSLuigi Rizzo 		struct netmap_slot *slot = &ring->slot[j];
1393f9790aebSLuigi Rizzo 		char *buf;
1394f9790aebSLuigi Rizzo 
1395f9790aebSLuigi Rizzo 		ft[ft_i].ft_len = slot->len;
1396f9790aebSLuigi Rizzo 		ft[ft_i].ft_flags = slot->flags;
1397f9790aebSLuigi Rizzo 
1398f9790aebSLuigi Rizzo 		ND("flags is 0x%x", slot->flags);
1399847bf383SLuigi Rizzo 		/* we do not use the buf changed flag, but we still need to reset it */
1400847bf383SLuigi Rizzo 		slot->flags &= ~NS_BUF_CHANGED;
1401847bf383SLuigi Rizzo 
1402f9790aebSLuigi Rizzo 		/* this slot goes into a list so initialize the link field */
1403f9790aebSLuigi Rizzo 		ft[ft_i].ft_next = NM_FT_NULL;
1404f9790aebSLuigi Rizzo 		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
14054bf50f18SLuigi Rizzo 			(void *)(uintptr_t)slot->ptr : NMB(&na->up, slot);
1406e31c6ec7SLuigi Rizzo 		if (unlikely(buf == NULL)) {
1407e31c6ec7SLuigi Rizzo 			RD(5, "NULL %s buffer pointer from %s slot %d len %d",
1408e31c6ec7SLuigi Rizzo 				(slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT",
1409e31c6ec7SLuigi Rizzo 				kring->name, j, ft[ft_i].ft_len);
14104bf50f18SLuigi Rizzo 			buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up);
1411e31c6ec7SLuigi Rizzo 			ft[ft_i].ft_len = 0;
1412e31c6ec7SLuigi Rizzo 			ft[ft_i].ft_flags = 0;
1413e31c6ec7SLuigi Rizzo 		}
14142e159ef0SLuigi Rizzo 		__builtin_prefetch(buf);
1415f9790aebSLuigi Rizzo 		++ft_i;
1416f9790aebSLuigi Rizzo 		if (slot->flags & NS_MOREFRAG) {
1417f9790aebSLuigi Rizzo 			frags++;
1418f9790aebSLuigi Rizzo 			continue;
1419f9790aebSLuigi Rizzo 		}
1420f9790aebSLuigi Rizzo 		if (unlikely(netmap_verbose && frags > 1))
1421f9790aebSLuigi Rizzo 			RD(5, "%d frags at %d", frags, ft_i - frags);
1422f9790aebSLuigi Rizzo 		ft[ft_i - frags].ft_frags = frags;
1423f9790aebSLuigi Rizzo 		frags = 1;
1424f9790aebSLuigi Rizzo 		if (unlikely((int)ft_i >= bridge_batch))
1425f9790aebSLuigi Rizzo 			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1426f9790aebSLuigi Rizzo 	}
1427f9790aebSLuigi Rizzo 	if (frags > 1) {
1428*37e3a6d3SLuigi Rizzo 		/* Here ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG, and we
1429*37e3a6d3SLuigi Rizzo 		 * have to fix frags count. */
1430*37e3a6d3SLuigi Rizzo 		frags--;
1431*37e3a6d3SLuigi Rizzo 		ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG;
1432*37e3a6d3SLuigi Rizzo 		ft[ft_i - frags].ft_frags = frags;
1433*37e3a6d3SLuigi Rizzo 		D("Truncate incomplete fragment at %d (%d frags)", ft_i, frags);
1434f9790aebSLuigi Rizzo 	}
1435f9790aebSLuigi Rizzo 	if (ft_i)
1436f9790aebSLuigi Rizzo 		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1437f9790aebSLuigi Rizzo 	BDG_RUNLOCK(b);
1438f9790aebSLuigi Rizzo 	return j;
1439f9790aebSLuigi Rizzo }
1440f9790aebSLuigi Rizzo 
1441f9790aebSLuigi Rizzo 
1442f9790aebSLuigi Rizzo /* ----- FreeBSD if_bridge hash function ------- */
1443f9790aebSLuigi Rizzo 
1444f9790aebSLuigi Rizzo /*
1445f9790aebSLuigi Rizzo  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
1446f9790aebSLuigi Rizzo  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
1447f9790aebSLuigi Rizzo  *
1448f9790aebSLuigi Rizzo  * http://www.burtleburtle.net/bob/hash/spooky.html
1449f9790aebSLuigi Rizzo  */
#define mix(a, b, c)                                                    \
do {                                                                    \
        a -= b; a -= c; a ^= (c >> 13);                                 \
        b -= c; b -= a; b ^= (a << 8);                                  \
        c -= a; c -= b; c ^= (b >> 13);                                 \
        a -= b; a -= c; a ^= (c >> 12);                                 \
        b -= c; b -= a; b ^= (a << 16);                                 \
        c -= a; c -= b; c ^= (b >> 5);                                  \
        a -= b; a -= c; a ^= (c >> 3);                                  \
        b -= c; b -= a; b ^= (a << 10);                                 \
        c -= a; c -= b; c ^= (b >> 15);                                 \
} while (/*CONSTCOND*/0)


/*
 * Hash the six bytes at 'addr' into an index for the forwarding table.
 * Bytes 0..3 are folded into 'a', bytes 4..5 into 'b', the three words
 * are scrambled by mix() and the result is masked with
 * BRIDGE_RTHASH_MASK, so the return value is in [0, NM_BDG_HASH - 1]
 * (the mask form assumes NM_BDG_HASH is a power of two).
 */
static __inline uint32_t
nm_bridge_rthash(const uint8_t *addr)
{
        uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; /* hash key */

        /*
         * Widen each byte to uint32_t before shifting: a uint8_t is
         * promoted to (signed) int, and shifting a byte >= 0x80 left
         * by 24 would overflow it, which is undefined behaviour.
         */
        b += (uint32_t)addr[5] << 8;
        b += addr[4];
        a += (uint32_t)addr[3] << 24;
        a += (uint32_t)addr[2] << 16;
        a += (uint32_t)addr[1] << 8;
        a += addr[0];

        mix(a, b, c);
#define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
        return (c & BRIDGE_RTHASH_MASK);
}

#undef mix
1482f9790aebSLuigi Rizzo 
1483f9790aebSLuigi Rizzo 
14844bf50f18SLuigi Rizzo /* nm_register callback for VALE ports */
1485f9790aebSLuigi Rizzo static int
14864bf50f18SLuigi Rizzo netmap_vp_reg(struct netmap_adapter *na, int onoff)
1487f9790aebSLuigi Rizzo {
1488f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna =
1489f9790aebSLuigi Rizzo 		(struct netmap_vp_adapter*)na;
1490*37e3a6d3SLuigi Rizzo 	enum txrx t;
1491*37e3a6d3SLuigi Rizzo 	int i;
1492f9790aebSLuigi Rizzo 
14934bf50f18SLuigi Rizzo 	/* persistent ports may be put in netmap mode
14944bf50f18SLuigi Rizzo 	 * before being attached to a bridge
1495f9790aebSLuigi Rizzo 	 */
14964bf50f18SLuigi Rizzo 	if (vpna->na_bdg)
1497f9790aebSLuigi Rizzo 		BDG_WLOCK(vpna->na_bdg);
1498f9790aebSLuigi Rizzo 	if (onoff) {
1499*37e3a6d3SLuigi Rizzo 		for_rx_tx(t) {
1500*37e3a6d3SLuigi Rizzo 			for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
1501*37e3a6d3SLuigi Rizzo 				struct netmap_kring *kring = &NMR(na, t)[i];
1502*37e3a6d3SLuigi Rizzo 
1503*37e3a6d3SLuigi Rizzo 				if (nm_kring_pending_on(kring))
1504*37e3a6d3SLuigi Rizzo 					kring->nr_mode = NKR_NETMAP_ON;
1505*37e3a6d3SLuigi Rizzo 			}
1506*37e3a6d3SLuigi Rizzo 		}
1507*37e3a6d3SLuigi Rizzo 		if (na->active_fds == 0)
15084bf50f18SLuigi Rizzo 			na->na_flags |= NAF_NETMAP_ON;
15094bf50f18SLuigi Rizzo 		 /* XXX on FreeBSD, persistent VALE ports should also
15104bf50f18SLuigi Rizzo 		 * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
15114bf50f18SLuigi Rizzo 		 */
1512f9790aebSLuigi Rizzo 	} else {
1513*37e3a6d3SLuigi Rizzo 		if (na->active_fds == 0)
15144bf50f18SLuigi Rizzo 			na->na_flags &= ~NAF_NETMAP_ON;
1515*37e3a6d3SLuigi Rizzo 		for_rx_tx(t) {
1516*37e3a6d3SLuigi Rizzo 			for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
1517*37e3a6d3SLuigi Rizzo 				struct netmap_kring *kring = &NMR(na, t)[i];
1518*37e3a6d3SLuigi Rizzo 
1519*37e3a6d3SLuigi Rizzo 				if (nm_kring_pending_off(kring))
1520*37e3a6d3SLuigi Rizzo 					kring->nr_mode = NKR_NETMAP_OFF;
1521*37e3a6d3SLuigi Rizzo 			}
1522*37e3a6d3SLuigi Rizzo 		}
1523f9790aebSLuigi Rizzo 	}
15244bf50f18SLuigi Rizzo 	if (vpna->na_bdg)
1525f9790aebSLuigi Rizzo 		BDG_WUNLOCK(vpna->na_bdg);
1526f9790aebSLuigi Rizzo 	return 0;
1527f9790aebSLuigi Rizzo }
1528f9790aebSLuigi Rizzo 
1529f9790aebSLuigi Rizzo 
1530f9790aebSLuigi Rizzo /*
1531f9790aebSLuigi Rizzo  * Lookup function for a learning bridge.
1532f9790aebSLuigi Rizzo  * Update the hash table with the source address,
1533f9790aebSLuigi Rizzo  * and then returns the destination port index, and the
1534f9790aebSLuigi Rizzo  * ring in *dst_ring (at the moment, always use ring 0)
1535f9790aebSLuigi Rizzo  */
1536f9790aebSLuigi Rizzo u_int
15374bf50f18SLuigi Rizzo netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
1538847bf383SLuigi Rizzo 		struct netmap_vp_adapter *na)
1539f9790aebSLuigi Rizzo {
15404bf50f18SLuigi Rizzo 	uint8_t *buf = ft->ft_buf;
15414bf50f18SLuigi Rizzo 	u_int buf_len = ft->ft_len;
1542f9790aebSLuigi Rizzo 	struct nm_hash_ent *ht = na->na_bdg->ht;
1543f9790aebSLuigi Rizzo 	uint32_t sh, dh;
1544f9790aebSLuigi Rizzo 	u_int dst, mysrc = na->bdg_port;
1545f9790aebSLuigi Rizzo 	uint64_t smac, dmac;
1546*37e3a6d3SLuigi Rizzo 	uint8_t indbuf[12];
1547f9790aebSLuigi Rizzo 
15484bf50f18SLuigi Rizzo 	/* safety check, unfortunately we have many cases */
1549*37e3a6d3SLuigi Rizzo 	if (buf_len >= 14 + na->up.virt_hdr_len) {
15504bf50f18SLuigi Rizzo 		/* virthdr + mac_hdr in the same slot */
1551*37e3a6d3SLuigi Rizzo 		buf += na->up.virt_hdr_len;
1552*37e3a6d3SLuigi Rizzo 		buf_len -= na->up.virt_hdr_len;
1553*37e3a6d3SLuigi Rizzo 	} else if (buf_len == na->up.virt_hdr_len && ft->ft_flags & NS_MOREFRAG) {
15544bf50f18SLuigi Rizzo 		/* only header in first fragment */
15554bf50f18SLuigi Rizzo 		ft++;
15564bf50f18SLuigi Rizzo 		buf = ft->ft_buf;
15574bf50f18SLuigi Rizzo 		buf_len = ft->ft_len;
15584bf50f18SLuigi Rizzo 	} else {
15594bf50f18SLuigi Rizzo 		RD(5, "invalid buf format, length %d", buf_len);
1560f9790aebSLuigi Rizzo 		return NM_BDG_NOPORT;
1561f9790aebSLuigi Rizzo 	}
1562*37e3a6d3SLuigi Rizzo 
1563*37e3a6d3SLuigi Rizzo 	if (ft->ft_flags & NS_INDIRECT) {
1564*37e3a6d3SLuigi Rizzo 		if (copyin(buf, indbuf, sizeof(indbuf))) {
1565*37e3a6d3SLuigi Rizzo 			return NM_BDG_NOPORT;
1566*37e3a6d3SLuigi Rizzo 		}
1567*37e3a6d3SLuigi Rizzo 		buf = indbuf;
1568*37e3a6d3SLuigi Rizzo 	}
1569*37e3a6d3SLuigi Rizzo 
1570f9790aebSLuigi Rizzo 	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
1571f9790aebSLuigi Rizzo 	smac = le64toh(*(uint64_t *)(buf + 4));
1572f9790aebSLuigi Rizzo 	smac >>= 16;
1573f9790aebSLuigi Rizzo 
1574f9790aebSLuigi Rizzo 	/*
1575f9790aebSLuigi Rizzo 	 * The hash is somewhat expensive, there might be some
1576f9790aebSLuigi Rizzo 	 * worthwhile optimizations here.
1577f9790aebSLuigi Rizzo 	 */
1578847bf383SLuigi Rizzo 	if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */
1579f9790aebSLuigi Rizzo 		uint8_t *s = buf+6;
1580f9790aebSLuigi Rizzo 		sh = nm_bridge_rthash(s); // XXX hash of source
1581f9790aebSLuigi Rizzo 		/* update source port forwarding entry */
1582847bf383SLuigi Rizzo 		na->last_smac = ht[sh].mac = smac;	/* XXX expire ? */
1583f9790aebSLuigi Rizzo 		ht[sh].ports = mysrc;
1584f9790aebSLuigi Rizzo 		if (netmap_verbose)
1585f9790aebSLuigi Rizzo 		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
1586f9790aebSLuigi Rizzo 			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
1587f9790aebSLuigi Rizzo 	}
1588f9790aebSLuigi Rizzo 	dst = NM_BDG_BROADCAST;
1589f9790aebSLuigi Rizzo 	if ((buf[0] & 1) == 0) { /* unicast */
1590f9790aebSLuigi Rizzo 		dh = nm_bridge_rthash(buf); // XXX hash of dst
1591f9790aebSLuigi Rizzo 		if (ht[dh].mac == dmac) {	/* found dst */
1592f9790aebSLuigi Rizzo 			dst = ht[dh].ports;
1593f9790aebSLuigi Rizzo 		}
1594f9790aebSLuigi Rizzo 		/* XXX otherwise return NM_BDG_UNKNOWN ? */
1595f9790aebSLuigi Rizzo 	}
1596f9790aebSLuigi Rizzo 	return dst;
1597f9790aebSLuigi Rizzo }
1598f9790aebSLuigi Rizzo 
1599f9790aebSLuigi Rizzo 
1600f9790aebSLuigi Rizzo /*
160117885a7bSLuigi Rizzo  * Available space in the ring. Only used in VALE code
160217885a7bSLuigi Rizzo  * and only with is_rx = 1
160317885a7bSLuigi Rizzo  */
160417885a7bSLuigi Rizzo static inline uint32_t
160517885a7bSLuigi Rizzo nm_kr_space(struct netmap_kring *k, int is_rx)
160617885a7bSLuigi Rizzo {
160717885a7bSLuigi Rizzo 	int space;
160817885a7bSLuigi Rizzo 
160917885a7bSLuigi Rizzo 	if (is_rx) {
161017885a7bSLuigi Rizzo 		int busy = k->nkr_hwlease - k->nr_hwcur;
161117885a7bSLuigi Rizzo 		if (busy < 0)
161217885a7bSLuigi Rizzo 			busy += k->nkr_num_slots;
161317885a7bSLuigi Rizzo 		space = k->nkr_num_slots - 1 - busy;
161417885a7bSLuigi Rizzo 	} else {
161517885a7bSLuigi Rizzo 		/* XXX never used in this branch */
161617885a7bSLuigi Rizzo 		space = k->nr_hwtail - k->nkr_hwlease;
161717885a7bSLuigi Rizzo 		if (space < 0)
161817885a7bSLuigi Rizzo 			space += k->nkr_num_slots;
161917885a7bSLuigi Rizzo 	}
162017885a7bSLuigi Rizzo #if 0
162117885a7bSLuigi Rizzo 	// sanity check
162217885a7bSLuigi Rizzo 	if (k->nkr_hwlease >= k->nkr_num_slots ||
162317885a7bSLuigi Rizzo 		k->nr_hwcur >= k->nkr_num_slots ||
162417885a7bSLuigi Rizzo 		k->nr_tail >= k->nkr_num_slots ||
162517885a7bSLuigi Rizzo 		busy < 0 ||
162617885a7bSLuigi Rizzo 		busy >= k->nkr_num_slots) {
162717885a7bSLuigi Rizzo 		D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d",			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
162817885a7bSLuigi Rizzo 			k->nkr_lease_idx, k->nkr_num_slots);
162917885a7bSLuigi Rizzo 	}
163017885a7bSLuigi Rizzo #endif
163117885a7bSLuigi Rizzo 	return space;
163217885a7bSLuigi Rizzo }
163317885a7bSLuigi Rizzo 
163417885a7bSLuigi Rizzo 
163517885a7bSLuigi Rizzo 
163617885a7bSLuigi Rizzo 
163717885a7bSLuigi Rizzo /* make a lease on the kring for N positions. return the
163817885a7bSLuigi Rizzo  * lease index
163917885a7bSLuigi Rizzo  * XXX only used in VALE code and with is_rx = 1
164017885a7bSLuigi Rizzo  */
164117885a7bSLuigi Rizzo static inline uint32_t
164217885a7bSLuigi Rizzo nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
164317885a7bSLuigi Rizzo {
164417885a7bSLuigi Rizzo 	uint32_t lim = k->nkr_num_slots - 1;
164517885a7bSLuigi Rizzo 	uint32_t lease_idx = k->nkr_lease_idx;
164617885a7bSLuigi Rizzo 
164717885a7bSLuigi Rizzo 	k->nkr_leases[lease_idx] = NR_NOSLOT;
164817885a7bSLuigi Rizzo 	k->nkr_lease_idx = nm_next(lease_idx, lim);
164917885a7bSLuigi Rizzo 
165017885a7bSLuigi Rizzo 	if (n > nm_kr_space(k, is_rx)) {
165117885a7bSLuigi Rizzo 		D("invalid request for %d slots", n);
165217885a7bSLuigi Rizzo 		panic("x");
165317885a7bSLuigi Rizzo 	}
165417885a7bSLuigi Rizzo 	/* XXX verify that there are n slots */
165517885a7bSLuigi Rizzo 	k->nkr_hwlease += n;
165617885a7bSLuigi Rizzo 	if (k->nkr_hwlease > lim)
165717885a7bSLuigi Rizzo 		k->nkr_hwlease -= lim + 1;
165817885a7bSLuigi Rizzo 
165917885a7bSLuigi Rizzo 	if (k->nkr_hwlease >= k->nkr_num_slots ||
166017885a7bSLuigi Rizzo 		k->nr_hwcur >= k->nkr_num_slots ||
166117885a7bSLuigi Rizzo 		k->nr_hwtail >= k->nkr_num_slots ||
166217885a7bSLuigi Rizzo 		k->nkr_lease_idx >= k->nkr_num_slots) {
166317885a7bSLuigi Rizzo 		D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
16644bf50f18SLuigi Rizzo 			k->na->name,
166517885a7bSLuigi Rizzo 			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
166617885a7bSLuigi Rizzo 			k->nkr_lease_idx, k->nkr_num_slots);
166717885a7bSLuigi Rizzo 	}
166817885a7bSLuigi Rizzo 	return lease_idx;
166917885a7bSLuigi Rizzo }
167017885a7bSLuigi Rizzo 
167117885a7bSLuigi Rizzo /*
16724bf50f18SLuigi Rizzo  *
1673f9790aebSLuigi Rizzo  * This flush routine supports only unicast and broadcast but a large
1674f9790aebSLuigi Rizzo  * number of ports, and lets us replace the learn and dispatch functions.
1675f9790aebSLuigi Rizzo  */
1676f9790aebSLuigi Rizzo int
1677f9790aebSLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
1678f9790aebSLuigi Rizzo 		u_int ring_nr)
1679f9790aebSLuigi Rizzo {
1680f9790aebSLuigi Rizzo 	struct nm_bdg_q *dst_ents, *brddst;
1681f9790aebSLuigi Rizzo 	uint16_t num_dsts = 0, *dsts;
1682f9790aebSLuigi Rizzo 	struct nm_bridge *b = na->na_bdg;
1683*37e3a6d3SLuigi Rizzo 	u_int i, me = na->bdg_port;
1684f9790aebSLuigi Rizzo 
1685f9790aebSLuigi Rizzo 	/*
1686f9790aebSLuigi Rizzo 	 * The work area (pointed by ft) is followed by an array of
1687f9790aebSLuigi Rizzo 	 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
1688f9790aebSLuigi Rizzo 	 * queues per port plus one for the broadcast traffic.
1689f9790aebSLuigi Rizzo 	 * Then we have an array of destination indexes.
1690f9790aebSLuigi Rizzo 	 */
1691f9790aebSLuigi Rizzo 	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
1692f9790aebSLuigi Rizzo 	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
1693f9790aebSLuigi Rizzo 
1694f9790aebSLuigi Rizzo 	/* first pass: find a destination for each packet in the batch */
1695f9790aebSLuigi Rizzo 	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
1696f9790aebSLuigi Rizzo 		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
1697f9790aebSLuigi Rizzo 		uint16_t dst_port, d_i;
1698f9790aebSLuigi Rizzo 		struct nm_bdg_q *d;
1699f9790aebSLuigi Rizzo 
1700f9790aebSLuigi Rizzo 		ND("slot %d frags %d", i, ft[i].ft_frags);
1701f0ea3689SLuigi Rizzo 		/* Drop the packet if the virtio-net header is not into the first
1702f9790aebSLuigi Rizzo 		   fragment nor at the very beginning of the second. */
1703*37e3a6d3SLuigi Rizzo 		if (unlikely(na->up.virt_hdr_len > ft[i].ft_len))
1704f9790aebSLuigi Rizzo 			continue;
17054bf50f18SLuigi Rizzo 		dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na);
1706f9790aebSLuigi Rizzo 		if (netmap_verbose > 255)
1707f9790aebSLuigi Rizzo 			RD(5, "slot %d port %d -> %d", i, me, dst_port);
1708f9790aebSLuigi Rizzo 		if (dst_port == NM_BDG_NOPORT)
1709f9790aebSLuigi Rizzo 			continue; /* this packet is identified to be dropped */
1710f9790aebSLuigi Rizzo 		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
1711f9790aebSLuigi Rizzo 			continue;
1712f9790aebSLuigi Rizzo 		else if (dst_port == NM_BDG_BROADCAST)
1713f9790aebSLuigi Rizzo 			dst_ring = 0; /* broadcasts always go to ring 0 */
1714f9790aebSLuigi Rizzo 		else if (unlikely(dst_port == me ||
1715f9790aebSLuigi Rizzo 		    !b->bdg_ports[dst_port]))
1716f9790aebSLuigi Rizzo 			continue;
1717f9790aebSLuigi Rizzo 
1718f9790aebSLuigi Rizzo 		/* get a position in the scratch pad */
1719f9790aebSLuigi Rizzo 		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
1720f9790aebSLuigi Rizzo 		d = dst_ents + d_i;
1721f9790aebSLuigi Rizzo 
1722f9790aebSLuigi Rizzo 		/* append the first fragment to the list */
1723f9790aebSLuigi Rizzo 		if (d->bq_head == NM_FT_NULL) { /* new destination */
1724f9790aebSLuigi Rizzo 			d->bq_head = d->bq_tail = i;
1725f9790aebSLuigi Rizzo 			/* remember this position to be scanned later */
1726f9790aebSLuigi Rizzo 			if (dst_port != NM_BDG_BROADCAST)
1727f9790aebSLuigi Rizzo 				dsts[num_dsts++] = d_i;
1728f9790aebSLuigi Rizzo 		} else {
1729f9790aebSLuigi Rizzo 			ft[d->bq_tail].ft_next = i;
1730f9790aebSLuigi Rizzo 			d->bq_tail = i;
1731f9790aebSLuigi Rizzo 		}
1732f9790aebSLuigi Rizzo 		d->bq_len += ft[i].ft_frags;
1733f9790aebSLuigi Rizzo 	}
1734f9790aebSLuigi Rizzo 
1735f9790aebSLuigi Rizzo 	/*
1736f9790aebSLuigi Rizzo 	 * Broadcast traffic goes to ring 0 on all destinations.
1737f9790aebSLuigi Rizzo 	 * So we need to add these rings to the list of ports to scan.
1738f9790aebSLuigi Rizzo 	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
1739f9790aebSLuigi Rizzo 	 * expensive. We should keep a compact list of active destinations
1740f9790aebSLuigi Rizzo 	 * so we could shorten this loop.
1741f9790aebSLuigi Rizzo 	 */
1742f9790aebSLuigi Rizzo 	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
1743f9790aebSLuigi Rizzo 	if (brddst->bq_head != NM_FT_NULL) {
1744*37e3a6d3SLuigi Rizzo 		u_int j;
1745f9790aebSLuigi Rizzo 		for (j = 0; likely(j < b->bdg_active_ports); j++) {
1746f9790aebSLuigi Rizzo 			uint16_t d_i;
1747f9790aebSLuigi Rizzo 			i = b->bdg_port_index[j];
1748f9790aebSLuigi Rizzo 			if (unlikely(i == me))
1749f9790aebSLuigi Rizzo 				continue;
1750f9790aebSLuigi Rizzo 			d_i = i * NM_BDG_MAXRINGS;
1751f9790aebSLuigi Rizzo 			if (dst_ents[d_i].bq_head == NM_FT_NULL)
1752f9790aebSLuigi Rizzo 				dsts[num_dsts++] = d_i;
1753f9790aebSLuigi Rizzo 		}
1754f9790aebSLuigi Rizzo 	}
1755f9790aebSLuigi Rizzo 
1756f9790aebSLuigi Rizzo 	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
17574bf50f18SLuigi Rizzo 	/* second pass: scan destinations */
1758f9790aebSLuigi Rizzo 	for (i = 0; i < num_dsts; i++) {
1759f9790aebSLuigi Rizzo 		struct netmap_vp_adapter *dst_na;
1760f9790aebSLuigi Rizzo 		struct netmap_kring *kring;
1761f9790aebSLuigi Rizzo 		struct netmap_ring *ring;
1762f0ea3689SLuigi Rizzo 		u_int dst_nr, lim, j, d_i, next, brd_next;
1763f9790aebSLuigi Rizzo 		u_int needed, howmany;
1764f9790aebSLuigi Rizzo 		int retry = netmap_txsync_retry;
1765f9790aebSLuigi Rizzo 		struct nm_bdg_q *d;
1766f9790aebSLuigi Rizzo 		uint32_t my_start = 0, lease_idx = 0;
1767f9790aebSLuigi Rizzo 		int nrings;
1768f0ea3689SLuigi Rizzo 		int virt_hdr_mismatch = 0;
1769f9790aebSLuigi Rizzo 
1770f9790aebSLuigi Rizzo 		d_i = dsts[i];
1771f9790aebSLuigi Rizzo 		ND("second pass %d port %d", i, d_i);
1772f9790aebSLuigi Rizzo 		d = dst_ents + d_i;
1773f9790aebSLuigi Rizzo 		// XXX fix the division
1774f9790aebSLuigi Rizzo 		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
1775f9790aebSLuigi Rizzo 		/* protect from the lookup function returning an inactive
1776f9790aebSLuigi Rizzo 		 * destination port
1777f9790aebSLuigi Rizzo 		 */
1778f9790aebSLuigi Rizzo 		if (unlikely(dst_na == NULL))
1779f9790aebSLuigi Rizzo 			goto cleanup;
1780f9790aebSLuigi Rizzo 		if (dst_na->up.na_flags & NAF_SW_ONLY)
1781f9790aebSLuigi Rizzo 			goto cleanup;
1782f9790aebSLuigi Rizzo 		/*
1783f9790aebSLuigi Rizzo 		 * The interface may be in !netmap mode in two cases:
1784f9790aebSLuigi Rizzo 		 * - when na is attached but not activated yet;
1785f9790aebSLuigi Rizzo 		 * - when na is being deactivated but is still attached.
1786f9790aebSLuigi Rizzo 		 */
17874bf50f18SLuigi Rizzo 		if (unlikely(!nm_netmap_on(&dst_na->up))) {
1788f9790aebSLuigi Rizzo 			ND("not in netmap mode!");
1789f9790aebSLuigi Rizzo 			goto cleanup;
1790f9790aebSLuigi Rizzo 		}
1791f9790aebSLuigi Rizzo 
1792f9790aebSLuigi Rizzo 		/* there is at least one either unicast or broadcast packet */
1793f9790aebSLuigi Rizzo 		brd_next = brddst->bq_head;
1794f9790aebSLuigi Rizzo 		next = d->bq_head;
1795f9790aebSLuigi Rizzo 		/* we need to reserve this many slots. If fewer are
1796f9790aebSLuigi Rizzo 		 * available, some packets will be dropped.
		 * Packets may have multiple fragments, so there is a
		 * chance that we may not use all of the slots
1799f9790aebSLuigi Rizzo 		 * we have claimed, so we will need to handle the leftover
1800f9790aebSLuigi Rizzo 		 * ones when we regain the lock.
1801f9790aebSLuigi Rizzo 		 */
1802f9790aebSLuigi Rizzo 		needed = d->bq_len + brddst->bq_len;
1803f9790aebSLuigi Rizzo 
1804*37e3a6d3SLuigi Rizzo 		if (unlikely(dst_na->up.virt_hdr_len != na->up.virt_hdr_len)) {
1805*37e3a6d3SLuigi Rizzo 			RD(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len,
1806*37e3a6d3SLuigi Rizzo 			      dst_na->up.virt_hdr_len);
1807f0ea3689SLuigi Rizzo 			/* There is a virtio-net header/offloadings mismatch between
1808f0ea3689SLuigi Rizzo 			 * source and destination. The slower mismatch datapath will
1809f0ea3689SLuigi Rizzo 			 * be used to cope with all the mismatches.
1810f0ea3689SLuigi Rizzo 			 */
1811f0ea3689SLuigi Rizzo 			virt_hdr_mismatch = 1;
1812f0ea3689SLuigi Rizzo 			if (dst_na->mfs < na->mfs) {
1813f0ea3689SLuigi Rizzo 				/* We may need to do segmentation offloadings, and so
1814f0ea3689SLuigi Rizzo 				 * we may need a number of destination slots greater
1815f0ea3689SLuigi Rizzo 				 * than the number of input slots ('needed').
1816f0ea3689SLuigi Rizzo 				 * We look for the smallest integer 'x' which satisfies:
1817f0ea3689SLuigi Rizzo 				 *	needed * na->mfs + x * H <= x * na->mfs
1818f0ea3689SLuigi Rizzo 				 * where 'H' is the length of the longest header that may
1819f0ea3689SLuigi Rizzo 				 * be replicated in the segmentation process (e.g. for
1820f0ea3689SLuigi Rizzo 				 * TCPv4 we must account for ethernet header, IP header
1821f0ea3689SLuigi Rizzo 				 * and TCPv4 header).
1822f0ea3689SLuigi Rizzo 				 */
1823f0ea3689SLuigi Rizzo 				needed = (needed * na->mfs) /
1824f0ea3689SLuigi Rizzo 						(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
1825f0ea3689SLuigi Rizzo 				ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
1826f0ea3689SLuigi Rizzo 			}
1827f0ea3689SLuigi Rizzo 		}
1828f0ea3689SLuigi Rizzo 
1829f9790aebSLuigi Rizzo 		ND(5, "pass 2 dst %d is %x %s",
1830f9790aebSLuigi Rizzo 			i, d_i, is_vp ? "virtual" : "nic/host");
1831f9790aebSLuigi Rizzo 		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
1832f9790aebSLuigi Rizzo 		nrings = dst_na->up.num_rx_rings;
1833f9790aebSLuigi Rizzo 		if (dst_nr >= nrings)
1834f9790aebSLuigi Rizzo 			dst_nr = dst_nr % nrings;
1835f9790aebSLuigi Rizzo 		kring = &dst_na->up.rx_rings[dst_nr];
1836f9790aebSLuigi Rizzo 		ring = kring->ring;
1837f9790aebSLuigi Rizzo 		lim = kring->nkr_num_slots - 1;
1838f9790aebSLuigi Rizzo 
1839f9790aebSLuigi Rizzo retry:
1840f9790aebSLuigi Rizzo 
1841f0ea3689SLuigi Rizzo 		if (dst_na->retry && retry) {
1842f0ea3689SLuigi Rizzo 			/* try to get some free slot from the previous run */
1843847bf383SLuigi Rizzo 			kring->nm_notify(kring, 0);
18444bf50f18SLuigi Rizzo 			/* actually useful only for bwraps, since there
18454bf50f18SLuigi Rizzo 			 * the notify will trigger a txsync on the hwna. VALE ports
18464bf50f18SLuigi Rizzo 			 * have dst_na->retry == 0
18474bf50f18SLuigi Rizzo 			 */
1848f0ea3689SLuigi Rizzo 		}
1849f9790aebSLuigi Rizzo 		/* reserve the buffers in the queue and an entry
1850f9790aebSLuigi Rizzo 		 * to report completion, and drop lock.
1851f9790aebSLuigi Rizzo 		 * XXX this might become a helper function.
1852f9790aebSLuigi Rizzo 		 */
1853f9790aebSLuigi Rizzo 		mtx_lock(&kring->q_lock);
1854f9790aebSLuigi Rizzo 		if (kring->nkr_stopped) {
1855f9790aebSLuigi Rizzo 			mtx_unlock(&kring->q_lock);
1856f9790aebSLuigi Rizzo 			goto cleanup;
1857f9790aebSLuigi Rizzo 		}
1858f9790aebSLuigi Rizzo 		my_start = j = kring->nkr_hwlease;
1859f9790aebSLuigi Rizzo 		howmany = nm_kr_space(kring, 1);
1860f9790aebSLuigi Rizzo 		if (needed < howmany)
1861f9790aebSLuigi Rizzo 			howmany = needed;
1862f9790aebSLuigi Rizzo 		lease_idx = nm_kr_lease(kring, howmany, 1);
1863f9790aebSLuigi Rizzo 		mtx_unlock(&kring->q_lock);
1864f9790aebSLuigi Rizzo 
1865f9790aebSLuigi Rizzo 		/* only retry if we need more than available slots */
1866f9790aebSLuigi Rizzo 		if (retry && needed <= howmany)
1867f9790aebSLuigi Rizzo 			retry = 0;
1868f9790aebSLuigi Rizzo 
1869f9790aebSLuigi Rizzo 		/* copy to the destination queue */
1870f9790aebSLuigi Rizzo 		while (howmany > 0) {
1871f9790aebSLuigi Rizzo 			struct netmap_slot *slot;
1872f9790aebSLuigi Rizzo 			struct nm_bdg_fwd *ft_p, *ft_end;
1873f9790aebSLuigi Rizzo 			u_int cnt;
1874f9790aebSLuigi Rizzo 
1875f9790aebSLuigi Rizzo 			/* find the queue from which we pick next packet.
1876f9790aebSLuigi Rizzo 			 * NM_FT_NULL is always higher than valid indexes
1877f9790aebSLuigi Rizzo 			 * so we never dereference it if the other list
1878f9790aebSLuigi Rizzo 			 * has packets (and if both are empty we never
1879f9790aebSLuigi Rizzo 			 * get here).
1880f9790aebSLuigi Rizzo 			 */
1881f9790aebSLuigi Rizzo 			if (next < brd_next) {
1882f9790aebSLuigi Rizzo 				ft_p = ft + next;
1883f9790aebSLuigi Rizzo 				next = ft_p->ft_next;
1884f9790aebSLuigi Rizzo 			} else { /* insert broadcast */
1885f9790aebSLuigi Rizzo 				ft_p = ft + brd_next;
1886f9790aebSLuigi Rizzo 				brd_next = ft_p->ft_next;
1887f9790aebSLuigi Rizzo 			}
1888f9790aebSLuigi Rizzo 			cnt = ft_p->ft_frags; // cnt > 0
1889f9790aebSLuigi Rizzo 			if (unlikely(cnt > howmany))
1890f9790aebSLuigi Rizzo 			    break; /* no more space */
1891f9790aebSLuigi Rizzo 			if (netmap_verbose && cnt > 1)
1892f9790aebSLuigi Rizzo 				RD(5, "rx %d frags to %d", cnt, j);
1893f9790aebSLuigi Rizzo 			ft_end = ft_p + cnt;
1894f0ea3689SLuigi Rizzo 			if (unlikely(virt_hdr_mismatch)) {
1895f0ea3689SLuigi Rizzo 				bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
1896f0ea3689SLuigi Rizzo 			} else {
1897f0ea3689SLuigi Rizzo 				howmany -= cnt;
1898f9790aebSLuigi Rizzo 				do {
1899f9790aebSLuigi Rizzo 					char *dst, *src = ft_p->ft_buf;
1900f9790aebSLuigi Rizzo 					size_t copy_len = ft_p->ft_len, dst_len = copy_len;
1901f9790aebSLuigi Rizzo 
1902f9790aebSLuigi Rizzo 					slot = &ring->slot[j];
19034bf50f18SLuigi Rizzo 					dst = NMB(&dst_na->up, slot);
1904f9790aebSLuigi Rizzo 
190517885a7bSLuigi Rizzo 					ND("send [%d] %d(%d) bytes at %s:%d",
190617885a7bSLuigi Rizzo 							i, (int)copy_len, (int)dst_len,
190717885a7bSLuigi Rizzo 							NM_IFPNAME(dst_ifp), j);
1908f9790aebSLuigi Rizzo 					/* round to a multiple of 64 */
1909f9790aebSLuigi Rizzo 					copy_len = (copy_len + 63) & ~63;
1910f9790aebSLuigi Rizzo 
19114bf50f18SLuigi Rizzo 					if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) ||
19124bf50f18SLuigi Rizzo 						     copy_len > NETMAP_BUF_SIZE(&na->up))) {
1913e31c6ec7SLuigi Rizzo 						RD(5, "invalid len %d, down to 64", (int)copy_len);
1914e31c6ec7SLuigi Rizzo 						copy_len = dst_len = 64; // XXX
1915e31c6ec7SLuigi Rizzo 					}
1916f9790aebSLuigi Rizzo 					if (ft_p->ft_flags & NS_INDIRECT) {
1917f9790aebSLuigi Rizzo 						if (copyin(src, dst, copy_len)) {
1918f9790aebSLuigi Rizzo 							// invalid user pointer, pretend len is 0
1919f9790aebSLuigi Rizzo 							dst_len = 0;
1920f9790aebSLuigi Rizzo 						}
1921f9790aebSLuigi Rizzo 					} else {
1922f9790aebSLuigi Rizzo 						//memcpy(dst, src, copy_len);
1923f9790aebSLuigi Rizzo 						pkt_copy(src, dst, (int)copy_len);
1924f9790aebSLuigi Rizzo 					}
1925f9790aebSLuigi Rizzo 					slot->len = dst_len;
1926f9790aebSLuigi Rizzo 					slot->flags = (cnt << 8)| NS_MOREFRAG;
1927f9790aebSLuigi Rizzo 					j = nm_next(j, lim);
1928f0ea3689SLuigi Rizzo 					needed--;
1929f9790aebSLuigi Rizzo 					ft_p++;
1930f9790aebSLuigi Rizzo 				} while (ft_p != ft_end);
1931f9790aebSLuigi Rizzo 				slot->flags = (cnt << 8); /* clear flag on last entry */
1932f0ea3689SLuigi Rizzo 			}
1933f9790aebSLuigi Rizzo 			/* are we done ? */
1934f9790aebSLuigi Rizzo 			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
1935f9790aebSLuigi Rizzo 				break;
1936f9790aebSLuigi Rizzo 		}
1937f9790aebSLuigi Rizzo 		{
1938f9790aebSLuigi Rizzo 		    /* current position */
1939f9790aebSLuigi Rizzo 		    uint32_t *p = kring->nkr_leases; /* shorthand */
1940f9790aebSLuigi Rizzo 		    uint32_t update_pos;
1941f9790aebSLuigi Rizzo 		    int still_locked = 1;
1942f9790aebSLuigi Rizzo 
1943f9790aebSLuigi Rizzo 		    mtx_lock(&kring->q_lock);
1944f9790aebSLuigi Rizzo 		    if (unlikely(howmany > 0)) {
1945f9790aebSLuigi Rizzo 			/* not used all bufs. If i am the last one
1946f9790aebSLuigi Rizzo 			 * i can recover the slots, otherwise must
1947f9790aebSLuigi Rizzo 			 * fill them with 0 to mark empty packets.
1948f9790aebSLuigi Rizzo 			 */
1949f9790aebSLuigi Rizzo 			ND("leftover %d bufs", howmany);
1950f9790aebSLuigi Rizzo 			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
1951f9790aebSLuigi Rizzo 			    /* yes i am the last one */
1952f9790aebSLuigi Rizzo 			    ND("roll back nkr_hwlease to %d", j);
1953f9790aebSLuigi Rizzo 			    kring->nkr_hwlease = j;
1954f9790aebSLuigi Rizzo 			} else {
1955f9790aebSLuigi Rizzo 			    while (howmany-- > 0) {
1956f9790aebSLuigi Rizzo 				ring->slot[j].len = 0;
1957f9790aebSLuigi Rizzo 				ring->slot[j].flags = 0;
1958f9790aebSLuigi Rizzo 				j = nm_next(j, lim);
1959f9790aebSLuigi Rizzo 			    }
1960f9790aebSLuigi Rizzo 			}
1961f9790aebSLuigi Rizzo 		    }
1962f9790aebSLuigi Rizzo 		    p[lease_idx] = j; /* report I am done */
1963f9790aebSLuigi Rizzo 
196417885a7bSLuigi Rizzo 		    update_pos = kring->nr_hwtail;
1965f9790aebSLuigi Rizzo 
1966f9790aebSLuigi Rizzo 		    if (my_start == update_pos) {
1967f9790aebSLuigi Rizzo 			/* all slots before my_start have been reported,
1968f9790aebSLuigi Rizzo 			 * so scan subsequent leases to see if other ranges
1969f9790aebSLuigi Rizzo 			 * have been completed, and to a selwakeup or txsync.
1970f9790aebSLuigi Rizzo 		         */
1971f9790aebSLuigi Rizzo 			while (lease_idx != kring->nkr_lease_idx &&
1972f9790aebSLuigi Rizzo 				p[lease_idx] != NR_NOSLOT) {
1973f9790aebSLuigi Rizzo 			    j = p[lease_idx];
1974f9790aebSLuigi Rizzo 			    p[lease_idx] = NR_NOSLOT;
1975f9790aebSLuigi Rizzo 			    lease_idx = nm_next(lease_idx, lim);
1976f9790aebSLuigi Rizzo 			}
1977f9790aebSLuigi Rizzo 			/* j is the new 'write' position. j != my_start
1978f9790aebSLuigi Rizzo 			 * means there are new buffers to report
1979f9790aebSLuigi Rizzo 			 */
1980f9790aebSLuigi Rizzo 			if (likely(j != my_start)) {
198117885a7bSLuigi Rizzo 				kring->nr_hwtail = j;
1982f9790aebSLuigi Rizzo 				still_locked = 0;
1983f9790aebSLuigi Rizzo 				mtx_unlock(&kring->q_lock);
1984847bf383SLuigi Rizzo 				kring->nm_notify(kring, 0);
19854bf50f18SLuigi Rizzo 				/* this is netmap_notify for VALE ports and
19864bf50f18SLuigi Rizzo 				 * netmap_bwrap_notify for bwrap. The latter will
19874bf50f18SLuigi Rizzo 				 * trigger a txsync on the underlying hwna
19884bf50f18SLuigi Rizzo 				 */
19894bf50f18SLuigi Rizzo 				if (dst_na->retry && retry--) {
19904bf50f18SLuigi Rizzo 					/* XXX this is going to call nm_notify again.
19914bf50f18SLuigi Rizzo 					 * Only useful for bwrap in virtual machines
19924bf50f18SLuigi Rizzo 					 */
1993f9790aebSLuigi Rizzo 					goto retry;
1994f9790aebSLuigi Rizzo 				}
1995f9790aebSLuigi Rizzo 			}
19964bf50f18SLuigi Rizzo 		    }
1997f9790aebSLuigi Rizzo 		    if (still_locked)
1998f9790aebSLuigi Rizzo 			mtx_unlock(&kring->q_lock);
1999f9790aebSLuigi Rizzo 		}
2000f9790aebSLuigi Rizzo cleanup:
2001f9790aebSLuigi Rizzo 		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
2002f9790aebSLuigi Rizzo 		d->bq_len = 0;
2003f9790aebSLuigi Rizzo 	}
2004f9790aebSLuigi Rizzo 	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
2005f9790aebSLuigi Rizzo 	brddst->bq_len = 0;
2006f9790aebSLuigi Rizzo 	return 0;
2007f9790aebSLuigi Rizzo }
2008f9790aebSLuigi Rizzo 
20094bf50f18SLuigi Rizzo /* nm_txsync callback for VALE ports */
2010f9790aebSLuigi Rizzo static int
20114bf50f18SLuigi Rizzo netmap_vp_txsync(struct netmap_kring *kring, int flags)
2012f9790aebSLuigi Rizzo {
20134bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *na =
20144bf50f18SLuigi Rizzo 		(struct netmap_vp_adapter *)kring->na;
201517885a7bSLuigi Rizzo 	u_int done;
201617885a7bSLuigi Rizzo 	u_int const lim = kring->nkr_num_slots - 1;
2017847bf383SLuigi Rizzo 	u_int const head = kring->rhead;
2018f9790aebSLuigi Rizzo 
2019f9790aebSLuigi Rizzo 	if (bridge_batch <= 0) { /* testing only */
2020847bf383SLuigi Rizzo 		done = head; // used all
2021f9790aebSLuigi Rizzo 		goto done;
2022f9790aebSLuigi Rizzo 	}
20234bf50f18SLuigi Rizzo 	if (!na->na_bdg) {
2024847bf383SLuigi Rizzo 		done = head;
20254bf50f18SLuigi Rizzo 		goto done;
20264bf50f18SLuigi Rizzo 	}
2027f9790aebSLuigi Rizzo 	if (bridge_batch > NM_BDG_BATCH)
2028f9790aebSLuigi Rizzo 		bridge_batch = NM_BDG_BATCH;
2029f9790aebSLuigi Rizzo 
2030847bf383SLuigi Rizzo 	done = nm_bdg_preflush(kring, head);
2031f9790aebSLuigi Rizzo done:
2032847bf383SLuigi Rizzo 	if (done != head)
2033847bf383SLuigi Rizzo 		D("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail);
203417885a7bSLuigi Rizzo 	/*
203517885a7bSLuigi Rizzo 	 * packets between 'done' and 'cur' are left unsent.
203617885a7bSLuigi Rizzo 	 */
203717885a7bSLuigi Rizzo 	kring->nr_hwcur = done;
203817885a7bSLuigi Rizzo 	kring->nr_hwtail = nm_prev(done, lim);
2039f9790aebSLuigi Rizzo 	if (netmap_verbose)
20404bf50f18SLuigi Rizzo 		D("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
2041f9790aebSLuigi Rizzo 	return 0;
2042f9790aebSLuigi Rizzo }
2043f9790aebSLuigi Rizzo 
2044f9790aebSLuigi Rizzo 
20454bf50f18SLuigi Rizzo /* rxsync code used by VALE ports nm_rxsync callback and also
20464bf50f18SLuigi Rizzo  * internally by the brwap
2047f9790aebSLuigi Rizzo  */
2048f9790aebSLuigi Rizzo static int
20494bf50f18SLuigi Rizzo netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
2050f9790aebSLuigi Rizzo {
20514bf50f18SLuigi Rizzo 	struct netmap_adapter *na = kring->na;
205217885a7bSLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
205317885a7bSLuigi Rizzo 	u_int nm_i, lim = kring->nkr_num_slots - 1;
2054847bf383SLuigi Rizzo 	u_int head = kring->rhead;
205517885a7bSLuigi Rizzo 	int n;
205617885a7bSLuigi Rizzo 
205717885a7bSLuigi Rizzo 	if (head > lim) {
205817885a7bSLuigi Rizzo 		D("ouch dangerous reset!!!");
205917885a7bSLuigi Rizzo 		n = netmap_ring_reinit(kring);
206017885a7bSLuigi Rizzo 		goto done;
206117885a7bSLuigi Rizzo 	}
206217885a7bSLuigi Rizzo 
206317885a7bSLuigi Rizzo 	/* First part, import newly received packets. */
206417885a7bSLuigi Rizzo 	/* actually nothing to do here, they are already in the kring */
206517885a7bSLuigi Rizzo 
206617885a7bSLuigi Rizzo 	/* Second part, skip past packets that userspace has released. */
206717885a7bSLuigi Rizzo 	nm_i = kring->nr_hwcur;
206817885a7bSLuigi Rizzo 	if (nm_i != head) {
206917885a7bSLuigi Rizzo 		/* consistency check, but nothing really important here */
207017885a7bSLuigi Rizzo 		for (n = 0; likely(nm_i != head); n++) {
207117885a7bSLuigi Rizzo 			struct netmap_slot *slot = &ring->slot[nm_i];
20724bf50f18SLuigi Rizzo 			void *addr = NMB(na, slot);
207317885a7bSLuigi Rizzo 
20744bf50f18SLuigi Rizzo 			if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
207517885a7bSLuigi Rizzo 				D("bad buffer index %d, ignore ?",
207617885a7bSLuigi Rizzo 					slot->buf_idx);
207717885a7bSLuigi Rizzo 			}
207817885a7bSLuigi Rizzo 			slot->flags &= ~NS_BUF_CHANGED;
207917885a7bSLuigi Rizzo 			nm_i = nm_next(nm_i, lim);
208017885a7bSLuigi Rizzo 		}
208117885a7bSLuigi Rizzo 		kring->nr_hwcur = head;
208217885a7bSLuigi Rizzo 	}
208317885a7bSLuigi Rizzo 
208417885a7bSLuigi Rizzo 	n = 0;
208517885a7bSLuigi Rizzo done:
208617885a7bSLuigi Rizzo 	return n;
208717885a7bSLuigi Rizzo }
2088f9790aebSLuigi Rizzo 
2089f9790aebSLuigi Rizzo /*
20904bf50f18SLuigi Rizzo  * nm_rxsync callback for VALE ports
2091f9790aebSLuigi Rizzo  * user process reading from a VALE switch.
2092f9790aebSLuigi Rizzo  * Already protected against concurrent calls from userspace,
2093f9790aebSLuigi Rizzo  * but we must acquire the queue's lock to protect against
2094f9790aebSLuigi Rizzo  * writers on the same queue.
2095f9790aebSLuigi Rizzo  */
2096f9790aebSLuigi Rizzo static int
20974bf50f18SLuigi Rizzo netmap_vp_rxsync(struct netmap_kring *kring, int flags)
2098f9790aebSLuigi Rizzo {
2099f9790aebSLuigi Rizzo 	int n;
2100f9790aebSLuigi Rizzo 
2101f9790aebSLuigi Rizzo 	mtx_lock(&kring->q_lock);
21024bf50f18SLuigi Rizzo 	n = netmap_vp_rxsync_locked(kring, flags);
2103f9790aebSLuigi Rizzo 	mtx_unlock(&kring->q_lock);
2104f9790aebSLuigi Rizzo 	return n;
2105f9790aebSLuigi Rizzo }
2106f9790aebSLuigi Rizzo 
210717885a7bSLuigi Rizzo 
21084bf50f18SLuigi Rizzo /* nm_bdg_attach callback for VALE ports
21094bf50f18SLuigi Rizzo  * The na_vp port is this same netmap_adapter. There is no host port.
21104bf50f18SLuigi Rizzo  */
2111f9790aebSLuigi Rizzo static int
21124bf50f18SLuigi Rizzo netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na)
21134bf50f18SLuigi Rizzo {
21144bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
21154bf50f18SLuigi Rizzo 
21164bf50f18SLuigi Rizzo 	if (vpna->na_bdg)
21174bf50f18SLuigi Rizzo 		return EBUSY;
21184bf50f18SLuigi Rizzo 	na->na_vp = vpna;
21194bf50f18SLuigi Rizzo 	strncpy(na->name, name, sizeof(na->name));
21204bf50f18SLuigi Rizzo 	na->na_hostvp = NULL;
21214bf50f18SLuigi Rizzo 	return 0;
21224bf50f18SLuigi Rizzo }
21234bf50f18SLuigi Rizzo 
21244bf50f18SLuigi Rizzo /* create a netmap_vp_adapter that describes a VALE port.
21254bf50f18SLuigi Rizzo  * Only persistent VALE ports have a non-null ifp.
21264bf50f18SLuigi Rizzo  */
21274bf50f18SLuigi Rizzo static int
21284bf50f18SLuigi Rizzo netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter **ret)
2129f9790aebSLuigi Rizzo {
2130f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna;
2131f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
2132f9790aebSLuigi Rizzo 	int error;
2133f0ea3689SLuigi Rizzo 	u_int npipes = 0;
2134f9790aebSLuigi Rizzo 
2135f9790aebSLuigi Rizzo 	vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
2136f9790aebSLuigi Rizzo 	if (vpna == NULL)
2137f9790aebSLuigi Rizzo 		return ENOMEM;
2138f9790aebSLuigi Rizzo 
2139f9790aebSLuigi Rizzo  	na = &vpna->up;
2140f9790aebSLuigi Rizzo 
2141f9790aebSLuigi Rizzo 	na->ifp = ifp;
21424bf50f18SLuigi Rizzo 	strncpy(na->name, nmr->nr_name, sizeof(na->name));
2143f9790aebSLuigi Rizzo 
2144f9790aebSLuigi Rizzo 	/* bound checking */
2145f9790aebSLuigi Rizzo 	na->num_tx_rings = nmr->nr_tx_rings;
2146f9790aebSLuigi Rizzo 	nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
2147f9790aebSLuigi Rizzo 	nmr->nr_tx_rings = na->num_tx_rings; // write back
2148f9790aebSLuigi Rizzo 	na->num_rx_rings = nmr->nr_rx_rings;
2149f9790aebSLuigi Rizzo 	nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
2150f9790aebSLuigi Rizzo 	nmr->nr_rx_rings = na->num_rx_rings; // write back
2151f9790aebSLuigi Rizzo 	nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
2152f9790aebSLuigi Rizzo 			1, NM_BDG_MAXSLOTS, NULL);
2153f9790aebSLuigi Rizzo 	na->num_tx_desc = nmr->nr_tx_slots;
2154f9790aebSLuigi Rizzo 	nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
2155f9790aebSLuigi Rizzo 			1, NM_BDG_MAXSLOTS, NULL);
2156f0ea3689SLuigi Rizzo 	/* validate number of pipes. We want at least 1,
2157f0ea3689SLuigi Rizzo 	 * but probably can do with some more.
2158f0ea3689SLuigi Rizzo 	 * So let's use 2 as default (when 0 is supplied)
2159f0ea3689SLuigi Rizzo 	 */
2160f0ea3689SLuigi Rizzo 	npipes = nmr->nr_arg1;
2161f0ea3689SLuigi Rizzo 	nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
2162f0ea3689SLuigi Rizzo 	nmr->nr_arg1 = npipes;	/* write back */
2163f0ea3689SLuigi Rizzo 	/* validate extra bufs */
2164f0ea3689SLuigi Rizzo 	nm_bound_var(&nmr->nr_arg3, 0, 0,
2165f0ea3689SLuigi Rizzo 			128*NM_BDG_MAXSLOTS, NULL);
2166f9790aebSLuigi Rizzo 	na->num_rx_desc = nmr->nr_rx_slots;
2167f0ea3689SLuigi Rizzo 	vpna->mfs = 1514;
2168847bf383SLuigi Rizzo 	vpna->last_smac = ~0llu;
2169f0ea3689SLuigi Rizzo 	/*if (vpna->mfs > netmap_buf_size)  TODO netmap_buf_size is zero??
2170f0ea3689SLuigi Rizzo 		vpna->mfs = netmap_buf_size; */
2171f0ea3689SLuigi Rizzo         if (netmap_verbose)
2172f0ea3689SLuigi Rizzo 		D("max frame size %u", vpna->mfs);
2173f9790aebSLuigi Rizzo 
2174847bf383SLuigi Rizzo 	na->na_flags |= NAF_BDG_MAYSLEEP;
217510b8ef3dSLuigi Rizzo 	/* persistent VALE ports look like hw devices
217610b8ef3dSLuigi Rizzo 	 * with a native netmap adapter
217710b8ef3dSLuigi Rizzo 	 */
217810b8ef3dSLuigi Rizzo 	if (ifp)
217910b8ef3dSLuigi Rizzo 		na->na_flags |= NAF_NATIVE;
21804bf50f18SLuigi Rizzo 	na->nm_txsync = netmap_vp_txsync;
21814bf50f18SLuigi Rizzo 	na->nm_rxsync = netmap_vp_rxsync;
21824bf50f18SLuigi Rizzo 	na->nm_register = netmap_vp_reg;
2183f9790aebSLuigi Rizzo 	na->nm_krings_create = netmap_vp_krings_create;
2184f9790aebSLuigi Rizzo 	na->nm_krings_delete = netmap_vp_krings_delete;
21854bf50f18SLuigi Rizzo 	na->nm_dtor = netmap_vp_dtor;
21864bf50f18SLuigi Rizzo 	na->nm_mem = netmap_mem_private_new(na->name,
2187f9790aebSLuigi Rizzo 			na->num_tx_rings, na->num_tx_desc,
2188f0ea3689SLuigi Rizzo 			na->num_rx_rings, na->num_rx_desc,
2189f0ea3689SLuigi Rizzo 			nmr->nr_arg3, npipes, &error);
2190f0ea3689SLuigi Rizzo 	if (na->nm_mem == NULL)
2191f0ea3689SLuigi Rizzo 		goto err;
21924bf50f18SLuigi Rizzo 	na->nm_bdg_attach = netmap_vp_bdg_attach;
2193f9790aebSLuigi Rizzo 	/* other nmd fields are set in the common routine */
2194f9790aebSLuigi Rizzo 	error = netmap_attach_common(na);
2195f0ea3689SLuigi Rizzo 	if (error)
2196f0ea3689SLuigi Rizzo 		goto err;
21974bf50f18SLuigi Rizzo 	*ret = vpna;
2198f0ea3689SLuigi Rizzo 	return 0;
2199f0ea3689SLuigi Rizzo 
2200f0ea3689SLuigi Rizzo err:
2201f0ea3689SLuigi Rizzo 	if (na->nm_mem != NULL)
2202847bf383SLuigi Rizzo 		netmap_mem_delete(na->nm_mem);
2203f9790aebSLuigi Rizzo 	free(vpna, M_DEVBUF);
2204f9790aebSLuigi Rizzo 	return error;
2205f9790aebSLuigi Rizzo }
2206f9790aebSLuigi Rizzo 
22074bf50f18SLuigi Rizzo /* Bridge wrapper code (bwrap).
22084bf50f18SLuigi Rizzo  * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
22094bf50f18SLuigi Rizzo  * VALE switch.
22104bf50f18SLuigi Rizzo  * The main task is to swap the meaning of tx and rx rings to match the
22114bf50f18SLuigi Rizzo  * expectations of the VALE switch code (see nm_bdg_flush).
22124bf50f18SLuigi Rizzo  *
22134bf50f18SLuigi Rizzo  * The bwrap works by interposing a netmap_bwrap_adapter between the
22144bf50f18SLuigi Rizzo  * rest of the system and the hwna. The netmap_bwrap_adapter looks like
 * a netmap_vp_adapter to the rest of the system, but, internally, it
22164bf50f18SLuigi Rizzo  * translates all callbacks to what the hwna expects.
22174bf50f18SLuigi Rizzo  *
22184bf50f18SLuigi Rizzo  * Note that we have to intercept callbacks coming from two sides:
22194bf50f18SLuigi Rizzo  *
22204bf50f18SLuigi Rizzo  *  - callbacks coming from the netmap module are intercepted by
22214bf50f18SLuigi Rizzo  *    passing around the netmap_bwrap_adapter instead of the hwna
22224bf50f18SLuigi Rizzo  *
22234bf50f18SLuigi Rizzo  *  - callbacks coming from outside of the netmap module only know
22244bf50f18SLuigi Rizzo  *    about the hwna. This, however, only happens in interrupt
22254bf50f18SLuigi Rizzo  *    handlers, where only the hwna->nm_notify callback is called.
22264bf50f18SLuigi Rizzo  *    What the bwrap does is to overwrite the hwna->nm_notify callback
22274bf50f18SLuigi Rizzo  *    with its own netmap_bwrap_intr_notify.
22284bf50f18SLuigi Rizzo  *    XXX This assumes that the hwna->nm_notify callback was the
22294bf50f18SLuigi Rizzo  *    standard netmap_notify(), as it is the case for nic adapters.
22304bf50f18SLuigi Rizzo  *    Any additional action performed by hwna->nm_notify will not be
22314bf50f18SLuigi Rizzo  *    performed by netmap_bwrap_intr_notify.
22324bf50f18SLuigi Rizzo  *
22334bf50f18SLuigi Rizzo  * Additionally, the bwrap can optionally attach the host rings pair
22344bf50f18SLuigi Rizzo  * of the wrapped adapter to a different port of the switch.
22354bf50f18SLuigi Rizzo  */
22364bf50f18SLuigi Rizzo 
223717885a7bSLuigi Rizzo 
2238f9790aebSLuigi Rizzo static void
2239f9790aebSLuigi Rizzo netmap_bwrap_dtor(struct netmap_adapter *na)
2240f9790aebSLuigi Rizzo {
2241f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
2242f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
2243*37e3a6d3SLuigi Rizzo 	struct nm_bridge *b = bna->up.na_bdg,
2244*37e3a6d3SLuigi Rizzo 		*bh = bna->host.na_bdg;
2245*37e3a6d3SLuigi Rizzo 
2246*37e3a6d3SLuigi Rizzo 	if (b) {
2247*37e3a6d3SLuigi Rizzo 		netmap_bdg_detach_common(b, bna->up.bdg_port,
2248*37e3a6d3SLuigi Rizzo 			    (bh ? bna->host.bdg_port : -1));
2249*37e3a6d3SLuigi Rizzo 	}
2250f9790aebSLuigi Rizzo 
2251f9790aebSLuigi Rizzo 	ND("na %p", na);
2252f9790aebSLuigi Rizzo 	na->ifp = NULL;
22534bf50f18SLuigi Rizzo 	bna->host.up.ifp = NULL;
22544bf50f18SLuigi Rizzo 	hwna->na_private = NULL;
22554bf50f18SLuigi Rizzo 	hwna->na_vp = hwna->na_hostvp = NULL;
22564bf50f18SLuigi Rizzo 	hwna->na_flags &= ~NAF_BUSY;
22574bf50f18SLuigi Rizzo 	netmap_adapter_put(hwna);
2258f9790aebSLuigi Rizzo 
2259f9790aebSLuigi Rizzo }
2260f9790aebSLuigi Rizzo 
226117885a7bSLuigi Rizzo 
2262f9790aebSLuigi Rizzo /*
226317885a7bSLuigi Rizzo  * Intr callback for NICs connected to a bridge.
226417885a7bSLuigi Rizzo  * Simply ignore tx interrupts (maybe we could try to recover space ?)
226517885a7bSLuigi Rizzo  * and pass received packets from nic to the bridge.
226617885a7bSLuigi Rizzo  *
2267f9790aebSLuigi Rizzo  * XXX TODO check locking: this is called from the interrupt
2268f9790aebSLuigi Rizzo  * handler so we should make sure that the interface is not
2269f9790aebSLuigi Rizzo  * disconnected while passing down an interrupt.
2270f9790aebSLuigi Rizzo  *
227117885a7bSLuigi Rizzo  * Note, no user process can access this NIC or the host stack.
227217885a7bSLuigi Rizzo  * The only part of the ring that is significant are the slots,
227317885a7bSLuigi Rizzo  * and head/cur/tail are set from the kring as needed
227417885a7bSLuigi Rizzo  * (part as a receive ring, part as a transmit ring).
227517885a7bSLuigi Rizzo  *
227617885a7bSLuigi Rizzo  * callback that overwrites the hwna notify callback.
2277*37e3a6d3SLuigi Rizzo  * Packets come from the outside or from the host stack and are put on an
2278*37e3a6d3SLuigi Rizzo  * hwna rx ring.
2279f9790aebSLuigi Rizzo  * The bridge wrapper then sends the packets through the bridge.
2280f9790aebSLuigi Rizzo  */
2281f9790aebSLuigi Rizzo static int
2282847bf383SLuigi Rizzo netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
2283f9790aebSLuigi Rizzo {
2284847bf383SLuigi Rizzo 	struct netmap_adapter *na = kring->na;
2285f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna = na->na_private;
2286847bf383SLuigi Rizzo 	struct netmap_kring *bkring;
2287f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna = &bna->up;
2288847bf383SLuigi Rizzo 	u_int ring_nr = kring->ring_id;
2289*37e3a6d3SLuigi Rizzo 	int ret = NM_IRQ_COMPLETED;
2290*37e3a6d3SLuigi Rizzo 	int error;
2291f9790aebSLuigi Rizzo 
229217885a7bSLuigi Rizzo 	if (netmap_verbose)
2293847bf383SLuigi Rizzo 	    D("%s %s 0x%x", na->name, kring->name, flags);
2294f9790aebSLuigi Rizzo 
2295847bf383SLuigi Rizzo 	bkring = &vpna->up.tx_rings[ring_nr];
2296f9790aebSLuigi Rizzo 
2297f9790aebSLuigi Rizzo 	/* make sure the ring is not disabled */
2298*37e3a6d3SLuigi Rizzo 	if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) {
2299*37e3a6d3SLuigi Rizzo 		return EIO;
2300*37e3a6d3SLuigi Rizzo 	}
2301f9790aebSLuigi Rizzo 
230217885a7bSLuigi Rizzo 	if (netmap_verbose)
2303847bf383SLuigi Rizzo 	    D("%s head %d cur %d tail %d",  na->name,
230417885a7bSLuigi Rizzo 		kring->rhead, kring->rcur, kring->rtail);
230517885a7bSLuigi Rizzo 
2306847bf383SLuigi Rizzo 	/* simulate a user wakeup on the rx ring
2307847bf383SLuigi Rizzo 	 * fetch packets that have arrived.
2308f9790aebSLuigi Rizzo 	 */
2309f0ea3689SLuigi Rizzo 	error = kring->nm_sync(kring, 0);
2310f9790aebSLuigi Rizzo 	if (error)
2311f9790aebSLuigi Rizzo 		goto put_out;
2312*37e3a6d3SLuigi Rizzo 	if (kring->nr_hwcur == kring->nr_hwtail) {
2313*37e3a6d3SLuigi Rizzo 		if (netmap_verbose)
2314f9790aebSLuigi Rizzo 			D("how strange, interrupt with no packets on %s",
23154bf50f18SLuigi Rizzo 			    na->name);
2316f9790aebSLuigi Rizzo 		goto put_out;
2317f9790aebSLuigi Rizzo 	}
231817885a7bSLuigi Rizzo 
2319847bf383SLuigi Rizzo 	/* new packets are kring->rcur to kring->nr_hwtail, and the bkring
2320847bf383SLuigi Rizzo 	 * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail
232117885a7bSLuigi Rizzo 	 * to push all packets out.
232217885a7bSLuigi Rizzo 	 */
2323847bf383SLuigi Rizzo 	bkring->rhead = bkring->rcur = kring->nr_hwtail;
232417885a7bSLuigi Rizzo 
23254bf50f18SLuigi Rizzo 	netmap_vp_txsync(bkring, flags);
2326f9790aebSLuigi Rizzo 
232717885a7bSLuigi Rizzo 	/* mark all buffers as released on this ring */
2328847bf383SLuigi Rizzo 	kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail;
232917885a7bSLuigi Rizzo 	/* another call to actually release the buffers */
2330f0ea3689SLuigi Rizzo 	error = kring->nm_sync(kring, 0);
2331f9790aebSLuigi Rizzo 
2332*37e3a6d3SLuigi Rizzo 	/* The second rxsync may have further advanced hwtail. If this happens,
2333*37e3a6d3SLuigi Rizzo 	 *  return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */
2334*37e3a6d3SLuigi Rizzo 	if (kring->rcur != kring->nr_hwtail) {
2335*37e3a6d3SLuigi Rizzo 		ret = NM_IRQ_RESCHED;
2336*37e3a6d3SLuigi Rizzo 	}
2337f9790aebSLuigi Rizzo put_out:
2338f9790aebSLuigi Rizzo 	nm_kr_put(kring);
2339*37e3a6d3SLuigi Rizzo 
2340*37e3a6d3SLuigi Rizzo 	return error ? error : ret;
2341f9790aebSLuigi Rizzo }
2342f9790aebSLuigi Rizzo 
234317885a7bSLuigi Rizzo 
23444bf50f18SLuigi Rizzo /* nm_register callback for bwrap */
2345f9790aebSLuigi Rizzo static int
2346*37e3a6d3SLuigi Rizzo netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
2347f9790aebSLuigi Rizzo {
2348f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
2349f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
2350f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
2351f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *hostna = &bna->host;
2352*37e3a6d3SLuigi Rizzo 	int error, i;
2353847bf383SLuigi Rizzo 	enum txrx t;
2354f9790aebSLuigi Rizzo 
23554bf50f18SLuigi Rizzo 	ND("%s %s", na->name, onoff ? "on" : "off");
2356f9790aebSLuigi Rizzo 
2357f9790aebSLuigi Rizzo 	if (onoff) {
23584bf50f18SLuigi Rizzo 		/* netmap_do_regif has been called on the bwrap na.
23594bf50f18SLuigi Rizzo 		 * We need to pass the information about the
23604bf50f18SLuigi Rizzo 		 * memory allocator down to the hwna before
23614bf50f18SLuigi Rizzo 		 * putting it in netmap mode
23624bf50f18SLuigi Rizzo 		 */
2363f9790aebSLuigi Rizzo 		hwna->na_lut = na->na_lut;
2364f9790aebSLuigi Rizzo 
2365f9790aebSLuigi Rizzo 		if (hostna->na_bdg) {
23664bf50f18SLuigi Rizzo 			/* if the host rings have been attached to switch,
23674bf50f18SLuigi Rizzo 			 * we need to copy the memory allocator information
23684bf50f18SLuigi Rizzo 			 * in the hostna also
23694bf50f18SLuigi Rizzo 			 */
2370f9790aebSLuigi Rizzo 			hostna->up.na_lut = na->na_lut;
2371f9790aebSLuigi Rizzo 		}
2372f9790aebSLuigi Rizzo 
23730c7ba37eSLuigi Rizzo 		/* cross-link the netmap rings
23740c7ba37eSLuigi Rizzo 		 * The original number of rings comes from hwna,
23750c7ba37eSLuigi Rizzo 		 * rx rings on one side equals tx rings on the other.
23760c7ba37eSLuigi Rizzo 		 */
2377847bf383SLuigi Rizzo 		for_rx_tx(t) {
2378847bf383SLuigi Rizzo 			enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
2379*37e3a6d3SLuigi Rizzo 			for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) {
2380*37e3a6d3SLuigi Rizzo 				NMR(hwna, r)[i].ring = NMR(na, t)[i].ring;
2381f9790aebSLuigi Rizzo 			}
2382f9790aebSLuigi Rizzo 		}
2383*37e3a6d3SLuigi Rizzo 
2384*37e3a6d3SLuigi Rizzo 		if (na->na_flags & NAF_HOST_RINGS) {
2385*37e3a6d3SLuigi Rizzo 			struct netmap_adapter *hna = &hostna->up;
2386*37e3a6d3SLuigi Rizzo 			/* the hostna rings are the host rings of the bwrap.
2387*37e3a6d3SLuigi Rizzo 			 * The corresponding krings must point back to the
2388*37e3a6d3SLuigi Rizzo 			 * hostna
2389*37e3a6d3SLuigi Rizzo 			 */
2390*37e3a6d3SLuigi Rizzo 			hna->tx_rings = &na->tx_rings[na->num_tx_rings];
2391*37e3a6d3SLuigi Rizzo 			hna->tx_rings[0].na = hna;
2392*37e3a6d3SLuigi Rizzo 			hna->rx_rings = &na->rx_rings[na->num_rx_rings];
2393*37e3a6d3SLuigi Rizzo 			hna->rx_rings[0].na = hna;
2394*37e3a6d3SLuigi Rizzo 		}
2395*37e3a6d3SLuigi Rizzo 	}
2396*37e3a6d3SLuigi Rizzo 
2397*37e3a6d3SLuigi Rizzo 	/* pass down the pending ring state information */
2398*37e3a6d3SLuigi Rizzo 	for_rx_tx(t) {
2399*37e3a6d3SLuigi Rizzo 		for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
2400*37e3a6d3SLuigi Rizzo 			NMR(hwna, t)[i].nr_pending_mode =
2401*37e3a6d3SLuigi Rizzo 				NMR(na, t)[i].nr_pending_mode;
2402f9790aebSLuigi Rizzo 	}
2403f9790aebSLuigi Rizzo 
24044bf50f18SLuigi Rizzo 	/* forward the request to the hwna */
2405f9790aebSLuigi Rizzo 	error = hwna->nm_register(hwna, onoff);
2406f9790aebSLuigi Rizzo 	if (error)
2407f9790aebSLuigi Rizzo 		return error;
2408f9790aebSLuigi Rizzo 
2409*37e3a6d3SLuigi Rizzo 	/* copy up the current ring state information */
2410*37e3a6d3SLuigi Rizzo 	for_rx_tx(t) {
2411*37e3a6d3SLuigi Rizzo 		for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
2412*37e3a6d3SLuigi Rizzo 			NMR(na, t)[i].nr_mode =
2413*37e3a6d3SLuigi Rizzo 				NMR(hwna, t)[i].nr_mode;
2414*37e3a6d3SLuigi Rizzo 	}
2415*37e3a6d3SLuigi Rizzo 
24164bf50f18SLuigi Rizzo 	/* impersonate a netmap_vp_adapter */
24174bf50f18SLuigi Rizzo 	netmap_vp_reg(na, onoff);
24184bf50f18SLuigi Rizzo 	if (hostna->na_bdg)
24194bf50f18SLuigi Rizzo 		netmap_vp_reg(&hostna->up, onoff);
2420f9790aebSLuigi Rizzo 
2421f9790aebSLuigi Rizzo 	if (onoff) {
2422847bf383SLuigi Rizzo 		u_int i;
2423847bf383SLuigi Rizzo 		/* intercept the hwna nm_nofify callback on the hw rings */
2424847bf383SLuigi Rizzo 		for (i = 0; i < hwna->num_rx_rings; i++) {
2425847bf383SLuigi Rizzo 			hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify;
2426847bf383SLuigi Rizzo 			hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify;
2427847bf383SLuigi Rizzo 		}
2428847bf383SLuigi Rizzo 		i = hwna->num_rx_rings; /* for safety */
2429847bf383SLuigi Rizzo 		/* save the host ring notify unconditionally */
2430847bf383SLuigi Rizzo 		hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify;
2431847bf383SLuigi Rizzo 		if (hostna->na_bdg) {
2432847bf383SLuigi Rizzo 			/* also intercept the host ring notify */
2433847bf383SLuigi Rizzo 			hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify;
2434847bf383SLuigi Rizzo 		}
2435*37e3a6d3SLuigi Rizzo 		if (na->active_fds == 0)
2436*37e3a6d3SLuigi Rizzo 			na->na_flags |= NAF_NETMAP_ON;
2437f9790aebSLuigi Rizzo 	} else {
2438847bf383SLuigi Rizzo 		u_int i;
2439*37e3a6d3SLuigi Rizzo 
2440*37e3a6d3SLuigi Rizzo 		if (na->active_fds == 0)
2441*37e3a6d3SLuigi Rizzo 			na->na_flags &= ~NAF_NETMAP_ON;
2442*37e3a6d3SLuigi Rizzo 
2443847bf383SLuigi Rizzo 		/* reset all notify callbacks (including host ring) */
2444847bf383SLuigi Rizzo 		for (i = 0; i <= hwna->num_rx_rings; i++) {
2445847bf383SLuigi Rizzo 			hwna->rx_rings[i].nm_notify = hwna->rx_rings[i].save_notify;
2446847bf383SLuigi Rizzo 			hwna->rx_rings[i].save_notify = NULL;
2447847bf383SLuigi Rizzo 		}
2448847bf383SLuigi Rizzo 		hwna->na_lut.lut = NULL;
2449847bf383SLuigi Rizzo 		hwna->na_lut.objtotal = 0;
2450847bf383SLuigi Rizzo 		hwna->na_lut.objsize = 0;
2451f9790aebSLuigi Rizzo 	}
2452f9790aebSLuigi Rizzo 
2453f9790aebSLuigi Rizzo 	return 0;
2454f9790aebSLuigi Rizzo }
2455f9790aebSLuigi Rizzo 
24564bf50f18SLuigi Rizzo /* nm_config callback for bwrap */
2457f9790aebSLuigi Rizzo static int
2458f9790aebSLuigi Rizzo netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
2459f9790aebSLuigi Rizzo 				    u_int *rxr, u_int *rxd)
2460f9790aebSLuigi Rizzo {
2461f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
2462f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
2463f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
2464f9790aebSLuigi Rizzo 
2465f9790aebSLuigi Rizzo 	/* forward the request */
2466f9790aebSLuigi Rizzo 	netmap_update_config(hwna);
2467f9790aebSLuigi Rizzo 	/* swap the results */
2468f9790aebSLuigi Rizzo 	*txr = hwna->num_rx_rings;
2469f9790aebSLuigi Rizzo 	*txd = hwna->num_rx_desc;
2470f9790aebSLuigi Rizzo 	*rxr = hwna->num_tx_rings;
2471f9790aebSLuigi Rizzo 	*rxd = hwna->num_rx_desc;
2472f9790aebSLuigi Rizzo 
2473f9790aebSLuigi Rizzo 	return 0;
2474f9790aebSLuigi Rizzo }
2475f9790aebSLuigi Rizzo 
247617885a7bSLuigi Rizzo 
24774bf50f18SLuigi Rizzo /* nm_krings_create callback for bwrap */
2478f9790aebSLuigi Rizzo static int
2479f9790aebSLuigi Rizzo netmap_bwrap_krings_create(struct netmap_adapter *na)
2480f9790aebSLuigi Rizzo {
2481f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
2482f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
2483f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
2484*37e3a6d3SLuigi Rizzo 	int i, error = 0;
2485*37e3a6d3SLuigi Rizzo 	enum txrx t;
2486f9790aebSLuigi Rizzo 
24874bf50f18SLuigi Rizzo 	ND("%s", na->name);
2488f9790aebSLuigi Rizzo 
24894bf50f18SLuigi Rizzo 	/* impersonate a netmap_vp_adapter */
2490f9790aebSLuigi Rizzo 	error = netmap_vp_krings_create(na);
2491f9790aebSLuigi Rizzo 	if (error)
2492f9790aebSLuigi Rizzo 		return error;
2493f9790aebSLuigi Rizzo 
24944bf50f18SLuigi Rizzo 	/* also create the hwna krings */
2495f9790aebSLuigi Rizzo 	error = hwna->nm_krings_create(hwna);
2496f9790aebSLuigi Rizzo 	if (error) {
2497*37e3a6d3SLuigi Rizzo 		goto err_del_vp_rings;
2498f9790aebSLuigi Rizzo 	}
2499f9790aebSLuigi Rizzo 
2500*37e3a6d3SLuigi Rizzo 	/* get each ring slot number from the corresponding hwna ring */
2501*37e3a6d3SLuigi Rizzo 	for_rx_tx(t) {
2502*37e3a6d3SLuigi Rizzo 		enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
2503*37e3a6d3SLuigi Rizzo 		for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) {
2504*37e3a6d3SLuigi Rizzo 			NMR(na, t)[i].nkr_num_slots = NMR(hwna, r)[i].nkr_num_slots;
2505*37e3a6d3SLuigi Rizzo 		}
2506f0ea3689SLuigi Rizzo 	}
2507f9790aebSLuigi Rizzo 
2508f9790aebSLuigi Rizzo 	return 0;
2509*37e3a6d3SLuigi Rizzo 
2510*37e3a6d3SLuigi Rizzo err_del_vp_rings:
2511*37e3a6d3SLuigi Rizzo 	netmap_vp_krings_delete(na);
2512*37e3a6d3SLuigi Rizzo 
2513*37e3a6d3SLuigi Rizzo 	return error;
2514f9790aebSLuigi Rizzo }
2515f9790aebSLuigi Rizzo 
251617885a7bSLuigi Rizzo 
2517f9790aebSLuigi Rizzo static void
2518f9790aebSLuigi Rizzo netmap_bwrap_krings_delete(struct netmap_adapter *na)
2519f9790aebSLuigi Rizzo {
2520f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
2521f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
2522f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
2523f9790aebSLuigi Rizzo 
25244bf50f18SLuigi Rizzo 	ND("%s", na->name);
2525f9790aebSLuigi Rizzo 
2526f9790aebSLuigi Rizzo 	hwna->nm_krings_delete(hwna);
2527f9790aebSLuigi Rizzo 	netmap_vp_krings_delete(na);
2528f9790aebSLuigi Rizzo }
2529f9790aebSLuigi Rizzo 
253017885a7bSLuigi Rizzo 
2531f9790aebSLuigi Rizzo /* notify method for the bridge-->hwna direction */
2532f9790aebSLuigi Rizzo static int
2533847bf383SLuigi Rizzo netmap_bwrap_notify(struct netmap_kring *kring, int flags)
2534f9790aebSLuigi Rizzo {
2535847bf383SLuigi Rizzo 	struct netmap_adapter *na = kring->na;
2536847bf383SLuigi Rizzo 	struct netmap_bwrap_adapter *bna = na->na_private;
2537f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
2538847bf383SLuigi Rizzo 	u_int ring_n = kring->ring_id;
2539847bf383SLuigi Rizzo 	u_int lim = kring->nkr_num_slots - 1;
2540847bf383SLuigi Rizzo 	struct netmap_kring *hw_kring;
2541*37e3a6d3SLuigi Rizzo 	int error;
2542f9790aebSLuigi Rizzo 
2543847bf383SLuigi Rizzo 	ND("%s: na %s hwna %s",
2544847bf383SLuigi Rizzo 			(kring ? kring->name : "NULL!"),
2545847bf383SLuigi Rizzo 			(na ? na->name : "NULL!"),
2546847bf383SLuigi Rizzo 			(hwna ? hwna->name : "NULL!"));
2547f9790aebSLuigi Rizzo 	hw_kring = &hwna->tx_rings[ring_n];
2548847bf383SLuigi Rizzo 
2549*37e3a6d3SLuigi Rizzo 	if (nm_kr_tryget(hw_kring, 0, NULL)) {
2550*37e3a6d3SLuigi Rizzo 		return ENXIO;
2551*37e3a6d3SLuigi Rizzo 	}
2552f9790aebSLuigi Rizzo 
255317885a7bSLuigi Rizzo 	/* first step: simulate a user wakeup on the rx ring */
2554847bf383SLuigi Rizzo 	netmap_vp_rxsync(kring, flags);
255517885a7bSLuigi Rizzo 	ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
25564bf50f18SLuigi Rizzo 		na->name, ring_n,
255717885a7bSLuigi Rizzo 		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
255817885a7bSLuigi Rizzo 		ring->head, ring->cur, ring->tail,
255917885a7bSLuigi Rizzo 		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail);
2560847bf383SLuigi Rizzo 	/* second step: the new packets are sent on the tx ring
256117885a7bSLuigi Rizzo 	 * (which is actually the same ring)
256217885a7bSLuigi Rizzo 	 */
2563847bf383SLuigi Rizzo 	hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
2564f0ea3689SLuigi Rizzo 	error = hw_kring->nm_sync(hw_kring, flags);
2565847bf383SLuigi Rizzo 	if (error)
2566*37e3a6d3SLuigi Rizzo 		goto put_out;
256717885a7bSLuigi Rizzo 
2568847bf383SLuigi Rizzo 	/* third step: now we are back the rx ring */
256917885a7bSLuigi Rizzo 	/* claim ownership on all hw owned bufs */
2570847bf383SLuigi Rizzo 	kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */
257117885a7bSLuigi Rizzo 
2572847bf383SLuigi Rizzo 	/* fourth step: the user goes to sleep again, causing another rxsync */
2573847bf383SLuigi Rizzo 	netmap_vp_rxsync(kring, flags);
257417885a7bSLuigi Rizzo 	ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
25754bf50f18SLuigi Rizzo 		na->name, ring_n,
257617885a7bSLuigi Rizzo 		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
257717885a7bSLuigi Rizzo 		ring->head, ring->cur, ring->tail,
257817885a7bSLuigi Rizzo 		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
2579*37e3a6d3SLuigi Rizzo put_out:
2580847bf383SLuigi Rizzo 	nm_kr_put(hw_kring);
2581*37e3a6d3SLuigi Rizzo 
2582*37e3a6d3SLuigi Rizzo 	return error ? error : NM_IRQ_COMPLETED;
2583f9790aebSLuigi Rizzo }
2584f9790aebSLuigi Rizzo 
258517885a7bSLuigi Rizzo 
25864bf50f18SLuigi Rizzo /* nm_bdg_ctl callback for the bwrap.
25874bf50f18SLuigi Rizzo  * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
25884bf50f18SLuigi Rizzo  * On attach, it needs to provide a fake netmap_priv_d structure and
25894bf50f18SLuigi Rizzo  * perform a netmap_do_regif() on the bwrap. This will put both the
25904bf50f18SLuigi Rizzo  * bwrap and the hwna in netmap mode, with the netmap rings shared
25914bf50f18SLuigi Rizzo  * and cross linked. Moroever, it will start intercepting interrupts
25924bf50f18SLuigi Rizzo  * directed to hwna.
25934bf50f18SLuigi Rizzo  */
2594f9790aebSLuigi Rizzo static int
25954bf50f18SLuigi Rizzo netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
25964bf50f18SLuigi Rizzo {
25974bf50f18SLuigi Rizzo 	struct netmap_priv_d *npriv;
25984bf50f18SLuigi Rizzo 	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
25994bf50f18SLuigi Rizzo 	int error = 0;
26004bf50f18SLuigi Rizzo 
26014bf50f18SLuigi Rizzo 	if (attach) {
26024bf50f18SLuigi Rizzo 		if (NETMAP_OWNED_BY_ANY(na)) {
26034bf50f18SLuigi Rizzo 			return EBUSY;
26044bf50f18SLuigi Rizzo 		}
26054bf50f18SLuigi Rizzo 		if (bna->na_kpriv) {
26064bf50f18SLuigi Rizzo 			/* nothing to do */
26074bf50f18SLuigi Rizzo 			return 0;
26084bf50f18SLuigi Rizzo 		}
2609*37e3a6d3SLuigi Rizzo 		npriv = netmap_priv_new();
26104bf50f18SLuigi Rizzo 		if (npriv == NULL)
26114bf50f18SLuigi Rizzo 			return ENOMEM;
2612*37e3a6d3SLuigi Rizzo 		npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */
2613*37e3a6d3SLuigi Rizzo 		error = netmap_do_regif(npriv, na, 0, NR_REG_NIC_SW);
2614847bf383SLuigi Rizzo 		if (error) {
2615*37e3a6d3SLuigi Rizzo 			netmap_priv_delete(npriv);
26164bf50f18SLuigi Rizzo 			return error;
26174bf50f18SLuigi Rizzo 		}
26184bf50f18SLuigi Rizzo 		bna->na_kpriv = npriv;
26194bf50f18SLuigi Rizzo 		na->na_flags |= NAF_BUSY;
26204bf50f18SLuigi Rizzo 	} else {
26214bf50f18SLuigi Rizzo 		if (na->active_fds == 0) /* not registered */
26224bf50f18SLuigi Rizzo 			return EINVAL;
2623*37e3a6d3SLuigi Rizzo 		netmap_priv_delete(bna->na_kpriv);
26244bf50f18SLuigi Rizzo 		bna->na_kpriv = NULL;
26254bf50f18SLuigi Rizzo 		na->na_flags &= ~NAF_BUSY;
26264bf50f18SLuigi Rizzo 	}
26274bf50f18SLuigi Rizzo 	return error;
26284bf50f18SLuigi Rizzo 
26294bf50f18SLuigi Rizzo }
26304bf50f18SLuigi Rizzo 
26314bf50f18SLuigi Rizzo /* attach a bridge wrapper to the 'real' device */
26324bf50f18SLuigi Rizzo int
26334bf50f18SLuigi Rizzo netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
2634f9790aebSLuigi Rizzo {
2635f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna;
26364bf50f18SLuigi Rizzo 	struct netmap_adapter *na = NULL;
26374bf50f18SLuigi Rizzo 	struct netmap_adapter *hostna = NULL;
26384bf50f18SLuigi Rizzo 	int error = 0;
2639847bf383SLuigi Rizzo 	enum txrx t;
2640f9790aebSLuigi Rizzo 
26414bf50f18SLuigi Rizzo 	/* make sure the NIC is not already in use */
26424bf50f18SLuigi Rizzo 	if (NETMAP_OWNED_BY_ANY(hwna)) {
26434bf50f18SLuigi Rizzo 		D("NIC %s busy, cannot attach to bridge", hwna->name);
26444bf50f18SLuigi Rizzo 		return EBUSY;
26454bf50f18SLuigi Rizzo 	}
2646f9790aebSLuigi Rizzo 
2647f9790aebSLuigi Rizzo 	bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
26484bf50f18SLuigi Rizzo 	if (bna == NULL) {
2649f9790aebSLuigi Rizzo 		return ENOMEM;
26504bf50f18SLuigi Rizzo 	}
2651f9790aebSLuigi Rizzo 
2652f9790aebSLuigi Rizzo 	na = &bna->up.up;
2653*37e3a6d3SLuigi Rizzo 	/* make bwrap ifp point to the real ifp */
2654*37e3a6d3SLuigi Rizzo 	na->ifp = hwna->ifp;
2655847bf383SLuigi Rizzo 	na->na_private = bna;
26564bf50f18SLuigi Rizzo 	strncpy(na->name, nr_name, sizeof(na->name));
2657f9790aebSLuigi Rizzo 	/* fill the ring data for the bwrap adapter with rx/tx meanings
2658f9790aebSLuigi Rizzo 	 * swapped. The real cross-linking will be done during register,
2659f9790aebSLuigi Rizzo 	 * when all the krings will have been created.
2660f9790aebSLuigi Rizzo 	 */
2661847bf383SLuigi Rizzo 	for_rx_tx(t) {
2662847bf383SLuigi Rizzo 		enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
2663847bf383SLuigi Rizzo 		nma_set_nrings(na, t, nma_get_nrings(hwna, r));
2664847bf383SLuigi Rizzo 		nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
2665847bf383SLuigi Rizzo 	}
2666f9790aebSLuigi Rizzo 	na->nm_dtor = netmap_bwrap_dtor;
2667*37e3a6d3SLuigi Rizzo 	na->nm_register = netmap_bwrap_reg;
2668f9790aebSLuigi Rizzo 	// na->nm_txsync = netmap_bwrap_txsync;
2669f9790aebSLuigi Rizzo 	// na->nm_rxsync = netmap_bwrap_rxsync;
2670f9790aebSLuigi Rizzo 	na->nm_config = netmap_bwrap_config;
2671f9790aebSLuigi Rizzo 	na->nm_krings_create = netmap_bwrap_krings_create;
2672f9790aebSLuigi Rizzo 	na->nm_krings_delete = netmap_bwrap_krings_delete;
2673f9790aebSLuigi Rizzo 	na->nm_notify = netmap_bwrap_notify;
26744bf50f18SLuigi Rizzo 	na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
26754bf50f18SLuigi Rizzo 	na->pdev = hwna->pdev;
2676*37e3a6d3SLuigi Rizzo 	na->nm_mem = hwna->nm_mem;
2677*37e3a6d3SLuigi Rizzo 	na->virt_hdr_len = hwna->virt_hdr_len;
2678f9790aebSLuigi Rizzo 	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
2679f9790aebSLuigi Rizzo 
2680f9790aebSLuigi Rizzo 	bna->hwna = hwna;
2681f9790aebSLuigi Rizzo 	netmap_adapter_get(hwna);
2682f9790aebSLuigi Rizzo 	hwna->na_private = bna; /* weak reference */
26834bf50f18SLuigi Rizzo 	hwna->na_vp = &bna->up;
2684f9790aebSLuigi Rizzo 
2685f0ea3689SLuigi Rizzo 	if (hwna->na_flags & NAF_HOST_RINGS) {
26864bf50f18SLuigi Rizzo 		if (hwna->na_flags & NAF_SW_ONLY)
26874bf50f18SLuigi Rizzo 			na->na_flags |= NAF_SW_ONLY;
2688f0ea3689SLuigi Rizzo 		na->na_flags |= NAF_HOST_RINGS;
2689f9790aebSLuigi Rizzo 		hostna = &bna->host.up;
26904bf50f18SLuigi Rizzo 		snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name);
2691f9790aebSLuigi Rizzo 		hostna->ifp = hwna->ifp;
2692847bf383SLuigi Rizzo 		for_rx_tx(t) {
2693847bf383SLuigi Rizzo 			enum txrx r = nm_txrx_swap(t);
2694847bf383SLuigi Rizzo 			nma_set_nrings(hostna, t, 1);
2695847bf383SLuigi Rizzo 			nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
2696847bf383SLuigi Rizzo 		}
2697f9790aebSLuigi Rizzo 		// hostna->nm_txsync = netmap_bwrap_host_txsync;
2698f9790aebSLuigi Rizzo 		// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
2699847bf383SLuigi Rizzo 		hostna->nm_notify = netmap_bwrap_notify;
2700f9790aebSLuigi Rizzo 		hostna->nm_mem = na->nm_mem;
2701f9790aebSLuigi Rizzo 		hostna->na_private = bna;
27024bf50f18SLuigi Rizzo 		hostna->na_vp = &bna->up;
27034bf50f18SLuigi Rizzo 		na->na_hostvp = hwna->na_hostvp =
27044bf50f18SLuigi Rizzo 			hostna->na_hostvp = &bna->host;
27054bf50f18SLuigi Rizzo 		hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
2706f0ea3689SLuigi Rizzo 	}
2707f9790aebSLuigi Rizzo 
270817885a7bSLuigi Rizzo 	ND("%s<->%s txr %d txd %d rxr %d rxd %d",
27094bf50f18SLuigi Rizzo 		na->name, ifp->if_xname,
2710f9790aebSLuigi Rizzo 		na->num_tx_rings, na->num_tx_desc,
2711f9790aebSLuigi Rizzo 		na->num_rx_rings, na->num_rx_desc);
2712f9790aebSLuigi Rizzo 
2713f9790aebSLuigi Rizzo 	error = netmap_attach_common(na);
2714f9790aebSLuigi Rizzo 	if (error) {
27154bf50f18SLuigi Rizzo 		goto err_free;
27164bf50f18SLuigi Rizzo 	}
27174bf50f18SLuigi Rizzo 	hwna->na_flags |= NAF_BUSY;
27184bf50f18SLuigi Rizzo 	return 0;
27194bf50f18SLuigi Rizzo 
27204bf50f18SLuigi Rizzo err_free:
27214bf50f18SLuigi Rizzo 	hwna->na_vp = hwna->na_hostvp = NULL;
2722f9790aebSLuigi Rizzo 	netmap_adapter_put(hwna);
2723f9790aebSLuigi Rizzo 	free(bna, M_DEVBUF);
2724f9790aebSLuigi Rizzo 	return error;
27254bf50f18SLuigi Rizzo 
2726f9790aebSLuigi Rizzo }
2727f9790aebSLuigi Rizzo 
2728847bf383SLuigi Rizzo struct nm_bridge *
2729847bf383SLuigi Rizzo netmap_init_bridges2(u_int n)
2730f9790aebSLuigi Rizzo {
2731f9790aebSLuigi Rizzo 	int i;
2732847bf383SLuigi Rizzo 	struct nm_bridge *b;
2733847bf383SLuigi Rizzo 
2734847bf383SLuigi Rizzo 	b = malloc(sizeof(struct nm_bridge) * n, M_DEVBUF,
2735847bf383SLuigi Rizzo 		M_NOWAIT | M_ZERO);
2736847bf383SLuigi Rizzo 	if (b == NULL)
2737847bf383SLuigi Rizzo 		return NULL;
2738847bf383SLuigi Rizzo 	for (i = 0; i < n; i++)
2739847bf383SLuigi Rizzo 		BDG_RWINIT(&b[i]);
2740847bf383SLuigi Rizzo 	return b;
2741847bf383SLuigi Rizzo }
2742847bf383SLuigi Rizzo 
2743847bf383SLuigi Rizzo void
2744847bf383SLuigi Rizzo netmap_uninit_bridges2(struct nm_bridge *b, u_int n)
2745847bf383SLuigi Rizzo {
2746847bf383SLuigi Rizzo 	int i;
2747847bf383SLuigi Rizzo 
2748847bf383SLuigi Rizzo 	if (b == NULL)
2749847bf383SLuigi Rizzo 		return;
2750847bf383SLuigi Rizzo 
2751847bf383SLuigi Rizzo 	for (i = 0; i < n; i++)
2752847bf383SLuigi Rizzo 		BDG_RWDESTROY(&b[i]);
2753847bf383SLuigi Rizzo 	free(b, M_DEVBUF);
2754847bf383SLuigi Rizzo }
2755847bf383SLuigi Rizzo 
2756847bf383SLuigi Rizzo int
2757847bf383SLuigi Rizzo netmap_init_bridges(void)
2758847bf383SLuigi Rizzo {
2759847bf383SLuigi Rizzo #ifdef CONFIG_NET_NS
2760847bf383SLuigi Rizzo 	return netmap_bns_register();
2761847bf383SLuigi Rizzo #else
2762847bf383SLuigi Rizzo 	nm_bridges = netmap_init_bridges2(NM_BRIDGES);
2763847bf383SLuigi Rizzo 	if (nm_bridges == NULL)
2764847bf383SLuigi Rizzo 		return ENOMEM;
2765847bf383SLuigi Rizzo 	return 0;
2766847bf383SLuigi Rizzo #endif
2767847bf383SLuigi Rizzo }
2768847bf383SLuigi Rizzo 
2769847bf383SLuigi Rizzo void
2770847bf383SLuigi Rizzo netmap_uninit_bridges(void)
2771847bf383SLuigi Rizzo {
2772847bf383SLuigi Rizzo #ifdef CONFIG_NET_NS
2773847bf383SLuigi Rizzo 	netmap_bns_unregister();
2774847bf383SLuigi Rizzo #else
2775847bf383SLuigi Rizzo 	netmap_uninit_bridges2(nm_bridges, NM_BRIDGES);
2776847bf383SLuigi Rizzo #endif
2777f9790aebSLuigi Rizzo }
2778f9790aebSLuigi Rizzo #endif /* WITH_VALE */
2779