xref: /freebsd-14.2/sys/dev/netmap/netmap_vale.c (revision c3e9b4db)
1f9790aebSLuigi Rizzo /*
237e3a6d3SLuigi Rizzo  * Copyright (C) 2013-2016 Universita` di Pisa
337e3a6d3SLuigi Rizzo  * All rights reserved.
4f9790aebSLuigi Rizzo  *
5f9790aebSLuigi Rizzo  * Redistribution and use in source and binary forms, with or without
6f9790aebSLuigi Rizzo  * modification, are permitted provided that the following conditions
7f9790aebSLuigi Rizzo  * are met:
8f9790aebSLuigi Rizzo  *   1. Redistributions of source code must retain the above copyright
9f9790aebSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer.
10f9790aebSLuigi Rizzo  *   2. Redistributions in binary form must reproduce the above copyright
11f9790aebSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer in the
12f9790aebSLuigi Rizzo  *      documentation and/or other materials provided with the distribution.
13f9790aebSLuigi Rizzo  *
14f9790aebSLuigi Rizzo  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15f9790aebSLuigi Rizzo  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16f9790aebSLuigi Rizzo  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17f9790aebSLuigi Rizzo  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18f9790aebSLuigi Rizzo  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19f9790aebSLuigi Rizzo  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20f9790aebSLuigi Rizzo  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21f9790aebSLuigi Rizzo  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22f9790aebSLuigi Rizzo  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23f9790aebSLuigi Rizzo  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24f9790aebSLuigi Rizzo  * SUCH DAMAGE.
25f9790aebSLuigi Rizzo  */
26f9790aebSLuigi Rizzo 
27f9790aebSLuigi Rizzo 
/*
 * This module implements the VALE switch for netmap

--- VALE SWITCH ---

NMG_LOCK() serializes all modifications to switches and ports.
A switch cannot be deleted until all ports are gone.

For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring or deleting a port, the
lock is acquired in exclusive mode (after holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.

On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch.)

 */
52f9790aebSLuigi Rizzo 
53f9790aebSLuigi Rizzo /*
54f9790aebSLuigi Rizzo  * OS-specific code that is used only within this file.
55f9790aebSLuigi Rizzo  * Other OS-specific code that must be accessed by drivers
56f9790aebSLuigi Rizzo  * is present in netmap_kern.h
57f9790aebSLuigi Rizzo  */
58f9790aebSLuigi Rizzo 
59f9790aebSLuigi Rizzo #if defined(__FreeBSD__)
60f9790aebSLuigi Rizzo #include <sys/cdefs.h> /* prerequisite */
61f9790aebSLuigi Rizzo __FBSDID("$FreeBSD$");
62f9790aebSLuigi Rizzo 
63f9790aebSLuigi Rizzo #include <sys/types.h>
64f9790aebSLuigi Rizzo #include <sys/errno.h>
65f9790aebSLuigi Rizzo #include <sys/param.h>	/* defines used in kernel.h */
66f9790aebSLuigi Rizzo #include <sys/kernel.h>	/* types used in module initialization */
67f9790aebSLuigi Rizzo #include <sys/conf.h>	/* cdevsw struct, UID, GID */
68f9790aebSLuigi Rizzo #include <sys/sockio.h>
69f9790aebSLuigi Rizzo #include <sys/socketvar.h>	/* struct socket */
70f9790aebSLuigi Rizzo #include <sys/malloc.h>
71f9790aebSLuigi Rizzo #include <sys/poll.h>
72f9790aebSLuigi Rizzo #include <sys/rwlock.h>
73f9790aebSLuigi Rizzo #include <sys/socket.h> /* sockaddrs */
74f9790aebSLuigi Rizzo #include <sys/selinfo.h>
75f9790aebSLuigi Rizzo #include <sys/sysctl.h>
76f9790aebSLuigi Rizzo #include <net/if.h>
77f9790aebSLuigi Rizzo #include <net/if_var.h>
78f9790aebSLuigi Rizzo #include <net/bpf.h>		/* BIOCIMMEDIATE */
79f9790aebSLuigi Rizzo #include <machine/bus.h>	/* bus_dmamap_* */
80f9790aebSLuigi Rizzo #include <sys/endian.h>
81f9790aebSLuigi Rizzo #include <sys/refcount.h>
82f9790aebSLuigi Rizzo 
83f9790aebSLuigi Rizzo 
/*
 * FreeBSD primitives for the bdg_lock member of a bridge descriptor.
 * Every macro takes a pointer to the object that embeds bdg_lock.
 */
#define BDG_RWLOCK_T		struct rwlock

/* set up / tear down the lock */
#define	BDG_RWINIT(b)		\
	rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
#define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)

/* exclusive (writer) side */
#define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)
#define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)

/* shared (reader) side */
#define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)
#define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)
#define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)
94f9790aebSLuigi Rizzo 
95f9790aebSLuigi Rizzo 
96f9790aebSLuigi Rizzo #elif defined(linux)
97f9790aebSLuigi Rizzo 
98f9790aebSLuigi Rizzo #include "bsd_glue.h"
99f9790aebSLuigi Rizzo 
100f9790aebSLuigi Rizzo #elif defined(__APPLE__)
101f9790aebSLuigi Rizzo 
102f9790aebSLuigi Rizzo #warning OSX support is only partial
103f9790aebSLuigi Rizzo #include "osx_glue.h"
104f9790aebSLuigi Rizzo 
10537e3a6d3SLuigi Rizzo #elif defined(_WIN32)
10637e3a6d3SLuigi Rizzo #include "win_glue.h"
10737e3a6d3SLuigi Rizzo 
108f9790aebSLuigi Rizzo #else
109f9790aebSLuigi Rizzo 
110f9790aebSLuigi Rizzo #error	Unsupported platform
111f9790aebSLuigi Rizzo 
112f9790aebSLuigi Rizzo #endif /* unsupported */
113f9790aebSLuigi Rizzo 
114f9790aebSLuigi Rizzo /*
115f9790aebSLuigi Rizzo  * common headers
116f9790aebSLuigi Rizzo  */
117f9790aebSLuigi Rizzo 
118f9790aebSLuigi Rizzo #include <net/netmap.h>
119f9790aebSLuigi Rizzo #include <dev/netmap/netmap_kern.h>
120f9790aebSLuigi Rizzo #include <dev/netmap/netmap_mem2.h>
121f9790aebSLuigi Rizzo 
122f9790aebSLuigi Rizzo #ifdef WITH_VALE
123f9790aebSLuigi Rizzo 
/*
 * System parameters (most of them live in netmap_kern.h):
 *
 * NM_BDG_NAME		prefix for switch port names, default "vale"
 * NM_BDG_MAXPORTS	number of ports
 * NM_BRIDGES		max number of switches in the system.
 *			XXX should become a sysctl or tunable
 *
 * Switch ports are named valeX:Y, where X is the switch name and Y
 * is the port. If Y matches a physical interface name, the port is
 * connected to a physical device.
 *
 * Unlike physical interfaces, switch ports use their own memory region
 * for rings and buffers.
 * The virtual interfaces use a per-queue lock instead of the core lock.
 * In the tx loop, traffic is aggregated in batches to make all
 * operations faster. The batch size is bridge_batch.
 */
#define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
#define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
#define NM_BRIDGE_RINGSIZE	1024	/* in the device */
#define NM_BDG_HASH		1024	/* forwarding table entries */
#define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
#define NM_MULTISEG		64	/* max size of a chain of bufs */

/* Actual size of the tables: one batch plus room for one full chain. */
#define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)

/* NM_FT_NULL terminates a list of slots in the ft. */
#define NM_FT_NULL		NM_BDG_BATCH_MAX
151f9790aebSLuigi Rizzo 
152f9790aebSLuigi Rizzo 
153f9790aebSLuigi Rizzo /*
154f9790aebSLuigi Rizzo  * bridge_batch is set via sysctl to the max batch size to be
155f9790aebSLuigi Rizzo  * used in the bridge. The actual value may be larger as the
156f9790aebSLuigi Rizzo  * last packet in the block may overflow the size.
157f9790aebSLuigi Rizzo  */
15837e3a6d3SLuigi Rizzo static int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
15937e3a6d3SLuigi Rizzo SYSBEGIN(vars_vale);
160f9790aebSLuigi Rizzo SYSCTL_DECL(_dev_netmap);
161f9790aebSLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
16237e3a6d3SLuigi Rizzo SYSEND;
163f9790aebSLuigi Rizzo 
164*c3e9b4dbSLuiz Otavio O Souza static int netmap_vp_create(struct nmreq *, struct ifnet *,
165*c3e9b4dbSLuiz Otavio O Souza 		struct netmap_mem_d *nmd, struct netmap_vp_adapter **);
1664bf50f18SLuigi Rizzo static int netmap_vp_reg(struct netmap_adapter *na, int onoff);
16737e3a6d3SLuigi Rizzo static int netmap_bwrap_reg(struct netmap_adapter *, int onoff);
168f9790aebSLuigi Rizzo 
/*
 * Per-destination queue descriptor: for each output interface an
 * nm_bdg_q is used to construct a list.  bq_head and bq_tail are set
 * to NM_FT_NULL when the list is empty.
 * bq_len counts output buffers, which can differ from the number of
 * list entries because buffers may be coalesced during the copy.
 */
struct nm_bdg_q {
	uint16_t bq_head;	/* first entry of the list */
	uint16_t bq_tail;	/* last entry of the list */
	uint32_t bq_len;	/* number of buffers */
};
179f9790aebSLuigi Rizzo 
/*
 * One entry of the bridge forwarding table (MAC + ports).
 * XXX revise this
 */
struct nm_hash_ent {
	uint64_t	mac;	/* the top 2 bytes are the epoch */
	uint64_t	ports;
};
185f9790aebSLuigi Rizzo 
186f9790aebSLuigi Rizzo /*
187f9790aebSLuigi Rizzo  * nm_bridge is a descriptor for a VALE switch.
188f9790aebSLuigi Rizzo  * Interfaces for a bridge are all in bdg_ports[].
189f9790aebSLuigi Rizzo  * The array has fixed size, an empty entry does not terminate
190f9790aebSLuigi Rizzo  * the search, but lookups only occur on attach/detach so we
191f9790aebSLuigi Rizzo  * don't mind if they are slow.
192f9790aebSLuigi Rizzo  *
193f9790aebSLuigi Rizzo  * The bridge is non blocking on the transmit ports: excess
194f9790aebSLuigi Rizzo  * packets are dropped if there is no room on the output port.
195f9790aebSLuigi Rizzo  *
196f9790aebSLuigi Rizzo  * bdg_lock protects accesses to the bdg_ports array.
197f9790aebSLuigi Rizzo  * This is a rw lock (or equivalent).
198f9790aebSLuigi Rizzo  */
199f9790aebSLuigi Rizzo struct nm_bridge {
200f9790aebSLuigi Rizzo 	/* XXX what is the proper alignment/layout ? */
201f9790aebSLuigi Rizzo 	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
202f9790aebSLuigi Rizzo 	int		bdg_namelen;
203f9790aebSLuigi Rizzo 	uint32_t	bdg_active_ports; /* 0 means free */
204f9790aebSLuigi Rizzo 	char		bdg_basename[IFNAMSIZ];
205f9790aebSLuigi Rizzo 
206f9790aebSLuigi Rizzo 	/* Indexes of active ports (up to active_ports)
207f9790aebSLuigi Rizzo 	 * and all other remaining ports.
208f9790aebSLuigi Rizzo 	 */
209f9790aebSLuigi Rizzo 	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];
210f9790aebSLuigi Rizzo 
211f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
212f9790aebSLuigi Rizzo 
213f9790aebSLuigi Rizzo 
214f9790aebSLuigi Rizzo 	/*
215f9790aebSLuigi Rizzo 	 * The function to decide the destination port.
216f9790aebSLuigi Rizzo 	 * It returns either of an index of the destination port,
217f9790aebSLuigi Rizzo 	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
218f9790aebSLuigi Rizzo 	 * forward this packet.  ring_nr is the source ring index, and the
219f9790aebSLuigi Rizzo 	 * function may overwrite this value to forward this packet to a
220f9790aebSLuigi Rizzo 	 * different ring index.
22137e3a6d3SLuigi Rizzo 	 * This function must be set by netmap_bdg_ctl().
222f9790aebSLuigi Rizzo 	 */
2234bf50f18SLuigi Rizzo 	struct netmap_bdg_ops bdg_ops;
224f9790aebSLuigi Rizzo 
225f9790aebSLuigi Rizzo 	/* the forwarding table, MAC+ports.
226f9790aebSLuigi Rizzo 	 * XXX should be changed to an argument to be passed to
227f9790aebSLuigi Rizzo 	 * the lookup function, and allocated on attach
228f9790aebSLuigi Rizzo 	 */
229f9790aebSLuigi Rizzo 	struct nm_hash_ent ht[NM_BDG_HASH];
230847bf383SLuigi Rizzo 
231847bf383SLuigi Rizzo #ifdef CONFIG_NET_NS
232847bf383SLuigi Rizzo 	struct net *ns;
233847bf383SLuigi Rizzo #endif /* CONFIG_NET_NS */
234f9790aebSLuigi Rizzo };
235f9790aebSLuigi Rizzo 
2364bf50f18SLuigi Rizzo const char*
2374bf50f18SLuigi Rizzo netmap_bdg_name(struct netmap_vp_adapter *vp)
2384bf50f18SLuigi Rizzo {
2394bf50f18SLuigi Rizzo 	struct nm_bridge *b = vp->na_bdg;
2404bf50f18SLuigi Rizzo 	if (b == NULL)
2414bf50f18SLuigi Rizzo 		return NULL;
2424bf50f18SLuigi Rizzo 	return b->bdg_basename;
2434bf50f18SLuigi Rizzo }
2444bf50f18SLuigi Rizzo 
245f9790aebSLuigi Rizzo 
#ifndef CONFIG_NET_NS
/*
 * Bridge descriptors, used when network namespaces are not configured.
 * XXX in principle nm_bridges could be created dynamically.
 * Right now we have a static array and deletions are protected
 * by an exclusive lock.
 */
static struct nm_bridge *nm_bridges;
#endif /* !CONFIG_NET_NS */
254f9790aebSLuigi Rizzo 
255f9790aebSLuigi Rizzo 
/*
 * Slightly optimized copy routine.
 *
 * Lengths of 1024 bytes or more are handed to memcpy() unchanged.
 * Shorter lengths are copied in whole 64-byte chunks (eight 64-bit
 * words at a time), i.e. the length is rounded up to a multiple of 64,
 * which is often faster than dealing with odd sizes.  The caller must
 * make sure both buffers have room for the rounded-up length.
 *
 * XXX only for multiples of 64 bytes, non overlapped.
 */
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
	uint64_t *from = _src;
	uint64_t *to = _dst;

	if (unlikely(l >= 1024)) {
		memcpy(to, from, l);
		return;
	}
	while (likely(l > 0)) {
		/* one 64-byte chunk, kept unrolled */
		to[0] = from[0];
		to[1] = from[1];
		to[2] = from[2];
		to[3] = from[3];
		to[4] = from[4];
		to[5] = from[5];
		to[6] = from[6];
		to[7] = from[7];
		from += 8;
		to += 8;
		l -= 64;
	}
}
284f9790aebSLuigi Rizzo 
285f9790aebSLuigi Rizzo 
/*
 * Return 1 if c may appear in a VALE identifier, i.e. it is an ASCII
 * letter, an ASCII digit or '_'; return 0 otherwise.
 */
static int
nm_is_id_char(const char c)
{
	if (c == '_')
		return 1;
	if (c >= '0' && c <= '9')
		return 1;
	if (c >= 'a' && c <= 'z')
		return 1;
	return (c >= 'A' && c <= 'Z');
}
29437e3a6d3SLuigi Rizzo 
29537e3a6d3SLuigi Rizzo /* Validate the name of a VALE bridge port and return the
29637e3a6d3SLuigi Rizzo  * position of the ":" character. */
29737e3a6d3SLuigi Rizzo static int
29837e3a6d3SLuigi Rizzo nm_vale_name_validate(const char *name)
29937e3a6d3SLuigi Rizzo {
30037e3a6d3SLuigi Rizzo 	int colon_pos = -1;
30137e3a6d3SLuigi Rizzo 	int i;
30237e3a6d3SLuigi Rizzo 
30337e3a6d3SLuigi Rizzo 	if (!name || strlen(name) < strlen(NM_BDG_NAME)) {
30437e3a6d3SLuigi Rizzo 		return -1;
30537e3a6d3SLuigi Rizzo 	}
30637e3a6d3SLuigi Rizzo 
30737e3a6d3SLuigi Rizzo 	for (i = 0; name[i]; i++) {
30837e3a6d3SLuigi Rizzo 		if (name[i] == ':') {
30937e3a6d3SLuigi Rizzo 			if (colon_pos != -1) {
31037e3a6d3SLuigi Rizzo 				return -1;
31137e3a6d3SLuigi Rizzo 			}
31237e3a6d3SLuigi Rizzo 			colon_pos = i;
31337e3a6d3SLuigi Rizzo 		} else if (!nm_is_id_char(name[i])) {
31437e3a6d3SLuigi Rizzo 			return -1;
31537e3a6d3SLuigi Rizzo 		}
31637e3a6d3SLuigi Rizzo 	}
31737e3a6d3SLuigi Rizzo 
31837e3a6d3SLuigi Rizzo 	if (i >= IFNAMSIZ) {
31937e3a6d3SLuigi Rizzo 		return -1;
32037e3a6d3SLuigi Rizzo 	}
32137e3a6d3SLuigi Rizzo 
32237e3a6d3SLuigi Rizzo 	return colon_pos;
32337e3a6d3SLuigi Rizzo }
32437e3a6d3SLuigi Rizzo 
325f9790aebSLuigi Rizzo /*
326f9790aebSLuigi Rizzo  * locate a bridge among the existing ones.
327f9790aebSLuigi Rizzo  * MUST BE CALLED WITH NMG_LOCK()
328f9790aebSLuigi Rizzo  *
329f9790aebSLuigi Rizzo  * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
330f9790aebSLuigi Rizzo  * We assume that this is called with a name of at least NM_NAME chars.
331f9790aebSLuigi Rizzo  */
332f9790aebSLuigi Rizzo static struct nm_bridge *
333f9790aebSLuigi Rizzo nm_find_bridge(const char *name, int create)
334f9790aebSLuigi Rizzo {
33537e3a6d3SLuigi Rizzo 	int i, namelen;
336847bf383SLuigi Rizzo 	struct nm_bridge *b = NULL, *bridges;
337847bf383SLuigi Rizzo 	u_int num_bridges;
338f9790aebSLuigi Rizzo 
339f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
340f9790aebSLuigi Rizzo 
341847bf383SLuigi Rizzo 	netmap_bns_getbridges(&bridges, &num_bridges);
342847bf383SLuigi Rizzo 
34337e3a6d3SLuigi Rizzo 	namelen = nm_vale_name_validate(name);
34437e3a6d3SLuigi Rizzo 	if (namelen < 0) {
345f9790aebSLuigi Rizzo 		D("invalid bridge name %s", name ? name : NULL);
346f9790aebSLuigi Rizzo 		return NULL;
347f9790aebSLuigi Rizzo 	}
348f9790aebSLuigi Rizzo 
349f9790aebSLuigi Rizzo 	/* lookup the name, remember empty slot if there is one */
350847bf383SLuigi Rizzo 	for (i = 0; i < num_bridges; i++) {
351847bf383SLuigi Rizzo 		struct nm_bridge *x = bridges + i;
352f9790aebSLuigi Rizzo 
353f9790aebSLuigi Rizzo 		if (x->bdg_active_ports == 0) {
354f9790aebSLuigi Rizzo 			if (create && b == NULL)
355f9790aebSLuigi Rizzo 				b = x;	/* record empty slot */
356f9790aebSLuigi Rizzo 		} else if (x->bdg_namelen != namelen) {
357f9790aebSLuigi Rizzo 			continue;
358f9790aebSLuigi Rizzo 		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
359f9790aebSLuigi Rizzo 			ND("found '%.*s' at %d", namelen, name, i);
360f9790aebSLuigi Rizzo 			b = x;
361f9790aebSLuigi Rizzo 			break;
362f9790aebSLuigi Rizzo 		}
363f9790aebSLuigi Rizzo 	}
364847bf383SLuigi Rizzo 	if (i == num_bridges && b) { /* name not found, can create entry */
365f9790aebSLuigi Rizzo 		/* initialize the bridge */
366f9790aebSLuigi Rizzo 		strncpy(b->bdg_basename, name, namelen);
367f9790aebSLuigi Rizzo 		ND("create new bridge %s with ports %d", b->bdg_basename,
368f9790aebSLuigi Rizzo 			b->bdg_active_ports);
369f9790aebSLuigi Rizzo 		b->bdg_namelen = namelen;
370f9790aebSLuigi Rizzo 		b->bdg_active_ports = 0;
371f9790aebSLuigi Rizzo 		for (i = 0; i < NM_BDG_MAXPORTS; i++)
372f9790aebSLuigi Rizzo 			b->bdg_port_index[i] = i;
373f9790aebSLuigi Rizzo 		/* set the default function */
3744bf50f18SLuigi Rizzo 		b->bdg_ops.lookup = netmap_bdg_learning;
375f9790aebSLuigi Rizzo 		/* reset the MAC address table */
376f9790aebSLuigi Rizzo 		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
377847bf383SLuigi Rizzo 		NM_BNS_GET(b);
378f9790aebSLuigi Rizzo 	}
379f9790aebSLuigi Rizzo 	return b;
380f9790aebSLuigi Rizzo }
381f9790aebSLuigi Rizzo 
382f9790aebSLuigi Rizzo 
383f9790aebSLuigi Rizzo /*
384f9790aebSLuigi Rizzo  * Free the forwarding tables for rings attached to switch ports.
385f9790aebSLuigi Rizzo  */
386f9790aebSLuigi Rizzo static void
387f9790aebSLuigi Rizzo nm_free_bdgfwd(struct netmap_adapter *na)
388f9790aebSLuigi Rizzo {
389f9790aebSLuigi Rizzo 	int nrings, i;
390f9790aebSLuigi Rizzo 	struct netmap_kring *kring;
391f9790aebSLuigi Rizzo 
392f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
39317885a7bSLuigi Rizzo 	nrings = na->num_tx_rings;
39417885a7bSLuigi Rizzo 	kring = na->tx_rings;
395f9790aebSLuigi Rizzo 	for (i = 0; i < nrings; i++) {
396f9790aebSLuigi Rizzo 		if (kring[i].nkr_ft) {
397*c3e9b4dbSLuiz Otavio O Souza 			nm_os_free(kring[i].nkr_ft);
398f9790aebSLuigi Rizzo 			kring[i].nkr_ft = NULL; /* protect from freeing twice */
399f9790aebSLuigi Rizzo 		}
400f9790aebSLuigi Rizzo 	}
401f9790aebSLuigi Rizzo }
402f9790aebSLuigi Rizzo 
403f9790aebSLuigi Rizzo 
404f9790aebSLuigi Rizzo /*
405f9790aebSLuigi Rizzo  * Allocate the forwarding tables for the rings attached to the bridge ports.
406f9790aebSLuigi Rizzo  */
407f9790aebSLuigi Rizzo static int
408f9790aebSLuigi Rizzo nm_alloc_bdgfwd(struct netmap_adapter *na)
409f9790aebSLuigi Rizzo {
410f9790aebSLuigi Rizzo 	int nrings, l, i, num_dstq;
411f9790aebSLuigi Rizzo 	struct netmap_kring *kring;
412f9790aebSLuigi Rizzo 
413f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
414f9790aebSLuigi Rizzo 	/* all port:rings + broadcast */
415f9790aebSLuigi Rizzo 	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
416f9790aebSLuigi Rizzo 	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
417f9790aebSLuigi Rizzo 	l += sizeof(struct nm_bdg_q) * num_dstq;
418f9790aebSLuigi Rizzo 	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
419f9790aebSLuigi Rizzo 
420847bf383SLuigi Rizzo 	nrings = netmap_real_rings(na, NR_TX);
421f9790aebSLuigi Rizzo 	kring = na->tx_rings;
422f9790aebSLuigi Rizzo 	for (i = 0; i < nrings; i++) {
423f9790aebSLuigi Rizzo 		struct nm_bdg_fwd *ft;
424f9790aebSLuigi Rizzo 		struct nm_bdg_q *dstq;
425f9790aebSLuigi Rizzo 		int j;
426f9790aebSLuigi Rizzo 
427*c3e9b4dbSLuiz Otavio O Souza 		ft = nm_os_malloc(l);
428f9790aebSLuigi Rizzo 		if (!ft) {
429f9790aebSLuigi Rizzo 			nm_free_bdgfwd(na);
430f9790aebSLuigi Rizzo 			return ENOMEM;
431f9790aebSLuigi Rizzo 		}
432f9790aebSLuigi Rizzo 		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
433f9790aebSLuigi Rizzo 		for (j = 0; j < num_dstq; j++) {
434f9790aebSLuigi Rizzo 			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
435f9790aebSLuigi Rizzo 			dstq[j].bq_len = 0;
436f9790aebSLuigi Rizzo 		}
437f9790aebSLuigi Rizzo 		kring[i].nkr_ft = ft;
438f9790aebSLuigi Rizzo 	}
439f9790aebSLuigi Rizzo 	return 0;
440f9790aebSLuigi Rizzo }
441f9790aebSLuigi Rizzo 
442f9790aebSLuigi Rizzo 
4434bf50f18SLuigi Rizzo /* remove from bridge b the ports in slots hw and sw
4444bf50f18SLuigi Rizzo  * (sw can be -1 if not needed)
4454bf50f18SLuigi Rizzo  */
446f9790aebSLuigi Rizzo static void
447f9790aebSLuigi Rizzo netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
448f9790aebSLuigi Rizzo {
449f9790aebSLuigi Rizzo 	int s_hw = hw, s_sw = sw;
450f9790aebSLuigi Rizzo 	int i, lim =b->bdg_active_ports;
451f9790aebSLuigi Rizzo 	uint8_t tmp[NM_BDG_MAXPORTS];
452f9790aebSLuigi Rizzo 
453f9790aebSLuigi Rizzo 	/*
454f9790aebSLuigi Rizzo 	New algorithm:
455f9790aebSLuigi Rizzo 	make a copy of bdg_port_index;
456f9790aebSLuigi Rizzo 	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
457f9790aebSLuigi Rizzo 	in the array of bdg_port_index, replacing them with
458f9790aebSLuigi Rizzo 	entries from the bottom of the array;
459f9790aebSLuigi Rizzo 	decrement bdg_active_ports;
460f9790aebSLuigi Rizzo 	acquire BDG_WLOCK() and copy back the array.
461f9790aebSLuigi Rizzo 	 */
462f9790aebSLuigi Rizzo 
463f0ea3689SLuigi Rizzo 	if (netmap_verbose)
464f9790aebSLuigi Rizzo 		D("detach %d and %d (lim %d)", hw, sw, lim);
465f9790aebSLuigi Rizzo 	/* make a copy of the list of active ports, update it,
466f9790aebSLuigi Rizzo 	 * and then copy back within BDG_WLOCK().
467f9790aebSLuigi Rizzo 	 */
468f9790aebSLuigi Rizzo 	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
469f9790aebSLuigi Rizzo 	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
470f9790aebSLuigi Rizzo 		if (hw >= 0 && tmp[i] == hw) {
471f9790aebSLuigi Rizzo 			ND("detach hw %d at %d", hw, i);
472f9790aebSLuigi Rizzo 			lim--; /* point to last active port */
473f9790aebSLuigi Rizzo 			tmp[i] = tmp[lim]; /* swap with i */
474f9790aebSLuigi Rizzo 			tmp[lim] = hw;	/* now this is inactive */
475f9790aebSLuigi Rizzo 			hw = -1;
476f9790aebSLuigi Rizzo 		} else if (sw >= 0 && tmp[i] == sw) {
477f9790aebSLuigi Rizzo 			ND("detach sw %d at %d", sw, i);
478f9790aebSLuigi Rizzo 			lim--;
479f9790aebSLuigi Rizzo 			tmp[i] = tmp[lim];
480f9790aebSLuigi Rizzo 			tmp[lim] = sw;
481f9790aebSLuigi Rizzo 			sw = -1;
482f9790aebSLuigi Rizzo 		} else {
483f9790aebSLuigi Rizzo 			i++;
484f9790aebSLuigi Rizzo 		}
485f9790aebSLuigi Rizzo 	}
486f9790aebSLuigi Rizzo 	if (hw >= 0 || sw >= 0) {
487f9790aebSLuigi Rizzo 		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
488f9790aebSLuigi Rizzo 	}
489f9790aebSLuigi Rizzo 
490f9790aebSLuigi Rizzo 	BDG_WLOCK(b);
4914bf50f18SLuigi Rizzo 	if (b->bdg_ops.dtor)
4924bf50f18SLuigi Rizzo 		b->bdg_ops.dtor(b->bdg_ports[s_hw]);
493f9790aebSLuigi Rizzo 	b->bdg_ports[s_hw] = NULL;
494f9790aebSLuigi Rizzo 	if (s_sw >= 0) {
495f9790aebSLuigi Rizzo 		b->bdg_ports[s_sw] = NULL;
496f9790aebSLuigi Rizzo 	}
497f9790aebSLuigi Rizzo 	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
498f9790aebSLuigi Rizzo 	b->bdg_active_ports = lim;
499f9790aebSLuigi Rizzo 	BDG_WUNLOCK(b);
500f9790aebSLuigi Rizzo 
501f9790aebSLuigi Rizzo 	ND("now %d active ports", lim);
502f9790aebSLuigi Rizzo 	if (lim == 0) {
503f9790aebSLuigi Rizzo 		ND("marking bridge %s as free", b->bdg_basename);
5044bf50f18SLuigi Rizzo 		bzero(&b->bdg_ops, sizeof(b->bdg_ops));
505847bf383SLuigi Rizzo 		NM_BNS_PUT(b);
506f9790aebSLuigi Rizzo 	}
507f9790aebSLuigi Rizzo }
508f9790aebSLuigi Rizzo 
5094bf50f18SLuigi Rizzo /* nm_bdg_ctl callback for VALE ports */
5104bf50f18SLuigi Rizzo static int
5114bf50f18SLuigi Rizzo netmap_vp_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
512f9790aebSLuigi Rizzo {
513f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
514f9790aebSLuigi Rizzo 	struct nm_bridge *b = vpna->na_bdg;
515f9790aebSLuigi Rizzo 
51637e3a6d3SLuigi Rizzo 	(void)nmr;	// XXX merge ?
5174bf50f18SLuigi Rizzo 	if (attach)
5184bf50f18SLuigi Rizzo 		return 0; /* nothing to do */
5194bf50f18SLuigi Rizzo 	if (b) {
5204bf50f18SLuigi Rizzo 		netmap_set_all_rings(na, 0 /* disable */);
5214bf50f18SLuigi Rizzo 		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
5224bf50f18SLuigi Rizzo 		vpna->na_bdg = NULL;
5234bf50f18SLuigi Rizzo 		netmap_set_all_rings(na, 1 /* enable */);
5244bf50f18SLuigi Rizzo 	}
5254bf50f18SLuigi Rizzo 	/* I have took reference just for attach */
5264bf50f18SLuigi Rizzo 	netmap_adapter_put(na);
5274bf50f18SLuigi Rizzo 	return 0;
5284bf50f18SLuigi Rizzo }
5294bf50f18SLuigi Rizzo 
5304bf50f18SLuigi Rizzo /* nm_dtor callback for ephemeral VALE ports */
5314bf50f18SLuigi Rizzo static void
5324bf50f18SLuigi Rizzo netmap_vp_dtor(struct netmap_adapter *na)
5334bf50f18SLuigi Rizzo {
5344bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
5354bf50f18SLuigi Rizzo 	struct nm_bridge *b = vpna->na_bdg;
5364bf50f18SLuigi Rizzo 
5374bf50f18SLuigi Rizzo 	ND("%s has %d references", na->name, na->na_refcount);
538f9790aebSLuigi Rizzo 
539f9790aebSLuigi Rizzo 	if (b) {
540f9790aebSLuigi Rizzo 		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
541f9790aebSLuigi Rizzo 	}
542*c3e9b4dbSLuiz Otavio O Souza 
543*c3e9b4dbSLuiz Otavio O Souza 	if (vpna->autodelete && na->ifp != NULL) {
544*c3e9b4dbSLuiz Otavio O Souza 		ND("releasing %s", na->ifp->if_xname);
545*c3e9b4dbSLuiz Otavio O Souza 		NMG_UNLOCK();
546*c3e9b4dbSLuiz Otavio O Souza 		nm_os_vi_detach(na->ifp);
547*c3e9b4dbSLuiz Otavio O Souza 		NMG_LOCK();
548*c3e9b4dbSLuiz Otavio O Souza 	}
549f9790aebSLuigi Rizzo }
550f9790aebSLuigi Rizzo 
5514bf50f18SLuigi Rizzo /* remove a persistent VALE port from the system */
5524bf50f18SLuigi Rizzo static int
5534bf50f18SLuigi Rizzo nm_vi_destroy(const char *name)
5544bf50f18SLuigi Rizzo {
5554bf50f18SLuigi Rizzo 	struct ifnet *ifp;
556*c3e9b4dbSLuiz Otavio O Souza 	struct netmap_vp_adapter *vpna;
5574bf50f18SLuigi Rizzo 	int error;
5584bf50f18SLuigi Rizzo 
5594bf50f18SLuigi Rizzo 	ifp = ifunit_ref(name);
5604bf50f18SLuigi Rizzo 	if (!ifp)
5614bf50f18SLuigi Rizzo 		return ENXIO;
5624bf50f18SLuigi Rizzo 	NMG_LOCK();
5634bf50f18SLuigi Rizzo 	/* make sure this is actually a VALE port */
56437e3a6d3SLuigi Rizzo 	if (!NM_NA_VALID(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
5654bf50f18SLuigi Rizzo 		error = EINVAL;
5664bf50f18SLuigi Rizzo 		goto err;
5674bf50f18SLuigi Rizzo 	}
5684bf50f18SLuigi Rizzo 
569*c3e9b4dbSLuiz Otavio O Souza 	vpna = (struct netmap_vp_adapter *)NA(ifp);
570*c3e9b4dbSLuiz Otavio O Souza 
571*c3e9b4dbSLuiz Otavio O Souza 	/* we can only destroy ports that were created via NETMAP_BDG_NEWIF */
572*c3e9b4dbSLuiz Otavio O Souza 	if (vpna->autodelete) {
573*c3e9b4dbSLuiz Otavio O Souza 		error = EINVAL;
574*c3e9b4dbSLuiz Otavio O Souza 		goto err;
575*c3e9b4dbSLuiz Otavio O Souza 	}
576*c3e9b4dbSLuiz Otavio O Souza 
577*c3e9b4dbSLuiz Otavio O Souza 	/* also make sure that nobody is using the inferface */
578*c3e9b4dbSLuiz Otavio O Souza 	if (NETMAP_OWNED_BY_ANY(&vpna->up) ||
579*c3e9b4dbSLuiz Otavio O Souza 	    vpna->up.na_refcount > 1 /* any ref besides the one in nm_vi_create()? */) {
5804bf50f18SLuigi Rizzo 		error = EBUSY;
5814bf50f18SLuigi Rizzo 		goto err;
5824bf50f18SLuigi Rizzo 	}
583*c3e9b4dbSLuiz Otavio O Souza 
5844bf50f18SLuigi Rizzo 	NMG_UNLOCK();
5854bf50f18SLuigi Rizzo 
5864bf50f18SLuigi Rizzo 	D("destroying a persistent vale interface %s", ifp->if_xname);
5874bf50f18SLuigi Rizzo 	/* Linux requires all the references are released
5884bf50f18SLuigi Rizzo 	 * before unregister
5894bf50f18SLuigi Rizzo 	 */
5904bf50f18SLuigi Rizzo 	netmap_detach(ifp);
591*c3e9b4dbSLuiz Otavio O Souza 	if_rele(ifp);
59237e3a6d3SLuigi Rizzo 	nm_os_vi_detach(ifp);
5934bf50f18SLuigi Rizzo 	return 0;
5944bf50f18SLuigi Rizzo 
5954bf50f18SLuigi Rizzo err:
5964bf50f18SLuigi Rizzo 	NMG_UNLOCK();
5974bf50f18SLuigi Rizzo 	if_rele(ifp);
5984bf50f18SLuigi Rizzo 	return error;
5994bf50f18SLuigi Rizzo }
6004bf50f18SLuigi Rizzo 
601*c3e9b4dbSLuiz Otavio O Souza static int
602*c3e9b4dbSLuiz Otavio O Souza nm_update_info(struct nmreq *nmr, struct netmap_adapter *na)
603*c3e9b4dbSLuiz Otavio O Souza {
604*c3e9b4dbSLuiz Otavio O Souza 	nmr->nr_rx_rings = na->num_rx_rings;
605*c3e9b4dbSLuiz Otavio O Souza 	nmr->nr_tx_rings = na->num_tx_rings;
606*c3e9b4dbSLuiz Otavio O Souza 	nmr->nr_rx_slots = na->num_rx_desc;
607*c3e9b4dbSLuiz Otavio O Souza 	nmr->nr_tx_slots = na->num_tx_desc;
608*c3e9b4dbSLuiz Otavio O Souza 	return netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, NULL, &nmr->nr_arg2);
609*c3e9b4dbSLuiz Otavio O Souza }
610*c3e9b4dbSLuiz Otavio O Souza 
6114bf50f18SLuigi Rizzo /*
6124bf50f18SLuigi Rizzo  * Create a virtual interface registered to the system.
6134bf50f18SLuigi Rizzo  * The interface will be attached to a bridge later.
6144bf50f18SLuigi Rizzo  */
615*c3e9b4dbSLuiz Otavio O Souza int
616*c3e9b4dbSLuiz Otavio O Souza netmap_vi_create(struct nmreq *nmr, int autodelete)
6174bf50f18SLuigi Rizzo {
6184bf50f18SLuigi Rizzo 	struct ifnet *ifp;
6194bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *vpna;
620*c3e9b4dbSLuiz Otavio O Souza 	struct netmap_mem_d *nmd = NULL;
6214bf50f18SLuigi Rizzo 	int error;
6224bf50f18SLuigi Rizzo 
6234bf50f18SLuigi Rizzo 	/* don't include VALE prefix */
62437e3a6d3SLuigi Rizzo 	if (!strncmp(nmr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME)))
6254bf50f18SLuigi Rizzo 		return EINVAL;
6264bf50f18SLuigi Rizzo 	ifp = ifunit_ref(nmr->nr_name);
6274bf50f18SLuigi Rizzo 	if (ifp) { /* already exist, cannot create new one */
628*c3e9b4dbSLuiz Otavio O Souza 		error = EEXIST;
629*c3e9b4dbSLuiz Otavio O Souza 		NMG_LOCK();
630*c3e9b4dbSLuiz Otavio O Souza 		if (NM_NA_VALID(ifp)) {
631*c3e9b4dbSLuiz Otavio O Souza 			int update_err = nm_update_info(nmr, NA(ifp));
632*c3e9b4dbSLuiz Otavio O Souza 			if (update_err)
633*c3e9b4dbSLuiz Otavio O Souza 				error = update_err;
634*c3e9b4dbSLuiz Otavio O Souza 		}
635*c3e9b4dbSLuiz Otavio O Souza 		NMG_UNLOCK();
6364bf50f18SLuigi Rizzo 		if_rele(ifp);
637*c3e9b4dbSLuiz Otavio O Souza 		return error;
6384bf50f18SLuigi Rizzo 	}
63937e3a6d3SLuigi Rizzo 	error = nm_os_vi_persist(nmr->nr_name, &ifp);
6404bf50f18SLuigi Rizzo 	if (error)
6414bf50f18SLuigi Rizzo 		return error;
6424bf50f18SLuigi Rizzo 
6434bf50f18SLuigi Rizzo 	NMG_LOCK();
644*c3e9b4dbSLuiz Otavio O Souza 	if (nmr->nr_arg2) {
645*c3e9b4dbSLuiz Otavio O Souza 		nmd = netmap_mem_find(nmr->nr_arg2);
646*c3e9b4dbSLuiz Otavio O Souza 		if (nmd == NULL) {
647*c3e9b4dbSLuiz Otavio O Souza 			error = EINVAL;
648*c3e9b4dbSLuiz Otavio O Souza 			goto err_1;
649*c3e9b4dbSLuiz Otavio O Souza 		}
650*c3e9b4dbSLuiz Otavio O Souza 	}
6514bf50f18SLuigi Rizzo 	/* netmap_vp_create creates a struct netmap_vp_adapter */
652*c3e9b4dbSLuiz Otavio O Souza 	error = netmap_vp_create(nmr, ifp, nmd, &vpna);
6534bf50f18SLuigi Rizzo 	if (error) {
6544bf50f18SLuigi Rizzo 		D("error %d", error);
655*c3e9b4dbSLuiz Otavio O Souza 		goto err_1;
6564bf50f18SLuigi Rizzo 	}
6574bf50f18SLuigi Rizzo 	/* persist-specific routines */
6584bf50f18SLuigi Rizzo 	vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
659*c3e9b4dbSLuiz Otavio O Souza 	if (!autodelete) {
6604bf50f18SLuigi Rizzo 		netmap_adapter_get(&vpna->up);
661*c3e9b4dbSLuiz Otavio O Souza 	} else {
662*c3e9b4dbSLuiz Otavio O Souza 		vpna->autodelete = 1;
663*c3e9b4dbSLuiz Otavio O Souza 	}
66437e3a6d3SLuigi Rizzo 	NM_ATTACH_NA(ifp, &vpna->up);
665*c3e9b4dbSLuiz Otavio O Souza 	/* return the updated info */
666*c3e9b4dbSLuiz Otavio O Souza 	error = nm_update_info(nmr, &vpna->up);
667*c3e9b4dbSLuiz Otavio O Souza 	if (error) {
668*c3e9b4dbSLuiz Otavio O Souza 		goto err_2;
669*c3e9b4dbSLuiz Otavio O Souza 	}
670*c3e9b4dbSLuiz Otavio O Souza 	D("returning nr_arg2 %d", nmr->nr_arg2);
671*c3e9b4dbSLuiz Otavio O Souza 	if (nmd)
672*c3e9b4dbSLuiz Otavio O Souza 		netmap_mem_put(nmd);
6734bf50f18SLuigi Rizzo 	NMG_UNLOCK();
6744bf50f18SLuigi Rizzo 	D("created %s", ifp->if_xname);
6754bf50f18SLuigi Rizzo 	return 0;
676*c3e9b4dbSLuiz Otavio O Souza 
677*c3e9b4dbSLuiz Otavio O Souza err_2:
678*c3e9b4dbSLuiz Otavio O Souza 	netmap_detach(ifp);
679*c3e9b4dbSLuiz Otavio O Souza err_1:
680*c3e9b4dbSLuiz Otavio O Souza 	if (nmd)
681*c3e9b4dbSLuiz Otavio O Souza 		netmap_mem_put(nmd);
682*c3e9b4dbSLuiz Otavio O Souza 	NMG_UNLOCK();
683*c3e9b4dbSLuiz Otavio O Souza 	nm_os_vi_detach(ifp);
684*c3e9b4dbSLuiz Otavio O Souza 
685*c3e9b4dbSLuiz Otavio O Souza 	return error;
6864bf50f18SLuigi Rizzo }
68717885a7bSLuigi Rizzo 
68817885a7bSLuigi Rizzo /* Try to get a reference to a netmap adapter attached to a VALE switch.
68917885a7bSLuigi Rizzo  * If the adapter is found (or is created), this function returns 0, a
69017885a7bSLuigi Rizzo  * non NULL pointer is returned into *na, and the caller holds a
69117885a7bSLuigi Rizzo  * reference to the adapter.
69217885a7bSLuigi Rizzo  * If an adapter is not found, then no reference is grabbed and the
69317885a7bSLuigi Rizzo  * function returns an error code, or 0 if there is just a VALE prefix
69417885a7bSLuigi Rizzo  * mismatch. Therefore the caller holds a reference when
69517885a7bSLuigi Rizzo  * (*na != NULL && return == 0).
69617885a7bSLuigi Rizzo  */
697f9790aebSLuigi Rizzo int
698*c3e9b4dbSLuiz Otavio O Souza netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na,
699*c3e9b4dbSLuiz Otavio O Souza 		struct netmap_mem_d *nmd, int create)
700f9790aebSLuigi Rizzo {
7014bf50f18SLuigi Rizzo 	char *nr_name = nmr->nr_name;
7024bf50f18SLuigi Rizzo 	const char *ifname;
703*c3e9b4dbSLuiz Otavio O Souza 	struct ifnet *ifp = NULL;
704f9790aebSLuigi Rizzo 	int error = 0;
7054bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *vpna, *hostna = NULL;
706f9790aebSLuigi Rizzo 	struct nm_bridge *b;
707f9790aebSLuigi Rizzo 	int i, j, cand = -1, cand2 = -1;
708f9790aebSLuigi Rizzo 	int needed;
709f9790aebSLuigi Rizzo 
710f9790aebSLuigi Rizzo 	*na = NULL;     /* default return value */
711f9790aebSLuigi Rizzo 
712f9790aebSLuigi Rizzo 	/* first try to see if this is a bridge port. */
713f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
71437e3a6d3SLuigi Rizzo 	if (strncmp(nr_name, NM_BDG_NAME, sizeof(NM_BDG_NAME) - 1)) {
715f9790aebSLuigi Rizzo 		return 0;  /* no error, but no VALE prefix */
716f9790aebSLuigi Rizzo 	}
717f9790aebSLuigi Rizzo 
7184bf50f18SLuigi Rizzo 	b = nm_find_bridge(nr_name, create);
719f9790aebSLuigi Rizzo 	if (b == NULL) {
7204bf50f18SLuigi Rizzo 		D("no bridges available for '%s'", nr_name);
721f2637526SLuigi Rizzo 		return (create ? ENOMEM : ENXIO);
722f9790aebSLuigi Rizzo 	}
7234bf50f18SLuigi Rizzo 	if (strlen(nr_name) < b->bdg_namelen) /* impossible */
7244bf50f18SLuigi Rizzo 		panic("x");
725f9790aebSLuigi Rizzo 
726f9790aebSLuigi Rizzo 	/* Now we are sure that name starts with the bridge's name,
727f9790aebSLuigi Rizzo 	 * lookup the port in the bridge. We need to scan the entire
728f9790aebSLuigi Rizzo 	 * list. It is not important to hold a WLOCK on the bridge
729f9790aebSLuigi Rizzo 	 * during the search because NMG_LOCK already guarantees
730f9790aebSLuigi Rizzo 	 * that there are no other possible writers.
731f9790aebSLuigi Rizzo 	 */
732f9790aebSLuigi Rizzo 
733f9790aebSLuigi Rizzo 	/* lookup in the local list of ports */
734f9790aebSLuigi Rizzo 	for (j = 0; j < b->bdg_active_ports; j++) {
735f9790aebSLuigi Rizzo 		i = b->bdg_port_index[j];
736f9790aebSLuigi Rizzo 		vpna = b->bdg_ports[i];
737f9790aebSLuigi Rizzo 		// KASSERT(na != NULL);
738847bf383SLuigi Rizzo 		ND("checking %s", vpna->up.name);
7394bf50f18SLuigi Rizzo 		if (!strcmp(vpna->up.name, nr_name)) {
740f9790aebSLuigi Rizzo 			netmap_adapter_get(&vpna->up);
7414bf50f18SLuigi Rizzo 			ND("found existing if %s refs %d", nr_name)
7424bf50f18SLuigi Rizzo 			*na = &vpna->up;
743f9790aebSLuigi Rizzo 			return 0;
744f9790aebSLuigi Rizzo 		}
745f9790aebSLuigi Rizzo 	}
746f9790aebSLuigi Rizzo 	/* not found, should we create it? */
747f9790aebSLuigi Rizzo 	if (!create)
748f9790aebSLuigi Rizzo 		return ENXIO;
749f9790aebSLuigi Rizzo 	/* yes we should, see if we have space to attach entries */
750f9790aebSLuigi Rizzo 	needed = 2; /* in some cases we only need 1 */
751f9790aebSLuigi Rizzo 	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
752f9790aebSLuigi Rizzo 		D("bridge full %d, cannot create new port", b->bdg_active_ports);
753f2637526SLuigi Rizzo 		return ENOMEM;
754f9790aebSLuigi Rizzo 	}
755f9790aebSLuigi Rizzo 	/* record the next two ports available, but do not allocate yet */
756f9790aebSLuigi Rizzo 	cand = b->bdg_port_index[b->bdg_active_ports];
757f9790aebSLuigi Rizzo 	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
758f9790aebSLuigi Rizzo 	ND("+++ bridge %s port %s used %d avail %d %d",
7594bf50f18SLuigi Rizzo 		b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);
760f9790aebSLuigi Rizzo 
761f9790aebSLuigi Rizzo 	/*
762f9790aebSLuigi Rizzo 	 * try see if there is a matching NIC with this name
763f9790aebSLuigi Rizzo 	 * (after the bridge's name)
764f9790aebSLuigi Rizzo 	 */
7654bf50f18SLuigi Rizzo 	ifname = nr_name + b->bdg_namelen + 1;
7664bf50f18SLuigi Rizzo 	ifp = ifunit_ref(ifname);
7674bf50f18SLuigi Rizzo 	if (!ifp) {
7684bf50f18SLuigi Rizzo 		/* Create an ephemeral virtual port
7694bf50f18SLuigi Rizzo 		 * This block contains all the ephemeral-specific logics
7704bf50f18SLuigi Rizzo 		 */
771f9790aebSLuigi Rizzo 		if (nmr->nr_cmd) {
772f9790aebSLuigi Rizzo 			/* nr_cmd must be 0 for a virtual port */
773*c3e9b4dbSLuiz Otavio O Souza 			error = EINVAL;
774*c3e9b4dbSLuiz Otavio O Souza 			goto out;
775f9790aebSLuigi Rizzo 		}
776f9790aebSLuigi Rizzo 
777f9790aebSLuigi Rizzo 		/* bdg_netmap_attach creates a struct netmap_adapter */
778*c3e9b4dbSLuiz Otavio O Souza 		error = netmap_vp_create(nmr, NULL, nmd, &vpna);
779f9790aebSLuigi Rizzo 		if (error) {
780f9790aebSLuigi Rizzo 			D("error %d", error);
781*c3e9b4dbSLuiz Otavio O Souza 			goto out;
782f9790aebSLuigi Rizzo 		}
7834bf50f18SLuigi Rizzo 		/* shortcut - we can skip get_hw_na(),
7844bf50f18SLuigi Rizzo 		 * ownership check and nm_bdg_attach()
7854bf50f18SLuigi Rizzo 		 */
7864bf50f18SLuigi Rizzo 	} else {
7874bf50f18SLuigi Rizzo 		struct netmap_adapter *hw;
788f9790aebSLuigi Rizzo 
789*c3e9b4dbSLuiz Otavio O Souza 		error = netmap_get_hw_na(ifp, nmd, &hw);
7904bf50f18SLuigi Rizzo 		if (error || hw == NULL)
791f9790aebSLuigi Rizzo 			goto out;
792f9790aebSLuigi Rizzo 
7934bf50f18SLuigi Rizzo 		/* host adapter might not be created */
7944bf50f18SLuigi Rizzo 		error = hw->nm_bdg_attach(nr_name, hw);
7954bf50f18SLuigi Rizzo 		if (error)
796f9790aebSLuigi Rizzo 			goto out;
7974bf50f18SLuigi Rizzo 		vpna = hw->na_vp;
7984bf50f18SLuigi Rizzo 		hostna = hw->na_hostvp;
7994bf50f18SLuigi Rizzo 		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
8004bf50f18SLuigi Rizzo 			hostna = NULL;
801f9790aebSLuigi Rizzo 	}
802f9790aebSLuigi Rizzo 
803f9790aebSLuigi Rizzo 	BDG_WLOCK(b);
804f9790aebSLuigi Rizzo 	vpna->bdg_port = cand;
805f9790aebSLuigi Rizzo 	ND("NIC  %p to bridge port %d", vpna, cand);
806f9790aebSLuigi Rizzo 	/* bind the port to the bridge (virtual ports are not active) */
807f9790aebSLuigi Rizzo 	b->bdg_ports[cand] = vpna;
808f9790aebSLuigi Rizzo 	vpna->na_bdg = b;
809f9790aebSLuigi Rizzo 	b->bdg_active_ports++;
8104bf50f18SLuigi Rizzo 	if (hostna != NULL) {
811f9790aebSLuigi Rizzo 		/* also bind the host stack to the bridge */
812f9790aebSLuigi Rizzo 		b->bdg_ports[cand2] = hostna;
813f9790aebSLuigi Rizzo 		hostna->bdg_port = cand2;
814f9790aebSLuigi Rizzo 		hostna->na_bdg = b;
815f9790aebSLuigi Rizzo 		b->bdg_active_ports++;
816f9790aebSLuigi Rizzo 		ND("host %p to bridge port %d", hostna, cand2);
817f9790aebSLuigi Rizzo 	}
8184bf50f18SLuigi Rizzo 	ND("if %s refs %d", ifname, vpna->up.na_refcount);
819f9790aebSLuigi Rizzo 	BDG_WUNLOCK(b);
8204bf50f18SLuigi Rizzo 	*na = &vpna->up;
8214bf50f18SLuigi Rizzo 	netmap_adapter_get(*na);
822f9790aebSLuigi Rizzo 
823f9790aebSLuigi Rizzo out:
824*c3e9b4dbSLuiz Otavio O Souza 	if (ifp)
825f9790aebSLuigi Rizzo 		if_rele(ifp);
826f9790aebSLuigi Rizzo 
827f9790aebSLuigi Rizzo 	return error;
828f9790aebSLuigi Rizzo }
829f9790aebSLuigi Rizzo 
830f9790aebSLuigi Rizzo 
8314bf50f18SLuigi Rizzo /* Process NETMAP_BDG_ATTACH */
832f9790aebSLuigi Rizzo static int
8334bf50f18SLuigi Rizzo nm_bdg_ctl_attach(struct nmreq *nmr)
834f9790aebSLuigi Rizzo {
835f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
836*c3e9b4dbSLuiz Otavio O Souza 	struct netmap_mem_d *nmd = NULL;
837f9790aebSLuigi Rizzo 	int error;
838f9790aebSLuigi Rizzo 
839f9790aebSLuigi Rizzo 	NMG_LOCK();
840f2637526SLuigi Rizzo 
841*c3e9b4dbSLuiz Otavio O Souza 	if (nmr->nr_arg2) {
842*c3e9b4dbSLuiz Otavio O Souza 		nmd = netmap_mem_find(nmr->nr_arg2);
843*c3e9b4dbSLuiz Otavio O Souza 		if (nmd == NULL) {
844*c3e9b4dbSLuiz Otavio O Souza 			error = EINVAL;
845*c3e9b4dbSLuiz Otavio O Souza 			goto unlock_exit;
846*c3e9b4dbSLuiz Otavio O Souza 		}
847*c3e9b4dbSLuiz Otavio O Souza 	}
848*c3e9b4dbSLuiz Otavio O Souza 
849*c3e9b4dbSLuiz Otavio O Souza 	error = netmap_get_bdg_na(nmr, &na, nmd, 1 /* create if not exists */);
8504bf50f18SLuigi Rizzo 	if (error) /* no device */
851f9790aebSLuigi Rizzo 		goto unlock_exit;
852f2637526SLuigi Rizzo 
85317885a7bSLuigi Rizzo 	if (na == NULL) { /* VALE prefix missing */
854f9790aebSLuigi Rizzo 		error = EINVAL;
85517885a7bSLuigi Rizzo 		goto unlock_exit;
856f9790aebSLuigi Rizzo 	}
857f9790aebSLuigi Rizzo 
8584bf50f18SLuigi Rizzo 	if (NETMAP_OWNED_BY_ANY(na)) {
859f9790aebSLuigi Rizzo 		error = EBUSY;
860f9790aebSLuigi Rizzo 		goto unref_exit;
861f9790aebSLuigi Rizzo 	}
862f9790aebSLuigi Rizzo 
8634bf50f18SLuigi Rizzo 	if (na->nm_bdg_ctl) {
8644bf50f18SLuigi Rizzo 		/* nop for VALE ports. The bwrap needs to put the hwna
8654bf50f18SLuigi Rizzo 		 * in netmap mode (see netmap_bwrap_bdg_ctl)
8664bf50f18SLuigi Rizzo 		 */
8674bf50f18SLuigi Rizzo 		error = na->nm_bdg_ctl(na, nmr, 1);
8684bf50f18SLuigi Rizzo 		if (error)
869f9790aebSLuigi Rizzo 			goto unref_exit;
8704bf50f18SLuigi Rizzo 		ND("registered %s to netmap-mode", na->name);
871f9790aebSLuigi Rizzo 	}
872f9790aebSLuigi Rizzo 	NMG_UNLOCK();
873f9790aebSLuigi Rizzo 	return 0;
874f9790aebSLuigi Rizzo 
875f9790aebSLuigi Rizzo unref_exit:
876f9790aebSLuigi Rizzo 	netmap_adapter_put(na);
877f9790aebSLuigi Rizzo unlock_exit:
878f9790aebSLuigi Rizzo 	NMG_UNLOCK();
879f9790aebSLuigi Rizzo 	return error;
880f9790aebSLuigi Rizzo }
881f9790aebSLuigi Rizzo 
88237e3a6d3SLuigi Rizzo static inline int
88337e3a6d3SLuigi Rizzo nm_is_bwrap(struct netmap_adapter *na)
88437e3a6d3SLuigi Rizzo {
88537e3a6d3SLuigi Rizzo 	return na->nm_register == netmap_bwrap_reg;
88637e3a6d3SLuigi Rizzo }
88717885a7bSLuigi Rizzo 
8884bf50f18SLuigi Rizzo /* process NETMAP_BDG_DETACH */
889f9790aebSLuigi Rizzo static int
8904bf50f18SLuigi Rizzo nm_bdg_ctl_detach(struct nmreq *nmr)
891f9790aebSLuigi Rizzo {
892f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
893f9790aebSLuigi Rizzo 	int error;
894f9790aebSLuigi Rizzo 
895f9790aebSLuigi Rizzo 	NMG_LOCK();
896*c3e9b4dbSLuiz Otavio O Souza 	error = netmap_get_bdg_na(nmr, &na, NULL, 0 /* don't create */);
897f9790aebSLuigi Rizzo 	if (error) { /* no device, or another bridge or user owns the device */
898f9790aebSLuigi Rizzo 		goto unlock_exit;
899f9790aebSLuigi Rizzo 	}
900f2637526SLuigi Rizzo 
90117885a7bSLuigi Rizzo 	if (na == NULL) { /* VALE prefix missing */
902f9790aebSLuigi Rizzo 		error = EINVAL;
90317885a7bSLuigi Rizzo 		goto unlock_exit;
90437e3a6d3SLuigi Rizzo 	} else if (nm_is_bwrap(na) &&
90537e3a6d3SLuigi Rizzo 		   ((struct netmap_bwrap_adapter *)na)->na_polling_state) {
90637e3a6d3SLuigi Rizzo 		/* Don't detach a NIC with polling */
90737e3a6d3SLuigi Rizzo 		error = EBUSY;
90837e3a6d3SLuigi Rizzo 		netmap_adapter_put(na);
90937e3a6d3SLuigi Rizzo 		goto unlock_exit;
910f9790aebSLuigi Rizzo 	}
9114bf50f18SLuigi Rizzo 	if (na->nm_bdg_ctl) {
9124bf50f18SLuigi Rizzo 		/* remove the port from bridge. The bwrap
9134bf50f18SLuigi Rizzo 		 * also needs to put the hwna in normal mode
9144bf50f18SLuigi Rizzo 		 */
9154bf50f18SLuigi Rizzo 		error = na->nm_bdg_ctl(na, nmr, 0);
916f9790aebSLuigi Rizzo 	}
917f9790aebSLuigi Rizzo 
918f9790aebSLuigi Rizzo 	netmap_adapter_put(na);
919f9790aebSLuigi Rizzo unlock_exit:
920f9790aebSLuigi Rizzo 	NMG_UNLOCK();
921f9790aebSLuigi Rizzo 	return error;
922f9790aebSLuigi Rizzo 
923f9790aebSLuigi Rizzo }
924f9790aebSLuigi Rizzo 
92537e3a6d3SLuigi Rizzo struct nm_bdg_polling_state;
92637e3a6d3SLuigi Rizzo struct
92737e3a6d3SLuigi Rizzo nm_bdg_kthread {
928*c3e9b4dbSLuiz Otavio O Souza 	struct nm_kctx *nmk;
92937e3a6d3SLuigi Rizzo 	u_int qfirst;
93037e3a6d3SLuigi Rizzo 	u_int qlast;
93137e3a6d3SLuigi Rizzo 	struct nm_bdg_polling_state *bps;
93237e3a6d3SLuigi Rizzo };
93337e3a6d3SLuigi Rizzo 
93437e3a6d3SLuigi Rizzo struct nm_bdg_polling_state {
93537e3a6d3SLuigi Rizzo 	bool configured;
93637e3a6d3SLuigi Rizzo 	bool stopped;
93737e3a6d3SLuigi Rizzo 	struct netmap_bwrap_adapter *bna;
93837e3a6d3SLuigi Rizzo 	u_int reg;
93937e3a6d3SLuigi Rizzo 	u_int qfirst;
94037e3a6d3SLuigi Rizzo 	u_int qlast;
94137e3a6d3SLuigi Rizzo 	u_int cpu_from;
94237e3a6d3SLuigi Rizzo 	u_int ncpus;
94337e3a6d3SLuigi Rizzo 	struct nm_bdg_kthread *kthreads;
94437e3a6d3SLuigi Rizzo };
94537e3a6d3SLuigi Rizzo 
94637e3a6d3SLuigi Rizzo static void
947*c3e9b4dbSLuiz Otavio O Souza netmap_bwrap_polling(void *data, int is_kthread)
94837e3a6d3SLuigi Rizzo {
94937e3a6d3SLuigi Rizzo 	struct nm_bdg_kthread *nbk = data;
95037e3a6d3SLuigi Rizzo 	struct netmap_bwrap_adapter *bna;
95137e3a6d3SLuigi Rizzo 	u_int qfirst, qlast, i;
95237e3a6d3SLuigi Rizzo 	struct netmap_kring *kring0, *kring;
95337e3a6d3SLuigi Rizzo 
95437e3a6d3SLuigi Rizzo 	if (!nbk)
95537e3a6d3SLuigi Rizzo 		return;
95637e3a6d3SLuigi Rizzo 	qfirst = nbk->qfirst;
95737e3a6d3SLuigi Rizzo 	qlast = nbk->qlast;
95837e3a6d3SLuigi Rizzo 	bna = nbk->bps->bna;
95937e3a6d3SLuigi Rizzo 	kring0 = NMR(bna->hwna, NR_RX);
96037e3a6d3SLuigi Rizzo 
96137e3a6d3SLuigi Rizzo 	for (i = qfirst; i < qlast; i++) {
96237e3a6d3SLuigi Rizzo 		kring = kring0 + i;
96337e3a6d3SLuigi Rizzo 		kring->nm_notify(kring, 0);
96437e3a6d3SLuigi Rizzo 	}
96537e3a6d3SLuigi Rizzo }
96637e3a6d3SLuigi Rizzo 
96737e3a6d3SLuigi Rizzo static int
96837e3a6d3SLuigi Rizzo nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
96937e3a6d3SLuigi Rizzo {
970*c3e9b4dbSLuiz Otavio O Souza 	struct nm_kctx_cfg kcfg;
97137e3a6d3SLuigi Rizzo 	int i, j;
97237e3a6d3SLuigi Rizzo 
973*c3e9b4dbSLuiz Otavio O Souza 	bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus);
97437e3a6d3SLuigi Rizzo 	if (bps->kthreads == NULL)
97537e3a6d3SLuigi Rizzo 		return ENOMEM;
97637e3a6d3SLuigi Rizzo 
97737e3a6d3SLuigi Rizzo 	bzero(&kcfg, sizeof(kcfg));
97837e3a6d3SLuigi Rizzo 	kcfg.worker_fn = netmap_bwrap_polling;
979*c3e9b4dbSLuiz Otavio O Souza 	kcfg.use_kthread = 1;
98037e3a6d3SLuigi Rizzo 	for (i = 0; i < bps->ncpus; i++) {
98137e3a6d3SLuigi Rizzo 		struct nm_bdg_kthread *t = bps->kthreads + i;
98237e3a6d3SLuigi Rizzo 		int all = (bps->ncpus == 1 && bps->reg == NR_REG_ALL_NIC);
98337e3a6d3SLuigi Rizzo 		int affinity = bps->cpu_from + i;
98437e3a6d3SLuigi Rizzo 
98537e3a6d3SLuigi Rizzo 		t->bps = bps;
98637e3a6d3SLuigi Rizzo 		t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
98737e3a6d3SLuigi Rizzo 		t->qlast = all ? bps->qlast : t->qfirst + 1;
98837e3a6d3SLuigi Rizzo 		D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
98937e3a6d3SLuigi Rizzo 			t->qlast);
99037e3a6d3SLuigi Rizzo 
99137e3a6d3SLuigi Rizzo 		kcfg.type = i;
99237e3a6d3SLuigi Rizzo 		kcfg.worker_private = t;
993*c3e9b4dbSLuiz Otavio O Souza 		t->nmk = nm_os_kctx_create(&kcfg, 0, NULL);
99437e3a6d3SLuigi Rizzo 		if (t->nmk == NULL) {
99537e3a6d3SLuigi Rizzo 			goto cleanup;
99637e3a6d3SLuigi Rizzo 		}
997*c3e9b4dbSLuiz Otavio O Souza 		nm_os_kctx_worker_setaff(t->nmk, affinity);
99837e3a6d3SLuigi Rizzo 	}
99937e3a6d3SLuigi Rizzo 	return 0;
100037e3a6d3SLuigi Rizzo 
100137e3a6d3SLuigi Rizzo cleanup:
100237e3a6d3SLuigi Rizzo 	for (j = 0; j < i; j++) {
100337e3a6d3SLuigi Rizzo 		struct nm_bdg_kthread *t = bps->kthreads + i;
1004*c3e9b4dbSLuiz Otavio O Souza 		nm_os_kctx_destroy(t->nmk);
100537e3a6d3SLuigi Rizzo 	}
1006*c3e9b4dbSLuiz Otavio O Souza 	nm_os_free(bps->kthreads);
100737e3a6d3SLuigi Rizzo 	return EFAULT;
100837e3a6d3SLuigi Rizzo }
100937e3a6d3SLuigi Rizzo 
1010*c3e9b4dbSLuiz Otavio O Souza /* A variant of ptnetmap_start_kthreads() */
101137e3a6d3SLuigi Rizzo static int
101237e3a6d3SLuigi Rizzo nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
101337e3a6d3SLuigi Rizzo {
101437e3a6d3SLuigi Rizzo 	int error, i, j;
101537e3a6d3SLuigi Rizzo 
101637e3a6d3SLuigi Rizzo 	if (!bps) {
101737e3a6d3SLuigi Rizzo 		D("polling is not configured");
101837e3a6d3SLuigi Rizzo 		return EFAULT;
101937e3a6d3SLuigi Rizzo 	}
102037e3a6d3SLuigi Rizzo 	bps->stopped = false;
102137e3a6d3SLuigi Rizzo 
102237e3a6d3SLuigi Rizzo 	for (i = 0; i < bps->ncpus; i++) {
102337e3a6d3SLuigi Rizzo 		struct nm_bdg_kthread *t = bps->kthreads + i;
1024*c3e9b4dbSLuiz Otavio O Souza 		error = nm_os_kctx_worker_start(t->nmk);
102537e3a6d3SLuigi Rizzo 		if (error) {
102637e3a6d3SLuigi Rizzo 			D("error in nm_kthread_start()");
102737e3a6d3SLuigi Rizzo 			goto cleanup;
102837e3a6d3SLuigi Rizzo 		}
102937e3a6d3SLuigi Rizzo 	}
103037e3a6d3SLuigi Rizzo 	return 0;
103137e3a6d3SLuigi Rizzo 
103237e3a6d3SLuigi Rizzo cleanup:
103337e3a6d3SLuigi Rizzo 	for (j = 0; j < i; j++) {
103437e3a6d3SLuigi Rizzo 		struct nm_bdg_kthread *t = bps->kthreads + i;
1035*c3e9b4dbSLuiz Otavio O Souza 		nm_os_kctx_worker_stop(t->nmk);
103637e3a6d3SLuigi Rizzo 	}
103737e3a6d3SLuigi Rizzo 	bps->stopped = true;
103837e3a6d3SLuigi Rizzo 	return error;
103937e3a6d3SLuigi Rizzo }
104037e3a6d3SLuigi Rizzo 
104137e3a6d3SLuigi Rizzo static void
104237e3a6d3SLuigi Rizzo nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps)
104337e3a6d3SLuigi Rizzo {
104437e3a6d3SLuigi Rizzo 	int i;
104537e3a6d3SLuigi Rizzo 
104637e3a6d3SLuigi Rizzo 	if (!bps)
104737e3a6d3SLuigi Rizzo 		return;
104837e3a6d3SLuigi Rizzo 
104937e3a6d3SLuigi Rizzo 	for (i = 0; i < bps->ncpus; i++) {
105037e3a6d3SLuigi Rizzo 		struct nm_bdg_kthread *t = bps->kthreads + i;
1051*c3e9b4dbSLuiz Otavio O Souza 		nm_os_kctx_worker_stop(t->nmk);
1052*c3e9b4dbSLuiz Otavio O Souza 		nm_os_kctx_destroy(t->nmk);
105337e3a6d3SLuigi Rizzo 	}
105437e3a6d3SLuigi Rizzo 	bps->stopped = true;
105537e3a6d3SLuigi Rizzo }
105637e3a6d3SLuigi Rizzo 
105737e3a6d3SLuigi Rizzo static int
105837e3a6d3SLuigi Rizzo get_polling_cfg(struct nmreq *nmr, struct netmap_adapter *na,
105937e3a6d3SLuigi Rizzo 			struct nm_bdg_polling_state *bps)
106037e3a6d3SLuigi Rizzo {
106137e3a6d3SLuigi Rizzo 	int req_cpus, avail_cpus, core_from;
106237e3a6d3SLuigi Rizzo 	u_int reg, i, qfirst, qlast;
106337e3a6d3SLuigi Rizzo 
106437e3a6d3SLuigi Rizzo 	avail_cpus = nm_os_ncpus();
106537e3a6d3SLuigi Rizzo 	req_cpus = nmr->nr_arg1;
106637e3a6d3SLuigi Rizzo 
106737e3a6d3SLuigi Rizzo 	if (req_cpus == 0) {
106837e3a6d3SLuigi Rizzo 		D("req_cpus must be > 0");
106937e3a6d3SLuigi Rizzo 		return EINVAL;
107037e3a6d3SLuigi Rizzo 	} else if (req_cpus >= avail_cpus) {
107137e3a6d3SLuigi Rizzo 		D("for safety, we need at least one core left in the system");
107237e3a6d3SLuigi Rizzo 		return EINVAL;
107337e3a6d3SLuigi Rizzo 	}
107437e3a6d3SLuigi Rizzo 	reg = nmr->nr_flags & NR_REG_MASK;
107537e3a6d3SLuigi Rizzo 	i = nmr->nr_ringid & NETMAP_RING_MASK;
107637e3a6d3SLuigi Rizzo 	/*
107737e3a6d3SLuigi Rizzo 	 * ONE_NIC: dedicate one core to one ring. If multiple cores
107837e3a6d3SLuigi Rizzo 	 *          are specified, consecutive rings are also polled.
107937e3a6d3SLuigi Rizzo 	 *          For example, if ringid=2 and 2 cores are given,
108037e3a6d3SLuigi Rizzo 	 *          ring 2 and 3 are polled by core 2 and 3, respectively.
108137e3a6d3SLuigi Rizzo 	 * ALL_NIC: poll all the rings using a core specified by ringid.
108237e3a6d3SLuigi Rizzo 	 *          the number of cores must be 1.
108337e3a6d3SLuigi Rizzo 	 */
108437e3a6d3SLuigi Rizzo 	if (reg == NR_REG_ONE_NIC) {
108537e3a6d3SLuigi Rizzo 		if (i + req_cpus > nma_get_nrings(na, NR_RX)) {
108637e3a6d3SLuigi Rizzo 			D("only %d rings exist (ring %u-%u is given)",
108737e3a6d3SLuigi Rizzo 				nma_get_nrings(na, NR_RX), i, i+req_cpus);
108837e3a6d3SLuigi Rizzo 			return EINVAL;
108937e3a6d3SLuigi Rizzo 		}
109037e3a6d3SLuigi Rizzo 		qfirst = i;
109137e3a6d3SLuigi Rizzo 		qlast = qfirst + req_cpus;
109237e3a6d3SLuigi Rizzo 		core_from = qfirst;
109337e3a6d3SLuigi Rizzo 	} else if (reg == NR_REG_ALL_NIC) {
109437e3a6d3SLuigi Rizzo 		if (req_cpus != 1) {
109537e3a6d3SLuigi Rizzo 			D("ncpus must be 1 not %d for REG_ALL_NIC", req_cpus);
109637e3a6d3SLuigi Rizzo 			return EINVAL;
109737e3a6d3SLuigi Rizzo 		}
109837e3a6d3SLuigi Rizzo 		qfirst = 0;
109937e3a6d3SLuigi Rizzo 		qlast = nma_get_nrings(na, NR_RX);
110037e3a6d3SLuigi Rizzo 		core_from = i;
110137e3a6d3SLuigi Rizzo 	} else {
110237e3a6d3SLuigi Rizzo 		D("reg must be ALL_NIC or ONE_NIC");
110337e3a6d3SLuigi Rizzo 		return EINVAL;
110437e3a6d3SLuigi Rizzo 	}
110537e3a6d3SLuigi Rizzo 
110637e3a6d3SLuigi Rizzo 	bps->reg = reg;
110737e3a6d3SLuigi Rizzo 	bps->qfirst = qfirst;
110837e3a6d3SLuigi Rizzo 	bps->qlast = qlast;
110937e3a6d3SLuigi Rizzo 	bps->cpu_from = core_from;
111037e3a6d3SLuigi Rizzo 	bps->ncpus = req_cpus;
111137e3a6d3SLuigi Rizzo 	D("%s qfirst %u qlast %u cpu_from %u ncpus %u",
111237e3a6d3SLuigi Rizzo 		reg == NR_REG_ALL_NIC ? "REG_ALL_NIC" : "REG_ONE_NIC",
111337e3a6d3SLuigi Rizzo 		qfirst, qlast, core_from, req_cpus);
111437e3a6d3SLuigi Rizzo 	return 0;
111537e3a6d3SLuigi Rizzo }
111637e3a6d3SLuigi Rizzo 
111737e3a6d3SLuigi Rizzo static int
111837e3a6d3SLuigi Rizzo nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na)
111937e3a6d3SLuigi Rizzo {
112037e3a6d3SLuigi Rizzo 	struct nm_bdg_polling_state *bps;
112137e3a6d3SLuigi Rizzo 	struct netmap_bwrap_adapter *bna;
112237e3a6d3SLuigi Rizzo 	int error;
112337e3a6d3SLuigi Rizzo 
112437e3a6d3SLuigi Rizzo 	bna = (struct netmap_bwrap_adapter *)na;
112537e3a6d3SLuigi Rizzo 	if (bna->na_polling_state) {
112637e3a6d3SLuigi Rizzo 		D("ERROR adapter already in polling mode");
112737e3a6d3SLuigi Rizzo 		return EFAULT;
112837e3a6d3SLuigi Rizzo 	}
112937e3a6d3SLuigi Rizzo 
1130*c3e9b4dbSLuiz Otavio O Souza 	bps = nm_os_malloc(sizeof(*bps));
113137e3a6d3SLuigi Rizzo 	if (!bps)
113237e3a6d3SLuigi Rizzo 		return ENOMEM;
113337e3a6d3SLuigi Rizzo 	bps->configured = false;
113437e3a6d3SLuigi Rizzo 	bps->stopped = true;
113537e3a6d3SLuigi Rizzo 
113637e3a6d3SLuigi Rizzo 	if (get_polling_cfg(nmr, na, bps)) {
1137*c3e9b4dbSLuiz Otavio O Souza 		nm_os_free(bps);
113837e3a6d3SLuigi Rizzo 		return EINVAL;
113937e3a6d3SLuigi Rizzo 	}
114037e3a6d3SLuigi Rizzo 
114137e3a6d3SLuigi Rizzo 	if (nm_bdg_create_kthreads(bps)) {
1142*c3e9b4dbSLuiz Otavio O Souza 		nm_os_free(bps);
114337e3a6d3SLuigi Rizzo 		return EFAULT;
114437e3a6d3SLuigi Rizzo 	}
114537e3a6d3SLuigi Rizzo 
114637e3a6d3SLuigi Rizzo 	bps->configured = true;
114737e3a6d3SLuigi Rizzo 	bna->na_polling_state = bps;
114837e3a6d3SLuigi Rizzo 	bps->bna = bna;
114937e3a6d3SLuigi Rizzo 
115037e3a6d3SLuigi Rizzo 	/* disable interrupt if possible */
115137e3a6d3SLuigi Rizzo 	if (bna->hwna->nm_intr)
115237e3a6d3SLuigi Rizzo 		bna->hwna->nm_intr(bna->hwna, 0);
115337e3a6d3SLuigi Rizzo 	/* start kthread now */
115437e3a6d3SLuigi Rizzo 	error = nm_bdg_polling_start_kthreads(bps);
115537e3a6d3SLuigi Rizzo 	if (error) {
115637e3a6d3SLuigi Rizzo 		D("ERROR nm_bdg_polling_start_kthread()");
1157*c3e9b4dbSLuiz Otavio O Souza 		nm_os_free(bps->kthreads);
1158*c3e9b4dbSLuiz Otavio O Souza 		nm_os_free(bps);
115937e3a6d3SLuigi Rizzo 		bna->na_polling_state = NULL;
116037e3a6d3SLuigi Rizzo 		if (bna->hwna->nm_intr)
116137e3a6d3SLuigi Rizzo 			bna->hwna->nm_intr(bna->hwna, 1);
116237e3a6d3SLuigi Rizzo 	}
116337e3a6d3SLuigi Rizzo 	return error;
116437e3a6d3SLuigi Rizzo }
116537e3a6d3SLuigi Rizzo 
116637e3a6d3SLuigi Rizzo static int
116737e3a6d3SLuigi Rizzo nm_bdg_ctl_polling_stop(struct nmreq *nmr, struct netmap_adapter *na)
116837e3a6d3SLuigi Rizzo {
116937e3a6d3SLuigi Rizzo 	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na;
117037e3a6d3SLuigi Rizzo 	struct nm_bdg_polling_state *bps;
117137e3a6d3SLuigi Rizzo 
117237e3a6d3SLuigi Rizzo 	if (!bna->na_polling_state) {
117337e3a6d3SLuigi Rizzo 		D("ERROR adapter is not in polling mode");
117437e3a6d3SLuigi Rizzo 		return EFAULT;
117537e3a6d3SLuigi Rizzo 	}
117637e3a6d3SLuigi Rizzo 	bps = bna->na_polling_state;
117737e3a6d3SLuigi Rizzo 	nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state);
117837e3a6d3SLuigi Rizzo 	bps->configured = false;
1179*c3e9b4dbSLuiz Otavio O Souza 	nm_os_free(bps);
118037e3a6d3SLuigi Rizzo 	bna->na_polling_state = NULL;
118137e3a6d3SLuigi Rizzo 	/* reenable interrupt */
118237e3a6d3SLuigi Rizzo 	if (bna->hwna->nm_intr)
118337e3a6d3SLuigi Rizzo 		bna->hwna->nm_intr(bna->hwna, 1);
118437e3a6d3SLuigi Rizzo 	return 0;
118537e3a6d3SLuigi Rizzo }
1186f9790aebSLuigi Rizzo 
11874bf50f18SLuigi Rizzo /* Called by either user's context (netmap_ioctl())
11884bf50f18SLuigi Rizzo  * or external kernel modules (e.g., Openvswitch).
11894bf50f18SLuigi Rizzo  * Operation is indicated in nmr->nr_cmd.
11904bf50f18SLuigi Rizzo  * NETMAP_BDG_OPS that sets configure/lookup/dtor functions to the bridge
11914bf50f18SLuigi Rizzo  * requires bdg_ops argument; the other commands ignore this argument.
11924bf50f18SLuigi Rizzo  *
1193f9790aebSLuigi Rizzo  * Called without NMG_LOCK.
1194f9790aebSLuigi Rizzo  */
1195f9790aebSLuigi Rizzo int
11964bf50f18SLuigi Rizzo netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
1197f9790aebSLuigi Rizzo {
1198847bf383SLuigi Rizzo 	struct nm_bridge *b, *bridges;
1199f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
1200f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna;
1201f9790aebSLuigi Rizzo 	char *name = nmr->nr_name;
1202f9790aebSLuigi Rizzo 	int cmd = nmr->nr_cmd, namelen = strlen(name);
1203f9790aebSLuigi Rizzo 	int error = 0, i, j;
1204847bf383SLuigi Rizzo 	u_int num_bridges;
1205847bf383SLuigi Rizzo 
1206847bf383SLuigi Rizzo 	netmap_bns_getbridges(&bridges, &num_bridges);
1207f9790aebSLuigi Rizzo 
1208f9790aebSLuigi Rizzo 	switch (cmd) {
12094bf50f18SLuigi Rizzo 	case NETMAP_BDG_NEWIF:
1210*c3e9b4dbSLuiz Otavio O Souza 		error = netmap_vi_create(nmr, 0 /* no autodelete */);
12114bf50f18SLuigi Rizzo 		break;
12124bf50f18SLuigi Rizzo 
12134bf50f18SLuigi Rizzo 	case NETMAP_BDG_DELIF:
12144bf50f18SLuigi Rizzo 		error = nm_vi_destroy(nmr->nr_name);
12154bf50f18SLuigi Rizzo 		break;
12164bf50f18SLuigi Rizzo 
1217f9790aebSLuigi Rizzo 	case NETMAP_BDG_ATTACH:
12184bf50f18SLuigi Rizzo 		error = nm_bdg_ctl_attach(nmr);
1219f9790aebSLuigi Rizzo 		break;
1220f9790aebSLuigi Rizzo 
1221f9790aebSLuigi Rizzo 	case NETMAP_BDG_DETACH:
12224bf50f18SLuigi Rizzo 		error = nm_bdg_ctl_detach(nmr);
1223f9790aebSLuigi Rizzo 		break;
1224f9790aebSLuigi Rizzo 
1225f9790aebSLuigi Rizzo 	case NETMAP_BDG_LIST:
1226f9790aebSLuigi Rizzo 		/* this is used to enumerate bridges and ports */
1227f9790aebSLuigi Rizzo 		if (namelen) { /* look up indexes of bridge and port */
122837e3a6d3SLuigi Rizzo 			if (strncmp(name, NM_BDG_NAME, strlen(NM_BDG_NAME))) {
1229f9790aebSLuigi Rizzo 				error = EINVAL;
1230f9790aebSLuigi Rizzo 				break;
1231f9790aebSLuigi Rizzo 			}
1232f9790aebSLuigi Rizzo 			NMG_LOCK();
1233f9790aebSLuigi Rizzo 			b = nm_find_bridge(name, 0 /* don't create */);
1234f9790aebSLuigi Rizzo 			if (!b) {
1235f9790aebSLuigi Rizzo 				error = ENOENT;
1236f9790aebSLuigi Rizzo 				NMG_UNLOCK();
1237f9790aebSLuigi Rizzo 				break;
1238f9790aebSLuigi Rizzo 			}
1239f9790aebSLuigi Rizzo 
124037e3a6d3SLuigi Rizzo 			error = 0;
124137e3a6d3SLuigi Rizzo 			nmr->nr_arg1 = b - bridges; /* bridge index */
124237e3a6d3SLuigi Rizzo 			nmr->nr_arg2 = NM_BDG_NOPORT;
1243f9790aebSLuigi Rizzo 			for (j = 0; j < b->bdg_active_ports; j++) {
1244f9790aebSLuigi Rizzo 				i = b->bdg_port_index[j];
1245f9790aebSLuigi Rizzo 				vpna = b->bdg_ports[i];
1246f9790aebSLuigi Rizzo 				if (vpna == NULL) {
1247f9790aebSLuigi Rizzo 					D("---AAAAAAAAARGH-------");
1248f9790aebSLuigi Rizzo 					continue;
1249f9790aebSLuigi Rizzo 				}
1250f9790aebSLuigi Rizzo 				/* the former and the latter identify a
1251f9790aebSLuigi Rizzo 				 * virtual port and a NIC, respectively
1252f9790aebSLuigi Rizzo 				 */
12534bf50f18SLuigi Rizzo 				if (!strcmp(vpna->up.name, name)) {
1254f9790aebSLuigi Rizzo 					nmr->nr_arg2 = i; /* port index */
1255f9790aebSLuigi Rizzo 					break;
1256f9790aebSLuigi Rizzo 				}
1257f9790aebSLuigi Rizzo 			}
1258f9790aebSLuigi Rizzo 			NMG_UNLOCK();
1259f9790aebSLuigi Rizzo 		} else {
1260f9790aebSLuigi Rizzo 			/* return the first non-empty entry starting from
1261f9790aebSLuigi Rizzo 			 * bridge nr_arg1 and port nr_arg2.
1262f9790aebSLuigi Rizzo 			 *
1263f9790aebSLuigi Rizzo 			 * Users can detect the end of the same bridge by
1264f9790aebSLuigi Rizzo 			 * seeing the new and old value of nr_arg1, and can
1265f9790aebSLuigi Rizzo 			 * detect the end of all the bridge by error != 0
1266f9790aebSLuigi Rizzo 			 */
1267f9790aebSLuigi Rizzo 			i = nmr->nr_arg1;
1268f9790aebSLuigi Rizzo 			j = nmr->nr_arg2;
1269f9790aebSLuigi Rizzo 
1270f9790aebSLuigi Rizzo 			NMG_LOCK();
1271f9790aebSLuigi Rizzo 			for (error = ENOENT; i < NM_BRIDGES; i++) {
1272847bf383SLuigi Rizzo 				b = bridges + i;
1273*c3e9b4dbSLuiz Otavio O Souza 				for ( ; j < NM_BDG_MAXPORTS; j++) {
1274*c3e9b4dbSLuiz Otavio O Souza 					if (b->bdg_ports[j] == NULL)
1275f9790aebSLuigi Rizzo 						continue;
1276f9790aebSLuigi Rizzo 					vpna = b->bdg_ports[j];
12774bf50f18SLuigi Rizzo 					strncpy(name, vpna->up.name, (size_t)IFNAMSIZ);
1278f9790aebSLuigi Rizzo 					error = 0;
1279*c3e9b4dbSLuiz Otavio O Souza 					goto out;
1280f9790aebSLuigi Rizzo 				}
1281*c3e9b4dbSLuiz Otavio O Souza 				j = 0; /* following bridges scan from 0 */
1282*c3e9b4dbSLuiz Otavio O Souza 			}
1283*c3e9b4dbSLuiz Otavio O Souza 		out:
1284*c3e9b4dbSLuiz Otavio O Souza 			nmr->nr_arg1 = i;
1285*c3e9b4dbSLuiz Otavio O Souza 			nmr->nr_arg2 = j;
1286f9790aebSLuigi Rizzo 			NMG_UNLOCK();
1287f9790aebSLuigi Rizzo 		}
1288f9790aebSLuigi Rizzo 		break;
1289f9790aebSLuigi Rizzo 
12904bf50f18SLuigi Rizzo 	case NETMAP_BDG_REGOPS: /* XXX this should not be available from userspace */
12914bf50f18SLuigi Rizzo 		/* register callbacks to the given bridge.
1292f9790aebSLuigi Rizzo 		 * nmr->nr_name may be just bridge's name (including ':'
1293f9790aebSLuigi Rizzo 		 * if it is not just NM_NAME).
1294f9790aebSLuigi Rizzo 		 */
12954bf50f18SLuigi Rizzo 		if (!bdg_ops) {
1296f9790aebSLuigi Rizzo 			error = EINVAL;
1297f9790aebSLuigi Rizzo 			break;
1298f9790aebSLuigi Rizzo 		}
1299f9790aebSLuigi Rizzo 		NMG_LOCK();
1300f9790aebSLuigi Rizzo 		b = nm_find_bridge(name, 0 /* don't create */);
1301f9790aebSLuigi Rizzo 		if (!b) {
1302f9790aebSLuigi Rizzo 			error = EINVAL;
1303f9790aebSLuigi Rizzo 		} else {
13044bf50f18SLuigi Rizzo 			b->bdg_ops = *bdg_ops;
1305f9790aebSLuigi Rizzo 		}
1306f9790aebSLuigi Rizzo 		NMG_UNLOCK();
1307f9790aebSLuigi Rizzo 		break;
1308f9790aebSLuigi Rizzo 
1309f0ea3689SLuigi Rizzo 	case NETMAP_BDG_VNET_HDR:
1310f0ea3689SLuigi Rizzo 		/* Valid lengths for the virtio-net header are 0 (no header),
1311f0ea3689SLuigi Rizzo 		   10 and 12. */
1312f0ea3689SLuigi Rizzo 		if (nmr->nr_arg1 != 0 &&
1313f0ea3689SLuigi Rizzo 			nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) &&
1314f0ea3689SLuigi Rizzo 				nmr->nr_arg1 != 12) {
1315f0ea3689SLuigi Rizzo 			error = EINVAL;
1316f0ea3689SLuigi Rizzo 			break;
1317f0ea3689SLuigi Rizzo 		}
1318f9790aebSLuigi Rizzo 		NMG_LOCK();
1319*c3e9b4dbSLuiz Otavio O Souza 		error = netmap_get_bdg_na(nmr, &na, NULL, 0);
132017885a7bSLuigi Rizzo 		if (na && !error) {
1321f9790aebSLuigi Rizzo 			vpna = (struct netmap_vp_adapter *)na;
132237e3a6d3SLuigi Rizzo 			na->virt_hdr_len = nmr->nr_arg1;
132337e3a6d3SLuigi Rizzo 			if (na->virt_hdr_len) {
13244bf50f18SLuigi Rizzo 				vpna->mfs = NETMAP_BUF_SIZE(na);
132537e3a6d3SLuigi Rizzo 			}
132637e3a6d3SLuigi Rizzo 			D("Using vnet_hdr_len %d for %p", na->virt_hdr_len, na);
132737e3a6d3SLuigi Rizzo 			netmap_adapter_put(na);
132837e3a6d3SLuigi Rizzo 		} else if (!na) {
132937e3a6d3SLuigi Rizzo 			error = ENXIO;
133037e3a6d3SLuigi Rizzo 		}
133137e3a6d3SLuigi Rizzo 		NMG_UNLOCK();
133237e3a6d3SLuigi Rizzo 		break;
133337e3a6d3SLuigi Rizzo 
133437e3a6d3SLuigi Rizzo 	case NETMAP_BDG_POLLING_ON:
133537e3a6d3SLuigi Rizzo 	case NETMAP_BDG_POLLING_OFF:
133637e3a6d3SLuigi Rizzo 		NMG_LOCK();
1337*c3e9b4dbSLuiz Otavio O Souza 		error = netmap_get_bdg_na(nmr, &na, NULL, 0);
133837e3a6d3SLuigi Rizzo 		if (na && !error) {
133937e3a6d3SLuigi Rizzo 			if (!nm_is_bwrap(na)) {
134037e3a6d3SLuigi Rizzo 				error = EOPNOTSUPP;
134137e3a6d3SLuigi Rizzo 			} else if (cmd == NETMAP_BDG_POLLING_ON) {
134237e3a6d3SLuigi Rizzo 				error = nm_bdg_ctl_polling_start(nmr, na);
134337e3a6d3SLuigi Rizzo 				if (!error)
134437e3a6d3SLuigi Rizzo 					netmap_adapter_get(na);
134537e3a6d3SLuigi Rizzo 			} else {
134637e3a6d3SLuigi Rizzo 				error = nm_bdg_ctl_polling_stop(nmr, na);
134737e3a6d3SLuigi Rizzo 				if (!error)
134837e3a6d3SLuigi Rizzo 					netmap_adapter_put(na);
134937e3a6d3SLuigi Rizzo 			}
135017885a7bSLuigi Rizzo 			netmap_adapter_put(na);
1351f9790aebSLuigi Rizzo 		}
1352f9790aebSLuigi Rizzo 		NMG_UNLOCK();
1353f9790aebSLuigi Rizzo 		break;
1354f9790aebSLuigi Rizzo 
1355f9790aebSLuigi Rizzo 	default:
1356f9790aebSLuigi Rizzo 		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
1357f9790aebSLuigi Rizzo 		error = EINVAL;
1358f9790aebSLuigi Rizzo 		break;
1359f9790aebSLuigi Rizzo 	}
1360f9790aebSLuigi Rizzo 	return error;
1361f9790aebSLuigi Rizzo }
1362f9790aebSLuigi Rizzo 
13634bf50f18SLuigi Rizzo int
13644bf50f18SLuigi Rizzo netmap_bdg_config(struct nmreq *nmr)
13654bf50f18SLuigi Rizzo {
13664bf50f18SLuigi Rizzo 	struct nm_bridge *b;
13674bf50f18SLuigi Rizzo 	int error = EINVAL;
13684bf50f18SLuigi Rizzo 
13694bf50f18SLuigi Rizzo 	NMG_LOCK();
13704bf50f18SLuigi Rizzo 	b = nm_find_bridge(nmr->nr_name, 0);
13714bf50f18SLuigi Rizzo 	if (!b) {
13724bf50f18SLuigi Rizzo 		NMG_UNLOCK();
13734bf50f18SLuigi Rizzo 		return error;
13744bf50f18SLuigi Rizzo 	}
13754bf50f18SLuigi Rizzo 	NMG_UNLOCK();
13764bf50f18SLuigi Rizzo 	/* Don't call config() with NMG_LOCK() held */
13774bf50f18SLuigi Rizzo 	BDG_RLOCK(b);
13784bf50f18SLuigi Rizzo 	if (b->bdg_ops.config != NULL)
13794bf50f18SLuigi Rizzo 		error = b->bdg_ops.config((struct nm_ifreq *)nmr);
13804bf50f18SLuigi Rizzo 	BDG_RUNLOCK(b);
13814bf50f18SLuigi Rizzo 	return error;
13824bf50f18SLuigi Rizzo }
13834bf50f18SLuigi Rizzo 
13844bf50f18SLuigi Rizzo 
13854bf50f18SLuigi Rizzo /* nm_krings_create callback for VALE ports.
13864bf50f18SLuigi Rizzo  * Calls the standard netmap_krings_create, then adds leases on rx
13874bf50f18SLuigi Rizzo  * rings and bdgfwd on tx rings.
13884bf50f18SLuigi Rizzo  */
1389f9790aebSLuigi Rizzo static int
1390f9790aebSLuigi Rizzo netmap_vp_krings_create(struct netmap_adapter *na)
1391f9790aebSLuigi Rizzo {
1392f0ea3689SLuigi Rizzo 	u_int tailroom;
1393f9790aebSLuigi Rizzo 	int error, i;
1394f9790aebSLuigi Rizzo 	uint32_t *leases;
1395847bf383SLuigi Rizzo 	u_int nrx = netmap_real_rings(na, NR_RX);
1396f9790aebSLuigi Rizzo 
1397f9790aebSLuigi Rizzo 	/*
1398f9790aebSLuigi Rizzo 	 * Leases are attached to RX rings on vale ports
1399f9790aebSLuigi Rizzo 	 */
1400f9790aebSLuigi Rizzo 	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
1401f9790aebSLuigi Rizzo 
1402f0ea3689SLuigi Rizzo 	error = netmap_krings_create(na, tailroom);
1403f9790aebSLuigi Rizzo 	if (error)
1404f9790aebSLuigi Rizzo 		return error;
1405f9790aebSLuigi Rizzo 
1406f9790aebSLuigi Rizzo 	leases = na->tailroom;
1407f9790aebSLuigi Rizzo 
1408f9790aebSLuigi Rizzo 	for (i = 0; i < nrx; i++) { /* Receive rings */
1409f9790aebSLuigi Rizzo 		na->rx_rings[i].nkr_leases = leases;
1410f9790aebSLuigi Rizzo 		leases += na->num_rx_desc;
1411f9790aebSLuigi Rizzo 	}
1412f9790aebSLuigi Rizzo 
1413f9790aebSLuigi Rizzo 	error = nm_alloc_bdgfwd(na);
1414f9790aebSLuigi Rizzo 	if (error) {
1415f9790aebSLuigi Rizzo 		netmap_krings_delete(na);
1416f9790aebSLuigi Rizzo 		return error;
1417f9790aebSLuigi Rizzo 	}
1418f9790aebSLuigi Rizzo 
1419f9790aebSLuigi Rizzo 	return 0;
1420f9790aebSLuigi Rizzo }
1421f9790aebSLuigi Rizzo 
142217885a7bSLuigi Rizzo 
/*
 * nm_krings_delete callback for VALE ports.
 * Releases the bdgfwd structures first, then the krings themselves.
 */
static void
netmap_vp_krings_delete(struct netmap_adapter *na)
{
	nm_free_bdgfwd(na);
	netmap_krings_delete(na);
}
1430f9790aebSLuigi Rizzo 
1431f9790aebSLuigi Rizzo 
1432f9790aebSLuigi Rizzo static int
1433f9790aebSLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
1434f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *na, u_int ring_nr);
1435f9790aebSLuigi Rizzo 
1436f9790aebSLuigi Rizzo 
1437f9790aebSLuigi Rizzo /*
14384bf50f18SLuigi Rizzo  * main dispatch routine for the bridge.
1439f9790aebSLuigi Rizzo  * Grab packets from a kring, move them into the ft structure
1440f9790aebSLuigi Rizzo  * associated to the tx (input) port. Max one instance per port,
1441f9790aebSLuigi Rizzo  * filtered on input (ioctl, poll or XXX).
1442f9790aebSLuigi Rizzo  * Returns the next position in the ring.
1443f9790aebSLuigi Rizzo  */
1444f9790aebSLuigi Rizzo static int
14454bf50f18SLuigi Rizzo nm_bdg_preflush(struct netmap_kring *kring, u_int end)
1446f9790aebSLuigi Rizzo {
14474bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *na =
14484bf50f18SLuigi Rizzo 		(struct netmap_vp_adapter*)kring->na;
1449f9790aebSLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
1450f9790aebSLuigi Rizzo 	struct nm_bdg_fwd *ft;
14514bf50f18SLuigi Rizzo 	u_int ring_nr = kring->ring_id;
1452f9790aebSLuigi Rizzo 	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
1453f9790aebSLuigi Rizzo 	u_int ft_i = 0;	/* start from 0 */
1454f9790aebSLuigi Rizzo 	u_int frags = 1; /* how many frags ? */
1455f9790aebSLuigi Rizzo 	struct nm_bridge *b = na->na_bdg;
1456f9790aebSLuigi Rizzo 
1457f9790aebSLuigi Rizzo 	/* To protect against modifications to the bridge we acquire a
1458f9790aebSLuigi Rizzo 	 * shared lock, waiting if we can sleep (if the source port is
1459f9790aebSLuigi Rizzo 	 * attached to a user process) or with a trylock otherwise (NICs).
1460f9790aebSLuigi Rizzo 	 */
1461f9790aebSLuigi Rizzo 	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
1462f9790aebSLuigi Rizzo 	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
1463f9790aebSLuigi Rizzo 		BDG_RLOCK(b);
1464f9790aebSLuigi Rizzo 	else if (!BDG_RTRYLOCK(b))
1465*c3e9b4dbSLuiz Otavio O Souza 		return j;
1466f9790aebSLuigi Rizzo 	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
1467f9790aebSLuigi Rizzo 	ft = kring->nkr_ft;
1468f9790aebSLuigi Rizzo 
1469f9790aebSLuigi Rizzo 	for (; likely(j != end); j = nm_next(j, lim)) {
1470f9790aebSLuigi Rizzo 		struct netmap_slot *slot = &ring->slot[j];
1471f9790aebSLuigi Rizzo 		char *buf;
1472f9790aebSLuigi Rizzo 
1473f9790aebSLuigi Rizzo 		ft[ft_i].ft_len = slot->len;
1474f9790aebSLuigi Rizzo 		ft[ft_i].ft_flags = slot->flags;
1475f9790aebSLuigi Rizzo 
1476f9790aebSLuigi Rizzo 		ND("flags is 0x%x", slot->flags);
1477847bf383SLuigi Rizzo 		/* we do not use the buf changed flag, but we still need to reset it */
1478847bf383SLuigi Rizzo 		slot->flags &= ~NS_BUF_CHANGED;
1479847bf383SLuigi Rizzo 
1480f9790aebSLuigi Rizzo 		/* this slot goes into a list so initialize the link field */
1481f9790aebSLuigi Rizzo 		ft[ft_i].ft_next = NM_FT_NULL;
1482f9790aebSLuigi Rizzo 		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
14834bf50f18SLuigi Rizzo 			(void *)(uintptr_t)slot->ptr : NMB(&na->up, slot);
1484e31c6ec7SLuigi Rizzo 		if (unlikely(buf == NULL)) {
1485e31c6ec7SLuigi Rizzo 			RD(5, "NULL %s buffer pointer from %s slot %d len %d",
1486e31c6ec7SLuigi Rizzo 				(slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT",
1487e31c6ec7SLuigi Rizzo 				kring->name, j, ft[ft_i].ft_len);
14884bf50f18SLuigi Rizzo 			buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up);
1489e31c6ec7SLuigi Rizzo 			ft[ft_i].ft_len = 0;
1490e31c6ec7SLuigi Rizzo 			ft[ft_i].ft_flags = 0;
1491e31c6ec7SLuigi Rizzo 		}
14922e159ef0SLuigi Rizzo 		__builtin_prefetch(buf);
1493f9790aebSLuigi Rizzo 		++ft_i;
1494f9790aebSLuigi Rizzo 		if (slot->flags & NS_MOREFRAG) {
1495f9790aebSLuigi Rizzo 			frags++;
1496f9790aebSLuigi Rizzo 			continue;
1497f9790aebSLuigi Rizzo 		}
1498f9790aebSLuigi Rizzo 		if (unlikely(netmap_verbose && frags > 1))
1499f9790aebSLuigi Rizzo 			RD(5, "%d frags at %d", frags, ft_i - frags);
1500f9790aebSLuigi Rizzo 		ft[ft_i - frags].ft_frags = frags;
1501f9790aebSLuigi Rizzo 		frags = 1;
1502f9790aebSLuigi Rizzo 		if (unlikely((int)ft_i >= bridge_batch))
1503f9790aebSLuigi Rizzo 			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1504f9790aebSLuigi Rizzo 	}
1505f9790aebSLuigi Rizzo 	if (frags > 1) {
150637e3a6d3SLuigi Rizzo 		/* Here ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG, and we
150737e3a6d3SLuigi Rizzo 		 * have to fix frags count. */
150837e3a6d3SLuigi Rizzo 		frags--;
150937e3a6d3SLuigi Rizzo 		ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG;
151037e3a6d3SLuigi Rizzo 		ft[ft_i - frags].ft_frags = frags;
151137e3a6d3SLuigi Rizzo 		D("Truncate incomplete fragment at %d (%d frags)", ft_i, frags);
1512f9790aebSLuigi Rizzo 	}
1513f9790aebSLuigi Rizzo 	if (ft_i)
1514f9790aebSLuigi Rizzo 		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1515f9790aebSLuigi Rizzo 	BDG_RUNLOCK(b);
1516f9790aebSLuigi Rizzo 	return j;
1517f9790aebSLuigi Rizzo }
1518f9790aebSLuigi Rizzo 
1519f9790aebSLuigi Rizzo 
1520f9790aebSLuigi Rizzo /* ----- FreeBSD if_bridge hash function ------- */
1521f9790aebSLuigi Rizzo 
/*
 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
 *
 * http://www.burtleburtle.net/bob/hash/spooky.html
 */
#define mix(a, b, c)                                                    \
do {                                                                    \
        a -= b; a -= c; a ^= (c >> 13);                                 \
        b -= c; b -= a; b ^= (a << 8);                                  \
        c -= a; c -= b; c ^= (b >> 13);                                 \
        a -= b; a -= c; a ^= (c >> 12);                                 \
        b -= c; b -= a; b ^= (a << 16);                                 \
        c -= a; c -= b; c ^= (b >> 5);                                  \
        a -= b; a -= c; a ^= (c >> 3);                                  \
        b -= c; b -= a; b ^= (a << 10);                                 \
        c -= a; c -= b; c ^= (b >> 15);                                 \
} while (/*CONSTCOND*/0)


/*
 * Hash the six bytes at 'addr' (a MAC address in the callers visible
 * here) and return the result masked with NM_BDG_HASH - 1.
 * NOTE(review): the mask only yields a uniform table index if
 * NM_BDG_HASH is a power of two -- confirm against its definition.
 */
static __inline uint32_t
nm_bridge_rthash(const uint8_t *addr)
{
	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; /* hash key */

	/*
	 * Widen every byte to uint32_t before shifting. A uint8_t operand
	 * is promoted to (signed) int, so "addr[3] << 24" with the top bit
	 * set would shift into the sign bit, which is undefined behaviour.
	 */
	b += (uint32_t)addr[5] << 8;
	b += (uint32_t)addr[4];
	a += (uint32_t)addr[3] << 24;
	a += (uint32_t)addr[2] << 16;
	a += (uint32_t)addr[1] << 8;
	a += (uint32_t)addr[0];

	mix(a, b, c);
#define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
	return (c & BRIDGE_RTHASH_MASK);
}

#undef mix
1560f9790aebSLuigi Rizzo 
1561f9790aebSLuigi Rizzo 
15624bf50f18SLuigi Rizzo /* nm_register callback for VALE ports */
1563f9790aebSLuigi Rizzo static int
15644bf50f18SLuigi Rizzo netmap_vp_reg(struct netmap_adapter *na, int onoff)
1565f9790aebSLuigi Rizzo {
1566f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna =
1567f9790aebSLuigi Rizzo 		(struct netmap_vp_adapter*)na;
156837e3a6d3SLuigi Rizzo 	enum txrx t;
156937e3a6d3SLuigi Rizzo 	int i;
1570f9790aebSLuigi Rizzo 
15714bf50f18SLuigi Rizzo 	/* persistent ports may be put in netmap mode
15724bf50f18SLuigi Rizzo 	 * before being attached to a bridge
1573f9790aebSLuigi Rizzo 	 */
15744bf50f18SLuigi Rizzo 	if (vpna->na_bdg)
1575f9790aebSLuigi Rizzo 		BDG_WLOCK(vpna->na_bdg);
1576f9790aebSLuigi Rizzo 	if (onoff) {
157737e3a6d3SLuigi Rizzo 		for_rx_tx(t) {
157837e3a6d3SLuigi Rizzo 			for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
157937e3a6d3SLuigi Rizzo 				struct netmap_kring *kring = &NMR(na, t)[i];
158037e3a6d3SLuigi Rizzo 
158137e3a6d3SLuigi Rizzo 				if (nm_kring_pending_on(kring))
158237e3a6d3SLuigi Rizzo 					kring->nr_mode = NKR_NETMAP_ON;
158337e3a6d3SLuigi Rizzo 			}
158437e3a6d3SLuigi Rizzo 		}
158537e3a6d3SLuigi Rizzo 		if (na->active_fds == 0)
15864bf50f18SLuigi Rizzo 			na->na_flags |= NAF_NETMAP_ON;
15874bf50f18SLuigi Rizzo 		 /* XXX on FreeBSD, persistent VALE ports should also
15884bf50f18SLuigi Rizzo 		 * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
15894bf50f18SLuigi Rizzo 		 */
1590f9790aebSLuigi Rizzo 	} else {
159137e3a6d3SLuigi Rizzo 		if (na->active_fds == 0)
15924bf50f18SLuigi Rizzo 			na->na_flags &= ~NAF_NETMAP_ON;
159337e3a6d3SLuigi Rizzo 		for_rx_tx(t) {
159437e3a6d3SLuigi Rizzo 			for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
159537e3a6d3SLuigi Rizzo 				struct netmap_kring *kring = &NMR(na, t)[i];
159637e3a6d3SLuigi Rizzo 
159737e3a6d3SLuigi Rizzo 				if (nm_kring_pending_off(kring))
159837e3a6d3SLuigi Rizzo 					kring->nr_mode = NKR_NETMAP_OFF;
159937e3a6d3SLuigi Rizzo 			}
160037e3a6d3SLuigi Rizzo 		}
1601f9790aebSLuigi Rizzo 	}
16024bf50f18SLuigi Rizzo 	if (vpna->na_bdg)
1603f9790aebSLuigi Rizzo 		BDG_WUNLOCK(vpna->na_bdg);
1604f9790aebSLuigi Rizzo 	return 0;
1605f9790aebSLuigi Rizzo }
1606f9790aebSLuigi Rizzo 
1607f9790aebSLuigi Rizzo 
1608f9790aebSLuigi Rizzo /*
1609f9790aebSLuigi Rizzo  * Lookup function for a learning bridge.
1610f9790aebSLuigi Rizzo  * Update the hash table with the source address,
1611f9790aebSLuigi Rizzo  * and then returns the destination port index, and the
1612f9790aebSLuigi Rizzo  * ring in *dst_ring (at the moment, always use ring 0)
1613f9790aebSLuigi Rizzo  */
1614f9790aebSLuigi Rizzo u_int
16154bf50f18SLuigi Rizzo netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
1616847bf383SLuigi Rizzo 		struct netmap_vp_adapter *na)
1617f9790aebSLuigi Rizzo {
16184bf50f18SLuigi Rizzo 	uint8_t *buf = ft->ft_buf;
16194bf50f18SLuigi Rizzo 	u_int buf_len = ft->ft_len;
1620f9790aebSLuigi Rizzo 	struct nm_hash_ent *ht = na->na_bdg->ht;
1621f9790aebSLuigi Rizzo 	uint32_t sh, dh;
1622f9790aebSLuigi Rizzo 	u_int dst, mysrc = na->bdg_port;
1623f9790aebSLuigi Rizzo 	uint64_t smac, dmac;
162437e3a6d3SLuigi Rizzo 	uint8_t indbuf[12];
1625f9790aebSLuigi Rizzo 
16264bf50f18SLuigi Rizzo 	/* safety check, unfortunately we have many cases */
162737e3a6d3SLuigi Rizzo 	if (buf_len >= 14 + na->up.virt_hdr_len) {
16284bf50f18SLuigi Rizzo 		/* virthdr + mac_hdr in the same slot */
162937e3a6d3SLuigi Rizzo 		buf += na->up.virt_hdr_len;
163037e3a6d3SLuigi Rizzo 		buf_len -= na->up.virt_hdr_len;
163137e3a6d3SLuigi Rizzo 	} else if (buf_len == na->up.virt_hdr_len && ft->ft_flags & NS_MOREFRAG) {
16324bf50f18SLuigi Rizzo 		/* only header in first fragment */
16334bf50f18SLuigi Rizzo 		ft++;
16344bf50f18SLuigi Rizzo 		buf = ft->ft_buf;
16354bf50f18SLuigi Rizzo 		buf_len = ft->ft_len;
16364bf50f18SLuigi Rizzo 	} else {
16374bf50f18SLuigi Rizzo 		RD(5, "invalid buf format, length %d", buf_len);
1638f9790aebSLuigi Rizzo 		return NM_BDG_NOPORT;
1639f9790aebSLuigi Rizzo 	}
164037e3a6d3SLuigi Rizzo 
164137e3a6d3SLuigi Rizzo 	if (ft->ft_flags & NS_INDIRECT) {
164237e3a6d3SLuigi Rizzo 		if (copyin(buf, indbuf, sizeof(indbuf))) {
164337e3a6d3SLuigi Rizzo 			return NM_BDG_NOPORT;
164437e3a6d3SLuigi Rizzo 		}
164537e3a6d3SLuigi Rizzo 		buf = indbuf;
164637e3a6d3SLuigi Rizzo 	}
164737e3a6d3SLuigi Rizzo 
1648f9790aebSLuigi Rizzo 	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
1649f9790aebSLuigi Rizzo 	smac = le64toh(*(uint64_t *)(buf + 4));
1650f9790aebSLuigi Rizzo 	smac >>= 16;
1651f9790aebSLuigi Rizzo 
1652f9790aebSLuigi Rizzo 	/*
1653f9790aebSLuigi Rizzo 	 * The hash is somewhat expensive, there might be some
1654f9790aebSLuigi Rizzo 	 * worthwhile optimizations here.
1655f9790aebSLuigi Rizzo 	 */
1656847bf383SLuigi Rizzo 	if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */
1657f9790aebSLuigi Rizzo 		uint8_t *s = buf+6;
1658f9790aebSLuigi Rizzo 		sh = nm_bridge_rthash(s); // XXX hash of source
1659f9790aebSLuigi Rizzo 		/* update source port forwarding entry */
1660847bf383SLuigi Rizzo 		na->last_smac = ht[sh].mac = smac;	/* XXX expire ? */
1661f9790aebSLuigi Rizzo 		ht[sh].ports = mysrc;
1662f9790aebSLuigi Rizzo 		if (netmap_verbose)
1663f9790aebSLuigi Rizzo 		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
1664f9790aebSLuigi Rizzo 			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
1665f9790aebSLuigi Rizzo 	}
1666f9790aebSLuigi Rizzo 	dst = NM_BDG_BROADCAST;
1667f9790aebSLuigi Rizzo 	if ((buf[0] & 1) == 0) { /* unicast */
1668f9790aebSLuigi Rizzo 		dh = nm_bridge_rthash(buf); // XXX hash of dst
1669f9790aebSLuigi Rizzo 		if (ht[dh].mac == dmac) {	/* found dst */
1670f9790aebSLuigi Rizzo 			dst = ht[dh].ports;
1671f9790aebSLuigi Rizzo 		}
1672f9790aebSLuigi Rizzo 		/* XXX otherwise return NM_BDG_UNKNOWN ? */
1673f9790aebSLuigi Rizzo 	}
1674f9790aebSLuigi Rizzo 	return dst;
1675f9790aebSLuigi Rizzo }
1676f9790aebSLuigi Rizzo 
1677f9790aebSLuigi Rizzo 
1678f9790aebSLuigi Rizzo /*
167917885a7bSLuigi Rizzo  * Available space in the ring. Only used in VALE code
168017885a7bSLuigi Rizzo  * and only with is_rx = 1
168117885a7bSLuigi Rizzo  */
168217885a7bSLuigi Rizzo static inline uint32_t
168317885a7bSLuigi Rizzo nm_kr_space(struct netmap_kring *k, int is_rx)
168417885a7bSLuigi Rizzo {
168517885a7bSLuigi Rizzo 	int space;
168617885a7bSLuigi Rizzo 
168717885a7bSLuigi Rizzo 	if (is_rx) {
168817885a7bSLuigi Rizzo 		int busy = k->nkr_hwlease - k->nr_hwcur;
168917885a7bSLuigi Rizzo 		if (busy < 0)
169017885a7bSLuigi Rizzo 			busy += k->nkr_num_slots;
169117885a7bSLuigi Rizzo 		space = k->nkr_num_slots - 1 - busy;
169217885a7bSLuigi Rizzo 	} else {
169317885a7bSLuigi Rizzo 		/* XXX never used in this branch */
169417885a7bSLuigi Rizzo 		space = k->nr_hwtail - k->nkr_hwlease;
169517885a7bSLuigi Rizzo 		if (space < 0)
169617885a7bSLuigi Rizzo 			space += k->nkr_num_slots;
169717885a7bSLuigi Rizzo 	}
169817885a7bSLuigi Rizzo #if 0
169917885a7bSLuigi Rizzo 	// sanity check
170017885a7bSLuigi Rizzo 	if (k->nkr_hwlease >= k->nkr_num_slots ||
170117885a7bSLuigi Rizzo 		k->nr_hwcur >= k->nkr_num_slots ||
170217885a7bSLuigi Rizzo 		k->nr_tail >= k->nkr_num_slots ||
170317885a7bSLuigi Rizzo 		busy < 0 ||
170417885a7bSLuigi Rizzo 		busy >= k->nkr_num_slots) {
170517885a7bSLuigi Rizzo 		D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d",			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
170617885a7bSLuigi Rizzo 			k->nkr_lease_idx, k->nkr_num_slots);
170717885a7bSLuigi Rizzo 	}
170817885a7bSLuigi Rizzo #endif
170917885a7bSLuigi Rizzo 	return space;
171017885a7bSLuigi Rizzo }
171117885a7bSLuigi Rizzo 
171217885a7bSLuigi Rizzo 
171317885a7bSLuigi Rizzo 
171417885a7bSLuigi Rizzo 
171517885a7bSLuigi Rizzo /* make a lease on the kring for N positions. return the
171617885a7bSLuigi Rizzo  * lease index
171717885a7bSLuigi Rizzo  * XXX only used in VALE code and with is_rx = 1
171817885a7bSLuigi Rizzo  */
171917885a7bSLuigi Rizzo static inline uint32_t
172017885a7bSLuigi Rizzo nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
172117885a7bSLuigi Rizzo {
172217885a7bSLuigi Rizzo 	uint32_t lim = k->nkr_num_slots - 1;
172317885a7bSLuigi Rizzo 	uint32_t lease_idx = k->nkr_lease_idx;
172417885a7bSLuigi Rizzo 
172517885a7bSLuigi Rizzo 	k->nkr_leases[lease_idx] = NR_NOSLOT;
172617885a7bSLuigi Rizzo 	k->nkr_lease_idx = nm_next(lease_idx, lim);
172717885a7bSLuigi Rizzo 
172817885a7bSLuigi Rizzo 	if (n > nm_kr_space(k, is_rx)) {
172917885a7bSLuigi Rizzo 		D("invalid request for %d slots", n);
173017885a7bSLuigi Rizzo 		panic("x");
173117885a7bSLuigi Rizzo 	}
173217885a7bSLuigi Rizzo 	/* XXX verify that there are n slots */
173317885a7bSLuigi Rizzo 	k->nkr_hwlease += n;
173417885a7bSLuigi Rizzo 	if (k->nkr_hwlease > lim)
173517885a7bSLuigi Rizzo 		k->nkr_hwlease -= lim + 1;
173617885a7bSLuigi Rizzo 
173717885a7bSLuigi Rizzo 	if (k->nkr_hwlease >= k->nkr_num_slots ||
173817885a7bSLuigi Rizzo 		k->nr_hwcur >= k->nkr_num_slots ||
173917885a7bSLuigi Rizzo 		k->nr_hwtail >= k->nkr_num_slots ||
174017885a7bSLuigi Rizzo 		k->nkr_lease_idx >= k->nkr_num_slots) {
174117885a7bSLuigi Rizzo 		D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
17424bf50f18SLuigi Rizzo 			k->na->name,
174317885a7bSLuigi Rizzo 			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
174417885a7bSLuigi Rizzo 			k->nkr_lease_idx, k->nkr_num_slots);
174517885a7bSLuigi Rizzo 	}
174617885a7bSLuigi Rizzo 	return lease_idx;
174717885a7bSLuigi Rizzo }
174817885a7bSLuigi Rizzo 
174917885a7bSLuigi Rizzo /*
17504bf50f18SLuigi Rizzo  *
1751f9790aebSLuigi Rizzo  * This flush routine supports only unicast and broadcast but a large
1752f9790aebSLuigi Rizzo  * number of ports, and lets us replace the learn and dispatch functions.
1753f9790aebSLuigi Rizzo  */
1754f9790aebSLuigi Rizzo int
1755f9790aebSLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
1756f9790aebSLuigi Rizzo 		u_int ring_nr)
1757f9790aebSLuigi Rizzo {
1758f9790aebSLuigi Rizzo 	struct nm_bdg_q *dst_ents, *brddst;
1759f9790aebSLuigi Rizzo 	uint16_t num_dsts = 0, *dsts;
1760f9790aebSLuigi Rizzo 	struct nm_bridge *b = na->na_bdg;
176137e3a6d3SLuigi Rizzo 	u_int i, me = na->bdg_port;
1762f9790aebSLuigi Rizzo 
1763f9790aebSLuigi Rizzo 	/*
1764f9790aebSLuigi Rizzo 	 * The work area (pointed by ft) is followed by an array of
1765f9790aebSLuigi Rizzo 	 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
1766f9790aebSLuigi Rizzo 	 * queues per port plus one for the broadcast traffic.
1767f9790aebSLuigi Rizzo 	 * Then we have an array of destination indexes.
1768f9790aebSLuigi Rizzo 	 */
1769f9790aebSLuigi Rizzo 	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
1770f9790aebSLuigi Rizzo 	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
1771f9790aebSLuigi Rizzo 
1772f9790aebSLuigi Rizzo 	/* first pass: find a destination for each packet in the batch */
1773f9790aebSLuigi Rizzo 	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
1774f9790aebSLuigi Rizzo 		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
1775f9790aebSLuigi Rizzo 		uint16_t dst_port, d_i;
1776f9790aebSLuigi Rizzo 		struct nm_bdg_q *d;
1777f9790aebSLuigi Rizzo 
1778f9790aebSLuigi Rizzo 		ND("slot %d frags %d", i, ft[i].ft_frags);
1779f0ea3689SLuigi Rizzo 		/* Drop the packet if the virtio-net header is not into the first
1780f9790aebSLuigi Rizzo 		   fragment nor at the very beginning of the second. */
178137e3a6d3SLuigi Rizzo 		if (unlikely(na->up.virt_hdr_len > ft[i].ft_len))
1782f9790aebSLuigi Rizzo 			continue;
17834bf50f18SLuigi Rizzo 		dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na);
1784f9790aebSLuigi Rizzo 		if (netmap_verbose > 255)
1785f9790aebSLuigi Rizzo 			RD(5, "slot %d port %d -> %d", i, me, dst_port);
1786f9790aebSLuigi Rizzo 		if (dst_port == NM_BDG_NOPORT)
1787f9790aebSLuigi Rizzo 			continue; /* this packet is identified to be dropped */
1788f9790aebSLuigi Rizzo 		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
1789f9790aebSLuigi Rizzo 			continue;
1790f9790aebSLuigi Rizzo 		else if (dst_port == NM_BDG_BROADCAST)
1791f9790aebSLuigi Rizzo 			dst_ring = 0; /* broadcasts always go to ring 0 */
1792f9790aebSLuigi Rizzo 		else if (unlikely(dst_port == me ||
1793f9790aebSLuigi Rizzo 		    !b->bdg_ports[dst_port]))
1794f9790aebSLuigi Rizzo 			continue;
1795f9790aebSLuigi Rizzo 
1796f9790aebSLuigi Rizzo 		/* get a position in the scratch pad */
1797f9790aebSLuigi Rizzo 		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
1798f9790aebSLuigi Rizzo 		d = dst_ents + d_i;
1799f9790aebSLuigi Rizzo 
1800f9790aebSLuigi Rizzo 		/* append the first fragment to the list */
1801f9790aebSLuigi Rizzo 		if (d->bq_head == NM_FT_NULL) { /* new destination */
1802f9790aebSLuigi Rizzo 			d->bq_head = d->bq_tail = i;
1803f9790aebSLuigi Rizzo 			/* remember this position to be scanned later */
1804f9790aebSLuigi Rizzo 			if (dst_port != NM_BDG_BROADCAST)
1805f9790aebSLuigi Rizzo 				dsts[num_dsts++] = d_i;
1806f9790aebSLuigi Rizzo 		} else {
1807f9790aebSLuigi Rizzo 			ft[d->bq_tail].ft_next = i;
1808f9790aebSLuigi Rizzo 			d->bq_tail = i;
1809f9790aebSLuigi Rizzo 		}
1810f9790aebSLuigi Rizzo 		d->bq_len += ft[i].ft_frags;
1811f9790aebSLuigi Rizzo 	}
1812f9790aebSLuigi Rizzo 
1813f9790aebSLuigi Rizzo 	/*
1814f9790aebSLuigi Rizzo 	 * Broadcast traffic goes to ring 0 on all destinations.
1815f9790aebSLuigi Rizzo 	 * So we need to add these rings to the list of ports to scan.
1816f9790aebSLuigi Rizzo 	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
1817f9790aebSLuigi Rizzo 	 * expensive. We should keep a compact list of active destinations
1818f9790aebSLuigi Rizzo 	 * so we could shorten this loop.
1819f9790aebSLuigi Rizzo 	 */
1820f9790aebSLuigi Rizzo 	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
1821f9790aebSLuigi Rizzo 	if (brddst->bq_head != NM_FT_NULL) {
182237e3a6d3SLuigi Rizzo 		u_int j;
1823f9790aebSLuigi Rizzo 		for (j = 0; likely(j < b->bdg_active_ports); j++) {
1824f9790aebSLuigi Rizzo 			uint16_t d_i;
1825f9790aebSLuigi Rizzo 			i = b->bdg_port_index[j];
1826f9790aebSLuigi Rizzo 			if (unlikely(i == me))
1827f9790aebSLuigi Rizzo 				continue;
1828f9790aebSLuigi Rizzo 			d_i = i * NM_BDG_MAXRINGS;
1829f9790aebSLuigi Rizzo 			if (dst_ents[d_i].bq_head == NM_FT_NULL)
1830f9790aebSLuigi Rizzo 				dsts[num_dsts++] = d_i;
1831f9790aebSLuigi Rizzo 		}
1832f9790aebSLuigi Rizzo 	}
1833f9790aebSLuigi Rizzo 
1834f9790aebSLuigi Rizzo 	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
18354bf50f18SLuigi Rizzo 	/* second pass: scan destinations */
1836f9790aebSLuigi Rizzo 	for (i = 0; i < num_dsts; i++) {
1837f9790aebSLuigi Rizzo 		struct netmap_vp_adapter *dst_na;
1838f9790aebSLuigi Rizzo 		struct netmap_kring *kring;
1839f9790aebSLuigi Rizzo 		struct netmap_ring *ring;
1840f0ea3689SLuigi Rizzo 		u_int dst_nr, lim, j, d_i, next, brd_next;
1841f9790aebSLuigi Rizzo 		u_int needed, howmany;
1842f9790aebSLuigi Rizzo 		int retry = netmap_txsync_retry;
1843f9790aebSLuigi Rizzo 		struct nm_bdg_q *d;
1844f9790aebSLuigi Rizzo 		uint32_t my_start = 0, lease_idx = 0;
1845f9790aebSLuigi Rizzo 		int nrings;
1846f0ea3689SLuigi Rizzo 		int virt_hdr_mismatch = 0;
1847f9790aebSLuigi Rizzo 
1848f9790aebSLuigi Rizzo 		d_i = dsts[i];
1849f9790aebSLuigi Rizzo 		ND("second pass %d port %d", i, d_i);
1850f9790aebSLuigi Rizzo 		d = dst_ents + d_i;
1851f9790aebSLuigi Rizzo 		// XXX fix the division
1852f9790aebSLuigi Rizzo 		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
1853f9790aebSLuigi Rizzo 		/* protect from the lookup function returning an inactive
1854f9790aebSLuigi Rizzo 		 * destination port
1855f9790aebSLuigi Rizzo 		 */
1856f9790aebSLuigi Rizzo 		if (unlikely(dst_na == NULL))
1857f9790aebSLuigi Rizzo 			goto cleanup;
1858f9790aebSLuigi Rizzo 		if (dst_na->up.na_flags & NAF_SW_ONLY)
1859f9790aebSLuigi Rizzo 			goto cleanup;
1860f9790aebSLuigi Rizzo 		/*
1861f9790aebSLuigi Rizzo 		 * The interface may be in !netmap mode in two cases:
1862f9790aebSLuigi Rizzo 		 * - when na is attached but not activated yet;
1863f9790aebSLuigi Rizzo 		 * - when na is being deactivated but is still attached.
1864f9790aebSLuigi Rizzo 		 */
18654bf50f18SLuigi Rizzo 		if (unlikely(!nm_netmap_on(&dst_na->up))) {
1866f9790aebSLuigi Rizzo 			ND("not in netmap mode!");
1867f9790aebSLuigi Rizzo 			goto cleanup;
1868f9790aebSLuigi Rizzo 		}
1869f9790aebSLuigi Rizzo 
1870f9790aebSLuigi Rizzo 		/* there is at least one either unicast or broadcast packet */
1871f9790aebSLuigi Rizzo 		brd_next = brddst->bq_head;
1872f9790aebSLuigi Rizzo 		next = d->bq_head;
		/* We need to reserve this many slots. If fewer are
		 * available, some packets will be dropped.
		 * Packets may have multiple fragments, so there is a
		 * chance that we may not use all of the slots we have
		 * claimed, and we will need to handle the leftover
		 * ones when we regain the lock.
		 */
1880f9790aebSLuigi Rizzo 		needed = d->bq_len + brddst->bq_len;
1881f9790aebSLuigi Rizzo 
188237e3a6d3SLuigi Rizzo 		if (unlikely(dst_na->up.virt_hdr_len != na->up.virt_hdr_len)) {
1883*c3e9b4dbSLuiz Otavio O Souza 			if (netmap_verbose) {
188437e3a6d3SLuigi Rizzo 			    RD(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len,
188537e3a6d3SLuigi Rizzo 				  dst_na->up.virt_hdr_len);
1886*c3e9b4dbSLuiz Otavio O Souza 			}
1887f0ea3689SLuigi Rizzo 			/* There is a virtio-net header/offloadings mismatch between
1888f0ea3689SLuigi Rizzo 			 * source and destination. The slower mismatch datapath will
1889f0ea3689SLuigi Rizzo 			 * be used to cope with all the mismatches.
1890f0ea3689SLuigi Rizzo 			 */
1891f0ea3689SLuigi Rizzo 			virt_hdr_mismatch = 1;
1892f0ea3689SLuigi Rizzo 			if (dst_na->mfs < na->mfs) {
1893f0ea3689SLuigi Rizzo 				/* We may need to do segmentation offloadings, and so
1894f0ea3689SLuigi Rizzo 				 * we may need a number of destination slots greater
1895f0ea3689SLuigi Rizzo 				 * than the number of input slots ('needed').
1896f0ea3689SLuigi Rizzo 				 * We look for the smallest integer 'x' which satisfies:
1897f0ea3689SLuigi Rizzo 				 *	needed * na->mfs + x * H <= x * na->mfs
1898f0ea3689SLuigi Rizzo 				 * where 'H' is the length of the longest header that may
1899f0ea3689SLuigi Rizzo 				 * be replicated in the segmentation process (e.g. for
1900f0ea3689SLuigi Rizzo 				 * TCPv4 we must account for ethernet header, IP header
1901f0ea3689SLuigi Rizzo 				 * and TCPv4 header).
1902f0ea3689SLuigi Rizzo 				 */
1903f0ea3689SLuigi Rizzo 				needed = (needed * na->mfs) /
1904f0ea3689SLuigi Rizzo 						(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
1905f0ea3689SLuigi Rizzo 				ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
1906f0ea3689SLuigi Rizzo 			}
1907f0ea3689SLuigi Rizzo 		}
1908f0ea3689SLuigi Rizzo 
1909f9790aebSLuigi Rizzo 		ND(5, "pass 2 dst %d is %x %s",
1910f9790aebSLuigi Rizzo 			i, d_i, is_vp ? "virtual" : "nic/host");
1911f9790aebSLuigi Rizzo 		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
1912f9790aebSLuigi Rizzo 		nrings = dst_na->up.num_rx_rings;
1913f9790aebSLuigi Rizzo 		if (dst_nr >= nrings)
1914f9790aebSLuigi Rizzo 			dst_nr = dst_nr % nrings;
1915f9790aebSLuigi Rizzo 		kring = &dst_na->up.rx_rings[dst_nr];
1916f9790aebSLuigi Rizzo 		ring = kring->ring;
1917f9790aebSLuigi Rizzo 		lim = kring->nkr_num_slots - 1;
1918f9790aebSLuigi Rizzo 
1919f9790aebSLuigi Rizzo retry:
1920f9790aebSLuigi Rizzo 
1921f0ea3689SLuigi Rizzo 		if (dst_na->retry && retry) {
1922f0ea3689SLuigi Rizzo 			/* try to get some free slot from the previous run */
1923847bf383SLuigi Rizzo 			kring->nm_notify(kring, 0);
19244bf50f18SLuigi Rizzo 			/* actually useful only for bwraps, since there
19254bf50f18SLuigi Rizzo 			 * the notify will trigger a txsync on the hwna. VALE ports
19264bf50f18SLuigi Rizzo 			 * have dst_na->retry == 0
19274bf50f18SLuigi Rizzo 			 */
1928f0ea3689SLuigi Rizzo 		}
1929f9790aebSLuigi Rizzo 		/* reserve the buffers in the queue and an entry
1930f9790aebSLuigi Rizzo 		 * to report completion, and drop lock.
1931f9790aebSLuigi Rizzo 		 * XXX this might become a helper function.
1932f9790aebSLuigi Rizzo 		 */
1933f9790aebSLuigi Rizzo 		mtx_lock(&kring->q_lock);
1934f9790aebSLuigi Rizzo 		if (kring->nkr_stopped) {
1935f9790aebSLuigi Rizzo 			mtx_unlock(&kring->q_lock);
1936f9790aebSLuigi Rizzo 			goto cleanup;
1937f9790aebSLuigi Rizzo 		}
1938f9790aebSLuigi Rizzo 		my_start = j = kring->nkr_hwlease;
1939f9790aebSLuigi Rizzo 		howmany = nm_kr_space(kring, 1);
1940f9790aebSLuigi Rizzo 		if (needed < howmany)
1941f9790aebSLuigi Rizzo 			howmany = needed;
1942f9790aebSLuigi Rizzo 		lease_idx = nm_kr_lease(kring, howmany, 1);
1943f9790aebSLuigi Rizzo 		mtx_unlock(&kring->q_lock);
1944f9790aebSLuigi Rizzo 
1945f9790aebSLuigi Rizzo 		/* only retry if we need more than available slots */
1946f9790aebSLuigi Rizzo 		if (retry && needed <= howmany)
1947f9790aebSLuigi Rizzo 			retry = 0;
1948f9790aebSLuigi Rizzo 
1949f9790aebSLuigi Rizzo 		/* copy to the destination queue */
1950f9790aebSLuigi Rizzo 		while (howmany > 0) {
1951f9790aebSLuigi Rizzo 			struct netmap_slot *slot;
1952f9790aebSLuigi Rizzo 			struct nm_bdg_fwd *ft_p, *ft_end;
1953f9790aebSLuigi Rizzo 			u_int cnt;
1954f9790aebSLuigi Rizzo 
1955f9790aebSLuigi Rizzo 			/* find the queue from which we pick next packet.
1956f9790aebSLuigi Rizzo 			 * NM_FT_NULL is always higher than valid indexes
1957f9790aebSLuigi Rizzo 			 * so we never dereference it if the other list
1958f9790aebSLuigi Rizzo 			 * has packets (and if both are empty we never
1959f9790aebSLuigi Rizzo 			 * get here).
1960f9790aebSLuigi Rizzo 			 */
1961f9790aebSLuigi Rizzo 			if (next < brd_next) {
1962f9790aebSLuigi Rizzo 				ft_p = ft + next;
1963f9790aebSLuigi Rizzo 				next = ft_p->ft_next;
1964f9790aebSLuigi Rizzo 			} else { /* insert broadcast */
1965f9790aebSLuigi Rizzo 				ft_p = ft + brd_next;
1966f9790aebSLuigi Rizzo 				brd_next = ft_p->ft_next;
1967f9790aebSLuigi Rizzo 			}
1968f9790aebSLuigi Rizzo 			cnt = ft_p->ft_frags; // cnt > 0
1969f9790aebSLuigi Rizzo 			if (unlikely(cnt > howmany))
1970f9790aebSLuigi Rizzo 			    break; /* no more space */
1971f9790aebSLuigi Rizzo 			if (netmap_verbose && cnt > 1)
1972f9790aebSLuigi Rizzo 				RD(5, "rx %d frags to %d", cnt, j);
1973f9790aebSLuigi Rizzo 			ft_end = ft_p + cnt;
1974f0ea3689SLuigi Rizzo 			if (unlikely(virt_hdr_mismatch)) {
1975f0ea3689SLuigi Rizzo 				bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
1976f0ea3689SLuigi Rizzo 			} else {
1977f0ea3689SLuigi Rizzo 				howmany -= cnt;
1978f9790aebSLuigi Rizzo 				do {
1979f9790aebSLuigi Rizzo 					char *dst, *src = ft_p->ft_buf;
1980f9790aebSLuigi Rizzo 					size_t copy_len = ft_p->ft_len, dst_len = copy_len;
1981f9790aebSLuigi Rizzo 
1982f9790aebSLuigi Rizzo 					slot = &ring->slot[j];
19834bf50f18SLuigi Rizzo 					dst = NMB(&dst_na->up, slot);
1984f9790aebSLuigi Rizzo 
198517885a7bSLuigi Rizzo 					ND("send [%d] %d(%d) bytes at %s:%d",
198617885a7bSLuigi Rizzo 							i, (int)copy_len, (int)dst_len,
198717885a7bSLuigi Rizzo 							NM_IFPNAME(dst_ifp), j);
1988f9790aebSLuigi Rizzo 					/* round to a multiple of 64 */
1989f9790aebSLuigi Rizzo 					copy_len = (copy_len + 63) & ~63;
1990f9790aebSLuigi Rizzo 
19914bf50f18SLuigi Rizzo 					if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) ||
19924bf50f18SLuigi Rizzo 						     copy_len > NETMAP_BUF_SIZE(&na->up))) {
1993e31c6ec7SLuigi Rizzo 						RD(5, "invalid len %d, down to 64", (int)copy_len);
1994e31c6ec7SLuigi Rizzo 						copy_len = dst_len = 64; // XXX
1995e31c6ec7SLuigi Rizzo 					}
1996f9790aebSLuigi Rizzo 					if (ft_p->ft_flags & NS_INDIRECT) {
1997f9790aebSLuigi Rizzo 						if (copyin(src, dst, copy_len)) {
1998f9790aebSLuigi Rizzo 							// invalid user pointer, pretend len is 0
1999f9790aebSLuigi Rizzo 							dst_len = 0;
2000f9790aebSLuigi Rizzo 						}
2001f9790aebSLuigi Rizzo 					} else {
2002f9790aebSLuigi Rizzo 						//memcpy(dst, src, copy_len);
2003f9790aebSLuigi Rizzo 						pkt_copy(src, dst, (int)copy_len);
2004f9790aebSLuigi Rizzo 					}
2005f9790aebSLuigi Rizzo 					slot->len = dst_len;
2006f9790aebSLuigi Rizzo 					slot->flags = (cnt << 8)| NS_MOREFRAG;
2007f9790aebSLuigi Rizzo 					j = nm_next(j, lim);
2008f0ea3689SLuigi Rizzo 					needed--;
2009f9790aebSLuigi Rizzo 					ft_p++;
2010f9790aebSLuigi Rizzo 				} while (ft_p != ft_end);
2011f9790aebSLuigi Rizzo 				slot->flags = (cnt << 8); /* clear flag on last entry */
2012f0ea3689SLuigi Rizzo 			}
2013f9790aebSLuigi Rizzo 			/* are we done ? */
2014f9790aebSLuigi Rizzo 			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
2015f9790aebSLuigi Rizzo 				break;
2016f9790aebSLuigi Rizzo 		}
2017f9790aebSLuigi Rizzo 		{
2018f9790aebSLuigi Rizzo 		    /* current position */
2019f9790aebSLuigi Rizzo 		    uint32_t *p = kring->nkr_leases; /* shorthand */
2020f9790aebSLuigi Rizzo 		    uint32_t update_pos;
2021f9790aebSLuigi Rizzo 		    int still_locked = 1;
2022f9790aebSLuigi Rizzo 
2023f9790aebSLuigi Rizzo 		    mtx_lock(&kring->q_lock);
2024f9790aebSLuigi Rizzo 		    if (unlikely(howmany > 0)) {
2025f9790aebSLuigi Rizzo 			/* not used all bufs. If i am the last one
2026f9790aebSLuigi Rizzo 			 * i can recover the slots, otherwise must
2027f9790aebSLuigi Rizzo 			 * fill them with 0 to mark empty packets.
2028f9790aebSLuigi Rizzo 			 */
2029f9790aebSLuigi Rizzo 			ND("leftover %d bufs", howmany);
2030f9790aebSLuigi Rizzo 			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
2031f9790aebSLuigi Rizzo 			    /* yes i am the last one */
2032f9790aebSLuigi Rizzo 			    ND("roll back nkr_hwlease to %d", j);
2033f9790aebSLuigi Rizzo 			    kring->nkr_hwlease = j;
2034f9790aebSLuigi Rizzo 			} else {
2035f9790aebSLuigi Rizzo 			    while (howmany-- > 0) {
2036f9790aebSLuigi Rizzo 				ring->slot[j].len = 0;
2037f9790aebSLuigi Rizzo 				ring->slot[j].flags = 0;
2038f9790aebSLuigi Rizzo 				j = nm_next(j, lim);
2039f9790aebSLuigi Rizzo 			    }
2040f9790aebSLuigi Rizzo 			}
2041f9790aebSLuigi Rizzo 		    }
2042f9790aebSLuigi Rizzo 		    p[lease_idx] = j; /* report I am done */
2043f9790aebSLuigi Rizzo 
204417885a7bSLuigi Rizzo 		    update_pos = kring->nr_hwtail;
2045f9790aebSLuigi Rizzo 
2046f9790aebSLuigi Rizzo 		    if (my_start == update_pos) {
2047f9790aebSLuigi Rizzo 			/* all slots before my_start have been reported,
2048f9790aebSLuigi Rizzo 			 * so scan subsequent leases to see if other ranges
2049f9790aebSLuigi Rizzo 			 * have been completed, and to a selwakeup or txsync.
2050f9790aebSLuigi Rizzo 		         */
2051f9790aebSLuigi Rizzo 			while (lease_idx != kring->nkr_lease_idx &&
2052f9790aebSLuigi Rizzo 				p[lease_idx] != NR_NOSLOT) {
2053f9790aebSLuigi Rizzo 			    j = p[lease_idx];
2054f9790aebSLuigi Rizzo 			    p[lease_idx] = NR_NOSLOT;
2055f9790aebSLuigi Rizzo 			    lease_idx = nm_next(lease_idx, lim);
2056f9790aebSLuigi Rizzo 			}
2057f9790aebSLuigi Rizzo 			/* j is the new 'write' position. j != my_start
2058f9790aebSLuigi Rizzo 			 * means there are new buffers to report
2059f9790aebSLuigi Rizzo 			 */
2060f9790aebSLuigi Rizzo 			if (likely(j != my_start)) {
206117885a7bSLuigi Rizzo 				kring->nr_hwtail = j;
2062f9790aebSLuigi Rizzo 				still_locked = 0;
2063f9790aebSLuigi Rizzo 				mtx_unlock(&kring->q_lock);
2064847bf383SLuigi Rizzo 				kring->nm_notify(kring, 0);
20654bf50f18SLuigi Rizzo 				/* this is netmap_notify for VALE ports and
20664bf50f18SLuigi Rizzo 				 * netmap_bwrap_notify for bwrap. The latter will
20674bf50f18SLuigi Rizzo 				 * trigger a txsync on the underlying hwna
20684bf50f18SLuigi Rizzo 				 */
20694bf50f18SLuigi Rizzo 				if (dst_na->retry && retry--) {
20704bf50f18SLuigi Rizzo 					/* XXX this is going to call nm_notify again.
20714bf50f18SLuigi Rizzo 					 * Only useful for bwrap in virtual machines
20724bf50f18SLuigi Rizzo 					 */
2073f9790aebSLuigi Rizzo 					goto retry;
2074f9790aebSLuigi Rizzo 				}
2075f9790aebSLuigi Rizzo 			}
20764bf50f18SLuigi Rizzo 		    }
2077f9790aebSLuigi Rizzo 		    if (still_locked)
2078f9790aebSLuigi Rizzo 			mtx_unlock(&kring->q_lock);
2079f9790aebSLuigi Rizzo 		}
2080f9790aebSLuigi Rizzo cleanup:
2081f9790aebSLuigi Rizzo 		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
2082f9790aebSLuigi Rizzo 		d->bq_len = 0;
2083f9790aebSLuigi Rizzo 	}
2084f9790aebSLuigi Rizzo 	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
2085f9790aebSLuigi Rizzo 	brddst->bq_len = 0;
2086f9790aebSLuigi Rizzo 	return 0;
2087f9790aebSLuigi Rizzo }
2088f9790aebSLuigi Rizzo 
20894bf50f18SLuigi Rizzo /* nm_txsync callback for VALE ports */
2090f9790aebSLuigi Rizzo static int
20914bf50f18SLuigi Rizzo netmap_vp_txsync(struct netmap_kring *kring, int flags)
2092f9790aebSLuigi Rizzo {
20934bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *na =
20944bf50f18SLuigi Rizzo 		(struct netmap_vp_adapter *)kring->na;
209517885a7bSLuigi Rizzo 	u_int done;
209617885a7bSLuigi Rizzo 	u_int const lim = kring->nkr_num_slots - 1;
2097847bf383SLuigi Rizzo 	u_int const head = kring->rhead;
2098f9790aebSLuigi Rizzo 
2099f9790aebSLuigi Rizzo 	if (bridge_batch <= 0) { /* testing only */
2100847bf383SLuigi Rizzo 		done = head; // used all
2101f9790aebSLuigi Rizzo 		goto done;
2102f9790aebSLuigi Rizzo 	}
21034bf50f18SLuigi Rizzo 	if (!na->na_bdg) {
2104847bf383SLuigi Rizzo 		done = head;
21054bf50f18SLuigi Rizzo 		goto done;
21064bf50f18SLuigi Rizzo 	}
2107f9790aebSLuigi Rizzo 	if (bridge_batch > NM_BDG_BATCH)
2108f9790aebSLuigi Rizzo 		bridge_batch = NM_BDG_BATCH;
2109f9790aebSLuigi Rizzo 
2110847bf383SLuigi Rizzo 	done = nm_bdg_preflush(kring, head);
2111f9790aebSLuigi Rizzo done:
2112847bf383SLuigi Rizzo 	if (done != head)
2113847bf383SLuigi Rizzo 		D("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail);
211417885a7bSLuigi Rizzo 	/*
211517885a7bSLuigi Rizzo 	 * packets between 'done' and 'cur' are left unsent.
211617885a7bSLuigi Rizzo 	 */
211717885a7bSLuigi Rizzo 	kring->nr_hwcur = done;
211817885a7bSLuigi Rizzo 	kring->nr_hwtail = nm_prev(done, lim);
2119f9790aebSLuigi Rizzo 	if (netmap_verbose)
21204bf50f18SLuigi Rizzo 		D("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
2121f9790aebSLuigi Rizzo 	return 0;
2122f9790aebSLuigi Rizzo }
2123f9790aebSLuigi Rizzo 
2124f9790aebSLuigi Rizzo 
21254bf50f18SLuigi Rizzo /* rxsync code used by VALE ports nm_rxsync callback and also
21264bf50f18SLuigi Rizzo  * internally by the brwap
2127f9790aebSLuigi Rizzo  */
2128f9790aebSLuigi Rizzo static int
21294bf50f18SLuigi Rizzo netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
2130f9790aebSLuigi Rizzo {
21314bf50f18SLuigi Rizzo 	struct netmap_adapter *na = kring->na;
213217885a7bSLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
213317885a7bSLuigi Rizzo 	u_int nm_i, lim = kring->nkr_num_slots - 1;
2134847bf383SLuigi Rizzo 	u_int head = kring->rhead;
213517885a7bSLuigi Rizzo 	int n;
213617885a7bSLuigi Rizzo 
213717885a7bSLuigi Rizzo 	if (head > lim) {
213817885a7bSLuigi Rizzo 		D("ouch dangerous reset!!!");
213917885a7bSLuigi Rizzo 		n = netmap_ring_reinit(kring);
214017885a7bSLuigi Rizzo 		goto done;
214117885a7bSLuigi Rizzo 	}
214217885a7bSLuigi Rizzo 
214317885a7bSLuigi Rizzo 	/* First part, import newly received packets. */
214417885a7bSLuigi Rizzo 	/* actually nothing to do here, they are already in the kring */
214517885a7bSLuigi Rizzo 
214617885a7bSLuigi Rizzo 	/* Second part, skip past packets that userspace has released. */
214717885a7bSLuigi Rizzo 	nm_i = kring->nr_hwcur;
214817885a7bSLuigi Rizzo 	if (nm_i != head) {
214917885a7bSLuigi Rizzo 		/* consistency check, but nothing really important here */
215017885a7bSLuigi Rizzo 		for (n = 0; likely(nm_i != head); n++) {
215117885a7bSLuigi Rizzo 			struct netmap_slot *slot = &ring->slot[nm_i];
21524bf50f18SLuigi Rizzo 			void *addr = NMB(na, slot);
215317885a7bSLuigi Rizzo 
21544bf50f18SLuigi Rizzo 			if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
215517885a7bSLuigi Rizzo 				D("bad buffer index %d, ignore ?",
215617885a7bSLuigi Rizzo 					slot->buf_idx);
215717885a7bSLuigi Rizzo 			}
215817885a7bSLuigi Rizzo 			slot->flags &= ~NS_BUF_CHANGED;
215917885a7bSLuigi Rizzo 			nm_i = nm_next(nm_i, lim);
216017885a7bSLuigi Rizzo 		}
216117885a7bSLuigi Rizzo 		kring->nr_hwcur = head;
216217885a7bSLuigi Rizzo 	}
216317885a7bSLuigi Rizzo 
216417885a7bSLuigi Rizzo 	n = 0;
216517885a7bSLuigi Rizzo done:
216617885a7bSLuigi Rizzo 	return n;
216717885a7bSLuigi Rizzo }
2168f9790aebSLuigi Rizzo 
2169f9790aebSLuigi Rizzo /*
21704bf50f18SLuigi Rizzo  * nm_rxsync callback for VALE ports
2171f9790aebSLuigi Rizzo  * user process reading from a VALE switch.
2172f9790aebSLuigi Rizzo  * Already protected against concurrent calls from userspace,
2173f9790aebSLuigi Rizzo  * but we must acquire the queue's lock to protect against
2174f9790aebSLuigi Rizzo  * writers on the same queue.
2175f9790aebSLuigi Rizzo  */
2176f9790aebSLuigi Rizzo static int
21774bf50f18SLuigi Rizzo netmap_vp_rxsync(struct netmap_kring *kring, int flags)
2178f9790aebSLuigi Rizzo {
2179f9790aebSLuigi Rizzo 	int n;
2180f9790aebSLuigi Rizzo 
2181f9790aebSLuigi Rizzo 	mtx_lock(&kring->q_lock);
21824bf50f18SLuigi Rizzo 	n = netmap_vp_rxsync_locked(kring, flags);
2183f9790aebSLuigi Rizzo 	mtx_unlock(&kring->q_lock);
2184f9790aebSLuigi Rizzo 	return n;
2185f9790aebSLuigi Rizzo }
2186f9790aebSLuigi Rizzo 
218717885a7bSLuigi Rizzo 
21884bf50f18SLuigi Rizzo /* nm_bdg_attach callback for VALE ports
21894bf50f18SLuigi Rizzo  * The na_vp port is this same netmap_adapter. There is no host port.
21904bf50f18SLuigi Rizzo  */
2191f9790aebSLuigi Rizzo static int
21924bf50f18SLuigi Rizzo netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na)
21934bf50f18SLuigi Rizzo {
21944bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
21954bf50f18SLuigi Rizzo 
21964bf50f18SLuigi Rizzo 	if (vpna->na_bdg)
21974bf50f18SLuigi Rizzo 		return EBUSY;
21984bf50f18SLuigi Rizzo 	na->na_vp = vpna;
21994bf50f18SLuigi Rizzo 	strncpy(na->name, name, sizeof(na->name));
22004bf50f18SLuigi Rizzo 	na->na_hostvp = NULL;
22014bf50f18SLuigi Rizzo 	return 0;
22024bf50f18SLuigi Rizzo }
22034bf50f18SLuigi Rizzo 
22044bf50f18SLuigi Rizzo /* create a netmap_vp_adapter that describes a VALE port.
22054bf50f18SLuigi Rizzo  * Only persistent VALE ports have a non-null ifp.
22064bf50f18SLuigi Rizzo  */
22074bf50f18SLuigi Rizzo static int
2208*c3e9b4dbSLuiz Otavio O Souza netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp,
2209*c3e9b4dbSLuiz Otavio O Souza 		struct netmap_mem_d *nmd,
2210*c3e9b4dbSLuiz Otavio O Souza 		struct netmap_vp_adapter **ret)
2211f9790aebSLuigi Rizzo {
2212f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna;
2213f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
2214*c3e9b4dbSLuiz Otavio O Souza 	int error = 0;
2215f0ea3689SLuigi Rizzo 	u_int npipes = 0;
2216f9790aebSLuigi Rizzo 
2217*c3e9b4dbSLuiz Otavio O Souza 	vpna = nm_os_malloc(sizeof(*vpna));
2218f9790aebSLuigi Rizzo 	if (vpna == NULL)
2219f9790aebSLuigi Rizzo 		return ENOMEM;
2220f9790aebSLuigi Rizzo 
2221f9790aebSLuigi Rizzo  	na = &vpna->up;
2222f9790aebSLuigi Rizzo 
2223f9790aebSLuigi Rizzo 	na->ifp = ifp;
22244bf50f18SLuigi Rizzo 	strncpy(na->name, nmr->nr_name, sizeof(na->name));
2225f9790aebSLuigi Rizzo 
2226f9790aebSLuigi Rizzo 	/* bound checking */
2227f9790aebSLuigi Rizzo 	na->num_tx_rings = nmr->nr_tx_rings;
2228f9790aebSLuigi Rizzo 	nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
2229f9790aebSLuigi Rizzo 	nmr->nr_tx_rings = na->num_tx_rings; // write back
2230f9790aebSLuigi Rizzo 	na->num_rx_rings = nmr->nr_rx_rings;
2231f9790aebSLuigi Rizzo 	nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
2232f9790aebSLuigi Rizzo 	nmr->nr_rx_rings = na->num_rx_rings; // write back
2233f9790aebSLuigi Rizzo 	nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
2234f9790aebSLuigi Rizzo 			1, NM_BDG_MAXSLOTS, NULL);
2235f9790aebSLuigi Rizzo 	na->num_tx_desc = nmr->nr_tx_slots;
2236f9790aebSLuigi Rizzo 	nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
2237f9790aebSLuigi Rizzo 			1, NM_BDG_MAXSLOTS, NULL);
2238f0ea3689SLuigi Rizzo 	/* validate number of pipes. We want at least 1,
2239f0ea3689SLuigi Rizzo 	 * but probably can do with some more.
2240f0ea3689SLuigi Rizzo 	 * So let's use 2 as default (when 0 is supplied)
2241f0ea3689SLuigi Rizzo 	 */
2242f0ea3689SLuigi Rizzo 	npipes = nmr->nr_arg1;
2243f0ea3689SLuigi Rizzo 	nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
2244f0ea3689SLuigi Rizzo 	nmr->nr_arg1 = npipes;	/* write back */
2245f0ea3689SLuigi Rizzo 	/* validate extra bufs */
2246f0ea3689SLuigi Rizzo 	nm_bound_var(&nmr->nr_arg3, 0, 0,
2247f0ea3689SLuigi Rizzo 			128*NM_BDG_MAXSLOTS, NULL);
2248f9790aebSLuigi Rizzo 	na->num_rx_desc = nmr->nr_rx_slots;
2249f0ea3689SLuigi Rizzo 	vpna->mfs = 1514;
2250847bf383SLuigi Rizzo 	vpna->last_smac = ~0llu;
2251f0ea3689SLuigi Rizzo 	/*if (vpna->mfs > netmap_buf_size)  TODO netmap_buf_size is zero??
2252f0ea3689SLuigi Rizzo 		vpna->mfs = netmap_buf_size; */
2253f0ea3689SLuigi Rizzo         if (netmap_verbose)
2254f0ea3689SLuigi Rizzo 		D("max frame size %u", vpna->mfs);
2255f9790aebSLuigi Rizzo 
2256847bf383SLuigi Rizzo 	na->na_flags |= NAF_BDG_MAYSLEEP;
225710b8ef3dSLuigi Rizzo 	/* persistent VALE ports look like hw devices
225810b8ef3dSLuigi Rizzo 	 * with a native netmap adapter
225910b8ef3dSLuigi Rizzo 	 */
226010b8ef3dSLuigi Rizzo 	if (ifp)
226110b8ef3dSLuigi Rizzo 		na->na_flags |= NAF_NATIVE;
22624bf50f18SLuigi Rizzo 	na->nm_txsync = netmap_vp_txsync;
22634bf50f18SLuigi Rizzo 	na->nm_rxsync = netmap_vp_rxsync;
22644bf50f18SLuigi Rizzo 	na->nm_register = netmap_vp_reg;
2265f9790aebSLuigi Rizzo 	na->nm_krings_create = netmap_vp_krings_create;
2266f9790aebSLuigi Rizzo 	na->nm_krings_delete = netmap_vp_krings_delete;
22674bf50f18SLuigi Rizzo 	na->nm_dtor = netmap_vp_dtor;
2268*c3e9b4dbSLuiz Otavio O Souza 	D("nr_arg2 %d", nmr->nr_arg2);
2269*c3e9b4dbSLuiz Otavio O Souza 	na->nm_mem = nmd ?
2270*c3e9b4dbSLuiz Otavio O Souza 		netmap_mem_get(nmd):
2271*c3e9b4dbSLuiz Otavio O Souza 		netmap_mem_private_new(
2272f9790aebSLuigi Rizzo 			na->num_tx_rings, na->num_tx_desc,
2273f0ea3689SLuigi Rizzo 			na->num_rx_rings, na->num_rx_desc,
2274f0ea3689SLuigi Rizzo 			nmr->nr_arg3, npipes, &error);
2275f0ea3689SLuigi Rizzo 	if (na->nm_mem == NULL)
2276f0ea3689SLuigi Rizzo 		goto err;
22774bf50f18SLuigi Rizzo 	na->nm_bdg_attach = netmap_vp_bdg_attach;
2278f9790aebSLuigi Rizzo 	/* other nmd fields are set in the common routine */
2279f9790aebSLuigi Rizzo 	error = netmap_attach_common(na);
2280f0ea3689SLuigi Rizzo 	if (error)
2281f0ea3689SLuigi Rizzo 		goto err;
22824bf50f18SLuigi Rizzo 	*ret = vpna;
2283f0ea3689SLuigi Rizzo 	return 0;
2284f0ea3689SLuigi Rizzo 
2285f0ea3689SLuigi Rizzo err:
2286f0ea3689SLuigi Rizzo 	if (na->nm_mem != NULL)
2287*c3e9b4dbSLuiz Otavio O Souza 		netmap_mem_put(na->nm_mem);
2288*c3e9b4dbSLuiz Otavio O Souza 	nm_os_free(vpna);
2289f9790aebSLuigi Rizzo 	return error;
2290f9790aebSLuigi Rizzo }
2291f9790aebSLuigi Rizzo 
22924bf50f18SLuigi Rizzo /* Bridge wrapper code (bwrap).
22934bf50f18SLuigi Rizzo  * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
22944bf50f18SLuigi Rizzo  * VALE switch.
22954bf50f18SLuigi Rizzo  * The main task is to swap the meaning of tx and rx rings to match the
22964bf50f18SLuigi Rizzo  * expectations of the VALE switch code (see nm_bdg_flush).
22974bf50f18SLuigi Rizzo  *
22984bf50f18SLuigi Rizzo  * The bwrap works by interposing a netmap_bwrap_adapter between the
22994bf50f18SLuigi Rizzo  * rest of the system and the hwna. The netmap_bwrap_adapter looks like
23004bf50f18SLuigi Rizzo  * a netmap_vp_adapter to the rest of the system, but, internally, it
23014bf50f18SLuigi Rizzo  * translates all callbacks to what the hwna expects.
23024bf50f18SLuigi Rizzo  *
23034bf50f18SLuigi Rizzo  * Note that we have to intercept callbacks coming from two sides:
23044bf50f18SLuigi Rizzo  *
23054bf50f18SLuigi Rizzo  *  - callbacks coming from the netmap module are intercepted by
23064bf50f18SLuigi Rizzo  *    passing around the netmap_bwrap_adapter instead of the hwna
23074bf50f18SLuigi Rizzo  *
23084bf50f18SLuigi Rizzo  *  - callbacks coming from outside of the netmap module only know
23094bf50f18SLuigi Rizzo  *    about the hwna. This, however, only happens in interrupt
23104bf50f18SLuigi Rizzo  *    handlers, where only the hwna->nm_notify callback is called.
23114bf50f18SLuigi Rizzo  *    What the bwrap does is to overwrite the hwna->nm_notify callback
23124bf50f18SLuigi Rizzo  *    with its own netmap_bwrap_intr_notify.
23134bf50f18SLuigi Rizzo  *    XXX This assumes that the hwna->nm_notify callback was the
23144bf50f18SLuigi Rizzo  *    standard netmap_notify(), as it is the case for nic adapters.
23154bf50f18SLuigi Rizzo  *    Any additional action performed by hwna->nm_notify will not be
23164bf50f18SLuigi Rizzo  *    performed by netmap_bwrap_intr_notify.
23174bf50f18SLuigi Rizzo  *
23184bf50f18SLuigi Rizzo  * Additionally, the bwrap can optionally attach the host rings pair
23194bf50f18SLuigi Rizzo  * of the wrapped adapter to a different port of the switch.
23204bf50f18SLuigi Rizzo  */
23214bf50f18SLuigi Rizzo 
232217885a7bSLuigi Rizzo 
2323f9790aebSLuigi Rizzo static void
2324f9790aebSLuigi Rizzo netmap_bwrap_dtor(struct netmap_adapter *na)
2325f9790aebSLuigi Rizzo {
2326f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
2327f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
232837e3a6d3SLuigi Rizzo 	struct nm_bridge *b = bna->up.na_bdg,
232937e3a6d3SLuigi Rizzo 		*bh = bna->host.na_bdg;
233037e3a6d3SLuigi Rizzo 
2331*c3e9b4dbSLuiz Otavio O Souza 	netmap_mem_put(bna->host.up.nm_mem);
2332*c3e9b4dbSLuiz Otavio O Souza 
233337e3a6d3SLuigi Rizzo 	if (b) {
233437e3a6d3SLuigi Rizzo 		netmap_bdg_detach_common(b, bna->up.bdg_port,
233537e3a6d3SLuigi Rizzo 			    (bh ? bna->host.bdg_port : -1));
233637e3a6d3SLuigi Rizzo 	}
2337f9790aebSLuigi Rizzo 
2338f9790aebSLuigi Rizzo 	ND("na %p", na);
2339f9790aebSLuigi Rizzo 	na->ifp = NULL;
23404bf50f18SLuigi Rizzo 	bna->host.up.ifp = NULL;
23414bf50f18SLuigi Rizzo 	hwna->na_private = NULL;
23424bf50f18SLuigi Rizzo 	hwna->na_vp = hwna->na_hostvp = NULL;
23434bf50f18SLuigi Rizzo 	hwna->na_flags &= ~NAF_BUSY;
23444bf50f18SLuigi Rizzo 	netmap_adapter_put(hwna);
2345f9790aebSLuigi Rizzo 
2346f9790aebSLuigi Rizzo }
2347f9790aebSLuigi Rizzo 
234817885a7bSLuigi Rizzo 
2349f9790aebSLuigi Rizzo /*
235017885a7bSLuigi Rizzo  * Intr callback for NICs connected to a bridge.
235117885a7bSLuigi Rizzo  * Simply ignore tx interrupts (maybe we could try to recover space ?)
235217885a7bSLuigi Rizzo  * and pass received packets from nic to the bridge.
235317885a7bSLuigi Rizzo  *
2354f9790aebSLuigi Rizzo  * XXX TODO check locking: this is called from the interrupt
2355f9790aebSLuigi Rizzo  * handler so we should make sure that the interface is not
2356f9790aebSLuigi Rizzo  * disconnected while passing down an interrupt.
2357f9790aebSLuigi Rizzo  *
235817885a7bSLuigi Rizzo  * Note, no user process can access this NIC or the host stack.
235917885a7bSLuigi Rizzo  * The only part of the ring that is significant are the slots,
236017885a7bSLuigi Rizzo  * and head/cur/tail are set from the kring as needed
236117885a7bSLuigi Rizzo  * (part as a receive ring, part as a transmit ring).
236217885a7bSLuigi Rizzo  *
236317885a7bSLuigi Rizzo  * callback that overwrites the hwna notify callback.
236437e3a6d3SLuigi Rizzo  * Packets come from the outside or from the host stack and are put on an
236537e3a6d3SLuigi Rizzo  * hwna rx ring.
2366f9790aebSLuigi Rizzo  * The bridge wrapper then sends the packets through the bridge.
2367f9790aebSLuigi Rizzo  */
2368f9790aebSLuigi Rizzo static int
2369847bf383SLuigi Rizzo netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
2370f9790aebSLuigi Rizzo {
2371847bf383SLuigi Rizzo 	struct netmap_adapter *na = kring->na;
2372f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna = na->na_private;
2373847bf383SLuigi Rizzo 	struct netmap_kring *bkring;
2374f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna = &bna->up;
2375847bf383SLuigi Rizzo 	u_int ring_nr = kring->ring_id;
237637e3a6d3SLuigi Rizzo 	int ret = NM_IRQ_COMPLETED;
237737e3a6d3SLuigi Rizzo 	int error;
2378f9790aebSLuigi Rizzo 
237917885a7bSLuigi Rizzo 	if (netmap_verbose)
2380847bf383SLuigi Rizzo 	    D("%s %s 0x%x", na->name, kring->name, flags);
2381f9790aebSLuigi Rizzo 
2382847bf383SLuigi Rizzo 	bkring = &vpna->up.tx_rings[ring_nr];
2383f9790aebSLuigi Rizzo 
2384f9790aebSLuigi Rizzo 	/* make sure the ring is not disabled */
238537e3a6d3SLuigi Rizzo 	if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) {
238637e3a6d3SLuigi Rizzo 		return EIO;
238737e3a6d3SLuigi Rizzo 	}
2388f9790aebSLuigi Rizzo 
238917885a7bSLuigi Rizzo 	if (netmap_verbose)
2390847bf383SLuigi Rizzo 	    D("%s head %d cur %d tail %d",  na->name,
239117885a7bSLuigi Rizzo 		kring->rhead, kring->rcur, kring->rtail);
239217885a7bSLuigi Rizzo 
2393847bf383SLuigi Rizzo 	/* simulate a user wakeup on the rx ring
2394847bf383SLuigi Rizzo 	 * fetch packets that have arrived.
2395f9790aebSLuigi Rizzo 	 */
2396f0ea3689SLuigi Rizzo 	error = kring->nm_sync(kring, 0);
2397f9790aebSLuigi Rizzo 	if (error)
2398f9790aebSLuigi Rizzo 		goto put_out;
239937e3a6d3SLuigi Rizzo 	if (kring->nr_hwcur == kring->nr_hwtail) {
240037e3a6d3SLuigi Rizzo 		if (netmap_verbose)
2401f9790aebSLuigi Rizzo 			D("how strange, interrupt with no packets on %s",
24024bf50f18SLuigi Rizzo 			    na->name);
2403f9790aebSLuigi Rizzo 		goto put_out;
2404f9790aebSLuigi Rizzo 	}
240517885a7bSLuigi Rizzo 
2406847bf383SLuigi Rizzo 	/* new packets are kring->rcur to kring->nr_hwtail, and the bkring
2407847bf383SLuigi Rizzo 	 * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail
240817885a7bSLuigi Rizzo 	 * to push all packets out.
240917885a7bSLuigi Rizzo 	 */
2410847bf383SLuigi Rizzo 	bkring->rhead = bkring->rcur = kring->nr_hwtail;
241117885a7bSLuigi Rizzo 
24124bf50f18SLuigi Rizzo 	netmap_vp_txsync(bkring, flags);
2413f9790aebSLuigi Rizzo 
241417885a7bSLuigi Rizzo 	/* mark all buffers as released on this ring */
2415847bf383SLuigi Rizzo 	kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail;
241617885a7bSLuigi Rizzo 	/* another call to actually release the buffers */
2417f0ea3689SLuigi Rizzo 	error = kring->nm_sync(kring, 0);
2418f9790aebSLuigi Rizzo 
241937e3a6d3SLuigi Rizzo 	/* The second rxsync may have further advanced hwtail. If this happens,
242037e3a6d3SLuigi Rizzo 	 *  return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */
242137e3a6d3SLuigi Rizzo 	if (kring->rcur != kring->nr_hwtail) {
242237e3a6d3SLuigi Rizzo 		ret = NM_IRQ_RESCHED;
242337e3a6d3SLuigi Rizzo 	}
2424f9790aebSLuigi Rizzo put_out:
2425f9790aebSLuigi Rizzo 	nm_kr_put(kring);
242637e3a6d3SLuigi Rizzo 
242737e3a6d3SLuigi Rizzo 	return error ? error : ret;
2428f9790aebSLuigi Rizzo }
2429f9790aebSLuigi Rizzo 
243017885a7bSLuigi Rizzo 
24314bf50f18SLuigi Rizzo /* nm_register callback for bwrap */
2432f9790aebSLuigi Rizzo static int
243337e3a6d3SLuigi Rizzo netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
2434f9790aebSLuigi Rizzo {
2435f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
2436f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
2437f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
2438f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *hostna = &bna->host;
243937e3a6d3SLuigi Rizzo 	int error, i;
2440847bf383SLuigi Rizzo 	enum txrx t;
2441f9790aebSLuigi Rizzo 
24424bf50f18SLuigi Rizzo 	ND("%s %s", na->name, onoff ? "on" : "off");
2443f9790aebSLuigi Rizzo 
2444f9790aebSLuigi Rizzo 	if (onoff) {
24454bf50f18SLuigi Rizzo 		/* netmap_do_regif has been called on the bwrap na.
24464bf50f18SLuigi Rizzo 		 * We need to pass the information about the
24474bf50f18SLuigi Rizzo 		 * memory allocator down to the hwna before
24484bf50f18SLuigi Rizzo 		 * putting it in netmap mode
24494bf50f18SLuigi Rizzo 		 */
2450f9790aebSLuigi Rizzo 		hwna->na_lut = na->na_lut;
2451f9790aebSLuigi Rizzo 
2452f9790aebSLuigi Rizzo 		if (hostna->na_bdg) {
24534bf50f18SLuigi Rizzo 			/* if the host rings have been attached to switch,
24544bf50f18SLuigi Rizzo 			 * we need to copy the memory allocator information
24554bf50f18SLuigi Rizzo 			 * in the hostna also
24564bf50f18SLuigi Rizzo 			 */
2457f9790aebSLuigi Rizzo 			hostna->up.na_lut = na->na_lut;
2458f9790aebSLuigi Rizzo 		}
2459f9790aebSLuigi Rizzo 
24600c7ba37eSLuigi Rizzo 		/* cross-link the netmap rings
24610c7ba37eSLuigi Rizzo 		 * The original number of rings comes from hwna,
24620c7ba37eSLuigi Rizzo 		 * rx rings on one side equals tx rings on the other.
24630c7ba37eSLuigi Rizzo 		 */
2464847bf383SLuigi Rizzo 		for_rx_tx(t) {
2465847bf383SLuigi Rizzo 			enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
246637e3a6d3SLuigi Rizzo 			for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) {
246737e3a6d3SLuigi Rizzo 				NMR(hwna, r)[i].ring = NMR(na, t)[i].ring;
2468f9790aebSLuigi Rizzo 			}
2469f9790aebSLuigi Rizzo 		}
247037e3a6d3SLuigi Rizzo 
247137e3a6d3SLuigi Rizzo 		if (na->na_flags & NAF_HOST_RINGS) {
247237e3a6d3SLuigi Rizzo 			struct netmap_adapter *hna = &hostna->up;
247337e3a6d3SLuigi Rizzo 			/* the hostna rings are the host rings of the bwrap.
247437e3a6d3SLuigi Rizzo 			 * The corresponding krings must point back to the
247537e3a6d3SLuigi Rizzo 			 * hostna
247637e3a6d3SLuigi Rizzo 			 */
247737e3a6d3SLuigi Rizzo 			hna->tx_rings = &na->tx_rings[na->num_tx_rings];
247837e3a6d3SLuigi Rizzo 			hna->tx_rings[0].na = hna;
247937e3a6d3SLuigi Rizzo 			hna->rx_rings = &na->rx_rings[na->num_rx_rings];
248037e3a6d3SLuigi Rizzo 			hna->rx_rings[0].na = hna;
248137e3a6d3SLuigi Rizzo 		}
248237e3a6d3SLuigi Rizzo 	}
248337e3a6d3SLuigi Rizzo 
248437e3a6d3SLuigi Rizzo 	/* pass down the pending ring state information */
248537e3a6d3SLuigi Rizzo 	for_rx_tx(t) {
248637e3a6d3SLuigi Rizzo 		for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
248737e3a6d3SLuigi Rizzo 			NMR(hwna, t)[i].nr_pending_mode =
248837e3a6d3SLuigi Rizzo 				NMR(na, t)[i].nr_pending_mode;
2489f9790aebSLuigi Rizzo 	}
2490f9790aebSLuigi Rizzo 
24914bf50f18SLuigi Rizzo 	/* forward the request to the hwna */
2492f9790aebSLuigi Rizzo 	error = hwna->nm_register(hwna, onoff);
2493f9790aebSLuigi Rizzo 	if (error)
2494f9790aebSLuigi Rizzo 		return error;
2495f9790aebSLuigi Rizzo 
249637e3a6d3SLuigi Rizzo 	/* copy up the current ring state information */
249737e3a6d3SLuigi Rizzo 	for_rx_tx(t) {
249837e3a6d3SLuigi Rizzo 		for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
249937e3a6d3SLuigi Rizzo 			NMR(na, t)[i].nr_mode =
250037e3a6d3SLuigi Rizzo 				NMR(hwna, t)[i].nr_mode;
250137e3a6d3SLuigi Rizzo 	}
250237e3a6d3SLuigi Rizzo 
25034bf50f18SLuigi Rizzo 	/* impersonate a netmap_vp_adapter */
25044bf50f18SLuigi Rizzo 	netmap_vp_reg(na, onoff);
25054bf50f18SLuigi Rizzo 	if (hostna->na_bdg)
25064bf50f18SLuigi Rizzo 		netmap_vp_reg(&hostna->up, onoff);
2507f9790aebSLuigi Rizzo 
2508f9790aebSLuigi Rizzo 	if (onoff) {
2509847bf383SLuigi Rizzo 		u_int i;
2510847bf383SLuigi Rizzo 		/* intercept the hwna nm_nofify callback on the hw rings */
2511847bf383SLuigi Rizzo 		for (i = 0; i < hwna->num_rx_rings; i++) {
2512847bf383SLuigi Rizzo 			hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify;
2513847bf383SLuigi Rizzo 			hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify;
2514847bf383SLuigi Rizzo 		}
2515847bf383SLuigi Rizzo 		i = hwna->num_rx_rings; /* for safety */
2516847bf383SLuigi Rizzo 		/* save the host ring notify unconditionally */
2517847bf383SLuigi Rizzo 		hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify;
2518847bf383SLuigi Rizzo 		if (hostna->na_bdg) {
2519847bf383SLuigi Rizzo 			/* also intercept the host ring notify */
2520847bf383SLuigi Rizzo 			hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify;
2521847bf383SLuigi Rizzo 		}
252237e3a6d3SLuigi Rizzo 		if (na->active_fds == 0)
252337e3a6d3SLuigi Rizzo 			na->na_flags |= NAF_NETMAP_ON;
2524f9790aebSLuigi Rizzo 	} else {
2525847bf383SLuigi Rizzo 		u_int i;
252637e3a6d3SLuigi Rizzo 
252737e3a6d3SLuigi Rizzo 		if (na->active_fds == 0)
252837e3a6d3SLuigi Rizzo 			na->na_flags &= ~NAF_NETMAP_ON;
252937e3a6d3SLuigi Rizzo 
2530847bf383SLuigi Rizzo 		/* reset all notify callbacks (including host ring) */
2531847bf383SLuigi Rizzo 		for (i = 0; i <= hwna->num_rx_rings; i++) {
2532847bf383SLuigi Rizzo 			hwna->rx_rings[i].nm_notify = hwna->rx_rings[i].save_notify;
2533847bf383SLuigi Rizzo 			hwna->rx_rings[i].save_notify = NULL;
2534847bf383SLuigi Rizzo 		}
2535847bf383SLuigi Rizzo 		hwna->na_lut.lut = NULL;
2536847bf383SLuigi Rizzo 		hwna->na_lut.objtotal = 0;
2537847bf383SLuigi Rizzo 		hwna->na_lut.objsize = 0;
2538f9790aebSLuigi Rizzo 	}
2539f9790aebSLuigi Rizzo 
2540f9790aebSLuigi Rizzo 	return 0;
2541f9790aebSLuigi Rizzo }
2542f9790aebSLuigi Rizzo 
25434bf50f18SLuigi Rizzo /* nm_config callback for bwrap */
2544f9790aebSLuigi Rizzo static int
2545f9790aebSLuigi Rizzo netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
2546f9790aebSLuigi Rizzo 				    u_int *rxr, u_int *rxd)
2547f9790aebSLuigi Rizzo {
2548f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
2549f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
2550f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
2551f9790aebSLuigi Rizzo 
2552f9790aebSLuigi Rizzo 	/* forward the request */
2553f9790aebSLuigi Rizzo 	netmap_update_config(hwna);
2554f9790aebSLuigi Rizzo 	/* swap the results */
2555f9790aebSLuigi Rizzo 	*txr = hwna->num_rx_rings;
2556f9790aebSLuigi Rizzo 	*txd = hwna->num_rx_desc;
2557f9790aebSLuigi Rizzo 	*rxr = hwna->num_tx_rings;
2558f9790aebSLuigi Rizzo 	*rxd = hwna->num_rx_desc;
2559f9790aebSLuigi Rizzo 
2560f9790aebSLuigi Rizzo 	return 0;
2561f9790aebSLuigi Rizzo }
2562f9790aebSLuigi Rizzo 
256317885a7bSLuigi Rizzo 
25644bf50f18SLuigi Rizzo /* nm_krings_create callback for bwrap */
2565f9790aebSLuigi Rizzo static int
2566f9790aebSLuigi Rizzo netmap_bwrap_krings_create(struct netmap_adapter *na)
2567f9790aebSLuigi Rizzo {
2568f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
2569f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
2570f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
257137e3a6d3SLuigi Rizzo 	int i, error = 0;
257237e3a6d3SLuigi Rizzo 	enum txrx t;
2573f9790aebSLuigi Rizzo 
25744bf50f18SLuigi Rizzo 	ND("%s", na->name);
2575f9790aebSLuigi Rizzo 
25764bf50f18SLuigi Rizzo 	/* impersonate a netmap_vp_adapter */
2577f9790aebSLuigi Rizzo 	error = netmap_vp_krings_create(na);
2578f9790aebSLuigi Rizzo 	if (error)
2579f9790aebSLuigi Rizzo 		return error;
2580f9790aebSLuigi Rizzo 
25814bf50f18SLuigi Rizzo 	/* also create the hwna krings */
2582f9790aebSLuigi Rizzo 	error = hwna->nm_krings_create(hwna);
2583f9790aebSLuigi Rizzo 	if (error) {
258437e3a6d3SLuigi Rizzo 		goto err_del_vp_rings;
2585f9790aebSLuigi Rizzo 	}
2586f9790aebSLuigi Rizzo 
258737e3a6d3SLuigi Rizzo 	/* get each ring slot number from the corresponding hwna ring */
258837e3a6d3SLuigi Rizzo 	for_rx_tx(t) {
258937e3a6d3SLuigi Rizzo 		enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
259037e3a6d3SLuigi Rizzo 		for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) {
259137e3a6d3SLuigi Rizzo 			NMR(na, t)[i].nkr_num_slots = NMR(hwna, r)[i].nkr_num_slots;
259237e3a6d3SLuigi Rizzo 		}
2593f0ea3689SLuigi Rizzo 	}
2594f9790aebSLuigi Rizzo 
2595f9790aebSLuigi Rizzo 	return 0;
259637e3a6d3SLuigi Rizzo 
259737e3a6d3SLuigi Rizzo err_del_vp_rings:
259837e3a6d3SLuigi Rizzo 	netmap_vp_krings_delete(na);
259937e3a6d3SLuigi Rizzo 
260037e3a6d3SLuigi Rizzo 	return error;
2601f9790aebSLuigi Rizzo }
2602f9790aebSLuigi Rizzo 
260317885a7bSLuigi Rizzo 
2604f9790aebSLuigi Rizzo static void
2605f9790aebSLuigi Rizzo netmap_bwrap_krings_delete(struct netmap_adapter *na)
2606f9790aebSLuigi Rizzo {
2607f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
2608f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
2609f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
2610f9790aebSLuigi Rizzo 
26114bf50f18SLuigi Rizzo 	ND("%s", na->name);
2612f9790aebSLuigi Rizzo 
2613f9790aebSLuigi Rizzo 	hwna->nm_krings_delete(hwna);
2614f9790aebSLuigi Rizzo 	netmap_vp_krings_delete(na);
2615f9790aebSLuigi Rizzo }
2616f9790aebSLuigi Rizzo 
261717885a7bSLuigi Rizzo 
2618f9790aebSLuigi Rizzo /* notify method for the bridge-->hwna direction */
2619f9790aebSLuigi Rizzo static int
2620847bf383SLuigi Rizzo netmap_bwrap_notify(struct netmap_kring *kring, int flags)
2621f9790aebSLuigi Rizzo {
2622847bf383SLuigi Rizzo 	struct netmap_adapter *na = kring->na;
2623847bf383SLuigi Rizzo 	struct netmap_bwrap_adapter *bna = na->na_private;
2624f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
2625847bf383SLuigi Rizzo 	u_int ring_n = kring->ring_id;
2626847bf383SLuigi Rizzo 	u_int lim = kring->nkr_num_slots - 1;
2627847bf383SLuigi Rizzo 	struct netmap_kring *hw_kring;
262837e3a6d3SLuigi Rizzo 	int error;
2629f9790aebSLuigi Rizzo 
2630847bf383SLuigi Rizzo 	ND("%s: na %s hwna %s",
2631847bf383SLuigi Rizzo 			(kring ? kring->name : "NULL!"),
2632847bf383SLuigi Rizzo 			(na ? na->name : "NULL!"),
2633847bf383SLuigi Rizzo 			(hwna ? hwna->name : "NULL!"));
2634f9790aebSLuigi Rizzo 	hw_kring = &hwna->tx_rings[ring_n];
2635847bf383SLuigi Rizzo 
263637e3a6d3SLuigi Rizzo 	if (nm_kr_tryget(hw_kring, 0, NULL)) {
263737e3a6d3SLuigi Rizzo 		return ENXIO;
263837e3a6d3SLuigi Rizzo 	}
2639f9790aebSLuigi Rizzo 
264017885a7bSLuigi Rizzo 	/* first step: simulate a user wakeup on the rx ring */
2641847bf383SLuigi Rizzo 	netmap_vp_rxsync(kring, flags);
264217885a7bSLuigi Rizzo 	ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
26434bf50f18SLuigi Rizzo 		na->name, ring_n,
264417885a7bSLuigi Rizzo 		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
264517885a7bSLuigi Rizzo 		ring->head, ring->cur, ring->tail,
264617885a7bSLuigi Rizzo 		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail);
2647847bf383SLuigi Rizzo 	/* second step: the new packets are sent on the tx ring
264817885a7bSLuigi Rizzo 	 * (which is actually the same ring)
264917885a7bSLuigi Rizzo 	 */
2650847bf383SLuigi Rizzo 	hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
2651f0ea3689SLuigi Rizzo 	error = hw_kring->nm_sync(hw_kring, flags);
2652847bf383SLuigi Rizzo 	if (error)
265337e3a6d3SLuigi Rizzo 		goto put_out;
265417885a7bSLuigi Rizzo 
2655847bf383SLuigi Rizzo 	/* third step: now we are back the rx ring */
265617885a7bSLuigi Rizzo 	/* claim ownership on all hw owned bufs */
2657847bf383SLuigi Rizzo 	kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */
265817885a7bSLuigi Rizzo 
2659847bf383SLuigi Rizzo 	/* fourth step: the user goes to sleep again, causing another rxsync */
2660847bf383SLuigi Rizzo 	netmap_vp_rxsync(kring, flags);
266117885a7bSLuigi Rizzo 	ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
26624bf50f18SLuigi Rizzo 		na->name, ring_n,
266317885a7bSLuigi Rizzo 		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
266417885a7bSLuigi Rizzo 		ring->head, ring->cur, ring->tail,
266517885a7bSLuigi Rizzo 		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
266637e3a6d3SLuigi Rizzo put_out:
2667847bf383SLuigi Rizzo 	nm_kr_put(hw_kring);
266837e3a6d3SLuigi Rizzo 
266937e3a6d3SLuigi Rizzo 	return error ? error : NM_IRQ_COMPLETED;
2670f9790aebSLuigi Rizzo }
2671f9790aebSLuigi Rizzo 
267217885a7bSLuigi Rizzo 
26734bf50f18SLuigi Rizzo /* nm_bdg_ctl callback for the bwrap.
26744bf50f18SLuigi Rizzo  * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
26754bf50f18SLuigi Rizzo  * On attach, it needs to provide a fake netmap_priv_d structure and
26764bf50f18SLuigi Rizzo  * perform a netmap_do_regif() on the bwrap. This will put both the
26774bf50f18SLuigi Rizzo  * bwrap and the hwna in netmap mode, with the netmap rings shared
26784bf50f18SLuigi Rizzo  * and cross linked. Moroever, it will start intercepting interrupts
26794bf50f18SLuigi Rizzo  * directed to hwna.
26804bf50f18SLuigi Rizzo  */
2681f9790aebSLuigi Rizzo static int
26824bf50f18SLuigi Rizzo netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
26834bf50f18SLuigi Rizzo {
26844bf50f18SLuigi Rizzo 	struct netmap_priv_d *npriv;
26854bf50f18SLuigi Rizzo 	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
26864bf50f18SLuigi Rizzo 	int error = 0;
26874bf50f18SLuigi Rizzo 
26884bf50f18SLuigi Rizzo 	if (attach) {
26894bf50f18SLuigi Rizzo 		if (NETMAP_OWNED_BY_ANY(na)) {
26904bf50f18SLuigi Rizzo 			return EBUSY;
26914bf50f18SLuigi Rizzo 		}
26924bf50f18SLuigi Rizzo 		if (bna->na_kpriv) {
26934bf50f18SLuigi Rizzo 			/* nothing to do */
26944bf50f18SLuigi Rizzo 			return 0;
26954bf50f18SLuigi Rizzo 		}
269637e3a6d3SLuigi Rizzo 		npriv = netmap_priv_new();
26974bf50f18SLuigi Rizzo 		if (npriv == NULL)
26984bf50f18SLuigi Rizzo 			return ENOMEM;
269937e3a6d3SLuigi Rizzo 		npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */
270037e3a6d3SLuigi Rizzo 		error = netmap_do_regif(npriv, na, 0, NR_REG_NIC_SW);
2701847bf383SLuigi Rizzo 		if (error) {
270237e3a6d3SLuigi Rizzo 			netmap_priv_delete(npriv);
27034bf50f18SLuigi Rizzo 			return error;
27044bf50f18SLuigi Rizzo 		}
27054bf50f18SLuigi Rizzo 		bna->na_kpriv = npriv;
27064bf50f18SLuigi Rizzo 		na->na_flags |= NAF_BUSY;
27074bf50f18SLuigi Rizzo 	} else {
27084bf50f18SLuigi Rizzo 		if (na->active_fds == 0) /* not registered */
27094bf50f18SLuigi Rizzo 			return EINVAL;
271037e3a6d3SLuigi Rizzo 		netmap_priv_delete(bna->na_kpriv);
27114bf50f18SLuigi Rizzo 		bna->na_kpriv = NULL;
27124bf50f18SLuigi Rizzo 		na->na_flags &= ~NAF_BUSY;
27134bf50f18SLuigi Rizzo 	}
27144bf50f18SLuigi Rizzo 	return error;
27154bf50f18SLuigi Rizzo 
27164bf50f18SLuigi Rizzo }
27174bf50f18SLuigi Rizzo 
27184bf50f18SLuigi Rizzo /* attach a bridge wrapper to the 'real' device */
27194bf50f18SLuigi Rizzo int
27204bf50f18SLuigi Rizzo netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
2721f9790aebSLuigi Rizzo {
2722f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna;
27234bf50f18SLuigi Rizzo 	struct netmap_adapter *na = NULL;
27244bf50f18SLuigi Rizzo 	struct netmap_adapter *hostna = NULL;
27254bf50f18SLuigi Rizzo 	int error = 0;
2726847bf383SLuigi Rizzo 	enum txrx t;
2727f9790aebSLuigi Rizzo 
27284bf50f18SLuigi Rizzo 	/* make sure the NIC is not already in use */
27294bf50f18SLuigi Rizzo 	if (NETMAP_OWNED_BY_ANY(hwna)) {
27304bf50f18SLuigi Rizzo 		D("NIC %s busy, cannot attach to bridge", hwna->name);
27314bf50f18SLuigi Rizzo 		return EBUSY;
27324bf50f18SLuigi Rizzo 	}
2733f9790aebSLuigi Rizzo 
2734*c3e9b4dbSLuiz Otavio O Souza 	bna = nm_os_malloc(sizeof(*bna));
27354bf50f18SLuigi Rizzo 	if (bna == NULL) {
2736f9790aebSLuigi Rizzo 		return ENOMEM;
27374bf50f18SLuigi Rizzo 	}
2738f9790aebSLuigi Rizzo 
2739f9790aebSLuigi Rizzo 	na = &bna->up.up;
274037e3a6d3SLuigi Rizzo 	/* make bwrap ifp point to the real ifp */
274137e3a6d3SLuigi Rizzo 	na->ifp = hwna->ifp;
2742*c3e9b4dbSLuiz Otavio O Souza 	if_ref(na->ifp);
2743847bf383SLuigi Rizzo 	na->na_private = bna;
27444bf50f18SLuigi Rizzo 	strncpy(na->name, nr_name, sizeof(na->name));
2745f9790aebSLuigi Rizzo 	/* fill the ring data for the bwrap adapter with rx/tx meanings
2746f9790aebSLuigi Rizzo 	 * swapped. The real cross-linking will be done during register,
2747f9790aebSLuigi Rizzo 	 * when all the krings will have been created.
2748f9790aebSLuigi Rizzo 	 */
2749847bf383SLuigi Rizzo 	for_rx_tx(t) {
2750847bf383SLuigi Rizzo 		enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
2751847bf383SLuigi Rizzo 		nma_set_nrings(na, t, nma_get_nrings(hwna, r));
2752847bf383SLuigi Rizzo 		nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
2753847bf383SLuigi Rizzo 	}
2754f9790aebSLuigi Rizzo 	na->nm_dtor = netmap_bwrap_dtor;
275537e3a6d3SLuigi Rizzo 	na->nm_register = netmap_bwrap_reg;
2756f9790aebSLuigi Rizzo 	// na->nm_txsync = netmap_bwrap_txsync;
2757f9790aebSLuigi Rizzo 	// na->nm_rxsync = netmap_bwrap_rxsync;
2758f9790aebSLuigi Rizzo 	na->nm_config = netmap_bwrap_config;
2759f9790aebSLuigi Rizzo 	na->nm_krings_create = netmap_bwrap_krings_create;
2760f9790aebSLuigi Rizzo 	na->nm_krings_delete = netmap_bwrap_krings_delete;
2761f9790aebSLuigi Rizzo 	na->nm_notify = netmap_bwrap_notify;
27624bf50f18SLuigi Rizzo 	na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
27634bf50f18SLuigi Rizzo 	na->pdev = hwna->pdev;
2764*c3e9b4dbSLuiz Otavio O Souza 	na->nm_mem = netmap_mem_get(hwna->nm_mem);
276537e3a6d3SLuigi Rizzo 	na->virt_hdr_len = hwna->virt_hdr_len;
2766f9790aebSLuigi Rizzo 	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
2767f9790aebSLuigi Rizzo 
2768f9790aebSLuigi Rizzo 	bna->hwna = hwna;
2769f9790aebSLuigi Rizzo 	netmap_adapter_get(hwna);
2770f9790aebSLuigi Rizzo 	hwna->na_private = bna; /* weak reference */
27714bf50f18SLuigi Rizzo 	hwna->na_vp = &bna->up;
2772f9790aebSLuigi Rizzo 
2773f0ea3689SLuigi Rizzo 	if (hwna->na_flags & NAF_HOST_RINGS) {
27744bf50f18SLuigi Rizzo 		if (hwna->na_flags & NAF_SW_ONLY)
27754bf50f18SLuigi Rizzo 			na->na_flags |= NAF_SW_ONLY;
2776f0ea3689SLuigi Rizzo 		na->na_flags |= NAF_HOST_RINGS;
2777f9790aebSLuigi Rizzo 		hostna = &bna->host.up;
27784bf50f18SLuigi Rizzo 		snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name);
2779f9790aebSLuigi Rizzo 		hostna->ifp = hwna->ifp;
2780847bf383SLuigi Rizzo 		for_rx_tx(t) {
2781847bf383SLuigi Rizzo 			enum txrx r = nm_txrx_swap(t);
2782847bf383SLuigi Rizzo 			nma_set_nrings(hostna, t, 1);
2783847bf383SLuigi Rizzo 			nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
2784847bf383SLuigi Rizzo 		}
2785f9790aebSLuigi Rizzo 		// hostna->nm_txsync = netmap_bwrap_host_txsync;
2786f9790aebSLuigi Rizzo 		// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
2787847bf383SLuigi Rizzo 		hostna->nm_notify = netmap_bwrap_notify;
2788*c3e9b4dbSLuiz Otavio O Souza 		hostna->nm_mem = netmap_mem_get(na->nm_mem);
2789f9790aebSLuigi Rizzo 		hostna->na_private = bna;
27904bf50f18SLuigi Rizzo 		hostna->na_vp = &bna->up;
27914bf50f18SLuigi Rizzo 		na->na_hostvp = hwna->na_hostvp =
27924bf50f18SLuigi Rizzo 			hostna->na_hostvp = &bna->host;
27934bf50f18SLuigi Rizzo 		hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
2794f0ea3689SLuigi Rizzo 	}
2795f9790aebSLuigi Rizzo 
279617885a7bSLuigi Rizzo 	ND("%s<->%s txr %d txd %d rxr %d rxd %d",
27974bf50f18SLuigi Rizzo 		na->name, ifp->if_xname,
2798f9790aebSLuigi Rizzo 		na->num_tx_rings, na->num_tx_desc,
2799f9790aebSLuigi Rizzo 		na->num_rx_rings, na->num_rx_desc);
2800f9790aebSLuigi Rizzo 
2801f9790aebSLuigi Rizzo 	error = netmap_attach_common(na);
2802f9790aebSLuigi Rizzo 	if (error) {
28034bf50f18SLuigi Rizzo 		goto err_free;
28044bf50f18SLuigi Rizzo 	}
28054bf50f18SLuigi Rizzo 	hwna->na_flags |= NAF_BUSY;
28064bf50f18SLuigi Rizzo 	return 0;
28074bf50f18SLuigi Rizzo 
28084bf50f18SLuigi Rizzo err_free:
28094bf50f18SLuigi Rizzo 	hwna->na_vp = hwna->na_hostvp = NULL;
2810f9790aebSLuigi Rizzo 	netmap_adapter_put(hwna);
2811*c3e9b4dbSLuiz Otavio O Souza 	nm_os_free(bna);
2812f9790aebSLuigi Rizzo 	return error;
28134bf50f18SLuigi Rizzo 
2814f9790aebSLuigi Rizzo }
2815f9790aebSLuigi Rizzo 
2816847bf383SLuigi Rizzo struct nm_bridge *
2817847bf383SLuigi Rizzo netmap_init_bridges2(u_int n)
2818f9790aebSLuigi Rizzo {
2819f9790aebSLuigi Rizzo 	int i;
2820847bf383SLuigi Rizzo 	struct nm_bridge *b;
2821847bf383SLuigi Rizzo 
2822*c3e9b4dbSLuiz Otavio O Souza 	b = nm_os_malloc(sizeof(struct nm_bridge) * n);
2823847bf383SLuigi Rizzo 	if (b == NULL)
2824847bf383SLuigi Rizzo 		return NULL;
2825847bf383SLuigi Rizzo 	for (i = 0; i < n; i++)
2826847bf383SLuigi Rizzo 		BDG_RWINIT(&b[i]);
2827847bf383SLuigi Rizzo 	return b;
2828847bf383SLuigi Rizzo }
2829847bf383SLuigi Rizzo 
2830847bf383SLuigi Rizzo void
2831847bf383SLuigi Rizzo netmap_uninit_bridges2(struct nm_bridge *b, u_int n)
2832847bf383SLuigi Rizzo {
2833847bf383SLuigi Rizzo 	int i;
2834847bf383SLuigi Rizzo 
2835847bf383SLuigi Rizzo 	if (b == NULL)
2836847bf383SLuigi Rizzo 		return;
2837847bf383SLuigi Rizzo 
2838847bf383SLuigi Rizzo 	for (i = 0; i < n; i++)
2839847bf383SLuigi Rizzo 		BDG_RWDESTROY(&b[i]);
2840*c3e9b4dbSLuiz Otavio O Souza 	nm_os_free(b);
2841847bf383SLuigi Rizzo }
2842847bf383SLuigi Rizzo 
2843847bf383SLuigi Rizzo int
2844847bf383SLuigi Rizzo netmap_init_bridges(void)
2845847bf383SLuigi Rizzo {
2846847bf383SLuigi Rizzo #ifdef CONFIG_NET_NS
2847847bf383SLuigi Rizzo 	return netmap_bns_register();
2848847bf383SLuigi Rizzo #else
2849847bf383SLuigi Rizzo 	nm_bridges = netmap_init_bridges2(NM_BRIDGES);
2850847bf383SLuigi Rizzo 	if (nm_bridges == NULL)
2851847bf383SLuigi Rizzo 		return ENOMEM;
2852847bf383SLuigi Rizzo 	return 0;
2853847bf383SLuigi Rizzo #endif
2854847bf383SLuigi Rizzo }
2855847bf383SLuigi Rizzo 
2856847bf383SLuigi Rizzo void
2857847bf383SLuigi Rizzo netmap_uninit_bridges(void)
2858847bf383SLuigi Rizzo {
2859847bf383SLuigi Rizzo #ifdef CONFIG_NET_NS
2860847bf383SLuigi Rizzo 	netmap_bns_unregister();
2861847bf383SLuigi Rizzo #else
2862847bf383SLuigi Rizzo 	netmap_uninit_bridges2(nm_bridges, NM_BRIDGES);
2863847bf383SLuigi Rizzo #endif
2864f9790aebSLuigi Rizzo }
2865f9790aebSLuigi Rizzo #endif /* WITH_VALE */
2866