xref: /freebsd-14.2/sys/dev/netmap/netmap_vale.c (revision 2ff91c17)
1718cf2ccSPedro F. Giffuni /*-
2718cf2ccSPedro F. Giffuni  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3718cf2ccSPedro F. Giffuni  *
437e3a6d3SLuigi Rizzo  * Copyright (C) 2013-2016 Universita` di Pisa
537e3a6d3SLuigi Rizzo  * All rights reserved.
6f9790aebSLuigi Rizzo  *
7f9790aebSLuigi Rizzo  * Redistribution and use in source and binary forms, with or without
8f9790aebSLuigi Rizzo  * modification, are permitted provided that the following conditions
9f9790aebSLuigi Rizzo  * are met:
10f9790aebSLuigi Rizzo  *   1. Redistributions of source code must retain the above copyright
11f9790aebSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer.
12f9790aebSLuigi Rizzo  *   2. Redistributions in binary form must reproduce the above copyright
13f9790aebSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer in the
14f9790aebSLuigi Rizzo  *      documentation and/or other materials provided with the distribution.
15f9790aebSLuigi Rizzo  *
16f9790aebSLuigi Rizzo  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17f9790aebSLuigi Rizzo  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18f9790aebSLuigi Rizzo  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19f9790aebSLuigi Rizzo  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20f9790aebSLuigi Rizzo  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21f9790aebSLuigi Rizzo  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22f9790aebSLuigi Rizzo  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23f9790aebSLuigi Rizzo  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24f9790aebSLuigi Rizzo  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25f9790aebSLuigi Rizzo  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26f9790aebSLuigi Rizzo  * SUCH DAMAGE.
27f9790aebSLuigi Rizzo  */
28f9790aebSLuigi Rizzo 
29f9790aebSLuigi Rizzo 
30f9790aebSLuigi Rizzo /*
31f9790aebSLuigi Rizzo  * This module implements the VALE switch for netmap
32f9790aebSLuigi Rizzo 
33f9790aebSLuigi Rizzo --- VALE SWITCH ---
34f9790aebSLuigi Rizzo 
35f9790aebSLuigi Rizzo NMG_LOCK() serializes all modifications to switches and ports.
36f9790aebSLuigi Rizzo A switch cannot be deleted until all ports are gone.
37f9790aebSLuigi Rizzo 
38f9790aebSLuigi Rizzo For each switch, an SX lock (RWlock on linux) protects
39f9790aebSLuigi Rizzo deletion of ports. When configuring or deleting a new port, the
40f9790aebSLuigi Rizzo lock is acquired in exclusive mode (after holding NMG_LOCK).
41f9790aebSLuigi Rizzo When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
42f9790aebSLuigi Rizzo The lock is held throughout the entire forwarding cycle,
43f9790aebSLuigi Rizzo during which the thread may incur in a page fault.
44f9790aebSLuigi Rizzo Hence it is important that sleepable shared locks are used.
45f9790aebSLuigi Rizzo 
46f9790aebSLuigi Rizzo On the rx ring, the per-port lock is grabbed initially to reserve
47f9790aebSLuigi Rizzo a number of slot in the ring, then the lock is released,
48f9790aebSLuigi Rizzo packets are copied from source to destination, and then
49f9790aebSLuigi Rizzo the lock is acquired again and the receive ring is updated.
50f9790aebSLuigi Rizzo (A similar thing is done on the tx ring for NIC and host stack
51f9790aebSLuigi Rizzo ports attached to the switch)
52f9790aebSLuigi Rizzo 
53f9790aebSLuigi Rizzo  */
54f9790aebSLuigi Rizzo 
55f9790aebSLuigi Rizzo /*
56f9790aebSLuigi Rizzo  * OS-specific code that is used only within this file.
57f9790aebSLuigi Rizzo  * Other OS-specific code that must be accessed by drivers
58f9790aebSLuigi Rizzo  * is present in netmap_kern.h
59f9790aebSLuigi Rizzo  */
60f9790aebSLuigi Rizzo 
61f9790aebSLuigi Rizzo #if defined(__FreeBSD__)
62f9790aebSLuigi Rizzo #include <sys/cdefs.h> /* prerequisite */
63f9790aebSLuigi Rizzo __FBSDID("$FreeBSD$");
64f9790aebSLuigi Rizzo 
65f9790aebSLuigi Rizzo #include <sys/types.h>
66f9790aebSLuigi Rizzo #include <sys/errno.h>
67f9790aebSLuigi Rizzo #include <sys/param.h>	/* defines used in kernel.h */
68f9790aebSLuigi Rizzo #include <sys/kernel.h>	/* types used in module initialization */
69f9790aebSLuigi Rizzo #include <sys/conf.h>	/* cdevsw struct, UID, GID */
70f9790aebSLuigi Rizzo #include <sys/sockio.h>
71f9790aebSLuigi Rizzo #include <sys/socketvar.h>	/* struct socket */
72f9790aebSLuigi Rizzo #include <sys/malloc.h>
73f9790aebSLuigi Rizzo #include <sys/poll.h>
74f9790aebSLuigi Rizzo #include <sys/rwlock.h>
75f9790aebSLuigi Rizzo #include <sys/socket.h> /* sockaddrs */
76f9790aebSLuigi Rizzo #include <sys/selinfo.h>
77f9790aebSLuigi Rizzo #include <sys/sysctl.h>
78f9790aebSLuigi Rizzo #include <net/if.h>
79f9790aebSLuigi Rizzo #include <net/if_var.h>
80f9790aebSLuigi Rizzo #include <net/bpf.h>		/* BIOCIMMEDIATE */
81f9790aebSLuigi Rizzo #include <machine/bus.h>	/* bus_dmamap_* */
82f9790aebSLuigi Rizzo #include <sys/endian.h>
83f9790aebSLuigi Rizzo #include <sys/refcount.h>
84f9790aebSLuigi Rizzo 
85f9790aebSLuigi Rizzo 
/*
 * Bridge lock wrappers (FreeBSD flavour).
 * Each macro takes a pointer to a bridge descriptor and operates on its
 * bdg_lock member, which is a struct rwlock on this platform.
 */
#define BDG_RWLOCK_T		struct rwlock

/* The lock is created with RW_NOWITNESS (see rw(9) for the flag). */
#define	BDG_RWINIT(b)		\
	rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
/* exclusive (writer) side */
#define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)
#define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)
/* shared (reader) side */
#define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)
#define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)
#define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)
#define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)
96f9790aebSLuigi Rizzo 
97f9790aebSLuigi Rizzo 
98f9790aebSLuigi Rizzo #elif defined(linux)
99f9790aebSLuigi Rizzo 
100f9790aebSLuigi Rizzo #include "bsd_glue.h"
101f9790aebSLuigi Rizzo 
102f9790aebSLuigi Rizzo #elif defined(__APPLE__)
103f9790aebSLuigi Rizzo 
104f9790aebSLuigi Rizzo #warning OSX support is only partial
105f9790aebSLuigi Rizzo #include "osx_glue.h"
106f9790aebSLuigi Rizzo 
10737e3a6d3SLuigi Rizzo #elif defined(_WIN32)
10837e3a6d3SLuigi Rizzo #include "win_glue.h"
10937e3a6d3SLuigi Rizzo 
110f9790aebSLuigi Rizzo #else
111f9790aebSLuigi Rizzo 
112f9790aebSLuigi Rizzo #error	Unsupported platform
113f9790aebSLuigi Rizzo 
114f9790aebSLuigi Rizzo #endif /* unsupported */
115f9790aebSLuigi Rizzo 
116f9790aebSLuigi Rizzo /*
117f9790aebSLuigi Rizzo  * common headers
118f9790aebSLuigi Rizzo  */
119f9790aebSLuigi Rizzo 
120f9790aebSLuigi Rizzo #include <net/netmap.h>
121f9790aebSLuigi Rizzo #include <dev/netmap/netmap_kern.h>
122f9790aebSLuigi Rizzo #include <dev/netmap/netmap_mem2.h>
123f9790aebSLuigi Rizzo 
124f9790aebSLuigi Rizzo #ifdef WITH_VALE
125f9790aebSLuigi Rizzo 
126f9790aebSLuigi Rizzo /*
127f9790aebSLuigi Rizzo  * system parameters (most of them in netmap_kern.h)
12837e3a6d3SLuigi Rizzo  * NM_BDG_NAME	prefix for switch port names, default "vale"
129f9790aebSLuigi Rizzo  * NM_BDG_MAXPORTS	number of ports
130f9790aebSLuigi Rizzo  * NM_BRIDGES	max number of switches in the system.
131f9790aebSLuigi Rizzo  *	XXX should become a sysctl or tunable
132f9790aebSLuigi Rizzo  *
133f9790aebSLuigi Rizzo  * Switch ports are named valeX:Y where X is the switch name and Y
134f9790aebSLuigi Rizzo  * is the port. If Y matches a physical interface name, the port is
135f9790aebSLuigi Rizzo  * connected to a physical device.
136f9790aebSLuigi Rizzo  *
137f9790aebSLuigi Rizzo  * Unlike physical interfaces, switch ports use their own memory region
138f9790aebSLuigi Rizzo  * for rings and buffers.
139f9790aebSLuigi Rizzo  * The virtual interfaces use per-queue lock instead of core lock.
140f9790aebSLuigi Rizzo  * In the tx loop, we aggregate traffic in batches to make all operations
141f9790aebSLuigi Rizzo  * faster. The batch size is bridge_batch.
142f9790aebSLuigi Rizzo  */
#define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
#define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
#define NM_BRIDGE_RINGSIZE	1024	/* in the device */
#define NM_BDG_HASH		1024	/* forwarding table entries */
#define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
#define NM_MULTISEG		64	/* max size of a chain of bufs */
/*
 * Actual size of the tables: one full batch plus room for one
 * maximum-length chain of buffers.
 */
#define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
/*
 * NM_FT_NULL terminates a list of slots in the ft. It equals the table
 * size, so it can never be a valid index into the table.
 */
#define NM_FT_NULL		NM_BDG_BATCH_MAX
/* Default size for the Maximum Frame Size. */
#define NM_BDG_MFS_DEFAULT	1514
155f9790aebSLuigi Rizzo 
156f9790aebSLuigi Rizzo 
157f9790aebSLuigi Rizzo /*
158f9790aebSLuigi Rizzo  * bridge_batch is set via sysctl to the max batch size to be
159f9790aebSLuigi Rizzo  * used in the bridge. The actual value may be larger as the
160f9790aebSLuigi Rizzo  * last packet in the block may overflow the size.
161f9790aebSLuigi Rizzo  */
16237e3a6d3SLuigi Rizzo static int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
16337e3a6d3SLuigi Rizzo SYSBEGIN(vars_vale);
164f9790aebSLuigi Rizzo SYSCTL_DECL(_dev_netmap);
1654f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0,
1664f80b14cSVincenzo Maffione     "Max batch size to be used in the bridge");
16737e3a6d3SLuigi Rizzo SYSEND;
168f9790aebSLuigi Rizzo 
/* Prototypes of file-local (static) functions used before their definition. */
static int netmap_vp_create(struct nmreq_header *hdr, struct ifnet *,
		struct netmap_mem_d *nmd, struct netmap_vp_adapter **);
static int netmap_vp_reg(struct netmap_adapter *na, int onoff);
static int netmap_bwrap_reg(struct netmap_adapter *, int onoff);
173f9790aebSLuigi Rizzo 
/*
 * For each output interface, nm_bdg_q is used to construct a list.
 * bq_head and bq_tail are the two ends of that list; an empty queue has
 * both set to NM_FT_NULL (see nm_alloc_bdgfwd()).
 * bq_len is the number of output buffers (we can have coalescing
 * during the copy).
 */
struct nm_bdg_q {
	uint16_t bq_head;
	uint16_t bq_tail;
	uint32_t bq_len;	/* number of buffers */
};
184f9790aebSLuigi Rizzo 
/*
 * One entry of the per-bridge hash table (the table is allocated with
 * NM_BDG_HASH entries in nm_find_bridge()).
 * XXX revise this
 */
struct nm_hash_ent {
	uint64_t	mac;	/* the top 2 bytes are the epoch */
	uint64_t	ports;
};
190f9790aebSLuigi Rizzo 
191*2ff91c17SVincenzo Maffione /* Holds the default callbacks */
192*2ff91c17SVincenzo Maffione static struct netmap_bdg_ops default_bdg_ops = {netmap_bdg_learning, NULL, NULL};
193*2ff91c17SVincenzo Maffione 
194f9790aebSLuigi Rizzo /*
195f9790aebSLuigi Rizzo  * nm_bridge is a descriptor for a VALE switch.
196f9790aebSLuigi Rizzo  * Interfaces for a bridge are all in bdg_ports[].
197f9790aebSLuigi Rizzo  * The array has fixed size, an empty entry does not terminate
198f9790aebSLuigi Rizzo  * the search, but lookups only occur on attach/detach so we
199f9790aebSLuigi Rizzo  * don't mind if they are slow.
200f9790aebSLuigi Rizzo  *
201f9790aebSLuigi Rizzo  * The bridge is non blocking on the transmit ports: excess
202f9790aebSLuigi Rizzo  * packets are dropped if there is no room on the output port.
203f9790aebSLuigi Rizzo  *
204f9790aebSLuigi Rizzo  * bdg_lock protects accesses to the bdg_ports array.
205f9790aebSLuigi Rizzo  * This is a rw lock (or equivalent).
206f9790aebSLuigi Rizzo  */
207*2ff91c17SVincenzo Maffione #define NM_BDG_IFNAMSIZ IFNAMSIZ
208f9790aebSLuigi Rizzo struct nm_bridge {
209f9790aebSLuigi Rizzo 	/* XXX what is the proper alignment/layout ? */
210f9790aebSLuigi Rizzo 	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
211f9790aebSLuigi Rizzo 	int		bdg_namelen;
212*2ff91c17SVincenzo Maffione 	uint32_t	bdg_active_ports;
213*2ff91c17SVincenzo Maffione 	char		bdg_basename[NM_BDG_IFNAMSIZ];
214f9790aebSLuigi Rizzo 
215f9790aebSLuigi Rizzo 	/* Indexes of active ports (up to active_ports)
216f9790aebSLuigi Rizzo 	 * and all other remaining ports.
217f9790aebSLuigi Rizzo 	 */
218*2ff91c17SVincenzo Maffione 	uint32_t	bdg_port_index[NM_BDG_MAXPORTS];
219*2ff91c17SVincenzo Maffione 	/* used by netmap_bdg_detach_common() */
220*2ff91c17SVincenzo Maffione 	uint32_t	tmp_bdg_port_index[NM_BDG_MAXPORTS];
221f9790aebSLuigi Rizzo 
222f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
223f9790aebSLuigi Rizzo 
224f9790aebSLuigi Rizzo 	/*
225*2ff91c17SVincenzo Maffione 	 * Programmable lookup functions to figure out the destination port.
226f9790aebSLuigi Rizzo 	 * It returns either of an index of the destination port,
227f9790aebSLuigi Rizzo 	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
228f9790aebSLuigi Rizzo 	 * forward this packet.  ring_nr is the source ring index, and the
229f9790aebSLuigi Rizzo 	 * function may overwrite this value to forward this packet to a
230f9790aebSLuigi Rizzo 	 * different ring index.
231*2ff91c17SVincenzo Maffione 	 * The function is set by netmap_bdg_regops().
232f9790aebSLuigi Rizzo 	 */
233*2ff91c17SVincenzo Maffione 	struct netmap_bdg_ops *bdg_ops;
234f9790aebSLuigi Rizzo 
235*2ff91c17SVincenzo Maffione 	/*
236*2ff91c17SVincenzo Maffione 	 * Contains the data structure used by the bdg_ops.lookup function.
237*2ff91c17SVincenzo Maffione 	 * By default points to *ht which is allocated on attach and used by the default lookup
238*2ff91c17SVincenzo Maffione 	 * otherwise will point to the data structure received by netmap_bdg_regops().
239f9790aebSLuigi Rizzo 	 */
240*2ff91c17SVincenzo Maffione 	void *private_data;
241*2ff91c17SVincenzo Maffione 	struct nm_hash_ent *ht;
242*2ff91c17SVincenzo Maffione 
243*2ff91c17SVincenzo Maffione 	/* Currently used to specify if the bridge is still in use while empty and
244*2ff91c17SVincenzo Maffione 	 * if it has been put in exclusive mode by an external module, see netmap_bdg_regops()
245*2ff91c17SVincenzo Maffione 	 * and netmap_bdg_create().
246*2ff91c17SVincenzo Maffione 	 */
247*2ff91c17SVincenzo Maffione #define NM_BDG_ACTIVE		1
248*2ff91c17SVincenzo Maffione #define NM_BDG_EXCLUSIVE	2
249*2ff91c17SVincenzo Maffione 	uint8_t			bdg_flags;
250*2ff91c17SVincenzo Maffione 
251847bf383SLuigi Rizzo 
252847bf383SLuigi Rizzo #ifdef CONFIG_NET_NS
253847bf383SLuigi Rizzo 	struct net *ns;
254847bf383SLuigi Rizzo #endif /* CONFIG_NET_NS */
255f9790aebSLuigi Rizzo };
256f9790aebSLuigi Rizzo 
2574bf50f18SLuigi Rizzo const char*
2584bf50f18SLuigi Rizzo netmap_bdg_name(struct netmap_vp_adapter *vp)
2594bf50f18SLuigi Rizzo {
2604bf50f18SLuigi Rizzo 	struct nm_bridge *b = vp->na_bdg;
2614bf50f18SLuigi Rizzo 	if (b == NULL)
2624bf50f18SLuigi Rizzo 		return NULL;
2634bf50f18SLuigi Rizzo 	return b->bdg_basename;
2644bf50f18SLuigi Rizzo }
2654bf50f18SLuigi Rizzo 
266f9790aebSLuigi Rizzo 
#ifndef CONFIG_NET_NS
/*
 * XXX in principle nm_bridges could be created dynamically
 * Right now we have a static array and deletions are protected
 * by an exclusive lock.
 * Only present when CONFIG_NET_NS is not defined.
 */
static struct nm_bridge *nm_bridges;
#endif /* !CONFIG_NET_NS */
275f9790aebSLuigi Rizzo 
276f9790aebSLuigi Rizzo 
/*
 * this is a slightly optimized copy routine which rounds
 * to multiple of 64 bytes and is often faster than dealing
 * with other odd sizes. We assume there is enough room
 * in the source and destination buffers.
 *
 * _src, _dst:	source and destination buffers; they are accessed as
 *		arrays of uint64_t and must not overlap.
 * l:		number of bytes to copy.
 *
 * For l >= 1024 exactly l bytes are copied with memcpy().
 * For 0 < l < 1024 the copy proceeds in 64-byte chunks, so the number
 * of bytes actually read and written is l rounded up to the next
 * multiple of 64. For l <= 0 nothing is copied.
 *
 * XXX only for multiples of 64 bytes, non overlapped.
 */
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
	uint64_t *src = _src;
	uint64_t *dst = _dst;

	if (unlikely(l >= 1024)) {
		memcpy(dst, src, l);
		return;
	}
	/* eight 8-byte words per iteration == 64 bytes */
	for (; likely(l > 0); l-=64) {
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
	}
}
305f9790aebSLuigi Rizzo 
306f9790aebSLuigi Rizzo 
/*
 * Return non-zero if c may appear in the bridge-name part of a VALE
 * port name: an ASCII letter, a decimal digit or an underscore.
 */
static int
nm_is_id_char(const char c)
{
	return (c >= 'a' && c <= 'z') ||
	       (c >= 'A' && c <= 'Z') ||
	       (c >= '0' && c <= '9') ||
	       (c == '_');
}
31537e3a6d3SLuigi Rizzo 
31637e3a6d3SLuigi Rizzo /* Validate the name of a VALE bridge port and return the
31737e3a6d3SLuigi Rizzo  * position of the ":" character. */
31837e3a6d3SLuigi Rizzo static int
31937e3a6d3SLuigi Rizzo nm_vale_name_validate(const char *name)
32037e3a6d3SLuigi Rizzo {
32137e3a6d3SLuigi Rizzo 	int colon_pos = -1;
32237e3a6d3SLuigi Rizzo 	int i;
32337e3a6d3SLuigi Rizzo 
32437e3a6d3SLuigi Rizzo 	if (!name || strlen(name) < strlen(NM_BDG_NAME)) {
32537e3a6d3SLuigi Rizzo 		return -1;
32637e3a6d3SLuigi Rizzo 	}
32737e3a6d3SLuigi Rizzo 
328*2ff91c17SVincenzo Maffione 	for (i = 0; i < NM_BDG_IFNAMSIZ && name[i]; i++) {
32937e3a6d3SLuigi Rizzo 		if (name[i] == ':') {
33037e3a6d3SLuigi Rizzo 			colon_pos = i;
331*2ff91c17SVincenzo Maffione 			break;
33237e3a6d3SLuigi Rizzo 		} else if (!nm_is_id_char(name[i])) {
33337e3a6d3SLuigi Rizzo 			return -1;
33437e3a6d3SLuigi Rizzo 		}
33537e3a6d3SLuigi Rizzo 	}
33637e3a6d3SLuigi Rizzo 
337*2ff91c17SVincenzo Maffione 	if (strlen(name) - colon_pos > IFNAMSIZ) {
338*2ff91c17SVincenzo Maffione 		/* interface name too long */
33937e3a6d3SLuigi Rizzo 		return -1;
34037e3a6d3SLuigi Rizzo 	}
34137e3a6d3SLuigi Rizzo 
34237e3a6d3SLuigi Rizzo 	return colon_pos;
34337e3a6d3SLuigi Rizzo }
34437e3a6d3SLuigi Rizzo 
345f9790aebSLuigi Rizzo /*
346f9790aebSLuigi Rizzo  * locate a bridge among the existing ones.
347f9790aebSLuigi Rizzo  * MUST BE CALLED WITH NMG_LOCK()
348f9790aebSLuigi Rizzo  *
349f9790aebSLuigi Rizzo  * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
350f9790aebSLuigi Rizzo  * We assume that this is called with a name of at least NM_NAME chars.
351f9790aebSLuigi Rizzo  */
352f9790aebSLuigi Rizzo static struct nm_bridge *
353f9790aebSLuigi Rizzo nm_find_bridge(const char *name, int create)
354f9790aebSLuigi Rizzo {
35537e3a6d3SLuigi Rizzo 	int i, namelen;
356847bf383SLuigi Rizzo 	struct nm_bridge *b = NULL, *bridges;
357847bf383SLuigi Rizzo 	u_int num_bridges;
358f9790aebSLuigi Rizzo 
359f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
360f9790aebSLuigi Rizzo 
361847bf383SLuigi Rizzo 	netmap_bns_getbridges(&bridges, &num_bridges);
362847bf383SLuigi Rizzo 
36337e3a6d3SLuigi Rizzo 	namelen = nm_vale_name_validate(name);
36437e3a6d3SLuigi Rizzo 	if (namelen < 0) {
365f9790aebSLuigi Rizzo 		D("invalid bridge name %s", name ? name : NULL);
366f9790aebSLuigi Rizzo 		return NULL;
367f9790aebSLuigi Rizzo 	}
368f9790aebSLuigi Rizzo 
369f9790aebSLuigi Rizzo 	/* lookup the name, remember empty slot if there is one */
370847bf383SLuigi Rizzo 	for (i = 0; i < num_bridges; i++) {
371847bf383SLuigi Rizzo 		struct nm_bridge *x = bridges + i;
372f9790aebSLuigi Rizzo 
373*2ff91c17SVincenzo Maffione 		if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) {
374f9790aebSLuigi Rizzo 			if (create && b == NULL)
375f9790aebSLuigi Rizzo 				b = x;	/* record empty slot */
376f9790aebSLuigi Rizzo 		} else if (x->bdg_namelen != namelen) {
377f9790aebSLuigi Rizzo 			continue;
378f9790aebSLuigi Rizzo 		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
379f9790aebSLuigi Rizzo 			ND("found '%.*s' at %d", namelen, name, i);
380f9790aebSLuigi Rizzo 			b = x;
381f9790aebSLuigi Rizzo 			break;
382f9790aebSLuigi Rizzo 		}
383f9790aebSLuigi Rizzo 	}
384847bf383SLuigi Rizzo 	if (i == num_bridges && b) { /* name not found, can create entry */
385f9790aebSLuigi Rizzo 		/* initialize the bridge */
386f9790aebSLuigi Rizzo 		ND("create new bridge %s with ports %d", b->bdg_basename,
387f9790aebSLuigi Rizzo 			b->bdg_active_ports);
3884f80b14cSVincenzo Maffione 		b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH);
3894f80b14cSVincenzo Maffione 		if (b->ht == NULL) {
3904f80b14cSVincenzo Maffione 			D("failed to allocate hash table");
3914f80b14cSVincenzo Maffione 			return NULL;
3924f80b14cSVincenzo Maffione 		}
3934f80b14cSVincenzo Maffione 		strncpy(b->bdg_basename, name, namelen);
394f9790aebSLuigi Rizzo 		b->bdg_namelen = namelen;
395f9790aebSLuigi Rizzo 		b->bdg_active_ports = 0;
396f9790aebSLuigi Rizzo 		for (i = 0; i < NM_BDG_MAXPORTS; i++)
397f9790aebSLuigi Rizzo 			b->bdg_port_index[i] = i;
398f9790aebSLuigi Rizzo 		/* set the default function */
399*2ff91c17SVincenzo Maffione 		b->bdg_ops = &default_bdg_ops;
400*2ff91c17SVincenzo Maffione 		b->private_data = b->ht;
401*2ff91c17SVincenzo Maffione 		b->bdg_flags = 0;
402847bf383SLuigi Rizzo 		NM_BNS_GET(b);
403f9790aebSLuigi Rizzo 	}
404f9790aebSLuigi Rizzo 	return b;
405f9790aebSLuigi Rizzo }
406f9790aebSLuigi Rizzo 
407f9790aebSLuigi Rizzo 
408f9790aebSLuigi Rizzo /*
409f9790aebSLuigi Rizzo  * Free the forwarding tables for rings attached to switch ports.
410f9790aebSLuigi Rizzo  */
411f9790aebSLuigi Rizzo static void
412f9790aebSLuigi Rizzo nm_free_bdgfwd(struct netmap_adapter *na)
413f9790aebSLuigi Rizzo {
414f9790aebSLuigi Rizzo 	int nrings, i;
415*2ff91c17SVincenzo Maffione 	struct netmap_kring **kring;
416f9790aebSLuigi Rizzo 
417f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
41817885a7bSLuigi Rizzo 	nrings = na->num_tx_rings;
41917885a7bSLuigi Rizzo 	kring = na->tx_rings;
420f9790aebSLuigi Rizzo 	for (i = 0; i < nrings; i++) {
421*2ff91c17SVincenzo Maffione 		if (kring[i]->nkr_ft) {
422*2ff91c17SVincenzo Maffione 			nm_os_free(kring[i]->nkr_ft);
423*2ff91c17SVincenzo Maffione 			kring[i]->nkr_ft = NULL; /* protect from freeing twice */
424f9790aebSLuigi Rizzo 		}
425f9790aebSLuigi Rizzo 	}
426f9790aebSLuigi Rizzo }
427f9790aebSLuigi Rizzo 
428f9790aebSLuigi Rizzo 
429f9790aebSLuigi Rizzo /*
430f9790aebSLuigi Rizzo  * Allocate the forwarding tables for the rings attached to the bridge ports.
431f9790aebSLuigi Rizzo  */
432f9790aebSLuigi Rizzo static int
433f9790aebSLuigi Rizzo nm_alloc_bdgfwd(struct netmap_adapter *na)
434f9790aebSLuigi Rizzo {
435f9790aebSLuigi Rizzo 	int nrings, l, i, num_dstq;
436*2ff91c17SVincenzo Maffione 	struct netmap_kring **kring;
437f9790aebSLuigi Rizzo 
438f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
439f9790aebSLuigi Rizzo 	/* all port:rings + broadcast */
440f9790aebSLuigi Rizzo 	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
441f9790aebSLuigi Rizzo 	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
442f9790aebSLuigi Rizzo 	l += sizeof(struct nm_bdg_q) * num_dstq;
443f9790aebSLuigi Rizzo 	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
444f9790aebSLuigi Rizzo 
445847bf383SLuigi Rizzo 	nrings = netmap_real_rings(na, NR_TX);
446f9790aebSLuigi Rizzo 	kring = na->tx_rings;
447f9790aebSLuigi Rizzo 	for (i = 0; i < nrings; i++) {
448f9790aebSLuigi Rizzo 		struct nm_bdg_fwd *ft;
449f9790aebSLuigi Rizzo 		struct nm_bdg_q *dstq;
450f9790aebSLuigi Rizzo 		int j;
451f9790aebSLuigi Rizzo 
452c3e9b4dbSLuiz Otavio O Souza 		ft = nm_os_malloc(l);
453f9790aebSLuigi Rizzo 		if (!ft) {
454f9790aebSLuigi Rizzo 			nm_free_bdgfwd(na);
455f9790aebSLuigi Rizzo 			return ENOMEM;
456f9790aebSLuigi Rizzo 		}
457f9790aebSLuigi Rizzo 		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
458f9790aebSLuigi Rizzo 		for (j = 0; j < num_dstq; j++) {
459f9790aebSLuigi Rizzo 			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
460f9790aebSLuigi Rizzo 			dstq[j].bq_len = 0;
461f9790aebSLuigi Rizzo 		}
462*2ff91c17SVincenzo Maffione 		kring[i]->nkr_ft = ft;
463f9790aebSLuigi Rizzo 	}
464f9790aebSLuigi Rizzo 	return 0;
465f9790aebSLuigi Rizzo }
466f9790aebSLuigi Rizzo 
467*2ff91c17SVincenzo Maffione static int
468*2ff91c17SVincenzo Maffione netmap_bdg_free(struct nm_bridge *b)
469*2ff91c17SVincenzo Maffione {
470*2ff91c17SVincenzo Maffione 	if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports != 0) {
471*2ff91c17SVincenzo Maffione 		return EBUSY;
472*2ff91c17SVincenzo Maffione 	}
473*2ff91c17SVincenzo Maffione 
474*2ff91c17SVincenzo Maffione 	ND("marking bridge %s as free", b->bdg_basename);
475*2ff91c17SVincenzo Maffione 	nm_os_free(b->ht);
476*2ff91c17SVincenzo Maffione 	b->bdg_ops = NULL;
477*2ff91c17SVincenzo Maffione 	b->bdg_flags = 0;
478*2ff91c17SVincenzo Maffione 	NM_BNS_PUT(b);
479*2ff91c17SVincenzo Maffione 	return 0;
480*2ff91c17SVincenzo Maffione }
481*2ff91c17SVincenzo Maffione 
482f9790aebSLuigi Rizzo 
4834bf50f18SLuigi Rizzo /* remove from bridge b the ports in slots hw and sw
4844bf50f18SLuigi Rizzo  * (sw can be -1 if not needed)
4854bf50f18SLuigi Rizzo  */
486f9790aebSLuigi Rizzo static void
487f9790aebSLuigi Rizzo netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
488f9790aebSLuigi Rizzo {
489f9790aebSLuigi Rizzo 	int s_hw = hw, s_sw = sw;
490f9790aebSLuigi Rizzo 	int i, lim =b->bdg_active_ports;
491*2ff91c17SVincenzo Maffione 	uint32_t *tmp = b->tmp_bdg_port_index;
492f9790aebSLuigi Rizzo 
493f9790aebSLuigi Rizzo 	/*
494f9790aebSLuigi Rizzo 	New algorithm:
495f9790aebSLuigi Rizzo 	make a copy of bdg_port_index;
496f9790aebSLuigi Rizzo 	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
497f9790aebSLuigi Rizzo 	in the array of bdg_port_index, replacing them with
498f9790aebSLuigi Rizzo 	entries from the bottom of the array;
499f9790aebSLuigi Rizzo 	decrement bdg_active_ports;
500f9790aebSLuigi Rizzo 	acquire BDG_WLOCK() and copy back the array.
501f9790aebSLuigi Rizzo 	 */
502f9790aebSLuigi Rizzo 
503f0ea3689SLuigi Rizzo 	if (netmap_verbose)
504f9790aebSLuigi Rizzo 		D("detach %d and %d (lim %d)", hw, sw, lim);
505f9790aebSLuigi Rizzo 	/* make a copy of the list of active ports, update it,
506f9790aebSLuigi Rizzo 	 * and then copy back within BDG_WLOCK().
507f9790aebSLuigi Rizzo 	 */
508*2ff91c17SVincenzo Maffione 	memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index));
509f9790aebSLuigi Rizzo 	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
510f9790aebSLuigi Rizzo 		if (hw >= 0 && tmp[i] == hw) {
511f9790aebSLuigi Rizzo 			ND("detach hw %d at %d", hw, i);
512f9790aebSLuigi Rizzo 			lim--; /* point to last active port */
513f9790aebSLuigi Rizzo 			tmp[i] = tmp[lim]; /* swap with i */
514f9790aebSLuigi Rizzo 			tmp[lim] = hw;	/* now this is inactive */
515f9790aebSLuigi Rizzo 			hw = -1;
516f9790aebSLuigi Rizzo 		} else if (sw >= 0 && tmp[i] == sw) {
517f9790aebSLuigi Rizzo 			ND("detach sw %d at %d", sw, i);
518f9790aebSLuigi Rizzo 			lim--;
519f9790aebSLuigi Rizzo 			tmp[i] = tmp[lim];
520f9790aebSLuigi Rizzo 			tmp[lim] = sw;
521f9790aebSLuigi Rizzo 			sw = -1;
522f9790aebSLuigi Rizzo 		} else {
523f9790aebSLuigi Rizzo 			i++;
524f9790aebSLuigi Rizzo 		}
525f9790aebSLuigi Rizzo 	}
526f9790aebSLuigi Rizzo 	if (hw >= 0 || sw >= 0) {
527f9790aebSLuigi Rizzo 		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
528f9790aebSLuigi Rizzo 	}
529f9790aebSLuigi Rizzo 
530f9790aebSLuigi Rizzo 	BDG_WLOCK(b);
531*2ff91c17SVincenzo Maffione 	if (b->bdg_ops->dtor)
532*2ff91c17SVincenzo Maffione 		b->bdg_ops->dtor(b->bdg_ports[s_hw]);
533f9790aebSLuigi Rizzo 	b->bdg_ports[s_hw] = NULL;
534f9790aebSLuigi Rizzo 	if (s_sw >= 0) {
535f9790aebSLuigi Rizzo 		b->bdg_ports[s_sw] = NULL;
536f9790aebSLuigi Rizzo 	}
537*2ff91c17SVincenzo Maffione 	memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index));
538f9790aebSLuigi Rizzo 	b->bdg_active_ports = lim;
539f9790aebSLuigi Rizzo 	BDG_WUNLOCK(b);
540f9790aebSLuigi Rizzo 
541f9790aebSLuigi Rizzo 	ND("now %d active ports", lim);
542*2ff91c17SVincenzo Maffione 	netmap_bdg_free(b);
543f9790aebSLuigi Rizzo }
544*2ff91c17SVincenzo Maffione 
545*2ff91c17SVincenzo Maffione static inline void *
546*2ff91c17SVincenzo Maffione nm_bdg_get_auth_token(struct nm_bridge *b)
547*2ff91c17SVincenzo Maffione {
548*2ff91c17SVincenzo Maffione 	return b->ht;
549f9790aebSLuigi Rizzo }
550f9790aebSLuigi Rizzo 
551*2ff91c17SVincenzo Maffione /* bridge not in exclusive mode ==> always valid
552*2ff91c17SVincenzo Maffione  * bridge in exclusive mode (created through netmap_bdg_create()) ==> check authentication token
553*2ff91c17SVincenzo Maffione  */
554*2ff91c17SVincenzo Maffione static inline int
555*2ff91c17SVincenzo Maffione nm_bdg_valid_auth_token(struct nm_bridge *b, void *auth_token)
556*2ff91c17SVincenzo Maffione {
557*2ff91c17SVincenzo Maffione 	return !(b->bdg_flags & NM_BDG_EXCLUSIVE) || b->ht == auth_token;
558*2ff91c17SVincenzo Maffione }
559*2ff91c17SVincenzo Maffione 
560*2ff91c17SVincenzo Maffione /* Allows external modules to create bridges in exclusive mode,
561*2ff91c17SVincenzo Maffione  * returns an authentication token that the external module will need
562*2ff91c17SVincenzo Maffione  * to provide during nm_bdg_ctl_{attach, detach}(), netmap_bdg_regops(),
563*2ff91c17SVincenzo Maffione  * and nm_bdg_update_private_data() operations.
564*2ff91c17SVincenzo Maffione  * Successfully executed if ret != NULL and *return_status == 0.
565*2ff91c17SVincenzo Maffione  */
566*2ff91c17SVincenzo Maffione void *
567*2ff91c17SVincenzo Maffione netmap_bdg_create(const char *bdg_name, int *return_status)
568*2ff91c17SVincenzo Maffione {
569*2ff91c17SVincenzo Maffione 	struct nm_bridge *b = NULL;
570*2ff91c17SVincenzo Maffione 	void *ret = NULL;
571*2ff91c17SVincenzo Maffione 
572*2ff91c17SVincenzo Maffione 	NMG_LOCK();
573*2ff91c17SVincenzo Maffione 	b = nm_find_bridge(bdg_name, 0 /* don't create */);
574*2ff91c17SVincenzo Maffione 	if (b) {
575*2ff91c17SVincenzo Maffione 		*return_status = EEXIST;
576*2ff91c17SVincenzo Maffione 		goto unlock_bdg_create;
577*2ff91c17SVincenzo Maffione 	}
578*2ff91c17SVincenzo Maffione 
579*2ff91c17SVincenzo Maffione 	b = nm_find_bridge(bdg_name, 1 /* create */);
580*2ff91c17SVincenzo Maffione 	if (!b) {
581*2ff91c17SVincenzo Maffione 		*return_status = ENOMEM;
582*2ff91c17SVincenzo Maffione 		goto unlock_bdg_create;
583*2ff91c17SVincenzo Maffione 	}
584*2ff91c17SVincenzo Maffione 
585*2ff91c17SVincenzo Maffione 	b->bdg_flags |= NM_BDG_ACTIVE | NM_BDG_EXCLUSIVE;
586*2ff91c17SVincenzo Maffione 	ret = nm_bdg_get_auth_token(b);
587*2ff91c17SVincenzo Maffione 	*return_status = 0;
588*2ff91c17SVincenzo Maffione 
589*2ff91c17SVincenzo Maffione unlock_bdg_create:
590*2ff91c17SVincenzo Maffione 	NMG_UNLOCK();
591*2ff91c17SVincenzo Maffione 	return ret;
592*2ff91c17SVincenzo Maffione }
593*2ff91c17SVincenzo Maffione 
594*2ff91c17SVincenzo Maffione /* Allows external modules to destroy a bridge created through
595*2ff91c17SVincenzo Maffione  * netmap_bdg_create(), the bridge must be empty.
596*2ff91c17SVincenzo Maffione  */
597*2ff91c17SVincenzo Maffione int
598*2ff91c17SVincenzo Maffione netmap_bdg_destroy(const char *bdg_name, void *auth_token)
599*2ff91c17SVincenzo Maffione {
600*2ff91c17SVincenzo Maffione 	struct nm_bridge *b = NULL;
601*2ff91c17SVincenzo Maffione 	int ret = 0;
602*2ff91c17SVincenzo Maffione 
603*2ff91c17SVincenzo Maffione 	NMG_LOCK();
604*2ff91c17SVincenzo Maffione 	b = nm_find_bridge(bdg_name, 0 /* don't create */);
605*2ff91c17SVincenzo Maffione 	if (!b) {
606*2ff91c17SVincenzo Maffione 		ret = ENXIO;
607*2ff91c17SVincenzo Maffione 		goto unlock_bdg_free;
608*2ff91c17SVincenzo Maffione 	}
609*2ff91c17SVincenzo Maffione 
610*2ff91c17SVincenzo Maffione 	if (!nm_bdg_valid_auth_token(b, auth_token)) {
611*2ff91c17SVincenzo Maffione 		ret = EACCES;
612*2ff91c17SVincenzo Maffione 		goto unlock_bdg_free;
613*2ff91c17SVincenzo Maffione 	}
614*2ff91c17SVincenzo Maffione 	if (!(b->bdg_flags & NM_BDG_EXCLUSIVE)) {
615*2ff91c17SVincenzo Maffione 		ret = EINVAL;
616*2ff91c17SVincenzo Maffione 		goto unlock_bdg_free;
617*2ff91c17SVincenzo Maffione 	}
618*2ff91c17SVincenzo Maffione 
619*2ff91c17SVincenzo Maffione 	b->bdg_flags &= ~(NM_BDG_EXCLUSIVE | NM_BDG_ACTIVE);
620*2ff91c17SVincenzo Maffione 	ret = netmap_bdg_free(b);
621*2ff91c17SVincenzo Maffione 	if (ret) {
622*2ff91c17SVincenzo Maffione 		b->bdg_flags |= NM_BDG_EXCLUSIVE | NM_BDG_ACTIVE;
623*2ff91c17SVincenzo Maffione 	}
624*2ff91c17SVincenzo Maffione 
625*2ff91c17SVincenzo Maffione unlock_bdg_free:
626*2ff91c17SVincenzo Maffione 	NMG_UNLOCK();
627*2ff91c17SVincenzo Maffione 	return ret;
628*2ff91c17SVincenzo Maffione }
629*2ff91c17SVincenzo Maffione 
630*2ff91c17SVincenzo Maffione 
631*2ff91c17SVincenzo Maffione 
6324bf50f18SLuigi Rizzo /* nm_bdg_ctl callback for VALE ports */
6334bf50f18SLuigi Rizzo static int
634*2ff91c17SVincenzo Maffione netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
635f9790aebSLuigi Rizzo {
636f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
637f9790aebSLuigi Rizzo 	struct nm_bridge *b = vpna->na_bdg;
638f9790aebSLuigi Rizzo 
639*2ff91c17SVincenzo Maffione 	if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
6404bf50f18SLuigi Rizzo 		return 0; /* nothing to do */
641*2ff91c17SVincenzo Maffione 	}
6424bf50f18SLuigi Rizzo 	if (b) {
6434bf50f18SLuigi Rizzo 		netmap_set_all_rings(na, 0 /* disable */);
6444bf50f18SLuigi Rizzo 		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
6454bf50f18SLuigi Rizzo 		vpna->na_bdg = NULL;
6464bf50f18SLuigi Rizzo 		netmap_set_all_rings(na, 1 /* enable */);
6474bf50f18SLuigi Rizzo 	}
6484bf50f18SLuigi Rizzo 	/* I have took reference just for attach */
6494bf50f18SLuigi Rizzo 	netmap_adapter_put(na);
6504bf50f18SLuigi Rizzo 	return 0;
6514bf50f18SLuigi Rizzo }
6524bf50f18SLuigi Rizzo 
6534bf50f18SLuigi Rizzo /* nm_dtor callback for ephemeral VALE ports */
6544bf50f18SLuigi Rizzo static void
6554bf50f18SLuigi Rizzo netmap_vp_dtor(struct netmap_adapter *na)
6564bf50f18SLuigi Rizzo {
6574bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
6584bf50f18SLuigi Rizzo 	struct nm_bridge *b = vpna->na_bdg;
6594bf50f18SLuigi Rizzo 
6604bf50f18SLuigi Rizzo 	ND("%s has %d references", na->name, na->na_refcount);
661f9790aebSLuigi Rizzo 
662f9790aebSLuigi Rizzo 	if (b) {
663f9790aebSLuigi Rizzo 		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
664f9790aebSLuigi Rizzo 	}
665c3e9b4dbSLuiz Otavio O Souza 
6664f80b14cSVincenzo Maffione 	if (na->ifp != NULL && !nm_iszombie(na)) {
6674f80b14cSVincenzo Maffione 		WNA(na->ifp) = NULL;
6684f80b14cSVincenzo Maffione 		if (vpna->autodelete) {
669c3e9b4dbSLuiz Otavio O Souza 			ND("releasing %s", na->ifp->if_xname);
670c3e9b4dbSLuiz Otavio O Souza 			NMG_UNLOCK();
671c3e9b4dbSLuiz Otavio O Souza 			nm_os_vi_detach(na->ifp);
672c3e9b4dbSLuiz Otavio O Souza 			NMG_LOCK();
673c3e9b4dbSLuiz Otavio O Souza 		}
674f9790aebSLuigi Rizzo 	}
6754f80b14cSVincenzo Maffione }
676f9790aebSLuigi Rizzo 
677*2ff91c17SVincenzo Maffione /* creates a persistent VALE port */
678*2ff91c17SVincenzo Maffione int
679*2ff91c17SVincenzo Maffione nm_vi_create(struct nmreq_header *hdr)
680*2ff91c17SVincenzo Maffione {
681*2ff91c17SVincenzo Maffione 	struct nmreq_vale_newif *req =
682*2ff91c17SVincenzo Maffione 		(struct nmreq_vale_newif *)hdr->nr_body;
683*2ff91c17SVincenzo Maffione 	int error = 0;
684*2ff91c17SVincenzo Maffione 	/* Build a nmreq_register out of the nmreq_vale_newif,
685*2ff91c17SVincenzo Maffione 	 * so that we can call netmap_get_bdg_na(). */
686*2ff91c17SVincenzo Maffione 	struct nmreq_register regreq;
687*2ff91c17SVincenzo Maffione 	bzero(&regreq, sizeof(regreq));
688*2ff91c17SVincenzo Maffione 	regreq.nr_tx_slots = req->nr_tx_slots;
689*2ff91c17SVincenzo Maffione 	regreq.nr_rx_slots = req->nr_rx_slots;
690*2ff91c17SVincenzo Maffione 	regreq.nr_tx_rings = req->nr_tx_rings;
691*2ff91c17SVincenzo Maffione 	regreq.nr_rx_rings = req->nr_rx_rings;
692*2ff91c17SVincenzo Maffione 	regreq.nr_mem_id = req->nr_mem_id;
693*2ff91c17SVincenzo Maffione 	hdr->nr_reqtype = NETMAP_REQ_REGISTER;
694*2ff91c17SVincenzo Maffione 	hdr->nr_body = (uint64_t)&regreq;
695*2ff91c17SVincenzo Maffione 	error = netmap_vi_create(hdr, 0 /* no autodelete */);
696*2ff91c17SVincenzo Maffione 	hdr->nr_reqtype = NETMAP_REQ_VALE_NEWIF;
697*2ff91c17SVincenzo Maffione 	hdr->nr_body = (uint64_t)req;
698*2ff91c17SVincenzo Maffione         /* Write back to the original struct. */
699*2ff91c17SVincenzo Maffione 	req->nr_tx_slots = regreq.nr_tx_slots;
700*2ff91c17SVincenzo Maffione 	req->nr_rx_slots = regreq.nr_rx_slots;
701*2ff91c17SVincenzo Maffione 	req->nr_tx_rings = regreq.nr_tx_rings;
702*2ff91c17SVincenzo Maffione 	req->nr_rx_rings = regreq.nr_rx_rings;
703*2ff91c17SVincenzo Maffione 	req->nr_mem_id = regreq.nr_mem_id;
704*2ff91c17SVincenzo Maffione 	return error;
705*2ff91c17SVincenzo Maffione }
706*2ff91c17SVincenzo Maffione 
7074bf50f18SLuigi Rizzo /* remove a persistent VALE port from the system */
708*2ff91c17SVincenzo Maffione int
7094bf50f18SLuigi Rizzo nm_vi_destroy(const char *name)
7104bf50f18SLuigi Rizzo {
7114bf50f18SLuigi Rizzo 	struct ifnet *ifp;
712c3e9b4dbSLuiz Otavio O Souza 	struct netmap_vp_adapter *vpna;
7134bf50f18SLuigi Rizzo 	int error;
7144bf50f18SLuigi Rizzo 
7154bf50f18SLuigi Rizzo 	ifp = ifunit_ref(name);
7164bf50f18SLuigi Rizzo 	if (!ifp)
7174bf50f18SLuigi Rizzo 		return ENXIO;
7184bf50f18SLuigi Rizzo 	NMG_LOCK();
7194bf50f18SLuigi Rizzo 	/* make sure this is actually a VALE port */
72037e3a6d3SLuigi Rizzo 	if (!NM_NA_VALID(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
7214bf50f18SLuigi Rizzo 		error = EINVAL;
7224bf50f18SLuigi Rizzo 		goto err;
7234bf50f18SLuigi Rizzo 	}
7244bf50f18SLuigi Rizzo 
725c3e9b4dbSLuiz Otavio O Souza 	vpna = (struct netmap_vp_adapter *)NA(ifp);
726c3e9b4dbSLuiz Otavio O Souza 
727c3e9b4dbSLuiz Otavio O Souza 	/* we can only destroy ports that were created via NETMAP_BDG_NEWIF */
728c3e9b4dbSLuiz Otavio O Souza 	if (vpna->autodelete) {
729c3e9b4dbSLuiz Otavio O Souza 		error = EINVAL;
730c3e9b4dbSLuiz Otavio O Souza 		goto err;
731c3e9b4dbSLuiz Otavio O Souza 	}
732c3e9b4dbSLuiz Otavio O Souza 
733c3e9b4dbSLuiz Otavio O Souza 	/* also make sure that nobody is using the inferface */
734c3e9b4dbSLuiz Otavio O Souza 	if (NETMAP_OWNED_BY_ANY(&vpna->up) ||
735c3e9b4dbSLuiz Otavio O Souza 	    vpna->up.na_refcount > 1 /* any ref besides the one in nm_vi_create()? */) {
7364bf50f18SLuigi Rizzo 		error = EBUSY;
7374bf50f18SLuigi Rizzo 		goto err;
7384bf50f18SLuigi Rizzo 	}
739c3e9b4dbSLuiz Otavio O Souza 
7404bf50f18SLuigi Rizzo 	NMG_UNLOCK();
7414bf50f18SLuigi Rizzo 
7424bf50f18SLuigi Rizzo 	D("destroying a persistent vale interface %s", ifp->if_xname);
7434bf50f18SLuigi Rizzo 	/* Linux requires all the references are released
7444bf50f18SLuigi Rizzo 	 * before unregister
7454bf50f18SLuigi Rizzo 	 */
7464bf50f18SLuigi Rizzo 	netmap_detach(ifp);
747c3e9b4dbSLuiz Otavio O Souza 	if_rele(ifp);
74837e3a6d3SLuigi Rizzo 	nm_os_vi_detach(ifp);
7494bf50f18SLuigi Rizzo 	return 0;
7504bf50f18SLuigi Rizzo 
7514bf50f18SLuigi Rizzo err:
7524bf50f18SLuigi Rizzo 	NMG_UNLOCK();
7534bf50f18SLuigi Rizzo 	if_rele(ifp);
7544bf50f18SLuigi Rizzo 	return error;
7554bf50f18SLuigi Rizzo }
7564bf50f18SLuigi Rizzo 
757c3e9b4dbSLuiz Otavio O Souza static int
758*2ff91c17SVincenzo Maffione nm_update_info(struct nmreq_register *req, struct netmap_adapter *na)
759c3e9b4dbSLuiz Otavio O Souza {
760*2ff91c17SVincenzo Maffione 	req->nr_rx_rings = na->num_rx_rings;
761*2ff91c17SVincenzo Maffione 	req->nr_tx_rings = na->num_tx_rings;
762*2ff91c17SVincenzo Maffione 	req->nr_rx_slots = na->num_rx_desc;
763*2ff91c17SVincenzo Maffione 	req->nr_tx_slots = na->num_tx_desc;
764*2ff91c17SVincenzo Maffione 	return netmap_mem_get_info(na->nm_mem, &req->nr_memsize, NULL,
765*2ff91c17SVincenzo Maffione 					&req->nr_mem_id);
766c3e9b4dbSLuiz Otavio O Souza }
767c3e9b4dbSLuiz Otavio O Souza 
7684bf50f18SLuigi Rizzo /*
7694bf50f18SLuigi Rizzo  * Create a virtual interface registered to the system.
7704bf50f18SLuigi Rizzo  * The interface will be attached to a bridge later.
7714bf50f18SLuigi Rizzo  */
772c3e9b4dbSLuiz Otavio O Souza int
773*2ff91c17SVincenzo Maffione netmap_vi_create(struct nmreq_header *hdr, int autodelete)
7744bf50f18SLuigi Rizzo {
775*2ff91c17SVincenzo Maffione 	struct nmreq_register *req = (struct nmreq_register *)hdr->nr_body;
7764bf50f18SLuigi Rizzo 	struct ifnet *ifp;
7774bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *vpna;
778c3e9b4dbSLuiz Otavio O Souza 	struct netmap_mem_d *nmd = NULL;
7794bf50f18SLuigi Rizzo 	int error;
7804bf50f18SLuigi Rizzo 
781*2ff91c17SVincenzo Maffione 	if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
7824bf50f18SLuigi Rizzo 		return EINVAL;
783*2ff91c17SVincenzo Maffione 	}
784*2ff91c17SVincenzo Maffione 
785*2ff91c17SVincenzo Maffione 	/* don't include VALE prefix */
786*2ff91c17SVincenzo Maffione 	if (!strncmp(hdr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME)))
787*2ff91c17SVincenzo Maffione 		return EINVAL;
788*2ff91c17SVincenzo Maffione 	if (strlen(hdr->nr_name) >= IFNAMSIZ) {
789*2ff91c17SVincenzo Maffione 		return EINVAL;
790*2ff91c17SVincenzo Maffione 	}
791*2ff91c17SVincenzo Maffione 	ifp = ifunit_ref(hdr->nr_name);
7924bf50f18SLuigi Rizzo 	if (ifp) { /* already exist, cannot create new one */
793c3e9b4dbSLuiz Otavio O Souza 		error = EEXIST;
794c3e9b4dbSLuiz Otavio O Souza 		NMG_LOCK();
795c3e9b4dbSLuiz Otavio O Souza 		if (NM_NA_VALID(ifp)) {
796*2ff91c17SVincenzo Maffione 			int update_err = nm_update_info(req, NA(ifp));
797c3e9b4dbSLuiz Otavio O Souza 			if (update_err)
798c3e9b4dbSLuiz Otavio O Souza 				error = update_err;
799c3e9b4dbSLuiz Otavio O Souza 		}
800c3e9b4dbSLuiz Otavio O Souza 		NMG_UNLOCK();
8014bf50f18SLuigi Rizzo 		if_rele(ifp);
802c3e9b4dbSLuiz Otavio O Souza 		return error;
8034bf50f18SLuigi Rizzo 	}
804*2ff91c17SVincenzo Maffione 	error = nm_os_vi_persist(hdr->nr_name, &ifp);
8054bf50f18SLuigi Rizzo 	if (error)
8064bf50f18SLuigi Rizzo 		return error;
8074bf50f18SLuigi Rizzo 
8084bf50f18SLuigi Rizzo 	NMG_LOCK();
809*2ff91c17SVincenzo Maffione 	if (req->nr_mem_id) {
810*2ff91c17SVincenzo Maffione 		nmd = netmap_mem_find(req->nr_mem_id);
811c3e9b4dbSLuiz Otavio O Souza 		if (nmd == NULL) {
812c3e9b4dbSLuiz Otavio O Souza 			error = EINVAL;
813c3e9b4dbSLuiz Otavio O Souza 			goto err_1;
814c3e9b4dbSLuiz Otavio O Souza 		}
815c3e9b4dbSLuiz Otavio O Souza 	}
8164bf50f18SLuigi Rizzo 	/* netmap_vp_create creates a struct netmap_vp_adapter */
817*2ff91c17SVincenzo Maffione 	error = netmap_vp_create(hdr, ifp, nmd, &vpna);
8184bf50f18SLuigi Rizzo 	if (error) {
8194bf50f18SLuigi Rizzo 		D("error %d", error);
820c3e9b4dbSLuiz Otavio O Souza 		goto err_1;
8214bf50f18SLuigi Rizzo 	}
8224bf50f18SLuigi Rizzo 	/* persist-specific routines */
8234bf50f18SLuigi Rizzo 	vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
824c3e9b4dbSLuiz Otavio O Souza 	if (!autodelete) {
8254bf50f18SLuigi Rizzo 		netmap_adapter_get(&vpna->up);
826c3e9b4dbSLuiz Otavio O Souza 	} else {
827c3e9b4dbSLuiz Otavio O Souza 		vpna->autodelete = 1;
828c3e9b4dbSLuiz Otavio O Souza 	}
82937e3a6d3SLuigi Rizzo 	NM_ATTACH_NA(ifp, &vpna->up);
830c3e9b4dbSLuiz Otavio O Souza 	/* return the updated info */
831*2ff91c17SVincenzo Maffione 	error = nm_update_info(req, &vpna->up);
832c3e9b4dbSLuiz Otavio O Souza 	if (error) {
833c3e9b4dbSLuiz Otavio O Souza 		goto err_2;
834c3e9b4dbSLuiz Otavio O Souza 	}
835*2ff91c17SVincenzo Maffione 	ND("returning nr_mem_id %d", req->nr_mem_id);
836c3e9b4dbSLuiz Otavio O Souza 	if (nmd)
837c3e9b4dbSLuiz Otavio O Souza 		netmap_mem_put(nmd);
8384bf50f18SLuigi Rizzo 	NMG_UNLOCK();
839*2ff91c17SVincenzo Maffione 	ND("created %s", ifp->if_xname);
8404bf50f18SLuigi Rizzo 	return 0;
841c3e9b4dbSLuiz Otavio O Souza 
842c3e9b4dbSLuiz Otavio O Souza err_2:
843c3e9b4dbSLuiz Otavio O Souza 	netmap_detach(ifp);
844c3e9b4dbSLuiz Otavio O Souza err_1:
845c3e9b4dbSLuiz Otavio O Souza 	if (nmd)
846c3e9b4dbSLuiz Otavio O Souza 		netmap_mem_put(nmd);
847c3e9b4dbSLuiz Otavio O Souza 	NMG_UNLOCK();
848c3e9b4dbSLuiz Otavio O Souza 	nm_os_vi_detach(ifp);
849c3e9b4dbSLuiz Otavio O Souza 
850c3e9b4dbSLuiz Otavio O Souza 	return error;
8514bf50f18SLuigi Rizzo }
85217885a7bSLuigi Rizzo 
85317885a7bSLuigi Rizzo /* Try to get a reference to a netmap adapter attached to a VALE switch.
85417885a7bSLuigi Rizzo  * If the adapter is found (or is created), this function returns 0, a
85517885a7bSLuigi Rizzo  * non NULL pointer is returned into *na, and the caller holds a
85617885a7bSLuigi Rizzo  * reference to the adapter.
85717885a7bSLuigi Rizzo  * If an adapter is not found, then no reference is grabbed and the
85817885a7bSLuigi Rizzo  * function returns an error code, or 0 if there is just a VALE prefix
85917885a7bSLuigi Rizzo  * mismatch. Therefore the caller holds a reference when
86017885a7bSLuigi Rizzo  * (*na != NULL && return == 0).
86117885a7bSLuigi Rizzo  */
862f9790aebSLuigi Rizzo int
863*2ff91c17SVincenzo Maffione netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
864c3e9b4dbSLuiz Otavio O Souza 		struct netmap_mem_d *nmd, int create)
865f9790aebSLuigi Rizzo {
866*2ff91c17SVincenzo Maffione 	char *nr_name = hdr->nr_name;
8674bf50f18SLuigi Rizzo 	const char *ifname;
868c3e9b4dbSLuiz Otavio O Souza 	struct ifnet *ifp = NULL;
869f9790aebSLuigi Rizzo 	int error = 0;
8704bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *vpna, *hostna = NULL;
871f9790aebSLuigi Rizzo 	struct nm_bridge *b;
872*2ff91c17SVincenzo Maffione 	uint32_t i, j;
873*2ff91c17SVincenzo Maffione 	uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT;
874f9790aebSLuigi Rizzo 	int needed;
875f9790aebSLuigi Rizzo 
876f9790aebSLuigi Rizzo 	*na = NULL;     /* default return value */
877f9790aebSLuigi Rizzo 
878f9790aebSLuigi Rizzo 	/* first try to see if this is a bridge port. */
879f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
88037e3a6d3SLuigi Rizzo 	if (strncmp(nr_name, NM_BDG_NAME, sizeof(NM_BDG_NAME) - 1)) {
881f9790aebSLuigi Rizzo 		return 0;  /* no error, but no VALE prefix */
882f9790aebSLuigi Rizzo 	}
883f9790aebSLuigi Rizzo 
8844bf50f18SLuigi Rizzo 	b = nm_find_bridge(nr_name, create);
885f9790aebSLuigi Rizzo 	if (b == NULL) {
8864bf50f18SLuigi Rizzo 		D("no bridges available for '%s'", nr_name);
887f2637526SLuigi Rizzo 		return (create ? ENOMEM : ENXIO);
888f9790aebSLuigi Rizzo 	}
8894bf50f18SLuigi Rizzo 	if (strlen(nr_name) < b->bdg_namelen) /* impossible */
8904bf50f18SLuigi Rizzo 		panic("x");
891f9790aebSLuigi Rizzo 
892f9790aebSLuigi Rizzo 	/* Now we are sure that name starts with the bridge's name,
893f9790aebSLuigi Rizzo 	 * lookup the port in the bridge. We need to scan the entire
894f9790aebSLuigi Rizzo 	 * list. It is not important to hold a WLOCK on the bridge
895f9790aebSLuigi Rizzo 	 * during the search because NMG_LOCK already guarantees
896f9790aebSLuigi Rizzo 	 * that there are no other possible writers.
897f9790aebSLuigi Rizzo 	 */
898f9790aebSLuigi Rizzo 
899f9790aebSLuigi Rizzo 	/* lookup in the local list of ports */
900f9790aebSLuigi Rizzo 	for (j = 0; j < b->bdg_active_ports; j++) {
901f9790aebSLuigi Rizzo 		i = b->bdg_port_index[j];
902f9790aebSLuigi Rizzo 		vpna = b->bdg_ports[i];
903847bf383SLuigi Rizzo 		ND("checking %s", vpna->up.name);
9044bf50f18SLuigi Rizzo 		if (!strcmp(vpna->up.name, nr_name)) {
905f9790aebSLuigi Rizzo 			netmap_adapter_get(&vpna->up);
9064bf50f18SLuigi Rizzo 			ND("found existing if %s refs %d", nr_name)
9074bf50f18SLuigi Rizzo 			*na = &vpna->up;
908f9790aebSLuigi Rizzo 			return 0;
909f9790aebSLuigi Rizzo 		}
910f9790aebSLuigi Rizzo 	}
911f9790aebSLuigi Rizzo 	/* not found, should we create it? */
912f9790aebSLuigi Rizzo 	if (!create)
913f9790aebSLuigi Rizzo 		return ENXIO;
914f9790aebSLuigi Rizzo 	/* yes we should, see if we have space to attach entries */
915f9790aebSLuigi Rizzo 	needed = 2; /* in some cases we only need 1 */
916f9790aebSLuigi Rizzo 	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
917f9790aebSLuigi Rizzo 		D("bridge full %d, cannot create new port", b->bdg_active_ports);
918f2637526SLuigi Rizzo 		return ENOMEM;
919f9790aebSLuigi Rizzo 	}
920f9790aebSLuigi Rizzo 	/* record the next two ports available, but do not allocate yet */
921f9790aebSLuigi Rizzo 	cand = b->bdg_port_index[b->bdg_active_ports];
922f9790aebSLuigi Rizzo 	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
923f9790aebSLuigi Rizzo 	ND("+++ bridge %s port %s used %d avail %d %d",
9244bf50f18SLuigi Rizzo 		b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);
925f9790aebSLuigi Rizzo 
926f9790aebSLuigi Rizzo 	/*
927f9790aebSLuigi Rizzo 	 * try see if there is a matching NIC with this name
928f9790aebSLuigi Rizzo 	 * (after the bridge's name)
929f9790aebSLuigi Rizzo 	 */
9304bf50f18SLuigi Rizzo 	ifname = nr_name + b->bdg_namelen + 1;
9314bf50f18SLuigi Rizzo 	ifp = ifunit_ref(ifname);
9324bf50f18SLuigi Rizzo 	if (!ifp) {
933*2ff91c17SVincenzo Maffione 		/* Create an ephemeral virtual port.
934*2ff91c17SVincenzo Maffione 		 * This block contains all the ephemeral-specific logic.
9354bf50f18SLuigi Rizzo 		 */
936*2ff91c17SVincenzo Maffione 
937*2ff91c17SVincenzo Maffione 		if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
938c3e9b4dbSLuiz Otavio O Souza 			error = EINVAL;
939c3e9b4dbSLuiz Otavio O Souza 			goto out;
940f9790aebSLuigi Rizzo 		}
941f9790aebSLuigi Rizzo 
942f9790aebSLuigi Rizzo 		/* bdg_netmap_attach creates a struct netmap_adapter */
943*2ff91c17SVincenzo Maffione 		error = netmap_vp_create(hdr, NULL, nmd, &vpna);
944f9790aebSLuigi Rizzo 		if (error) {
945f9790aebSLuigi Rizzo 			D("error %d", error);
946c3e9b4dbSLuiz Otavio O Souza 			goto out;
947f9790aebSLuigi Rizzo 		}
9484bf50f18SLuigi Rizzo 		/* shortcut - we can skip get_hw_na(),
9494bf50f18SLuigi Rizzo 		 * ownership check and nm_bdg_attach()
9504bf50f18SLuigi Rizzo 		 */
951*2ff91c17SVincenzo Maffione 
9524bf50f18SLuigi Rizzo 	} else {
9534bf50f18SLuigi Rizzo 		struct netmap_adapter *hw;
954f9790aebSLuigi Rizzo 
9554f80b14cSVincenzo Maffione 		/* the vale:nic syntax is only valid for some commands */
956*2ff91c17SVincenzo Maffione 		switch (hdr->nr_reqtype) {
957*2ff91c17SVincenzo Maffione 		case NETMAP_REQ_VALE_ATTACH:
958*2ff91c17SVincenzo Maffione 		case NETMAP_REQ_VALE_DETACH:
959*2ff91c17SVincenzo Maffione 		case NETMAP_REQ_VALE_POLLING_ENABLE:
960*2ff91c17SVincenzo Maffione 		case NETMAP_REQ_VALE_POLLING_DISABLE:
9614f80b14cSVincenzo Maffione 			break; /* ok */
9624f80b14cSVincenzo Maffione 		default:
9634f80b14cSVincenzo Maffione 			error = EINVAL;
9644f80b14cSVincenzo Maffione 			goto out;
9654f80b14cSVincenzo Maffione 		}
9664f80b14cSVincenzo Maffione 
967c3e9b4dbSLuiz Otavio O Souza 		error = netmap_get_hw_na(ifp, nmd, &hw);
9684bf50f18SLuigi Rizzo 		if (error || hw == NULL)
969f9790aebSLuigi Rizzo 			goto out;
970f9790aebSLuigi Rizzo 
9714bf50f18SLuigi Rizzo 		/* host adapter might not be created */
9724bf50f18SLuigi Rizzo 		error = hw->nm_bdg_attach(nr_name, hw);
9734bf50f18SLuigi Rizzo 		if (error)
974f9790aebSLuigi Rizzo 			goto out;
9754bf50f18SLuigi Rizzo 		vpna = hw->na_vp;
9764bf50f18SLuigi Rizzo 		hostna = hw->na_hostvp;
977*2ff91c17SVincenzo Maffione 		if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
978*2ff91c17SVincenzo Maffione 			/* Check if we need to skip the host rings. */
979*2ff91c17SVincenzo Maffione 			struct nmreq_vale_attach *areq =
980*2ff91c17SVincenzo Maffione 				(struct nmreq_vale_attach *)hdr->nr_body;
981*2ff91c17SVincenzo Maffione 			if (areq->reg.nr_mode != NR_REG_NIC_SW) {
9824bf50f18SLuigi Rizzo 				hostna = NULL;
983f9790aebSLuigi Rizzo 			}
984*2ff91c17SVincenzo Maffione 		}
985*2ff91c17SVincenzo Maffione 	}
986f9790aebSLuigi Rizzo 
987f9790aebSLuigi Rizzo 	BDG_WLOCK(b);
988f9790aebSLuigi Rizzo 	vpna->bdg_port = cand;
989f9790aebSLuigi Rizzo 	ND("NIC  %p to bridge port %d", vpna, cand);
990f9790aebSLuigi Rizzo 	/* bind the port to the bridge (virtual ports are not active) */
991f9790aebSLuigi Rizzo 	b->bdg_ports[cand] = vpna;
992f9790aebSLuigi Rizzo 	vpna->na_bdg = b;
993f9790aebSLuigi Rizzo 	b->bdg_active_ports++;
9944bf50f18SLuigi Rizzo 	if (hostna != NULL) {
995f9790aebSLuigi Rizzo 		/* also bind the host stack to the bridge */
996f9790aebSLuigi Rizzo 		b->bdg_ports[cand2] = hostna;
997f9790aebSLuigi Rizzo 		hostna->bdg_port = cand2;
998f9790aebSLuigi Rizzo 		hostna->na_bdg = b;
999f9790aebSLuigi Rizzo 		b->bdg_active_ports++;
1000f9790aebSLuigi Rizzo 		ND("host %p to bridge port %d", hostna, cand2);
1001f9790aebSLuigi Rizzo 	}
10024bf50f18SLuigi Rizzo 	ND("if %s refs %d", ifname, vpna->up.na_refcount);
1003f9790aebSLuigi Rizzo 	BDG_WUNLOCK(b);
10044bf50f18SLuigi Rizzo 	*na = &vpna->up;
10054bf50f18SLuigi Rizzo 	netmap_adapter_get(*na);
1006f9790aebSLuigi Rizzo 
1007f9790aebSLuigi Rizzo out:
1008c3e9b4dbSLuiz Otavio O Souza 	if (ifp)
1009f9790aebSLuigi Rizzo 		if_rele(ifp);
1010f9790aebSLuigi Rizzo 
1011f9790aebSLuigi Rizzo 	return error;
1012f9790aebSLuigi Rizzo }
1013f9790aebSLuigi Rizzo 
1014*2ff91c17SVincenzo Maffione /* Process NETMAP_REQ_VALE_ATTACH.
1015*2ff91c17SVincenzo Maffione  */
1016*2ff91c17SVincenzo Maffione int
1017*2ff91c17SVincenzo Maffione nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token)
1018f9790aebSLuigi Rizzo {
1019*2ff91c17SVincenzo Maffione 	struct nmreq_vale_attach *req =
1020*2ff91c17SVincenzo Maffione 		(struct nmreq_vale_attach *)hdr->nr_body;
1021*2ff91c17SVincenzo Maffione 	struct netmap_vp_adapter * vpna;
1022f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
1023c3e9b4dbSLuiz Otavio O Souza 	struct netmap_mem_d *nmd = NULL;
1024*2ff91c17SVincenzo Maffione 	struct nm_bridge *b = NULL;
1025f9790aebSLuigi Rizzo 	int error;
1026f9790aebSLuigi Rizzo 
1027f9790aebSLuigi Rizzo 	NMG_LOCK();
1028*2ff91c17SVincenzo Maffione 	/* permission check for modified bridges */
1029*2ff91c17SVincenzo Maffione 	b = nm_find_bridge(hdr->nr_name, 0 /* don't create */);
1030*2ff91c17SVincenzo Maffione 	if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
1031*2ff91c17SVincenzo Maffione 		error = EACCES;
1032*2ff91c17SVincenzo Maffione 		goto unlock_exit;
1033*2ff91c17SVincenzo Maffione 	}
1034f2637526SLuigi Rizzo 
1035*2ff91c17SVincenzo Maffione 	if (req->reg.nr_mem_id) {
1036*2ff91c17SVincenzo Maffione 		nmd = netmap_mem_find(req->reg.nr_mem_id);
1037c3e9b4dbSLuiz Otavio O Souza 		if (nmd == NULL) {
1038c3e9b4dbSLuiz Otavio O Souza 			error = EINVAL;
1039c3e9b4dbSLuiz Otavio O Souza 			goto unlock_exit;
1040c3e9b4dbSLuiz Otavio O Souza 		}
1041c3e9b4dbSLuiz Otavio O Souza 	}
1042c3e9b4dbSLuiz Otavio O Souza 
1043*2ff91c17SVincenzo Maffione 	/* check for existing one */
1044*2ff91c17SVincenzo Maffione 	error = netmap_get_bdg_na(hdr, &na, nmd, 0);
10454f80b14cSVincenzo Maffione 	if (!error) {
10464f80b14cSVincenzo Maffione 		error = EBUSY;
10474f80b14cSVincenzo Maffione 		goto unref_exit;
10484f80b14cSVincenzo Maffione 	}
1049*2ff91c17SVincenzo Maffione 	error = netmap_get_bdg_na(hdr, &na,
1050*2ff91c17SVincenzo Maffione 				nmd, 1 /* create if not exists */);
1051*2ff91c17SVincenzo Maffione 	if (error) { /* no device */
1052f9790aebSLuigi Rizzo 		goto unlock_exit;
1053*2ff91c17SVincenzo Maffione 	}
1054f2637526SLuigi Rizzo 
105517885a7bSLuigi Rizzo 	if (na == NULL) { /* VALE prefix missing */
1056f9790aebSLuigi Rizzo 		error = EINVAL;
105717885a7bSLuigi Rizzo 		goto unlock_exit;
1058f9790aebSLuigi Rizzo 	}
1059f9790aebSLuigi Rizzo 
10604bf50f18SLuigi Rizzo 	if (NETMAP_OWNED_BY_ANY(na)) {
1061f9790aebSLuigi Rizzo 		error = EBUSY;
1062f9790aebSLuigi Rizzo 		goto unref_exit;
1063f9790aebSLuigi Rizzo 	}
1064f9790aebSLuigi Rizzo 
10654bf50f18SLuigi Rizzo 	if (na->nm_bdg_ctl) {
10664bf50f18SLuigi Rizzo 		/* nop for VALE ports. The bwrap needs to put the hwna
10674bf50f18SLuigi Rizzo 		 * in netmap mode (see netmap_bwrap_bdg_ctl)
10684bf50f18SLuigi Rizzo 		 */
1069*2ff91c17SVincenzo Maffione 		error = na->nm_bdg_ctl(hdr, na);
10704bf50f18SLuigi Rizzo 		if (error)
1071f9790aebSLuigi Rizzo 			goto unref_exit;
10724bf50f18SLuigi Rizzo 		ND("registered %s to netmap-mode", na->name);
1073f9790aebSLuigi Rizzo 	}
1074*2ff91c17SVincenzo Maffione 	vpna = (struct netmap_vp_adapter *)na;
1075*2ff91c17SVincenzo Maffione 	req->port_index = vpna->bdg_port;
1076f9790aebSLuigi Rizzo 	NMG_UNLOCK();
1077f9790aebSLuigi Rizzo 	return 0;
1078f9790aebSLuigi Rizzo 
1079f9790aebSLuigi Rizzo unref_exit:
1080f9790aebSLuigi Rizzo 	netmap_adapter_put(na);
1081f9790aebSLuigi Rizzo unlock_exit:
1082f9790aebSLuigi Rizzo 	NMG_UNLOCK();
1083f9790aebSLuigi Rizzo 	return error;
1084f9790aebSLuigi Rizzo }
1085f9790aebSLuigi Rizzo 
108637e3a6d3SLuigi Rizzo static inline int
108737e3a6d3SLuigi Rizzo nm_is_bwrap(struct netmap_adapter *na)
108837e3a6d3SLuigi Rizzo {
108937e3a6d3SLuigi Rizzo 	return na->nm_register == netmap_bwrap_reg;
109037e3a6d3SLuigi Rizzo }
109117885a7bSLuigi Rizzo 
1092*2ff91c17SVincenzo Maffione /* Process NETMAP_REQ_VALE_DETACH.
1093*2ff91c17SVincenzo Maffione  */
1094*2ff91c17SVincenzo Maffione int
1095*2ff91c17SVincenzo Maffione nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token)
1096f9790aebSLuigi Rizzo {
1097*2ff91c17SVincenzo Maffione 	struct nmreq_vale_detach *nmreq_det = (void *)hdr->nr_body;
1098*2ff91c17SVincenzo Maffione 	struct netmap_vp_adapter *vpna;
1099f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
1100*2ff91c17SVincenzo Maffione 	struct nm_bridge *b = NULL;
1101f9790aebSLuigi Rizzo 	int error;
1102f9790aebSLuigi Rizzo 
1103f9790aebSLuigi Rizzo 	NMG_LOCK();
1104*2ff91c17SVincenzo Maffione 	/* permission check for modified bridges */
1105*2ff91c17SVincenzo Maffione 	b = nm_find_bridge(hdr->nr_name, 0 /* don't create */);
1106*2ff91c17SVincenzo Maffione 	if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
1107*2ff91c17SVincenzo Maffione 		error = EACCES;
1108*2ff91c17SVincenzo Maffione 		goto unlock_exit;
1109*2ff91c17SVincenzo Maffione 	}
1110*2ff91c17SVincenzo Maffione 
1111*2ff91c17SVincenzo Maffione 	error = netmap_get_bdg_na(hdr, &na, NULL, 0 /* don't create */);
1112f9790aebSLuigi Rizzo 	if (error) { /* no device, or another bridge or user owns the device */
1113f9790aebSLuigi Rizzo 		goto unlock_exit;
1114f9790aebSLuigi Rizzo 	}
1115f2637526SLuigi Rizzo 
111617885a7bSLuigi Rizzo 	if (na == NULL) { /* VALE prefix missing */
1117f9790aebSLuigi Rizzo 		error = EINVAL;
111817885a7bSLuigi Rizzo 		goto unlock_exit;
111937e3a6d3SLuigi Rizzo 	} else if (nm_is_bwrap(na) &&
112037e3a6d3SLuigi Rizzo 		   ((struct netmap_bwrap_adapter *)na)->na_polling_state) {
112137e3a6d3SLuigi Rizzo 		/* Don't detach a NIC with polling */
112237e3a6d3SLuigi Rizzo 		error = EBUSY;
1123*2ff91c17SVincenzo Maffione 		goto unref_exit;
1124f9790aebSLuigi Rizzo 	}
1125*2ff91c17SVincenzo Maffione 
1126*2ff91c17SVincenzo Maffione 	vpna = (struct netmap_vp_adapter *)na;
1127*2ff91c17SVincenzo Maffione 	if (na->na_vp != vpna) {
1128*2ff91c17SVincenzo Maffione 		/* trying to detach first attach of VALE persistent port attached
1129*2ff91c17SVincenzo Maffione 		 * to 2 bridges
1130*2ff91c17SVincenzo Maffione 		 */
1131*2ff91c17SVincenzo Maffione 		error = EBUSY;
1132*2ff91c17SVincenzo Maffione 		goto unref_exit;
1133*2ff91c17SVincenzo Maffione 	}
1134*2ff91c17SVincenzo Maffione 	nmreq_det->port_index = vpna->bdg_port;
1135*2ff91c17SVincenzo Maffione 
11364bf50f18SLuigi Rizzo 	if (na->nm_bdg_ctl) {
11374bf50f18SLuigi Rizzo 		/* remove the port from bridge. The bwrap
11384bf50f18SLuigi Rizzo 		 * also needs to put the hwna in normal mode
11394bf50f18SLuigi Rizzo 		 */
1140*2ff91c17SVincenzo Maffione 		error = na->nm_bdg_ctl(hdr, na);
1141f9790aebSLuigi Rizzo 	}
1142f9790aebSLuigi Rizzo 
1143*2ff91c17SVincenzo Maffione unref_exit:
1144f9790aebSLuigi Rizzo 	netmap_adapter_put(na);
1145f9790aebSLuigi Rizzo unlock_exit:
1146f9790aebSLuigi Rizzo 	NMG_UNLOCK();
1147f9790aebSLuigi Rizzo 	return error;
1148f9790aebSLuigi Rizzo 
1149f9790aebSLuigi Rizzo }
1150f9790aebSLuigi Rizzo 
115137e3a6d3SLuigi Rizzo struct nm_bdg_polling_state;
115237e3a6d3SLuigi Rizzo struct
115337e3a6d3SLuigi Rizzo nm_bdg_kthread {
1154c3e9b4dbSLuiz Otavio O Souza 	struct nm_kctx *nmk;
115537e3a6d3SLuigi Rizzo 	u_int qfirst;
115637e3a6d3SLuigi Rizzo 	u_int qlast;
115737e3a6d3SLuigi Rizzo 	struct nm_bdg_polling_state *bps;
115837e3a6d3SLuigi Rizzo };
115937e3a6d3SLuigi Rizzo 
116037e3a6d3SLuigi Rizzo struct nm_bdg_polling_state {
116137e3a6d3SLuigi Rizzo 	bool configured;
116237e3a6d3SLuigi Rizzo 	bool stopped;
116337e3a6d3SLuigi Rizzo 	struct netmap_bwrap_adapter *bna;
1164*2ff91c17SVincenzo Maffione 	uint32_t mode;
116537e3a6d3SLuigi Rizzo 	u_int qfirst;
116637e3a6d3SLuigi Rizzo 	u_int qlast;
116737e3a6d3SLuigi Rizzo 	u_int cpu_from;
116837e3a6d3SLuigi Rizzo 	u_int ncpus;
116937e3a6d3SLuigi Rizzo 	struct nm_bdg_kthread *kthreads;
117037e3a6d3SLuigi Rizzo };
117137e3a6d3SLuigi Rizzo 
117237e3a6d3SLuigi Rizzo static void
1173c3e9b4dbSLuiz Otavio O Souza netmap_bwrap_polling(void *data, int is_kthread)
117437e3a6d3SLuigi Rizzo {
117537e3a6d3SLuigi Rizzo 	struct nm_bdg_kthread *nbk = data;
117637e3a6d3SLuigi Rizzo 	struct netmap_bwrap_adapter *bna;
117737e3a6d3SLuigi Rizzo 	u_int qfirst, qlast, i;
1178*2ff91c17SVincenzo Maffione 	struct netmap_kring **kring0, *kring;
117937e3a6d3SLuigi Rizzo 
118037e3a6d3SLuigi Rizzo 	if (!nbk)
118137e3a6d3SLuigi Rizzo 		return;
118237e3a6d3SLuigi Rizzo 	qfirst = nbk->qfirst;
118337e3a6d3SLuigi Rizzo 	qlast = nbk->qlast;
118437e3a6d3SLuigi Rizzo 	bna = nbk->bps->bna;
118537e3a6d3SLuigi Rizzo 	kring0 = NMR(bna->hwna, NR_RX);
118637e3a6d3SLuigi Rizzo 
118737e3a6d3SLuigi Rizzo 	for (i = qfirst; i < qlast; i++) {
1188*2ff91c17SVincenzo Maffione 		kring = kring0[i];
118937e3a6d3SLuigi Rizzo 		kring->nm_notify(kring, 0);
119037e3a6d3SLuigi Rizzo 	}
119137e3a6d3SLuigi Rizzo }
119237e3a6d3SLuigi Rizzo 
119337e3a6d3SLuigi Rizzo static int
119437e3a6d3SLuigi Rizzo nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
119537e3a6d3SLuigi Rizzo {
1196c3e9b4dbSLuiz Otavio O Souza 	struct nm_kctx_cfg kcfg;
119737e3a6d3SLuigi Rizzo 	int i, j;
119837e3a6d3SLuigi Rizzo 
1199c3e9b4dbSLuiz Otavio O Souza 	bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus);
120037e3a6d3SLuigi Rizzo 	if (bps->kthreads == NULL)
120137e3a6d3SLuigi Rizzo 		return ENOMEM;
120237e3a6d3SLuigi Rizzo 
120337e3a6d3SLuigi Rizzo 	bzero(&kcfg, sizeof(kcfg));
120437e3a6d3SLuigi Rizzo 	kcfg.worker_fn = netmap_bwrap_polling;
1205c3e9b4dbSLuiz Otavio O Souza 	kcfg.use_kthread = 1;
120637e3a6d3SLuigi Rizzo 	for (i = 0; i < bps->ncpus; i++) {
120737e3a6d3SLuigi Rizzo 		struct nm_bdg_kthread *t = bps->kthreads + i;
1208*2ff91c17SVincenzo Maffione 		int all = (bps->ncpus == 1 &&
1209*2ff91c17SVincenzo Maffione 			bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU);
121037e3a6d3SLuigi Rizzo 		int affinity = bps->cpu_from + i;
121137e3a6d3SLuigi Rizzo 
121237e3a6d3SLuigi Rizzo 		t->bps = bps;
121337e3a6d3SLuigi Rizzo 		t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
121437e3a6d3SLuigi Rizzo 		t->qlast = all ? bps->qlast : t->qfirst + 1;
121537e3a6d3SLuigi Rizzo 		D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
121637e3a6d3SLuigi Rizzo 			t->qlast);
121737e3a6d3SLuigi Rizzo 
121837e3a6d3SLuigi Rizzo 		kcfg.type = i;
121937e3a6d3SLuigi Rizzo 		kcfg.worker_private = t;
1220*2ff91c17SVincenzo Maffione 		t->nmk = nm_os_kctx_create(&kcfg, NULL);
122137e3a6d3SLuigi Rizzo 		if (t->nmk == NULL) {
122237e3a6d3SLuigi Rizzo 			goto cleanup;
122337e3a6d3SLuigi Rizzo 		}
1224c3e9b4dbSLuiz Otavio O Souza 		nm_os_kctx_worker_setaff(t->nmk, affinity);
122537e3a6d3SLuigi Rizzo 	}
122637e3a6d3SLuigi Rizzo 	return 0;
122737e3a6d3SLuigi Rizzo 
122837e3a6d3SLuigi Rizzo cleanup:
122937e3a6d3SLuigi Rizzo 	for (j = 0; j < i; j++) {
123037e3a6d3SLuigi Rizzo 		struct nm_bdg_kthread *t = bps->kthreads + i;
1231c3e9b4dbSLuiz Otavio O Souza 		nm_os_kctx_destroy(t->nmk);
123237e3a6d3SLuigi Rizzo 	}
1233c3e9b4dbSLuiz Otavio O Souza 	nm_os_free(bps->kthreads);
123437e3a6d3SLuigi Rizzo 	return EFAULT;
123537e3a6d3SLuigi Rizzo }
123637e3a6d3SLuigi Rizzo 
1237c3e9b4dbSLuiz Otavio O Souza /* A variant of ptnetmap_start_kthreads() */
123837e3a6d3SLuigi Rizzo static int
123937e3a6d3SLuigi Rizzo nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
124037e3a6d3SLuigi Rizzo {
124137e3a6d3SLuigi Rizzo 	int error, i, j;
124237e3a6d3SLuigi Rizzo 
124337e3a6d3SLuigi Rizzo 	if (!bps) {
124437e3a6d3SLuigi Rizzo 		D("polling is not configured");
124537e3a6d3SLuigi Rizzo 		return EFAULT;
124637e3a6d3SLuigi Rizzo 	}
124737e3a6d3SLuigi Rizzo 	bps->stopped = false;
124837e3a6d3SLuigi Rizzo 
124937e3a6d3SLuigi Rizzo 	for (i = 0; i < bps->ncpus; i++) {
125037e3a6d3SLuigi Rizzo 		struct nm_bdg_kthread *t = bps->kthreads + i;
1251c3e9b4dbSLuiz Otavio O Souza 		error = nm_os_kctx_worker_start(t->nmk);
125237e3a6d3SLuigi Rizzo 		if (error) {
125337e3a6d3SLuigi Rizzo 			D("error in nm_kthread_start()");
125437e3a6d3SLuigi Rizzo 			goto cleanup;
125537e3a6d3SLuigi Rizzo 		}
125637e3a6d3SLuigi Rizzo 	}
125737e3a6d3SLuigi Rizzo 	return 0;
125837e3a6d3SLuigi Rizzo 
125937e3a6d3SLuigi Rizzo cleanup:
126037e3a6d3SLuigi Rizzo 	for (j = 0; j < i; j++) {
126137e3a6d3SLuigi Rizzo 		struct nm_bdg_kthread *t = bps->kthreads + i;
1262c3e9b4dbSLuiz Otavio O Souza 		nm_os_kctx_worker_stop(t->nmk);
126337e3a6d3SLuigi Rizzo 	}
126437e3a6d3SLuigi Rizzo 	bps->stopped = true;
126537e3a6d3SLuigi Rizzo 	return error;
126637e3a6d3SLuigi Rizzo }
126737e3a6d3SLuigi Rizzo 
126837e3a6d3SLuigi Rizzo static void
126937e3a6d3SLuigi Rizzo nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps)
127037e3a6d3SLuigi Rizzo {
127137e3a6d3SLuigi Rizzo 	int i;
127237e3a6d3SLuigi Rizzo 
127337e3a6d3SLuigi Rizzo 	if (!bps)
127437e3a6d3SLuigi Rizzo 		return;
127537e3a6d3SLuigi Rizzo 
127637e3a6d3SLuigi Rizzo 	for (i = 0; i < bps->ncpus; i++) {
127737e3a6d3SLuigi Rizzo 		struct nm_bdg_kthread *t = bps->kthreads + i;
1278c3e9b4dbSLuiz Otavio O Souza 		nm_os_kctx_worker_stop(t->nmk);
1279c3e9b4dbSLuiz Otavio O Souza 		nm_os_kctx_destroy(t->nmk);
128037e3a6d3SLuigi Rizzo 	}
128137e3a6d3SLuigi Rizzo 	bps->stopped = true;
128237e3a6d3SLuigi Rizzo }
128337e3a6d3SLuigi Rizzo 
128437e3a6d3SLuigi Rizzo static int
1285*2ff91c17SVincenzo Maffione get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na,
128637e3a6d3SLuigi Rizzo 		struct nm_bdg_polling_state *bps)
128737e3a6d3SLuigi Rizzo {
1288*2ff91c17SVincenzo Maffione 	unsigned int avail_cpus, core_from;
1289*2ff91c17SVincenzo Maffione 	unsigned int qfirst, qlast;
1290*2ff91c17SVincenzo Maffione 	uint32_t i = req->nr_first_cpu_id;
1291*2ff91c17SVincenzo Maffione 	uint32_t req_cpus = req->nr_num_polling_cpus;
129237e3a6d3SLuigi Rizzo 
129337e3a6d3SLuigi Rizzo 	avail_cpus = nm_os_ncpus();
129437e3a6d3SLuigi Rizzo 
129537e3a6d3SLuigi Rizzo 	if (req_cpus == 0) {
129637e3a6d3SLuigi Rizzo 		D("req_cpus must be > 0");
129737e3a6d3SLuigi Rizzo 		return EINVAL;
129837e3a6d3SLuigi Rizzo 	} else if (req_cpus >= avail_cpus) {
1299*2ff91c17SVincenzo Maffione 		D("Cannot use all the CPUs in the system");
130037e3a6d3SLuigi Rizzo 		return EINVAL;
130137e3a6d3SLuigi Rizzo 	}
1302*2ff91c17SVincenzo Maffione 
1303*2ff91c17SVincenzo Maffione 	if (req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU) {
1304*2ff91c17SVincenzo Maffione 		/* Use a separate core for each ring. If nr_num_polling_cpus>1
1305*2ff91c17SVincenzo Maffione 		 * more consecutive rings are polled.
1306*2ff91c17SVincenzo Maffione 		 * For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2,
1307*2ff91c17SVincenzo Maffione 		 * ring 2 and 3 are polled by core 2 and 3, respectively. */
130837e3a6d3SLuigi Rizzo 		if (i + req_cpus > nma_get_nrings(na, NR_RX)) {
1309*2ff91c17SVincenzo Maffione 			D("Rings %u-%u not in range (have %d rings)",
1310*2ff91c17SVincenzo Maffione 				i, i + req_cpus, nma_get_nrings(na, NR_RX));
131137e3a6d3SLuigi Rizzo 			return EINVAL;
131237e3a6d3SLuigi Rizzo 		}
131337e3a6d3SLuigi Rizzo 		qfirst = i;
131437e3a6d3SLuigi Rizzo 		qlast = qfirst + req_cpus;
131537e3a6d3SLuigi Rizzo 		core_from = qfirst;
1316*2ff91c17SVincenzo Maffione 
1317*2ff91c17SVincenzo Maffione 	} else if (req->nr_mode == NETMAP_POLLING_MODE_SINGLE_CPU) {
1318*2ff91c17SVincenzo Maffione 		/* Poll all the rings using a core specified by nr_first_cpu_id.
1319*2ff91c17SVincenzo Maffione 		 * the number of cores must be 1. */
132037e3a6d3SLuigi Rizzo 		if (req_cpus != 1) {
1321*2ff91c17SVincenzo Maffione 			D("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU "
1322*2ff91c17SVincenzo Maffione 				"(was %d)", req_cpus);
132337e3a6d3SLuigi Rizzo 			return EINVAL;
132437e3a6d3SLuigi Rizzo 		}
132537e3a6d3SLuigi Rizzo 		qfirst = 0;
132637e3a6d3SLuigi Rizzo 		qlast = nma_get_nrings(na, NR_RX);
132737e3a6d3SLuigi Rizzo 		core_from = i;
132837e3a6d3SLuigi Rizzo 	} else {
1329*2ff91c17SVincenzo Maffione 		D("Invalid polling mode");
133037e3a6d3SLuigi Rizzo 		return EINVAL;
133137e3a6d3SLuigi Rizzo 	}
133237e3a6d3SLuigi Rizzo 
1333*2ff91c17SVincenzo Maffione 	bps->mode = req->nr_mode;
133437e3a6d3SLuigi Rizzo 	bps->qfirst = qfirst;
133537e3a6d3SLuigi Rizzo 	bps->qlast = qlast;
133637e3a6d3SLuigi Rizzo 	bps->cpu_from = core_from;
133737e3a6d3SLuigi Rizzo 	bps->ncpus = req_cpus;
133837e3a6d3SLuigi Rizzo 	D("%s qfirst %u qlast %u cpu_from %u ncpus %u",
1339*2ff91c17SVincenzo Maffione 		req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ?
1340*2ff91c17SVincenzo Maffione 		"MULTI" : "SINGLE",
134137e3a6d3SLuigi Rizzo 		qfirst, qlast, core_from, req_cpus);
134237e3a6d3SLuigi Rizzo 	return 0;
134337e3a6d3SLuigi Rizzo }
134437e3a6d3SLuigi Rizzo 
134537e3a6d3SLuigi Rizzo static int
1346*2ff91c17SVincenzo Maffione nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *na)
134737e3a6d3SLuigi Rizzo {
134837e3a6d3SLuigi Rizzo 	struct nm_bdg_polling_state *bps;
134937e3a6d3SLuigi Rizzo 	struct netmap_bwrap_adapter *bna;
135037e3a6d3SLuigi Rizzo 	int error;
135137e3a6d3SLuigi Rizzo 
135237e3a6d3SLuigi Rizzo 	bna = (struct netmap_bwrap_adapter *)na;
135337e3a6d3SLuigi Rizzo 	if (bna->na_polling_state) {
135437e3a6d3SLuigi Rizzo 		D("ERROR adapter already in polling mode");
135537e3a6d3SLuigi Rizzo 		return EFAULT;
135637e3a6d3SLuigi Rizzo 	}
135737e3a6d3SLuigi Rizzo 
1358c3e9b4dbSLuiz Otavio O Souza 	bps = nm_os_malloc(sizeof(*bps));
135937e3a6d3SLuigi Rizzo 	if (!bps)
136037e3a6d3SLuigi Rizzo 		return ENOMEM;
136137e3a6d3SLuigi Rizzo 	bps->configured = false;
136237e3a6d3SLuigi Rizzo 	bps->stopped = true;
136337e3a6d3SLuigi Rizzo 
1364*2ff91c17SVincenzo Maffione 	if (get_polling_cfg(req, na, bps)) {
1365c3e9b4dbSLuiz Otavio O Souza 		nm_os_free(bps);
136637e3a6d3SLuigi Rizzo 		return EINVAL;
136737e3a6d3SLuigi Rizzo 	}
136837e3a6d3SLuigi Rizzo 
136937e3a6d3SLuigi Rizzo 	if (nm_bdg_create_kthreads(bps)) {
1370c3e9b4dbSLuiz Otavio O Souza 		nm_os_free(bps);
137137e3a6d3SLuigi Rizzo 		return EFAULT;
137237e3a6d3SLuigi Rizzo 	}
137337e3a6d3SLuigi Rizzo 
137437e3a6d3SLuigi Rizzo 	bps->configured = true;
137537e3a6d3SLuigi Rizzo 	bna->na_polling_state = bps;
137637e3a6d3SLuigi Rizzo 	bps->bna = bna;
137737e3a6d3SLuigi Rizzo 
13784f80b14cSVincenzo Maffione 	/* disable interrupts if possible */
13794f80b14cSVincenzo Maffione 	nma_intr_enable(bna->hwna, 0);
138037e3a6d3SLuigi Rizzo 	/* start kthread now */
138137e3a6d3SLuigi Rizzo 	error = nm_bdg_polling_start_kthreads(bps);
138237e3a6d3SLuigi Rizzo 	if (error) {
138337e3a6d3SLuigi Rizzo 		D("ERROR nm_bdg_polling_start_kthread()");
1384c3e9b4dbSLuiz Otavio O Souza 		nm_os_free(bps->kthreads);
1385c3e9b4dbSLuiz Otavio O Souza 		nm_os_free(bps);
138637e3a6d3SLuigi Rizzo 		bna->na_polling_state = NULL;
13874f80b14cSVincenzo Maffione 		nma_intr_enable(bna->hwna, 1);
138837e3a6d3SLuigi Rizzo 	}
138937e3a6d3SLuigi Rizzo 	return error;
139037e3a6d3SLuigi Rizzo }
139137e3a6d3SLuigi Rizzo 
139237e3a6d3SLuigi Rizzo static int
1393*2ff91c17SVincenzo Maffione nm_bdg_ctl_polling_stop(struct netmap_adapter *na)
139437e3a6d3SLuigi Rizzo {
139537e3a6d3SLuigi Rizzo 	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na;
139637e3a6d3SLuigi Rizzo 	struct nm_bdg_polling_state *bps;
139737e3a6d3SLuigi Rizzo 
139837e3a6d3SLuigi Rizzo 	if (!bna->na_polling_state) {
139937e3a6d3SLuigi Rizzo 		D("ERROR adapter is not in polling mode");
140037e3a6d3SLuigi Rizzo 		return EFAULT;
140137e3a6d3SLuigi Rizzo 	}
140237e3a6d3SLuigi Rizzo 	bps = bna->na_polling_state;
140337e3a6d3SLuigi Rizzo 	nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state);
140437e3a6d3SLuigi Rizzo 	bps->configured = false;
1405c3e9b4dbSLuiz Otavio O Souza 	nm_os_free(bps);
140637e3a6d3SLuigi Rizzo 	bna->na_polling_state = NULL;
14074f80b14cSVincenzo Maffione 	/* reenable interrupts */
14084f80b14cSVincenzo Maffione 	nma_intr_enable(bna->hwna, 1);
140937e3a6d3SLuigi Rizzo 	return 0;
141037e3a6d3SLuigi Rizzo }
1411f9790aebSLuigi Rizzo 
1412f9790aebSLuigi Rizzo int
1413*2ff91c17SVincenzo Maffione nm_bdg_polling(struct nmreq_header *hdr)
1414f9790aebSLuigi Rizzo {
1415*2ff91c17SVincenzo Maffione 	struct nmreq_vale_polling *req =
1416*2ff91c17SVincenzo Maffione 		(struct nmreq_vale_polling *)hdr->nr_body;
1417*2ff91c17SVincenzo Maffione 	struct netmap_adapter *na = NULL;
1418*2ff91c17SVincenzo Maffione 	int error = 0;
1419*2ff91c17SVincenzo Maffione 
1420*2ff91c17SVincenzo Maffione 	NMG_LOCK();
1421*2ff91c17SVincenzo Maffione 	error = netmap_get_bdg_na(hdr, &na, NULL, /*create=*/0);
1422*2ff91c17SVincenzo Maffione 	if (na && !error) {
1423*2ff91c17SVincenzo Maffione 		if (!nm_is_bwrap(na)) {
1424*2ff91c17SVincenzo Maffione 			error = EOPNOTSUPP;
1425*2ff91c17SVincenzo Maffione 		} else if (hdr->nr_reqtype == NETMAP_BDG_POLLING_ON) {
1426*2ff91c17SVincenzo Maffione 			error = nm_bdg_ctl_polling_start(req, na);
1427*2ff91c17SVincenzo Maffione 			if (!error)
1428*2ff91c17SVincenzo Maffione 				netmap_adapter_get(na);
1429*2ff91c17SVincenzo Maffione 		} else {
1430*2ff91c17SVincenzo Maffione 			error = nm_bdg_ctl_polling_stop(na);
1431*2ff91c17SVincenzo Maffione 			if (!error)
1432*2ff91c17SVincenzo Maffione 				netmap_adapter_put(na);
1433*2ff91c17SVincenzo Maffione 		}
1434*2ff91c17SVincenzo Maffione 		netmap_adapter_put(na);
1435*2ff91c17SVincenzo Maffione 	} else if (!na && !error) {
1436*2ff91c17SVincenzo Maffione 		/* Not VALE port. */
1437*2ff91c17SVincenzo Maffione 		error = EINVAL;
1438*2ff91c17SVincenzo Maffione 	}
1439*2ff91c17SVincenzo Maffione 	NMG_UNLOCK();
1440*2ff91c17SVincenzo Maffione 
1441*2ff91c17SVincenzo Maffione 	return error;
1442*2ff91c17SVincenzo Maffione }
1443*2ff91c17SVincenzo Maffione 
1444*2ff91c17SVincenzo Maffione /* Process NETMAP_REQ_VALE_LIST. */
1445*2ff91c17SVincenzo Maffione int
1446*2ff91c17SVincenzo Maffione netmap_bdg_list(struct nmreq_header *hdr)
1447*2ff91c17SVincenzo Maffione {
1448*2ff91c17SVincenzo Maffione 	struct nmreq_vale_list *req =
1449*2ff91c17SVincenzo Maffione 		(struct nmreq_vale_list *)hdr->nr_body;
1450*2ff91c17SVincenzo Maffione 	int namelen = strlen(hdr->nr_name);
1451847bf383SLuigi Rizzo 	struct nm_bridge *b, *bridges;
1452f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna;
1453f9790aebSLuigi Rizzo 	int error = 0, i, j;
1454847bf383SLuigi Rizzo 	u_int num_bridges;
1455847bf383SLuigi Rizzo 
1456847bf383SLuigi Rizzo 	netmap_bns_getbridges(&bridges, &num_bridges);
1457f9790aebSLuigi Rizzo 
1458f9790aebSLuigi Rizzo 	/* this is used to enumerate bridges and ports */
1459f9790aebSLuigi Rizzo 	if (namelen) { /* look up indexes of bridge and port */
1460*2ff91c17SVincenzo Maffione 		if (strncmp(hdr->nr_name, NM_BDG_NAME,
1461*2ff91c17SVincenzo Maffione 					strlen(NM_BDG_NAME))) {
1462*2ff91c17SVincenzo Maffione 			return EINVAL;
1463f9790aebSLuigi Rizzo 		}
1464f9790aebSLuigi Rizzo 		NMG_LOCK();
1465*2ff91c17SVincenzo Maffione 		b = nm_find_bridge(hdr->nr_name, 0 /* don't create */);
1466f9790aebSLuigi Rizzo 		if (!b) {
1467f9790aebSLuigi Rizzo 			NMG_UNLOCK();
1468*2ff91c17SVincenzo Maffione 			return ENOENT;
1469f9790aebSLuigi Rizzo 		}
1470f9790aebSLuigi Rizzo 
1471*2ff91c17SVincenzo Maffione 		req->nr_bridge_idx = b - bridges; /* bridge index */
1472*2ff91c17SVincenzo Maffione 		req->nr_port_idx = NM_BDG_NOPORT;
1473f9790aebSLuigi Rizzo 		for (j = 0; j < b->bdg_active_ports; j++) {
1474f9790aebSLuigi Rizzo 			i = b->bdg_port_index[j];
1475f9790aebSLuigi Rizzo 			vpna = b->bdg_ports[i];
1476f9790aebSLuigi Rizzo 			if (vpna == NULL) {
1477*2ff91c17SVincenzo Maffione 				D("This should not happen");
1478f9790aebSLuigi Rizzo 				continue;
1479f9790aebSLuigi Rizzo 			}
1480f9790aebSLuigi Rizzo 			/* the former and the latter identify a
1481f9790aebSLuigi Rizzo 			 * virtual port and a NIC, respectively
1482f9790aebSLuigi Rizzo 			 */
1483*2ff91c17SVincenzo Maffione 			if (!strcmp(vpna->up.name, hdr->nr_name)) {
1484*2ff91c17SVincenzo Maffione 				req->nr_port_idx = i; /* port index */
1485f9790aebSLuigi Rizzo 				break;
1486f9790aebSLuigi Rizzo 			}
1487f9790aebSLuigi Rizzo 		}
1488f9790aebSLuigi Rizzo 		NMG_UNLOCK();
1489f9790aebSLuigi Rizzo 	} else {
1490f9790aebSLuigi Rizzo 		/* return the first non-empty entry starting from
1491f9790aebSLuigi Rizzo 		 * bridge nr_arg1 and port nr_arg2.
1492f9790aebSLuigi Rizzo 		 *
1493f9790aebSLuigi Rizzo 		 * Users can detect the end of the same bridge by
1494f9790aebSLuigi Rizzo 		 * seeing the new and old value of nr_arg1, and can
1495f9790aebSLuigi Rizzo 		 * detect the end of all the bridge by error != 0
1496f9790aebSLuigi Rizzo 		 */
1497*2ff91c17SVincenzo Maffione 		i = req->nr_bridge_idx;
1498*2ff91c17SVincenzo Maffione 		j = req->nr_port_idx;
1499f9790aebSLuigi Rizzo 
1500f9790aebSLuigi Rizzo 		NMG_LOCK();
1501f9790aebSLuigi Rizzo 		for (error = ENOENT; i < NM_BRIDGES; i++) {
1502847bf383SLuigi Rizzo 			b = bridges + i;
1503c3e9b4dbSLuiz Otavio O Souza 			for ( ; j < NM_BDG_MAXPORTS; j++) {
1504c3e9b4dbSLuiz Otavio O Souza 				if (b->bdg_ports[j] == NULL)
1505f9790aebSLuigi Rizzo 					continue;
1506f9790aebSLuigi Rizzo 				vpna = b->bdg_ports[j];
1507*2ff91c17SVincenzo Maffione 				/* write back the VALE switch name */
1508*2ff91c17SVincenzo Maffione 				strncpy(hdr->nr_name, vpna->up.name,
1509*2ff91c17SVincenzo Maffione 					(size_t)IFNAMSIZ);
1510f9790aebSLuigi Rizzo 				error = 0;
1511c3e9b4dbSLuiz Otavio O Souza 				goto out;
1512f9790aebSLuigi Rizzo 			}
1513c3e9b4dbSLuiz Otavio O Souza 			j = 0; /* following bridges scan from 0 */
1514c3e9b4dbSLuiz Otavio O Souza 		}
1515c3e9b4dbSLuiz Otavio O Souza 	out:
1516*2ff91c17SVincenzo Maffione 		req->nr_bridge_idx = i;
1517*2ff91c17SVincenzo Maffione 		req->nr_port_idx = j;
1518f9790aebSLuigi Rizzo 		NMG_UNLOCK();
1519f9790aebSLuigi Rizzo 	}
1520f9790aebSLuigi Rizzo 
1521*2ff91c17SVincenzo Maffione 	return error;
1522f9790aebSLuigi Rizzo }
1523*2ff91c17SVincenzo Maffione 
1524*2ff91c17SVincenzo Maffione /* Called by external kernel modules (e.g., Openvswitch).
1525*2ff91c17SVincenzo Maffione  * to set configure/lookup/dtor functions of a VALE instance.
1526*2ff91c17SVincenzo Maffione  * Register callbacks to the given bridge. 'name' may be just
1527*2ff91c17SVincenzo Maffione  * bridge's name (including ':' if it is not just NM_BDG_NAME).
1528*2ff91c17SVincenzo Maffione  *
1529*2ff91c17SVincenzo Maffione  * Called without NMG_LOCK.
1530*2ff91c17SVincenzo Maffione  */
1531*2ff91c17SVincenzo Maffione 
1532*2ff91c17SVincenzo Maffione int
1533*2ff91c17SVincenzo Maffione netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token)
1534*2ff91c17SVincenzo Maffione {
1535*2ff91c17SVincenzo Maffione 	struct nm_bridge *b;
1536*2ff91c17SVincenzo Maffione 	int error = 0;
1537*2ff91c17SVincenzo Maffione 
1538*2ff91c17SVincenzo Maffione 	NMG_LOCK();
1539*2ff91c17SVincenzo Maffione 	b = nm_find_bridge(name, 0 /* don't create */);
1540*2ff91c17SVincenzo Maffione 	if (!b) {
1541*2ff91c17SVincenzo Maffione 		error = ENXIO;
1542*2ff91c17SVincenzo Maffione 		goto unlock_regops;
1543*2ff91c17SVincenzo Maffione 	}
1544*2ff91c17SVincenzo Maffione 	if (!nm_bdg_valid_auth_token(b, auth_token)) {
1545*2ff91c17SVincenzo Maffione 		error = EACCES;
1546*2ff91c17SVincenzo Maffione 		goto unlock_regops;
1547*2ff91c17SVincenzo Maffione 	}
1548*2ff91c17SVincenzo Maffione 
1549*2ff91c17SVincenzo Maffione 	BDG_WLOCK(b);
1550*2ff91c17SVincenzo Maffione 	if (!bdg_ops) {
1551*2ff91c17SVincenzo Maffione 		/* resetting the bridge */
1552*2ff91c17SVincenzo Maffione 		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
1553*2ff91c17SVincenzo Maffione 		b->bdg_ops = &default_bdg_ops;
1554*2ff91c17SVincenzo Maffione 		b->private_data = b->ht;
1555*2ff91c17SVincenzo Maffione 	} else {
1556*2ff91c17SVincenzo Maffione 		/* modifying the bridge */
1557*2ff91c17SVincenzo Maffione 		b->private_data = private_data;
1558*2ff91c17SVincenzo Maffione 		b->bdg_ops = bdg_ops;
1559*2ff91c17SVincenzo Maffione 	}
1560*2ff91c17SVincenzo Maffione 	BDG_WUNLOCK(b);
1561*2ff91c17SVincenzo Maffione 
1562*2ff91c17SVincenzo Maffione unlock_regops:
1563*2ff91c17SVincenzo Maffione 	NMG_UNLOCK();
1564*2ff91c17SVincenzo Maffione 	return error;
1565*2ff91c17SVincenzo Maffione }
1566*2ff91c17SVincenzo Maffione 
1567*2ff91c17SVincenzo Maffione /* Called by external kernel modules (e.g., Openvswitch).
1568*2ff91c17SVincenzo Maffione  * to modify the private data previously given to regops().
1569*2ff91c17SVincenzo Maffione  * 'name' may be just bridge's name (including ':' if it
1570*2ff91c17SVincenzo Maffione  * is not just NM_BDG_NAME).
1571*2ff91c17SVincenzo Maffione  * Called without NMG_LOCK.
1572*2ff91c17SVincenzo Maffione  */
1573*2ff91c17SVincenzo Maffione int
1574*2ff91c17SVincenzo Maffione nm_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
1575*2ff91c17SVincenzo Maffione 	void *callback_data, void *auth_token)
1576*2ff91c17SVincenzo Maffione {
1577*2ff91c17SVincenzo Maffione 	void *private_data = NULL;
1578*2ff91c17SVincenzo Maffione 	struct nm_bridge *b;
1579*2ff91c17SVincenzo Maffione 	int error = 0;
1580*2ff91c17SVincenzo Maffione 
1581f9790aebSLuigi Rizzo 	NMG_LOCK();
1582f9790aebSLuigi Rizzo 	b = nm_find_bridge(name, 0 /* don't create */);
1583f9790aebSLuigi Rizzo 	if (!b) {
1584f9790aebSLuigi Rizzo 		error = EINVAL;
1585*2ff91c17SVincenzo Maffione 		goto unlock_update_priv;
1586f9790aebSLuigi Rizzo 	}
1587*2ff91c17SVincenzo Maffione 	if (!nm_bdg_valid_auth_token(b, auth_token)) {
1588*2ff91c17SVincenzo Maffione 		error = EACCES;
1589*2ff91c17SVincenzo Maffione 		goto unlock_update_priv;
1590*2ff91c17SVincenzo Maffione 	}
1591*2ff91c17SVincenzo Maffione 	BDG_WLOCK(b);
1592*2ff91c17SVincenzo Maffione 	private_data = callback(b->private_data, callback_data, &error);
1593*2ff91c17SVincenzo Maffione 	b->private_data = private_data;
1594*2ff91c17SVincenzo Maffione 	BDG_WUNLOCK(b);
1595f9790aebSLuigi Rizzo 
1596*2ff91c17SVincenzo Maffione unlock_update_priv:
159737e3a6d3SLuigi Rizzo 	NMG_UNLOCK();
1598f9790aebSLuigi Rizzo 	return error;
1599f9790aebSLuigi Rizzo }
1600f9790aebSLuigi Rizzo 
16014bf50f18SLuigi Rizzo int
1602*2ff91c17SVincenzo Maffione netmap_bdg_config(struct nm_ifreq *nr)
16034bf50f18SLuigi Rizzo {
16044bf50f18SLuigi Rizzo 	struct nm_bridge *b;
16054bf50f18SLuigi Rizzo 	int error = EINVAL;
16064bf50f18SLuigi Rizzo 
16074bf50f18SLuigi Rizzo 	NMG_LOCK();
1608*2ff91c17SVincenzo Maffione 	b = nm_find_bridge(nr->nifr_name, 0);
16094bf50f18SLuigi Rizzo 	if (!b) {
16104bf50f18SLuigi Rizzo 		NMG_UNLOCK();
16114bf50f18SLuigi Rizzo 		return error;
16124bf50f18SLuigi Rizzo 	}
16134bf50f18SLuigi Rizzo 	NMG_UNLOCK();
16144bf50f18SLuigi Rizzo 	/* Don't call config() with NMG_LOCK() held */
16154bf50f18SLuigi Rizzo 	BDG_RLOCK(b);
1616*2ff91c17SVincenzo Maffione 	if (b->bdg_ops->config != NULL)
1617*2ff91c17SVincenzo Maffione 		error = b->bdg_ops->config(nr);
16184bf50f18SLuigi Rizzo 	BDG_RUNLOCK(b);
16194bf50f18SLuigi Rizzo 	return error;
16204bf50f18SLuigi Rizzo }
16214bf50f18SLuigi Rizzo 
16224bf50f18SLuigi Rizzo 
16234bf50f18SLuigi Rizzo /* nm_krings_create callback for VALE ports.
16244bf50f18SLuigi Rizzo  * Calls the standard netmap_krings_create, then adds leases on rx
16254bf50f18SLuigi Rizzo  * rings and bdgfwd on tx rings.
16264bf50f18SLuigi Rizzo  */
1627f9790aebSLuigi Rizzo static int
1628f9790aebSLuigi Rizzo netmap_vp_krings_create(struct netmap_adapter *na)
1629f9790aebSLuigi Rizzo {
1630f0ea3689SLuigi Rizzo 	u_int tailroom;
1631f9790aebSLuigi Rizzo 	int error, i;
1632f9790aebSLuigi Rizzo 	uint32_t *leases;
1633847bf383SLuigi Rizzo 	u_int nrx = netmap_real_rings(na, NR_RX);
1634f9790aebSLuigi Rizzo 
1635f9790aebSLuigi Rizzo 	/*
1636f9790aebSLuigi Rizzo 	 * Leases are attached to RX rings on vale ports
1637f9790aebSLuigi Rizzo 	 */
1638f9790aebSLuigi Rizzo 	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
1639f9790aebSLuigi Rizzo 
1640f0ea3689SLuigi Rizzo 	error = netmap_krings_create(na, tailroom);
1641f9790aebSLuigi Rizzo 	if (error)
1642f9790aebSLuigi Rizzo 		return error;
1643f9790aebSLuigi Rizzo 
1644f9790aebSLuigi Rizzo 	leases = na->tailroom;
1645f9790aebSLuigi Rizzo 
1646f9790aebSLuigi Rizzo 	for (i = 0; i < nrx; i++) { /* Receive rings */
1647*2ff91c17SVincenzo Maffione 		na->rx_rings[i]->nkr_leases = leases;
1648f9790aebSLuigi Rizzo 		leases += na->num_rx_desc;
1649f9790aebSLuigi Rizzo 	}
1650f9790aebSLuigi Rizzo 
1651f9790aebSLuigi Rizzo 	error = nm_alloc_bdgfwd(na);
1652f9790aebSLuigi Rizzo 	if (error) {
1653f9790aebSLuigi Rizzo 		netmap_krings_delete(na);
1654f9790aebSLuigi Rizzo 		return error;
1655f9790aebSLuigi Rizzo 	}
1656f9790aebSLuigi Rizzo 
1657f9790aebSLuigi Rizzo 	return 0;
1658f9790aebSLuigi Rizzo }
1659f9790aebSLuigi Rizzo 
166017885a7bSLuigi Rizzo 
/*
 * nm_krings_delete callback for VALE ports: release the forwarding
 * tables first (nm_free_bdgfwd), then the krings themselves.
 */
static void
netmap_vp_krings_delete(struct netmap_adapter *na)
{
	nm_free_bdgfwd(na);

	netmap_krings_delete(na);
}
1668f9790aebSLuigi Rizzo 
1669f9790aebSLuigi Rizzo 
1670f9790aebSLuigi Rizzo static int
1671f9790aebSLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
1672f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *na, u_int ring_nr);
1673f9790aebSLuigi Rizzo 
1674f9790aebSLuigi Rizzo 
1675f9790aebSLuigi Rizzo /*
16764bf50f18SLuigi Rizzo  * main dispatch routine for the bridge.
1677f9790aebSLuigi Rizzo  * Grab packets from a kring, move them into the ft structure
1678f9790aebSLuigi Rizzo  * associated to the tx (input) port. Max one instance per port,
1679f9790aebSLuigi Rizzo  * filtered on input (ioctl, poll or XXX).
1680f9790aebSLuigi Rizzo  * Returns the next position in the ring.
1681f9790aebSLuigi Rizzo  */
1682f9790aebSLuigi Rizzo static int
16834bf50f18SLuigi Rizzo nm_bdg_preflush(struct netmap_kring *kring, u_int end)
1684f9790aebSLuigi Rizzo {
16854bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *na =
16864bf50f18SLuigi Rizzo 		(struct netmap_vp_adapter*)kring->na;
1687f9790aebSLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
1688f9790aebSLuigi Rizzo 	struct nm_bdg_fwd *ft;
16894bf50f18SLuigi Rizzo 	u_int ring_nr = kring->ring_id;
1690f9790aebSLuigi Rizzo 	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
1691f9790aebSLuigi Rizzo 	u_int ft_i = 0;	/* start from 0 */
1692f9790aebSLuigi Rizzo 	u_int frags = 1; /* how many frags ? */
1693f9790aebSLuigi Rizzo 	struct nm_bridge *b = na->na_bdg;
1694f9790aebSLuigi Rizzo 
1695f9790aebSLuigi Rizzo 	/* To protect against modifications to the bridge we acquire a
1696f9790aebSLuigi Rizzo 	 * shared lock, waiting if we can sleep (if the source port is
1697f9790aebSLuigi Rizzo 	 * attached to a user process) or with a trylock otherwise (NICs).
1698f9790aebSLuigi Rizzo 	 */
1699f9790aebSLuigi Rizzo 	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
1700f9790aebSLuigi Rizzo 	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
1701f9790aebSLuigi Rizzo 		BDG_RLOCK(b);
1702f9790aebSLuigi Rizzo 	else if (!BDG_RTRYLOCK(b))
1703c3e9b4dbSLuiz Otavio O Souza 		return j;
1704f9790aebSLuigi Rizzo 	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
1705f9790aebSLuigi Rizzo 	ft = kring->nkr_ft;
1706f9790aebSLuigi Rizzo 
1707f9790aebSLuigi Rizzo 	for (; likely(j != end); j = nm_next(j, lim)) {
1708f9790aebSLuigi Rizzo 		struct netmap_slot *slot = &ring->slot[j];
1709f9790aebSLuigi Rizzo 		char *buf;
1710f9790aebSLuigi Rizzo 
1711f9790aebSLuigi Rizzo 		ft[ft_i].ft_len = slot->len;
1712f9790aebSLuigi Rizzo 		ft[ft_i].ft_flags = slot->flags;
1713*2ff91c17SVincenzo Maffione 		ft[ft_i].ft_offset = 0;
1714f9790aebSLuigi Rizzo 
1715f9790aebSLuigi Rizzo 		ND("flags is 0x%x", slot->flags);
1716847bf383SLuigi Rizzo 		/* we do not use the buf changed flag, but we still need to reset it */
1717847bf383SLuigi Rizzo 		slot->flags &= ~NS_BUF_CHANGED;
1718847bf383SLuigi Rizzo 
1719f9790aebSLuigi Rizzo 		/* this slot goes into a list so initialize the link field */
1720f9790aebSLuigi Rizzo 		ft[ft_i].ft_next = NM_FT_NULL;
1721f9790aebSLuigi Rizzo 		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
17224bf50f18SLuigi Rizzo 			(void *)(uintptr_t)slot->ptr : NMB(&na->up, slot);
1723e31c6ec7SLuigi Rizzo 		if (unlikely(buf == NULL)) {
1724e31c6ec7SLuigi Rizzo 			RD(5, "NULL %s buffer pointer from %s slot %d len %d",
1725e31c6ec7SLuigi Rizzo 				(slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT",
1726e31c6ec7SLuigi Rizzo 				kring->name, j, ft[ft_i].ft_len);
17274bf50f18SLuigi Rizzo 			buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up);
1728e31c6ec7SLuigi Rizzo 			ft[ft_i].ft_len = 0;
1729e31c6ec7SLuigi Rizzo 			ft[ft_i].ft_flags = 0;
1730e31c6ec7SLuigi Rizzo 		}
17312e159ef0SLuigi Rizzo 		__builtin_prefetch(buf);
1732f9790aebSLuigi Rizzo 		++ft_i;
1733f9790aebSLuigi Rizzo 		if (slot->flags & NS_MOREFRAG) {
1734f9790aebSLuigi Rizzo 			frags++;
1735f9790aebSLuigi Rizzo 			continue;
1736f9790aebSLuigi Rizzo 		}
1737f9790aebSLuigi Rizzo 		if (unlikely(netmap_verbose && frags > 1))
1738f9790aebSLuigi Rizzo 			RD(5, "%d frags at %d", frags, ft_i - frags);
1739f9790aebSLuigi Rizzo 		ft[ft_i - frags].ft_frags = frags;
1740f9790aebSLuigi Rizzo 		frags = 1;
1741f9790aebSLuigi Rizzo 		if (unlikely((int)ft_i >= bridge_batch))
1742f9790aebSLuigi Rizzo 			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1743f9790aebSLuigi Rizzo 	}
1744f9790aebSLuigi Rizzo 	if (frags > 1) {
174537e3a6d3SLuigi Rizzo 		/* Here ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG, and we
174637e3a6d3SLuigi Rizzo 		 * have to fix frags count. */
174737e3a6d3SLuigi Rizzo 		frags--;
174837e3a6d3SLuigi Rizzo 		ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG;
174937e3a6d3SLuigi Rizzo 		ft[ft_i - frags].ft_frags = frags;
175037e3a6d3SLuigi Rizzo 		D("Truncate incomplete fragment at %d (%d frags)", ft_i, frags);
1751f9790aebSLuigi Rizzo 	}
1752f9790aebSLuigi Rizzo 	if (ft_i)
1753f9790aebSLuigi Rizzo 		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1754f9790aebSLuigi Rizzo 	BDG_RUNLOCK(b);
1755f9790aebSLuigi Rizzo 	return j;
1756f9790aebSLuigi Rizzo }
1757f9790aebSLuigi Rizzo 
1758f9790aebSLuigi Rizzo 
1759f9790aebSLuigi Rizzo /* ----- FreeBSD if_bridge hash function ------- */
1760f9790aebSLuigi Rizzo 
1761f9790aebSLuigi Rizzo /*
1762f9790aebSLuigi Rizzo  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
1763f9790aebSLuigi Rizzo  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
1764f9790aebSLuigi Rizzo  *
1765f9790aebSLuigi Rizzo  * http://www.burtleburtle.net/bob/hash/spooky.html
1766f9790aebSLuigi Rizzo  */
1767f9790aebSLuigi Rizzo #define mix(a, b, c)                                                    \
1768f9790aebSLuigi Rizzo do {                                                                    \
1769f9790aebSLuigi Rizzo         a -= b; a -= c; a ^= (c >> 13);                                 \
1770f9790aebSLuigi Rizzo         b -= c; b -= a; b ^= (a << 8);                                  \
1771f9790aebSLuigi Rizzo         c -= a; c -= b; c ^= (b >> 13);                                 \
1772f9790aebSLuigi Rizzo         a -= b; a -= c; a ^= (c >> 12);                                 \
1773f9790aebSLuigi Rizzo         b -= c; b -= a; b ^= (a << 16);                                 \
1774f9790aebSLuigi Rizzo         c -= a; c -= b; c ^= (b >> 5);                                  \
1775f9790aebSLuigi Rizzo         a -= b; a -= c; a ^= (c >> 3);                                  \
1776f9790aebSLuigi Rizzo         b -= c; b -= a; b ^= (a << 10);                                 \
1777f9790aebSLuigi Rizzo         c -= a; c -= b; c ^= (b >> 15);                                 \
1778f9790aebSLuigi Rizzo } while (/*CONSTCOND*/0)
1779f9790aebSLuigi Rizzo 
178017885a7bSLuigi Rizzo 
1781f9790aebSLuigi Rizzo static __inline uint32_t
1782f9790aebSLuigi Rizzo nm_bridge_rthash(const uint8_t *addr)
1783f9790aebSLuigi Rizzo {
1784f9790aebSLuigi Rizzo         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key
1785f9790aebSLuigi Rizzo 
1786f9790aebSLuigi Rizzo         b += addr[5] << 8;
1787f9790aebSLuigi Rizzo         b += addr[4];
1788f9790aebSLuigi Rizzo         a += addr[3] << 24;
1789f9790aebSLuigi Rizzo         a += addr[2] << 16;
1790f9790aebSLuigi Rizzo         a += addr[1] << 8;
1791f9790aebSLuigi Rizzo         a += addr[0];
1792f9790aebSLuigi Rizzo 
1793f9790aebSLuigi Rizzo         mix(a, b, c);
1794f9790aebSLuigi Rizzo #define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
1795f9790aebSLuigi Rizzo         return (c & BRIDGE_RTHASH_MASK);
1796f9790aebSLuigi Rizzo }
1797f9790aebSLuigi Rizzo 
1798f9790aebSLuigi Rizzo #undef mix
1799f9790aebSLuigi Rizzo 
1800f9790aebSLuigi Rizzo 
18014bf50f18SLuigi Rizzo /* nm_register callback for VALE ports */
1802f9790aebSLuigi Rizzo static int
18034bf50f18SLuigi Rizzo netmap_vp_reg(struct netmap_adapter *na, int onoff)
1804f9790aebSLuigi Rizzo {
1805f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna =
1806f9790aebSLuigi Rizzo 		(struct netmap_vp_adapter*)na;
180737e3a6d3SLuigi Rizzo 	enum txrx t;
180837e3a6d3SLuigi Rizzo 	int i;
1809f9790aebSLuigi Rizzo 
18104bf50f18SLuigi Rizzo 	/* persistent ports may be put in netmap mode
18114bf50f18SLuigi Rizzo 	 * before being attached to a bridge
1812f9790aebSLuigi Rizzo 	 */
18134bf50f18SLuigi Rizzo 	if (vpna->na_bdg)
1814f9790aebSLuigi Rizzo 		BDG_WLOCK(vpna->na_bdg);
1815f9790aebSLuigi Rizzo 	if (onoff) {
181637e3a6d3SLuigi Rizzo 		for_rx_tx(t) {
18174f80b14cSVincenzo Maffione 			for (i = 0; i < netmap_real_rings(na, t); i++) {
1818*2ff91c17SVincenzo Maffione 				struct netmap_kring *kring = NMR(na, t)[i];
181937e3a6d3SLuigi Rizzo 
182037e3a6d3SLuigi Rizzo 				if (nm_kring_pending_on(kring))
182137e3a6d3SLuigi Rizzo 					kring->nr_mode = NKR_NETMAP_ON;
182237e3a6d3SLuigi Rizzo 			}
182337e3a6d3SLuigi Rizzo 		}
182437e3a6d3SLuigi Rizzo 		if (na->active_fds == 0)
18254bf50f18SLuigi Rizzo 			na->na_flags |= NAF_NETMAP_ON;
18264bf50f18SLuigi Rizzo 		 /* XXX on FreeBSD, persistent VALE ports should also
18274bf50f18SLuigi Rizzo 		 * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
18284bf50f18SLuigi Rizzo 		 */
1829f9790aebSLuigi Rizzo 	} else {
183037e3a6d3SLuigi Rizzo 		if (na->active_fds == 0)
18314bf50f18SLuigi Rizzo 			na->na_flags &= ~NAF_NETMAP_ON;
183237e3a6d3SLuigi Rizzo 		for_rx_tx(t) {
18334f80b14cSVincenzo Maffione 			for (i = 0; i < netmap_real_rings(na, t); i++) {
1834*2ff91c17SVincenzo Maffione 				struct netmap_kring *kring = NMR(na, t)[i];
183537e3a6d3SLuigi Rizzo 
183637e3a6d3SLuigi Rizzo 				if (nm_kring_pending_off(kring))
183737e3a6d3SLuigi Rizzo 					kring->nr_mode = NKR_NETMAP_OFF;
183837e3a6d3SLuigi Rizzo 			}
183937e3a6d3SLuigi Rizzo 		}
1840f9790aebSLuigi Rizzo 	}
18414bf50f18SLuigi Rizzo 	if (vpna->na_bdg)
1842f9790aebSLuigi Rizzo 		BDG_WUNLOCK(vpna->na_bdg);
1843f9790aebSLuigi Rizzo 	return 0;
1844f9790aebSLuigi Rizzo }
1845f9790aebSLuigi Rizzo 
1846f9790aebSLuigi Rizzo 
1847f9790aebSLuigi Rizzo /*
1848f9790aebSLuigi Rizzo  * Lookup function for a learning bridge.
1849f9790aebSLuigi Rizzo  * Update the hash table with the source address,
1850f9790aebSLuigi Rizzo  * and then returns the destination port index, and the
1851f9790aebSLuigi Rizzo  * ring in *dst_ring (at the moment, always use ring 0)
1852f9790aebSLuigi Rizzo  */
1853*2ff91c17SVincenzo Maffione uint32_t
18544bf50f18SLuigi Rizzo netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
1855*2ff91c17SVincenzo Maffione 		struct netmap_vp_adapter *na, void *private_data)
1856f9790aebSLuigi Rizzo {
1857*2ff91c17SVincenzo Maffione 	uint8_t *buf = ((uint8_t *)ft->ft_buf) + ft->ft_offset;
1858*2ff91c17SVincenzo Maffione 	u_int buf_len = ft->ft_len - ft->ft_offset;
1859*2ff91c17SVincenzo Maffione 	struct nm_hash_ent *ht = private_data;
1860f9790aebSLuigi Rizzo 	uint32_t sh, dh;
1861f9790aebSLuigi Rizzo 	u_int dst, mysrc = na->bdg_port;
1862f9790aebSLuigi Rizzo 	uint64_t smac, dmac;
186337e3a6d3SLuigi Rizzo 	uint8_t indbuf[12];
1864f9790aebSLuigi Rizzo 
1865*2ff91c17SVincenzo Maffione 	if (buf_len < 14) {
1866f9790aebSLuigi Rizzo 		return NM_BDG_NOPORT;
1867f9790aebSLuigi Rizzo 	}
186837e3a6d3SLuigi Rizzo 
186937e3a6d3SLuigi Rizzo 	if (ft->ft_flags & NS_INDIRECT) {
187037e3a6d3SLuigi Rizzo 		if (copyin(buf, indbuf, sizeof(indbuf))) {
187137e3a6d3SLuigi Rizzo 			return NM_BDG_NOPORT;
187237e3a6d3SLuigi Rizzo 		}
187337e3a6d3SLuigi Rizzo 		buf = indbuf;
187437e3a6d3SLuigi Rizzo 	}
187537e3a6d3SLuigi Rizzo 
1876f9790aebSLuigi Rizzo 	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
1877f9790aebSLuigi Rizzo 	smac = le64toh(*(uint64_t *)(buf + 4));
1878f9790aebSLuigi Rizzo 	smac >>= 16;
1879f9790aebSLuigi Rizzo 
1880f9790aebSLuigi Rizzo 	/*
1881f9790aebSLuigi Rizzo 	 * The hash is somewhat expensive, there might be some
1882f9790aebSLuigi Rizzo 	 * worthwhile optimizations here.
1883f9790aebSLuigi Rizzo 	 */
1884847bf383SLuigi Rizzo 	if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */
1885f9790aebSLuigi Rizzo 		uint8_t *s = buf+6;
18864f80b14cSVincenzo Maffione 		sh = nm_bridge_rthash(s); /* hash of source */
1887f9790aebSLuigi Rizzo 		/* update source port forwarding entry */
1888847bf383SLuigi Rizzo 		na->last_smac = ht[sh].mac = smac;	/* XXX expire ? */
1889f9790aebSLuigi Rizzo 		ht[sh].ports = mysrc;
1890f9790aebSLuigi Rizzo 		if (netmap_verbose)
1891f9790aebSLuigi Rizzo 		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
1892f9790aebSLuigi Rizzo 			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
1893f9790aebSLuigi Rizzo 	}
1894f9790aebSLuigi Rizzo 	dst = NM_BDG_BROADCAST;
1895f9790aebSLuigi Rizzo 	if ((buf[0] & 1) == 0) { /* unicast */
18964f80b14cSVincenzo Maffione 		dh = nm_bridge_rthash(buf); /* hash of dst */
1897f9790aebSLuigi Rizzo 		if (ht[dh].mac == dmac) {	/* found dst */
1898f9790aebSLuigi Rizzo 			dst = ht[dh].ports;
1899f9790aebSLuigi Rizzo 		}
1900f9790aebSLuigi Rizzo 	}
1901f9790aebSLuigi Rizzo 	return dst;
1902f9790aebSLuigi Rizzo }
1903f9790aebSLuigi Rizzo 
1904f9790aebSLuigi Rizzo 
1905f9790aebSLuigi Rizzo /*
190617885a7bSLuigi Rizzo  * Available space in the ring. Only used in VALE code
190717885a7bSLuigi Rizzo  * and only with is_rx = 1
190817885a7bSLuigi Rizzo  */
190917885a7bSLuigi Rizzo static inline uint32_t
191017885a7bSLuigi Rizzo nm_kr_space(struct netmap_kring *k, int is_rx)
191117885a7bSLuigi Rizzo {
191217885a7bSLuigi Rizzo 	int space;
191317885a7bSLuigi Rizzo 
191417885a7bSLuigi Rizzo 	if (is_rx) {
191517885a7bSLuigi Rizzo 		int busy = k->nkr_hwlease - k->nr_hwcur;
191617885a7bSLuigi Rizzo 		if (busy < 0)
191717885a7bSLuigi Rizzo 			busy += k->nkr_num_slots;
191817885a7bSLuigi Rizzo 		space = k->nkr_num_slots - 1 - busy;
191917885a7bSLuigi Rizzo 	} else {
192017885a7bSLuigi Rizzo 		/* XXX never used in this branch */
192117885a7bSLuigi Rizzo 		space = k->nr_hwtail - k->nkr_hwlease;
192217885a7bSLuigi Rizzo 		if (space < 0)
192317885a7bSLuigi Rizzo 			space += k->nkr_num_slots;
192417885a7bSLuigi Rizzo 	}
192517885a7bSLuigi Rizzo #if 0
192617885a7bSLuigi Rizzo 	// sanity check
192717885a7bSLuigi Rizzo 	if (k->nkr_hwlease >= k->nkr_num_slots ||
192817885a7bSLuigi Rizzo 		k->nr_hwcur >= k->nkr_num_slots ||
192917885a7bSLuigi Rizzo 		k->nr_tail >= k->nkr_num_slots ||
193017885a7bSLuigi Rizzo 		busy < 0 ||
193117885a7bSLuigi Rizzo 		busy >= k->nkr_num_slots) {
193217885a7bSLuigi Rizzo 		D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d",			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
193317885a7bSLuigi Rizzo 			k->nkr_lease_idx, k->nkr_num_slots);
193417885a7bSLuigi Rizzo 	}
193517885a7bSLuigi Rizzo #endif
193617885a7bSLuigi Rizzo 	return space;
193717885a7bSLuigi Rizzo }
193817885a7bSLuigi Rizzo 
193917885a7bSLuigi Rizzo 
194017885a7bSLuigi Rizzo 
194117885a7bSLuigi Rizzo 
194217885a7bSLuigi Rizzo /* make a lease on the kring for N positions. return the
194317885a7bSLuigi Rizzo  * lease index
194417885a7bSLuigi Rizzo  * XXX only used in VALE code and with is_rx = 1
194517885a7bSLuigi Rizzo  */
194617885a7bSLuigi Rizzo static inline uint32_t
194717885a7bSLuigi Rizzo nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
194817885a7bSLuigi Rizzo {
194917885a7bSLuigi Rizzo 	uint32_t lim = k->nkr_num_slots - 1;
195017885a7bSLuigi Rizzo 	uint32_t lease_idx = k->nkr_lease_idx;
195117885a7bSLuigi Rizzo 
195217885a7bSLuigi Rizzo 	k->nkr_leases[lease_idx] = NR_NOSLOT;
195317885a7bSLuigi Rizzo 	k->nkr_lease_idx = nm_next(lease_idx, lim);
195417885a7bSLuigi Rizzo 
195517885a7bSLuigi Rizzo 	if (n > nm_kr_space(k, is_rx)) {
195617885a7bSLuigi Rizzo 		D("invalid request for %d slots", n);
195717885a7bSLuigi Rizzo 		panic("x");
195817885a7bSLuigi Rizzo 	}
195917885a7bSLuigi Rizzo 	/* XXX verify that there are n slots */
196017885a7bSLuigi Rizzo 	k->nkr_hwlease += n;
196117885a7bSLuigi Rizzo 	if (k->nkr_hwlease > lim)
196217885a7bSLuigi Rizzo 		k->nkr_hwlease -= lim + 1;
196317885a7bSLuigi Rizzo 
196417885a7bSLuigi Rizzo 	if (k->nkr_hwlease >= k->nkr_num_slots ||
196517885a7bSLuigi Rizzo 		k->nr_hwcur >= k->nkr_num_slots ||
196617885a7bSLuigi Rizzo 		k->nr_hwtail >= k->nkr_num_slots ||
196717885a7bSLuigi Rizzo 		k->nkr_lease_idx >= k->nkr_num_slots) {
196817885a7bSLuigi Rizzo 		D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
19694bf50f18SLuigi Rizzo 			k->na->name,
197017885a7bSLuigi Rizzo 			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
197117885a7bSLuigi Rizzo 			k->nkr_lease_idx, k->nkr_num_slots);
197217885a7bSLuigi Rizzo 	}
197317885a7bSLuigi Rizzo 	return lease_idx;
197417885a7bSLuigi Rizzo }
197517885a7bSLuigi Rizzo 
197617885a7bSLuigi Rizzo /*
19774bf50f18SLuigi Rizzo  *
1978f9790aebSLuigi Rizzo  * This flush routine supports only unicast and broadcast but a large
1979f9790aebSLuigi Rizzo  * number of ports, and lets us replace the learn and dispatch functions.
1980f9790aebSLuigi Rizzo  */
/*
 * Forward a batch of packets from source port 'na' to their destinations.
 *
 * ft       work area describing the batch; entry i starts a packet made
 *          of ft[i].ft_frags consecutive entries
 * n        number of entries of ft in use
 * na       source VALE port; its bridge (na->na_bdg) supplies the lookup
 *          callback and the table of ports
 * ring_nr  source ring index, used as the default destination ring
 *
 * Pass 1 asks the bridge lookup callback for a destination for each
 * packet and chains the packet on the queue of that destination.
 * Pass 2 visits each destination, leases slots on its rx kring, copies
 * the packets and reports completed leases through nr_hwtail.
 * Packets that do not fit in the leased slots may be dropped.
 * All queues are reset before returning.  Always returns 0.
 */
1981f9790aebSLuigi Rizzo int
1982f9790aebSLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
1983f9790aebSLuigi Rizzo 		u_int ring_nr)
1984f9790aebSLuigi Rizzo {
1985f9790aebSLuigi Rizzo 	struct nm_bdg_q *dst_ents, *brddst;
1986f9790aebSLuigi Rizzo 	uint16_t num_dsts = 0, *dsts;
1987f9790aebSLuigi Rizzo 	struct nm_bridge *b = na->na_bdg;
198837e3a6d3SLuigi Rizzo 	u_int i, me = na->bdg_port;
1989f9790aebSLuigi Rizzo 
1990f9790aebSLuigi Rizzo 	/*
1991f9790aebSLuigi Rizzo 	 * The work area (pointed by ft) is followed by an array of
1992f9790aebSLuigi Rizzo 	 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
1993f9790aebSLuigi Rizzo 	 * queues per port plus one for the broadcast traffic.
1994f9790aebSLuigi Rizzo 	 * Then we have an array of destination indexes.
1995f9790aebSLuigi Rizzo 	 */
1996f9790aebSLuigi Rizzo 	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
1997f9790aebSLuigi Rizzo 	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
1998f9790aebSLuigi Rizzo 
1999f9790aebSLuigi Rizzo 	/* first pass: find a destination for each packet in the batch */
	/* i always indexes the first fragment of a packet: the loop steps
	 * over all the fragments of the current packet at once.
	 */
2000f9790aebSLuigi Rizzo 	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
2001f9790aebSLuigi Rizzo 		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
2002f9790aebSLuigi Rizzo 		uint16_t dst_port, d_i;
2003f9790aebSLuigi Rizzo 		struct nm_bdg_q *d;
2004*2ff91c17SVincenzo Maffione 		struct nm_bdg_fwd *start_ft = NULL;
2005f9790aebSLuigi Rizzo 
2006f9790aebSLuigi Rizzo 		ND("slot %d frags %d", i, ft[i].ft_frags);
2007*2ff91c17SVincenzo Maffione 
		/* Locate the entry the lookup function must inspect: the
		 * first fragment past the virtio-net header when the header
		 * is shorter than that fragment, or the second fragment when
		 * the header fills the first one exactly.
		 */
2008*2ff91c17SVincenzo Maffione 		if (na->up.virt_hdr_len < ft[i].ft_len) {
2009*2ff91c17SVincenzo Maffione 			ft[i].ft_offset = na->up.virt_hdr_len;
2010*2ff91c17SVincenzo Maffione 			start_ft = &ft[i];
2011*2ff91c17SVincenzo Maffione 		} else if (na->up.virt_hdr_len == ft[i].ft_len && ft[i].ft_flags & NS_MOREFRAG) {
2012*2ff91c17SVincenzo Maffione 			ft[i].ft_offset = ft[i].ft_len;
2013*2ff91c17SVincenzo Maffione 			start_ft = &ft[i+1];
2014*2ff91c17SVincenzo Maffione 		} else {
2015f0ea3689SLuigi Rizzo 			/* Drop the packet if the virtio-net header is not into the first
2016*2ff91c17SVincenzo Maffione 			 * fragment nor at the very beginning of the second.
2017*2ff91c17SVincenzo Maffione 			 */
2018f9790aebSLuigi Rizzo 			continue;
2019*2ff91c17SVincenzo Maffione 		}
2020*2ff91c17SVincenzo Maffione 		dst_port = b->bdg_ops->lookup(start_ft, &dst_ring, na, b->private_data);
2021f9790aebSLuigi Rizzo 		if (netmap_verbose > 255)
2022f9790aebSLuigi Rizzo 			RD(5, "slot %d port %d -> %d", i, me, dst_port);
20234f80b14cSVincenzo Maffione 		if (dst_port >= NM_BDG_NOPORT)
2024f9790aebSLuigi Rizzo 			continue; /* this packet is identified to be dropped */
2025f9790aebSLuigi Rizzo 		else if (dst_port == NM_BDG_BROADCAST)
2026f9790aebSLuigi Rizzo 			dst_ring = 0; /* broadcasts always go to ring 0 */
		/* unicast back to the source port, or to an empty port: drop */
2027f9790aebSLuigi Rizzo 		else if (unlikely(dst_port == me ||
2028f9790aebSLuigi Rizzo 		    !b->bdg_ports[dst_port]))
2029f9790aebSLuigi Rizzo 			continue;
2030f9790aebSLuigi Rizzo 
2031f9790aebSLuigi Rizzo 		/* get a position in the scratch pad */
2032f9790aebSLuigi Rizzo 		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
2033f9790aebSLuigi Rizzo 		d = dst_ents + d_i;
2034f9790aebSLuigi Rizzo 
2035f9790aebSLuigi Rizzo 		/* append the first fragment to the list */
2036f9790aebSLuigi Rizzo 		if (d->bq_head == NM_FT_NULL) { /* new destination */
2037f9790aebSLuigi Rizzo 			d->bq_head = d->bq_tail = i;
2038f9790aebSLuigi Rizzo 			/* remember this position to be scanned later */
2039f9790aebSLuigi Rizzo 			if (dst_port != NM_BDG_BROADCAST)
2040f9790aebSLuigi Rizzo 				dsts[num_dsts++] = d_i;
2041f9790aebSLuigi Rizzo 		} else {
2042f9790aebSLuigi Rizzo 			ft[d->bq_tail].ft_next = i;
2043f9790aebSLuigi Rizzo 			d->bq_tail = i;
2044f9790aebSLuigi Rizzo 		}
2045f9790aebSLuigi Rizzo 		d->bq_len += ft[i].ft_frags;
2046f9790aebSLuigi Rizzo 	}
2047f9790aebSLuigi Rizzo 
2048f9790aebSLuigi Rizzo 	/*
2049f9790aebSLuigi Rizzo 	 * Broadcast traffic goes to ring 0 on all destinations.
2050f9790aebSLuigi Rizzo 	 * So we need to add these rings to the list of ports to scan.
2051f9790aebSLuigi Rizzo 	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
2052f9790aebSLuigi Rizzo 	 * expensive. We should keep a compact list of active destinations
2053f9790aebSLuigi Rizzo 	 * so we could shorten this loop.
2054f9790aebSLuigi Rizzo 	 */
2055f9790aebSLuigi Rizzo 	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
2056f9790aebSLuigi Rizzo 	if (brddst->bq_head != NM_FT_NULL) {
205737e3a6d3SLuigi Rizzo 		u_int j;
2058f9790aebSLuigi Rizzo 		for (j = 0; likely(j < b->bdg_active_ports); j++) {
2059f9790aebSLuigi Rizzo 			uint16_t d_i;
2060f9790aebSLuigi Rizzo 			i = b->bdg_port_index[j];
2061f9790aebSLuigi Rizzo 			if (unlikely(i == me))
2062f9790aebSLuigi Rizzo 				continue;
2063f9790aebSLuigi Rizzo 			d_i = i * NM_BDG_MAXRINGS;
			/* ring 0 of a port that already has unicast traffic
			 * queued is in dsts[] already: do not add it twice
			 */
2064f9790aebSLuigi Rizzo 			if (dst_ents[d_i].bq_head == NM_FT_NULL)
2065f9790aebSLuigi Rizzo 				dsts[num_dsts++] = d_i;
2066f9790aebSLuigi Rizzo 		}
2067f9790aebSLuigi Rizzo 	}
2068f9790aebSLuigi Rizzo 
2069f9790aebSLuigi Rizzo 	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
20704bf50f18SLuigi Rizzo 	/* second pass: scan destinations */
2071f9790aebSLuigi Rizzo 	for (i = 0; i < num_dsts; i++) {
2072f9790aebSLuigi Rizzo 		struct netmap_vp_adapter *dst_na;
2073f9790aebSLuigi Rizzo 		struct netmap_kring *kring;
2074f9790aebSLuigi Rizzo 		struct netmap_ring *ring;
2075f0ea3689SLuigi Rizzo 		u_int dst_nr, lim, j, d_i, next, brd_next;
2076f9790aebSLuigi Rizzo 		u_int needed, howmany;
2077f9790aebSLuigi Rizzo 		int retry = netmap_txsync_retry;
2078f9790aebSLuigi Rizzo 		struct nm_bdg_q *d;
2079f9790aebSLuigi Rizzo 		uint32_t my_start = 0, lease_idx = 0;
2080f9790aebSLuigi Rizzo 		int nrings;
2081f0ea3689SLuigi Rizzo 		int virt_hdr_mismatch = 0;
2082f9790aebSLuigi Rizzo 
2083f9790aebSLuigi Rizzo 		d_i = dsts[i];
2084f9790aebSLuigi Rizzo 		ND("second pass %d port %d", i, d_i);
2085f9790aebSLuigi Rizzo 		d = dst_ents + d_i;
2086f9790aebSLuigi Rizzo 		// XXX fix the division
2087f9790aebSLuigi Rizzo 		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
2088f9790aebSLuigi Rizzo 		/* protect from the lookup function returning an inactive
2089f9790aebSLuigi Rizzo 		 * destination port
2090f9790aebSLuigi Rizzo 		 */
2091f9790aebSLuigi Rizzo 		if (unlikely(dst_na == NULL))
2092f9790aebSLuigi Rizzo 			goto cleanup;
2093f9790aebSLuigi Rizzo 		if (dst_na->up.na_flags & NAF_SW_ONLY)
2094f9790aebSLuigi Rizzo 			goto cleanup;
2095f9790aebSLuigi Rizzo 		/*
2096f9790aebSLuigi Rizzo 		 * The interface may be in !netmap mode in two cases:
2097f9790aebSLuigi Rizzo 		 * - when na is attached but not activated yet;
2098f9790aebSLuigi Rizzo 		 * - when na is being deactivated but is still attached.
2099f9790aebSLuigi Rizzo 		 */
21004bf50f18SLuigi Rizzo 		if (unlikely(!nm_netmap_on(&dst_na->up))) {
2101f9790aebSLuigi Rizzo 			ND("not in netmap mode!");
2102f9790aebSLuigi Rizzo 			goto cleanup;
2103f9790aebSLuigi Rizzo 		}
2104f9790aebSLuigi Rizzo 
2105f9790aebSLuigi Rizzo 		/* there is at least one either unicast or broadcast packet */
2106f9790aebSLuigi Rizzo 		brd_next = brddst->bq_head;
2107f9790aebSLuigi Rizzo 		next = d->bq_head;
2108f9790aebSLuigi Rizzo 		/* we need to reserve this many slots. If fewer are
2109f9790aebSLuigi Rizzo 		 * available, some packets will be dropped.
2110f9790aebSLuigi Rizzo 		 * Packets may have multiple fragments, so there is a
2111f9790aebSLuigi Rizzo 		 * chance that we may not use all of the slots
2112f9790aebSLuigi Rizzo 		 * we have claimed, so we will need to handle the leftover
2113f9790aebSLuigi Rizzo 		 * ones when we regain the lock.
2114f9790aebSLuigi Rizzo 		 */
2115f9790aebSLuigi Rizzo 		needed = d->bq_len + brddst->bq_len;
2116f9790aebSLuigi Rizzo 
211737e3a6d3SLuigi Rizzo 		if (unlikely(dst_na->up.virt_hdr_len != na->up.virt_hdr_len)) {
2118c3e9b4dbSLuiz Otavio O Souza                         if (netmap_verbose) {
211937e3a6d3SLuigi Rizzo                             RD(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len,
212037e3a6d3SLuigi Rizzo                                   dst_na->up.virt_hdr_len);
2121c3e9b4dbSLuiz Otavio O Souza                         }
2122f0ea3689SLuigi Rizzo 			/* There is a virtio-net header/offloadings mismatch between
2123f0ea3689SLuigi Rizzo 			 * source and destination. The slower mismatch datapath will
2124f0ea3689SLuigi Rizzo 			 * be used to cope with all the mismatches.
2125f0ea3689SLuigi Rizzo 			 */
2126f0ea3689SLuigi Rizzo 			virt_hdr_mismatch = 1;
2127f0ea3689SLuigi Rizzo 			if (dst_na->mfs < na->mfs) {
2128f0ea3689SLuigi Rizzo 				/* We may need to do segmentation offloadings, and so
2129f0ea3689SLuigi Rizzo 				 * we may need a number of destination slots greater
2130f0ea3689SLuigi Rizzo 				 * than the number of input slots ('needed').
2131f0ea3689SLuigi Rizzo 				 * We look for the smallest integer 'x' which satisfies:
2132f0ea3689SLuigi Rizzo 				 *	needed * na->mfs + x * H <= x * na->mfs
2133f0ea3689SLuigi Rizzo 				 * where 'H' is the length of the longest header that may
2134f0ea3689SLuigi Rizzo 				 * be replicated in the segmentation process (e.g. for
2135f0ea3689SLuigi Rizzo 				 * TCPv4 we must account for ethernet header, IP header
2136f0ea3689SLuigi Rizzo 				 * and TCPv4 header).
2137f0ea3689SLuigi Rizzo 				 */
21384f80b14cSVincenzo Maffione 				KASSERT(dst_na->mfs > 0, ("vpna->mfs is 0"));
				/* NOTE(review): the divisor below uses dst_na->mfs, so
				 * the right-hand side of the inequality above presumably
				 * means x * dst_na->mfs. Nothing visible here ensures
				 * dst_na->mfs > WORST_CASE_GSO_HEADER (the assertion only
				 * checks for > 0) -- confirm the divisor cannot be zero.
				 */
2139f0ea3689SLuigi Rizzo 				needed = (needed * na->mfs) /
2140f0ea3689SLuigi Rizzo 						(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
2141f0ea3689SLuigi Rizzo 				ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
2142f0ea3689SLuigi Rizzo 			}
2143f0ea3689SLuigi Rizzo 		}
2144f0ea3689SLuigi Rizzo 
2145f9790aebSLuigi Rizzo 		ND(5, "pass 2 dst %d is %x %s",
2146f9790aebSLuigi Rizzo 			i, d_i, is_vp ? "virtual" : "nic/host");
		/* recover the ring index from d_i and fold it into the
		 * number of rx rings the destination actually has
		 */
2147f9790aebSLuigi Rizzo 		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
2148f9790aebSLuigi Rizzo 		nrings = dst_na->up.num_rx_rings;
2149f9790aebSLuigi Rizzo 		if (dst_nr >= nrings)
2150f9790aebSLuigi Rizzo 			dst_nr = dst_nr % nrings;
2151*2ff91c17SVincenzo Maffione 		kring = dst_na->up.rx_rings[dst_nr];
2152f9790aebSLuigi Rizzo 		ring = kring->ring;
21534f80b14cSVincenzo Maffione 		/* the destination ring may have not been opened for RX */
21544f80b14cSVincenzo Maffione 		if (unlikely(ring == NULL || kring->nr_mode != NKR_NETMAP_ON))
21554f80b14cSVincenzo Maffione 			goto cleanup;
2156f9790aebSLuigi Rizzo 		lim = kring->nkr_num_slots - 1;
2157f9790aebSLuigi Rizzo 
2158f9790aebSLuigi Rizzo retry:
2159f9790aebSLuigi Rizzo 
2160f0ea3689SLuigi Rizzo 		if (dst_na->retry && retry) {
2161f0ea3689SLuigi Rizzo 			/* try to get some free slot from the previous run */
2162847bf383SLuigi Rizzo 			kring->nm_notify(kring, 0);
21634bf50f18SLuigi Rizzo 			/* actually useful only for bwraps, since there
21644bf50f18SLuigi Rizzo 			 * the notify will trigger a txsync on the hwna. VALE ports
21654bf50f18SLuigi Rizzo 			 * have dst_na->retry == 0
21664bf50f18SLuigi Rizzo 			 */
2167f0ea3689SLuigi Rizzo 		}
2168f9790aebSLuigi Rizzo 		/* reserve the buffers in the queue and an entry
2169f9790aebSLuigi Rizzo 		 * to report completion, and drop lock.
2170f9790aebSLuigi Rizzo 		 * XXX this might become a helper function.
2171f9790aebSLuigi Rizzo 		 */
2172f9790aebSLuigi Rizzo 		mtx_lock(&kring->q_lock);
2173f9790aebSLuigi Rizzo 		if (kring->nkr_stopped) {
2174f9790aebSLuigi Rizzo 			mtx_unlock(&kring->q_lock);
2175f9790aebSLuigi Rizzo 			goto cleanup;
2176f9790aebSLuigi Rizzo 		}
2177f9790aebSLuigi Rizzo 		my_start = j = kring->nkr_hwlease;
2178f9790aebSLuigi Rizzo 		howmany = nm_kr_space(kring, 1);
2179f9790aebSLuigi Rizzo 		if (needed < howmany)
2180f9790aebSLuigi Rizzo 			howmany = needed;
2181f9790aebSLuigi Rizzo 		lease_idx = nm_kr_lease(kring, howmany, 1);
2182f9790aebSLuigi Rizzo 		mtx_unlock(&kring->q_lock);
2183f9790aebSLuigi Rizzo 
2184f9790aebSLuigi Rizzo 		/* only retry if we need more than available slots */
2185f9790aebSLuigi Rizzo 		if (retry && needed <= howmany)
2186f9790aebSLuigi Rizzo 			retry = 0;
2187f9790aebSLuigi Rizzo 
2188f9790aebSLuigi Rizzo 		/* copy to the destination queue */
2189f9790aebSLuigi Rizzo 		while (howmany > 0) {
2190f9790aebSLuigi Rizzo 			struct netmap_slot *slot;
2191f9790aebSLuigi Rizzo 			struct nm_bdg_fwd *ft_p, *ft_end;
2192f9790aebSLuigi Rizzo 			u_int cnt;
2193f9790aebSLuigi Rizzo 
2194f9790aebSLuigi Rizzo 			/* find the queue from which we pick next packet.
2195f9790aebSLuigi Rizzo 			 * NM_FT_NULL is always higher than valid indexes
2196f9790aebSLuigi Rizzo 			 * so we never dereference it if the other list
2197f9790aebSLuigi Rizzo 			 * has packets (and if both are empty we never
2198f9790aebSLuigi Rizzo 			 * get here).
2199f9790aebSLuigi Rizzo 			 */
2200f9790aebSLuigi Rizzo 			if (next < brd_next) {
2201f9790aebSLuigi Rizzo 				ft_p = ft + next;
2202f9790aebSLuigi Rizzo 				next = ft_p->ft_next;
2203f9790aebSLuigi Rizzo 			} else { /* insert broadcast */
2204f9790aebSLuigi Rizzo 				ft_p = ft + brd_next;
2205f9790aebSLuigi Rizzo 				brd_next = ft_p->ft_next;
2206f9790aebSLuigi Rizzo 			}
2207f9790aebSLuigi Rizzo 			cnt = ft_p->ft_frags; // cnt > 0
2208f9790aebSLuigi Rizzo 			if (unlikely(cnt > howmany))
2209f9790aebSLuigi Rizzo 			    break; /* no more space */
2210f9790aebSLuigi Rizzo 			if (netmap_verbose && cnt > 1)
2211f9790aebSLuigi Rizzo 				RD(5, "rx %d frags to %d", cnt, j);
2212f9790aebSLuigi Rizzo 			ft_end = ft_p + cnt;
2213f0ea3689SLuigi Rizzo 			if (unlikely(virt_hdr_mismatch)) {
2214f0ea3689SLuigi Rizzo 				bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
2215f0ea3689SLuigi Rizzo 			} else {
2216f0ea3689SLuigi Rizzo 				howmany -= cnt;
2217f9790aebSLuigi Rizzo 				do {
2218f9790aebSLuigi Rizzo 					char *dst, *src = ft_p->ft_buf;
2219f9790aebSLuigi Rizzo 					size_t copy_len = ft_p->ft_len, dst_len = copy_len;
2220f9790aebSLuigi Rizzo 
2221f9790aebSLuigi Rizzo 					slot = &ring->slot[j];
22224bf50f18SLuigi Rizzo 					dst = NMB(&dst_na->up, slot);
2223f9790aebSLuigi Rizzo 
222417885a7bSLuigi Rizzo 					ND("send [%d] %d(%d) bytes at %s:%d",
222517885a7bSLuigi Rizzo 							i, (int)copy_len, (int)dst_len,
222617885a7bSLuigi Rizzo 							NM_IFPNAME(dst_ifp), j);
2227f9790aebSLuigi Rizzo 					/* round to a multiple of 64 */
2228f9790aebSLuigi Rizzo 					copy_len = (copy_len + 63) & ~63;
2229f9790aebSLuigi Rizzo 
					/* the rounded length must fit in both the source
					 * and the destination buffer
					 */
22304bf50f18SLuigi Rizzo 					if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) ||
22314bf50f18SLuigi Rizzo 						     copy_len > NETMAP_BUF_SIZE(&na->up))) {
2232e31c6ec7SLuigi Rizzo 						RD(5, "invalid len %d, down to 64", (int)copy_len);
2233e31c6ec7SLuigi Rizzo 						copy_len = dst_len = 64; // XXX
2234e31c6ec7SLuigi Rizzo 					}
2235f9790aebSLuigi Rizzo 					if (ft_p->ft_flags & NS_INDIRECT) {
2236f9790aebSLuigi Rizzo 						if (copyin(src, dst, copy_len)) {
2237f9790aebSLuigi Rizzo 							// invalid user pointer, pretend len is 0
2238f9790aebSLuigi Rizzo 							dst_len = 0;
2239f9790aebSLuigi Rizzo 						}
2240f9790aebSLuigi Rizzo 					} else {
2241f9790aebSLuigi Rizzo 						//memcpy(dst, src, copy_len);
2242f9790aebSLuigi Rizzo 						pkt_copy(src, dst, (int)copy_len);
2243f9790aebSLuigi Rizzo 					}
2244f9790aebSLuigi Rizzo 					slot->len = dst_len;
					/* fragment count goes in the bits above the low
					 * byte of the flags, next to NS_MOREFRAG
					 */
2245f9790aebSLuigi Rizzo 					slot->flags = (cnt << 8)| NS_MOREFRAG;
2246f9790aebSLuigi Rizzo 					j = nm_next(j, lim);
2247f0ea3689SLuigi Rizzo 					needed--;
2248f9790aebSLuigi Rizzo 					ft_p++;
2249f9790aebSLuigi Rizzo 				} while (ft_p != ft_end);
2250f9790aebSLuigi Rizzo 				slot->flags = (cnt << 8); /* clear flag on last entry */
2251f0ea3689SLuigi Rizzo 			}
2252f9790aebSLuigi Rizzo 			/* are we done ? */
2253f9790aebSLuigi Rizzo 			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
2254f9790aebSLuigi Rizzo 				break;
2255f9790aebSLuigi Rizzo 		}
2256f9790aebSLuigi Rizzo 		{
2257f9790aebSLuigi Rizzo 		    /* current position */
2258f9790aebSLuigi Rizzo 		    uint32_t *p = kring->nkr_leases; /* shorthand */
2259f9790aebSLuigi Rizzo 		    uint32_t update_pos;
		    /* cleared when q_lock is released early, before nm_notify */
2260f9790aebSLuigi Rizzo 		    int still_locked = 1;
2261f9790aebSLuigi Rizzo 
2262f9790aebSLuigi Rizzo 		    mtx_lock(&kring->q_lock);
2263f9790aebSLuigi Rizzo 		    if (unlikely(howmany > 0)) {
2264f9790aebSLuigi Rizzo 			/* not used all bufs. If i am the last one
2265f9790aebSLuigi Rizzo 			 * i can recover the slots, otherwise must
2266f9790aebSLuigi Rizzo 			 * fill them with 0 to mark empty packets.
2267f9790aebSLuigi Rizzo 			 */
2268f9790aebSLuigi Rizzo 			ND("leftover %d bufs", howmany);
2269f9790aebSLuigi Rizzo 			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
2270f9790aebSLuigi Rizzo 			    /* yes i am the last one */
2271f9790aebSLuigi Rizzo 			    ND("roll back nkr_hwlease to %d", j);
2272f9790aebSLuigi Rizzo 			    kring->nkr_hwlease = j;
2273f9790aebSLuigi Rizzo 			} else {
2274f9790aebSLuigi Rizzo 			    while (howmany-- > 0) {
2275f9790aebSLuigi Rizzo 				ring->slot[j].len = 0;
2276f9790aebSLuigi Rizzo 				ring->slot[j].flags = 0;
2277f9790aebSLuigi Rizzo 				j = nm_next(j, lim);
2278f9790aebSLuigi Rizzo 			    }
2279f9790aebSLuigi Rizzo 			}
2280f9790aebSLuigi Rizzo 		    }
2281f9790aebSLuigi Rizzo 		    p[lease_idx] = j; /* report I am done */
2282f9790aebSLuigi Rizzo 
228317885a7bSLuigi Rizzo 		    update_pos = kring->nr_hwtail;
2284f9790aebSLuigi Rizzo 
2285f9790aebSLuigi Rizzo 		    if (my_start == update_pos) {
2286f9790aebSLuigi Rizzo 			/* all slots before my_start have been reported,
2287f9790aebSLuigi Rizzo 			 * so scan subsequent leases to see if other ranges
2288f9790aebSLuigi Rizzo 			 * have been completed, and do a selwakeup or txsync.
2289f9790aebSLuigi Rizzo 		         */
2290f9790aebSLuigi Rizzo 			while (lease_idx != kring->nkr_lease_idx &&
2291f9790aebSLuigi Rizzo 				p[lease_idx] != NR_NOSLOT) {
2292f9790aebSLuigi Rizzo 			    j = p[lease_idx];
2293f9790aebSLuigi Rizzo 			    p[lease_idx] = NR_NOSLOT;
2294f9790aebSLuigi Rizzo 			    lease_idx = nm_next(lease_idx, lim);
2295f9790aebSLuigi Rizzo 			}
2296f9790aebSLuigi Rizzo 			/* j is the new 'write' position. j != my_start
2297f9790aebSLuigi Rizzo 			 * means there are new buffers to report
2298f9790aebSLuigi Rizzo 			 */
2299f9790aebSLuigi Rizzo 			if (likely(j != my_start)) {
230017885a7bSLuigi Rizzo 				kring->nr_hwtail = j;
2301f9790aebSLuigi Rizzo 				still_locked = 0;
2302f9790aebSLuigi Rizzo 				mtx_unlock(&kring->q_lock);
2303847bf383SLuigi Rizzo 				kring->nm_notify(kring, 0);
23044bf50f18SLuigi Rizzo 				/* this is netmap_notify for VALE ports and
23054bf50f18SLuigi Rizzo 				 * netmap_bwrap_notify for bwrap. The latter will
23064bf50f18SLuigi Rizzo 				 * trigger a txsync on the underlying hwna
23074bf50f18SLuigi Rizzo 				 */
23084bf50f18SLuigi Rizzo 				if (dst_na->retry && retry--) {
23094bf50f18SLuigi Rizzo 					/* XXX this is going to call nm_notify again.
23104bf50f18SLuigi Rizzo 					 * Only useful for bwrap in virtual machines
23114bf50f18SLuigi Rizzo 					 */
2312f9790aebSLuigi Rizzo 					goto retry;
2313f9790aebSLuigi Rizzo 				}
2314f9790aebSLuigi Rizzo 			}
23154bf50f18SLuigi Rizzo 		    }
2316f9790aebSLuigi Rizzo 		    if (still_locked)
2317f9790aebSLuigi Rizzo 			mtx_unlock(&kring->q_lock);
2318f9790aebSLuigi Rizzo 		}
2319f9790aebSLuigi Rizzo cleanup:
		/* empty this destination's queue so the scratch pad is clean
		 * for the next batch
		 */
2320f9790aebSLuigi Rizzo 		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
2321f9790aebSLuigi Rizzo 		d->bq_len = 0;
2322f9790aebSLuigi Rizzo 	}
2323f9790aebSLuigi Rizzo 	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
2324f9790aebSLuigi Rizzo 	brddst->bq_len = 0;
2325f9790aebSLuigi Rizzo 	return 0;
2326f9790aebSLuigi Rizzo }
2327f9790aebSLuigi Rizzo 
23284bf50f18SLuigi Rizzo /* nm_txsync callback for VALE ports */
2329f9790aebSLuigi Rizzo static int
23304bf50f18SLuigi Rizzo netmap_vp_txsync(struct netmap_kring *kring, int flags)
2331f9790aebSLuigi Rizzo {
23324bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *na =
23334bf50f18SLuigi Rizzo 		(struct netmap_vp_adapter *)kring->na;
233417885a7bSLuigi Rizzo 	u_int done;
233517885a7bSLuigi Rizzo 	u_int const lim = kring->nkr_num_slots - 1;
2336847bf383SLuigi Rizzo 	u_int const head = kring->rhead;
2337f9790aebSLuigi Rizzo 
2338f9790aebSLuigi Rizzo 	if (bridge_batch <= 0) { /* testing only */
2339847bf383SLuigi Rizzo 		done = head; // used all
2340f9790aebSLuigi Rizzo 		goto done;
2341f9790aebSLuigi Rizzo 	}
23424bf50f18SLuigi Rizzo 	if (!na->na_bdg) {
2343847bf383SLuigi Rizzo 		done = head;
23444bf50f18SLuigi Rizzo 		goto done;
23454bf50f18SLuigi Rizzo 	}
2346f9790aebSLuigi Rizzo 	if (bridge_batch > NM_BDG_BATCH)
2347f9790aebSLuigi Rizzo 		bridge_batch = NM_BDG_BATCH;
2348f9790aebSLuigi Rizzo 
2349847bf383SLuigi Rizzo 	done = nm_bdg_preflush(kring, head);
2350f9790aebSLuigi Rizzo done:
2351847bf383SLuigi Rizzo 	if (done != head)
2352847bf383SLuigi Rizzo 		D("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail);
235317885a7bSLuigi Rizzo 	/*
235417885a7bSLuigi Rizzo 	 * packets between 'done' and 'cur' are left unsent.
235517885a7bSLuigi Rizzo 	 */
235617885a7bSLuigi Rizzo 	kring->nr_hwcur = done;
235717885a7bSLuigi Rizzo 	kring->nr_hwtail = nm_prev(done, lim);
2358f9790aebSLuigi Rizzo 	if (netmap_verbose)
23594bf50f18SLuigi Rizzo 		D("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
2360f9790aebSLuigi Rizzo 	return 0;
2361f9790aebSLuigi Rizzo }
2362f9790aebSLuigi Rizzo 
2363f9790aebSLuigi Rizzo 
23644bf50f18SLuigi Rizzo /* rxsync code used by VALE ports nm_rxsync callback and also
23654bf50f18SLuigi Rizzo  * internally by the brwap
2366f9790aebSLuigi Rizzo  */
2367f9790aebSLuigi Rizzo static int
23684bf50f18SLuigi Rizzo netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
2369f9790aebSLuigi Rizzo {
23704bf50f18SLuigi Rizzo 	struct netmap_adapter *na = kring->na;
237117885a7bSLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
237217885a7bSLuigi Rizzo 	u_int nm_i, lim = kring->nkr_num_slots - 1;
2373847bf383SLuigi Rizzo 	u_int head = kring->rhead;
237417885a7bSLuigi Rizzo 	int n;
237517885a7bSLuigi Rizzo 
237617885a7bSLuigi Rizzo 	if (head > lim) {
237717885a7bSLuigi Rizzo 		D("ouch dangerous reset!!!");
237817885a7bSLuigi Rizzo 		n = netmap_ring_reinit(kring);
237917885a7bSLuigi Rizzo 		goto done;
238017885a7bSLuigi Rizzo 	}
238117885a7bSLuigi Rizzo 
238217885a7bSLuigi Rizzo 	/* First part, import newly received packets. */
238317885a7bSLuigi Rizzo 	/* actually nothing to do here, they are already in the kring */
238417885a7bSLuigi Rizzo 
238517885a7bSLuigi Rizzo 	/* Second part, skip past packets that userspace has released. */
238617885a7bSLuigi Rizzo 	nm_i = kring->nr_hwcur;
238717885a7bSLuigi Rizzo 	if (nm_i != head) {
238817885a7bSLuigi Rizzo 		/* consistency check, but nothing really important here */
238917885a7bSLuigi Rizzo 		for (n = 0; likely(nm_i != head); n++) {
239017885a7bSLuigi Rizzo 			struct netmap_slot *slot = &ring->slot[nm_i];
23914bf50f18SLuigi Rizzo 			void *addr = NMB(na, slot);
239217885a7bSLuigi Rizzo 
23934bf50f18SLuigi Rizzo 			if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
239417885a7bSLuigi Rizzo 				D("bad buffer index %d, ignore ?",
239517885a7bSLuigi Rizzo 					slot->buf_idx);
239617885a7bSLuigi Rizzo 			}
239717885a7bSLuigi Rizzo 			slot->flags &= ~NS_BUF_CHANGED;
239817885a7bSLuigi Rizzo 			nm_i = nm_next(nm_i, lim);
239917885a7bSLuigi Rizzo 		}
240017885a7bSLuigi Rizzo 		kring->nr_hwcur = head;
240117885a7bSLuigi Rizzo 	}
240217885a7bSLuigi Rizzo 
240317885a7bSLuigi Rizzo 	n = 0;
240417885a7bSLuigi Rizzo done:
240517885a7bSLuigi Rizzo 	return n;
240617885a7bSLuigi Rizzo }
2407f9790aebSLuigi Rizzo 
2408f9790aebSLuigi Rizzo /*
24094bf50f18SLuigi Rizzo  * nm_rxsync callback for VALE ports
2410f9790aebSLuigi Rizzo  * user process reading from a VALE switch.
2411f9790aebSLuigi Rizzo  * Already protected against concurrent calls from userspace,
2412f9790aebSLuigi Rizzo  * but we must acquire the queue's lock to protect against
2413f9790aebSLuigi Rizzo  * writers on the same queue.
2414f9790aebSLuigi Rizzo  */
2415f9790aebSLuigi Rizzo static int
24164bf50f18SLuigi Rizzo netmap_vp_rxsync(struct netmap_kring *kring, int flags)
2417f9790aebSLuigi Rizzo {
2418f9790aebSLuigi Rizzo 	int n;
2419f9790aebSLuigi Rizzo 
2420f9790aebSLuigi Rizzo 	mtx_lock(&kring->q_lock);
24214bf50f18SLuigi Rizzo 	n = netmap_vp_rxsync_locked(kring, flags);
2422f9790aebSLuigi Rizzo 	mtx_unlock(&kring->q_lock);
2423f9790aebSLuigi Rizzo 	return n;
2424f9790aebSLuigi Rizzo }
2425f9790aebSLuigi Rizzo 
242617885a7bSLuigi Rizzo 
24274bf50f18SLuigi Rizzo /* nm_bdg_attach callback for VALE ports
24284bf50f18SLuigi Rizzo  * The na_vp port is this same netmap_adapter. There is no host port.
24294bf50f18SLuigi Rizzo  */
2430f9790aebSLuigi Rizzo static int
24314bf50f18SLuigi Rizzo netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na)
24324bf50f18SLuigi Rizzo {
24334bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
24344bf50f18SLuigi Rizzo 
2435*2ff91c17SVincenzo Maffione 	if (vpna->na_bdg) {
24364f80b14cSVincenzo Maffione 		return netmap_bwrap_attach(name, na);
2437*2ff91c17SVincenzo Maffione 	}
24384bf50f18SLuigi Rizzo 	na->na_vp = vpna;
24394bf50f18SLuigi Rizzo 	strncpy(na->name, name, sizeof(na->name));
24404bf50f18SLuigi Rizzo 	na->na_hostvp = NULL;
24414bf50f18SLuigi Rizzo 	return 0;
24424bf50f18SLuigi Rizzo }
24434bf50f18SLuigi Rizzo 
24444bf50f18SLuigi Rizzo /* create a netmap_vp_adapter that describes a VALE port.
24454bf50f18SLuigi Rizzo  * Only persistent VALE ports have a non-null ifp.
24464bf50f18SLuigi Rizzo  */
24474bf50f18SLuigi Rizzo static int
2448*2ff91c17SVincenzo Maffione netmap_vp_create(struct nmreq_header *hdr, struct ifnet *ifp,
2449*2ff91c17SVincenzo Maffione 		struct netmap_mem_d *nmd, struct netmap_vp_adapter **ret)
2450f9790aebSLuigi Rizzo {
2451*2ff91c17SVincenzo Maffione 	struct nmreq_register *req = (struct nmreq_register *)hdr->nr_body;
2452f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna;
2453f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
2454c3e9b4dbSLuiz Otavio O Souza 	int error = 0;
2455f0ea3689SLuigi Rizzo 	u_int npipes = 0;
2456*2ff91c17SVincenzo Maffione 	u_int extrabufs = 0;
2457*2ff91c17SVincenzo Maffione 
2458*2ff91c17SVincenzo Maffione 	if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
2459*2ff91c17SVincenzo Maffione 		return EINVAL;
2460*2ff91c17SVincenzo Maffione 	}
2461f9790aebSLuigi Rizzo 
2462c3e9b4dbSLuiz Otavio O Souza 	vpna = nm_os_malloc(sizeof(*vpna));
2463f9790aebSLuigi Rizzo 	if (vpna == NULL)
2464f9790aebSLuigi Rizzo 		return ENOMEM;
2465f9790aebSLuigi Rizzo 
2466f9790aebSLuigi Rizzo  	na = &vpna->up;
2467f9790aebSLuigi Rizzo 
2468f9790aebSLuigi Rizzo 	na->ifp = ifp;
2469*2ff91c17SVincenzo Maffione 	strncpy(na->name, hdr->nr_name, sizeof(na->name));
2470f9790aebSLuigi Rizzo 
2471f9790aebSLuigi Rizzo 	/* bound checking */
2472*2ff91c17SVincenzo Maffione 	na->num_tx_rings = req->nr_tx_rings;
2473f9790aebSLuigi Rizzo 	nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
2474*2ff91c17SVincenzo Maffione 	req->nr_tx_rings = na->num_tx_rings; /* write back */
2475*2ff91c17SVincenzo Maffione 	na->num_rx_rings = req->nr_rx_rings;
2476f9790aebSLuigi Rizzo 	nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
2477*2ff91c17SVincenzo Maffione 	req->nr_rx_rings = na->num_rx_rings; /* write back */
2478*2ff91c17SVincenzo Maffione 	nm_bound_var(&req->nr_tx_slots, NM_BRIDGE_RINGSIZE,
2479f9790aebSLuigi Rizzo 			1, NM_BDG_MAXSLOTS, NULL);
2480*2ff91c17SVincenzo Maffione 	na->num_tx_desc = req->nr_tx_slots;
2481*2ff91c17SVincenzo Maffione 	nm_bound_var(&req->nr_rx_slots, NM_BRIDGE_RINGSIZE,
2482f9790aebSLuigi Rizzo 			1, NM_BDG_MAXSLOTS, NULL);
2483f0ea3689SLuigi Rizzo 	/* validate number of pipes. We want at least 1,
2484f0ea3689SLuigi Rizzo 	 * but probably can do with some more.
2485f0ea3689SLuigi Rizzo 	 * So let's use 2 as default (when 0 is supplied)
2486f0ea3689SLuigi Rizzo 	 */
2487f0ea3689SLuigi Rizzo 	nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
2488f0ea3689SLuigi Rizzo 	/* validate extra bufs */
2489*2ff91c17SVincenzo Maffione 	nm_bound_var(&extrabufs, 0, 0,
2490f0ea3689SLuigi Rizzo 			128*NM_BDG_MAXSLOTS, NULL);
2491*2ff91c17SVincenzo Maffione 	req->nr_extra_bufs = extrabufs; /* write back */
2492*2ff91c17SVincenzo Maffione 	na->num_rx_desc = req->nr_rx_slots;
24934f80b14cSVincenzo Maffione 	/* Set the mfs to a default value, as it is needed on the VALE
24944f80b14cSVincenzo Maffione 	 * mismatch datapath. XXX We should set it according to the MTU
24954f80b14cSVincenzo Maffione 	 * known to the kernel. */
24964f80b14cSVincenzo Maffione 	vpna->mfs = NM_BDG_MFS_DEFAULT;
2497847bf383SLuigi Rizzo 	vpna->last_smac = ~0llu;
2498f0ea3689SLuigi Rizzo 	/*if (vpna->mfs > netmap_buf_size)  TODO netmap_buf_size is zero??
2499f0ea3689SLuigi Rizzo 		vpna->mfs = netmap_buf_size; */
2500f0ea3689SLuigi Rizzo         if (netmap_verbose)
2501f0ea3689SLuigi Rizzo 		D("max frame size %u", vpna->mfs);
2502f9790aebSLuigi Rizzo 
2503847bf383SLuigi Rizzo 	na->na_flags |= NAF_BDG_MAYSLEEP;
250410b8ef3dSLuigi Rizzo 	/* persistent VALE ports look like hw devices
250510b8ef3dSLuigi Rizzo 	 * with a native netmap adapter
250610b8ef3dSLuigi Rizzo 	 */
250710b8ef3dSLuigi Rizzo 	if (ifp)
250810b8ef3dSLuigi Rizzo 		na->na_flags |= NAF_NATIVE;
25094bf50f18SLuigi Rizzo 	na->nm_txsync = netmap_vp_txsync;
25104bf50f18SLuigi Rizzo 	na->nm_rxsync = netmap_vp_rxsync;
25114bf50f18SLuigi Rizzo 	na->nm_register = netmap_vp_reg;
2512f9790aebSLuigi Rizzo 	na->nm_krings_create = netmap_vp_krings_create;
2513f9790aebSLuigi Rizzo 	na->nm_krings_delete = netmap_vp_krings_delete;
25144bf50f18SLuigi Rizzo 	na->nm_dtor = netmap_vp_dtor;
2515*2ff91c17SVincenzo Maffione 	ND("nr_mem_id %d", req->nr_mem_id);
2516c3e9b4dbSLuiz Otavio O Souza 	na->nm_mem = nmd ?
2517c3e9b4dbSLuiz Otavio O Souza 		netmap_mem_get(nmd):
2518c3e9b4dbSLuiz Otavio O Souza 		netmap_mem_private_new(
2519f9790aebSLuigi Rizzo 			na->num_tx_rings, na->num_tx_desc,
2520f0ea3689SLuigi Rizzo 			na->num_rx_rings, na->num_rx_desc,
2521*2ff91c17SVincenzo Maffione 			req->nr_extra_bufs, npipes, &error);
2522f0ea3689SLuigi Rizzo 	if (na->nm_mem == NULL)
2523f0ea3689SLuigi Rizzo 		goto err;
25244bf50f18SLuigi Rizzo 	na->nm_bdg_attach = netmap_vp_bdg_attach;
2525f9790aebSLuigi Rizzo 	/* other nmd fields are set in the common routine */
2526f9790aebSLuigi Rizzo 	error = netmap_attach_common(na);
2527f0ea3689SLuigi Rizzo 	if (error)
2528f0ea3689SLuigi Rizzo 		goto err;
25294bf50f18SLuigi Rizzo 	*ret = vpna;
2530f0ea3689SLuigi Rizzo 	return 0;
2531f0ea3689SLuigi Rizzo 
2532f0ea3689SLuigi Rizzo err:
2533f0ea3689SLuigi Rizzo 	if (na->nm_mem != NULL)
2534c3e9b4dbSLuiz Otavio O Souza 		netmap_mem_put(na->nm_mem);
2535c3e9b4dbSLuiz Otavio O Souza 	nm_os_free(vpna);
2536f9790aebSLuigi Rizzo 	return error;
2537f9790aebSLuigi Rizzo }
2538f9790aebSLuigi Rizzo 
25394bf50f18SLuigi Rizzo /* Bridge wrapper code (bwrap).
25404bf50f18SLuigi Rizzo  * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
25414bf50f18SLuigi Rizzo  * VALE switch.
25424bf50f18SLuigi Rizzo  * The main task is to swap the meaning of tx and rx rings to match the
25434bf50f18SLuigi Rizzo  * expectations of the VALE switch code (see nm_bdg_flush).
25444bf50f18SLuigi Rizzo  *
25454bf50f18SLuigi Rizzo  * The bwrap works by interposing a netmap_bwrap_adapter between the
25464bf50f18SLuigi Rizzo  * rest of the system and the hwna. The netmap_bwrap_adapter looks like
 * a netmap_vp_adapter to the rest of the system, but, internally, it
25484bf50f18SLuigi Rizzo  * translates all callbacks to what the hwna expects.
25494bf50f18SLuigi Rizzo  *
25504bf50f18SLuigi Rizzo  * Note that we have to intercept callbacks coming from two sides:
25514bf50f18SLuigi Rizzo  *
25524bf50f18SLuigi Rizzo  *  - callbacks coming from the netmap module are intercepted by
25534bf50f18SLuigi Rizzo  *    passing around the netmap_bwrap_adapter instead of the hwna
25544bf50f18SLuigi Rizzo  *
25554bf50f18SLuigi Rizzo  *  - callbacks coming from outside of the netmap module only know
25564bf50f18SLuigi Rizzo  *    about the hwna. This, however, only happens in interrupt
25574bf50f18SLuigi Rizzo  *    handlers, where only the hwna->nm_notify callback is called.
25584bf50f18SLuigi Rizzo  *    What the bwrap does is to overwrite the hwna->nm_notify callback
25594bf50f18SLuigi Rizzo  *    with its own netmap_bwrap_intr_notify.
25604bf50f18SLuigi Rizzo  *    XXX This assumes that the hwna->nm_notify callback was the
25614bf50f18SLuigi Rizzo  *    standard netmap_notify(), as it is the case for nic adapters.
25624bf50f18SLuigi Rizzo  *    Any additional action performed by hwna->nm_notify will not be
25634bf50f18SLuigi Rizzo  *    performed by netmap_bwrap_intr_notify.
25644bf50f18SLuigi Rizzo  *
25654bf50f18SLuigi Rizzo  * Additionally, the bwrap can optionally attach the host rings pair
25664bf50f18SLuigi Rizzo  * of the wrapped adapter to a different port of the switch.
25674bf50f18SLuigi Rizzo  */
25684bf50f18SLuigi Rizzo 
256917885a7bSLuigi Rizzo 
2570f9790aebSLuigi Rizzo static void
2571f9790aebSLuigi Rizzo netmap_bwrap_dtor(struct netmap_adapter *na)
2572f9790aebSLuigi Rizzo {
2573f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
2574f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
257537e3a6d3SLuigi Rizzo 	struct nm_bridge *b = bna->up.na_bdg,
257637e3a6d3SLuigi Rizzo 		*bh = bna->host.na_bdg;
257737e3a6d3SLuigi Rizzo 
25784f80b14cSVincenzo Maffione 	if (bna->host.up.nm_mem)
2579c3e9b4dbSLuiz Otavio O Souza 		netmap_mem_put(bna->host.up.nm_mem);
2580c3e9b4dbSLuiz Otavio O Souza 
258137e3a6d3SLuigi Rizzo 	if (b) {
258237e3a6d3SLuigi Rizzo 		netmap_bdg_detach_common(b, bna->up.bdg_port,
258337e3a6d3SLuigi Rizzo 			    (bh ? bna->host.bdg_port : -1));
258437e3a6d3SLuigi Rizzo 	}
2585f9790aebSLuigi Rizzo 
2586f9790aebSLuigi Rizzo 	ND("na %p", na);
2587f9790aebSLuigi Rizzo 	na->ifp = NULL;
25884bf50f18SLuigi Rizzo 	bna->host.up.ifp = NULL;
2589*2ff91c17SVincenzo Maffione 	hwna->na_vp = bna->saved_na_vp;
2590*2ff91c17SVincenzo Maffione 	hwna->na_hostvp = NULL;
25914bf50f18SLuigi Rizzo 	hwna->na_private = NULL;
25924bf50f18SLuigi Rizzo 	hwna->na_flags &= ~NAF_BUSY;
25934bf50f18SLuigi Rizzo 	netmap_adapter_put(hwna);
2594f9790aebSLuigi Rizzo 
2595f9790aebSLuigi Rizzo }
2596f9790aebSLuigi Rizzo 
259717885a7bSLuigi Rizzo 
2598f9790aebSLuigi Rizzo /*
259917885a7bSLuigi Rizzo  * Intr callback for NICs connected to a bridge.
260017885a7bSLuigi Rizzo  * Simply ignore tx interrupts (maybe we could try to recover space ?)
260117885a7bSLuigi Rizzo  * and pass received packets from nic to the bridge.
260217885a7bSLuigi Rizzo  *
2603f9790aebSLuigi Rizzo  * XXX TODO check locking: this is called from the interrupt
2604f9790aebSLuigi Rizzo  * handler so we should make sure that the interface is not
2605f9790aebSLuigi Rizzo  * disconnected while passing down an interrupt.
2606f9790aebSLuigi Rizzo  *
260717885a7bSLuigi Rizzo  * Note, no user process can access this NIC or the host stack.
260817885a7bSLuigi Rizzo  * The only part of the ring that is significant are the slots,
260917885a7bSLuigi Rizzo  * and head/cur/tail are set from the kring as needed
261017885a7bSLuigi Rizzo  * (part as a receive ring, part as a transmit ring).
261117885a7bSLuigi Rizzo  *
261217885a7bSLuigi Rizzo  * callback that overwrites the hwna notify callback.
261337e3a6d3SLuigi Rizzo  * Packets come from the outside or from the host stack and are put on an
261437e3a6d3SLuigi Rizzo  * hwna rx ring.
2615f9790aebSLuigi Rizzo  * The bridge wrapper then sends the packets through the bridge.
2616f9790aebSLuigi Rizzo  */
2617f9790aebSLuigi Rizzo static int
2618847bf383SLuigi Rizzo netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
2619f9790aebSLuigi Rizzo {
2620847bf383SLuigi Rizzo 	struct netmap_adapter *na = kring->na;
2621f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna = na->na_private;
2622847bf383SLuigi Rizzo 	struct netmap_kring *bkring;
2623f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna = &bna->up;
2624847bf383SLuigi Rizzo 	u_int ring_nr = kring->ring_id;
262537e3a6d3SLuigi Rizzo 	int ret = NM_IRQ_COMPLETED;
262637e3a6d3SLuigi Rizzo 	int error;
2627f9790aebSLuigi Rizzo 
262817885a7bSLuigi Rizzo 	if (netmap_verbose)
2629847bf383SLuigi Rizzo 	    D("%s %s 0x%x", na->name, kring->name, flags);
2630f9790aebSLuigi Rizzo 
2631*2ff91c17SVincenzo Maffione 	bkring = vpna->up.tx_rings[ring_nr];
2632f9790aebSLuigi Rizzo 
2633f9790aebSLuigi Rizzo 	/* make sure the ring is not disabled */
263437e3a6d3SLuigi Rizzo 	if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) {
263537e3a6d3SLuigi Rizzo 		return EIO;
263637e3a6d3SLuigi Rizzo 	}
2637f9790aebSLuigi Rizzo 
263817885a7bSLuigi Rizzo 	if (netmap_verbose)
2639847bf383SLuigi Rizzo 	    D("%s head %d cur %d tail %d",  na->name,
264017885a7bSLuigi Rizzo 		kring->rhead, kring->rcur, kring->rtail);
264117885a7bSLuigi Rizzo 
2642847bf383SLuigi Rizzo 	/* simulate a user wakeup on the rx ring
2643847bf383SLuigi Rizzo 	 * fetch packets that have arrived.
2644f9790aebSLuigi Rizzo 	 */
2645f0ea3689SLuigi Rizzo 	error = kring->nm_sync(kring, 0);
2646f9790aebSLuigi Rizzo 	if (error)
2647f9790aebSLuigi Rizzo 		goto put_out;
264837e3a6d3SLuigi Rizzo 	if (kring->nr_hwcur == kring->nr_hwtail) {
264937e3a6d3SLuigi Rizzo 		if (netmap_verbose)
2650f9790aebSLuigi Rizzo 			D("how strange, interrupt with no packets on %s",
26514bf50f18SLuigi Rizzo 			    na->name);
2652f9790aebSLuigi Rizzo 		goto put_out;
2653f9790aebSLuigi Rizzo 	}
265417885a7bSLuigi Rizzo 
2655847bf383SLuigi Rizzo 	/* new packets are kring->rcur to kring->nr_hwtail, and the bkring
2656847bf383SLuigi Rizzo 	 * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail
265717885a7bSLuigi Rizzo 	 * to push all packets out.
265817885a7bSLuigi Rizzo 	 */
2659847bf383SLuigi Rizzo 	bkring->rhead = bkring->rcur = kring->nr_hwtail;
266017885a7bSLuigi Rizzo 
26614bf50f18SLuigi Rizzo 	netmap_vp_txsync(bkring, flags);
2662f9790aebSLuigi Rizzo 
266317885a7bSLuigi Rizzo 	/* mark all buffers as released on this ring */
2664847bf383SLuigi Rizzo 	kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail;
266517885a7bSLuigi Rizzo 	/* another call to actually release the buffers */
2666f0ea3689SLuigi Rizzo 	error = kring->nm_sync(kring, 0);
2667f9790aebSLuigi Rizzo 
266837e3a6d3SLuigi Rizzo 	/* The second rxsync may have further advanced hwtail. If this happens,
266937e3a6d3SLuigi Rizzo 	 *  return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */
267037e3a6d3SLuigi Rizzo 	if (kring->rcur != kring->nr_hwtail) {
267137e3a6d3SLuigi Rizzo 		ret = NM_IRQ_RESCHED;
267237e3a6d3SLuigi Rizzo 	}
2673f9790aebSLuigi Rizzo put_out:
2674f9790aebSLuigi Rizzo 	nm_kr_put(kring);
267537e3a6d3SLuigi Rizzo 
267637e3a6d3SLuigi Rizzo 	return error ? error : ret;
2677f9790aebSLuigi Rizzo }
2678f9790aebSLuigi Rizzo 
267917885a7bSLuigi Rizzo 
26804bf50f18SLuigi Rizzo /* nm_register callback for bwrap */
2681f9790aebSLuigi Rizzo static int
268237e3a6d3SLuigi Rizzo netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
2683f9790aebSLuigi Rizzo {
2684f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
2685f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
2686f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
2687f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *hostna = &bna->host;
268837e3a6d3SLuigi Rizzo 	int error, i;
2689847bf383SLuigi Rizzo 	enum txrx t;
2690f9790aebSLuigi Rizzo 
26914bf50f18SLuigi Rizzo 	ND("%s %s", na->name, onoff ? "on" : "off");
2692f9790aebSLuigi Rizzo 
2693f9790aebSLuigi Rizzo 	if (onoff) {
26944bf50f18SLuigi Rizzo 		/* netmap_do_regif has been called on the bwrap na.
26954bf50f18SLuigi Rizzo 		 * We need to pass the information about the
26964bf50f18SLuigi Rizzo 		 * memory allocator down to the hwna before
26974bf50f18SLuigi Rizzo 		 * putting it in netmap mode
26984bf50f18SLuigi Rizzo 		 */
2699f9790aebSLuigi Rizzo 		hwna->na_lut = na->na_lut;
2700f9790aebSLuigi Rizzo 
2701f9790aebSLuigi Rizzo 		if (hostna->na_bdg) {
27024bf50f18SLuigi Rizzo 			/* if the host rings have been attached to switch,
27034bf50f18SLuigi Rizzo 			 * we need to copy the memory allocator information
27044bf50f18SLuigi Rizzo 			 * in the hostna also
27054bf50f18SLuigi Rizzo 			 */
2706f9790aebSLuigi Rizzo 			hostna->up.na_lut = na->na_lut;
2707f9790aebSLuigi Rizzo 		}
2708f9790aebSLuigi Rizzo 
270937e3a6d3SLuigi Rizzo 	}
271037e3a6d3SLuigi Rizzo 
271137e3a6d3SLuigi Rizzo 	/* pass down the pending ring state information */
271237e3a6d3SLuigi Rizzo 	for_rx_tx(t) {
271337e3a6d3SLuigi Rizzo 		for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
2714*2ff91c17SVincenzo Maffione 			NMR(hwna, t)[i]->nr_pending_mode =
2715*2ff91c17SVincenzo Maffione 				NMR(na, t)[i]->nr_pending_mode;
2716f9790aebSLuigi Rizzo 	}
2717f9790aebSLuigi Rizzo 
27184bf50f18SLuigi Rizzo 	/* forward the request to the hwna */
2719f9790aebSLuigi Rizzo 	error = hwna->nm_register(hwna, onoff);
2720f9790aebSLuigi Rizzo 	if (error)
2721f9790aebSLuigi Rizzo 		return error;
2722f9790aebSLuigi Rizzo 
272337e3a6d3SLuigi Rizzo 	/* copy up the current ring state information */
272437e3a6d3SLuigi Rizzo 	for_rx_tx(t) {
27254f80b14cSVincenzo Maffione 		for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
2726*2ff91c17SVincenzo Maffione 			struct netmap_kring *kring = NMR(hwna, t)[i];
2727*2ff91c17SVincenzo Maffione 			NMR(na, t)[i]->nr_mode = kring->nr_mode;
27284f80b14cSVincenzo Maffione 		}
272937e3a6d3SLuigi Rizzo 	}
273037e3a6d3SLuigi Rizzo 
27314bf50f18SLuigi Rizzo 	/* impersonate a netmap_vp_adapter */
27324bf50f18SLuigi Rizzo 	netmap_vp_reg(na, onoff);
27334bf50f18SLuigi Rizzo 	if (hostna->na_bdg)
27344bf50f18SLuigi Rizzo 		netmap_vp_reg(&hostna->up, onoff);
2735f9790aebSLuigi Rizzo 
2736f9790aebSLuigi Rizzo 	if (onoff) {
2737847bf383SLuigi Rizzo 		u_int i;
2738847bf383SLuigi Rizzo 		/* intercept the hwna nm_nofify callback on the hw rings */
2739847bf383SLuigi Rizzo 		for (i = 0; i < hwna->num_rx_rings; i++) {
2740*2ff91c17SVincenzo Maffione 			hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify;
2741*2ff91c17SVincenzo Maffione 			hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify;
2742847bf383SLuigi Rizzo 		}
2743847bf383SLuigi Rizzo 		i = hwna->num_rx_rings; /* for safety */
2744847bf383SLuigi Rizzo 		/* save the host ring notify unconditionally */
2745*2ff91c17SVincenzo Maffione 		hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify;
2746847bf383SLuigi Rizzo 		if (hostna->na_bdg) {
2747847bf383SLuigi Rizzo 			/* also intercept the host ring notify */
2748*2ff91c17SVincenzo Maffione 			hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify;
2749847bf383SLuigi Rizzo 		}
275037e3a6d3SLuigi Rizzo 		if (na->active_fds == 0)
275137e3a6d3SLuigi Rizzo 			na->na_flags |= NAF_NETMAP_ON;
2752f9790aebSLuigi Rizzo 	} else {
2753847bf383SLuigi Rizzo 		u_int i;
275437e3a6d3SLuigi Rizzo 
275537e3a6d3SLuigi Rizzo 		if (na->active_fds == 0)
275637e3a6d3SLuigi Rizzo 			na->na_flags &= ~NAF_NETMAP_ON;
275737e3a6d3SLuigi Rizzo 
2758847bf383SLuigi Rizzo 		/* reset all notify callbacks (including host ring) */
2759847bf383SLuigi Rizzo 		for (i = 0; i <= hwna->num_rx_rings; i++) {
2760*2ff91c17SVincenzo Maffione 			hwna->rx_rings[i]->nm_notify = hwna->rx_rings[i]->save_notify;
2761*2ff91c17SVincenzo Maffione 			hwna->rx_rings[i]->save_notify = NULL;
2762847bf383SLuigi Rizzo 		}
2763847bf383SLuigi Rizzo 		hwna->na_lut.lut = NULL;
2764*2ff91c17SVincenzo Maffione 		hwna->na_lut.plut = NULL;
2765847bf383SLuigi Rizzo 		hwna->na_lut.objtotal = 0;
2766847bf383SLuigi Rizzo 		hwna->na_lut.objsize = 0;
27674f80b14cSVincenzo Maffione 
27684f80b14cSVincenzo Maffione 		/* pass ownership of the netmap rings to the hwna */
27694f80b14cSVincenzo Maffione 		for_rx_tx(t) {
27704f80b14cSVincenzo Maffione 			for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
2771*2ff91c17SVincenzo Maffione 				NMR(na, t)[i]->ring = NULL;
27724f80b14cSVincenzo Maffione 			}
27734f80b14cSVincenzo Maffione 		}
27744f80b14cSVincenzo Maffione 
2775f9790aebSLuigi Rizzo 	}
2776f9790aebSLuigi Rizzo 
2777f9790aebSLuigi Rizzo 	return 0;
2778f9790aebSLuigi Rizzo }
2779f9790aebSLuigi Rizzo 
27804bf50f18SLuigi Rizzo /* nm_config callback for bwrap */
2781f9790aebSLuigi Rizzo static int
2782*2ff91c17SVincenzo Maffione netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info)
2783f9790aebSLuigi Rizzo {
2784f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
2785f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
2786f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
2787f9790aebSLuigi Rizzo 
2788f9790aebSLuigi Rizzo 	/* forward the request */
2789f9790aebSLuigi Rizzo 	netmap_update_config(hwna);
2790*2ff91c17SVincenzo Maffione 	/* swap the results and propagate */
2791*2ff91c17SVincenzo Maffione 	info->num_tx_rings = hwna->num_rx_rings;
2792*2ff91c17SVincenzo Maffione 	info->num_tx_descs = hwna->num_rx_desc;
2793*2ff91c17SVincenzo Maffione 	info->num_rx_rings = hwna->num_tx_rings;
2794*2ff91c17SVincenzo Maffione 	info->num_rx_descs = hwna->num_tx_desc;
2795*2ff91c17SVincenzo Maffione 	info->rx_buf_maxsize = hwna->rx_buf_maxsize;
2796f9790aebSLuigi Rizzo 
2797f9790aebSLuigi Rizzo 	return 0;
2798f9790aebSLuigi Rizzo }
2799f9790aebSLuigi Rizzo 
280017885a7bSLuigi Rizzo 
28014bf50f18SLuigi Rizzo /* nm_krings_create callback for bwrap */
2802f9790aebSLuigi Rizzo static int
2803f9790aebSLuigi Rizzo netmap_bwrap_krings_create(struct netmap_adapter *na)
2804f9790aebSLuigi Rizzo {
2805f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
2806f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
2807f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
28084f80b14cSVincenzo Maffione 	struct netmap_adapter *hostna = &bna->host.up;
280937e3a6d3SLuigi Rizzo 	int i, error = 0;
281037e3a6d3SLuigi Rizzo 	enum txrx t;
2811f9790aebSLuigi Rizzo 
28124bf50f18SLuigi Rizzo 	ND("%s", na->name);
2813f9790aebSLuigi Rizzo 
28144bf50f18SLuigi Rizzo 	/* impersonate a netmap_vp_adapter */
2815f9790aebSLuigi Rizzo 	error = netmap_vp_krings_create(na);
2816f9790aebSLuigi Rizzo 	if (error)
2817f9790aebSLuigi Rizzo 		return error;
2818f9790aebSLuigi Rizzo 
28194bf50f18SLuigi Rizzo 	/* also create the hwna krings */
2820f9790aebSLuigi Rizzo 	error = hwna->nm_krings_create(hwna);
2821f9790aebSLuigi Rizzo 	if (error) {
282237e3a6d3SLuigi Rizzo 		goto err_del_vp_rings;
2823f9790aebSLuigi Rizzo 	}
2824f9790aebSLuigi Rizzo 
28254f80b14cSVincenzo Maffione 	/* increment the usage counter for all the hwna krings */
28264f80b14cSVincenzo Maffione         for_rx_tx(t) {
28274f80b14cSVincenzo Maffione                 for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) {
2828*2ff91c17SVincenzo Maffione 			NMR(hwna, t)[i]->users++;
28294f80b14cSVincenzo Maffione 		}
28304f80b14cSVincenzo Maffione         }
28314f80b14cSVincenzo Maffione 
28324f80b14cSVincenzo Maffione 	/* now create the actual rings */
28334f80b14cSVincenzo Maffione 	error = netmap_mem_rings_create(hwna);
28344f80b14cSVincenzo Maffione 	if (error) {
28354f80b14cSVincenzo Maffione 		goto err_dec_users;
28364f80b14cSVincenzo Maffione 	}
28374f80b14cSVincenzo Maffione 
28384f80b14cSVincenzo Maffione 	/* cross-link the netmap rings
28394f80b14cSVincenzo Maffione 	 * The original number of rings comes from hwna,
28404f80b14cSVincenzo Maffione 	 * rx rings on one side equals tx rings on the other.
28414f80b14cSVincenzo Maffione 	 */
284237e3a6d3SLuigi Rizzo         for_rx_tx(t) {
284337e3a6d3SLuigi Rizzo                 enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
284437e3a6d3SLuigi Rizzo                 for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) {
2845*2ff91c17SVincenzo Maffione                         NMR(na, t)[i]->nkr_num_slots = NMR(hwna, r)[i]->nkr_num_slots;
2846*2ff91c17SVincenzo Maffione                         NMR(na, t)[i]->ring = NMR(hwna, r)[i]->ring;
284737e3a6d3SLuigi Rizzo                 }
2848f0ea3689SLuigi Rizzo         }
2849f9790aebSLuigi Rizzo 
28504f80b14cSVincenzo Maffione 	if (na->na_flags & NAF_HOST_RINGS) {
28514f80b14cSVincenzo Maffione 		/* the hostna rings are the host rings of the bwrap.
28524f80b14cSVincenzo Maffione 		 * The corresponding krings must point back to the
28534f80b14cSVincenzo Maffione 		 * hostna
28544f80b14cSVincenzo Maffione 		 */
28554f80b14cSVincenzo Maffione 		hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
2856*2ff91c17SVincenzo Maffione 		hostna->tx_rings[0]->na = hostna;
28574f80b14cSVincenzo Maffione 		hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
2858*2ff91c17SVincenzo Maffione 		hostna->rx_rings[0]->na = hostna;
28594f80b14cSVincenzo Maffione 	}
28604f80b14cSVincenzo Maffione 
2861f9790aebSLuigi Rizzo 	return 0;
286237e3a6d3SLuigi Rizzo 
28634f80b14cSVincenzo Maffione err_dec_users:
28644f80b14cSVincenzo Maffione         for_rx_tx(t) {
2865*2ff91c17SVincenzo Maffione 		NMR(hwna, t)[i]->users--;
28664f80b14cSVincenzo Maffione         }
28674f80b14cSVincenzo Maffione 	hwna->nm_krings_delete(hwna);
286837e3a6d3SLuigi Rizzo err_del_vp_rings:
286937e3a6d3SLuigi Rizzo 	netmap_vp_krings_delete(na);
287037e3a6d3SLuigi Rizzo 
287137e3a6d3SLuigi Rizzo 	return error;
2872f9790aebSLuigi Rizzo }
2873f9790aebSLuigi Rizzo 
287417885a7bSLuigi Rizzo 
2875f9790aebSLuigi Rizzo static void
2876f9790aebSLuigi Rizzo netmap_bwrap_krings_delete(struct netmap_adapter *na)
2877f9790aebSLuigi Rizzo {
2878f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
2879f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
2880f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
28814f80b14cSVincenzo Maffione 	enum txrx t;
28824f80b14cSVincenzo Maffione 	int i;
2883f9790aebSLuigi Rizzo 
28844bf50f18SLuigi Rizzo 	ND("%s", na->name);
2885f9790aebSLuigi Rizzo 
28864f80b14cSVincenzo Maffione 	/* decrement the usage counter for all the hwna krings */
28874f80b14cSVincenzo Maffione         for_rx_tx(t) {
28884f80b14cSVincenzo Maffione                 for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) {
2889*2ff91c17SVincenzo Maffione 			NMR(hwna, t)[i]->users--;
28904f80b14cSVincenzo Maffione 		}
28914f80b14cSVincenzo Maffione         }
28924f80b14cSVincenzo Maffione 
28934f80b14cSVincenzo Maffione 	/* delete any netmap rings that are no longer needed */
28944f80b14cSVincenzo Maffione 	netmap_mem_rings_delete(hwna);
2895f9790aebSLuigi Rizzo 	hwna->nm_krings_delete(hwna);
2896f9790aebSLuigi Rizzo 	netmap_vp_krings_delete(na);
2897f9790aebSLuigi Rizzo }
2898f9790aebSLuigi Rizzo 
289917885a7bSLuigi Rizzo 
2900f9790aebSLuigi Rizzo /* notify method for the bridge-->hwna direction */
2901f9790aebSLuigi Rizzo static int
2902847bf383SLuigi Rizzo netmap_bwrap_notify(struct netmap_kring *kring, int flags)
2903f9790aebSLuigi Rizzo {
2904847bf383SLuigi Rizzo 	struct netmap_adapter *na = kring->na;
2905847bf383SLuigi Rizzo 	struct netmap_bwrap_adapter *bna = na->na_private;
2906f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
2907847bf383SLuigi Rizzo 	u_int ring_n = kring->ring_id;
2908847bf383SLuigi Rizzo 	u_int lim = kring->nkr_num_slots - 1;
2909847bf383SLuigi Rizzo 	struct netmap_kring *hw_kring;
291037e3a6d3SLuigi Rizzo 	int error;
2911f9790aebSLuigi Rizzo 
2912847bf383SLuigi Rizzo 	ND("%s: na %s hwna %s",
2913847bf383SLuigi Rizzo 			(kring ? kring->name : "NULL!"),
2914847bf383SLuigi Rizzo 			(na ? na->name : "NULL!"),
2915847bf383SLuigi Rizzo 			(hwna ? hwna->name : "NULL!"));
2916*2ff91c17SVincenzo Maffione 	hw_kring = hwna->tx_rings[ring_n];
2917847bf383SLuigi Rizzo 
291837e3a6d3SLuigi Rizzo 	if (nm_kr_tryget(hw_kring, 0, NULL)) {
291937e3a6d3SLuigi Rizzo 		return ENXIO;
292037e3a6d3SLuigi Rizzo 	}
2921f9790aebSLuigi Rizzo 
292217885a7bSLuigi Rizzo 	/* first step: simulate a user wakeup on the rx ring */
2923847bf383SLuigi Rizzo 	netmap_vp_rxsync(kring, flags);
292417885a7bSLuigi Rizzo 	ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
29254bf50f18SLuigi Rizzo 		na->name, ring_n,
292617885a7bSLuigi Rizzo 		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
292717885a7bSLuigi Rizzo 		ring->head, ring->cur, ring->tail,
292817885a7bSLuigi Rizzo 		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail);
2929847bf383SLuigi Rizzo 	/* second step: the new packets are sent on the tx ring
293017885a7bSLuigi Rizzo 	 * (which is actually the same ring)
293117885a7bSLuigi Rizzo 	 */
2932847bf383SLuigi Rizzo 	hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
2933f0ea3689SLuigi Rizzo 	error = hw_kring->nm_sync(hw_kring, flags);
2934847bf383SLuigi Rizzo 	if (error)
293537e3a6d3SLuigi Rizzo 		goto put_out;
293617885a7bSLuigi Rizzo 
2937847bf383SLuigi Rizzo 	/* third step: now we are back the rx ring */
293817885a7bSLuigi Rizzo 	/* claim ownership on all hw owned bufs */
2939847bf383SLuigi Rizzo 	kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */
294017885a7bSLuigi Rizzo 
2941847bf383SLuigi Rizzo 	/* fourth step: the user goes to sleep again, causing another rxsync */
2942847bf383SLuigi Rizzo 	netmap_vp_rxsync(kring, flags);
294317885a7bSLuigi Rizzo 	ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
29444bf50f18SLuigi Rizzo 		na->name, ring_n,
294517885a7bSLuigi Rizzo 		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
294617885a7bSLuigi Rizzo 		ring->head, ring->cur, ring->tail,
294717885a7bSLuigi Rizzo 		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
294837e3a6d3SLuigi Rizzo put_out:
2949847bf383SLuigi Rizzo 	nm_kr_put(hw_kring);
295037e3a6d3SLuigi Rizzo 
295137e3a6d3SLuigi Rizzo 	return error ? error : NM_IRQ_COMPLETED;
2952f9790aebSLuigi Rizzo }
2953f9790aebSLuigi Rizzo 
295417885a7bSLuigi Rizzo 
29554bf50f18SLuigi Rizzo /* nm_bdg_ctl callback for the bwrap.
29564bf50f18SLuigi Rizzo  * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
29574bf50f18SLuigi Rizzo  * On attach, it needs to provide a fake netmap_priv_d structure and
29584bf50f18SLuigi Rizzo  * perform a netmap_do_regif() on the bwrap. This will put both the
29594bf50f18SLuigi Rizzo  * bwrap and the hwna in netmap mode, with the netmap rings shared
29604bf50f18SLuigi Rizzo  * and cross linked. Moroever, it will start intercepting interrupts
29614bf50f18SLuigi Rizzo  * directed to hwna.
29624bf50f18SLuigi Rizzo  */
2963f9790aebSLuigi Rizzo static int
2964*2ff91c17SVincenzo Maffione netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
29654bf50f18SLuigi Rizzo {
29664bf50f18SLuigi Rizzo 	struct netmap_priv_d *npriv;
29674bf50f18SLuigi Rizzo 	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
29684bf50f18SLuigi Rizzo 	int error = 0;
29694bf50f18SLuigi Rizzo 
2970*2ff91c17SVincenzo Maffione 	if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
2971*2ff91c17SVincenzo Maffione 		struct nmreq_vale_attach *req =
2972*2ff91c17SVincenzo Maffione 			(struct nmreq_vale_attach *)hdr->nr_body;
2973*2ff91c17SVincenzo Maffione 		if (req->reg.nr_ringid != 0 ||
2974*2ff91c17SVincenzo Maffione 			(req->reg.nr_mode != NR_REG_ALL_NIC &&
2975*2ff91c17SVincenzo Maffione 				req->reg.nr_mode != NR_REG_NIC_SW)) {
2976*2ff91c17SVincenzo Maffione 			/* We only support attaching all the NIC rings
2977*2ff91c17SVincenzo Maffione 			 * and/or the host stack. */
2978*2ff91c17SVincenzo Maffione 			return EINVAL;
2979*2ff91c17SVincenzo Maffione 		}
29804bf50f18SLuigi Rizzo 		if (NETMAP_OWNED_BY_ANY(na)) {
29814bf50f18SLuigi Rizzo 			return EBUSY;
29824bf50f18SLuigi Rizzo 		}
29834bf50f18SLuigi Rizzo 		if (bna->na_kpriv) {
29844bf50f18SLuigi Rizzo 			/* nothing to do */
29854bf50f18SLuigi Rizzo 			return 0;
29864bf50f18SLuigi Rizzo 		}
298737e3a6d3SLuigi Rizzo 		npriv = netmap_priv_new();
29884bf50f18SLuigi Rizzo 		if (npriv == NULL)
29894bf50f18SLuigi Rizzo 			return ENOMEM;
299037e3a6d3SLuigi Rizzo 		npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */
2991*2ff91c17SVincenzo Maffione 		error = netmap_do_regif(npriv, na, req->reg.nr_mode,
2992*2ff91c17SVincenzo Maffione 					req->reg.nr_ringid, req->reg.nr_flags);
2993847bf383SLuigi Rizzo 		if (error) {
299437e3a6d3SLuigi Rizzo 			netmap_priv_delete(npriv);
29954bf50f18SLuigi Rizzo 			return error;
29964bf50f18SLuigi Rizzo 		}
29974bf50f18SLuigi Rizzo 		bna->na_kpriv = npriv;
29984bf50f18SLuigi Rizzo 		na->na_flags |= NAF_BUSY;
29994bf50f18SLuigi Rizzo 	} else {
30004bf50f18SLuigi Rizzo 		if (na->active_fds == 0) /* not registered */
30014bf50f18SLuigi Rizzo 			return EINVAL;
300237e3a6d3SLuigi Rizzo 		netmap_priv_delete(bna->na_kpriv);
30034bf50f18SLuigi Rizzo 		bna->na_kpriv = NULL;
30044bf50f18SLuigi Rizzo 		na->na_flags &= ~NAF_BUSY;
30054bf50f18SLuigi Rizzo 	}
30064bf50f18SLuigi Rizzo 
3007*2ff91c17SVincenzo Maffione 	return error;
30084bf50f18SLuigi Rizzo }
30094bf50f18SLuigi Rizzo 
30104bf50f18SLuigi Rizzo /* attach a bridge wrapper to the 'real' device */
30114bf50f18SLuigi Rizzo int
30124bf50f18SLuigi Rizzo netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
3013f9790aebSLuigi Rizzo {
3014f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna;
30154bf50f18SLuigi Rizzo 	struct netmap_adapter *na = NULL;
30164bf50f18SLuigi Rizzo 	struct netmap_adapter *hostna = NULL;
30174bf50f18SLuigi Rizzo 	int error = 0;
3018847bf383SLuigi Rizzo 	enum txrx t;
3019f9790aebSLuigi Rizzo 
30204bf50f18SLuigi Rizzo 	/* make sure the NIC is not already in use */
30214bf50f18SLuigi Rizzo 	if (NETMAP_OWNED_BY_ANY(hwna)) {
30224bf50f18SLuigi Rizzo 		D("NIC %s busy, cannot attach to bridge", hwna->name);
30234bf50f18SLuigi Rizzo 		return EBUSY;
30244bf50f18SLuigi Rizzo 	}
3025f9790aebSLuigi Rizzo 
3026c3e9b4dbSLuiz Otavio O Souza 	bna = nm_os_malloc(sizeof(*bna));
30274bf50f18SLuigi Rizzo 	if (bna == NULL) {
3028f9790aebSLuigi Rizzo 		return ENOMEM;
30294bf50f18SLuigi Rizzo 	}
3030f9790aebSLuigi Rizzo 
3031f9790aebSLuigi Rizzo 	na = &bna->up.up;
303237e3a6d3SLuigi Rizzo 	/* make bwrap ifp point to the real ifp */
303337e3a6d3SLuigi Rizzo 	na->ifp = hwna->ifp;
3034c3e9b4dbSLuiz Otavio O Souza 	if_ref(na->ifp);
3035847bf383SLuigi Rizzo 	na->na_private = bna;
30364bf50f18SLuigi Rizzo 	strncpy(na->name, nr_name, sizeof(na->name));
3037f9790aebSLuigi Rizzo 	/* fill the ring data for the bwrap adapter with rx/tx meanings
3038f9790aebSLuigi Rizzo 	 * swapped. The real cross-linking will be done during register,
3039f9790aebSLuigi Rizzo 	 * when all the krings will have been created.
3040f9790aebSLuigi Rizzo 	 */
3041847bf383SLuigi Rizzo 	for_rx_tx(t) {
3042847bf383SLuigi Rizzo 		enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
3043847bf383SLuigi Rizzo 		nma_set_nrings(na, t, nma_get_nrings(hwna, r));
3044847bf383SLuigi Rizzo 		nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
3045847bf383SLuigi Rizzo 	}
3046f9790aebSLuigi Rizzo 	na->nm_dtor = netmap_bwrap_dtor;
304737e3a6d3SLuigi Rizzo 	na->nm_register = netmap_bwrap_reg;
3048f9790aebSLuigi Rizzo 	// na->nm_txsync = netmap_bwrap_txsync;
3049f9790aebSLuigi Rizzo 	// na->nm_rxsync = netmap_bwrap_rxsync;
3050f9790aebSLuigi Rizzo 	na->nm_config = netmap_bwrap_config;
3051f9790aebSLuigi Rizzo 	na->nm_krings_create = netmap_bwrap_krings_create;
3052f9790aebSLuigi Rizzo 	na->nm_krings_delete = netmap_bwrap_krings_delete;
3053f9790aebSLuigi Rizzo 	na->nm_notify = netmap_bwrap_notify;
30544bf50f18SLuigi Rizzo 	na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
30554bf50f18SLuigi Rizzo 	na->pdev = hwna->pdev;
3056c3e9b4dbSLuiz Otavio O Souza 	na->nm_mem = netmap_mem_get(hwna->nm_mem);
305737e3a6d3SLuigi Rizzo 	na->virt_hdr_len = hwna->virt_hdr_len;
3058f9790aebSLuigi Rizzo 	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
30594f80b14cSVincenzo Maffione 	/* Set the mfs, needed on the VALE mismatch datapath. */
30604f80b14cSVincenzo Maffione 	bna->up.mfs = NM_BDG_MFS_DEFAULT;
3061f9790aebSLuigi Rizzo 
3062f9790aebSLuigi Rizzo 	bna->hwna = hwna;
3063f9790aebSLuigi Rizzo 	netmap_adapter_get(hwna);
3064f9790aebSLuigi Rizzo 	hwna->na_private = bna; /* weak reference */
3065*2ff91c17SVincenzo Maffione 	bna->saved_na_vp = hwna->na_vp;
30664bf50f18SLuigi Rizzo 	hwna->na_vp = &bna->up;
3067*2ff91c17SVincenzo Maffione 	bna->up.up.na_vp = &(bna->up);
3068f9790aebSLuigi Rizzo 
3069f0ea3689SLuigi Rizzo 	if (hwna->na_flags & NAF_HOST_RINGS) {
30704bf50f18SLuigi Rizzo 		if (hwna->na_flags & NAF_SW_ONLY)
30714bf50f18SLuigi Rizzo 			na->na_flags |= NAF_SW_ONLY;
3072f0ea3689SLuigi Rizzo 		na->na_flags |= NAF_HOST_RINGS;
3073f9790aebSLuigi Rizzo 		hostna = &bna->host.up;
30744bf50f18SLuigi Rizzo 		snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name);
3075f9790aebSLuigi Rizzo 		hostna->ifp = hwna->ifp;
3076847bf383SLuigi Rizzo 		for_rx_tx(t) {
3077847bf383SLuigi Rizzo 			enum txrx r = nm_txrx_swap(t);
3078847bf383SLuigi Rizzo 			nma_set_nrings(hostna, t, 1);
3079847bf383SLuigi Rizzo 			nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
3080847bf383SLuigi Rizzo 		}
3081f9790aebSLuigi Rizzo 		// hostna->nm_txsync = netmap_bwrap_host_txsync;
3082f9790aebSLuigi Rizzo 		// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
3083847bf383SLuigi Rizzo 		hostna->nm_notify = netmap_bwrap_notify;
3084c3e9b4dbSLuiz Otavio O Souza 		hostna->nm_mem = netmap_mem_get(na->nm_mem);
3085f9790aebSLuigi Rizzo 		hostna->na_private = bna;
30864bf50f18SLuigi Rizzo 		hostna->na_vp = &bna->up;
30874bf50f18SLuigi Rizzo 		na->na_hostvp = hwna->na_hostvp =
30884bf50f18SLuigi Rizzo 			hostna->na_hostvp = &bna->host;
30894bf50f18SLuigi Rizzo 		hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
30904f80b14cSVincenzo Maffione 		bna->host.mfs = NM_BDG_MFS_DEFAULT;
3091f0ea3689SLuigi Rizzo 	}
3092f9790aebSLuigi Rizzo 
309317885a7bSLuigi Rizzo 	ND("%s<->%s txr %d txd %d rxr %d rxd %d",
30944bf50f18SLuigi Rizzo 		na->name, ifp->if_xname,
3095f9790aebSLuigi Rizzo 		na->num_tx_rings, na->num_tx_desc,
3096f9790aebSLuigi Rizzo 		na->num_rx_rings, na->num_rx_desc);
3097f9790aebSLuigi Rizzo 
3098f9790aebSLuigi Rizzo 	error = netmap_attach_common(na);
3099f9790aebSLuigi Rizzo 	if (error) {
31004bf50f18SLuigi Rizzo 		goto err_free;
31014bf50f18SLuigi Rizzo 	}
31024bf50f18SLuigi Rizzo 	hwna->na_flags |= NAF_BUSY;
31034bf50f18SLuigi Rizzo 	return 0;
31044bf50f18SLuigi Rizzo 
31054bf50f18SLuigi Rizzo err_free:
31064bf50f18SLuigi Rizzo 	hwna->na_vp = hwna->na_hostvp = NULL;
3107f9790aebSLuigi Rizzo 	netmap_adapter_put(hwna);
3108c3e9b4dbSLuiz Otavio O Souza 	nm_os_free(bna);
3109f9790aebSLuigi Rizzo 	return error;
31104bf50f18SLuigi Rizzo 
3111f9790aebSLuigi Rizzo }
3112f9790aebSLuigi Rizzo 
3113847bf383SLuigi Rizzo struct nm_bridge *
3114847bf383SLuigi Rizzo netmap_init_bridges2(u_int n)
3115f9790aebSLuigi Rizzo {
3116f9790aebSLuigi Rizzo 	int i;
3117847bf383SLuigi Rizzo 	struct nm_bridge *b;
3118847bf383SLuigi Rizzo 
3119c3e9b4dbSLuiz Otavio O Souza 	b = nm_os_malloc(sizeof(struct nm_bridge) * n);
3120847bf383SLuigi Rizzo 	if (b == NULL)
3121847bf383SLuigi Rizzo 		return NULL;
3122847bf383SLuigi Rizzo 	for (i = 0; i < n; i++)
3123847bf383SLuigi Rizzo 		BDG_RWINIT(&b[i]);
3124847bf383SLuigi Rizzo 	return b;
3125847bf383SLuigi Rizzo }
3126847bf383SLuigi Rizzo 
3127847bf383SLuigi Rizzo void
3128847bf383SLuigi Rizzo netmap_uninit_bridges2(struct nm_bridge *b, u_int n)
3129847bf383SLuigi Rizzo {
3130847bf383SLuigi Rizzo 	int i;
3131847bf383SLuigi Rizzo 
3132847bf383SLuigi Rizzo 	if (b == NULL)
3133847bf383SLuigi Rizzo 		return;
3134847bf383SLuigi Rizzo 
3135847bf383SLuigi Rizzo 	for (i = 0; i < n; i++)
3136847bf383SLuigi Rizzo 		BDG_RWDESTROY(&b[i]);
3137c3e9b4dbSLuiz Otavio O Souza 	nm_os_free(b);
3138847bf383SLuigi Rizzo }
3139847bf383SLuigi Rizzo 
3140847bf383SLuigi Rizzo int
3141847bf383SLuigi Rizzo netmap_init_bridges(void)
3142847bf383SLuigi Rizzo {
3143847bf383SLuigi Rizzo #ifdef CONFIG_NET_NS
3144847bf383SLuigi Rizzo 	return netmap_bns_register();
3145847bf383SLuigi Rizzo #else
3146847bf383SLuigi Rizzo 	nm_bridges = netmap_init_bridges2(NM_BRIDGES);
3147847bf383SLuigi Rizzo 	if (nm_bridges == NULL)
3148847bf383SLuigi Rizzo 		return ENOMEM;
3149847bf383SLuigi Rizzo 	return 0;
3150847bf383SLuigi Rizzo #endif
3151847bf383SLuigi Rizzo }
3152847bf383SLuigi Rizzo 
3153847bf383SLuigi Rizzo void
3154847bf383SLuigi Rizzo netmap_uninit_bridges(void)
3155847bf383SLuigi Rizzo {
3156847bf383SLuigi Rizzo #ifdef CONFIG_NET_NS
3157847bf383SLuigi Rizzo 	netmap_bns_unregister();
3158847bf383SLuigi Rizzo #else
3159847bf383SLuigi Rizzo 	netmap_uninit_bridges2(nm_bridges, NM_BRIDGES);
3160847bf383SLuigi Rizzo #endif
3161f9790aebSLuigi Rizzo }
3162f9790aebSLuigi Rizzo #endif /* WITH_VALE */
3163