xref: /freebsd-14.2/sys/dev/netmap/netmap_vale.c (revision e330262f)
1718cf2ccSPedro F. Giffuni /*-
2718cf2ccSPedro F. Giffuni  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3718cf2ccSPedro F. Giffuni  *
437e3a6d3SLuigi Rizzo  * Copyright (C) 2013-2016 Universita` di Pisa
537e3a6d3SLuigi Rizzo  * All rights reserved.
6f9790aebSLuigi Rizzo  *
7f9790aebSLuigi Rizzo  * Redistribution and use in source and binary forms, with or without
8f9790aebSLuigi Rizzo  * modification, are permitted provided that the following conditions
9f9790aebSLuigi Rizzo  * are met:
10f9790aebSLuigi Rizzo  *   1. Redistributions of source code must retain the above copyright
11f9790aebSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer.
12f9790aebSLuigi Rizzo  *   2. Redistributions in binary form must reproduce the above copyright
13f9790aebSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer in the
14f9790aebSLuigi Rizzo  *      documentation and/or other materials provided with the distribution.
15f9790aebSLuigi Rizzo  *
16f9790aebSLuigi Rizzo  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17f9790aebSLuigi Rizzo  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18f9790aebSLuigi Rizzo  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19f9790aebSLuigi Rizzo  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20f9790aebSLuigi Rizzo  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21f9790aebSLuigi Rizzo  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22f9790aebSLuigi Rizzo  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23f9790aebSLuigi Rizzo  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24f9790aebSLuigi Rizzo  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25f9790aebSLuigi Rizzo  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26f9790aebSLuigi Rizzo  * SUCH DAMAGE.
27f9790aebSLuigi Rizzo  */
28f9790aebSLuigi Rizzo 
29f9790aebSLuigi Rizzo 
30f9790aebSLuigi Rizzo #if defined(__FreeBSD__)
31f9790aebSLuigi Rizzo #include <sys/cdefs.h> /* prerequisite */
32f9790aebSLuigi Rizzo __FBSDID("$FreeBSD$");
33f9790aebSLuigi Rizzo 
34f9790aebSLuigi Rizzo #include <sys/types.h>
35f9790aebSLuigi Rizzo #include <sys/errno.h>
36f9790aebSLuigi Rizzo #include <sys/param.h>	/* defines used in kernel.h */
37f9790aebSLuigi Rizzo #include <sys/kernel.h>	/* types used in module initialization */
38f9790aebSLuigi Rizzo #include <sys/conf.h>	/* cdevsw struct, UID, GID */
39f9790aebSLuigi Rizzo #include <sys/sockio.h>
40f9790aebSLuigi Rizzo #include <sys/socketvar.h>	/* struct socket */
41f9790aebSLuigi Rizzo #include <sys/malloc.h>
42f9790aebSLuigi Rizzo #include <sys/poll.h>
43f9790aebSLuigi Rizzo #include <sys/rwlock.h>
44f9790aebSLuigi Rizzo #include <sys/socket.h> /* sockaddrs */
45f9790aebSLuigi Rizzo #include <sys/selinfo.h>
46f9790aebSLuigi Rizzo #include <sys/sysctl.h>
47f9790aebSLuigi Rizzo #include <net/if.h>
48f9790aebSLuigi Rizzo #include <net/if_var.h>
49f9790aebSLuigi Rizzo #include <net/bpf.h>		/* BIOCIMMEDIATE */
50f9790aebSLuigi Rizzo #include <machine/bus.h>	/* bus_dmamap_* */
51f9790aebSLuigi Rizzo #include <sys/endian.h>
52f9790aebSLuigi Rizzo #include <sys/refcount.h>
532a7db7a6SVincenzo Maffione #include <sys/smp.h>
54f9790aebSLuigi Rizzo 
55f9790aebSLuigi Rizzo 
56f9790aebSLuigi Rizzo #elif defined(linux)
57f9790aebSLuigi Rizzo 
58f9790aebSLuigi Rizzo #include "bsd_glue.h"
59f9790aebSLuigi Rizzo 
60f9790aebSLuigi Rizzo #elif defined(__APPLE__)
61f9790aebSLuigi Rizzo 
62f9790aebSLuigi Rizzo #warning OSX support is only partial
63f9790aebSLuigi Rizzo #include "osx_glue.h"
64f9790aebSLuigi Rizzo 
6537e3a6d3SLuigi Rizzo #elif defined(_WIN32)
6637e3a6d3SLuigi Rizzo #include "win_glue.h"
6737e3a6d3SLuigi Rizzo 
68f9790aebSLuigi Rizzo #else
69f9790aebSLuigi Rizzo 
70f9790aebSLuigi Rizzo #error	Unsupported platform
71f9790aebSLuigi Rizzo 
72f9790aebSLuigi Rizzo #endif /* unsupported */
73f9790aebSLuigi Rizzo 
74f9790aebSLuigi Rizzo /*
75f9790aebSLuigi Rizzo  * common headers
76f9790aebSLuigi Rizzo  */
77f9790aebSLuigi Rizzo 
78f9790aebSLuigi Rizzo #include <net/netmap.h>
79f9790aebSLuigi Rizzo #include <dev/netmap/netmap_kern.h>
80f9790aebSLuigi Rizzo #include <dev/netmap/netmap_mem2.h>
812a7db7a6SVincenzo Maffione #include <dev/netmap/netmap_bdg.h>
82f9790aebSLuigi Rizzo 
83f9790aebSLuigi Rizzo #ifdef WITH_VALE
84f9790aebSLuigi Rizzo 
85f9790aebSLuigi Rizzo /*
86f9790aebSLuigi Rizzo  * system parameters (most of them in netmap_kern.h)
8737e3a6d3SLuigi Rizzo  * NM_BDG_NAME		prefix for switch port names, default "vale"
88f9790aebSLuigi Rizzo  * NM_BDG_MAXPORTS	number of ports
89f9790aebSLuigi Rizzo  * NM_BRIDGES		max number of switches in the system.
90f9790aebSLuigi Rizzo  *
91f9790aebSLuigi Rizzo  * Switch ports are named valeX:Y where X is the switch name and Y
92f9790aebSLuigi Rizzo  * is the port. If Y matches a physical interface name, the port is
93f9790aebSLuigi Rizzo  * connected to a physical device.
94f9790aebSLuigi Rizzo  *
95f9790aebSLuigi Rizzo  * Unlike physical interfaces, switch ports use their own memory region
96f9790aebSLuigi Rizzo  * for rings and buffers.
97f9790aebSLuigi Rizzo  * The virtual interfaces use per-queue lock instead of core lock.
98f9790aebSLuigi Rizzo  * In the tx loop, we aggregate traffic in batches to make all operations
99f9790aebSLuigi Rizzo  * faster. The batch size is bridge_batch.
100f9790aebSLuigi Rizzo  */
/* XXX unclear how many rings are appropriate (must be a power of 2). */
#define NM_BDG_MAXRINGS		16
/* XXX same uncertainty as NM_BDG_MAXRINGS. */
#define NM_BDG_MAXSLOTS		4096
/* Ring size in the device. */
#define NM_BRIDGE_RINGSIZE	1024
/* Number of entries in the forwarding buffer. */
#define NM_BDG_BATCH		1024
/*
 * Actual size of the tables: one full batch plus room for the
 * fragments of the last packet in the batch.
 */
#define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NETMAP_MAX_FRAGS)
/* NM_FT_NULL terminates a list of slots in the ft. */
#define NM_FT_NULL		NM_BDG_BATCH_MAX
109f9790aebSLuigi Rizzo 
110f9790aebSLuigi Rizzo 
111f9790aebSLuigi Rizzo /*
112f9790aebSLuigi Rizzo  * bridge_batch is set via sysctl to the max batch size to be
113f9790aebSLuigi Rizzo  * used in the bridge. The actual value may be larger as the
114f9790aebSLuigi Rizzo  * last packet in the block may overflow the size.
115f9790aebSLuigi Rizzo  */
11637e3a6d3SLuigi Rizzo static int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
117dd6ab49aSVincenzo Maffione 
118dd6ab49aSVincenzo Maffione /* Max number of vale bridges (loader tunable). */
119dd6ab49aSVincenzo Maffione unsigned int vale_max_bridges = NM_BRIDGES;
120dd6ab49aSVincenzo Maffione 
12137e3a6d3SLuigi Rizzo SYSBEGIN(vars_vale);
122f9790aebSLuigi Rizzo SYSCTL_DECL(_dev_netmap);
1234f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0,
1244f80b14cSVincenzo Maffione 		"Max batch size to be used in the bridge");
125dd6ab49aSVincenzo Maffione SYSCTL_UINT(_dev_netmap, OID_AUTO, max_bridges, CTLFLAG_RDTUN, &vale_max_bridges, 0,
126dd6ab49aSVincenzo Maffione 		"Max number of vale bridges");
12737e3a6d3SLuigi Rizzo SYSEND;
128f9790aebSLuigi Rizzo 
129*e330262fSJustin Hibbits static int netmap_vale_vp_create(struct nmreq_header *hdr, if_t,
130c3e9b4dbSLuiz Otavio O Souza 		struct netmap_mem_d *nmd, struct netmap_vp_adapter **);
131b6e66be2SVincenzo Maffione static int netmap_vale_vp_bdg_attach(const char *, struct netmap_adapter *,
1322a7db7a6SVincenzo Maffione 		struct nm_bridge *);
1332a7db7a6SVincenzo Maffione static int netmap_vale_bwrap_attach(const char *, struct netmap_adapter *);
134f9790aebSLuigi Rizzo 
/*
 * Per-destination queue descriptor: for each output interface a
 * nm_vale_q is used to build a list of forwarding-table entries.
 * bq_head and bq_tail are initialized to NM_FT_NULL for an empty list.
 * bq_len counts output buffers rather than input slots, since
 * coalescing may happen during the copy.
 */
struct nm_vale_q {
	uint16_t	bq_head;	/* first entry of the list */
	uint16_t	bq_tail;	/* last entry of the list */
	uint32_t	bq_len;		/* number of buffers */
};
145f9790aebSLuigi Rizzo 
1462ff91c17SVincenzo Maffione /* Holds the default callbacks */
1472a7db7a6SVincenzo Maffione struct netmap_bdg_ops vale_bdg_ops = {
148b6e66be2SVincenzo Maffione 	.lookup = netmap_vale_learning,
1492a7db7a6SVincenzo Maffione 	.config = NULL,
1502a7db7a6SVincenzo Maffione 	.dtor = NULL,
151b6e66be2SVincenzo Maffione 	.vp_create = netmap_vale_vp_create,
1522a7db7a6SVincenzo Maffione 	.bwrap_attach = netmap_vale_bwrap_attach,
1532a7db7a6SVincenzo Maffione 	.name = NM_BDG_NAME,
154f9790aebSLuigi Rizzo };
155f9790aebSLuigi Rizzo 
156f9790aebSLuigi Rizzo /*
157f9790aebSLuigi Rizzo  * this is a slightly optimized copy routine which rounds
158f9790aebSLuigi Rizzo  * to multiple of 64 bytes and is often faster than dealing
159f9790aebSLuigi Rizzo  * with other odd sizes. We assume there is enough room
160f9790aebSLuigi Rizzo  * in the source and destination buffers.
161f9790aebSLuigi Rizzo  *
162a6d768d8SVincenzo Maffione  * XXX only for multiples of NM_BUF_ALIGN bytes, non overlapped.
163f9790aebSLuigi Rizzo  */
164a6d768d8SVincenzo Maffione 
165f9790aebSLuigi Rizzo static inline void
166f9790aebSLuigi Rizzo pkt_copy(void *_src, void *_dst, int l)
167f9790aebSLuigi Rizzo {
168f9790aebSLuigi Rizzo 	uint64_t *src = _src;
169f9790aebSLuigi Rizzo 	uint64_t *dst = _dst;
170f9790aebSLuigi Rizzo 	if (unlikely(l >= 1024)) {
171f9790aebSLuigi Rizzo 		memcpy(dst, src, l);
172f9790aebSLuigi Rizzo 		return;
173f9790aebSLuigi Rizzo 	}
174a6d768d8SVincenzo Maffione 	for (; likely(l > 0); l -= NM_BUF_ALIGN) {
175a6d768d8SVincenzo Maffione 		/* XXX NM_BUF_ALIGN/sizeof(uint64_t) statements */
176f9790aebSLuigi Rizzo 		*dst++ = *src++;
177f9790aebSLuigi Rizzo 		*dst++ = *src++;
178f9790aebSLuigi Rizzo 		*dst++ = *src++;
179f9790aebSLuigi Rizzo 		*dst++ = *src++;
180f9790aebSLuigi Rizzo 		*dst++ = *src++;
181f9790aebSLuigi Rizzo 		*dst++ = *src++;
182f9790aebSLuigi Rizzo 		*dst++ = *src++;
183f9790aebSLuigi Rizzo 		*dst++ = *src++;
184f9790aebSLuigi Rizzo 	}
185f9790aebSLuigi Rizzo }
186f9790aebSLuigi Rizzo 
187f9790aebSLuigi Rizzo 
188f9790aebSLuigi Rizzo /*
189f9790aebSLuigi Rizzo  * Free the forwarding tables for rings attached to switch ports.
190f9790aebSLuigi Rizzo  */
191f9790aebSLuigi Rizzo static void
192f9790aebSLuigi Rizzo nm_free_bdgfwd(struct netmap_adapter *na)
193f9790aebSLuigi Rizzo {
194f9790aebSLuigi Rizzo 	int nrings, i;
1952ff91c17SVincenzo Maffione 	struct netmap_kring **kring;
196f9790aebSLuigi Rizzo 
197f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
19817885a7bSLuigi Rizzo 	nrings = na->num_tx_rings;
19917885a7bSLuigi Rizzo 	kring = na->tx_rings;
200f9790aebSLuigi Rizzo 	for (i = 0; i < nrings; i++) {
2012ff91c17SVincenzo Maffione 		if (kring[i]->nkr_ft) {
2022ff91c17SVincenzo Maffione 			nm_os_free(kring[i]->nkr_ft);
2032ff91c17SVincenzo Maffione 			kring[i]->nkr_ft = NULL; /* protect from freeing twice */
204f9790aebSLuigi Rizzo 		}
205f9790aebSLuigi Rizzo 	}
206f9790aebSLuigi Rizzo }
207f9790aebSLuigi Rizzo 
208f9790aebSLuigi Rizzo 
209f9790aebSLuigi Rizzo /*
210f9790aebSLuigi Rizzo  * Allocate the forwarding tables for the rings attached to the bridge ports.
211f9790aebSLuigi Rizzo  */
212f9790aebSLuigi Rizzo static int
213f9790aebSLuigi Rizzo nm_alloc_bdgfwd(struct netmap_adapter *na)
214f9790aebSLuigi Rizzo {
215f9790aebSLuigi Rizzo 	int nrings, l, i, num_dstq;
2162ff91c17SVincenzo Maffione 	struct netmap_kring **kring;
217f9790aebSLuigi Rizzo 
218f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
219f9790aebSLuigi Rizzo 	/* all port:rings + broadcast */
220f9790aebSLuigi Rizzo 	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
221f9790aebSLuigi Rizzo 	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
222b6e66be2SVincenzo Maffione 	l += sizeof(struct nm_vale_q) * num_dstq;
223f9790aebSLuigi Rizzo 	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
224f9790aebSLuigi Rizzo 
225847bf383SLuigi Rizzo 	nrings = netmap_real_rings(na, NR_TX);
226f9790aebSLuigi Rizzo 	kring = na->tx_rings;
227f9790aebSLuigi Rizzo 	for (i = 0; i < nrings; i++) {
228f9790aebSLuigi Rizzo 		struct nm_bdg_fwd *ft;
229b6e66be2SVincenzo Maffione 		struct nm_vale_q *dstq;
230f9790aebSLuigi Rizzo 		int j;
231f9790aebSLuigi Rizzo 
232c3e9b4dbSLuiz Otavio O Souza 		ft = nm_os_malloc(l);
233f9790aebSLuigi Rizzo 		if (!ft) {
234f9790aebSLuigi Rizzo 			nm_free_bdgfwd(na);
235f9790aebSLuigi Rizzo 			return ENOMEM;
236f9790aebSLuigi Rizzo 		}
237b6e66be2SVincenzo Maffione 		dstq = (struct nm_vale_q *)(ft + NM_BDG_BATCH_MAX);
238f9790aebSLuigi Rizzo 		for (j = 0; j < num_dstq; j++) {
239f9790aebSLuigi Rizzo 			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
240f9790aebSLuigi Rizzo 			dstq[j].bq_len = 0;
241f9790aebSLuigi Rizzo 		}
2422ff91c17SVincenzo Maffione 		kring[i]->nkr_ft = ft;
243f9790aebSLuigi Rizzo 	}
244f9790aebSLuigi Rizzo 	return 0;
245f9790aebSLuigi Rizzo }
246f9790aebSLuigi Rizzo 
2472ff91c17SVincenzo Maffione /* Allows external modules to create bridges in exclusive mode,
2482ff91c17SVincenzo Maffione  * returns an authentication token that the external module will need
2492ff91c17SVincenzo Maffione  * to provide during nm_bdg_ctl_{attach, detach}(), netmap_bdg_regops(),
2502ff91c17SVincenzo Maffione  * and nm_bdg_update_private_data() operations.
2512ff91c17SVincenzo Maffione  * Successfully executed if ret != NULL and *return_status == 0.
2522ff91c17SVincenzo Maffione  */
2532ff91c17SVincenzo Maffione void *
2542a7db7a6SVincenzo Maffione netmap_vale_create(const char *bdg_name, int *return_status)
2552ff91c17SVincenzo Maffione {
2562ff91c17SVincenzo Maffione 	struct nm_bridge *b = NULL;
2572ff91c17SVincenzo Maffione 	void *ret = NULL;
2582ff91c17SVincenzo Maffione 
2592ff91c17SVincenzo Maffione 	NMG_LOCK();
2602a7db7a6SVincenzo Maffione 	b = nm_find_bridge(bdg_name, 0 /* don't create */, NULL);
2612ff91c17SVincenzo Maffione 	if (b) {
2622ff91c17SVincenzo Maffione 		*return_status = EEXIST;
2632ff91c17SVincenzo Maffione 		goto unlock_bdg_create;
2642ff91c17SVincenzo Maffione 	}
2652ff91c17SVincenzo Maffione 
2662a7db7a6SVincenzo Maffione 	b = nm_find_bridge(bdg_name, 1 /* create */, &vale_bdg_ops);
2672ff91c17SVincenzo Maffione 	if (!b) {
2682ff91c17SVincenzo Maffione 		*return_status = ENOMEM;
2692ff91c17SVincenzo Maffione 		goto unlock_bdg_create;
2702ff91c17SVincenzo Maffione 	}
2712ff91c17SVincenzo Maffione 
2722ff91c17SVincenzo Maffione 	b->bdg_flags |= NM_BDG_ACTIVE | NM_BDG_EXCLUSIVE;
2732ff91c17SVincenzo Maffione 	ret = nm_bdg_get_auth_token(b);
2742ff91c17SVincenzo Maffione 	*return_status = 0;
2752ff91c17SVincenzo Maffione 
2762ff91c17SVincenzo Maffione unlock_bdg_create:
2772ff91c17SVincenzo Maffione 	NMG_UNLOCK();
2782ff91c17SVincenzo Maffione 	return ret;
2792ff91c17SVincenzo Maffione }
2802ff91c17SVincenzo Maffione 
2812ff91c17SVincenzo Maffione /* Allows external modules to destroy a bridge created through
2822ff91c17SVincenzo Maffione  * netmap_bdg_create(), the bridge must be empty.
2832ff91c17SVincenzo Maffione  */
2842ff91c17SVincenzo Maffione int
2852a7db7a6SVincenzo Maffione netmap_vale_destroy(const char *bdg_name, void *auth_token)
2862ff91c17SVincenzo Maffione {
2872ff91c17SVincenzo Maffione 	struct nm_bridge *b = NULL;
2882ff91c17SVincenzo Maffione 	int ret = 0;
2892ff91c17SVincenzo Maffione 
2902ff91c17SVincenzo Maffione 	NMG_LOCK();
2912a7db7a6SVincenzo Maffione 	b = nm_find_bridge(bdg_name, 0 /* don't create */, NULL);
2922ff91c17SVincenzo Maffione 	if (!b) {
2932ff91c17SVincenzo Maffione 		ret = ENXIO;
2942ff91c17SVincenzo Maffione 		goto unlock_bdg_free;
2952ff91c17SVincenzo Maffione 	}
2962ff91c17SVincenzo Maffione 
2972ff91c17SVincenzo Maffione 	if (!nm_bdg_valid_auth_token(b, auth_token)) {
2982ff91c17SVincenzo Maffione 		ret = EACCES;
2992ff91c17SVincenzo Maffione 		goto unlock_bdg_free;
3002ff91c17SVincenzo Maffione 	}
3012ff91c17SVincenzo Maffione 	if (!(b->bdg_flags & NM_BDG_EXCLUSIVE)) {
3022ff91c17SVincenzo Maffione 		ret = EINVAL;
3032ff91c17SVincenzo Maffione 		goto unlock_bdg_free;
3042ff91c17SVincenzo Maffione 	}
3052ff91c17SVincenzo Maffione 
3062ff91c17SVincenzo Maffione 	b->bdg_flags &= ~(NM_BDG_EXCLUSIVE | NM_BDG_ACTIVE);
3072ff91c17SVincenzo Maffione 	ret = netmap_bdg_free(b);
3082ff91c17SVincenzo Maffione 	if (ret) {
3092ff91c17SVincenzo Maffione 		b->bdg_flags |= NM_BDG_EXCLUSIVE | NM_BDG_ACTIVE;
3102ff91c17SVincenzo Maffione 	}
3112ff91c17SVincenzo Maffione 
3122ff91c17SVincenzo Maffione unlock_bdg_free:
3132ff91c17SVincenzo Maffione 	NMG_UNLOCK();
3142ff91c17SVincenzo Maffione 	return ret;
3152ff91c17SVincenzo Maffione }
3162ff91c17SVincenzo Maffione 
317b6e66be2SVincenzo Maffione /* Process NETMAP_REQ_VALE_LIST. */
318b6e66be2SVincenzo Maffione int
319b6e66be2SVincenzo Maffione netmap_vale_list(struct nmreq_header *hdr)
320b6e66be2SVincenzo Maffione {
321b6e66be2SVincenzo Maffione 	struct nmreq_vale_list *req =
322b6e66be2SVincenzo Maffione 		(struct nmreq_vale_list *)(uintptr_t)hdr->nr_body;
323b6e66be2SVincenzo Maffione 	int namelen = strlen(hdr->nr_name);
324b6e66be2SVincenzo Maffione 	struct nm_bridge *b, *bridges;
325b6e66be2SVincenzo Maffione 	struct netmap_vp_adapter *vpna;
326b6e66be2SVincenzo Maffione 	int error = 0, i, j;
327b6e66be2SVincenzo Maffione 	u_int num_bridges;
328b6e66be2SVincenzo Maffione 
329b6e66be2SVincenzo Maffione 	netmap_bns_getbridges(&bridges, &num_bridges);
330b6e66be2SVincenzo Maffione 
331b6e66be2SVincenzo Maffione 	/* this is used to enumerate bridges and ports */
332b6e66be2SVincenzo Maffione 	if (namelen) { /* look up indexes of bridge and port */
333b6e66be2SVincenzo Maffione 		if (strncmp(hdr->nr_name, NM_BDG_NAME,
334b6e66be2SVincenzo Maffione 					strlen(NM_BDG_NAME))) {
335b6e66be2SVincenzo Maffione 			return EINVAL;
336b6e66be2SVincenzo Maffione 		}
337b6e66be2SVincenzo Maffione 		NMG_LOCK();
338b6e66be2SVincenzo Maffione 		b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
339b6e66be2SVincenzo Maffione 		if (!b) {
340b6e66be2SVincenzo Maffione 			NMG_UNLOCK();
341b6e66be2SVincenzo Maffione 			return ENOENT;
342b6e66be2SVincenzo Maffione 		}
343b6e66be2SVincenzo Maffione 
344b6e66be2SVincenzo Maffione 		req->nr_bridge_idx = b - bridges; /* bridge index */
345b6e66be2SVincenzo Maffione 		req->nr_port_idx = NM_BDG_NOPORT;
346b6e66be2SVincenzo Maffione 		for (j = 0; j < b->bdg_active_ports; j++) {
347b6e66be2SVincenzo Maffione 			i = b->bdg_port_index[j];
348b6e66be2SVincenzo Maffione 			vpna = b->bdg_ports[i];
349b6e66be2SVincenzo Maffione 			if (vpna == NULL) {
350b6e66be2SVincenzo Maffione 				nm_prerr("This should not happen");
351b6e66be2SVincenzo Maffione 				continue;
352b6e66be2SVincenzo Maffione 			}
353b6e66be2SVincenzo Maffione 			/* the former and the latter identify a
354b6e66be2SVincenzo Maffione 			 * virtual port and a NIC, respectively
355b6e66be2SVincenzo Maffione 			 */
356b6e66be2SVincenzo Maffione 			if (!strcmp(vpna->up.name, hdr->nr_name)) {
357b6e66be2SVincenzo Maffione 				req->nr_port_idx = i; /* port index */
358b6e66be2SVincenzo Maffione 				break;
359b6e66be2SVincenzo Maffione 			}
360b6e66be2SVincenzo Maffione 		}
361b6e66be2SVincenzo Maffione 		NMG_UNLOCK();
362b6e66be2SVincenzo Maffione 	} else {
363b6e66be2SVincenzo Maffione 		/* return the first non-empty entry starting from
364b6e66be2SVincenzo Maffione 		 * bridge nr_arg1 and port nr_arg2.
365b6e66be2SVincenzo Maffione 		 *
366b6e66be2SVincenzo Maffione 		 * Users can detect the end of the same bridge by
367b6e66be2SVincenzo Maffione 		 * seeing the new and old value of nr_arg1, and can
368b6e66be2SVincenzo Maffione 		 * detect the end of all the bridge by error != 0
369b6e66be2SVincenzo Maffione 		 */
370b6e66be2SVincenzo Maffione 		i = req->nr_bridge_idx;
371b6e66be2SVincenzo Maffione 		j = req->nr_port_idx;
372b6e66be2SVincenzo Maffione 
373b6e66be2SVincenzo Maffione 		NMG_LOCK();
374dd6ab49aSVincenzo Maffione 		for (error = ENOENT; i < vale_max_bridges; i++) {
375b6e66be2SVincenzo Maffione 			b = bridges + i;
376b6e66be2SVincenzo Maffione 			for ( ; j < NM_BDG_MAXPORTS; j++) {
377b6e66be2SVincenzo Maffione 				if (b->bdg_ports[j] == NULL)
378b6e66be2SVincenzo Maffione 					continue;
379b6e66be2SVincenzo Maffione 				vpna = b->bdg_ports[j];
380b6e66be2SVincenzo Maffione 				/* write back the VALE switch name */
381b6e66be2SVincenzo Maffione 				strlcpy(hdr->nr_name, vpna->up.name,
382b6e66be2SVincenzo Maffione 					sizeof(hdr->nr_name));
383b6e66be2SVincenzo Maffione 				error = 0;
384b6e66be2SVincenzo Maffione 				goto out;
385b6e66be2SVincenzo Maffione 			}
386b6e66be2SVincenzo Maffione 			j = 0; /* following bridges scan from 0 */
387b6e66be2SVincenzo Maffione 		}
388b6e66be2SVincenzo Maffione 	out:
389b6e66be2SVincenzo Maffione 		req->nr_bridge_idx = i;
390b6e66be2SVincenzo Maffione 		req->nr_port_idx = j;
391b6e66be2SVincenzo Maffione 		NMG_UNLOCK();
392b6e66be2SVincenzo Maffione 	}
393b6e66be2SVincenzo Maffione 
394b6e66be2SVincenzo Maffione 	return error;
395b6e66be2SVincenzo Maffione }
396b6e66be2SVincenzo Maffione 
3972ff91c17SVincenzo Maffione 
3984bf50f18SLuigi Rizzo /* nm_dtor callback for ephemeral VALE ports */
3994bf50f18SLuigi Rizzo static void
400b6e66be2SVincenzo Maffione netmap_vale_vp_dtor(struct netmap_adapter *na)
4014bf50f18SLuigi Rizzo {
4024bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
4034bf50f18SLuigi Rizzo 	struct nm_bridge *b = vpna->na_bdg;
4044bf50f18SLuigi Rizzo 
40575f4f3edSVincenzo Maffione 	nm_prdis("%s has %d references", na->name, na->na_refcount);
406f9790aebSLuigi Rizzo 
407f9790aebSLuigi Rizzo 	if (b) {
408f9790aebSLuigi Rizzo 		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
409f9790aebSLuigi Rizzo 	}
410c3e9b4dbSLuiz Otavio O Souza 
4114f80b14cSVincenzo Maffione 	if (na->ifp != NULL && !nm_iszombie(na)) {
4122a7db7a6SVincenzo Maffione 		NM_DETACH_NA(na->ifp);
4134f80b14cSVincenzo Maffione 		if (vpna->autodelete) {
414*e330262fSJustin Hibbits 			nm_prdis("releasing %s", if_name(na->ifp));
415c3e9b4dbSLuiz Otavio O Souza 			NMG_UNLOCK();
416c3e9b4dbSLuiz Otavio O Souza 			nm_os_vi_detach(na->ifp);
417c3e9b4dbSLuiz Otavio O Souza 			NMG_LOCK();
418c3e9b4dbSLuiz Otavio O Souza 		}
419f9790aebSLuigi Rizzo 	}
4204f80b14cSVincenzo Maffione }
421f9790aebSLuigi Rizzo 
4222ff91c17SVincenzo Maffione 
4234bf50f18SLuigi Rizzo 
4244bf50f18SLuigi Rizzo /* nm_krings_create callback for VALE ports.
4254bf50f18SLuigi Rizzo  * Calls the standard netmap_krings_create, then adds leases on rx
4264bf50f18SLuigi Rizzo  * rings and bdgfwd on tx rings.
4274bf50f18SLuigi Rizzo  */
428f9790aebSLuigi Rizzo static int
429b6e66be2SVincenzo Maffione netmap_vale_vp_krings_create(struct netmap_adapter *na)
430f9790aebSLuigi Rizzo {
431f0ea3689SLuigi Rizzo 	u_int tailroom;
432f9790aebSLuigi Rizzo 	int error, i;
433f9790aebSLuigi Rizzo 	uint32_t *leases;
434847bf383SLuigi Rizzo 	u_int nrx = netmap_real_rings(na, NR_RX);
435f9790aebSLuigi Rizzo 
436f9790aebSLuigi Rizzo 	/*
437f9790aebSLuigi Rizzo 	 * Leases are attached to RX rings on vale ports
438f9790aebSLuigi Rizzo 	 */
439f9790aebSLuigi Rizzo 	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
440f9790aebSLuigi Rizzo 
441f0ea3689SLuigi Rizzo 	error = netmap_krings_create(na, tailroom);
442f9790aebSLuigi Rizzo 	if (error)
443f9790aebSLuigi Rizzo 		return error;
444f9790aebSLuigi Rizzo 
445f9790aebSLuigi Rizzo 	leases = na->tailroom;
446f9790aebSLuigi Rizzo 
447f9790aebSLuigi Rizzo 	for (i = 0; i < nrx; i++) { /* Receive rings */
4482ff91c17SVincenzo Maffione 		na->rx_rings[i]->nkr_leases = leases;
449f9790aebSLuigi Rizzo 		leases += na->num_rx_desc;
450f9790aebSLuigi Rizzo 	}
451f9790aebSLuigi Rizzo 
452f9790aebSLuigi Rizzo 	error = nm_alloc_bdgfwd(na);
453f9790aebSLuigi Rizzo 	if (error) {
454f9790aebSLuigi Rizzo 		netmap_krings_delete(na);
455f9790aebSLuigi Rizzo 		return error;
456f9790aebSLuigi Rizzo 	}
457f9790aebSLuigi Rizzo 
458f9790aebSLuigi Rizzo 	return 0;
459f9790aebSLuigi Rizzo }
460f9790aebSLuigi Rizzo 
46117885a7bSLuigi Rizzo 
/*
 * nm_krings_delete callback for VALE ports.
 * Releases the forwarding tables first, since they hang off the TX
 * krings, and then deletes the krings themselves.
 */
static void
netmap_vale_vp_krings_delete(struct netmap_adapter *na)
{
	nm_free_bdgfwd(na);
	netmap_krings_delete(na);
}
469f9790aebSLuigi Rizzo 
470f9790aebSLuigi Rizzo 
471f9790aebSLuigi Rizzo static int
472b6e66be2SVincenzo Maffione nm_vale_flush(struct nm_bdg_fwd *ft, u_int n,
473f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *na, u_int ring_nr);
474f9790aebSLuigi Rizzo 
475f9790aebSLuigi Rizzo 
476f9790aebSLuigi Rizzo /*
4774bf50f18SLuigi Rizzo  * main dispatch routine for the bridge.
478f9790aebSLuigi Rizzo  * Grab packets from a kring, move them into the ft structure
479f9790aebSLuigi Rizzo  * associated to the tx (input) port. Max one instance per port,
480f9790aebSLuigi Rizzo  * filtered on input (ioctl, poll or XXX).
481f9790aebSLuigi Rizzo  * Returns the next position in the ring.
482f9790aebSLuigi Rizzo  */
483f9790aebSLuigi Rizzo static int
484b6e66be2SVincenzo Maffione nm_vale_preflush(struct netmap_kring *kring, u_int end)
485f9790aebSLuigi Rizzo {
4864bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *na =
4874bf50f18SLuigi Rizzo 		(struct netmap_vp_adapter*)kring->na;
488f9790aebSLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
489f9790aebSLuigi Rizzo 	struct nm_bdg_fwd *ft;
4904bf50f18SLuigi Rizzo 	u_int ring_nr = kring->ring_id;
491f9790aebSLuigi Rizzo 	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
492f9790aebSLuigi Rizzo 	u_int ft_i = 0;	/* start from 0 */
493f9790aebSLuigi Rizzo 	u_int frags = 1; /* how many frags ? */
494f9790aebSLuigi Rizzo 	struct nm_bridge *b = na->na_bdg;
495f9790aebSLuigi Rizzo 
496f9790aebSLuigi Rizzo 	/* To protect against modifications to the bridge we acquire a
497f9790aebSLuigi Rizzo 	 * shared lock, waiting if we can sleep (if the source port is
498f9790aebSLuigi Rizzo 	 * attached to a user process) or with a trylock otherwise (NICs).
499f9790aebSLuigi Rizzo 	 */
50075f4f3edSVincenzo Maffione 	nm_prdis("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
501f9790aebSLuigi Rizzo 	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
502f9790aebSLuigi Rizzo 		BDG_RLOCK(b);
503f9790aebSLuigi Rizzo 	else if (!BDG_RTRYLOCK(b))
504c3e9b4dbSLuiz Otavio O Souza 		return j;
50575f4f3edSVincenzo Maffione 	nm_prdis(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
506f9790aebSLuigi Rizzo 	ft = kring->nkr_ft;
507f9790aebSLuigi Rizzo 
508f9790aebSLuigi Rizzo 	for (; likely(j != end); j = nm_next(j, lim)) {
509f9790aebSLuigi Rizzo 		struct netmap_slot *slot = &ring->slot[j];
510f9790aebSLuigi Rizzo 		char *buf;
511f9790aebSLuigi Rizzo 
512f9790aebSLuigi Rizzo 		ft[ft_i].ft_len = slot->len;
513f9790aebSLuigi Rizzo 		ft[ft_i].ft_flags = slot->flags;
5142ff91c17SVincenzo Maffione 		ft[ft_i].ft_offset = 0;
515f9790aebSLuigi Rizzo 
51675f4f3edSVincenzo Maffione 		nm_prdis("flags is 0x%x", slot->flags);
517847bf383SLuigi Rizzo 		/* we do not use the buf changed flag, but we still need to reset it */
518847bf383SLuigi Rizzo 		slot->flags &= ~NS_BUF_CHANGED;
519847bf383SLuigi Rizzo 
520f9790aebSLuigi Rizzo 		/* this slot goes into a list so initialize the link field */
521f9790aebSLuigi Rizzo 		ft[ft_i].ft_next = NM_FT_NULL;
522f9790aebSLuigi Rizzo 		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
523a6d768d8SVincenzo Maffione 			(void *)(uintptr_t)slot->ptr : NMB_O(kring, slot);
524a6d768d8SVincenzo Maffione 		if (unlikely(buf == NULL ||
525a6d768d8SVincenzo Maffione 		     slot->len > NETMAP_BUF_SIZE(&na->up) - nm_get_offset(kring, slot))) {
526b6e66be2SVincenzo Maffione 			nm_prlim(5, "NULL %s buffer pointer from %s slot %d len %d",
527e31c6ec7SLuigi Rizzo 				(slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT",
528e31c6ec7SLuigi Rizzo 				kring->name, j, ft[ft_i].ft_len);
5294bf50f18SLuigi Rizzo 			buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up);
530e31c6ec7SLuigi Rizzo 			ft[ft_i].ft_len = 0;
531e31c6ec7SLuigi Rizzo 			ft[ft_i].ft_flags = 0;
532e31c6ec7SLuigi Rizzo 		}
5332e159ef0SLuigi Rizzo 		__builtin_prefetch(buf);
534f9790aebSLuigi Rizzo 		++ft_i;
535f9790aebSLuigi Rizzo 		if (slot->flags & NS_MOREFRAG) {
536f9790aebSLuigi Rizzo 			frags++;
537f9790aebSLuigi Rizzo 			continue;
538f9790aebSLuigi Rizzo 		}
539f9790aebSLuigi Rizzo 		if (unlikely(netmap_verbose && frags > 1))
54075f4f3edSVincenzo Maffione 			nm_prlim(5, "%d frags at %d", frags, ft_i - frags);
541f9790aebSLuigi Rizzo 		ft[ft_i - frags].ft_frags = frags;
542f9790aebSLuigi Rizzo 		frags = 1;
543f9790aebSLuigi Rizzo 		if (unlikely((int)ft_i >= bridge_batch))
544b6e66be2SVincenzo Maffione 			ft_i = nm_vale_flush(ft, ft_i, na, ring_nr);
545f9790aebSLuigi Rizzo 	}
546f9790aebSLuigi Rizzo 	if (frags > 1) {
54737e3a6d3SLuigi Rizzo 		/* Here ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG, and we
54837e3a6d3SLuigi Rizzo 		 * have to fix frags count. */
54937e3a6d3SLuigi Rizzo 		frags--;
55037e3a6d3SLuigi Rizzo 		ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG;
55137e3a6d3SLuigi Rizzo 		ft[ft_i - frags].ft_frags = frags;
552b6e66be2SVincenzo Maffione 		nm_prlim(5, "Truncate incomplete fragment at %d (%d frags)", ft_i, frags);
553f9790aebSLuigi Rizzo 	}
554f9790aebSLuigi Rizzo 	if (ft_i)
555b6e66be2SVincenzo Maffione 		ft_i = nm_vale_flush(ft, ft_i, na, ring_nr);
556f9790aebSLuigi Rizzo 	BDG_RUNLOCK(b);
557f9790aebSLuigi Rizzo 	return j;
558f9790aebSLuigi Rizzo }
559f9790aebSLuigi Rizzo 
560f9790aebSLuigi Rizzo 
561f9790aebSLuigi Rizzo /* ----- FreeBSD if_bridge hash function ------- */
562f9790aebSLuigi Rizzo 
563f9790aebSLuigi Rizzo /*
564f9790aebSLuigi Rizzo  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
565f9790aebSLuigi Rizzo  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
566f9790aebSLuigi Rizzo  *
567f9790aebSLuigi Rizzo  * http://www.burtleburtle.net/bob/hash/spooky.html
568f9790aebSLuigi Rizzo  */
569f9790aebSLuigi Rizzo #define mix(a, b, c)                                                    \
570f9790aebSLuigi Rizzo do {                                                                    \
571f9790aebSLuigi Rizzo 	a -= b; a -= c; a ^= (c >> 13);                                 \
572f9790aebSLuigi Rizzo 	b -= c; b -= a; b ^= (a << 8);                                  \
573f9790aebSLuigi Rizzo 	c -= a; c -= b; c ^= (b >> 13);                                 \
574f9790aebSLuigi Rizzo 	a -= b; a -= c; a ^= (c >> 12);                                 \
575f9790aebSLuigi Rizzo 	b -= c; b -= a; b ^= (a << 16);                                 \
576f9790aebSLuigi Rizzo 	c -= a; c -= b; c ^= (b >> 5);                                  \
577f9790aebSLuigi Rizzo 	a -= b; a -= c; a ^= (c >> 3);                                  \
578f9790aebSLuigi Rizzo 	b -= c; b -= a; b ^= (a << 10);                                 \
579f9790aebSLuigi Rizzo 	c -= a; c -= b; c ^= (b >> 15);                                 \
580f9790aebSLuigi Rizzo } while (/*CONSTCOND*/0)
581f9790aebSLuigi Rizzo 
58217885a7bSLuigi Rizzo 
583f9790aebSLuigi Rizzo static __inline uint32_t
584b6e66be2SVincenzo Maffione nm_vale_rthash(const uint8_t *addr)
585f9790aebSLuigi Rizzo {
58645c67e8fSVincenzo Maffione 	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hash key
587f9790aebSLuigi Rizzo 
588f9790aebSLuigi Rizzo 	b += addr[5] << 8;
589f9790aebSLuigi Rizzo 	b += addr[4];
590f9790aebSLuigi Rizzo 	a += addr[3] << 24;
591f9790aebSLuigi Rizzo 	a += addr[2] << 16;
592f9790aebSLuigi Rizzo 	a += addr[1] << 8;
593f9790aebSLuigi Rizzo 	a += addr[0];
594f9790aebSLuigi Rizzo 
595f9790aebSLuigi Rizzo 	mix(a, b, c);
596f9790aebSLuigi Rizzo #define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
597f9790aebSLuigi Rizzo 	return (c & BRIDGE_RTHASH_MASK);
598f9790aebSLuigi Rizzo }
599f9790aebSLuigi Rizzo 
600f9790aebSLuigi Rizzo #undef mix
601f9790aebSLuigi Rizzo 
602f9790aebSLuigi Rizzo 
603f9790aebSLuigi Rizzo /*
604f9790aebSLuigi Rizzo  * Lookup function for a learning bridge.
605f9790aebSLuigi Rizzo  * Update the hash table with the source address,
606f9790aebSLuigi Rizzo  * and then returns the destination port index, and the
607f9790aebSLuigi Rizzo  * ring in *dst_ring (at the moment, always use ring 0)
608f9790aebSLuigi Rizzo  */
6092ff91c17SVincenzo Maffione uint32_t
610b6e66be2SVincenzo Maffione netmap_vale_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
6112ff91c17SVincenzo Maffione 		struct netmap_vp_adapter *na, void *private_data)
612f9790aebSLuigi Rizzo {
6132ff91c17SVincenzo Maffione 	uint8_t *buf = ((uint8_t *)ft->ft_buf) + ft->ft_offset;
6142ff91c17SVincenzo Maffione 	u_int buf_len = ft->ft_len - ft->ft_offset;
6152ff91c17SVincenzo Maffione 	struct nm_hash_ent *ht = private_data;
616f9790aebSLuigi Rizzo 	uint32_t sh, dh;
617f9790aebSLuigi Rizzo 	u_int dst, mysrc = na->bdg_port;
618f9790aebSLuigi Rizzo 	uint64_t smac, dmac;
61937e3a6d3SLuigi Rizzo 	uint8_t indbuf[12];
620f9790aebSLuigi Rizzo 
6212ff91c17SVincenzo Maffione 	if (buf_len < 14) {
622f9790aebSLuigi Rizzo 		return NM_BDG_NOPORT;
623f9790aebSLuigi Rizzo 	}
62437e3a6d3SLuigi Rizzo 
62537e3a6d3SLuigi Rizzo 	if (ft->ft_flags & NS_INDIRECT) {
62637e3a6d3SLuigi Rizzo 		if (copyin(buf, indbuf, sizeof(indbuf))) {
62737e3a6d3SLuigi Rizzo 			return NM_BDG_NOPORT;
62837e3a6d3SLuigi Rizzo 		}
62937e3a6d3SLuigi Rizzo 		buf = indbuf;
63037e3a6d3SLuigi Rizzo 	}
63137e3a6d3SLuigi Rizzo 
632f9790aebSLuigi Rizzo 	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
633f9790aebSLuigi Rizzo 	smac = le64toh(*(uint64_t *)(buf + 4));
634f9790aebSLuigi Rizzo 	smac >>= 16;
635f9790aebSLuigi Rizzo 
636f9790aebSLuigi Rizzo 	/*
637f9790aebSLuigi Rizzo 	 * The hash is somewhat expensive, there might be some
638f9790aebSLuigi Rizzo 	 * worthwhile optimizations here.
639f9790aebSLuigi Rizzo 	 */
640847bf383SLuigi Rizzo 	if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */
641f9790aebSLuigi Rizzo 		uint8_t *s = buf+6;
642b6e66be2SVincenzo Maffione 		sh = nm_vale_rthash(s); /* hash of source */
643f9790aebSLuigi Rizzo 		/* update source port forwarding entry */
644847bf383SLuigi Rizzo 		na->last_smac = ht[sh].mac = smac;	/* XXX expire ? */
645f9790aebSLuigi Rizzo 		ht[sh].ports = mysrc;
646b6e66be2SVincenzo Maffione 		if (netmap_debug & NM_DEBUG_VALE)
647b6e66be2SVincenzo Maffione 		    nm_prinf("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
648f9790aebSLuigi Rizzo 			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
649f9790aebSLuigi Rizzo 	}
650f9790aebSLuigi Rizzo 	dst = NM_BDG_BROADCAST;
651f9790aebSLuigi Rizzo 	if ((buf[0] & 1) == 0) { /* unicast */
652b6e66be2SVincenzo Maffione 		dh = nm_vale_rthash(buf); /* hash of dst */
653f9790aebSLuigi Rizzo 		if (ht[dh].mac == dmac) {	/* found dst */
654f9790aebSLuigi Rizzo 			dst = ht[dh].ports;
655f9790aebSLuigi Rizzo 		}
656f9790aebSLuigi Rizzo 	}
657f9790aebSLuigi Rizzo 	return dst;
658f9790aebSLuigi Rizzo }
659f9790aebSLuigi Rizzo 
660f9790aebSLuigi Rizzo 
661f9790aebSLuigi Rizzo /*
66217885a7bSLuigi Rizzo  * Available space in the ring. Only used in VALE code
66317885a7bSLuigi Rizzo  * and only with is_rx = 1
66417885a7bSLuigi Rizzo  */
66517885a7bSLuigi Rizzo static inline uint32_t
66617885a7bSLuigi Rizzo nm_kr_space(struct netmap_kring *k, int is_rx)
66717885a7bSLuigi Rizzo {
66817885a7bSLuigi Rizzo 	int space;
66917885a7bSLuigi Rizzo 
67017885a7bSLuigi Rizzo 	if (is_rx) {
67117885a7bSLuigi Rizzo 		int busy = k->nkr_hwlease - k->nr_hwcur;
67217885a7bSLuigi Rizzo 		if (busy < 0)
67317885a7bSLuigi Rizzo 			busy += k->nkr_num_slots;
67417885a7bSLuigi Rizzo 		space = k->nkr_num_slots - 1 - busy;
67517885a7bSLuigi Rizzo 	} else {
67617885a7bSLuigi Rizzo 		/* XXX never used in this branch */
67717885a7bSLuigi Rizzo 		space = k->nr_hwtail - k->nkr_hwlease;
67817885a7bSLuigi Rizzo 		if (space < 0)
67917885a7bSLuigi Rizzo 			space += k->nkr_num_slots;
68017885a7bSLuigi Rizzo 	}
68117885a7bSLuigi Rizzo #if 0
68217885a7bSLuigi Rizzo 	// sanity check
68317885a7bSLuigi Rizzo 	if (k->nkr_hwlease >= k->nkr_num_slots ||
68417885a7bSLuigi Rizzo 		k->nr_hwcur >= k->nkr_num_slots ||
68517885a7bSLuigi Rizzo 		k->nr_tail >= k->nkr_num_slots ||
68617885a7bSLuigi Rizzo 		busy < 0 ||
68717885a7bSLuigi Rizzo 		busy >= k->nkr_num_slots) {
68875f4f3edSVincenzo Maffione 		nm_prerr("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d",
68975f4f3edSVincenzo Maffione 		    k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
69017885a7bSLuigi Rizzo 		    k->nkr_lease_idx, k->nkr_num_slots);
69117885a7bSLuigi Rizzo 	}
69217885a7bSLuigi Rizzo #endif
69317885a7bSLuigi Rizzo 	return space;
69417885a7bSLuigi Rizzo }
69517885a7bSLuigi Rizzo 
69617885a7bSLuigi Rizzo 
69717885a7bSLuigi Rizzo 
69817885a7bSLuigi Rizzo 
69917885a7bSLuigi Rizzo /* make a lease on the kring for N positions. return the
70017885a7bSLuigi Rizzo  * lease index
70117885a7bSLuigi Rizzo  * XXX only used in VALE code and with is_rx = 1
70217885a7bSLuigi Rizzo  */
70317885a7bSLuigi Rizzo static inline uint32_t
70417885a7bSLuigi Rizzo nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
70517885a7bSLuigi Rizzo {
70617885a7bSLuigi Rizzo 	uint32_t lim = k->nkr_num_slots - 1;
70717885a7bSLuigi Rizzo 	uint32_t lease_idx = k->nkr_lease_idx;
70817885a7bSLuigi Rizzo 
70917885a7bSLuigi Rizzo 	k->nkr_leases[lease_idx] = NR_NOSLOT;
71017885a7bSLuigi Rizzo 	k->nkr_lease_idx = nm_next(lease_idx, lim);
71117885a7bSLuigi Rizzo 
712b6e66be2SVincenzo Maffione #ifdef CONFIG_NETMAP_DEBUG
71317885a7bSLuigi Rizzo 	if (n > nm_kr_space(k, is_rx)) {
714b6e66be2SVincenzo Maffione 		nm_prerr("invalid request for %d slots", n);
71517885a7bSLuigi Rizzo 		panic("x");
71617885a7bSLuigi Rizzo 	}
717b6e66be2SVincenzo Maffione #endif /* CONFIG NETMAP_DEBUG */
71817885a7bSLuigi Rizzo 	/* XXX verify that there are n slots */
71917885a7bSLuigi Rizzo 	k->nkr_hwlease += n;
72017885a7bSLuigi Rizzo 	if (k->nkr_hwlease > lim)
72117885a7bSLuigi Rizzo 		k->nkr_hwlease -= lim + 1;
72217885a7bSLuigi Rizzo 
723b6e66be2SVincenzo Maffione #ifdef CONFIG_NETMAP_DEBUG
72417885a7bSLuigi Rizzo 	if (k->nkr_hwlease >= k->nkr_num_slots ||
72517885a7bSLuigi Rizzo 		k->nr_hwcur >= k->nkr_num_slots ||
72617885a7bSLuigi Rizzo 		k->nr_hwtail >= k->nkr_num_slots ||
72717885a7bSLuigi Rizzo 		k->nkr_lease_idx >= k->nkr_num_slots) {
728b6e66be2SVincenzo Maffione 		nm_prerr("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
7294bf50f18SLuigi Rizzo 			k->na->name,
73017885a7bSLuigi Rizzo 			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
73117885a7bSLuigi Rizzo 			k->nkr_lease_idx, k->nkr_num_slots);
73217885a7bSLuigi Rizzo 	}
733b6e66be2SVincenzo Maffione #endif /* CONFIG_NETMAP_DEBUG */
73417885a7bSLuigi Rizzo 	return lease_idx;
73517885a7bSLuigi Rizzo }
73617885a7bSLuigi Rizzo 
73717885a7bSLuigi Rizzo /*
7384bf50f18SLuigi Rizzo  *
739f9790aebSLuigi Rizzo  * This flush routine supports only unicast and broadcast but a large
740f9790aebSLuigi Rizzo  * number of ports, and lets us replace the learn and dispatch functions.
741f9790aebSLuigi Rizzo  */
/*
 * nm_vale_flush: forward a batch of 'n' work-area entries.
 *
 * ft       work area; the dst_ents and dsts scratch arrays are located
 *          right after its first NM_BDG_BATCH_MAX entries.
 * n        number of entries of 'ft' to scan.
 * na       source port; its bdg_port is never used as a destination.
 * ring_nr  source ring number, used as the default destination ring.
 *
 * Pass one asks b->bdg_ops.lookup() for the destination of every packet
 * and chains the packet on the matching per-destination queue. Pass two
 * walks the collected destinations, leases slots on the destination rx
 * kring under q_lock, copies the packets with the lock dropped, then
 * takes the lock again to report completion and possibly advance
 * nr_hwtail.
 *
 * Always returns 0; packets that cannot be delivered are dropped.
 */
742f9790aebSLuigi Rizzo int
743b6e66be2SVincenzo Maffione nm_vale_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
744f9790aebSLuigi Rizzo 		u_int ring_nr)
745f9790aebSLuigi Rizzo {
746b6e66be2SVincenzo Maffione 	struct nm_vale_q *dst_ents, *brddst;
747f9790aebSLuigi Rizzo 	uint16_t num_dsts = 0, *dsts;
748f9790aebSLuigi Rizzo 	struct nm_bridge *b = na->na_bdg;
74937e3a6d3SLuigi Rizzo 	u_int i, me = na->bdg_port;
750f9790aebSLuigi Rizzo 
751f9790aebSLuigi Rizzo 	/*
752f9790aebSLuigi Rizzo 	 * The work area (pointed by ft) is followed by an array of
753f9790aebSLuigi Rizzo 	 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
754f9790aebSLuigi Rizzo 	 * queues per port plus one for the broadcast traffic.
755f9790aebSLuigi Rizzo 	 * Then we have an array of destination indexes.
756f9790aebSLuigi Rizzo 	 */
757b6e66be2SVincenzo Maffione 	dst_ents = (struct nm_vale_q *)(ft + NM_BDG_BATCH_MAX);
758f9790aebSLuigi Rizzo 	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
759f9790aebSLuigi Rizzo 
760f9790aebSLuigi Rizzo 	/* first pass: find a destination for each packet in the batch */
761f9790aebSLuigi Rizzo 	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
762f9790aebSLuigi Rizzo 		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
763f9790aebSLuigi Rizzo 		uint16_t dst_port, d_i;
764b6e66be2SVincenzo Maffione 		struct nm_vale_q *d;
7652ff91c17SVincenzo Maffione 		struct nm_bdg_fwd *start_ft = NULL;
766f9790aebSLuigi Rizzo 
76775f4f3edSVincenzo Maffione 		nm_prdis("slot %d frags %d", i, ft[i].ft_frags);
7682ff91c17SVincenzo Maffione 
		/*
		 * Pick the fragment handed to the lookup function: the
		 * first one (skipping virt_hdr_len bytes) when it is longer
		 * than the header, or the second one when the first
		 * fragment holds exactly the header and more follow.
		 */
7692ff91c17SVincenzo Maffione 		if (na->up.virt_hdr_len < ft[i].ft_len) {
7702ff91c17SVincenzo Maffione 			ft[i].ft_offset = na->up.virt_hdr_len;
7712ff91c17SVincenzo Maffione 			start_ft = &ft[i];
7722ff91c17SVincenzo Maffione 		} else if (na->up.virt_hdr_len == ft[i].ft_len && ft[i].ft_flags & NS_MOREFRAG) {
7732ff91c17SVincenzo Maffione 			ft[i].ft_offset = ft[i].ft_len;
7742ff91c17SVincenzo Maffione 			start_ft = &ft[i+1];
7752ff91c17SVincenzo Maffione 		} else {
776f0ea3689SLuigi Rizzo 			/* Drop the packet if the virtio-net header is not into the first
7772ff91c17SVincenzo Maffione 			 * fragment nor at the very beginning of the second.
7782ff91c17SVincenzo Maffione 			 */
779f9790aebSLuigi Rizzo 			continue;
7802ff91c17SVincenzo Maffione 		}
781b6e66be2SVincenzo Maffione 		dst_port = b->bdg_ops.lookup(start_ft, &dst_ring, na, b->private_data);
782f9790aebSLuigi Rizzo 		if (netmap_verbose > 255)
78375f4f3edSVincenzo Maffione 			nm_prlim(5, "slot %d port %d -> %d", i, me, dst_port);
7844f80b14cSVincenzo Maffione 		if (dst_port >= NM_BDG_NOPORT)
785f9790aebSLuigi Rizzo 			continue; /* this packet is identified to be dropped */
786f9790aebSLuigi Rizzo 		else if (dst_port == NM_BDG_BROADCAST)
787f9790aebSLuigi Rizzo 			dst_ring = 0; /* broadcasts always go to ring 0 */
788f9790aebSLuigi Rizzo 		else if (unlikely(dst_port == me ||
789f9790aebSLuigi Rizzo 		    !b->bdg_ports[dst_port]))
790f9790aebSLuigi Rizzo 			continue;
791f9790aebSLuigi Rizzo 
792f9790aebSLuigi Rizzo 		/* get a position in the scratch pad */
793f9790aebSLuigi Rizzo 		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
794f9790aebSLuigi Rizzo 		d = dst_ents + d_i;
795f9790aebSLuigi Rizzo 
796f9790aebSLuigi Rizzo 		/* append the first fragment to the list */
797f9790aebSLuigi Rizzo 		if (d->bq_head == NM_FT_NULL) { /* new destination */
798f9790aebSLuigi Rizzo 			d->bq_head = d->bq_tail = i;
799f9790aebSLuigi Rizzo 			/* remember this position to be scanned later */
800f9790aebSLuigi Rizzo 			if (dst_port != NM_BDG_BROADCAST)
801f9790aebSLuigi Rizzo 				dsts[num_dsts++] = d_i;
802f9790aebSLuigi Rizzo 		} else {
803f9790aebSLuigi Rizzo 			ft[d->bq_tail].ft_next = i;
804f9790aebSLuigi Rizzo 			d->bq_tail = i;
805f9790aebSLuigi Rizzo 		}
		/* bq_len counts slots (fragments), not packets */
806f9790aebSLuigi Rizzo 		d->bq_len += ft[i].ft_frags;
807f9790aebSLuigi Rizzo 	}
808f9790aebSLuigi Rizzo 
809f9790aebSLuigi Rizzo 	/*
810f9790aebSLuigi Rizzo 	 * Broadcast traffic goes to ring 0 on all destinations.
811f9790aebSLuigi Rizzo 	 * So we need to add these rings to the list of ports to scan.
812f9790aebSLuigi Rizzo 	 */
813f9790aebSLuigi Rizzo 	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
814f9790aebSLuigi Rizzo 	if (brddst->bq_head != NM_FT_NULL) {
81537e3a6d3SLuigi Rizzo 		u_int j;
816f9790aebSLuigi Rizzo 		for (j = 0; likely(j < b->bdg_active_ports); j++) {
817f9790aebSLuigi Rizzo 			uint16_t d_i;
818f9790aebSLuigi Rizzo 			i = b->bdg_port_index[j];
819f9790aebSLuigi Rizzo 			if (unlikely(i == me))
820f9790aebSLuigi Rizzo 				continue;
821f9790aebSLuigi Rizzo 			d_i = i * NM_BDG_MAXRINGS;
			/*
			 * a ring 0 queue that is not empty was already
			 * recorded in dsts[] during the first pass
			 */
822f9790aebSLuigi Rizzo 			if (dst_ents[d_i].bq_head == NM_FT_NULL)
823f9790aebSLuigi Rizzo 				dsts[num_dsts++] = d_i;
824f9790aebSLuigi Rizzo 		}
825f9790aebSLuigi Rizzo 	}
826f9790aebSLuigi Rizzo 
82775f4f3edSVincenzo Maffione 	nm_prdis(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
8284bf50f18SLuigi Rizzo 	/* second pass: scan destinations */
829f9790aebSLuigi Rizzo 	for (i = 0; i < num_dsts; i++) {
830f9790aebSLuigi Rizzo 		struct netmap_vp_adapter *dst_na;
831f9790aebSLuigi Rizzo 		struct netmap_kring *kring;
832f9790aebSLuigi Rizzo 		struct netmap_ring *ring;
833f0ea3689SLuigi Rizzo 		u_int dst_nr, lim, j, d_i, next, brd_next;
834f9790aebSLuigi Rizzo 		u_int needed, howmany;
835f9790aebSLuigi Rizzo 		int retry = netmap_txsync_retry;
836b6e66be2SVincenzo Maffione 		struct nm_vale_q *d;
837f9790aebSLuigi Rizzo 		uint32_t my_start = 0, lease_idx = 0;
838f9790aebSLuigi Rizzo 		int nrings;
839f0ea3689SLuigi Rizzo 		int virt_hdr_mismatch = 0;
840f9790aebSLuigi Rizzo 
841f9790aebSLuigi Rizzo 		d_i = dsts[i];
84275f4f3edSVincenzo Maffione 		nm_prdis("second pass %d port %d", i, d_i);
843f9790aebSLuigi Rizzo 		d = dst_ents + d_i;
844f9790aebSLuigi Rizzo 		// XXX fix the division
845f9790aebSLuigi Rizzo 		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
846f9790aebSLuigi Rizzo 		/* protect from the lookup function returning an inactive
847f9790aebSLuigi Rizzo 		 * destination port
848f9790aebSLuigi Rizzo 		 */
849f9790aebSLuigi Rizzo 		if (unlikely(dst_na == NULL))
850f9790aebSLuigi Rizzo 			goto cleanup;
851f9790aebSLuigi Rizzo 		if (dst_na->up.na_flags & NAF_SW_ONLY)
852f9790aebSLuigi Rizzo 			goto cleanup;
853f9790aebSLuigi Rizzo 		/*
854f9790aebSLuigi Rizzo 		 * The interface may be in !netmap mode in two cases:
855f9790aebSLuigi Rizzo 		 * - when na is attached but not activated yet;
856f9790aebSLuigi Rizzo 		 * - when na is being deactivated but is still attached.
857f9790aebSLuigi Rizzo 		 */
8584bf50f18SLuigi Rizzo 		if (unlikely(!nm_netmap_on(&dst_na->up))) {
85975f4f3edSVincenzo Maffione 			nm_prdis("not in netmap mode!");
860f9790aebSLuigi Rizzo 			goto cleanup;
861f9790aebSLuigi Rizzo 		}
862f9790aebSLuigi Rizzo 
863f9790aebSLuigi Rizzo 		/* there is at least one either unicast or broadcast packet */
864f9790aebSLuigi Rizzo 		brd_next = brddst->bq_head;
865f9790aebSLuigi Rizzo 		next = d->bq_head;
866f9790aebSLuigi Rizzo 		/* we need to reserve this many slots. If fewer are
867f9790aebSLuigi Rizzo 		 * available, some packets will be dropped.
868a6d768d8SVincenzo Maffione 		 * Packets may have multiple fragments, so
869f9790aebSLuigi Rizzo 		 * there is a chance that we may not use all of the slots
870f9790aebSLuigi Rizzo 		 * we have claimed, so we will need to handle the leftover
871f9790aebSLuigi Rizzo 		 * ones when we regain the lock.
872f9790aebSLuigi Rizzo 		 */
873f9790aebSLuigi Rizzo 		needed = d->bq_len + brddst->bq_len;
874f9790aebSLuigi Rizzo 
87537e3a6d3SLuigi Rizzo 		if (unlikely(dst_na->up.virt_hdr_len != na->up.virt_hdr_len)) {
876c3e9b4dbSLuiz Otavio O Souza 			if (netmap_verbose) {
87775f4f3edSVincenzo Maffione 				nm_prlim(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len,
87837e3a6d3SLuigi Rizzo 						dst_na->up.virt_hdr_len);
879c3e9b4dbSLuiz Otavio O Souza 			}
880f0ea3689SLuigi Rizzo 			/* There is a virtio-net header/offloadings mismatch between
881f0ea3689SLuigi Rizzo 			 * source and destination. The slower mismatch datapath will
882f0ea3689SLuigi Rizzo 			 * be used to cope with all the mismatches.
883f0ea3689SLuigi Rizzo 			 */
884f0ea3689SLuigi Rizzo 			virt_hdr_mismatch = 1;
885f0ea3689SLuigi Rizzo 			if (dst_na->mfs < na->mfs) {
886f0ea3689SLuigi Rizzo 				/* We may need to do segmentation offloadings, and so
887f0ea3689SLuigi Rizzo 				 * we may need a number of destination slots greater
888f0ea3689SLuigi Rizzo 				 * than the number of input slots ('needed').
889f0ea3689SLuigi Rizzo 				 * We look for the smallest integer 'x' which satisfies:
890f0ea3689SLuigi Rizzo 				 *	needed * na->mfs + x * H <= x * na->mfs
891f0ea3689SLuigi Rizzo 				 * where 'H' is the length of the longest header that may
892f0ea3689SLuigi Rizzo 				 * be replicated in the segmentation process (e.g. for
893f0ea3689SLuigi Rizzo 				 * TCPv4 we must account for ethernet header, IP header
894f0ea3689SLuigi Rizzo 				 * and TCPv4 header).
895f0ea3689SLuigi Rizzo 				 */
8964f80b14cSVincenzo Maffione 				KASSERT(dst_na->mfs > 0, ("vpna->mfs is 0"));
				/*
				 * NOTE(review): the assertion above only
				 * checks dst_na->mfs > 0, while the divisor
				 * below is dst_na->mfs - WORST_CASE_GSO_HEADER.
				 * Confirm that mfs is kept larger than
				 * WORST_CASE_GSO_HEADER elsewhere.
				 */
897f0ea3689SLuigi Rizzo 				needed = (needed * na->mfs) /
898f0ea3689SLuigi Rizzo 						(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
89975f4f3edSVincenzo Maffione 				nm_prdis(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
900f0ea3689SLuigi Rizzo 			}
901f0ea3689SLuigi Rizzo 		}
902f0ea3689SLuigi Rizzo 
90375f4f3edSVincenzo Maffione 		nm_prdis(5, "pass 2 dst %d is %x %s",
90406f6997eSVincenzo Maffione 			i, d_i, nm_is_bwrap(&dst_na->up) ? "nic/host" : "virtual");
		/*
		 * The ring number is the low part of d_i; the mask below
		 * presumes NM_BDG_MAXRINGS is a power of two -- TODO
		 * confirm. It is then folded onto the rx rings that the
		 * destination actually has.
		 */
905f9790aebSLuigi Rizzo 		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
906f9790aebSLuigi Rizzo 		nrings = dst_na->up.num_rx_rings;
907f9790aebSLuigi Rizzo 		if (dst_nr >= nrings)
908f9790aebSLuigi Rizzo 			dst_nr = dst_nr % nrings;
9092ff91c17SVincenzo Maffione 		kring = dst_na->up.rx_rings[dst_nr];
910f9790aebSLuigi Rizzo 		ring = kring->ring;
9114f80b14cSVincenzo Maffione 		/* the destination ring may have not been opened for RX */
9124f80b14cSVincenzo Maffione 		if (unlikely(ring == NULL || kring->nr_mode != NKR_NETMAP_ON))
9134f80b14cSVincenzo Maffione 			goto cleanup;
914f9790aebSLuigi Rizzo 		lim = kring->nkr_num_slots - 1;
915f9790aebSLuigi Rizzo 
916f9790aebSLuigi Rizzo retry:
917f9790aebSLuigi Rizzo 
918f0ea3689SLuigi Rizzo 		if (dst_na->retry && retry) {
919f0ea3689SLuigi Rizzo 			/* try to get some free slot from the previous run */
920b6e66be2SVincenzo Maffione 			kring->nm_notify(kring, NAF_FORCE_RECLAIM);
9214bf50f18SLuigi Rizzo 			/* actually useful only for bwraps, since there
9224bf50f18SLuigi Rizzo 			 * the notify will trigger a txsync on the hwna. VALE ports
9234bf50f18SLuigi Rizzo 			 * have dst_na->retry == 0
9244bf50f18SLuigi Rizzo 			 */
925f0ea3689SLuigi Rizzo 		}
926f9790aebSLuigi Rizzo 		/* reserve the buffers in the queue and an entry
927f9790aebSLuigi Rizzo 		 * to report completion, and drop lock.
928f9790aebSLuigi Rizzo 		 * XXX this might become a helper function.
929f9790aebSLuigi Rizzo 		 */
930f9790aebSLuigi Rizzo 		mtx_lock(&kring->q_lock);
931f9790aebSLuigi Rizzo 		if (kring->nkr_stopped) {
932f9790aebSLuigi Rizzo 			mtx_unlock(&kring->q_lock);
933f9790aebSLuigi Rizzo 			goto cleanup;
934f9790aebSLuigi Rizzo 		}
		/* lease at most 'needed' slots, starting at nkr_hwlease */
935f9790aebSLuigi Rizzo 		my_start = j = kring->nkr_hwlease;
936f9790aebSLuigi Rizzo 		howmany = nm_kr_space(kring, 1);
937f9790aebSLuigi Rizzo 		if (needed < howmany)
938f9790aebSLuigi Rizzo 			howmany = needed;
939f9790aebSLuigi Rizzo 		lease_idx = nm_kr_lease(kring, howmany, 1);
940f9790aebSLuigi Rizzo 		mtx_unlock(&kring->q_lock);
941f9790aebSLuigi Rizzo 
942f9790aebSLuigi Rizzo 		/* only retry if we need more than available slots */
943f9790aebSLuigi Rizzo 		if (retry && needed <= howmany)
944f9790aebSLuigi Rizzo 			retry = 0;
945f9790aebSLuigi Rizzo 
946f9790aebSLuigi Rizzo 		/* copy to the destination queue */
947f9790aebSLuigi Rizzo 		while (howmany > 0) {
948f9790aebSLuigi Rizzo 			struct netmap_slot *slot;
949f9790aebSLuigi Rizzo 			struct nm_bdg_fwd *ft_p, *ft_end;
950f9790aebSLuigi Rizzo 			u_int cnt;
951f9790aebSLuigi Rizzo 
952f9790aebSLuigi Rizzo 			/* find the queue from which we pick next packet.
953f9790aebSLuigi Rizzo 			 * NM_FT_NULL is always higher than valid indexes
954f9790aebSLuigi Rizzo 			 * so we never dereference it if the other list
955f9790aebSLuigi Rizzo 			 * has packets (and if both are empty we never
956f9790aebSLuigi Rizzo 			 * get here).
957f9790aebSLuigi Rizzo 			 */
958f9790aebSLuigi Rizzo 			if (next < brd_next) {
959f9790aebSLuigi Rizzo 				ft_p = ft + next;
960f9790aebSLuigi Rizzo 				next = ft_p->ft_next;
961f9790aebSLuigi Rizzo 			} else { /* insert broadcast */
962f9790aebSLuigi Rizzo 				ft_p = ft + brd_next;
963f9790aebSLuigi Rizzo 				brd_next = ft_p->ft_next;
964f9790aebSLuigi Rizzo 			}
965f9790aebSLuigi Rizzo 			cnt = ft_p->ft_frags; // cnt > 0
966f9790aebSLuigi Rizzo 			if (unlikely(cnt > howmany))
967f9790aebSLuigi Rizzo 			    break; /* no more space */
968f9790aebSLuigi Rizzo 			if (netmap_verbose && cnt > 1)
96975f4f3edSVincenzo Maffione 				nm_prlim(5, "rx %d frags to %d", cnt, j);
970f9790aebSLuigi Rizzo 			ft_end = ft_p + cnt;
971f0ea3689SLuigi Rizzo 			if (unlikely(virt_hdr_mismatch)) {
972f0ea3689SLuigi Rizzo 				bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
973f0ea3689SLuigi Rizzo 			} else {
974f0ea3689SLuigi Rizzo 				howmany -= cnt;
975f9790aebSLuigi Rizzo 				do {
976f9790aebSLuigi Rizzo 					char *dst, *src = ft_p->ft_buf;
977f9790aebSLuigi Rizzo 					size_t copy_len = ft_p->ft_len, dst_len = copy_len;
978a6d768d8SVincenzo Maffione 					uintptr_t src_cb;
979a6d768d8SVincenzo Maffione 					uint64_t dstoff, dstoff_cb;
980a6d768d8SVincenzo Maffione 					int src_co, dst_co;
981a6d768d8SVincenzo Maffione 					const uintptr_t mask = NM_BUF_ALIGN - 1;
982f9790aebSLuigi Rizzo 
					/*
					 * src_cb/dstoff_cb are source address and
					 * destination offset rounded down to
					 * NM_BUF_ALIGN; src_co/dst_co are the
					 * remainders. The destination offset is
					 * chosen so that it has the same remainder
					 * as the source, and the copy starts from
					 * the rounded-down positions.
					 */
983f9790aebSLuigi Rizzo 					slot = &ring->slot[j];
9844bf50f18SLuigi Rizzo 					dst = NMB(&dst_na->up, slot);
985a6d768d8SVincenzo Maffione 					dstoff = nm_get_offset(kring, slot);
986a6d768d8SVincenzo Maffione 					dstoff_cb = dstoff & ~mask;
987a6d768d8SVincenzo Maffione 					src_cb = ((uintptr_t)src) & ~mask;
988a6d768d8SVincenzo Maffione 					src_co = ((uintptr_t)src) & mask;
989a6d768d8SVincenzo Maffione 					dst_co = ((uintptr_t)(dst + dstoff)) & mask;
990a6d768d8SVincenzo Maffione 					if (dst_co < src_co) {
991a6d768d8SVincenzo Maffione 						dstoff_cb += NM_BUF_ALIGN;
992a6d768d8SVincenzo Maffione 					}
993a6d768d8SVincenzo Maffione 					dstoff = dstoff_cb + src_co;
994a6d768d8SVincenzo Maffione 					copy_len += src_co;
995f9790aebSLuigi Rizzo 
					/*
					 * NOTE(review): dst_ifp is not declared in
					 * this function; presumably nm_prdis()
					 * discards its arguments -- confirm.
					 */
99675f4f3edSVincenzo Maffione 					nm_prdis("send [%d] %d(%d) bytes at %s:%d",
99717885a7bSLuigi Rizzo 							i, (int)copy_len, (int)dst_len,
998a6d768d8SVincenzo Maffione 							NM_IFPNAME(dst_ifp), j);
999f9790aebSLuigi Rizzo 
					/* data that would not fit in the buffer is dropped: the slot is emitted with length 0 */
1000a6d768d8SVincenzo Maffione 					if (unlikely(dstoff > NETMAP_BUF_SIZE(&dst_na->up) ||
1001a6d768d8SVincenzo Maffione 				                     dst_len > NETMAP_BUF_SIZE(&dst_na->up) - dstoff)) {
1002a6d768d8SVincenzo Maffione 						nm_prlim(5, "dropping packet/fragment of len %zu, dest offset %llu",
1003a6d768d8SVincenzo Maffione 								dst_len, (unsigned long long)dstoff);
1004a6d768d8SVincenzo Maffione 						copy_len = dst_len = 0;
1005a6d768d8SVincenzo Maffione 						dstoff = nm_get_offset(kring, slot);
1006e31c6ec7SLuigi Rizzo 					}
1007a6d768d8SVincenzo Maffione 
1008f9790aebSLuigi Rizzo 					if (ft_p->ft_flags & NS_INDIRECT) {
						/*
						 * NOTE(review): this branch copies to
						 * 'dst' rather than to 'dst + dstoff_cb'
						 * as the branch below does, and copy_len
						 * already had src_co added above, while
						 * the offset written to the slot further
						 * down is dstoff. Confirm that this is
						 * intended for indirect buffers.
						 */
1009f9790aebSLuigi Rizzo 						if (copyin(src, dst, copy_len)) {
1010f9790aebSLuigi Rizzo 							// invalid user pointer, pretend len is 0
1011f9790aebSLuigi Rizzo 							dst_len = 0;
1012f9790aebSLuigi Rizzo 						}
1013f9790aebSLuigi Rizzo 					} else {
1014f9790aebSLuigi Rizzo 						//memcpy(dst, src, copy_len);
1015a6d768d8SVincenzo Maffione 						pkt_copy((char *)src_cb, dst + dstoff_cb, (int)copy_len);
1016f9790aebSLuigi Rizzo 					}
1017f9790aebSLuigi Rizzo 					slot->len = dst_len;
1018f9790aebSLuigi Rizzo 					slot->flags = (cnt << 8)| NS_MOREFRAG;
1019a6d768d8SVincenzo Maffione 					nm_write_offset(kring, slot, dstoff);
1020f9790aebSLuigi Rizzo 					j = nm_next(j, lim);
1021f0ea3689SLuigi Rizzo 					needed--;
1022f9790aebSLuigi Rizzo 					ft_p++;
1023f9790aebSLuigi Rizzo 				} while (ft_p != ft_end);
1024f9790aebSLuigi Rizzo 				slot->flags = (cnt << 8); /* clear flag on last entry */
1025f0ea3689SLuigi Rizzo 			}
1026f9790aebSLuigi Rizzo 			/* are we done ? */
1027f9790aebSLuigi Rizzo 			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
1028f9790aebSLuigi Rizzo 				break;
1029f9790aebSLuigi Rizzo 		}
1030f9790aebSLuigi Rizzo 		{
1031f9790aebSLuigi Rizzo 		    /* current position */
1032f9790aebSLuigi Rizzo 		    uint32_t *p = kring->nkr_leases; /* shorthand */
1033f9790aebSLuigi Rizzo 		    uint32_t update_pos;
1034f9790aebSLuigi Rizzo 		    int still_locked = 1;
1035f9790aebSLuigi Rizzo 
1036f9790aebSLuigi Rizzo 		    mtx_lock(&kring->q_lock);
1037f9790aebSLuigi Rizzo 		    if (unlikely(howmany > 0)) {
1038f9790aebSLuigi Rizzo 			/* not used all bufs. If i am the last one
1039f9790aebSLuigi Rizzo 			 * i can recover the slots, otherwise must
1040f9790aebSLuigi Rizzo 			 * fill them with 0 to mark empty packets.
1041f9790aebSLuigi Rizzo 			 */
104275f4f3edSVincenzo Maffione 			nm_prdis("leftover %d bufs", howmany);
1043f9790aebSLuigi Rizzo 			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
1044f9790aebSLuigi Rizzo 			    /* yes i am the last one */
104575f4f3edSVincenzo Maffione 			    nm_prdis("roll back nkr_hwlease to %d", j);
1046f9790aebSLuigi Rizzo 			    kring->nkr_hwlease = j;
1047f9790aebSLuigi Rizzo 			} else {
1048f9790aebSLuigi Rizzo 			    while (howmany-- > 0) {
1049f9790aebSLuigi Rizzo 				ring->slot[j].len = 0;
1050f9790aebSLuigi Rizzo 				ring->slot[j].flags = 0;
1051f9790aebSLuigi Rizzo 				j = nm_next(j, lim);
1052f9790aebSLuigi Rizzo 			    }
1053f9790aebSLuigi Rizzo 			}
1054f9790aebSLuigi Rizzo 		    }
1055f9790aebSLuigi Rizzo 		    p[lease_idx] = j; /* report I am done */
1056f9790aebSLuigi Rizzo 
105717885a7bSLuigi Rizzo 		    update_pos = kring->nr_hwtail;
1058f9790aebSLuigi Rizzo 
1059f9790aebSLuigi Rizzo 		    if (my_start == update_pos) {
1060f9790aebSLuigi Rizzo 			/* all slots before my_start have been reported,
1061f9790aebSLuigi Rizzo 			 * so scan subsequent leases to see if other ranges
1062f9790aebSLuigi Rizzo 			 * have been completed, and to a selwakeup or txsync.
1063f9790aebSLuigi Rizzo 		         */
1064f9790aebSLuigi Rizzo 			while (lease_idx != kring->nkr_lease_idx &&
1065f9790aebSLuigi Rizzo 				p[lease_idx] != NR_NOSLOT) {
1066f9790aebSLuigi Rizzo 			    j = p[lease_idx];
1067f9790aebSLuigi Rizzo 			    p[lease_idx] = NR_NOSLOT;
1068f9790aebSLuigi Rizzo 			    lease_idx = nm_next(lease_idx, lim);
1069f9790aebSLuigi Rizzo 			}
1070f9790aebSLuigi Rizzo 			/* j is the new 'write' position. j != my_start
1071f9790aebSLuigi Rizzo 			 * means there are new buffers to report
1072f9790aebSLuigi Rizzo 			 */
1073f9790aebSLuigi Rizzo 			if (likely(j != my_start)) {
107417885a7bSLuigi Rizzo 				kring->nr_hwtail = j;
				/* the lock is released before calling nm_notify() */
1075f9790aebSLuigi Rizzo 				still_locked = 0;
1076f9790aebSLuigi Rizzo 				mtx_unlock(&kring->q_lock);
1077847bf383SLuigi Rizzo 				kring->nm_notify(kring, 0);
10784bf50f18SLuigi Rizzo 				/* this is netmap_notify for VALE ports and
10794bf50f18SLuigi Rizzo 				 * netmap_bwrap_notify for bwrap. The latter will
10804bf50f18SLuigi Rizzo 				 * trigger a txsync on the underlying hwna
10814bf50f18SLuigi Rizzo 				 */
10824bf50f18SLuigi Rizzo 				if (dst_na->retry && retry--) {
10834bf50f18SLuigi Rizzo 					/* XXX this is going to call nm_notify again.
10844bf50f18SLuigi Rizzo 					 * Only useful for bwrap in virtual machines
10854bf50f18SLuigi Rizzo 					 */
1086f9790aebSLuigi Rizzo 					goto retry;
1087f9790aebSLuigi Rizzo 				}
1088f9790aebSLuigi Rizzo 			}
10894bf50f18SLuigi Rizzo 		    }
1090f9790aebSLuigi Rizzo 		    if (still_locked)
1091f9790aebSLuigi Rizzo 			mtx_unlock(&kring->q_lock);
1092f9790aebSLuigi Rizzo 		}
1093f9790aebSLuigi Rizzo cleanup:
		/* empty this destination's queue, delivered or not, so the scratch area is clean when the function returns */
1094f9790aebSLuigi Rizzo 		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
1095f9790aebSLuigi Rizzo 		d->bq_len = 0;
1096f9790aebSLuigi Rizzo 	}
1097f9790aebSLuigi Rizzo 	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
1098f9790aebSLuigi Rizzo 	brddst->bq_len = 0;
1099f9790aebSLuigi Rizzo 	return 0;
1100f9790aebSLuigi Rizzo }
1101f9790aebSLuigi Rizzo 
11024bf50f18SLuigi Rizzo /* nm_txsync callback for VALE ports */
1103f9790aebSLuigi Rizzo static int
1104b6e66be2SVincenzo Maffione netmap_vale_vp_txsync(struct netmap_kring *kring, int flags)
1105f9790aebSLuigi Rizzo {
11064bf50f18SLuigi Rizzo 	struct netmap_vp_adapter *na =
11074bf50f18SLuigi Rizzo 		(struct netmap_vp_adapter *)kring->na;
110817885a7bSLuigi Rizzo 	u_int done;
110917885a7bSLuigi Rizzo 	u_int const lim = kring->nkr_num_slots - 1;
1110847bf383SLuigi Rizzo 	u_int const head = kring->rhead;
1111f9790aebSLuigi Rizzo 
1112f9790aebSLuigi Rizzo 	if (bridge_batch <= 0) { /* testing only */
1113847bf383SLuigi Rizzo 		done = head; // used all
1114f9790aebSLuigi Rizzo 		goto done;
1115f9790aebSLuigi Rizzo 	}
11164bf50f18SLuigi Rizzo 	if (!na->na_bdg) {
1117847bf383SLuigi Rizzo 		done = head;
11184bf50f18SLuigi Rizzo 		goto done;
11194bf50f18SLuigi Rizzo 	}
1120f9790aebSLuigi Rizzo 	if (bridge_batch > NM_BDG_BATCH)
1121f9790aebSLuigi Rizzo 		bridge_batch = NM_BDG_BATCH;
1122f9790aebSLuigi Rizzo 
1123b6e66be2SVincenzo Maffione 	done = nm_vale_preflush(kring, head);
1124f9790aebSLuigi Rizzo done:
1125847bf383SLuigi Rizzo 	if (done != head)
1126b6e66be2SVincenzo Maffione 		nm_prerr("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail);
112717885a7bSLuigi Rizzo 	/*
112817885a7bSLuigi Rizzo 	 * packets between 'done' and 'cur' are left unsent.
112917885a7bSLuigi Rizzo 	 */
113017885a7bSLuigi Rizzo 	kring->nr_hwcur = done;
113117885a7bSLuigi Rizzo 	kring->nr_hwtail = nm_prev(done, lim);
1132b6e66be2SVincenzo Maffione 	if (netmap_debug & NM_DEBUG_TXSYNC)
1133b6e66be2SVincenzo Maffione 		nm_prinf("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
1134f9790aebSLuigi Rizzo 	return 0;
1135f9790aebSLuigi Rizzo }
1136f9790aebSLuigi Rizzo 
1137f9790aebSLuigi Rizzo 
11384bf50f18SLuigi Rizzo /* create a netmap_vp_adapter that describes a VALE port.
11394bf50f18SLuigi Rizzo  * Only persistent VALE ports have a non-null ifp.
11404bf50f18SLuigi Rizzo  */
11414bf50f18SLuigi Rizzo static int
1142*e330262fSJustin Hibbits netmap_vale_vp_create(struct nmreq_header *hdr, if_t ifp,
11432ff91c17SVincenzo Maffione 		struct netmap_mem_d *nmd, struct netmap_vp_adapter **ret)
1144f9790aebSLuigi Rizzo {
1145cfa866f6SMatt Macy 	struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
1146f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna;
1147f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
1148c3e9b4dbSLuiz Otavio O Souza 	int error = 0;
1149f0ea3689SLuigi Rizzo 	u_int npipes = 0;
11502ff91c17SVincenzo Maffione 	u_int extrabufs = 0;
11512ff91c17SVincenzo Maffione 
11522ff91c17SVincenzo Maffione 	if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
11532ff91c17SVincenzo Maffione 		return EINVAL;
11542ff91c17SVincenzo Maffione 	}
1155f9790aebSLuigi Rizzo 
1156c3e9b4dbSLuiz Otavio O Souza 	vpna = nm_os_malloc(sizeof(*vpna));
1157f9790aebSLuigi Rizzo 	if (vpna == NULL)
1158f9790aebSLuigi Rizzo 		return ENOMEM;
1159f9790aebSLuigi Rizzo 
1160f9790aebSLuigi Rizzo  	na = &vpna->up;
1161f9790aebSLuigi Rizzo 
1162f9790aebSLuigi Rizzo 	na->ifp = ifp;
1163b6e66be2SVincenzo Maffione 	strlcpy(na->name, hdr->nr_name, sizeof(na->name));
1164f9790aebSLuigi Rizzo 
1165f9790aebSLuigi Rizzo 	/* bound checking */
11662ff91c17SVincenzo Maffione 	na->num_tx_rings = req->nr_tx_rings;
1167f9790aebSLuigi Rizzo 	nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
11682ff91c17SVincenzo Maffione 	req->nr_tx_rings = na->num_tx_rings; /* write back */
11692ff91c17SVincenzo Maffione 	na->num_rx_rings = req->nr_rx_rings;
1170f9790aebSLuigi Rizzo 	nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
11712ff91c17SVincenzo Maffione 	req->nr_rx_rings = na->num_rx_rings; /* write back */
11722ff91c17SVincenzo Maffione 	nm_bound_var(&req->nr_tx_slots, NM_BRIDGE_RINGSIZE,
1173f9790aebSLuigi Rizzo 			1, NM_BDG_MAXSLOTS, NULL);
11742ff91c17SVincenzo Maffione 	na->num_tx_desc = req->nr_tx_slots;
11752ff91c17SVincenzo Maffione 	nm_bound_var(&req->nr_rx_slots, NM_BRIDGE_RINGSIZE,
1176f9790aebSLuigi Rizzo 			1, NM_BDG_MAXSLOTS, NULL);
1177f0ea3689SLuigi Rizzo 	/* validate number of pipes. We want at least 1,
1178f0ea3689SLuigi Rizzo 	 * but probably can do with some more.
1179f0ea3689SLuigi Rizzo 	 * So let's use 2 as default (when 0 is supplied)
1180f0ea3689SLuigi Rizzo 	 */
1181f0ea3689SLuigi Rizzo 	nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
1182f0ea3689SLuigi Rizzo 	/* validate extra bufs */
1183b6e66be2SVincenzo Maffione 	extrabufs = req->nr_extra_bufs;
11842ff91c17SVincenzo Maffione 	nm_bound_var(&extrabufs, 0, 0,
1185f0ea3689SLuigi Rizzo 			128*NM_BDG_MAXSLOTS, NULL);
11862ff91c17SVincenzo Maffione 	req->nr_extra_bufs = extrabufs; /* write back */
11872ff91c17SVincenzo Maffione 	na->num_rx_desc = req->nr_rx_slots;
11884f80b14cSVincenzo Maffione 	/* Set the mfs to a default value, as it is needed on the VALE
11894f80b14cSVincenzo Maffione 	 * mismatch datapath. XXX We should set it according to the MTU
11904f80b14cSVincenzo Maffione 	 * known to the kernel. */
11914f80b14cSVincenzo Maffione 	vpna->mfs = NM_BDG_MFS_DEFAULT;
1192847bf383SLuigi Rizzo 	vpna->last_smac = ~0llu;
1193f0ea3689SLuigi Rizzo 	/*if (vpna->mfs > netmap_buf_size)  TODO netmap_buf_size is zero??
1194f0ea3689SLuigi Rizzo 		vpna->mfs = netmap_buf_size; */
1195f0ea3689SLuigi Rizzo 	if (netmap_verbose)
1196b6e66be2SVincenzo Maffione 		nm_prinf("max frame size %u", vpna->mfs);
1197f9790aebSLuigi Rizzo 
1198a6d768d8SVincenzo Maffione 	na->na_flags |= (NAF_BDG_MAYSLEEP | NAF_OFFSETS);
119910b8ef3dSLuigi Rizzo 	/* persistent VALE ports look like hw devices
120010b8ef3dSLuigi Rizzo 	 * with a native netmap adapter
120110b8ef3dSLuigi Rizzo 	 */
120210b8ef3dSLuigi Rizzo 	if (ifp)
120310b8ef3dSLuigi Rizzo 		na->na_flags |= NAF_NATIVE;
1204b6e66be2SVincenzo Maffione 	na->nm_txsync = netmap_vale_vp_txsync;
1205b6e66be2SVincenzo Maffione 	na->nm_rxsync = netmap_vp_rxsync; /* use the one provided by bdg */
1206b6e66be2SVincenzo Maffione 	na->nm_register = netmap_vp_reg;  /* use the one provided by bdg */
1207b6e66be2SVincenzo Maffione 	na->nm_krings_create = netmap_vale_vp_krings_create;
1208b6e66be2SVincenzo Maffione 	na->nm_krings_delete = netmap_vale_vp_krings_delete;
1209b6e66be2SVincenzo Maffione 	na->nm_dtor = netmap_vale_vp_dtor;
121075f4f3edSVincenzo Maffione 	nm_prdis("nr_mem_id %d", req->nr_mem_id);
1211c3e9b4dbSLuiz Otavio O Souza 	na->nm_mem = nmd ?
1212c3e9b4dbSLuiz Otavio O Souza 		netmap_mem_get(nmd):
1213c3e9b4dbSLuiz Otavio O Souza 		netmap_mem_private_new(
1214f9790aebSLuigi Rizzo 			na->num_tx_rings, na->num_tx_desc,
1215f0ea3689SLuigi Rizzo 			na->num_rx_rings, na->num_rx_desc,
12162ff91c17SVincenzo Maffione 			req->nr_extra_bufs, npipes, &error);
1217f0ea3689SLuigi Rizzo 	if (na->nm_mem == NULL)
1218f0ea3689SLuigi Rizzo 		goto err;
1219b6e66be2SVincenzo Maffione 	na->nm_bdg_attach = netmap_vale_vp_bdg_attach;
1220f9790aebSLuigi Rizzo 	/* other nmd fields are set in the common routine */
1221f9790aebSLuigi Rizzo 	error = netmap_attach_common(na);
1222f0ea3689SLuigi Rizzo 	if (error)
1223f0ea3689SLuigi Rizzo 		goto err;
12244bf50f18SLuigi Rizzo 	*ret = vpna;
1225f0ea3689SLuigi Rizzo 	return 0;
1226f0ea3689SLuigi Rizzo 
1227f0ea3689SLuigi Rizzo err:
1228f0ea3689SLuigi Rizzo 	if (na->nm_mem != NULL)
1229c3e9b4dbSLuiz Otavio O Souza 		netmap_mem_put(na->nm_mem);
1230c3e9b4dbSLuiz Otavio O Souza 	nm_os_free(vpna);
1231f9790aebSLuigi Rizzo 	return error;
1232f9790aebSLuigi Rizzo }
1233f9790aebSLuigi Rizzo 
12342a7db7a6SVincenzo Maffione /* nm_bdg_attach callback for VALE ports
12352a7db7a6SVincenzo Maffione  * The na_vp port is this same netmap_adapter. There is no host port.
1236f9790aebSLuigi Rizzo  */
1237f9790aebSLuigi Rizzo static int
1238b6e66be2SVincenzo Maffione netmap_vale_vp_bdg_attach(const char *name, struct netmap_adapter *na,
12392a7db7a6SVincenzo Maffione 		struct nm_bridge *b)
1240f9790aebSLuigi Rizzo {
12412a7db7a6SVincenzo Maffione 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
1242f9790aebSLuigi Rizzo 
1243b6e66be2SVincenzo Maffione 	if ((b->bdg_flags & NM_BDG_NEED_BWRAP) || vpna->na_bdg) {
12442a7db7a6SVincenzo Maffione 		return NM_NEED_BWRAP;
1245f9790aebSLuigi Rizzo 	}
12462a7db7a6SVincenzo Maffione 	na->na_vp = vpna;
1247b6e66be2SVincenzo Maffione 	strlcpy(na->name, name, sizeof(na->name));
12482a7db7a6SVincenzo Maffione 	na->na_hostvp = NULL;
1249f9790aebSLuigi Rizzo 	return 0;
1250f9790aebSLuigi Rizzo }
1251f9790aebSLuigi Rizzo 
/*
 * nm_krings_create callback for VALE bwrap adapters.
 *
 * First creates the krings the same way a plain VALE port does, then runs
 * the common bwrap krings setup. If the second step fails, the krings
 * created by the first step are deleted again.
 *
 * Returns 0 on success, or the error code of the step that failed.
 */
static int
netmap_vale_bwrap_krings_create(struct netmap_adapter *na)
{
	/* impersonate a netmap_vp_adapter */
	int rc = netmap_vale_vp_krings_create(na);

	if (rc != 0)
		return rc;

	rc = netmap_bwrap_krings_create_common(na);
	if (rc == 0)
		return 0;

	/* Roll back the VALE port krings created above. */
	netmap_vale_vp_krings_delete(na);
	return rc;
}
1267f9790aebSLuigi Rizzo 
/*
 * nm_krings_delete callback for VALE bwrap adapters.
 *
 * Tears down in the reverse order of netmap_vale_bwrap_krings_create():
 * the common bwrap part first, then the VALE port krings.
 */
static void
netmap_vale_bwrap_krings_delete(struct netmap_adapter *na)
{
	/* bwrap-specific state goes away first ... */
	netmap_bwrap_krings_delete_common(na);

	/* ... then the krings created as for a plain VALE port. */
	netmap_vale_vp_krings_delete(na);
}
1274f9790aebSLuigi Rizzo 
1275f9790aebSLuigi Rizzo static int
12762a7db7a6SVincenzo Maffione netmap_vale_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
1277f9790aebSLuigi Rizzo {
1278f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna;
12794bf50f18SLuigi Rizzo 	struct netmap_adapter *na = NULL;
12804bf50f18SLuigi Rizzo 	struct netmap_adapter *hostna = NULL;
12812a7db7a6SVincenzo Maffione 	int error;
1282f9790aebSLuigi Rizzo 
1283c3e9b4dbSLuiz Otavio O Souza 	bna = nm_os_malloc(sizeof(*bna));
12844bf50f18SLuigi Rizzo 	if (bna == NULL) {
1285f9790aebSLuigi Rizzo 		return ENOMEM;
12864bf50f18SLuigi Rizzo 	}
1287f9790aebSLuigi Rizzo 	na = &bna->up.up;
1288b6e66be2SVincenzo Maffione 	strlcpy(na->name, nr_name, sizeof(na->name));
128937e3a6d3SLuigi Rizzo 	na->nm_register = netmap_bwrap_reg;
1290b6e66be2SVincenzo Maffione 	na->nm_txsync = netmap_vale_vp_txsync;
1291f9790aebSLuigi Rizzo 	// na->nm_rxsync = netmap_bwrap_rxsync;
12922a7db7a6SVincenzo Maffione 	na->nm_krings_create = netmap_vale_bwrap_krings_create;
12932a7db7a6SVincenzo Maffione 	na->nm_krings_delete = netmap_vale_bwrap_krings_delete;
1294f9790aebSLuigi Rizzo 	na->nm_notify = netmap_bwrap_notify;
1295a6d768d8SVincenzo Maffione 	bna->nm_intr_notify = netmap_bwrap_intr_notify;
1296f9790aebSLuigi Rizzo 	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
12974f80b14cSVincenzo Maffione 	/* Set the mfs, needed on the VALE mismatch datapath. */
12984f80b14cSVincenzo Maffione 	bna->up.mfs = NM_BDG_MFS_DEFAULT;
1299f9790aebSLuigi Rizzo 
1300f0ea3689SLuigi Rizzo 	if (hwna->na_flags & NAF_HOST_RINGS) {
1301f9790aebSLuigi Rizzo 		hostna = &bna->host.up;
1302847bf383SLuigi Rizzo 		hostna->nm_notify = netmap_bwrap_notify;
13034f80b14cSVincenzo Maffione 		bna->host.mfs = NM_BDG_MFS_DEFAULT;
1304f0ea3689SLuigi Rizzo 	}
1305f9790aebSLuigi Rizzo 
13062a7db7a6SVincenzo Maffione 	error = netmap_bwrap_attach_common(na, hwna);
1307f9790aebSLuigi Rizzo 	if (error) {
1308c3e9b4dbSLuiz Otavio O Souza 		nm_os_free(bna);
13092a7db7a6SVincenzo Maffione 	}
1310f9790aebSLuigi Rizzo 	return error;
1311847bf383SLuigi Rizzo }
1312847bf383SLuigi Rizzo 
1313847bf383SLuigi Rizzo int
13142a7db7a6SVincenzo Maffione netmap_get_vale_na(struct nmreq_header *hdr, struct netmap_adapter **na,
13152a7db7a6SVincenzo Maffione 		struct netmap_mem_d *nmd, int create)
1316847bf383SLuigi Rizzo {
13172a7db7a6SVincenzo Maffione 	return netmap_get_bdg_na(hdr, na, nmd, create, &vale_bdg_ops);
1318847bf383SLuigi Rizzo }
1319847bf383SLuigi Rizzo 
13202a7db7a6SVincenzo Maffione 
13212a7db7a6SVincenzo Maffione /* creates a persistent VALE port */
13222a7db7a6SVincenzo Maffione int
13232a7db7a6SVincenzo Maffione nm_vi_create(struct nmreq_header *hdr)
1324847bf383SLuigi Rizzo {
13252a7db7a6SVincenzo Maffione 	struct nmreq_vale_newif *req =
13262a7db7a6SVincenzo Maffione 		(struct nmreq_vale_newif *)(uintptr_t)hdr->nr_body;
13272a7db7a6SVincenzo Maffione 	int error = 0;
13282a7db7a6SVincenzo Maffione 	/* Build a nmreq_register out of the nmreq_vale_newif,
13292a7db7a6SVincenzo Maffione 	 * so that we can call netmap_get_bdg_na(). */
13302a7db7a6SVincenzo Maffione 	struct nmreq_register regreq;
13312a7db7a6SVincenzo Maffione 	bzero(&regreq, sizeof(regreq));
13322a7db7a6SVincenzo Maffione 	regreq.nr_tx_slots = req->nr_tx_slots;
13332a7db7a6SVincenzo Maffione 	regreq.nr_rx_slots = req->nr_rx_slots;
13342a7db7a6SVincenzo Maffione 	regreq.nr_tx_rings = req->nr_tx_rings;
13352a7db7a6SVincenzo Maffione 	regreq.nr_rx_rings = req->nr_rx_rings;
13362a7db7a6SVincenzo Maffione 	regreq.nr_mem_id = req->nr_mem_id;
13372a7db7a6SVincenzo Maffione 	hdr->nr_reqtype = NETMAP_REQ_REGISTER;
13382a7db7a6SVincenzo Maffione 	hdr->nr_body = (uintptr_t)&regreq;
13392a7db7a6SVincenzo Maffione 	error = netmap_vi_create(hdr, 0 /* no autodelete */);
13402a7db7a6SVincenzo Maffione 	hdr->nr_reqtype = NETMAP_REQ_VALE_NEWIF;
13412a7db7a6SVincenzo Maffione 	hdr->nr_body = (uintptr_t)req;
13422a7db7a6SVincenzo Maffione 	/* Write back to the original struct. */
13432a7db7a6SVincenzo Maffione 	req->nr_tx_slots = regreq.nr_tx_slots;
13442a7db7a6SVincenzo Maffione 	req->nr_rx_slots = regreq.nr_rx_slots;
13452a7db7a6SVincenzo Maffione 	req->nr_tx_rings = regreq.nr_tx_rings;
13462a7db7a6SVincenzo Maffione 	req->nr_rx_rings = regreq.nr_rx_rings;
13472a7db7a6SVincenzo Maffione 	req->nr_mem_id = regreq.nr_mem_id;
13482a7db7a6SVincenzo Maffione 	return error;
1349f9790aebSLuigi Rizzo }
13502a7db7a6SVincenzo Maffione 
13512a7db7a6SVincenzo Maffione /* remove a persistent VALE port from the system */
13522a7db7a6SVincenzo Maffione int
13532a7db7a6SVincenzo Maffione nm_vi_destroy(const char *name)
13542a7db7a6SVincenzo Maffione {
1355*e330262fSJustin Hibbits 	if_t ifp;
13562a7db7a6SVincenzo Maffione 	struct netmap_vp_adapter *vpna;
13572a7db7a6SVincenzo Maffione 	int error;
13582a7db7a6SVincenzo Maffione 
13592a7db7a6SVincenzo Maffione 	ifp = ifunit_ref(name);
13602a7db7a6SVincenzo Maffione 	if (!ifp)
13612a7db7a6SVincenzo Maffione 		return ENXIO;
13622a7db7a6SVincenzo Maffione 	NMG_LOCK();
13632a7db7a6SVincenzo Maffione 	/* make sure this is actually a VALE port */
13642a7db7a6SVincenzo Maffione 	if (!NM_NA_VALID(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
13652a7db7a6SVincenzo Maffione 		error = EINVAL;
13662a7db7a6SVincenzo Maffione 		goto err;
13672a7db7a6SVincenzo Maffione 	}
13682a7db7a6SVincenzo Maffione 
13692a7db7a6SVincenzo Maffione 	vpna = (struct netmap_vp_adapter *)NA(ifp);
13702a7db7a6SVincenzo Maffione 
13712a7db7a6SVincenzo Maffione 	/* we can only destroy ports that were created via NETMAP_BDG_NEWIF */
13722a7db7a6SVincenzo Maffione 	if (vpna->autodelete) {
13732a7db7a6SVincenzo Maffione 		error = EINVAL;
13742a7db7a6SVincenzo Maffione 		goto err;
13752a7db7a6SVincenzo Maffione 	}
13762a7db7a6SVincenzo Maffione 
137745c67e8fSVincenzo Maffione 	/* also make sure that nobody is using the interface */
13782a7db7a6SVincenzo Maffione 	if (NETMAP_OWNED_BY_ANY(&vpna->up) ||
13792a7db7a6SVincenzo Maffione 	    vpna->up.na_refcount > 1 /* any ref besides the one in nm_vi_create()? */) {
13802a7db7a6SVincenzo Maffione 		error = EBUSY;
13812a7db7a6SVincenzo Maffione 		goto err;
13822a7db7a6SVincenzo Maffione 	}
13832a7db7a6SVincenzo Maffione 
13842a7db7a6SVincenzo Maffione 	NMG_UNLOCK();
13852a7db7a6SVincenzo Maffione 
1386b6e66be2SVincenzo Maffione 	if (netmap_verbose)
1387*e330262fSJustin Hibbits 		nm_prinf("destroying a persistent vale interface %s", if_name(ifp));
13882a7db7a6SVincenzo Maffione 	/* Linux requires all the references are released
13892a7db7a6SVincenzo Maffione 	 * before unregister
13902a7db7a6SVincenzo Maffione 	 */
13912a7db7a6SVincenzo Maffione 	netmap_detach(ifp);
13922a7db7a6SVincenzo Maffione 	if_rele(ifp);
13932a7db7a6SVincenzo Maffione 	nm_os_vi_detach(ifp);
13942a7db7a6SVincenzo Maffione 	return 0;
13952a7db7a6SVincenzo Maffione 
13962a7db7a6SVincenzo Maffione err:
13972a7db7a6SVincenzo Maffione 	NMG_UNLOCK();
13982a7db7a6SVincenzo Maffione 	if_rele(ifp);
13992a7db7a6SVincenzo Maffione 	return error;
14002a7db7a6SVincenzo Maffione }
14012a7db7a6SVincenzo Maffione 
14022a7db7a6SVincenzo Maffione static int
14032a7db7a6SVincenzo Maffione nm_update_info(struct nmreq_register *req, struct netmap_adapter *na)
14042a7db7a6SVincenzo Maffione {
14052a7db7a6SVincenzo Maffione 	req->nr_rx_rings = na->num_rx_rings;
14062a7db7a6SVincenzo Maffione 	req->nr_tx_rings = na->num_tx_rings;
14072a7db7a6SVincenzo Maffione 	req->nr_rx_slots = na->num_rx_desc;
14082a7db7a6SVincenzo Maffione 	req->nr_tx_slots = na->num_tx_desc;
14092a7db7a6SVincenzo Maffione 	return netmap_mem_get_info(na->nm_mem, &req->nr_memsize, NULL,
14102a7db7a6SVincenzo Maffione 					&req->nr_mem_id);
14112a7db7a6SVincenzo Maffione }
14122a7db7a6SVincenzo Maffione 
14132a7db7a6SVincenzo Maffione 
14142a7db7a6SVincenzo Maffione /*
14152a7db7a6SVincenzo Maffione  * Create a virtual interface registered to the system.
14162a7db7a6SVincenzo Maffione  * The interface will be attached to a bridge later.
14172a7db7a6SVincenzo Maffione  */
14182a7db7a6SVincenzo Maffione int
14192a7db7a6SVincenzo Maffione netmap_vi_create(struct nmreq_header *hdr, int autodelete)
14202a7db7a6SVincenzo Maffione {
14212a7db7a6SVincenzo Maffione 	struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
1422*e330262fSJustin Hibbits 	if_t ifp;
14232a7db7a6SVincenzo Maffione 	struct netmap_vp_adapter *vpna;
14242a7db7a6SVincenzo Maffione 	struct netmap_mem_d *nmd = NULL;
14252a7db7a6SVincenzo Maffione 	int error;
14262a7db7a6SVincenzo Maffione 
14272a7db7a6SVincenzo Maffione 	if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
14282a7db7a6SVincenzo Maffione 		return EINVAL;
14292a7db7a6SVincenzo Maffione 	}
14302a7db7a6SVincenzo Maffione 
14312a7db7a6SVincenzo Maffione 	/* don't include VALE prefix */
14322a7db7a6SVincenzo Maffione 	if (!strncmp(hdr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME)))
14332a7db7a6SVincenzo Maffione 		return EINVAL;
14342a7db7a6SVincenzo Maffione 	if (strlen(hdr->nr_name) >= IFNAMSIZ) {
14352a7db7a6SVincenzo Maffione 		return EINVAL;
14362a7db7a6SVincenzo Maffione 	}
14372a7db7a6SVincenzo Maffione 	ifp = ifunit_ref(hdr->nr_name);
14382a7db7a6SVincenzo Maffione 	if (ifp) { /* already exist, cannot create new one */
14392a7db7a6SVincenzo Maffione 		error = EEXIST;
14402a7db7a6SVincenzo Maffione 		NMG_LOCK();
14412a7db7a6SVincenzo Maffione 		if (NM_NA_VALID(ifp)) {
14422a7db7a6SVincenzo Maffione 			int update_err = nm_update_info(req, NA(ifp));
14432a7db7a6SVincenzo Maffione 			if (update_err)
14442a7db7a6SVincenzo Maffione 				error = update_err;
14452a7db7a6SVincenzo Maffione 		}
14462a7db7a6SVincenzo Maffione 		NMG_UNLOCK();
14472a7db7a6SVincenzo Maffione 		if_rele(ifp);
14482a7db7a6SVincenzo Maffione 		return error;
14492a7db7a6SVincenzo Maffione 	}
14502a7db7a6SVincenzo Maffione 	error = nm_os_vi_persist(hdr->nr_name, &ifp);
14512a7db7a6SVincenzo Maffione 	if (error)
14522a7db7a6SVincenzo Maffione 		return error;
14532a7db7a6SVincenzo Maffione 
14542a7db7a6SVincenzo Maffione 	NMG_LOCK();
14552a7db7a6SVincenzo Maffione 	if (req->nr_mem_id) {
14562a7db7a6SVincenzo Maffione 		nmd = netmap_mem_find(req->nr_mem_id);
14572a7db7a6SVincenzo Maffione 		if (nmd == NULL) {
14582a7db7a6SVincenzo Maffione 			error = EINVAL;
14592a7db7a6SVincenzo Maffione 			goto err_1;
14602a7db7a6SVincenzo Maffione 		}
14612a7db7a6SVincenzo Maffione 	}
14622a7db7a6SVincenzo Maffione 	/* netmap_vp_create creates a struct netmap_vp_adapter */
1463b6e66be2SVincenzo Maffione 	error = netmap_vale_vp_create(hdr, ifp, nmd, &vpna);
14642a7db7a6SVincenzo Maffione 	if (error) {
1465b6e66be2SVincenzo Maffione 		if (netmap_debug & NM_DEBUG_VALE)
1466b6e66be2SVincenzo Maffione 			nm_prerr("error %d", error);
14672a7db7a6SVincenzo Maffione 		goto err_1;
14682a7db7a6SVincenzo Maffione 	}
14692a7db7a6SVincenzo Maffione 	/* persist-specific routines */
14702a7db7a6SVincenzo Maffione 	vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
14712a7db7a6SVincenzo Maffione 	if (!autodelete) {
14722a7db7a6SVincenzo Maffione 		netmap_adapter_get(&vpna->up);
14732a7db7a6SVincenzo Maffione 	} else {
14742a7db7a6SVincenzo Maffione 		vpna->autodelete = 1;
14752a7db7a6SVincenzo Maffione 	}
14762a7db7a6SVincenzo Maffione 	NM_ATTACH_NA(ifp, &vpna->up);
14772a7db7a6SVincenzo Maffione 	/* return the updated info */
14782a7db7a6SVincenzo Maffione 	error = nm_update_info(req, &vpna->up);
14792a7db7a6SVincenzo Maffione 	if (error) {
14802a7db7a6SVincenzo Maffione 		goto err_2;
14812a7db7a6SVincenzo Maffione 	}
148275f4f3edSVincenzo Maffione 	nm_prdis("returning nr_mem_id %d", req->nr_mem_id);
14832a7db7a6SVincenzo Maffione 	if (nmd)
14842a7db7a6SVincenzo Maffione 		netmap_mem_put(nmd);
14852a7db7a6SVincenzo Maffione 	NMG_UNLOCK();
1486*e330262fSJustin Hibbits 	nm_prdis("created %s", if_name(ifp));
14872a7db7a6SVincenzo Maffione 	return 0;
14882a7db7a6SVincenzo Maffione 
14892a7db7a6SVincenzo Maffione err_2:
14902a7db7a6SVincenzo Maffione 	netmap_detach(ifp);
14912a7db7a6SVincenzo Maffione err_1:
14922a7db7a6SVincenzo Maffione 	if (nmd)
14932a7db7a6SVincenzo Maffione 		netmap_mem_put(nmd);
14942a7db7a6SVincenzo Maffione 	NMG_UNLOCK();
14952a7db7a6SVincenzo Maffione 	nm_os_vi_detach(ifp);
14962a7db7a6SVincenzo Maffione 
14972a7db7a6SVincenzo Maffione 	return error;
14982a7db7a6SVincenzo Maffione }
14992a7db7a6SVincenzo Maffione 
1500f9790aebSLuigi Rizzo #endif /* WITH_VALE */
1501