xref: /freebsd-12.1/sys/dev/netmap/netmap_vale.c (revision 847bf383)
1 /*
2  * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *   1. Redistributions of source code must retain the above copyright
8  *      notice, this list of conditions and the following disclaimer.
9  *   2. Redistributions in binary form must reproduce the above copyright
10  *      notice, this list of conditions and the following disclaimer in the
11  *      documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 
27 /*
28  * This module implements the VALE switch for netmap
29 
30 --- VALE SWITCH ---
31 
32 NMG_LOCK() serializes all modifications to switches and ports.
33 A switch cannot be deleted until all ports are gone.
34 
35 For each switch, an SX lock (RWlock on linux) protects
36  * deletion of ports. When configuring or deleting a port, the
37 lock is acquired in exclusive mode (after holding NMG_LOCK).
38 When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
39 The lock is held throughout the entire forwarding cycle,
40 during which the thread may incur a page fault.
41 Hence it is important that sleepable shared locks are used.
42 
43 On the rx ring, the per-port lock is grabbed initially to reserve
44 a number of slots in the ring, then the lock is released,
45 packets are copied from source to destination, and then
46 the lock is acquired again and the receive ring is updated.
47 (A similar thing is done on the tx ring for NIC and host stack
48 ports attached to the switch)
49 
50  */
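
/*
 * A minimal sketch of the forwarding cycle described above, in the same
 * reserve/copy/update shape used by nm_bdg_flush() below. This is an
 * illustrative fragment only: error handling, fragments and the broadcast
 * queue are omitted, and copy_packets()/next are placeholders.
 */
#if 0
	BDG_RLOCK(b);			/* shared, sleepable: senders run in parallel */
	mtx_lock(&kring->q_lock);
	my_start = kring->nkr_hwlease;	/* reserve a range of rx slots */
	howmany = nm_kr_space(kring, 1);
	lease_idx = nm_kr_lease(kring, howmany, 1);
	mtx_unlock(&kring->q_lock);

	/* copy with no spinlock held; a page fault here is safe
	 * because the shared bridge lock is sleepable
	 */
	copy_packets(ft, kring, my_start, howmany);

	mtx_lock(&kring->q_lock);
	kring->nkr_leases[lease_idx] = next;	/* report how far we got */
	/* if all preceding leases are complete, advance nr_hwtail and notify */
	mtx_unlock(&kring->q_lock);
	BDG_RUNLOCK(b);
#endif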
51 
52 /*
53  * OS-specific code that is used only within this file.
54  * Other OS-specific code that must be accessed by drivers
55  * is present in netmap_kern.h
56  */
57 
58 #if defined(__FreeBSD__)
59 #include <sys/cdefs.h> /* prerequisite */
60 __FBSDID("$FreeBSD$");
61 
62 #include <sys/types.h>
63 #include <sys/errno.h>
64 #include <sys/param.h>	/* defines used in kernel.h */
65 #include <sys/kernel.h>	/* types used in module initialization */
66 #include <sys/conf.h>	/* cdevsw struct, UID, GID */
67 #include <sys/sockio.h>
68 #include <sys/socketvar.h>	/* struct socket */
69 #include <sys/malloc.h>
70 #include <sys/poll.h>
71 #include <sys/rwlock.h>
72 #include <sys/socket.h> /* sockaddrs */
73 #include <sys/selinfo.h>
74 #include <sys/sysctl.h>
75 #include <net/if.h>
76 #include <net/if_var.h>
77 #include <net/bpf.h>		/* BIOCIMMEDIATE */
78 #include <machine/bus.h>	/* bus_dmamap_* */
79 #include <sys/endian.h>
80 #include <sys/refcount.h>
81 
82 
83 #define BDG_RWLOCK_T		struct rwlock
84 
85 #define	BDG_RWINIT(b)		\
86 	rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
87 #define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)
88 #define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)
89 #define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)
90 #define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)
91 #define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)
92 #define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)
93 
94 
95 #elif defined(linux)
96 
97 #include "bsd_glue.h"
98 
99 #elif defined(__APPLE__)
100 
101 #warning OSX support is only partial
102 #include "osx_glue.h"
103 
104 #else
105 
106 #error	Unsupported platform
107 
108 #endif /* unsupported */
109 
110 /*
111  * common headers
112  */
113 
114 #include <net/netmap.h>
115 #include <dev/netmap/netmap_kern.h>
116 #include <dev/netmap/netmap_mem2.h>
117 
118 #ifdef WITH_VALE
119 
120 /*
121  * system parameters (most of them in netmap_kern.h)
122  * NM_NAME	prefix for switch port names, default "vale"
123  * NM_BDG_MAXPORTS	number of ports
124  * NM_BRIDGES	max number of switches in the system.
125  *	XXX should become a sysctl or tunable
126  *
127  * Switch ports are named valeX:Y where X is the switch name and Y
128  * is the port. If Y matches a physical interface name, the port is
129  * connected to a physical device.
130  *
131  * Unlike physical interfaces, switch ports use their own memory region
132  * for rings and buffers.
133  * The virtual interfaces use a per-queue lock instead of the core lock.
134  * In the tx loop, we aggregate traffic in batches to make all operations
135  * faster. The batch size is bridge_batch.
136  */
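
/*
 * For example, a user process can attach to a port of switch "vale0"
 * with the netmap_user.h helpers. This is a sketch: "vale0:p1" is an
 * arbitrary example name, and opening a port name that does not match
 * a physical interface creates an ephemeral virtual port.
 */
#if 0
#define NETMAP_WITH_LIBS
#include <net/netmap_user.h>

void
example(void)
{
	struct nm_desc *d = nm_open("vale0:p1", NULL, 0, NULL);

	if (d != NULL) {
		/* the port's rings and buffers are now mapped in d */
		nm_close(d);
	}
}
#endif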
137 #define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
138 #define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
139 #define NM_BRIDGE_RINGSIZE	1024	/* in the device */
140 #define NM_BDG_HASH		1024	/* forwarding table entries */
141 #define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
142 #define NM_MULTISEG		64	/* max size of a chain of bufs */
143 /* actual size of the tables */
144 #define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
145 /* NM_FT_NULL terminates a list of slots in the ft */
146 #define NM_FT_NULL		NM_BDG_BATCH_MAX
147 #define	NM_BRIDGES		8	/* number of bridges */
148 
149 
150 /*
151  * bridge_batch is set via sysctl to the max batch size to be
152  * used in the bridge. The actual value may be larger as the
153  * last packet in the block may overflow the size.
154  */
155 int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
156 SYSCTL_DECL(_dev_netmap);
157 SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0, "");
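/* the knob shows up as dev.netmap.bridge_batch, e.g.:
 *	sysctl dev.netmap.bridge_batch=512
 */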
158 
159 
160 static int netmap_vp_create(struct nmreq *, struct ifnet *, struct netmap_vp_adapter **);
161 static int netmap_vp_reg(struct netmap_adapter *na, int onoff);
162 static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
163 
164 /*
165  * For each output interface, nm_bdg_q is used to construct a list.
166  * bq_len is the number of output buffers (we can have coalescing
167  * during the copy).
168  */
169 struct nm_bdg_q {
170 	uint16_t bq_head;
171 	uint16_t bq_tail;
172 	uint32_t bq_len;	/* number of buffers */
173 };
174 
175 /* XXX revise this */
176 struct nm_hash_ent {
177 	uint64_t	mac;	/* the top 2 bytes are the epoch */
178 	uint64_t	ports;
179 };
180 
181 /*
182  * nm_bridge is a descriptor for a VALE switch.
183  * Interfaces for a bridge are all in bdg_ports[].
184  * The array has a fixed size; an empty entry does not terminate
185  * the search, but lookups only occur on attach/detach, so we
186  * don't mind if they are slow.
187  *
188  * The bridge is non-blocking on the transmit ports: excess
189  * packets are dropped if there is no room on the output port.
190  *
191  * bdg_lock protects accesses to the bdg_ports array.
192  * This is a rw lock (or equivalent).
193  */
194 struct nm_bridge {
195 	/* XXX what is the proper alignment/layout ? */
196 	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
197 	int		bdg_namelen;
198 	uint32_t	bdg_active_ports; /* 0 means free */
199 	char		bdg_basename[IFNAMSIZ];
200 
201 	/* Indexes of active ports (up to active_ports)
202 	 * and all other remaining ports.
203 	 */
204 	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];
205 
206 	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
207 
208 
209 	/*
210 	 * The function to decide the destination port.
211 	 * It returns either the index of the destination port,
212 	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
213 	 * forward this packet.  ring_nr is the source ring index, and the
214 	 * function may overwrite this value to forward this packet to a
215 	 * different ring index.
216 	 * This function must be set by netmap_bdgctl().
217 	 */
218 	struct netmap_bdg_ops bdg_ops;
219 
220 	/* the forwarding table, MAC+ports.
221 	 * XXX should be changed to an argument to be passed to
222 	 * the lookup function, and allocated on attach
223 	 */
224 	struct nm_hash_ent ht[NM_BDG_HASH];
225 
226 #ifdef CONFIG_NET_NS
227 	struct net *ns;
228 #endif /* CONFIG_NET_NS */
229 };
230 
231 const char*
232 netmap_bdg_name(struct netmap_vp_adapter *vp)
233 {
234 	struct nm_bridge *b = vp->na_bdg;
235 	if (b == NULL)
236 		return NULL;
237 	return b->bdg_basename;
238 }
239 
240 
241 #ifndef CONFIG_NET_NS
242 /*
243  * XXX in principle nm_bridges could be created dynamically
244  * Right now we have a static array and deletions are protected
245  * by an exclusive lock.
246  */
247 struct nm_bridge *nm_bridges;
248 #endif /* !CONFIG_NET_NS */
249 
250 
251 /*
252  * this is a slightly optimized copy routine which rounds
253  * to multiple of 64 bytes and is often faster than dealing
254  * with other odd sizes. We assume there is enough room
255  * in the source and destination buffers.
256  *
257  * XXX only for multiples of 64 bytes, non overlapped.
258  */
259 static inline void
260 pkt_copy(void *_src, void *_dst, int l)
261 {
262         uint64_t *src = _src;
263         uint64_t *dst = _dst;
264         if (unlikely(l >= 1024)) {
265                 memcpy(dst, src, l);
266                 return;
267         }
268         for (; likely(l > 0); l-=64) {
269                 *dst++ = *src++;
270                 *dst++ = *src++;
271                 *dst++ = *src++;
272                 *dst++ = *src++;
273                 *dst++ = *src++;
274                 *dst++ = *src++;
275                 *dst++ = *src++;
276                 *dst++ = *src++;
277         }
278 }
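
/*
 * Callers are expected to round the length up themselves, as
 * nm_bdg_flush() below does before invoking pkt_copy() (fragment):
 */
#if 0
	copy_len = (copy_len + 63) & ~63;	/* round to a multiple of 64 */
	pkt_copy(src, dst, (int)copy_len);	/* src/dst must not overlap */
#endif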
279 
280 
281 /*
282  * locate a bridge among the existing ones.
283  * MUST BE CALLED WITH NMG_LOCK()
284  *
285  * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
286  * We assume that this is called with a name of at least NM_NAME chars.
287  */
288 static struct nm_bridge *
289 nm_find_bridge(const char *name, int create)
290 {
291 	int i, l, namelen;
292 	struct nm_bridge *b = NULL, *bridges;
293 	u_int num_bridges;
294 
295 	NMG_LOCK_ASSERT();
296 
297 	netmap_bns_getbridges(&bridges, &num_bridges);
298 
299 	namelen = strlen(NM_NAME);	/* base length */
300 	l = name ? strlen(name) : 0;		/* actual length */
301 	if (l < namelen) {
302 		D("invalid bridge name %s", name ? name : NULL);
303 		return NULL;
304 	}
305 	for (i = namelen + 1; i < l; i++) {
306 		if (name[i] == ':') {
307 			namelen = i;
308 			break;
309 		}
310 	}
311 	if (namelen >= IFNAMSIZ)
312 		namelen = IFNAMSIZ;
313 	ND("--- prefix is '%.*s' ---", namelen, name);
314 
315 	/* lookup the name, remember empty slot if there is one */
316 	for (i = 0; i < num_bridges; i++) {
317 		struct nm_bridge *x = bridges + i;
318 
319 		if (x->bdg_active_ports == 0) {
320 			if (create && b == NULL)
321 				b = x;	/* record empty slot */
322 		} else if (x->bdg_namelen != namelen) {
323 			continue;
324 		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
325 			ND("found '%.*s' at %d", namelen, name, i);
326 			b = x;
327 			break;
328 		}
329 	}
330 	if (i == num_bridges && b) { /* name not found, can create entry */
331 		/* initialize the bridge */
332 		strncpy(b->bdg_basename, name, namelen);
333 		ND("create new bridge %s with ports %d", b->bdg_basename,
334 			b->bdg_active_ports);
335 		b->bdg_namelen = namelen;
336 		b->bdg_active_ports = 0;
337 		for (i = 0; i < NM_BDG_MAXPORTS; i++)
338 			b->bdg_port_index[i] = i;
339 		/* set the default function */
340 		b->bdg_ops.lookup = netmap_bdg_learning;
341 		/* reset the MAC address table */
342 		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
343 		NM_BNS_GET(b);
344 	}
345 	return b;
346 }
347 
348 
349 /*
350  * Free the forwarding tables for rings attached to switch ports.
351  */
352 static void
353 nm_free_bdgfwd(struct netmap_adapter *na)
354 {
355 	int nrings, i;
356 	struct netmap_kring *kring;
357 
358 	NMG_LOCK_ASSERT();
359 	nrings = na->num_tx_rings;
360 	kring = na->tx_rings;
361 	for (i = 0; i < nrings; i++) {
362 		if (kring[i].nkr_ft) {
363 			free(kring[i].nkr_ft, M_DEVBUF);
364 			kring[i].nkr_ft = NULL; /* protect from freeing twice */
365 		}
366 	}
367 }
368 
369 
370 /*
371  * Allocate the forwarding tables for the rings attached to the bridge ports.
372  */
373 static int
374 nm_alloc_bdgfwd(struct netmap_adapter *na)
375 {
376 	int nrings, l, i, num_dstq;
377 	struct netmap_kring *kring;
378 
379 	NMG_LOCK_ASSERT();
380 	/* all port:rings + broadcast */
381 	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
382 	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
383 	l += sizeof(struct nm_bdg_q) * num_dstq;
384 	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
385 
386 	nrings = netmap_real_rings(na, NR_TX);
387 	kring = na->tx_rings;
388 	for (i = 0; i < nrings; i++) {
389 		struct nm_bdg_fwd *ft;
390 		struct nm_bdg_q *dstq;
391 		int j;
392 
393 		ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
394 		if (!ft) {
395 			nm_free_bdgfwd(na);
396 			return ENOMEM;
397 		}
398 		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
399 		for (j = 0; j < num_dstq; j++) {
400 			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
401 			dstq[j].bq_len = 0;
402 		}
403 		kring[i].nkr_ft = ft;
404 	}
405 	return 0;
406 }
407 
408 
409 /* remove from bridge b the ports in slots hw and sw
410  * (sw can be -1 if not needed)
411  */
412 static void
413 netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
414 {
415 	int s_hw = hw, s_sw = sw;
416 	int i, lim = b->bdg_active_ports;
417 	uint8_t tmp[NM_BDG_MAXPORTS];
418 
419 	/*
420 	 * New algorithm:
421 	 * make a copy of bdg_port_index;
422 	 * lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
423 	 * in the array of bdg_port_index, replacing them with
424 	 * entries from the bottom of the array;
425 	 * decrement bdg_active_ports;
426 	 * acquire BDG_WLOCK() and copy back the array.
427 	 */
428 
429 	if (netmap_verbose)
430 		D("detach %d and %d (lim %d)", hw, sw, lim);
431 	/* make a copy of the list of active ports, update it,
432 	 * and then copy back within BDG_WLOCK().
433 	 */
434 	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
435 	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
436 		if (hw >= 0 && tmp[i] == hw) {
437 			ND("detach hw %d at %d", hw, i);
438 			lim--; /* point to last active port */
439 			tmp[i] = tmp[lim]; /* swap with i */
440 			tmp[lim] = hw;	/* now this is inactive */
441 			hw = -1;
442 		} else if (sw >= 0 && tmp[i] == sw) {
443 			ND("detach sw %d at %d", sw, i);
444 			lim--;
445 			tmp[i] = tmp[lim];
446 			tmp[lim] = sw;
447 			sw = -1;
448 		} else {
449 			i++;
450 		}
451 	}
452 	if (hw >= 0 || sw >= 0) {
453 		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
454 	}
455 
456 	BDG_WLOCK(b);
457 	if (b->bdg_ops.dtor)
458 		b->bdg_ops.dtor(b->bdg_ports[s_hw]);
459 	b->bdg_ports[s_hw] = NULL;
460 	if (s_sw >= 0) {
461 		b->bdg_ports[s_sw] = NULL;
462 	}
463 	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
464 	b->bdg_active_ports = lim;
465 	BDG_WUNLOCK(b);
466 
467 	ND("now %d active ports", lim);
468 	if (lim == 0) {
469 		ND("marking bridge %s as free", b->bdg_basename);
470 		bzero(&b->bdg_ops, sizeof(b->bdg_ops));
471 		NM_BNS_PUT(b);
472 	}
473 }
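
/*
 * Worked example of the swap-with-last step above: with
 * bdg_port_index = { 3, 0, 2, 1 } and bdg_active_ports = 4, detaching
 * hw port 0 (found at position 1) swaps it with the last active entry,
 * giving bdg_port_index = { 3, 1, 2, 0 } and bdg_active_ports = 3;
 * index 0 now sits past the active region and is effectively free.
 */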
474 
475 /* nm_bdg_ctl callback for VALE ports */
476 static int
477 netmap_vp_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
478 {
479 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
480 	struct nm_bridge *b = vpna->na_bdg;
481 
482 	if (attach)
483 		return 0; /* nothing to do */
484 	if (b) {
485 		netmap_set_all_rings(na, 0 /* disable */);
486 		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
487 		vpna->na_bdg = NULL;
488 		netmap_set_all_rings(na, 1 /* enable */);
489 	}
490 	/* we took a reference just for the attach */
491 	netmap_adapter_put(na);
492 	return 0;
493 }
494 
495 /* nm_dtor callback for ephemeral VALE ports */
496 static void
497 netmap_vp_dtor(struct netmap_adapter *na)
498 {
499 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
500 	struct nm_bridge *b = vpna->na_bdg;
501 
502 	ND("%s has %d references", na->name, na->na_refcount);
503 
504 	if (b) {
505 		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
506 	}
507 }
508 
509 /* nm_dtor callback for persistent VALE ports */
510 static void
511 netmap_persist_vp_dtor(struct netmap_adapter *na)
512 {
513 	struct ifnet *ifp = na->ifp;
514 
515 	netmap_vp_dtor(na);
516 	na->ifp = NULL;
517 	nm_vi_detach(ifp);
518 }
519 
520 /* remove a persistent VALE port from the system */
521 static int
522 nm_vi_destroy(const char *name)
523 {
524 	struct ifnet *ifp;
525 	int error;
526 
527 	ifp = ifunit_ref(name);
528 	if (!ifp)
529 		return ENXIO;
530 	NMG_LOCK();
531 	/* make sure this is actually a VALE port */
532 	if (!NETMAP_CAPABLE(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
533 		error = EINVAL;
534 		goto err;
535 	}
536 
537 	if (NA(ifp)->na_refcount > 1) {
538 		error = EBUSY;
539 		goto err;
540 	}
541 	NMG_UNLOCK();
542 
543 	D("destroying a persistent vale interface %s", ifp->if_xname);
544 	/* Linux requires that all references be released
545 	 * before unregistering
546 	 */
547 	if_rele(ifp);
548 	netmap_detach(ifp);
549 	return 0;
550 
551 err:
552 	NMG_UNLOCK();
553 	if_rele(ifp);
554 	return error;
555 }
556 
557 /*
558  * Create a virtual interface registered to the system.
559  * The interface will be attached to a bridge later.
560  */
561 static int
562 nm_vi_create(struct nmreq *nmr)
563 {
564 	struct ifnet *ifp;
565 	struct netmap_vp_adapter *vpna;
566 	int error;
567 
568 	/* don't include VALE prefix */
569 	if (!strncmp(nmr->nr_name, NM_NAME, strlen(NM_NAME)))
570 		return EINVAL;
571 	ifp = ifunit_ref(nmr->nr_name);
572 	if (ifp) { /* already exists, cannot create a new one */
573 		if_rele(ifp);
574 		return EEXIST;
575 	}
576 	error = nm_vi_persist(nmr->nr_name, &ifp);
577 	if (error)
578 		return error;
579 
580 	NMG_LOCK();
581 	/* netmap_vp_create creates a struct netmap_vp_adapter */
582 	error = netmap_vp_create(nmr, ifp, &vpna);
583 	if (error) {
584 		D("error %d", error);
585 		nm_vi_detach(ifp);
586 		return error;
587 	}
588 	/* persist-specific routines */
589 	vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
590 	vpna->up.nm_dtor = netmap_persist_vp_dtor;
591 	netmap_adapter_get(&vpna->up);
592 	NMG_UNLOCK();
593 	D("created %s", ifp->if_xname);
594 	return 0;
595 }
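
/*
 * From user space, persistent ports are created and destroyed through
 * NIOCREGIF with nr_cmd set to NETMAP_BDG_NEWIF/NETMAP_BDG_DELIF, as the
 * vale-ctl tool does. A sketch ("myport" is an arbitrary name and fd is
 * an open /dev/netmap descriptor):
 */
#if 0
static int
create_port(int fd)
{
	struct nmreq nmr;

	bzero(&nmr, sizeof(nmr));
	nmr.nr_version = NETMAP_API;
	strncpy(nmr.nr_name, "myport", sizeof(nmr.nr_name) - 1);
	nmr.nr_cmd = NETMAP_BDG_NEWIF;
	return ioctl(fd, NIOCREGIF, &nmr);
}
#endif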
596 
597 /* Try to get a reference to a netmap adapter attached to a VALE switch.
598  * If the adapter is found (or is created), this function returns 0, a
599  * non-NULL pointer is returned into *na, and the caller holds a
600  * reference to the adapter.
601  * If an adapter is not found, then no reference is grabbed and the
602  * function returns an error code, or 0 if there is just a VALE prefix
603  * mismatch. Therefore the caller holds a reference when
604  * (*na != NULL && return == 0).
605  */
606 int
607 netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
608 {
609 	char *nr_name = nmr->nr_name;
610 	const char *ifname;
611 	struct ifnet *ifp;
612 	int error = 0;
613 	struct netmap_vp_adapter *vpna, *hostna = NULL;
614 	struct nm_bridge *b;
615 	int i, j, cand = -1, cand2 = -1;
616 	int needed;
617 
618 	*na = NULL;     /* default return value */
619 
620 	/* first try to see if this is a bridge port. */
621 	NMG_LOCK_ASSERT();
622 	if (strncmp(nr_name, NM_NAME, sizeof(NM_NAME) - 1)) {
623 		return 0;  /* no error, but no VALE prefix */
624 	}
625 
626 	b = nm_find_bridge(nr_name, create);
627 	if (b == NULL) {
628 		D("no bridges available for '%s'", nr_name);
629 		return (create ? ENOMEM : ENXIO);
630 	}
631 	if (strlen(nr_name) < b->bdg_namelen) /* impossible */
632 		panic("x");
633 
634 	/* Now we are sure that name starts with the bridge's name,
635 	 * lookup the port in the bridge. We need to scan the entire
636 	 * list. It is not important to hold a WLOCK on the bridge
637 	 * during the search because NMG_LOCK already guarantees
638 	 * that there are no other possible writers.
639 	 */
640 
641 	/* lookup in the local list of ports */
642 	for (j = 0; j < b->bdg_active_ports; j++) {
643 		i = b->bdg_port_index[j];
644 		vpna = b->bdg_ports[i];
645 		// KASSERT(na != NULL);
646 		ND("checking %s", vpna->up.name);
647 		if (!strcmp(vpna->up.name, nr_name)) {
648 			netmap_adapter_get(&vpna->up);
649 			ND("found existing if %s refs %d", nr_name)
650 			*na = &vpna->up;
651 			return 0;
652 		}
653 	}
654 	/* not found, should we create it? */
655 	if (!create)
656 		return ENXIO;
657 	/* yes we should, see if we have space to attach entries */
658 	needed = 2; /* in some cases we only need 1 */
659 	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
660 		D("bridge full %d, cannot create new port", b->bdg_active_ports);
661 		return ENOMEM;
662 	}
663 	/* record the next two ports available, but do not allocate yet */
664 	cand = b->bdg_port_index[b->bdg_active_ports];
665 	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
666 	ND("+++ bridge %s port %s used %d avail %d %d",
667 		b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);
668 
669 	/*
670 	 * try to see if there is a matching NIC with this name
671 	 * (after the bridge's name)
672 	 */
673 	ifname = nr_name + b->bdg_namelen + 1;
674 	ifp = ifunit_ref(ifname);
675 	if (!ifp) {
676 		/* Create an ephemeral virtual port
677 		 * This block contains all the ephemeral-specific logic.
678 		 */
679 		if (nmr->nr_cmd) {
680 			/* nr_cmd must be 0 for a virtual port */
681 			return EINVAL;
682 		}
683 
684 		/* bdg_netmap_attach creates a struct netmap_adapter */
685 		error = netmap_vp_create(nmr, NULL, &vpna);
686 		if (error) {
687 			D("error %d", error);
688 			free(ifp, M_DEVBUF);
689 			return error;
690 		}
691 		/* shortcut - we can skip get_hw_na(),
692 		 * ownership check and nm_bdg_attach()
693 		 */
694 	} else {
695 		struct netmap_adapter *hw;
696 
697 		error = netmap_get_hw_na(ifp, &hw);
698 		if (error || hw == NULL)
699 			goto out;
700 
701 		/* host adapter might not be created */
702 		error = hw->nm_bdg_attach(nr_name, hw);
703 		if (error)
704 			goto out;
705 		vpna = hw->na_vp;
706 		hostna = hw->na_hostvp;
707 		if_rele(ifp);
708 		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
709 			hostna = NULL;
710 	}
711 
712 	BDG_WLOCK(b);
713 	vpna->bdg_port = cand;
714 	ND("NIC  %p to bridge port %d", vpna, cand);
715 	/* bind the port to the bridge (virtual ports are not active) */
716 	b->bdg_ports[cand] = vpna;
717 	vpna->na_bdg = b;
718 	b->bdg_active_ports++;
719 	if (hostna != NULL) {
720 		/* also bind the host stack to the bridge */
721 		b->bdg_ports[cand2] = hostna;
722 		hostna->bdg_port = cand2;
723 		hostna->na_bdg = b;
724 		b->bdg_active_ports++;
725 		ND("host %p to bridge port %d", hostna, cand2);
726 	}
727 	ND("if %s refs %d", ifname, vpna->up.na_refcount);
728 	BDG_WUNLOCK(b);
729 	*na = &vpna->up;
730 	netmap_adapter_get(*na);
731 	return 0;
732 
733 out:
734 	if_rele(ifp);
735 
736 	return error;
737 }
738 
739 
740 /* Process NETMAP_BDG_ATTACH */
741 static int
742 nm_bdg_ctl_attach(struct nmreq *nmr)
743 {
744 	struct netmap_adapter *na;
745 	int error;
746 
747 	NMG_LOCK();
748 
749 	error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */);
750 	if (error) /* no device */
751 		goto unlock_exit;
752 
753 	if (na == NULL) { /* VALE prefix missing */
754 		error = EINVAL;
755 		goto unlock_exit;
756 	}
757 
758 	if (NETMAP_OWNED_BY_ANY(na)) {
759 		error = EBUSY;
760 		goto unref_exit;
761 	}
762 
763 	if (na->nm_bdg_ctl) {
764 		/* nop for VALE ports. The bwrap needs to put the hwna
765 		 * in netmap mode (see netmap_bwrap_bdg_ctl)
766 		 */
767 		error = na->nm_bdg_ctl(na, nmr, 1);
768 		if (error)
769 			goto unref_exit;
770 		ND("registered %s to netmap-mode", na->name);
771 	}
772 	NMG_UNLOCK();
773 	return 0;
774 
775 unref_exit:
776 	netmap_adapter_put(na);
777 unlock_exit:
778 	NMG_UNLOCK();
779 	return error;
780 }
781 
782 
783 /* process NETMAP_BDG_DETACH */
784 static int
785 nm_bdg_ctl_detach(struct nmreq *nmr)
786 {
787 	struct netmap_adapter *na;
788 	int error;
789 
790 	NMG_LOCK();
791 	error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */);
792 	if (error) { /* no device, or another bridge or user owns the device */
793 		goto unlock_exit;
794 	}
795 
796 	if (na == NULL) { /* VALE prefix missing */
797 		error = EINVAL;
798 		goto unlock_exit;
799 	}
800 
801 	if (na->nm_bdg_ctl) {
802 		/* remove the port from the bridge. The bwrap
803 		 * also needs to put the hwna in normal mode
804 		 */
805 		error = na->nm_bdg_ctl(na, nmr, 0);
806 	}
807 
808 	netmap_adapter_put(na);
809 unlock_exit:
810 	NMG_UNLOCK();
811 	return error;
812 
813 }
814 
815 
816 /* Called by either user's context (netmap_ioctl())
817  * or external kernel modules (e.g., Openvswitch).
818  * Operation is indicated in nmr->nr_cmd.
819  * NETMAP_BDG_REGOPS, which sets the config/lookup/dtor functions of the
820  * bridge, requires the bdg_ops argument; the other commands ignore it.
821  *
822  * Called without NMG_LOCK.
823  */
824 int
825 netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
826 {
827 	struct nm_bridge *b, *bridges;
828 	struct netmap_adapter *na;
829 	struct netmap_vp_adapter *vpna;
830 	char *name = nmr->nr_name;
831 	int cmd = nmr->nr_cmd, namelen = strlen(name);
832 	int error = 0, i, j;
833 	u_int num_bridges;
834 
835 	netmap_bns_getbridges(&bridges, &num_bridges);
836 
837 	switch (cmd) {
838 	case NETMAP_BDG_NEWIF:
839 		error = nm_vi_create(nmr);
840 		break;
841 
842 	case NETMAP_BDG_DELIF:
843 		error = nm_vi_destroy(nmr->nr_name);
844 		break;
845 
846 	case NETMAP_BDG_ATTACH:
847 		error = nm_bdg_ctl_attach(nmr);
848 		break;
849 
850 	case NETMAP_BDG_DETACH:
851 		error = nm_bdg_ctl_detach(nmr);
852 		break;
853 
854 	case NETMAP_BDG_LIST:
855 		/* this is used to enumerate bridges and ports */
856 		if (namelen) { /* look up indexes of bridge and port */
857 			if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
858 				error = EINVAL;
859 				break;
860 			}
861 			NMG_LOCK();
862 			b = nm_find_bridge(name, 0 /* don't create */);
863 			if (!b) {
864 				error = ENOENT;
865 				NMG_UNLOCK();
866 				break;
867 			}
868 
869 			error = ENOENT;
870 			for (j = 0; j < b->bdg_active_ports; j++) {
871 				i = b->bdg_port_index[j];
872 				vpna = b->bdg_ports[i];
873 				if (vpna == NULL) {
874 					D("---AAAAAAAAARGH-------");
875 					continue;
876 				}
877 				/* the former and the latter identify a
878 				 * virtual port and a NIC, respectively
879 				 */
880 				if (!strcmp(vpna->up.name, name)) {
881 					/* bridge index */
882 					nmr->nr_arg1 = b - bridges;
883 					nmr->nr_arg2 = i; /* port index */
884 					error = 0;
885 					break;
886 				}
887 			}
888 			NMG_UNLOCK();
889 		} else {
890 			/* return the first non-empty entry starting from
891 			 * bridge nr_arg1 and port nr_arg2.
892 			 *
893 			 * Users can detect the end of the same bridge by
894 			 * seeing the new and old value of nr_arg1, and can
895 			 * detect the end of all the bridges by error != 0.
896 			 */
897 			i = nmr->nr_arg1;
898 			j = nmr->nr_arg2;
899 
900 			NMG_LOCK();
901 			for (error = ENOENT; i < NM_BRIDGES; i++) {
902 				b = bridges + i;
903 				if (j >= b->bdg_active_ports) {
904 					j = 0; /* following bridges scan from 0 */
905 					continue;
906 				}
907 				nmr->nr_arg1 = i;
908 				nmr->nr_arg2 = j;
909 				j = b->bdg_port_index[j];
910 				vpna = b->bdg_ports[j];
911 				strncpy(name, vpna->up.name, (size_t)IFNAMSIZ);
912 				error = 0;
913 				break;
914 			}
915 			NMG_UNLOCK();
916 		}
917 		break;
918 
919 	case NETMAP_BDG_REGOPS: /* XXX this should not be available from userspace */
920 		/* register callbacks to the given bridge.
921 		 * nmr->nr_name may be just bridge's name (including ':'
922 		 * if it is not just NM_NAME).
923 		 */
924 		if (!bdg_ops) {
925 			error = EINVAL;
926 			break;
927 		}
928 		NMG_LOCK();
929 		b = nm_find_bridge(name, 0 /* don't create */);
930 		if (!b) {
931 			error = EINVAL;
932 		} else {
933 			b->bdg_ops = *bdg_ops;
934 		}
935 		NMG_UNLOCK();
936 		break;
937 
938 	case NETMAP_BDG_VNET_HDR:
939 		/* Valid lengths for the virtio-net header are 0 (no header),
940 		   10 and 12. */
941 		if (nmr->nr_arg1 != 0 &&
942 			nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) &&
943 				nmr->nr_arg1 != 12) {
944 			error = EINVAL;
945 			break;
946 		}
947 		NMG_LOCK();
948 		error = netmap_get_bdg_na(nmr, &na, 0);
949 		if (na && !error) {
950 			vpna = (struct netmap_vp_adapter *)na;
951 			vpna->virt_hdr_len = nmr->nr_arg1;
952 			if (vpna->virt_hdr_len)
953 				vpna->mfs = NETMAP_BUF_SIZE(na);
954 			D("Using vnet_hdr_len %d for %p", vpna->virt_hdr_len, vpna);
955 			netmap_adapter_put(na);
956 		}
957 		NMG_UNLOCK();
958 		break;
959 
960 	default:
961 		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
962 		error = EINVAL;
963 		break;
964 	}
965 	return error;
966 }
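
/*
 * For example, a kernel module could replace the learning lookup on a
 * switch as follows. This is a sketch: my_lookup and "vale0:" are
 * placeholders, and the lookup must follow the netmap_bdg_learning()
 * prototype used by nm_bdg_flush().
 */
#if 0
static u_int
my_lookup(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
	struct netmap_vp_adapter *na)
{
	return NM_BDG_BROADCAST;	/* flood every packet */
}

static struct netmap_bdg_ops my_ops = { .lookup = my_lookup };

static int
my_register(void)
{
	struct nmreq nmr;

	bzero(&nmr, sizeof(nmr));
	strncpy(nmr.nr_name, "vale0:", sizeof(nmr.nr_name) - 1);
	nmr.nr_cmd = NETMAP_BDG_REGOPS;
	return netmap_bdg_ctl(&nmr, &my_ops);
}
#endif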
967 
968 int
969 netmap_bdg_config(struct nmreq *nmr)
970 {
971 	struct nm_bridge *b;
972 	int error = EINVAL;
973 
974 	NMG_LOCK();
975 	b = nm_find_bridge(nmr->nr_name, 0);
976 	if (!b) {
977 		NMG_UNLOCK();
978 		return error;
979 	}
980 	NMG_UNLOCK();
981 	/* Don't call config() with NMG_LOCK() held */
982 	BDG_RLOCK(b);
983 	if (b->bdg_ops.config != NULL)
984 		error = b->bdg_ops.config((struct nm_ifreq *)nmr);
985 	BDG_RUNLOCK(b);
986 	return error;
987 }
988 
989 
990 /* nm_krings_create callback for VALE ports.
991  * Calls the standard netmap_krings_create, then adds leases on rx
992  * rings and bdgfwd on tx rings.
993  */
994 static int
995 netmap_vp_krings_create(struct netmap_adapter *na)
996 {
997 	u_int tailroom;
998 	int error, i;
999 	uint32_t *leases;
1000 	u_int nrx = netmap_real_rings(na, NR_RX);
1001 
1002 	/*
1003 	 * Leases are attached to RX rings on vale ports
1004 	 */
1005 	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
1006 
1007 	error = netmap_krings_create(na, tailroom);
1008 	if (error)
1009 		return error;
1010 
1011 	leases = na->tailroom;
1012 
1013 	for (i = 0; i < nrx; i++) { /* Receive rings */
1014 		na->rx_rings[i].nkr_leases = leases;
1015 		leases += na->num_rx_desc;
1016 	}
1017 
1018 	error = nm_alloc_bdgfwd(na);
1019 	if (error) {
1020 		netmap_krings_delete(na);
1021 		return error;
1022 	}
1023 
1024 	return 0;
1025 }
1026 
1027 
1028 /* nm_krings_delete callback for VALE ports. */
1029 static void
1030 netmap_vp_krings_delete(struct netmap_adapter *na)
1031 {
1032 	nm_free_bdgfwd(na);
1033 	netmap_krings_delete(na);
1034 }
1035 
1036 
1037 static int
1038 nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
1039 	struct netmap_vp_adapter *na, u_int ring_nr);
1040 
1041 
1042 /*
1043  * main dispatch routine for the bridge.
1044  * Grab packets from a kring, move them into the ft structure
1045  * associated to the tx (input) port. Max one instance per port,
1046  * filtered on input (ioctl, poll or XXX).
1047  * Returns the next position in the ring.
1048  */
1049 static int
1050 nm_bdg_preflush(struct netmap_kring *kring, u_int end)
1051 {
1052 	struct netmap_vp_adapter *na =
1053 		(struct netmap_vp_adapter*)kring->na;
1054 	struct netmap_ring *ring = kring->ring;
1055 	struct nm_bdg_fwd *ft;
1056 	u_int ring_nr = kring->ring_id;
1057 	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
1058 	u_int ft_i = 0;	/* start from 0 */
1059 	u_int frags = 1; /* how many frags ? */
1060 	struct nm_bridge *b = na->na_bdg;
1061 
1062 	/* To protect against modifications to the bridge we acquire a
1063 	 * shared lock, waiting if we can sleep (if the source port is
1064 	 * attached to a user process) or with a trylock otherwise (NICs).
1065 	 */
1066 	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
1067 	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
1068 		BDG_RLOCK(b);
1069 	else if (!BDG_RTRYLOCK(b))
1070 		return 0;
1071 	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
1072 	ft = kring->nkr_ft;
1073 
1074 	for (; likely(j != end); j = nm_next(j, lim)) {
1075 		struct netmap_slot *slot = &ring->slot[j];
1076 		char *buf;
1077 
1078 		ft[ft_i].ft_len = slot->len;
1079 		ft[ft_i].ft_flags = slot->flags;
1080 
1081 		ND("flags is 0x%x", slot->flags);
1082 		/* we do not use the buf changed flag, but we still need to reset it */
1083 		slot->flags &= ~NS_BUF_CHANGED;
1084 
1085 		/* this slot goes into a list so initialize the link field */
1086 		ft[ft_i].ft_next = NM_FT_NULL;
1087 		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
1088 			(void *)(uintptr_t)slot->ptr : NMB(&na->up, slot);
1089 		if (unlikely(buf == NULL)) {
1090 			RD(5, "NULL %s buffer pointer from %s slot %d len %d",
1091 				(slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT",
1092 				kring->name, j, ft[ft_i].ft_len);
1093 			buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up);
1094 			ft[ft_i].ft_len = 0;
1095 			ft[ft_i].ft_flags = 0;
1096 		}
1097 		__builtin_prefetch(buf);
1098 		++ft_i;
1099 		if (slot->flags & NS_MOREFRAG) {
1100 			frags++;
1101 			continue;
1102 		}
1103 		if (unlikely(netmap_verbose && frags > 1))
1104 			RD(5, "%d frags at %d", frags, ft_i - frags);
1105 		ft[ft_i - frags].ft_frags = frags;
1106 		frags = 1;
1107 		if (unlikely((int)ft_i >= bridge_batch))
1108 			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1109 	}
1110 	if (frags > 1) {
1111 		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
1112 		// ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
1113 		ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
1114 		ft[ft_i - frags].ft_frags = frags - 1;
1115 	}
1116 	if (ft_i)
1117 		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1118 	BDG_RUNLOCK(b);
1119 	return j;
1120 }
1121 
1122 
1123 /* ----- FreeBSD if_bridge hash function ------- */
1124 
1125 /*
1126  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
1127  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
1128  *
1129  * http://www.burtleburtle.net/bob/hash/spooky.html
1130  */
1131 #define mix(a, b, c)                                                    \
1132 do {                                                                    \
1133         a -= b; a -= c; a ^= (c >> 13);                                 \
1134         b -= c; b -= a; b ^= (a << 8);                                  \
1135         c -= a; c -= b; c ^= (b >> 13);                                 \
1136         a -= b; a -= c; a ^= (c >> 12);                                 \
1137         b -= c; b -= a; b ^= (a << 16);                                 \
1138         c -= a; c -= b; c ^= (b >> 5);                                  \
1139         a -= b; a -= c; a ^= (c >> 3);                                  \
1140         b -= c; b -= a; b ^= (a << 10);                                 \
1141         c -= a; c -= b; c ^= (b >> 15);                                 \
1142 } while (/*CONSTCOND*/0)
1143 
1144 
1145 static __inline uint32_t
1146 nm_bridge_rthash(const uint8_t *addr)
1147 {
1148         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hash key
1149 
1150         b += addr[5] << 8;
1151         b += addr[4];
1152         a += addr[3] << 24;
1153         a += addr[2] << 16;
1154         a += addr[1] << 8;
1155         a += addr[0];
1156 
1157         mix(a, b, c);
1158 #define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
1159         return (c & BRIDGE_RTHASH_MASK);
1160 }
1161 
1162 #undef mix
1163 
1164 
1165 /* nm_register callback for VALE ports */
1166 static int
1167 netmap_vp_reg(struct netmap_adapter *na, int onoff)
1168 {
1169 	struct netmap_vp_adapter *vpna =
1170 		(struct netmap_vp_adapter*)na;
1171 
1172 	/* persistent ports may be put in netmap mode
1173 	 * before being attached to a bridge
1174 	 */
1175 	if (vpna->na_bdg)
1176 		BDG_WLOCK(vpna->na_bdg);
1177 	if (onoff) {
1178 		na->na_flags |= NAF_NETMAP_ON;
1179 		 /* XXX on FreeBSD, persistent VALE ports should also
1180 		 * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
1181 		 */
1182 	} else {
1183 		na->na_flags &= ~NAF_NETMAP_ON;
1184 	}
1185 	if (vpna->na_bdg)
1186 		BDG_WUNLOCK(vpna->na_bdg);
1187 	return 0;
1188 }
1189 
1190 
1191 /*
1192  * Lookup function for a learning bridge.
1193  * Update the hash table with the source address,
1194  * and then return the destination port index and the
1195  * ring in *dst_ring (at the moment, ring 0 is always used).
1196  */
1197 u_int
1198 netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
1199 		struct netmap_vp_adapter *na)
1200 {
1201 	uint8_t *buf = ft->ft_buf;
1202 	u_int buf_len = ft->ft_len;
1203 	struct nm_hash_ent *ht = na->na_bdg->ht;
1204 	uint32_t sh, dh;
1205 	u_int dst, mysrc = na->bdg_port;
1206 	uint64_t smac, dmac;
1207 
1208 	/* safety check, unfortunately we have many cases */
1209 	if (buf_len >= 14 + na->virt_hdr_len) {
1210 		/* virthdr + mac_hdr in the same slot */
1211 		buf += na->virt_hdr_len;
1212 		buf_len -= na->virt_hdr_len;
1213 	} else if (buf_len == na->virt_hdr_len && ft->ft_flags & NS_MOREFRAG) {
1214 		/* only header in first fragment */
1215 		ft++;
1216 		buf = ft->ft_buf;
1217 		buf_len = ft->ft_len;
1218 	} else {
1219 		RD(5, "invalid buf format, length %d", buf_len);
1220 		return NM_BDG_NOPORT;
1221 	}
1222 	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
1223 	smac = le64toh(*(uint64_t *)(buf + 4));
1224 	smac >>= 16;
1225 
1226 	/*
1227 	 * The hash is somewhat expensive, there might be some
1228 	 * worthwhile optimizations here.
1229 	 */
1230 	if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */
1231 		uint8_t *s = buf+6;
1232 		sh = nm_bridge_rthash(s); // XXX hash of source
1233 		/* update source port forwarding entry */
1234 		na->last_smac = ht[sh].mac = smac;	/* XXX expire ? */
1235 		ht[sh].ports = mysrc;
1236 		if (netmap_verbose)
1237 		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
1238 			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
1239 	}
1240 	dst = NM_BDG_BROADCAST;
1241 	if ((buf[0] & 1) == 0) { /* unicast */
1242 		dh = nm_bridge_rthash(buf); // XXX hash of dst
1243 		if (ht[dh].mac == dmac) {	/* found dst */
1244 			dst = ht[dh].ports;
1245 		}
1246 		/* XXX otherwise return NM_BDG_UNKNOWN ? */
1247 	}
1248 	return dst;
1249 }
1250 
1251 
1252 /*
1253  * Available space in the ring. Only used in VALE code
1254  * and only with is_rx = 1
1255  */
1256 static inline uint32_t
1257 nm_kr_space(struct netmap_kring *k, int is_rx)
1258 {
1259 	int space;
1260 
1261 	if (is_rx) {
1262 		int busy = k->nkr_hwlease - k->nr_hwcur;
1263 		if (busy < 0)
1264 			busy += k->nkr_num_slots;
1265 		space = k->nkr_num_slots - 1 - busy;
1266 	} else {
1267 		/* XXX never used in this branch */
1268 		space = k->nr_hwtail - k->nkr_hwlease;
1269 		if (space < 0)
1270 			space += k->nkr_num_slots;
1271 	}
1272 #if 0
1273 	// sanity check
1274 	if (k->nkr_hwlease >= k->nkr_num_slots ||
1275 		k->nr_hwcur >= k->nkr_num_slots ||
1276 		k->nr_tail >= k->nkr_num_slots ||
1277 		busy < 0 ||
1278 		busy >= k->nkr_num_slots) {
1279 		D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d",			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
1280 			k->nkr_lease_idx, k->nkr_num_slots);
1281 	}
1282 #endif
1283 	return space;
1284 }
1285 
1286 
1287 
1288 
1289 /* make a lease on the kring for N positions. return the
1290  * lease index
1291  * XXX only used in VALE code and with is_rx = 1
1292  */
1293 static inline uint32_t
1294 nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
1295 {
1296 	uint32_t lim = k->nkr_num_slots - 1;
1297 	uint32_t lease_idx = k->nkr_lease_idx;
1298 
1299 	k->nkr_leases[lease_idx] = NR_NOSLOT;
1300 	k->nkr_lease_idx = nm_next(lease_idx, lim);
1301 
1302 	if (n > nm_kr_space(k, is_rx)) {
1303 		D("invalid request for %d slots", n);
1304 		panic("x");
1305 	}
1306 	/* XXX verify that there are n slots */
1307 	k->nkr_hwlease += n;
1308 	if (k->nkr_hwlease > lim)
1309 		k->nkr_hwlease -= lim + 1;
1310 
1311 	if (k->nkr_hwlease >= k->nkr_num_slots ||
1312 		k->nr_hwcur >= k->nkr_num_slots ||
1313 		k->nr_hwtail >= k->nkr_num_slots ||
1314 		k->nkr_lease_idx >= k->nkr_num_slots) {
1315 		D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
1316 			k->na->name,
1317 			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
1318 			k->nkr_lease_idx, k->nkr_num_slots);
1319 	}
1320 	return lease_idx;
1321 }
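
/*
 * Example of how leases serialize out-of-order completion: writers A and
 * B reserve adjacent ranges (A at lease 0 starting at nr_hwtail, B at
 * lease 1). If B finishes first, my_start != nr_hwtail for B, so B only
 * records its final position in nkr_leases[]. When A completes,
 * my_start == nr_hwtail, so A publishes both ranges at once by walking
 * the lease array and advancing nr_hwtail (see nm_bdg_flush() below).
 */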
1322 
1323 /*
1324  *
1325  * This flush routine supports only unicast and broadcast but a large
1326  * number of ports, and lets us replace the learn and dispatch functions.
1327  */
1328 int
1329 nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
1330 		u_int ring_nr)
1331 {
1332 	struct nm_bdg_q *dst_ents, *brddst;
1333 	uint16_t num_dsts = 0, *dsts;
1334 	struct nm_bridge *b = na->na_bdg;
1335 	u_int i, j, me = na->bdg_port;
1336 
1337 	/*
1338 	 * The work area (pointed by ft) is followed by an array of
1339 	 * pointers to queues, dst_ents; there are NM_BDG_MAXRINGS
1340 	 * queues per port plus one for the broadcast traffic.
1341 	 * Then we have an array of destination indexes.
1342 	 */
1343 	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
1344 	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
1345 
1346 	/* first pass: find a destination for each packet in the batch */
1347 	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
1348 		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
1349 		uint16_t dst_port, d_i;
1350 		struct nm_bdg_q *d;
1351 
1352 		ND("slot %d frags %d", i, ft[i].ft_frags);
1353 		/* Drop the packet if the virtio-net header is not in the first
1354 		   fragment nor at the very beginning of the second. */
1355 		if (unlikely(na->virt_hdr_len > ft[i].ft_len))
1356 			continue;
1357 		dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na);
1358 		if (netmap_verbose > 255)
1359 			RD(5, "slot %d port %d -> %d", i, me, dst_port);
1360 		if (dst_port == NM_BDG_NOPORT)
1361 			continue; /* this packet is identified to be dropped */
1362 		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
1363 			continue;
1364 		else if (dst_port == NM_BDG_BROADCAST)
1365 			dst_ring = 0; /* broadcasts always go to ring 0 */
1366 		else if (unlikely(dst_port == me ||
1367 		    !b->bdg_ports[dst_port]))
1368 			continue;
1369 
1370 		/* get a position in the scratch pad */
1371 		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
1372 		d = dst_ents + d_i;
1373 
1374 		/* append the first fragment to the list */
1375 		if (d->bq_head == NM_FT_NULL) { /* new destination */
1376 			d->bq_head = d->bq_tail = i;
1377 			/* remember this position to be scanned later */
1378 			if (dst_port != NM_BDG_BROADCAST)
1379 				dsts[num_dsts++] = d_i;
1380 		} else {
1381 			ft[d->bq_tail].ft_next = i;
1382 			d->bq_tail = i;
1383 		}
1384 		d->bq_len += ft[i].ft_frags;
1385 	}
1386 
1387 	/*
1388 	 * Broadcast traffic goes to ring 0 on all destinations.
1389 	 * So we need to add these rings to the list of ports to scan.
1390 	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
1391 	 * expensive. We should keep a compact list of active destinations
1392 	 * so we could shorten this loop.
1393 	 */
1394 	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
1395 	if (brddst->bq_head != NM_FT_NULL) {
1396 		for (j = 0; likely(j < b->bdg_active_ports); j++) {
1397 			uint16_t d_i;
1398 			i = b->bdg_port_index[j];
1399 			if (unlikely(i == me))
1400 				continue;
1401 			d_i = i * NM_BDG_MAXRINGS;
1402 			if (dst_ents[d_i].bq_head == NM_FT_NULL)
1403 				dsts[num_dsts++] = d_i;
1404 		}
1405 	}
1406 
1407 	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
1408 	/* second pass: scan destinations */
1409 	for (i = 0; i < num_dsts; i++) {
1410 		struct netmap_vp_adapter *dst_na;
1411 		struct netmap_kring *kring;
1412 		struct netmap_ring *ring;
1413 		u_int dst_nr, lim, j, d_i, next, brd_next;
1414 		u_int needed, howmany;
1415 		int retry = netmap_txsync_retry;
1416 		struct nm_bdg_q *d;
1417 		uint32_t my_start = 0, lease_idx = 0;
1418 		int nrings;
1419 		int virt_hdr_mismatch = 0;
1420 
1421 		d_i = dsts[i];
1422 		ND("second pass %d port %d", i, d_i);
1423 		d = dst_ents + d_i;
1424 		// XXX fix the division
1425 		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
1426 		/* protect from the lookup function returning an inactive
1427 		 * destination port
1428 		 */
1429 		if (unlikely(dst_na == NULL))
1430 			goto cleanup;
1431 		if (dst_na->up.na_flags & NAF_SW_ONLY)
1432 			goto cleanup;
1433 		/*
1434 		 * The interface may be in !netmap mode in two cases:
1435 		 * - when na is attached but not activated yet;
1436 		 * - when na is being deactivated but is still attached.
1437 		 */
1438 		if (unlikely(!nm_netmap_on(&dst_na->up))) {
1439 			ND("not in netmap mode!");
1440 			goto cleanup;
1441 		}
1442 
1443 		/* there is at least one either unicast or broadcast packet */
1444 		brd_next = brddst->bq_head;
1445 		next = d->bq_head;
1446 		/* we need to reserve this many slots. If fewer are
1447 		 * available, some packets will be dropped.
1448 		 * Packets may have multiple fragments, so there is a
1449 		 * chance that we may not use all of the slots
1450 		 * we have claimed, and we will need to handle the leftover
1451 		 * ones when we regain the lock.
1452 		 */
1453 		needed = d->bq_len + brddst->bq_len;
1454 
1455 		if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) {
1456 			RD(3, "virt_hdr_mismatch, src %d dst %d", na->virt_hdr_len, dst_na->virt_hdr_len);
1457 			/* There is a virtio-net header/offloadings mismatch between
1458 			 * source and destination. The slower mismatch datapath will
1459 			 * be used to cope with all the mismatches.
1460 			 */
1461 			virt_hdr_mismatch = 1;
1462 			if (dst_na->mfs < na->mfs) {
1463 				/* We may need to do segmentation offloadings, and so
1464 				 * we may need a number of destination slots greater
1465 				 * than the number of input slots ('needed').
1466 				 * We look for the smallest integer 'x' which satisfies:
1467 				 *	needed * na->mfs + x * H <= x * na->mfs
1468 				 * where 'H' is the length of the longest header that may
1469 				 * be replicated in the segmentation process (e.g. for
1470 				 * TCPv4 we must account for ethernet header, IP header
1471 				 * and TCPv4 header).
1472 				 */
1473 				needed = (needed * na->mfs) /
1474 						(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
1475 				ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
1476 			}
1477 		}
1478 
1479 		ND(5, "pass 2 dst %d is %x %s",
1480 			i, d_i, is_vp ? "virtual" : "nic/host");
1481 		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
1482 		nrings = dst_na->up.num_rx_rings;
1483 		if (dst_nr >= nrings)
1484 			dst_nr = dst_nr % nrings;
1485 		kring = &dst_na->up.rx_rings[dst_nr];
1486 		ring = kring->ring;
1487 		lim = kring->nkr_num_slots - 1;
1488 
1489 retry:
1490 
1491 		if (dst_na->retry && retry) {
1492 			/* try to get some free slot from the previous run */
1493 			kring->nm_notify(kring, 0);
1494 			/* actually useful only for bwraps, since there
1495 			 * the notify will trigger a txsync on the hwna. VALE ports
1496 			 * have dst_na->retry == 0
1497 			 */
1498 		}
1499 		/* reserve the buffers in the queue and an entry
1500 		 * to report completion, and drop lock.
1501 		 * XXX this might become a helper function.
1502 		 */
1503 		mtx_lock(&kring->q_lock);
1504 		if (kring->nkr_stopped) {
1505 			mtx_unlock(&kring->q_lock);
1506 			goto cleanup;
1507 		}
1508 		my_start = j = kring->nkr_hwlease;
1509 		howmany = nm_kr_space(kring, 1);
1510 		if (needed < howmany)
1511 			howmany = needed;
1512 		lease_idx = nm_kr_lease(kring, howmany, 1);
1513 		mtx_unlock(&kring->q_lock);
1514 
1515 		/* only retry if we need more than available slots */
1516 		if (retry && needed <= howmany)
1517 			retry = 0;
1518 
1519 		/* copy to the destination queue */
1520 		while (howmany > 0) {
1521 			struct netmap_slot *slot;
1522 			struct nm_bdg_fwd *ft_p, *ft_end;
1523 			u_int cnt;
1524 
1525 			/* find the queue from which we pick next packet.
1526 			 * NM_FT_NULL is always higher than valid indexes
1527 			 * so we never dereference it if the other list
1528 			 * has packets (and if both are empty we never
1529 			 * get here).
1530 			 */
1531 			if (next < brd_next) {
1532 				ft_p = ft + next;
1533 				next = ft_p->ft_next;
1534 			} else { /* insert broadcast */
1535 				ft_p = ft + brd_next;
1536 				brd_next = ft_p->ft_next;
1537 			}
1538 			cnt = ft_p->ft_frags; // cnt > 0
1539 			if (unlikely(cnt > howmany))
1540 			    break; /* no more space */
1541 			if (netmap_verbose && cnt > 1)
1542 				RD(5, "rx %d frags to %d", cnt, j);
1543 			ft_end = ft_p + cnt;
1544 			if (unlikely(virt_hdr_mismatch)) {
1545 				bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
1546 			} else {
1547 				howmany -= cnt;
1548 				do {
1549 					char *dst, *src = ft_p->ft_buf;
1550 					size_t copy_len = ft_p->ft_len, dst_len = copy_len;
1551 
1552 					slot = &ring->slot[j];
1553 					dst = NMB(&dst_na->up, slot);
1554 
1555 					ND("send [%d] %d(%d) bytes at %s:%d",
1556 							i, (int)copy_len, (int)dst_len,
1557 							NM_IFPNAME(dst_ifp), j);
1558 					/* round to a multiple of 64 */
1559 					copy_len = (copy_len + 63) & ~63;
1560 
1561 					if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) ||
1562 						     copy_len > NETMAP_BUF_SIZE(&na->up))) {
1563 						RD(5, "invalid len %d, down to 64", (int)copy_len);
1564 						copy_len = dst_len = 64; // XXX
1565 					}
1566 					if (ft_p->ft_flags & NS_INDIRECT) {
1567 						if (copyin(src, dst, copy_len)) {
1568 							// invalid user pointer, pretend len is 0
1569 							dst_len = 0;
1570 						}
1571 					} else {
1572 						//memcpy(dst, src, copy_len);
1573 						pkt_copy(src, dst, (int)copy_len);
1574 					}
1575 					slot->len = dst_len;
1576 					slot->flags = (cnt << 8)| NS_MOREFRAG;
1577 					j = nm_next(j, lim);
1578 					needed--;
1579 					ft_p++;
1580 				} while (ft_p != ft_end);
1581 				slot->flags = (cnt << 8); /* clear flag on last entry */
1582 			}
1583 			/* are we done ? */
1584 			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
1585 				break;
1586 		}
1587 		{
1588 		    /* current position */
1589 		    uint32_t *p = kring->nkr_leases; /* shorthand */
1590 		    uint32_t update_pos;
1591 		    int still_locked = 1;
1592 
1593 		    mtx_lock(&kring->q_lock);
1594 		    if (unlikely(howmany > 0)) {
1595 			/* we did not use all the buffers. If we are the
1596 			 * last writer we can recover the slots, otherwise we must
1597 			 * fill them with 0 to mark empty packets.
1598 			 */
1599 			ND("leftover %d bufs", howmany);
1600 			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
1601 			    /* yes, we are the last one */
1602 			    ND("roll back nkr_hwlease to %d", j);
1603 			    kring->nkr_hwlease = j;
1604 			} else {
1605 			    while (howmany-- > 0) {
1606 				ring->slot[j].len = 0;
1607 				ring->slot[j].flags = 0;
1608 				j = nm_next(j, lim);
1609 			    }
1610 			}
1611 		    }
1612 		    p[lease_idx] = j; /* report I am done */
1613 
1614 		    update_pos = kring->nr_hwtail;
1615 
1616 		    if (my_start == update_pos) {
1617 			/* all slots before my_start have been reported,
1618 			 * so scan subsequent leases to see if other ranges
1619 			 * have been completed, and do a selwakeup or txsync.
1620 		         */
1621 			while (lease_idx != kring->nkr_lease_idx &&
1622 				p[lease_idx] != NR_NOSLOT) {
1623 			    j = p[lease_idx];
1624 			    p[lease_idx] = NR_NOSLOT;
1625 			    lease_idx = nm_next(lease_idx, lim);
1626 			}
1627 			/* j is the new 'write' position. j != my_start
1628 			 * means there are new buffers to report
1629 			 */
1630 			if (likely(j != my_start)) {
1631 				kring->nr_hwtail = j;
1632 				still_locked = 0;
1633 				mtx_unlock(&kring->q_lock);
1634 				kring->nm_notify(kring, 0);
1635 				/* this is netmap_notify for VALE ports and
1636 				 * netmap_bwrap_notify for bwrap. The latter will
1637 				 * trigger a txsync on the underlying hwna
1638 				 */
1639 				if (dst_na->retry && retry--) {
1640 					/* XXX this is going to call nm_notify again.
1641 					 * Only useful for bwrap in virtual machines
1642 					 */
1643 					goto retry;
1644 				}
1645 			}
1646 		    }
1647 		    if (still_locked)
1648 			mtx_unlock(&kring->q_lock);
1649 		}
1650 cleanup:
1651 		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
1652 		d->bq_len = 0;
1653 	}
1654 	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
1655 	brddst->bq_len = 0;
1656 	return 0;
1657 }
1658 
1659 /* nm_txsync callback for VALE ports */
1660 static int
1661 netmap_vp_txsync(struct netmap_kring *kring, int flags)
1662 {
1663 	struct netmap_vp_adapter *na =
1664 		(struct netmap_vp_adapter *)kring->na;
1665 	u_int done;
1666 	u_int const lim = kring->nkr_num_slots - 1;
1667 	u_int const head = kring->rhead;
1668 
1669 	if (bridge_batch <= 0) { /* testing only */
1670 		done = head; // used all
1671 		goto done;
1672 	}
1673 	if (!na->na_bdg) {
1674 		done = head;
1675 		goto done;
1676 	}
1677 	if (bridge_batch > NM_BDG_BATCH)
1678 		bridge_batch = NM_BDG_BATCH;
1679 
1680 	done = nm_bdg_preflush(kring, head);
1681 done:
1682 	if (done != head)
1683 		D("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail);
1684 	/*
1685 	 * packets between 'done' and 'head' are left unsent.
1686 	 */
1687 	kring->nr_hwcur = done;
1688 	kring->nr_hwtail = nm_prev(done, lim);
1689 	if (netmap_verbose)
1690 		D("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
1691 	return 0;
1692 }
1693 
1694 
1695 /* rxsync code used by the VALE ports' nm_rxsync callback and also
1696  * internally by the bwrap
1697  */
1698 static int
1699 netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
1700 {
1701 	struct netmap_adapter *na = kring->na;
1702 	struct netmap_ring *ring = kring->ring;
1703 	u_int nm_i, lim = kring->nkr_num_slots - 1;
1704 	u_int head = kring->rhead;
1705 	int n;
1706 
1707 	if (head > lim) {
1708 		D("ouch dangerous reset!!!");
1709 		n = netmap_ring_reinit(kring);
1710 		goto done;
1711 	}
1712 
1713 	/* First part, import newly received packets. */
1714 	/* actually nothing to do here, they are already in the kring */
1715 
1716 	/* Second part, skip past packets that userspace has released. */
1717 	nm_i = kring->nr_hwcur;
1718 	if (nm_i != head) {
1719 		/* consistency check, but nothing really important here */
1720 		for (n = 0; likely(nm_i != head); n++) {
1721 			struct netmap_slot *slot = &ring->slot[nm_i];
1722 			void *addr = NMB(na, slot);
1723 
1724 			if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
1725 				D("bad buffer index %d, ignore ?",
1726 					slot->buf_idx);
1727 			}
1728 			slot->flags &= ~NS_BUF_CHANGED;
1729 			nm_i = nm_next(nm_i, lim);
1730 		}
1731 		kring->nr_hwcur = head;
1732 	}
1733 
1734 	n = 0;
1735 done:
1736 	return n;
1737 }
1738 
1739 /*
1740  * nm_rxsync callback for VALE ports:
1741  * a user process reading from a VALE switch.
1742  * Already protected against concurrent calls from userspace,
1743  * but we must acquire the queue's lock to protect against
1744  * writers on the same queue.
1745  */
1746 static int
1747 netmap_vp_rxsync(struct netmap_kring *kring, int flags)
1748 {
1749 	int n;
1750 
1751 	mtx_lock(&kring->q_lock);
1752 	n = netmap_vp_rxsync_locked(kring, flags);
1753 	mtx_unlock(&kring->q_lock);
1754 	return n;
1755 }
1756 
1757 
1758 /* nm_bdg_attach callback for VALE ports
1759  * The na_vp port is this same netmap_adapter. There is no host port.
1760  */
1761 static int
1762 netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na)
1763 {
1764 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
1765 
1766 	if (vpna->na_bdg)
1767 		return EBUSY;
1768 	na->na_vp = vpna;
1769 	strncpy(na->name, name, sizeof(na->name));
1770 	na->na_hostvp = NULL;
1771 	return 0;
1772 }
1773 
1774 /* create a netmap_vp_adapter that describes a VALE port.
1775  * Only persistent VALE ports have a non-null ifp.
1776  */
1777 static int
1778 netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter **ret)
1779 {
1780 	struct netmap_vp_adapter *vpna;
1781 	struct netmap_adapter *na;
1782 	int error;
1783 	u_int npipes = 0;
1784 
1785 	vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
1786 	if (vpna == NULL)
1787 		return ENOMEM;
1788 
1789  	na = &vpna->up;
1790 
1791 	na->ifp = ifp;
1792 	strncpy(na->name, nmr->nr_name, sizeof(na->name));
1793 
1794 	/* bounds checking */
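	/* Note: nm_bound_var(&var, def, lo, hi, msg) clamps *var into
	 * [lo, hi], replacing any out-of-range value with the default
	 * 'def'. The requests below are sanitized this way, and the
	 * granted values are then written back into the nmreq.
	 */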
1795 	na->num_tx_rings = nmr->nr_tx_rings;
1796 	nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1797 	nmr->nr_tx_rings = na->num_tx_rings; // write back
1798 	na->num_rx_rings = nmr->nr_rx_rings;
1799 	nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1800 	nmr->nr_rx_rings = na->num_rx_rings; // write back
1801 	nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
1802 			1, NM_BDG_MAXSLOTS, NULL);
1803 	na->num_tx_desc = nmr->nr_tx_slots;
1804 	nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
1805 			1, NM_BDG_MAXSLOTS, NULL);
1806 	/* validate the number of pipes. We want at least 1,
1807 	 * and a few more are likely useful, so use 2 as the
1808 	 * default when 0 is supplied.
1809 	 */
1810 	npipes = nmr->nr_arg1;
1811 	nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
1812 	nmr->nr_arg1 = npipes;	/* write back */
1813 	/* validate extra bufs */
1814 	nm_bound_var(&nmr->nr_arg3, 0, 0,
1815 			128*NM_BDG_MAXSLOTS, NULL);
1816 	na->num_rx_desc = nmr->nr_rx_slots;
1817 	vpna->virt_hdr_len = 0;
1818 	vpna->mfs = 1514;
1819 	vpna->last_smac = ~0llu;
1820 	/*if (vpna->mfs > netmap_buf_size)  TODO netmap_buf_size is zero??
1821 		vpna->mfs = netmap_buf_size; */
1822 	if (netmap_verbose)
1823 		D("max frame size %u", vpna->mfs);
1824 
1825 	na->na_flags |= NAF_BDG_MAYSLEEP;
1826 	na->nm_txsync = netmap_vp_txsync;
1827 	na->nm_rxsync = netmap_vp_rxsync;
1828 	na->nm_register = netmap_vp_reg;
1829 	na->nm_krings_create = netmap_vp_krings_create;
1830 	na->nm_krings_delete = netmap_vp_krings_delete;
1831 	na->nm_dtor = netmap_vp_dtor;
1832 	na->nm_mem = netmap_mem_private_new(na->name,
1833 			na->num_tx_rings, na->num_tx_desc,
1834 			na->num_rx_rings, na->num_rx_desc,
1835 			nmr->nr_arg3, npipes, &error);
1836 	if (na->nm_mem == NULL)
1837 		goto err;
1838 	na->nm_bdg_attach = netmap_vp_bdg_attach;
1839 	/* other nmd fields are set in the common routine */
1840 	error = netmap_attach_common(na);
1841 	if (error)
1842 		goto err;
1843 	*ret = vpna;
1844 	return 0;
1845 
1846 err:
1847 	if (na->nm_mem != NULL)
1848 		netmap_mem_delete(na->nm_mem);
1849 	free(vpna, M_DEVBUF);
1850 	return error;
1851 }
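
/* For reference, a minimal userspace sketch (illustrative only, not part
 * of this module) of the kind of request that reaches netmap_vp_create().
 * Opening "valeX:y" with NIOCREGIF creates port 'y' on switch 'valeX';
 * the numeric values below are hypothetical and are clamped by the
 * nm_bound_var() calls above:
 *
 *	struct nmreq req;
 *	int fd = open("/dev/netmap", O_RDWR);
 *
 *	bzero(&req, sizeof(req));
 *	req.nr_version = NETMAP_API;
 *	strncpy(req.nr_name, "vale0:p0", sizeof(req.nr_name));
 *	req.nr_tx_rings = req.nr_rx_rings = 1;    // <= NM_BDG_MAXRINGS
 *	req.nr_tx_slots = req.nr_rx_slots = 1024; // <= NM_BDG_MAXSLOTS
 *	ioctl(fd, NIOCREGIF, &req); // on return, the fields hold the
 *	                            // values actually granted
 */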
1852 
1853 /* Bridge wrapper code (bwrap).
1854  * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
1855  * VALE switch.
1856  * The main task is to swap the meaning of tx and rx rings to match the
1857  * expectations of the VALE switch code (see nm_bdg_flush).
1858  *
1859  * The bwrap works by interposing a netmap_bwrap_adapter between the
1860  * rest of the system and the hwna. The netmap_bwrap_adapter looks like
1861  * a netmap_vp_adapter to the rest the system, but, internally, it
1862  * translates all callbacks to what the hwna expects.
1863  *
1864  * Note that we have to intercept callbacks coming from two sides:
1865  *
1866  *  - callbacks coming from the netmap module are intercepted by
1867  *    passing around the netmap_bwrap_adapter instead of the hwna
1868  *
1869  *  - callbacks coming from outside of the netmap module only know
1870  *    about the hwna. This, however, only happens in interrupt
1871  *    handlers, where only the hwna->nm_notify callback is called.
1872  *    What the bwrap does is to overwrite the hwna->nm_notify callback
1873  *    with its own netmap_bwrap_intr_notify.
1874  *    XXX This assumes that the hwna->nm_notify callback was the
1875  *    standard netmap_notify(), as it is the case for nic adapters.
1876  *    Any additional action performed by hwna->nm_notify will not be
1877  *    performed by netmap_bwrap_intr_notify.
1878  *
1879  * Additionally, the bwrap can optionally attach the host rings pair
1880  * of the wrapped adapter to a different port of the switch.
1881  */
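
/* A minimal sketch of the interception pattern described above, assuming
 * (as noted) that the hw rings were using the standard netmap_notify():
 *
 *	// on register (see netmap_bwrap_register below)
 *	save = hwna->rx_rings[i].nm_notify;	// keep the original
 *	hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify;
 *	// the NIC interrupt handler still calls hwna->nm_notify(),
 *	// but the call now lands in the bwrap instead of waking
 *	// up a user process
 *
 *	// on unregister, the saved callback is restored
 *	hwna->rx_rings[i].nm_notify = save;
 */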
1882 
1883 
1884 static void
1885 netmap_bwrap_dtor(struct netmap_adapter *na)
1886 {
1887 	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1888 	struct netmap_adapter *hwna = bna->hwna;
1889 
1890 	ND("na %p", na);
1891 	/* drop reference to hwna->ifp.
1892 	 * If we don't do this, netmap_detach_common(na)
1893 	 * will think it has set NA(na->ifp) to NULL
1894 	 */
1895 	na->ifp = NULL;
1896 	/* for safety, also drop the possible reference
1897 	 * in the hostna
1898 	 */
1899 	bna->host.up.ifp = NULL;
1900 
1901 	hwna->nm_mem = bna->save_nmd;
1902 	hwna->na_private = NULL;
1903 	hwna->na_vp = hwna->na_hostvp = NULL;
1904 	hwna->na_flags &= ~NAF_BUSY;
1905 	netmap_adapter_put(hwna);
1906 }
1908 
1909 
1910 /*
1911  * Intr callback for NICs connected to a bridge.
1912  * Simply ignore tx interrupts (maybe we could try to recover space?)
1913  * and pass received packets from nic to the bridge.
1914  *
1915  * XXX TODO check locking: this is called from the interrupt
1916  * handler so we should make sure that the interface is not
1917  * disconnected while passing down an interrupt.
1918  *
1919  * Note, no user process can access this NIC or the host stack.
1920  * The only significant part of the ring is the slots,
1921  * and head/cur/tail are set from the kring as needed
1922  * (part as a receive ring, part as a transmit ring).
1923  *
1924  * callback that overwrites the hwna notify callback.
1925  * Packets come from the outside or from the host stack and are put on an hwna rx ring.
1926  * The bridge wrapper then sends the packets through the bridge.
1927  */
1928 static int
1929 netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
1930 {
1931 	struct netmap_adapter *na = kring->na;
1932 	struct netmap_bwrap_adapter *bna = na->na_private;
1933 	struct netmap_kring *bkring;
1934 	struct netmap_ring *ring;
1935 	struct netmap_vp_adapter *vpna = &bna->up;
1936 	u_int ring_nr = kring->ring_id;
1937 	int error = 0;
1938 
1939 	if (netmap_verbose)
1940 	    D("%s %s 0x%x", na->name, kring->name, flags);
1941 
1942 	if (!nm_netmap_on(na))
1943 		return 0;
1944 
1945 	bkring = &vpna->up.tx_rings[ring_nr];
1946 	ring = kring->ring; /* == bkring->ring */
1947 
1948 	/* make sure the ring is not disabled */
1949 	if (nm_kr_tryget(kring))
1950 		return 0;
1951 
1952 	if (netmap_verbose)
1953 	    D("%s head %d cur %d tail %d",  na->name,
1954 		kring->rhead, kring->rcur, kring->rtail);
1955 
1956 	/* simulate a user wakeup on the rx ring, to
1957 	 * fetch the packets that have arrived.
1958 	 */
1959 	error = kring->nm_sync(kring, 0);
1960 	if (error)
1961 		goto put_out;
1962 	if (kring->nr_hwcur == kring->nr_hwtail) {
1963 		if (netmap_verbose)
1964 			D("interrupt with no packets on %s", na->name);
1965 		goto put_out;
1966 	}
1967 
1968 	/* new packets are kring->rcur to kring->nr_hwtail, and the bkring
1969 	 * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail
1970 	 * to push all packets out.
1971 	 */
1972 	bkring->rhead = bkring->rcur = kring->nr_hwtail;
1973 
1974 	netmap_vp_txsync(bkring, flags);
1975 
1976 	/* mark all buffers as released on this ring */
1977 	kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail;
1978 	/* another call to actually release the buffers */
1979 	error = kring->nm_sync(kring, 0);
1980 
1981 put_out:
1982 	nm_kr_put(kring);
1983 	return error;
1984 }
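
/* In short, with the above in place the NIC rx interrupt path becomes
 * (names as in the function above):
 *
 *	NIC irq -> hwna->nm_notify == netmap_bwrap_intr_notify()
 *	        -> kring->nm_sync()          // harvest the NIC rx slots
 *	        -> netmap_vp_txsync(bkring)  // forward into the VALE switch
 *	        -> kring->nm_sync()          // give the buffers back to the NIC
 */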
1985 
1986 
1987 /* nm_register callback for bwrap */
1988 static int
1989 netmap_bwrap_register(struct netmap_adapter *na, int onoff)
1990 {
1991 	struct netmap_bwrap_adapter *bna =
1992 		(struct netmap_bwrap_adapter *)na;
1993 	struct netmap_adapter *hwna = bna->hwna;
1994 	struct netmap_vp_adapter *hostna = &bna->host;
1995 	int error;
1996 	enum txrx t;
1997 
1998 	ND("%s %s", na->name, onoff ? "on" : "off");
1999 
2000 	if (onoff) {
2001 		int i;
2002 
2003 		/* netmap_do_regif has been called on the bwrap na.
2004 		 * We need to pass the information about the
2005 		 * memory allocator down to the hwna before
2006 		 * putting it in netmap mode
2007 		 */
2008 		hwna->na_lut = na->na_lut;
2009 
2010 		if (hostna->na_bdg) {
2011 		/* if the host rings have been attached to the switch,
2012 			 * we need to copy the memory allocator information
2013 			 * in the hostna also
2014 			 */
2015 			hostna->up.na_lut = na->na_lut;
2016 		}
2017 
2018 		/* cross-link the netmap rings
2019 		 * The original number of rings comes from hwna,
2020 		 * rx rings on one side equal tx rings on the other.
2021 		 * We need to do this now, after the initialization
2022 		 * of the kring->ring pointers
2023 		 */
2024 		for_rx_tx(t) {
2025 			enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
2026 			for (i = 0; i < nma_get_nrings(na, r) + 1; i++) {
2027 				NMR(hwna, t)[i].nkr_num_slots = NMR(na, r)[i].nkr_num_slots;
2028 				NMR(hwna, t)[i].ring = NMR(na, r)[i].ring;
2029 			}
2030 		}
2031 	}
2032 
2033 	/* forward the request to the hwna */
2034 	error = hwna->nm_register(hwna, onoff);
2035 	if (error)
2036 		return error;
2037 
2038 	/* impersonate a netmap_vp_adapter */
2039 	netmap_vp_reg(na, onoff);
2040 	if (hostna->na_bdg)
2041 		netmap_vp_reg(&hostna->up, onoff);
2042 
2043 	if (onoff) {
2044 		u_int i;
2045 		/* intercept the hwna nm_notify callback on the hw rings */
2046 		for (i = 0; i < hwna->num_rx_rings; i++) {
2047 			hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify;
2048 			hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify;
2049 		}
2050 		i = hwna->num_rx_rings; /* for safety */
2051 		/* save the host ring notify unconditionally */
2052 		hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify;
2053 		if (hostna->na_bdg) {
2054 			/* also intercept the host ring notify */
2055 			hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify;
2056 		}
2057 	} else {
2058 		u_int i;
2059 		/* reset all notify callbacks (including host ring) */
2060 		for (i = 0; i <= hwna->num_rx_rings; i++) {
2061 			hwna->rx_rings[i].nm_notify = hwna->rx_rings[i].save_notify;
2062 			hwna->rx_rings[i].save_notify = NULL;
2063 		}
2064 		hwna->na_lut.lut = NULL;
2065 		hwna->na_lut.objtotal = 0;
2066 		hwna->na_lut.objsize = 0;
2067 	}
2068 
2069 	return 0;
2070 }
2071 
2072 /* nm_config callback for bwrap */
2073 static int
2074 netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
2075 				    u_int *rxr, u_int *rxd)
2076 {
2077 	struct netmap_bwrap_adapter *bna =
2078 		(struct netmap_bwrap_adapter *)na;
2079 	struct netmap_adapter *hwna = bna->hwna;
2080 
2081 	/* forward the request */
2082 	netmap_update_config(hwna);
2083 	/* swap the results */
2084 	*txr = hwna->num_rx_rings;
2085 	*txd = hwna->num_rx_desc;
2086 	*rxr = hwna->num_tx_rings;
2087 	*rxd = hwna->num_tx_desc; /* swapped, like the rings above */
2088 
2089 	return 0;
2090 }
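
/* For example (numbers purely illustrative): a NIC with 4 tx and 2 rx
 * rings is exposed through the bwrap as a port with 2 tx and 4 rx rings,
 * since frames received by the NIC are transmitted into the switch and
 * vice versa.
 */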
2091 
2092 
2093 /* nm_krings_create callback for bwrap */
2094 static int
2095 netmap_bwrap_krings_create(struct netmap_adapter *na)
2096 {
2097 	struct netmap_bwrap_adapter *bna =
2098 		(struct netmap_bwrap_adapter *)na;
2099 	struct netmap_adapter *hwna = bna->hwna;
2100 	struct netmap_adapter *hostna = &bna->host.up;
2101 	int error;
2102 
2103 	ND("%s", na->name);
2104 
2105 	/* impersonate a netmap_vp_adapter */
2106 	error = netmap_vp_krings_create(na);
2107 	if (error)
2108 		return error;
2109 
2110 	/* also create the hwna krings */
2111 	error = hwna->nm_krings_create(hwna);
2112 	if (error) {
2113 		netmap_vp_krings_delete(na);
2114 		return error;
2115 	}
2116 	/* the connection between the bwrap krings and the hwna krings
2117 	 * will be performed later, in the nm_register callback, since
2118 	 * at this point the kring->ring pointers have not been initialized yet
2119 	 */
2120 
2121 	if (na->na_flags & NAF_HOST_RINGS) {
2122 		/* the hostna rings are the host rings of the bwrap.
2123 		 * The corresponding krings must point back to the
2124 		 * hostna
2125 		 */
2126 		hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
2127 		hostna->tx_rings[0].na = hostna;
2128 		hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
2129 		hostna->rx_rings[0].na = hostna;
2130 	}
2131 
2132 	return 0;
2133 }
2134 
2135 
2136 static void
2137 netmap_bwrap_krings_delete(struct netmap_adapter *na)
2138 {
2139 	struct netmap_bwrap_adapter *bna =
2140 		(struct netmap_bwrap_adapter *)na;
2141 	struct netmap_adapter *hwna = bna->hwna;
2142 
2143 	ND("%s", na->name);
2144 
2145 	hwna->nm_krings_delete(hwna);
2146 	netmap_vp_krings_delete(na);
2147 }
2148 
2149 
2150 /* notify method for the bridge-->hwna direction */
2151 static int
2152 netmap_bwrap_notify(struct netmap_kring *kring, int flags)
2153 {
2154 	struct netmap_adapter *na = kring->na;
2155 	struct netmap_bwrap_adapter *bna = na->na_private;
2156 	struct netmap_adapter *hwna = bna->hwna;
2157 	u_int ring_n = kring->ring_id;
2158 	u_int lim = kring->nkr_num_slots - 1;
2159 	struct netmap_kring *hw_kring;
2160 	int error = 0;
2161 
2162 	ND("%s: na %s hwna %s",
2163 			(kring ? kring->name : "NULL!"),
2164 			(na ? na->name : "NULL!"),
2165 			(hwna ? hwna->name : "NULL!"));
2166 	hw_kring = &hwna->tx_rings[ring_n];
2167 
2168 	if (nm_kr_tryget(hw_kring))
2169 		return 0;
2170 
2171 	if (!nm_netmap_on(hwna))
2172 		goto out; /* drop the hold on hw_kring acquired above */
2173 	/* first step: simulate a user wakeup on the rx ring */
2174 	netmap_vp_rxsync(kring, flags);
2175 	ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
2176 		na->name, ring_n,
2177 		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
2178 		kring->ring->head, kring->ring->cur, kring->ring->tail,
2179 		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
2180 	/* second step: the new packets are sent on the tx ring
2181 	 * (which is actually the same ring)
2182 	 */
2183 	hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
2184 	error = hw_kring->nm_sync(hw_kring, flags);
2185 	if (error)
2186 		goto out;
2187 
2188 	/* third step: now we are back on the rx ring */
2189 	/* claim ownership on all hw owned bufs */
2190 	kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */
2191 
2192 	/* fourth step: the user goes to sleep again, causing another rxsync */
2193 	netmap_vp_rxsync(kring, flags);
2194 	ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
2195 		na->name, ring_n,
2196 		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
2197 		kring->ring->head, kring->ring->cur, kring->ring->tail,
2198 		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
2199 out:
2200 	nm_kr_put(hw_kring);
2201 	return error;
2202 }
2203 
2204 
2205 /* nm_bdg_ctl callback for the bwrap.
2206  * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
2207  * On attach, it needs to provide a fake netmap_priv_d structure and
2208  * perform a netmap_do_regif() on the bwrap. This will put both the
2209  * bwrap and the hwna in netmap mode, with the netmap rings shared
2210  * and cross-linked. Moreover, it will start intercepting interrupts
2211  * directed to hwna.
2212  */
2213 static int
2214 netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
2215 {
2216 	struct netmap_priv_d *npriv;
2217 	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
2218 	int error = 0;
2219 
2220 	if (attach) {
2221 		if (NETMAP_OWNED_BY_ANY(na)) {
2222 			return EBUSY;
2223 		}
2224 		if (bna->na_kpriv) {
2225 			/* nothing to do */
2226 			return 0;
2227 		}
2228 		npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
2229 		if (npriv == NULL)
2230 			return ENOMEM;
2231 		error = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags);
2232 		if (error) {
2233 			bzero(npriv, sizeof(*npriv));
2234 			free(npriv, M_DEVBUF);
2235 			return error;
2236 		}
2237 		bna->na_kpriv = npriv;
2238 		na->na_flags |= NAF_BUSY;
2239 	} else {
2240 		int last_instance;
2241 
2242 		if (na->active_fds == 0) /* not registered */
2243 			return EINVAL;
2244 		last_instance = netmap_dtor_locked(bna->na_kpriv);
2245 		if (!last_instance) {
2246 			D("--- error, trying to detach an entry with active mmaps");
2247 			error = EINVAL;
2248 		} else {
2249 			struct nm_bridge *b = bna->up.na_bdg,
2250 				*bh = bna->host.na_bdg;
2251 			npriv = bna->na_kpriv;
2252 			bna->na_kpriv = NULL;
2253 			D("deleting priv");
2254 
2255 			bzero(npriv, sizeof(*npriv));
2256 			free(npriv, M_DEVBUF);
2257 			if (b) {
2258 				/* XXX the bwrap dtor should take care
2259 				 * of this (2014-06-16)
2260 				 */
2261 				netmap_bdg_detach_common(b, bna->up.bdg_port,
2262 				    (bh ? bna->host.bdg_port : -1));
2263 			}
2264 			na->na_flags &= ~NAF_BUSY;
2265 		}
2266 	}
2267 	return error;
2268 }
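
/* For reference, the vale-ctl invocations that reach this callback
 * (switch and interface names are just examples):
 *
 *	vale-ctl -a vale0:em0	// attach NIC em0 to switch vale0 (attach != 0)
 *	vale-ctl -h vale0:em0	// attach em0 together with its host stack rings
 *	vale-ctl -d vale0:em0	// detach (attach == 0)
 */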
2270 
2271 /* attach a bridge wrapper to the 'real' device */
2272 int
2273 netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
2274 {
2275 	struct netmap_bwrap_adapter *bna;
2276 	struct netmap_adapter *na = NULL;
2277 	struct netmap_adapter *hostna = NULL;
2278 	int error = 0;
2279 	enum txrx t;
2280 
2281 	/* make sure the NIC is not already in use */
2282 	if (NETMAP_OWNED_BY_ANY(hwna)) {
2283 		D("NIC %s busy, cannot attach to bridge", hwna->name);
2284 		return EBUSY;
2285 	}
2286 
2287 	bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
2288 	if (bna == NULL) {
2289 		return ENOMEM;
2290 	}
2291 
2292 	na = &bna->up.up;
2293 	na->na_private = bna;
2294 	strncpy(na->name, nr_name, sizeof(na->name));
2295 	/* fill the ring data for the bwrap adapter with rx/tx meanings
2296 	 * swapped. The real cross-linking will be done during register,
2297 	 * when all the krings will have been created.
2298 	 */
2299 	for_rx_tx(t) {
2300 		enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
2301 		nma_set_nrings(na, t, nma_get_nrings(hwna, r));
2302 		nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
2303 	}
2304 	na->nm_dtor = netmap_bwrap_dtor;
2305 	na->nm_register = netmap_bwrap_register;
2306 	// na->nm_txsync = netmap_bwrap_txsync;
2307 	// na->nm_rxsync = netmap_bwrap_rxsync;
2308 	na->nm_config = netmap_bwrap_config;
2309 	na->nm_krings_create = netmap_bwrap_krings_create;
2310 	na->nm_krings_delete = netmap_bwrap_krings_delete;
2311 	na->nm_notify = netmap_bwrap_notify;
2312 	na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
2313 	na->pdev = hwna->pdev;
2314 	na->nm_mem = netmap_mem_private_new(na->name,
2315 			na->num_tx_rings, na->num_tx_desc,
2316 			na->num_rx_rings, na->num_rx_desc,
2317 			0, 0, &error);
2318 	na->na_flags |= NAF_MEM_OWNER;
2319 	if (na->nm_mem == NULL)
2320 		goto err_put;
2321 	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
2322 
2323 	bna->hwna = hwna;
2324 	netmap_adapter_get(hwna);
2325 	hwna->na_private = bna; /* weak reference */
2326 	hwna->na_vp = &bna->up;
2327 
2328 	if (hwna->na_flags & NAF_HOST_RINGS) {
2329 		if (hwna->na_flags & NAF_SW_ONLY)
2330 			na->na_flags |= NAF_SW_ONLY;
2331 		na->na_flags |= NAF_HOST_RINGS;
2332 		hostna = &bna->host.up;
2333 		snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name);
2334 		hostna->ifp = hwna->ifp;
2335 		for_rx_tx(t) {
2336 			enum txrx r = nm_txrx_swap(t);
2337 			nma_set_nrings(hostna, t, 1);
2338 			nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
2339 		}
2340 		// hostna->nm_txsync = netmap_bwrap_host_txsync;
2341 		// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
2342 		hostna->nm_notify = netmap_bwrap_notify;
2343 		hostna->nm_mem = na->nm_mem;
2344 		hostna->na_private = bna;
2345 		hostna->na_vp = &bna->up;
2346 		na->na_hostvp = hwna->na_hostvp =
2347 			hostna->na_hostvp = &bna->host;
2348 		hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
2349 	}
2350 
2351 	ND("%s<->%s txr %d txd %d rxr %d rxd %d",
2352 		na->name, hwna->name,
2353 		na->num_tx_rings, na->num_tx_desc,
2354 		na->num_rx_rings, na->num_rx_desc);
2355 
2356 	error = netmap_attach_common(na);
2357 	if (error) {
2358 		goto err_free;
2359 	}
2360 	/* make bwrap ifp point to the real ifp
2361 	 * NOTE: netmap_attach_common() interprets a non-NULL na->ifp
2362 	 * as a request to make the ifp point to the na. Since we
2363 	 * do not want to change the na already pointed to by hwna->ifp,
2364 	 * the following assignment has to be delayed until now
2365 	 */
2366 	na->ifp = hwna->ifp;
2367 	hwna->na_flags |= NAF_BUSY;
2368 	/* make hwna point to the allocator we are actually using,
2369 	 * so that monitors will be able to find it
2370 	 */
2371 	bna->save_nmd = hwna->nm_mem;
2372 	hwna->nm_mem = na->nm_mem;
2373 	return 0;
2374 
2375 err_free:
2376 	netmap_mem_delete(na->nm_mem);
2377 err_put:
2378 	hwna->na_vp = hwna->na_hostvp = NULL;
2379 	netmap_adapter_put(hwna);
2380 	free(bna, M_DEVBUF);
2381 	return error;
2382 }
2384 
2385 struct nm_bridge *
2386 netmap_init_bridges2(u_int n)
2387 {
2388 	u_int i;
2389 	struct nm_bridge *b;
2390 
2391 	b = malloc(sizeof(struct nm_bridge) * n, M_DEVBUF,
2392 		M_NOWAIT | M_ZERO);
2393 	if (b == NULL)
2394 		return NULL;
2395 	for (i = 0; i < n; i++)
2396 		BDG_RWINIT(&b[i]);
2397 	return b;
2398 }
2399 
2400 void
2401 netmap_uninit_bridges2(struct nm_bridge *b, u_int n)
2402 {
2403 	u_int i;
2404 
2405 	if (b == NULL)
2406 		return;
2407 
2408 	for (i = 0; i < n; i++)
2409 		BDG_RWDESTROY(&b[i]);
2410 	free(b, M_DEVBUF);
2411 }
2412 
2413 int
2414 netmap_init_bridges(void)
2415 {
2416 #ifdef CONFIG_NET_NS
2417 	return netmap_bns_register();
2418 #else
2419 	nm_bridges = netmap_init_bridges2(NM_BRIDGES);
2420 	if (nm_bridges == NULL)
2421 		return ENOMEM;
2422 	return 0;
2423 #endif
2424 }
2425 
2426 void
2427 netmap_uninit_bridges(void)
2428 {
2429 #ifdef CONFIG_NET_NS
2430 	netmap_bns_unregister();
2431 #else
2432 	netmap_uninit_bridges2(nm_bridges, NM_BRIDGES);
2433 #endif
2434 }
2435 #endif /* WITH_VALE */
2436