xref: /freebsd-13.1/sys/dev/netmap/netmap_vale.c (revision 2e159ef0)
1 /*
2  * Copyright (C) 2013 Universita` di Pisa. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *   1. Redistributions of source code must retain the above copyright
8  *      notice, this list of conditions and the following disclaimer.
9  *   2. Redistributions in binary form must reproduce the above copyright
10  *      notice, this list of conditions and the following disclaimer in the
11  *      documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 
27 /*
28  * This module implements the VALE switch for netmap
29 
30 --- VALE SWITCH ---
31 
32 NMG_LOCK() serializes all modifications to switches and ports.
33 A switch cannot be deleted until all ports are gone.
34 
35 For each switch, an SX lock (RWlock on linux) protects
36  * deletion of ports. When a port is configured or deleted, the
37 lock is acquired in exclusive mode (after holding NMG_LOCK).
38 When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
39 The lock is held throughout the entire forwarding cycle,
40  * during which the thread may incur a page fault.
41 Hence it is important that sleepable shared locks are used.
42 
43 On the rx ring, the per-port lock is grabbed initially to reserve
44 a number of slots in the ring, then the lock is released,
45 packets are copied from source to destination, and then
46 the lock is acquired again and the receive ring is updated.
47 (A similar thing is done on the tx ring for NIC and host stack
48 ports attached to the switch)
49 
50  */
51 
52 /*
53  * OS-specific code that is used only within this file.
54  * Other OS-specific code that must be accessed by drivers
55  * is present in netmap_kern.h
56  */
57 
58 #if defined(__FreeBSD__)
59 #include <sys/cdefs.h> /* prerequisite */
60 __FBSDID("$FreeBSD$");
61 
62 #include <sys/types.h>
63 #include <sys/errno.h>
64 #include <sys/param.h>	/* defines used in kernel.h */
65 #include <sys/kernel.h>	/* types used in module initialization */
66 #include <sys/conf.h>	/* cdevsw struct, UID, GID */
67 #include <sys/sockio.h>
68 #include <sys/socketvar.h>	/* struct socket */
69 #include <sys/malloc.h>
70 #include <sys/poll.h>
71 #include <sys/rwlock.h>
72 #include <sys/socket.h> /* sockaddrs */
73 #include <sys/selinfo.h>
74 #include <sys/sysctl.h>
75 #include <net/if.h>
76 #include <net/if_var.h>
77 #include <net/bpf.h>		/* BIOCIMMEDIATE */
78 #include <machine/bus.h>	/* bus_dmamap_* */
79 #include <sys/endian.h>
80 #include <sys/refcount.h>
81 
82 
83 #define BDG_RWLOCK_T		struct rwlock
84 
85 #define	BDG_RWINIT(b)		\
86 	rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
87 #define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)
88 #define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)
89 #define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)
90 #define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)
91 #define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)
92 #define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)
93 
94 
95 #elif defined(linux)
96 
97 #include "bsd_glue.h"
98 
99 #elif defined(__APPLE__)
100 
101 #warning OSX support is only partial
102 #include "osx_glue.h"
103 
104 #else
105 
106 #error	Unsupported platform
107 
108 #endif /* unsupported */
109 
110 /*
111  * common headers
112  */
113 
114 #include <net/netmap.h>
115 #include <dev/netmap/netmap_kern.h>
116 #include <dev/netmap/netmap_mem2.h>
117 
118 #ifdef WITH_VALE
119 
120 /*
121  * system parameters (most of them in netmap_kern.h)
122  * NM_NAME	prefix for switch port names, default "vale"
123  * NM_BDG_MAXPORTS	number of ports
124  * NM_BRIDGES	max number of switches in the system.
125  *	XXX should become a sysctl or tunable
126  *
127  * Switch ports are named valeX:Y where X is the switch name and Y
128  * is the port. If Y matches a physical interface name, the port is
129  * connected to a physical device.
130  *
131  * Unlike physical interfaces, switch ports use their own memory region
132  * for rings and buffers.
133  * The virtual interfaces use per-queue locks instead of the core lock.
134  * In the tx loop, we aggregate traffic in batches to make all operations
135  * faster. The batch size is bridge_batch.
136  */
137 #define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
138 #define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
139 #define NM_BRIDGE_RINGSIZE	1024	/* in the device */
140 #define NM_BDG_HASH		1024	/* forwarding table entries */
141 #define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
142 #define NM_MULTISEG		64	/* max size of a chain of bufs */
143 /* actual size of the tables */
144 #define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
145 /* NM_FT_NULL terminates a list of slots in the ft */
146 #define NM_FT_NULL		NM_BDG_BATCH_MAX
147 #define	NM_BRIDGES		8	/* number of bridges */
148 
149 
150 /*
151  * bridge_batch is set via sysctl to the max batch size to be
152  * used in the bridge. The actual value may be larger as the
153  * last packet in the block may overflow the size.
154  */
155 int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
156 SYSCTL_DECL(_dev_netmap);
157 SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
158 
159 
160 static int bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp);
161 static int bdg_netmap_reg(struct netmap_adapter *na, int onoff);
162 static int netmap_bwrap_attach(struct ifnet *, struct ifnet *);
163 static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
164 int kern_netmap_regif(struct nmreq *nmr);
165 
166 /*
167  * Each transmit queue accumulates a batch of packets into
168  * a structure before forwarding. Packets to the same
169  * destination are put in a list using ft_next as a link field.
170  * ft_frags and ft_next are valid only on the first fragment.
171  */
172 struct nm_bdg_fwd {	/* forwarding entry for a bridge */
173 	void *ft_buf;		/* netmap or indirect buffer */
174 	uint8_t ft_frags;	/* how many fragments (only on 1st frag) */
175 	uint8_t _ft_port;	/* dst port (unused) */
176 	uint16_t ft_flags;	/* flags, e.g. indirect */
177 	uint16_t ft_len;	/* src fragment len */
178 	uint16_t ft_next;	/* next packet to same destination */
179 };
180 
181 /*
182  * For each output interface, nm_bdg_q is used to construct a list.
183  * bq_len is the number of output buffers (we can have coalescing
184  * during the copy).
185  */
186 struct nm_bdg_q {
187 	uint16_t bq_head;
188 	uint16_t bq_tail;
189 	uint32_t bq_len;	/* number of buffers */
190 };
191 
192 /* XXX revise this */
193 struct nm_hash_ent {
194 	uint64_t	mac;	/* the top 2 bytes are the epoch */
195 	uint64_t	ports;
196 };
197 
198 /*
199  * nm_bridge is a descriptor for a VALE switch.
200  * Interfaces for a bridge are all in bdg_ports[].
201  * The array has a fixed size; an empty entry does not terminate
202  * the search, but lookups only occur on attach/detach so we
203  * don't mind if they are slow.
204  *
205  * The bridge is non-blocking on the transmit ports: excess
206  * packets are dropped if there is no room on the output port.
207  *
208  * bdg_lock protects accesses to the bdg_ports array.
209  * This is a rw lock (or equivalent).
210  */
211 struct nm_bridge {
212 	/* XXX what is the proper alignment/layout ? */
213 	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
214 	int		bdg_namelen;
215 	uint32_t	bdg_active_ports; /* 0 means free */
216 	char		bdg_basename[IFNAMSIZ];
217 
218 	/* Indexes of active ports (up to active_ports)
219 	 * and all other remaining ports.
220 	 */
221 	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];
222 
223 	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
224 
225 
226 	/*
227 	 * The function to decide the destination port.
228 	 * It returns either the index of the destination port,
229 	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT
230 	 * to drop it.  ring_nr is the source ring index, and the
231 	 * function may overwrite this value to forward this packet to a
232 	 * different ring index.
233 	 * This function must be set by netmap_bdgctl().
234 	 */
235 	bdg_lookup_fn_t nm_bdg_lookup;
236 
237 	/* the forwarding table, MAC+ports.
238 	 * XXX should be changed to an argument to be passed to
239 	 * the lookup function, and allocated on attach
240 	 */
241 	struct nm_hash_ent ht[NM_BDG_HASH];
242 };
243 
244 
245 /*
246  * XXX in principle nm_bridges could be created dynamically.
247  * Right now we have a static array and deletions are protected
248  * by an exclusive lock.
249  */
250 struct nm_bridge nm_bridges[NM_BRIDGES];
251 
252 
253 /*
254  * A few functions to tell which kind of port we are using.
255  * XXX should we hold a lock ?
256  *
257  * nma_is_vp()		virtual port
258  * nma_is_host()	port connected to the host stack
259  * nma_is_hw()		port connected to a NIC
260  * nma_is_generic()	generic netmap adapter XXX stop this madness
261  */
262 static __inline int
263 nma_is_vp(struct netmap_adapter *na)
264 {
265 	return na->nm_register == bdg_netmap_reg;
266 }
267 
268 
269 static __inline int
270 nma_is_host(struct netmap_adapter *na)
271 {
272 	return na->nm_register == NULL;
273 }
274 
275 
276 static __inline int
277 nma_is_hw(struct netmap_adapter *na)
278 {
279 	/* In case of sw adapter, nm_register is NULL */
280 	return !nma_is_vp(na) && !nma_is_host(na) && !nma_is_generic(na);
281 }
282 
283 static __inline int
284 nma_is_bwrap(struct netmap_adapter *na)
285 {
286 	return na->nm_register == netmap_bwrap_register;
287 }
288 
289 
290 
291 /*
292  * this is a slightly optimized copy routine which rounds
293  * to a multiple of 64 bytes and is often faster than dealing
294  * with other odd sizes. We assume there is enough room
295  * in the source and destination buffers.
296  *
297  * XXX only for multiples of 64 bytes, non overlapped.
298  */
299 static inline void
300 pkt_copy(void *_src, void *_dst, int l)
301 {
302         uint64_t *src = _src;
303         uint64_t *dst = _dst;
304         if (unlikely(l >= 1024)) {
305                 memcpy(dst, src, l);
306                 return;
307         }
308         for (; likely(l > 0); l-=64) {
309                 *dst++ = *src++;
310                 *dst++ = *src++;
311                 *dst++ = *src++;
312                 *dst++ = *src++;
313                 *dst++ = *src++;
314                 *dst++ = *src++;
315                 *dst++ = *src++;
316                 *dst++ = *src++;
317         }
318 }
319 
320 
321 
322 /*
323  * locate a bridge among the existing ones.
324  * MUST BE CALLED WITH NMG_LOCK()
325  *
326  * a ':' in the name terminates the bridge name; otherwise it is just
327  * NM_NAME. We assume the name has at least NM_NAME chars.
328  */
329 static struct nm_bridge *
330 nm_find_bridge(const char *name, int create)
331 {
332 	int i, l, namelen;
333 	struct nm_bridge *b = NULL;
334 
335 	NMG_LOCK_ASSERT();
336 
337 	namelen = strlen(NM_NAME);	/* base length */
338 	l = name ? strlen(name) : 0;		/* actual length */
339 	if (l < namelen) {
340 		D("invalid bridge name %s", name ? name : "(null)");
341 		return NULL;
342 	}
343 	for (i = namelen + 1; i < l; i++) {
344 		if (name[i] == ':') {
345 			namelen = i;
346 			break;
347 		}
348 	}
349 	if (namelen >= IFNAMSIZ)
350 		namelen = IFNAMSIZ;
351 	ND("--- prefix is '%.*s' ---", namelen, name);
352 
353 	/* lookup the name, remember empty slot if there is one */
354 	for (i = 0; i < NM_BRIDGES; i++) {
355 		struct nm_bridge *x = nm_bridges + i;
356 
357 		if (x->bdg_active_ports == 0) {
358 			if (create && b == NULL)
359 				b = x;	/* record empty slot */
360 		} else if (x->bdg_namelen != namelen) {
361 			continue;
362 		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
363 			ND("found '%.*s' at %d", namelen, name, i);
364 			b = x;
365 			break;
366 		}
367 	}
368 	if (i == NM_BRIDGES && b) { /* name not found, can create entry */
369 		/* initialize the bridge */
370 		strncpy(b->bdg_basename, name, namelen);
371 		ND("create new bridge %s with ports %d", b->bdg_basename,
372 			b->bdg_active_ports);
373 		b->bdg_namelen = namelen;
374 		b->bdg_active_ports = 0;
375 		for (i = 0; i < NM_BDG_MAXPORTS; i++)
376 			b->bdg_port_index[i] = i;
377 		/* set the default function */
378 		b->nm_bdg_lookup = netmap_bdg_learning;
379 		/* reset the MAC address table */
380 		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
381 	}
382 	return b;
383 }
384 
385 
386 /*
387  * Free the forwarding tables for rings attached to switch ports.
388  */
389 static void
390 nm_free_bdgfwd(struct netmap_adapter *na)
391 {
392 	int nrings, i;
393 	struct netmap_kring *kring;
394 
395 	NMG_LOCK_ASSERT();
396 	nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings;
397 	kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings;
398 	for (i = 0; i < nrings; i++) {
399 		if (kring[i].nkr_ft) {
400 			free(kring[i].nkr_ft, M_DEVBUF);
401 			kring[i].nkr_ft = NULL; /* protect from freeing twice */
402 		}
403 	}
404 }
405 
406 
407 /*
408  * Allocate the forwarding tables for the rings attached to the bridge ports.
409  */
410 static int
411 nm_alloc_bdgfwd(struct netmap_adapter *na)
412 {
413 	int nrings, l, i, num_dstq;
414 	struct netmap_kring *kring;
415 
416 	NMG_LOCK_ASSERT();
417 	/* all port:rings + broadcast */
418 	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
419 	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
420 	l += sizeof(struct nm_bdg_q) * num_dstq;
421 	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
422 
423 	nrings = na->num_tx_rings + 1;
424 	kring = na->tx_rings;
425 	for (i = 0; i < nrings; i++) {
426 		struct nm_bdg_fwd *ft;
427 		struct nm_bdg_q *dstq;
428 		int j;
429 
430 		ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
431 		if (!ft) {
432 			nm_free_bdgfwd(na);
433 			return ENOMEM;
434 		}
435 		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
436 		for (j = 0; j < num_dstq; j++) {
437 			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
438 			dstq[j].bq_len = 0;
439 		}
440 		kring[i].nkr_ft = ft;
441 	}
442 	return 0;
443 }
444 
445 
446 static void
447 netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
448 {
449 	int s_hw = hw, s_sw = sw;
450 	int i, lim = b->bdg_active_ports;
451 	uint8_t tmp[NM_BDG_MAXPORTS];
452 
453 	/*
454 	New algorithm:
455 	make a copy of bdg_port_index;
456 	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
457 	in the array of bdg_port_index, replacing them with
458 	entries from the bottom of the array;
459 	decrement bdg_active_ports;
460 	acquire BDG_WLOCK() and copy back the array.
461 	 */
462 
463 	D("detach %d and %d (lim %d)", hw, sw, lim);
464 	/* make a copy of the list of active ports, update it,
465 	 * and then copy back within BDG_WLOCK().
466 	 */
467 	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
468 	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
469 		if (hw >= 0 && tmp[i] == hw) {
470 			ND("detach hw %d at %d", hw, i);
471 			lim--; /* point to last active port */
472 			tmp[i] = tmp[lim]; /* swap with i */
473 			tmp[lim] = hw;	/* now this is inactive */
474 			hw = -1;
475 		} else if (sw >= 0 && tmp[i] == sw) {
476 			ND("detach sw %d at %d", sw, i);
477 			lim--;
478 			tmp[i] = tmp[lim];
479 			tmp[lim] = sw;
480 			sw = -1;
481 		} else {
482 			i++;
483 		}
484 	}
485 	if (hw >= 0 || sw >= 0) {
486 		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
487 	}
488 
489 	BDG_WLOCK(b);
490 	b->bdg_ports[s_hw] = NULL;
491 	if (s_sw >= 0) {
492 		b->bdg_ports[s_sw] = NULL;
493 	}
494 	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
495 	b->bdg_active_ports = lim;
496 	BDG_WUNLOCK(b);
497 
498 	ND("now %d active ports", lim);
499 	if (lim == 0) {
500 		ND("marking bridge %s as free", b->bdg_basename);
501 		b->nm_bdg_lookup = NULL;
502 	}
503 }
504 
505 static void
506 netmap_adapter_vp_dtor(struct netmap_adapter *na)
507 {
508 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
509 	struct nm_bridge *b = vpna->na_bdg;
510 	struct ifnet *ifp = na->ifp;
511 
512 	ND("%s has %d references", NM_IFPNAME(ifp), na->na_refcount);
513 
514 	if (b) {
515 		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
516 	}
517 
518 	bzero(ifp, sizeof(*ifp));
519 	free(ifp, M_DEVBUF);
520 	na->ifp = NULL;
521 }
522 
523 int
524 netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
525 {
526 	const char *name = nmr->nr_name;
527 	struct ifnet *ifp;
528 	int error = 0;
529 	struct netmap_adapter *ret;
530 	struct netmap_vp_adapter *vpna;
531 	struct nm_bridge *b;
532 	int i, j, cand = -1, cand2 = -1;
533 	int needed;
534 
535 	*na = NULL;     /* default return value */
536 
537 	/* first try to see if this is a bridge port. */
538 	NMG_LOCK_ASSERT();
539 	if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) {
540 		return 0;  /* no error, but no VALE prefix */
541 	}
542 
543 	b = nm_find_bridge(name, create);
544 	if (b == NULL) {
545 		D("no bridges available for '%s'", name);
546 		return (ENXIO);
547 	}
548 
549 	/* Now we are sure that name starts with the bridge's name,
550 	 * lookup the port in the bridge. We need to scan the entire
551 	 * list. It is not important to hold a WLOCK on the bridge
552 	 * during the search because NMG_LOCK already guarantees
553 	 * that there are no other possible writers.
554 	 */
555 
556 	/* lookup in the local list of ports */
557 	for (j = 0; j < b->bdg_active_ports; j++) {
558 		i = b->bdg_port_index[j];
559 		vpna = b->bdg_ports[i];
560 		// KASSERT(na != NULL);
561 		ifp = vpna->up.ifp;
562 		/* XXX make sure the name only contains one : */
563 		if (!strcmp(NM_IFPNAME(ifp), name)) {
564 			netmap_adapter_get(&vpna->up);
565 			ND("found existing if %s refs %d", name,
566 				vpna->na_bdg_refcount);
567 			*na = (struct netmap_adapter *)vpna;
568 			return 0;
569 		}
570 	}
571 	/* not found, should we create it? */
572 	if (!create)
573 		return ENXIO;
574 	/* yes we should, see if we have space to attach entries */
575 	needed = 2; /* in some cases we only need 1 */
576 	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
577 		D("bridge full %d, cannot create new port", b->bdg_active_ports);
578 		return EINVAL;
579 	}
580 	/* record the next two ports available, but do not allocate yet */
581 	cand = b->bdg_port_index[b->bdg_active_ports];
582 	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
583 	ND("+++ bridge %s port %s used %d avail %d %d",
584 		b->bdg_basename, name, b->bdg_active_ports, cand, cand2);
585 
586 	/*
587 	 * try to see if there is a matching NIC with this name
588 	 * (after the bridge's name)
589 	 */
590 	ifp = ifunit_ref(name + b->bdg_namelen + 1);
591 	if (!ifp) { /* this is a virtual port */
592 		if (nmr->nr_cmd) {
593 			/* nr_cmd must be 0 for a virtual port */
594 			return EINVAL;
595 		}
596 
597 	 	/* create a struct ifnet for the new port.
598 		 * need M_NOWAIT as we are under nma_lock
599 		 */
600 		ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
601 		if (!ifp)
602 			return ENOMEM;
603 
604 		strcpy(ifp->if_xname, name);
605 		/* bdg_netmap_attach creates a struct netmap_adapter */
606 		error = bdg_netmap_attach(nmr, ifp);
607 		if (error) {
608 			D("error %d", error);
609 			free(ifp, M_DEVBUF);
610 			return error;
611 		}
612 		ret = NA(ifp);
613 		cand2 = -1;	/* only need one port */
614 	} else {  /* this is a NIC */
615 		struct ifnet *fake_ifp;
616 
617 		error = netmap_get_hw_na(ifp, &ret);
618 		if (error || ret == NULL)
619 			goto out;
620 
621 		/* make sure the NIC is not already in use */
622 		if (NETMAP_OWNED_BY_ANY(ret)) {
623 			D("NIC %s busy, cannot attach to bridge",
624 				NM_IFPNAME(ifp));
625 			error = EINVAL;
626 			goto out;
627 		}
628 		/* create a fake interface */
629 		fake_ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
630 		if (!fake_ifp) {
631 			error = ENOMEM;
632 			goto out;
633 		}
634 		strcpy(fake_ifp->if_xname, name);
635 		error = netmap_bwrap_attach(fake_ifp, ifp);
636 		if (error) {
637 			free(fake_ifp, M_DEVBUF);
638 			goto out;
639 		}
640 		ret = NA(fake_ifp);
641 		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
642 			cand2 = -1; /* only need one port */
643 		if_rele(ifp);
644 	}
645 	vpna = (struct netmap_vp_adapter *)ret;
646 
647 	BDG_WLOCK(b);
648 	vpna->bdg_port = cand;
649 	ND("NIC  %p to bridge port %d", vpna, cand);
650 	/* bind the port to the bridge (virtual ports are not active) */
651 	b->bdg_ports[cand] = vpna;
652 	vpna->na_bdg = b;
653 	b->bdg_active_ports++;
654 	if (cand2 >= 0) {
655 		struct netmap_vp_adapter *hostna = vpna + 1;
656 		/* also bind the host stack to the bridge */
657 		b->bdg_ports[cand2] = hostna;
658 		hostna->bdg_port = cand2;
659 		hostna->na_bdg = b;
660 		b->bdg_active_ports++;
661 		ND("host %p to bridge port %d", hostna, cand2);
662 	}
663 	ND("if %s refs %d", name, vpna->up.na_refcount);
664 	BDG_WUNLOCK(b);
665 	*na = ret;
666 	netmap_adapter_get(ret);
667 	return 0;
668 
669 out:
670 	if_rele(ifp);
671 
672 	return error;
673 }
674 
675 
676 /* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */
677 static int
678 nm_bdg_attach(struct nmreq *nmr)
679 {
680 	struct netmap_adapter *na;
681 	struct netmap_if *nifp;
682 	struct netmap_priv_d *npriv;
683 	struct netmap_bwrap_adapter *bna;
684 	int error;
685 
686 	npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
687 	if (npriv == NULL)
688 		return ENOMEM;
689 	NMG_LOCK();
690 	/* XXX probably netmap_get_bdg_na() */
691 	error = netmap_get_na(nmr, &na, 1 /* create if not exists */);
692 	if (error) /* no device, or another bridge or user owns the device */
693 		goto unlock_exit;
694 	/* netmap_get_na() sets na_bdg if this is a physical interface
695 	 * that we can attach to a switch.
696 	 */
697 	if (!nma_is_bwrap(na)) {
698 		/* got reference to a virtual port or direct access to a NIC.
699 		 * perhaps no bridge prefix or a wrong NIC name was specified
700 		 */
701 		error = EINVAL;
702 		goto unref_exit;
703 	}
704 
705 	if (na->active_fds > 0) { /* already registered */
706 		error = EBUSY;
707 		goto unref_exit;
708 	}
709 
710 	nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, &error);
711 	if (!nifp) {
712 		goto unref_exit;
713 	}
714 
715 	bna = (struct netmap_bwrap_adapter*)na;
716 	bna->na_kpriv = npriv;
717 	NMG_UNLOCK();
718 	ND("registered %s to netmap-mode", NM_IFPNAME(na->ifp));
719 	return 0;
720 
721 unref_exit:
722 	netmap_adapter_put(na);
723 unlock_exit:
724 	NMG_UNLOCK();
725 	bzero(npriv, sizeof(*npriv));
726 	free(npriv, M_DEVBUF);
727 	return error;
728 }
729 
730 static int
731 nm_bdg_detach(struct nmreq *nmr)
732 {
733 	struct netmap_adapter *na;
734 	int error;
735 	struct netmap_bwrap_adapter *bna;
736 	int last_instance;
737 
738 	NMG_LOCK();
739 	error = netmap_get_na(nmr, &na, 0 /* don't create */);
740 	if (error) { /* no device, or another bridge or user owns the device */
741 		goto unlock_exit;
742 	}
743 	if (!nma_is_bwrap(na)) {
744 		/* got reference to a virtual port or direct access to a NIC.
745 		 * perhaps no bridge prefix or a wrong NIC name was specified
746 		 */
747 		error = EINVAL;
748 		goto unref_exit;
749 	}
750 	bna = (struct netmap_bwrap_adapter *)na;
751 
752 	if (na->active_fds == 0) { /* not registered */
753 		error = EINVAL;
754 		goto unref_exit;
755 	}
756 
757 	last_instance = netmap_dtor_locked(bna->na_kpriv); /* unregister */
758 	if (!last_instance) {
759 		D("--- error, trying to detach an entry with active mmaps");
760 		error = EINVAL;
761 	} else {
762 		struct netmap_priv_d *npriv = bna->na_kpriv;
763 
764 		bna->na_kpriv = NULL;
765 		D("deleting priv");
766 
767 		bzero(npriv, sizeof(*npriv));
768 		free(npriv, M_DEVBUF);
769 	}
770 
771 unref_exit:
772 	netmap_adapter_put(na);
773 unlock_exit:
774 	NMG_UNLOCK();
775 	return error;
776 
777 }
778 
779 
780 /* exported to kernel callers, e.g. OVS ?
781  * Entry point.
782  * Called without NMG_LOCK.
783  */
784 int
785 netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
786 {
787 	struct nm_bridge *b;
788 	struct netmap_adapter *na;
789 	struct netmap_vp_adapter *vpna;
790 	struct ifnet *iter;
791 	char *name = nmr->nr_name;
792 	int cmd = nmr->nr_cmd, namelen = strlen(name);
793 	int error = 0, i, j;
794 
795 	switch (cmd) {
796 	case NETMAP_BDG_ATTACH:
797 		error = nm_bdg_attach(nmr);
798 		break;
799 
800 	case NETMAP_BDG_DETACH:
801 		error = nm_bdg_detach(nmr);
802 		break;
803 
804 	case NETMAP_BDG_LIST:
805 		/* this is used to enumerate bridges and ports */
806 		if (namelen) { /* look up indexes of bridge and port */
807 			if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
808 				error = EINVAL;
809 				break;
810 			}
811 			NMG_LOCK();
812 			b = nm_find_bridge(name, 0 /* don't create */);
813 			if (!b) {
814 				error = ENOENT;
815 				NMG_UNLOCK();
816 				break;
817 			}
818 
819 			error = ENOENT;
820 			for (j = 0; j < b->bdg_active_ports; j++) {
821 				i = b->bdg_port_index[j];
822 				vpna = b->bdg_ports[i];
823 				if (vpna == NULL) {
824 					D("---AAAAAAAAARGH-------");
825 					continue;
826 				}
827 				iter = vpna->up.ifp;
828 				/* the former and the latter identify a
829 				 * virtual port and a NIC, respectively
830 				 */
831 				if (!strcmp(iter->if_xname, name)) {
832 					/* bridge index */
833 					nmr->nr_arg1 = b - nm_bridges;
834 					nmr->nr_arg2 = i; /* port index */
835 					error = 0;
836 					break;
837 				}
838 			}
839 			NMG_UNLOCK();
840 		} else {
841 			/* return the first non-empty entry starting from
842 			 * bridge nr_arg1 and port nr_arg2.
843 			 *
844 			 * Users can detect the end of the current bridge by
845 			 * comparing the new and old values of nr_arg1, and
846 			 * the end of all the bridges by error != 0
847 			 */
848 			i = nmr->nr_arg1;
849 			j = nmr->nr_arg2;
850 
851 			NMG_LOCK();
852 			for (error = ENOENT; i < NM_BRIDGES; i++) {
853 				b = nm_bridges + i;
854 				if (j >= b->bdg_active_ports) {
855 					j = 0; /* following bridges scan from 0 */
856 					continue;
857 				}
858 				nmr->nr_arg1 = i;
859 				nmr->nr_arg2 = j;
860 				j = b->bdg_port_index[j];
861 				vpna = b->bdg_ports[j];
862 				iter = vpna->up.ifp;
863 				strncpy(name, iter->if_xname, (size_t)IFNAMSIZ);
864 				error = 0;
865 				break;
866 			}
867 			NMG_UNLOCK();
868 		}
869 		break;
870 
871 	case NETMAP_BDG_LOOKUP_REG:
872 		/* register a lookup function for the given bridge.
873 		 * nmr->nr_name may be just the bridge's name (including
874 		 * the ':' if it is not just NM_NAME).
875 		 */
876 		if (!func) {
877 			error = EINVAL;
878 			break;
879 		}
880 		NMG_LOCK();
881 		b = nm_find_bridge(name, 0 /* don't create */);
882 		if (!b) {
883 			error = EINVAL;
884 		} else {
885 			b->nm_bdg_lookup = func;
886 		}
887 		NMG_UNLOCK();
888 		break;
889 
890 	case NETMAP_BDG_OFFSET:
891 		NMG_LOCK();
892 		error = netmap_get_bdg_na(nmr, &na, 0);
893 		if (!error) {
894 			vpna = (struct netmap_vp_adapter *)na;
895 			if (nmr->nr_arg1 > NETMAP_BDG_MAX_OFFSET)
896 				nmr->nr_arg1 = NETMAP_BDG_MAX_OFFSET;
897 			vpna->offset = nmr->nr_arg1;
898 			D("Using offset %d for %p", vpna->offset, vpna);
899 		}
900 		NMG_UNLOCK();
901 		break;
902 
903 	default:
904 		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
905 		error = EINVAL;
906 		break;
907 	}
908 	return error;
909 }
910 
911 
912 static int
913 netmap_vp_krings_create(struct netmap_adapter *na)
914 {
915 	u_int ntx, nrx, tailroom;
916 	int error, i;
917 	uint32_t *leases;
918 
919 	/* XXX vps do not need host rings,
920 	 * but we crash if we don't have one
921 	 */
922 	ntx = na->num_tx_rings + 1;
923 	nrx = na->num_rx_rings + 1;
924 
925 	/*
926 	 * Leases are attached to RX rings on vale ports
927 	 */
928 	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
929 
930 	error = netmap_krings_create(na, ntx, nrx, tailroom);
931 	if (error)
932 		return error;
933 
934 	leases = na->tailroom;
935 
936 	for (i = 0; i < nrx; i++) { /* Receive rings */
937 		na->rx_rings[i].nkr_leases = leases;
938 		leases += na->num_rx_desc;
939 	}
940 
941 	error = nm_alloc_bdgfwd(na);
942 	if (error) {
943 		netmap_krings_delete(na);
944 		return error;
945 	}
946 
947 	return 0;
948 }
949 
950 static void
951 netmap_vp_krings_delete(struct netmap_adapter *na)
952 {
953 	nm_free_bdgfwd(na);
954 	netmap_krings_delete(na);
955 }
956 
957 
958 static int
959 nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
960 	struct netmap_vp_adapter *na, u_int ring_nr);
961 
962 
963 /*
964  * Grab packets from a kring, move them into the ft structure
965  * associated with the tx (input) port. Max one instance per port,
966  * filtered on input (ioctl, poll or XXX).
967  * Returns the next position in the ring.
968  */
969 static int
970 nm_bdg_preflush(struct netmap_vp_adapter *na, u_int ring_nr,
971 	struct netmap_kring *kring, u_int end)
972 {
973 	struct netmap_ring *ring = kring->ring;
974 	struct nm_bdg_fwd *ft;
975 	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
976 	u_int ft_i = 0;	/* start from 0 */
977 	u_int frags = 1; /* how many frags ? */
978 	struct nm_bridge *b = na->na_bdg;
979 
980 	/* To protect against modifications to the bridge we acquire a
981 	 * shared lock, waiting if we can sleep (if the source port is
982 	 * attached to a user process) or with a trylock otherwise (NICs).
983 	 */
984 	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
985 	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
986 		BDG_RLOCK(b);
987 	else if (!BDG_RTRYLOCK(b))
988 		return 0;
989 	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
990 	ft = kring->nkr_ft;
991 
992 	for (; likely(j != end); j = nm_next(j, lim)) {
993 		struct netmap_slot *slot = &ring->slot[j];
994 		char *buf;
995 
996 		ft[ft_i].ft_len = slot->len;
997 		ft[ft_i].ft_flags = slot->flags;
998 
999 		ND("flags is 0x%x", slot->flags);
1000 		/* this slot goes into a list so initialize the link field */
1001 		ft[ft_i].ft_next = NM_FT_NULL;
1002 		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
1003 			(void *)(uintptr_t)slot->ptr : BDG_NMB(&na->up, slot);
1004 		__builtin_prefetch(buf);
1005 		++ft_i;
1006 		if (slot->flags & NS_MOREFRAG) {
1007 			frags++;
1008 			continue;
1009 		}
1010 		if (unlikely(netmap_verbose && frags > 1))
1011 			RD(5, "%d frags at %d", frags, ft_i - frags);
1012 		ft[ft_i - frags].ft_frags = frags;
1013 		frags = 1;
1014 		if (unlikely((int)ft_i >= bridge_batch))
1015 			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1016 	}
1017 	if (frags > 1) {
1018 		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
1019 		// ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
1020 		ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG;
1021 		ft[ft_i - frags].ft_frags = frags - 1;
1022 	}
1023 	if (ft_i)
1024 		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1025 	BDG_RUNLOCK(b);
1026 	return j;
1027 }
1028 
1029 
1030 /*
1031  *---- support for virtual bridge -----
1032  */
1033 
1034 /* ----- FreeBSD if_bridge hash function ------- */
1035 
1036 /*
1037  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
1038  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
1039  *
1040  * http://www.burtleburtle.net/bob/hash/spooky.html
1041  */
1042 #define mix(a, b, c)                                                    \
1043 do {                                                                    \
1044         a -= b; a -= c; a ^= (c >> 13);                                 \
1045         b -= c; b -= a; b ^= (a << 8);                                  \
1046         c -= a; c -= b; c ^= (b >> 13);                                 \
1047         a -= b; a -= c; a ^= (c >> 12);                                 \
1048         b -= c; b -= a; b ^= (a << 16);                                 \
1049         c -= a; c -= b; c ^= (b >> 5);                                  \
1050         a -= b; a -= c; a ^= (c >> 3);                                  \
1051         b -= c; b -= a; b ^= (a << 10);                                 \
1052         c -= a; c -= b; c ^= (b >> 15);                                 \
1053 } while (/*CONSTCOND*/0)
1054 
1055 static __inline uint32_t
1056 nm_bridge_rthash(const uint8_t *addr)
1057 {
1058         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hash key
1059 
1060         b += addr[5] << 8;
1061         b += addr[4];
1062         a += addr[3] << 24;
1063         a += addr[2] << 16;
1064         a += addr[1] << 8;
1065         a += addr[0];
1066 
1067         mix(a, b, c);
1068 #define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
1069         return (c & BRIDGE_RTHASH_MASK);
1070 }
1071 
1072 #undef mix
1073 
1074 
1075 static int
1076 bdg_netmap_reg(struct netmap_adapter *na, int onoff)
1077 {
1078 	struct netmap_vp_adapter *vpna =
1079 		(struct netmap_vp_adapter*)na;
1080 	struct ifnet *ifp = na->ifp;
1081 
1082 	/* the interface is already attached to the bridge,
1083 	 * so we only need to toggle IFCAP_NETMAP.
1084 	 */
1085 	BDG_WLOCK(vpna->na_bdg);
1086 	if (onoff) {
1087 		ifp->if_capenable |= IFCAP_NETMAP;
1088 	} else {
1089 		ifp->if_capenable &= ~IFCAP_NETMAP;
1090 	}
1091 	BDG_WUNLOCK(vpna->na_bdg);
1092 	return 0;
1093 }
1094 
1095 
1096 /*
1097  * Lookup function for a learning bridge.
1098  * Update the hash table with the source address,
1099  * and then returns the destination port index, and the
1100  * ring in *dst_ring (at the moment, always ring 0).
1101  */
1102 u_int
1103 netmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring,
1104 		struct netmap_vp_adapter *na)
1105 {
1106 	struct nm_hash_ent *ht = na->na_bdg->ht;
1107 	uint32_t sh, dh;
1108 	u_int dst, mysrc = na->bdg_port;
1109 	uint64_t smac, dmac;
1110 
1111 	if (buf_len < 14) {
1112 		D("invalid buf length %d", buf_len);
1113 		return NM_BDG_NOPORT;
1114 	}
1115 	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
1116 	smac = le64toh(*(uint64_t *)(buf + 4));
1117 	smac >>= 16;
1118 
1119 	/*
1120 	 * The hash is somewhat expensive, there might be some
1121 	 * worthwhile optimizations here.
1122 	 */
1123 	if ((buf[6] & 1) == 0) { /* valid src */
1124 		uint8_t *s = buf+6;
1125 		sh = nm_bridge_rthash(s); // XXX hash of source
1126 		/* update source port forwarding entry */
1127 		ht[sh].mac = smac;	/* XXX expire ? */
1128 		ht[sh].ports = mysrc;
1129 		if (netmap_verbose)
1130 		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
1131 			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
1132 	}
1133 	dst = NM_BDG_BROADCAST;
1134 	if ((buf[0] & 1) == 0) { /* unicast */
1135 		dh = nm_bridge_rthash(buf); // XXX hash of dst
1136 		if (ht[dh].mac == dmac) {	/* found dst */
1137 			dst = ht[dh].ports;
1138 		}
1139 		/* XXX otherwise return NM_BDG_UNKNOWN ? */
1140 	}
1141 	*dst_ring = 0;
1142 	return dst;
1143 }
1144 
1145 
1146 /*
1147  * This flush routine supports only unicast and broadcast but a large
1148  * number of ports, and lets us replace the learn and dispatch functions.
1149  */
1150 int
1151 nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
1152 		u_int ring_nr)
1153 {
1154 	struct nm_bdg_q *dst_ents, *brddst;
1155 	uint16_t num_dsts = 0, *dsts;
1156 	struct nm_bridge *b = na->na_bdg;
1157 	u_int i, j, me = na->bdg_port;
1158 
1159 	/*
1160 	 * The work area (pointed by ft) is followed by an array of
1161 	 * pointers to queues, dst_ents; there are NM_BDG_MAXRINGS
1162 	 * queues per port plus one for the broadcast traffic.
1163 	 * Then we have an array of destination indexes.
1164 	 */
1165 	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
1166 	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
1167 
1168 	/* first pass: find a destination for each packet in the batch */
1169 	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
1170 		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
1171 		uint16_t dst_port, d_i;
1172 		struct nm_bdg_q *d;
1173 		uint8_t *buf = ft[i].ft_buf;
1174 		u_int len = ft[i].ft_len;
1175 
1176 		ND("slot %d frags %d", i, ft[i].ft_frags);
1177 		/* Drop the packet if the offset is not into the first
1178 		   fragment nor at the very beginning of the second. */
1179 		if (unlikely(na->offset > len))
1180 			continue;
1181 		if (len == na->offset) {
1182 			buf = ft[i+1].ft_buf;
1183 			len = ft[i+1].ft_len;
1184 		} else {
1185 			buf += na->offset;
1186 			len -= na->offset;
1187 		}
1188 		dst_port = b->nm_bdg_lookup(buf, len, &dst_ring, na);
1189 		if (netmap_verbose > 255)
1190 			RD(5, "slot %d port %d -> %d", i, me, dst_port);
1191 		if (dst_port == NM_BDG_NOPORT)
1192 			continue; /* this packet is identified to be dropped */
1193 		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
1194 			continue;
1195 		else if (dst_port == NM_BDG_BROADCAST)
1196 			dst_ring = 0; /* broadcasts always go to ring 0 */
1197 		else if (unlikely(dst_port == me ||
1198 		    !b->bdg_ports[dst_port]))
1199 			continue;
1200 
1201 		/* get a position in the scratch pad */
1202 		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
1203 		d = dst_ents + d_i;
1204 
1205 		/* append the first fragment to the list */
1206 		if (d->bq_head == NM_FT_NULL) { /* new destination */
1207 			d->bq_head = d->bq_tail = i;
1208 			/* remember this position to be scanned later */
1209 			if (dst_port != NM_BDG_BROADCAST)
1210 				dsts[num_dsts++] = d_i;
1211 		} else {
1212 			ft[d->bq_tail].ft_next = i;
1213 			d->bq_tail = i;
1214 		}
1215 		d->bq_len += ft[i].ft_frags;
1216 	}
1217 
1218 	/*
1219 	 * Broadcast traffic goes to ring 0 on all destinations.
1220 	 * So we need to add these rings to the list of ports to scan.
1221 	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
1222 	 * expensive. We should keep a compact list of active destinations
1223 	 * so we could shorten this loop.
1224 	 */
1225 	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
1226 	if (brddst->bq_head != NM_FT_NULL) {
1227 		for (j = 0; likely(j < b->bdg_active_ports); j++) {
1228 			uint16_t d_i;
1229 			i = b->bdg_port_index[j];
1230 			if (unlikely(i == me))
1231 				continue;
1232 			d_i = i * NM_BDG_MAXRINGS;
1233 			if (dst_ents[d_i].bq_head == NM_FT_NULL)
1234 				dsts[num_dsts++] = d_i;
1235 		}
1236 	}
1237 
1238 	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
1239 	/* second pass: scan destinations (XXX will be modular somehow) */
1240 	for (i = 0; i < num_dsts; i++) {
1241 		struct ifnet *dst_ifp;
1242 		struct netmap_vp_adapter *dst_na;
1243 		struct netmap_kring *kring;
1244 		struct netmap_ring *ring;
1245 		u_int dst_nr, lim, j, sent = 0, d_i, next, brd_next;
1246 		u_int needed, howmany;
1247 		int retry = netmap_txsync_retry;
1248 		struct nm_bdg_q *d;
1249 		uint32_t my_start = 0, lease_idx = 0;
1250 		int nrings;
1251 		int offset_mismatch;
1252 
1253 		d_i = dsts[i];
1254 		ND("second pass %d port %d", i, d_i);
1255 		d = dst_ents + d_i;
1256 		// XXX fix the division
1257 		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
1258 		/* protect from the lookup function returning an inactive
1259 		 * destination port
1260 		 */
1261 		if (unlikely(dst_na == NULL))
1262 			goto cleanup;
1263 		if (dst_na->up.na_flags & NAF_SW_ONLY)
1264 			goto cleanup;
1265 		dst_ifp = dst_na->up.ifp;
1266 		/*
1267 		 * The interface may be in !netmap mode in two cases:
1268 		 * - when na is attached but not activated yet;
1269 		 * - when na is being deactivated but is still attached.
1270 		 */
1271 		if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) {
1272 			ND("not in netmap mode!");
1273 			goto cleanup;
1274 		}
1275 
1276 		offset_mismatch = (dst_na->offset != na->offset);
1277 
1278 		/* there is at least one either unicast or broadcast packet */
1279 		brd_next = brddst->bq_head;
1280 		next = d->bq_head;
1281 		/* we need to reserve this many slots. If fewer are
1282 		 * available, some packets will be dropped.
1283 		 * Packets may have multiple fragments, so there is a
1284 		 * chance that we may not use all of the slots we have
1285 		 * claimed, so we will need to handle the leftover
1286 		 * ones when we regain the lock.
1287 		 */
1288 		needed = d->bq_len + brddst->bq_len;
1289 
1290 		ND(5, "pass 2 dst %d is %x %s",
1291 			i, d_i, is_vp ? "virtual" : "nic/host");
1292 		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
1293 		nrings = dst_na->up.num_rx_rings;
1294 		if (dst_nr >= nrings)
1295 			dst_nr = dst_nr % nrings;
1296 		kring = &dst_na->up.rx_rings[dst_nr];
1297 		ring = kring->ring;
1298 		lim = kring->nkr_num_slots - 1;
1299 
1300 retry:
1301 
1302 		/* reserve the buffers in the queue and an entry
1303 		 * to report completion, and drop lock.
1304 		 * XXX this might become a helper function.
1305 		 */
1306 		mtx_lock(&kring->q_lock);
1307 		if (kring->nkr_stopped) {
1308 			mtx_unlock(&kring->q_lock);
1309 			goto cleanup;
1310 		}
1311 		if (dst_na->retry) {
1312 			dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1313 		}
1314 		my_start = j = kring->nkr_hwlease;
1315 		howmany = nm_kr_space(kring, 1);
1316 		if (needed < howmany)
1317 			howmany = needed;
1318 		lease_idx = nm_kr_lease(kring, howmany, 1);
1319 		mtx_unlock(&kring->q_lock);
1320 
1321 		/* only retry if we need more than available slots */
1322 		if (retry && needed <= howmany)
1323 			retry = 0;
1324 
1325 		/* copy to the destination queue */
1326 		while (howmany > 0) {
1327 			struct netmap_slot *slot;
1328 			struct nm_bdg_fwd *ft_p, *ft_end;
1329 			u_int cnt;
1330 			int fix_mismatch = offset_mismatch;
1331 
1332 			/* find the queue from which we pick next packet.
1333 			 * NM_FT_NULL is always higher than valid indexes
1334 			 * so we never dereference it if the other list
1335 			 * has packets (and if both are empty we never
1336 			 * get here).
1337 			 */
1338 			if (next < brd_next) {
1339 				ft_p = ft + next;
1340 				next = ft_p->ft_next;
1341 			} else { /* insert broadcast */
1342 				ft_p = ft + brd_next;
1343 				brd_next = ft_p->ft_next;
1344 			}
1345 			cnt = ft_p->ft_frags; // cnt > 0
1346 			if (unlikely(cnt > howmany))
1347 			    break; /* no more space */
1348 			howmany -= cnt;
1349 			if (netmap_verbose && cnt > 1)
1350 				RD(5, "rx %d frags to %d", cnt, j);
1351 			ft_end = ft_p + cnt;
1352 			do {
1353 			    char *dst, *src = ft_p->ft_buf;
1354 			    size_t copy_len = ft_p->ft_len, dst_len = copy_len;
1355 
1356 			    slot = &ring->slot[j];
1357 			    dst = BDG_NMB(&dst_na->up, slot);
1358 
1359 			    if (unlikely(fix_mismatch)) {
1360 				if (na->offset > dst_na->offset) {
1361 					src += na->offset - dst_na->offset;
1362 					copy_len -= na->offset - dst_na->offset;
1363 					dst_len = copy_len;
1364 				} else {
1365 					bzero(dst, dst_na->offset - na->offset);
1366 					dst_len += dst_na->offset - na->offset;
1367 					dst += dst_na->offset - na->offset;
1368 				}
1369 				/* fix the first fragment only */
1370 				fix_mismatch = 0;
1371 				/* completely skip a header-only fragment */
1372 				if (copy_len == 0) {
1373 					ft_p++;
1374 					continue;
1375 				}
1376 			    }
1377 			    /* round to a multiple of 64 */
1378 			    copy_len = (copy_len + 63) & ~63;
1379 
1380 			    ND("send %d %d bytes at %s:%d",
1381 				i, ft_p->ft_len, NM_IFPNAME(dst_ifp), j);
1382 			    if (ft_p->ft_flags & NS_INDIRECT) {
1383 				if (copyin(src, dst, copy_len)) {
1384 					// invalid user pointer, pretend len is 0
1385 					dst_len = 0;
1386 				}
1387 			    } else {
1388 				//memcpy(dst, src, copy_len);
1389 				pkt_copy(src, dst, (int)copy_len);
1390 			    }
1391 			    slot->len = dst_len;
1392 			    slot->flags = (cnt << 8)| NS_MOREFRAG;
1393 			    j = nm_next(j, lim);
1394 			    ft_p++;
1395 			    sent++;
1396 			} while (ft_p != ft_end);
1397 			slot->flags = (cnt << 8); /* clear flag on last entry */
1398 			/* are we done ? */
1399 			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
1400 				break;
1401 		}
1402 		{
1403 		    /* current position */
1404 		    uint32_t *p = kring->nkr_leases; /* shorthand */
1405 		    uint32_t update_pos;
1406 		    int still_locked = 1;
1407 
1408 		    mtx_lock(&kring->q_lock);
1409 		    if (unlikely(howmany > 0)) {
1410 			/* we did not use all the buffers. If I am the last
1411 			 * one I can recover the slots, otherwise I must
1412 			 * fill them with 0 to mark empty packets.
1413 			 */
1414 			ND("leftover %d bufs", howmany);
1415 			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
1416 			    /* yes i am the last one */
1417 			    ND("roll back nkr_hwlease to %d", j);
1418 			    kring->nkr_hwlease = j;
1419 			} else {
1420 			    while (howmany-- > 0) {
1421 				ring->slot[j].len = 0;
1422 				ring->slot[j].flags = 0;
1423 				j = nm_next(j, lim);
1424 			    }
1425 			}
1426 		    }
1427 		    p[lease_idx] = j; /* report I am done */
1428 
1429 		    update_pos = nm_kr_rxpos(kring);
1430 
1431 		    if (my_start == update_pos) {
1432 			/* all slots before my_start have been reported,
1433 			 * so scan subsequent leases to see if other ranges
1434 			 * have been completed, and do a selwakeup or txsync.
1435 			 */
1436 			while (lease_idx != kring->nkr_lease_idx &&
1437 				p[lease_idx] != NR_NOSLOT) {
1438 			    j = p[lease_idx];
1439 			    p[lease_idx] = NR_NOSLOT;
1440 			    lease_idx = nm_next(lease_idx, lim);
1441 			}
1442 			/* j is the new 'write' position. j != my_start
1443 			 * means there are new buffers to report
1444 			 */
1445 			if (likely(j != my_start)) {
1446 				uint32_t old_avail = kring->nr_hwavail;
1447 
1448 				kring->nr_hwavail = (j >= kring->nr_hwcur) ?
1449 					j - kring->nr_hwcur :
1450 					j + lim + 1 - kring->nr_hwcur;
1451 				if (kring->nr_hwavail < old_avail) {
1452 					D("avail shrink %d -> %d",
1453 						old_avail, kring->nr_hwavail);
1454 				}
1455 				dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1456 				still_locked = 0;
1457 				mtx_unlock(&kring->q_lock);
1458 				if (dst_na->retry && retry--)
1459 					goto retry;
1460 			}
1461 		    }
1462 		    if (still_locked)
1463 			mtx_unlock(&kring->q_lock);
1464 		}
1465 cleanup:
1466 		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
1467 		d->bq_len = 0;
1468 	}
1469 	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
1470 	brddst->bq_len = 0;
1471 	return 0;
1472 }
1473 
1474 static int
1475 netmap_vp_txsync(struct netmap_vp_adapter *na, u_int ring_nr, int flags)
1476 {
1477 	struct netmap_kring *kring = &na->up.tx_rings[ring_nr];
1478 	struct netmap_ring *ring = kring->ring;
1479 	u_int j, k, lim = kring->nkr_num_slots - 1;
1480 
1481 	k = ring->cur;
1482 	if (k > lim)
1483 		return netmap_ring_reinit(kring);
1484 
1485 	if (bridge_batch <= 0) { /* testing only */
1486 		j = k; // used all
1487 		goto done;
1488 	}
1489 	if (bridge_batch > NM_BDG_BATCH)
1490 		bridge_batch = NM_BDG_BATCH;
1491 
1492 	j = nm_bdg_preflush(na, ring_nr, kring, k);
1493 	if (j != k)
1494 		D("early break at %d/ %d, avail %d", j, k, kring->nr_hwavail);
1495 	/* k-j modulo ring size is the number of slots processed */
1496 	if (k < j)
1497 		k += kring->nkr_num_slots;
1498 	kring->nr_hwavail = lim - (k - j);
1499 
1500 done:
1501 	kring->nr_hwcur = j;
1502 	ring->avail = kring->nr_hwavail;
1503 	if (netmap_verbose)
1504 		D("%s ring %d flags %d", NM_IFPNAME(na->up.ifp), ring_nr, flags);
1505 	return 0;
1506 }
1507 
1508 
1509 /*
1510  * main dispatch routine for the bridge.
1511  * We already know that only one thread is running this.
1512  * we must run nm_bdg_preflush without lock.
1513  */
1514 static int
1515 bdg_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
1516 {
1517 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
1518 	return netmap_vp_txsync(vpna, ring_nr, flags);
1519 }
1520 
1521 
1522 /*
1523  * user process reading from a VALE switch.
1524  * Already protected against concurrent calls from userspace,
1525  * but we must acquire the queue's lock to protect against
1526  * writers on the same queue.
1527  */
1528 static int
1529 bdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
1530 {
1531 	struct netmap_kring *kring = &na->rx_rings[ring_nr];
1532 	struct netmap_ring *ring = kring->ring;
1533 	u_int j, lim = kring->nkr_num_slots - 1;
1534 	u_int k = ring->cur, resvd = ring->reserved;
1535 	int n;
1536 
1537 	mtx_lock(&kring->q_lock);
1538 	if (k > lim) {
1539 		D("ouch dangerous reset!!!");
1540 		n = netmap_ring_reinit(kring);
1541 		goto done;
1542 	}
1543 
1544 	/* skip past packets that userspace has released */
1545 	j = kring->nr_hwcur;    /* netmap ring index */
1546 	if (resvd > 0) {
1547 		if (resvd + ring->avail >= lim + 1) {
1548 			D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
1549 			ring->reserved = resvd = 0; // XXX panic...
1550 		}
1551 		k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
1552 	}
1553 
1554 	if (j != k) { /* userspace has released some packets. */
1555 		n = k - j;
1556 		if (n < 0)
1557 			n += kring->nkr_num_slots;
1558 		ND("userspace releases %d packets", n);
1559 		for (n = 0; likely(j != k); n++) {
1560 			struct netmap_slot *slot = &ring->slot[j];
1561 			void *addr = BDG_NMB(na, slot);
1562 
1563 			if (addr == netmap_buffer_base) { /* bad buf */
1564 				D("bad buffer index %d, ignore ?",
1565 					slot->buf_idx);
1566 			}
1567 			slot->flags &= ~NS_BUF_CHANGED;
1568 			j = nm_next(j, lim);
1569 		}
1570 		kring->nr_hwavail -= n;
1571 		kring->nr_hwcur = k;
1572 	}
1573 	/* tell userspace that there are new packets */
1574 	ring->avail = kring->nr_hwavail - resvd;
1575 	n = 0;
1576 done:
1577 	mtx_unlock(&kring->q_lock);
1578 	return n;
1579 }
1580 
1581 static int
1582 bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp)
1583 {
1584 	struct netmap_vp_adapter *vpna;
1585 	struct netmap_adapter *na;
1586 	int error;
1587 
1588 	vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
1589 	if (vpna == NULL)
1590 		return ENOMEM;
1591 
1592 	na = &vpna->up;
1593 
1594 	na->ifp = ifp;
1595 
1596 	/* bound checking */
1597 	na->num_tx_rings = nmr->nr_tx_rings;
1598 	nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1599 	nmr->nr_tx_rings = na->num_tx_rings; // write back
1600 	na->num_rx_rings = nmr->nr_rx_rings;
1601 	nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1602 	nmr->nr_rx_rings = na->num_rx_rings; // write back
1603 	nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
1604 			1, NM_BDG_MAXSLOTS, NULL);
1605 	na->num_tx_desc = nmr->nr_tx_slots;
1606 	nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
1607 			1, NM_BDG_MAXSLOTS, NULL);
1608 	na->num_rx_desc = nmr->nr_rx_slots;
1609 	vpna->offset = 0;
1610 
1611 	na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
1612 	na->nm_txsync = bdg_netmap_txsync;
1613 	na->nm_rxsync = bdg_netmap_rxsync;
1614 	na->nm_register = bdg_netmap_reg;
1615 	na->nm_dtor = netmap_adapter_vp_dtor;
1616 	na->nm_krings_create = netmap_vp_krings_create;
1617 	na->nm_krings_delete = netmap_vp_krings_delete;
1618 	na->nm_mem = netmap_mem_private_new(NM_IFPNAME(na->ifp),
1619 			na->num_tx_rings, na->num_tx_desc,
1620 			na->num_rx_rings, na->num_rx_desc);
1621 	/* other nmd fields are set in the common routine */
1622 	error = netmap_attach_common(na);
1623 	if (error) {
1624 		free(vpna, M_DEVBUF);
1625 		return error;
1626 	}
1627 	return 0;
1628 }
1629 
1630 static void
1631 netmap_bwrap_dtor(struct netmap_adapter *na)
1632 {
1633 	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1634 	struct netmap_adapter *hwna = bna->hwna;
1635 	struct nm_bridge *b = bna->up.na_bdg,
1636 		*bh = bna->host.na_bdg;
1637 	struct ifnet *ifp = na->ifp;
1638 
1639 	ND("na %p", na);
1640 
1641 	if (b) {
1642 		netmap_bdg_detach_common(b, bna->up.bdg_port,
1643 			(bh ? bna->host.bdg_port : -1));
1644 	}
1645 
1646 	hwna->na_private = NULL;
1647 	netmap_adapter_put(hwna);
1648 
1649 	bzero(ifp, sizeof(*ifp));
1650 	free(ifp, M_DEVBUF);
1651 	na->ifp = NULL;
1652 
1653 }
1654 
1655 /*
1656  * Pass packets from nic to the bridge.
1657  * XXX TODO check locking: this is called from the interrupt
1658  * handler so we should make sure that the interface is not
1659  * disconnected while passing down an interrupt.
1660  *
1661  * Note, no user process can access this NIC so we can ignore
1662  * the info in the 'ring'.
1663  */
1664 /* callback that overwrites the hwna notify callback.
1665  * Packets come from the outside or from the host stack and are put on an hwna rx ring.
1666  * The bridge wrapper then sends the packets through the bridge.
1667  */
1668 static int
1669 netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
1670 {
1671 	struct ifnet *ifp = na->ifp;
1672 	struct netmap_bwrap_adapter *bna = na->na_private;
1673 	struct netmap_vp_adapter *hostna = &bna->host;
1674 	struct netmap_kring *kring, *bkring;
1675 	struct netmap_ring *ring;
1676 	int is_host_ring = ring_nr == na->num_rx_rings;
1677 	struct netmap_vp_adapter *vpna = &bna->up;
1678 	int error = 0;
1679 
1680 	ND("%s[%d] %s %x", NM_IFPNAME(ifp), ring_nr, (tx == NR_TX ? "TX" : "RX"), flags);
1681 
1682 	if (flags & NAF_DISABLE_NOTIFY) {
1683 		kring = tx == NR_TX ? na->tx_rings : na->rx_rings;
1684 		bkring = tx == NR_TX ? vpna->up.rx_rings : vpna->up.tx_rings;
1685 		if (kring->nkr_stopped)
1686 			netmap_disable_ring(bkring);
1687 		else
1688 			bkring->nkr_stopped = 0;
1689 		return 0;
1690 	}
1691 
1692 	if (ifp == NULL || !(ifp->if_capenable & IFCAP_NETMAP))
1693 		return 0;
1694 
1695 	if (tx == NR_TX)
1696 		return 0;
1697 
1698 	kring = &na->rx_rings[ring_nr];
1699 	ring = kring->ring;
1700 
1701 	/* make sure the ring is not disabled */
1702 	if (nm_kr_tryget(kring))
1703 		return 0;
1704 
1705 	if (is_host_ring && hostna->na_bdg == NULL) {
1706 		error = bna->save_notify(na, ring_nr, tx, flags);
1707 		goto put_out;
1708 	}
1709 
1710 	if (is_host_ring) {
1711 		vpna = hostna;
1712 		ring_nr = 0;
1713 	} else {
1714 		/* fetch packets that have arrived.
1715 		 * XXX maybe do this in a loop ?
1716 		 */
1717 		error = na->nm_rxsync(na, ring_nr, 0);
1718 		if (error)
1719 			goto put_out;
1720 	}
1721 	if (kring->nr_hwavail == 0 && netmap_verbose) {
1722 		D("how strange, interrupt with no packets on %s",
1723 			NM_IFPNAME(ifp));
1724 		goto put_out;
1725 	}
1726 	/* XXX avail ? */
1727 	ring->cur = nm_kr_rxpos(kring);
1728 	netmap_vp_txsync(vpna, ring_nr, flags);
1729 
1730 	if (!is_host_ring)
1731 		error = na->nm_rxsync(na, ring_nr, 0);
1732 
1733 put_out:
1734 	nm_kr_put(kring);
1735 	return error;
1736 }
1737 
1738 static int
1739 netmap_bwrap_register(struct netmap_adapter *na, int onoff)
1740 {
1741 	struct netmap_bwrap_adapter *bna =
1742 		(struct netmap_bwrap_adapter *)na;
1743 	struct netmap_adapter *hwna = bna->hwna;
1744 	struct netmap_vp_adapter *hostna = &bna->host;
1745 	int error;
1746 
1747 	ND("%s %d", NM_IFPNAME(ifp), onoff);
1748 
1749 	if (onoff) {
1750 		int i;
1751 
1752 		hwna->na_lut = na->na_lut;
1753 		hwna->na_lut_objtotal = na->na_lut_objtotal;
1754 
1755 		if (hostna->na_bdg) {
1756 			hostna->up.na_lut = na->na_lut;
1757 			hostna->up.na_lut_objtotal = na->na_lut_objtotal;
1758 		}
1759 
1760 		/* cross-link the netmap rings */
1761 		for (i = 0; i <= na->num_tx_rings; i++) {
1762 			hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
1763 			hwna->tx_rings[i].ring = na->rx_rings[i].ring;
1764 		}
1765 		for (i = 0; i <= na->num_rx_rings; i++) {
1766 			hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
1767 			hwna->rx_rings[i].ring = na->tx_rings[i].ring;
1768 		}
1769 	}
1770 
1771 	if (hwna->ifp) {
1772 		error = hwna->nm_register(hwna, onoff);
1773 		if (error)
1774 			return error;
1775 	}
1776 
1777 	bdg_netmap_reg(na, onoff);
1778 
1779 	if (onoff) {
1780 		bna->save_notify = hwna->nm_notify;
1781 		hwna->nm_notify = netmap_bwrap_intr_notify;
1782 	} else {
1783 		hwna->nm_notify = bna->save_notify;
1784 		hwna->na_lut = NULL;
1785 		hwna->na_lut_objtotal = 0;
1786 	}
1787 
1788 	return 0;
1789 }
1790 
1791 static int
1792 netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
1793 				    u_int *rxr, u_int *rxd)
1794 {
1795 	struct netmap_bwrap_adapter *bna =
1796 		(struct netmap_bwrap_adapter *)na;
1797 	struct netmap_adapter *hwna = bna->hwna;
1798 
1799 	/* forward the request */
1800 	netmap_update_config(hwna);
1801 	/* swap the results */
1802 	*txr = hwna->num_rx_rings;
1803 	*txd = hwna->num_rx_desc;
1804 	*rxr = hwna->num_tx_rings;
1805 	*rxd = hwna->num_tx_desc;
1806 
1807 	return 0;
1808 }
1809 
1810 static int
1811 netmap_bwrap_krings_create(struct netmap_adapter *na)
1812 {
1813 	struct netmap_bwrap_adapter *bna =
1814 		(struct netmap_bwrap_adapter *)na;
1815 	struct netmap_adapter *hwna = bna->hwna;
1816 	struct netmap_adapter *hostna = &bna->host.up;
1817 	int error;
1818 
1819 	ND("%s", NM_IFPNAME(na->ifp));
1820 
1821 	error = netmap_vp_krings_create(na);
1822 	if (error)
1823 		return error;
1824 
1825 	error = hwna->nm_krings_create(hwna);
1826 	if (error) {
1827 		netmap_vp_krings_delete(na);
1828 		return error;
1829 	}
1830 
1831 	hostna->tx_rings = na->tx_rings + na->num_tx_rings;
1832 	hostna->rx_rings = na->rx_rings + na->num_rx_rings;
1833 
1834 	return 0;
1835 }
1836 
1837 static void
1838 netmap_bwrap_krings_delete(struct netmap_adapter *na)
1839 {
1840 	struct netmap_bwrap_adapter *bna =
1841 		(struct netmap_bwrap_adapter *)na;
1842 	struct netmap_adapter *hwna = bna->hwna;
1843 
1844 	ND("%s", NM_IFPNAME(na->ifp));
1845 
1846 	hwna->nm_krings_delete(hwna);
1847 	netmap_vp_krings_delete(na);
1848 }
1849 
1850 /* notify method for the bridge-->hwna direction */
1851 static int
1852 netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
1853 {
1854 	struct netmap_bwrap_adapter *bna =
1855 		(struct netmap_bwrap_adapter *)na;
1856 	struct netmap_adapter *hwna = bna->hwna;
1857 	struct netmap_kring *kring, *hw_kring;
1858 	struct netmap_ring *ring;
1859 	u_int lim, k;
1860 	int error = 0;
1861 
1862 	if (tx == NR_TX)
1863 		return ENXIO;
1864 
1865 	kring = &na->rx_rings[ring_n];
1866 	hw_kring = &hwna->tx_rings[ring_n];
1867 	ring = kring->ring;
1868 
1869 	lim = kring->nkr_num_slots - 1;
1870 	k = nm_kr_rxpos(kring);
1871 
1872 	if (hwna->ifp == NULL || !(hwna->ifp->if_capenable & IFCAP_NETMAP))
1873 		return 0;
1874 	ring->cur = k;
1875 	ND("%s[%d] PRE rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
1876 		NM_IFPNAME(na->ifp), ring_n,
1877 		kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
1878 		ring->cur, ring->avail, ring->reserved,
1879 		hw_kring->nr_hwcur, hw_kring->nr_hwavail);
1880 	if (ring_n == na->num_rx_rings) {
1881 		netmap_txsync_to_host(hwna);
1882 	} else {
1883 		error = hwna->nm_txsync(hwna, ring_n, flags);
1884 	}
1885 	kring->nr_hwcur = ring->cur;
1886 	kring->nr_hwavail = 0;
1887 	kring->nr_hwreserved = lim - ring->avail;
1888 	ND("%s[%d] PST rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
1889 		NM_IFPNAME(na->ifp), ring_n,
1890 		kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
1891 		ring->cur, ring->avail, ring->reserved,
1892 		hw_kring->nr_hwcur, hw_kring->nr_hwavail);
1893 
1894 	return error;
1895 }
1896 
1897 static int
1898 netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
1899 {
1900 	struct netmap_bwrap_adapter *bna = na->na_private;
1901 	struct netmap_adapter *port_na = &bna->up.up;
1902 	if (tx == NR_TX || ring_n != 0)
1903 		return ENXIO;
1904 	return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
1905 }
1906 
1907 /* attach a bridge wrapper to the 'real' device */
1908 static int
1909 netmap_bwrap_attach(struct ifnet *fake, struct ifnet *real)
1910 {
1911 	struct netmap_bwrap_adapter *bna;
1912 	struct netmap_adapter *na;
1913 	struct netmap_adapter *hwna = NA(real);
1914 	struct netmap_adapter *hostna;
1915 	int error;
1916 
1917 
1918 	bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
1919 	if (bna == NULL)
1920 		return ENOMEM;
1921 
1922 	na = &bna->up.up;
1923 	na->ifp = fake;
1924 	/* fill the ring data for the bwrap adapter with rx/tx meanings
1925 	 * swapped. The real cross-linking will be done during register,
1926 	 * when all the krings will have been created.
1927 	 */
1928 	na->num_rx_rings = hwna->num_tx_rings;
1929 	na->num_tx_rings = hwna->num_rx_rings;
1930 	na->num_tx_desc = hwna->num_rx_desc;
1931 	na->num_rx_desc = hwna->num_tx_desc;
1932 	na->nm_dtor = netmap_bwrap_dtor;
1933 	na->nm_register = netmap_bwrap_register;
1934 	// na->nm_txsync = netmap_bwrap_txsync;
1935 	// na->nm_rxsync = netmap_bwrap_rxsync;
1936 	na->nm_config = netmap_bwrap_config;
1937 	na->nm_krings_create = netmap_bwrap_krings_create;
1938 	na->nm_krings_delete = netmap_bwrap_krings_delete;
1939 	na->nm_notify = netmap_bwrap_notify;
1940 	na->nm_mem = hwna->nm_mem;
1941 	na->na_private = na; /* prevent NIOCREGIF */
1942 	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
1943 
1944 	bna->hwna = hwna;
1945 	netmap_adapter_get(hwna);
1946 	hwna->na_private = bna; /* weak reference */
1947 
1948 	hostna = &bna->host.up;
1949 	hostna->ifp = hwna->ifp;
1950 	hostna->num_tx_rings = 1;
1951 	hostna->num_tx_desc = hwna->num_rx_desc;
1952 	hostna->num_rx_rings = 1;
1953 	hostna->num_rx_desc = hwna->num_tx_desc;
1954 	// hostna->nm_txsync = netmap_bwrap_host_txsync;
1955 	// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
1956 	hostna->nm_notify = netmap_bwrap_host_notify;
1957 	hostna->nm_mem = na->nm_mem;
1958 	hostna->na_private = bna;
1959 
1960 	D("%s<->%s txr %d txd %d rxr %d rxd %d", fake->if_xname, real->if_xname,
1961 		na->num_tx_rings, na->num_tx_desc,
1962 		na->num_rx_rings, na->num_rx_desc);
1963 
1964 	error = netmap_attach_common(na);
1965 	if (error) {
1966 		netmap_adapter_put(hwna);
1967 		free(bna, M_DEVBUF);
1968 		return error;
1969 	}
1970 	return 0;
1971 }
1972 
1973 void
1974 netmap_init_bridges(void)
1975 {
1976 	int i;
1977 	bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
1978 	for (i = 0; i < NM_BRIDGES; i++)
1979 		BDG_RWINIT(&nm_bridges[i]);
1980 }
1981 #endif /* WITH_VALE */
1982