xref: /freebsd-13.1/sys/dev/netmap/netmap_vale.c (revision f9790aeb)
/*
 * Copyright (C) 2013 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


/*
 * This module implements the VALE switch for netmap

--- VALE SWITCH ---

NMG_LOCK() serializes all modifications to switches and ports.
A switch cannot be deleted until all ports are gone.

For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When a port is configured or deleted, the
lock is acquired in exclusive mode (after holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.

On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch)

 */
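
/*
 * Illustrative sketch only (not compiled): the intended lock order,
 * using the BDG_* macros and NMG_LOCK() defined below/elsewhere.
 *
 *	// reconfiguration path (add/delete a port)
 *	NMG_LOCK();
 *	BDG_WLOCK(b);		// exclusive: forwarders are drained
 *	... update b->bdg_ports[] and b->bdg_port_index[] ...
 *	BDG_WUNLOCK(b);
 *	NMG_UNLOCK();
 *
 *	// forwarding path (no NMG_LOCK)
 *	BDG_RLOCK(b);		// shared, sleepable: may take page faults
 *	... lease rx slots, copy packets, publish the lease ...
 *	BDG_RUNLOCK(b);
 */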

/*
 * OS-specific code that is used only within this file.
 * Other OS-specific code that must be accessed by drivers
 * is present in netmap_kern.h
 */

#if defined(__FreeBSD__)
#include <sys/cdefs.h> /* prerequisite */
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>	/* defines used in kernel.h */
#include <sys/kernel.h>	/* types used in module initialization */
#include <sys/conf.h>	/* cdevsw struct, UID, GID */
#include <sys/sockio.h>
#include <sys/socketvar.h>	/* struct socket */
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/rwlock.h>
#include <sys/socket.h> /* sockaddrs */
#include <sys/selinfo.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/bpf.h>		/* BIOCIMMEDIATE */
#include <machine/bus.h>	/* bus_dmamap_* */
#include <sys/endian.h>
#include <sys/refcount.h>

// #define prefetch(x)	__builtin_prefetch(x)


#define BDG_RWLOCK_T		struct rwlock

#define	BDG_RWINIT(b)		\
	rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
#define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)
#define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)
#define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)
#define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)
#define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)
#define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)


#elif defined(linux)

#include "bsd_glue.h"

#elif defined(__APPLE__)

#warning OSX support is only partial
#include "osx_glue.h"

#else

#error	Unsupported platform

#endif /* unsupported */

/*
 * common headers
 */

#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>

#ifdef WITH_VALE

/*
 * system parameters (most of them in netmap_kern.h)
 * NM_NAME	prefix for switch port names, default "vale"
 * NM_BDG_MAXPORTS	number of ports
 * NM_BRIDGES	max number of switches in the system.
 *	XXX should become a sysctl or tunable
 *
 * Switch ports are named valeX:Y where X is the switch name and Y
 * is the port. If Y matches a physical interface name, the port is
 * connected to a physical device.
 *
 * Unlike physical interfaces, switch ports use their own memory region
 * for rings and buffers.
 * The virtual interfaces use per-queue locks instead of the core lock.
 * In the tx loop, we aggregate traffic in batches to make all operations
 * faster. The batch size is bridge_batch.
 */
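
/*
 * Hypothetical userspace view of the naming scheme above, using the
 * public netmap_user.h helpers (sketch only, not part of this module;
 * nm_open() and the auto-attach of NICs may differ across netmap
 * versions):
 *
 *	#include <net/netmap_user.h>
 *
 *	// create/attach virtual port 1 on switch "vale0"
 *	struct nm_desc *d = nm_open("vale0:1", NULL, 0, NULL);
 *
 *	// attach the physical interface em0 to the same switch
 *	struct nm_desc *n = nm_open("vale0:em0", NULL, 0, NULL);
 */
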
#define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
#define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
#define NM_BRIDGE_RINGSIZE	1024	/* in the device */
#define NM_BDG_HASH		1024	/* forwarding table entries */
#define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
#define NM_MULTISEG		64	/* max size of a chain of bufs */
/* actual size of the tables */
#define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
/* NM_FT_NULL terminates a list of slots in the ft */
#define NM_FT_NULL		NM_BDG_BATCH_MAX
#define	NM_BRIDGES		8	/* number of bridges */


/*
 * bridge_batch is set via sysctl to the max batch size to be
 * used in the bridge. The effective batch may be larger, because
 * the fragments of the last packet in a block may overflow the size.
 */
int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
SYSCTL_DECL(_dev_netmap);
SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0, "");


static int bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp);
static int bdg_netmap_reg(struct netmap_adapter *na, int onoff);
static int netmap_bwrap_attach(struct ifnet *, struct ifnet *);
static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
int kern_netmap_regif(struct nmreq *nmr);

/*
 * Each transmit queue accumulates a batch of packets into
 * a structure before forwarding. Packets to the same
 * destination are put in a list using ft_next as a link field.
 * ft_frags and ft_next are valid only on the first fragment.
 */
struct nm_bdg_fwd {	/* forwarding entry for a bridge */
	void *ft_buf;		/* netmap or indirect buffer */
	uint8_t ft_frags;	/* how many fragments (only on 1st frag) */
	uint8_t _ft_port;	/* dst port (unused) */
	uint16_t ft_flags;	/* flags, e.g. indirect */
	uint16_t ft_len;	/* src fragment len */
	uint16_t ft_next;	/* next packet to same destination */
};

/*
 * For each output interface, nm_bdg_q is used to construct a list.
 * bq_len is the number of output buffers (we can have coalescing
 * during the copy).
 */
struct nm_bdg_q {
	uint16_t bq_head;
	uint16_t bq_tail;
	uint32_t bq_len;	/* number of buffers */
};
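
/*
 * Sketch of how the two structures above cooperate (illustrative only):
 * entries destined to the same port:ring are chained through ft_next,
 * and NM_FT_NULL marks the end of the chain.
 *
 *	struct nm_bdg_q *q = ...;	// one queue per destination
 *	uint16_t i;
 *
 *	for (i = q->bq_head; i != NM_FT_NULL; i = ft[i].ft_next) {
 *		// ft[i] is the first fragment of a packet;
 *		// ft[i].ft_frags tells how many slots it spans
 *	}
 */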

/* XXX revise this */
struct nm_hash_ent {
	uint64_t	mac;	/* the top 2 bytes are the epoch */
	uint64_t	ports;
};

/*
 * nm_bridge is a descriptor for a VALE switch.
 * Interfaces for a bridge are all in bdg_ports[].
 * The array has a fixed size; an empty entry does not terminate
 * the search, but lookups only occur on attach/detach so we
 * don't mind if they are slow.
 *
 * The bridge is non blocking on the transmit ports: excess
 * packets are dropped if there is no room on the output port.
 *
 * bdg_lock protects accesses to the bdg_ports array.
 * This is a rw lock (or equivalent).
 */
struct nm_bridge {
	/* XXX what is the proper alignment/layout ? */
	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
	int		bdg_namelen;
	uint32_t	bdg_active_ports; /* 0 means free */
	char		bdg_basename[IFNAMSIZ];

	/* Indexes of active ports (up to active_ports)
	 * and all other remaining ports.
	 */
	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];

	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];


	/*
	 * The function to decide the destination port.
	 * It returns the index of the destination port,
	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
	 * forward this packet.  ring_nr is the source ring index, and the
	 * function may overwrite this value to forward this packet to a
	 * different ring index.
	 * This function must be set by netmap_bdg_ctl().
	 */
	bdg_lookup_fn_t nm_bdg_lookup;

	/* the forwarding table, MAC+ports.
	 * XXX should be changed to an argument to be passed to
	 * the lookup function, and allocated on attach
	 */
	struct nm_hash_ent ht[NM_BDG_HASH];
};


/*
 * XXX in principle nm_bridges could be created dynamically.
 * Right now we have a static array and deletions are protected
 * by an exclusive lock.
 */
struct nm_bridge nm_bridges[NM_BRIDGES];


/*
 * A few functions to tell which kind of port we are using.
 * XXX should we hold a lock ?
 *
 * nma_is_vp()		virtual port
 * nma_is_host()	port connected to the host stack
 * nma_is_hw()		port connected to a NIC
 * nma_is_generic()	generic netmap adapter XXX stop this madness
 */
static __inline int
nma_is_vp(struct netmap_adapter *na)
{
	return na->nm_register == bdg_netmap_reg;
}


static __inline int
nma_is_host(struct netmap_adapter *na)
{
	return na->nm_register == NULL;
}


static __inline int
nma_is_hw(struct netmap_adapter *na)
{
	/* In case of sw adapter, nm_register is NULL */
	return !nma_is_vp(na) && !nma_is_host(na) && !nma_is_generic(na);
}

static __inline int
nma_is_bwrap(struct netmap_adapter *na)
{
	return na->nm_register == netmap_bwrap_register;
}


/*
 * this is a slightly optimized copy routine which rounds
 * the length up to a multiple of 64 bytes and is often faster
 * than dealing with other odd sizes. We assume there is enough
 * room in the source and destination buffers.
 *
 * XXX only for multiples of 64 bytes, non overlapped.
 */
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
        uint64_t *src = _src;
        uint64_t *dst = _dst;
        if (unlikely(l >= 1024)) {
                memcpy(dst, src, l);
                return;
        }
        for (; likely(l > 0); l-=64) {
                *dst++ = *src++;
                *dst++ = *src++;
                *dst++ = *src++;
                *dst++ = *src++;
                *dst++ = *src++;
                *dst++ = *src++;
                *dst++ = *src++;
                *dst++ = *src++;
        }
}
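
/*
 * Usage note (illustrative, mirrors what nm_bdg_flush() below does):
 * callers round the length up before the call, so short packets always
 * copy whole 64-byte chunks:
 *
 *	size_t copy_len = ft_p->ft_len;
 *	copy_len = (copy_len + 63) & ~63;	// round to a multiple of 64
 *	pkt_copy(src, dst, (int)copy_len);
 */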


/*
 * locate a bridge among the existing ones.
 * MUST BE CALLED WITH NMG_LOCK()
 *
 * a ':' in the name terminates the bridge name; otherwise the bridge
 * name is just NM_NAME. We assume that this is called with a name of
 * at least NM_NAME chars.
 */
static struct nm_bridge *
nm_find_bridge(const char *name, int create)
{
	int i, l, namelen;
	struct nm_bridge *b = NULL;

	NMG_LOCK_ASSERT();

	namelen = strlen(NM_NAME);	/* base length */
	l = name ? strlen(name) : 0;		/* actual length */
	if (l < namelen) {
		D("invalid bridge name %s", name ? name : "(null)");
		return NULL;
	}
	for (i = namelen + 1; i < l; i++) {
		if (name[i] == ':') {
			namelen = i;
			break;
		}
	}
	if (namelen >= IFNAMSIZ)
		namelen = IFNAMSIZ;
	ND("--- prefix is '%.*s' ---", namelen, name);

	/* lookup the name, remember empty slot if there is one */
	for (i = 0; i < NM_BRIDGES; i++) {
		struct nm_bridge *x = nm_bridges + i;

		if (x->bdg_active_ports == 0) {
			if (create && b == NULL)
				b = x;	/* record empty slot */
		} else if (x->bdg_namelen != namelen) {
			continue;
		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
			ND("found '%.*s' at %d", namelen, name, i);
			b = x;
			break;
		}
	}
	if (i == NM_BRIDGES && b) { /* name not found, can create entry */
		/* initialize the bridge */
		strncpy(b->bdg_basename, name, namelen);
		ND("create new bridge %s with ports %d", b->bdg_basename,
			b->bdg_active_ports);
		b->bdg_namelen = namelen;
		b->bdg_active_ports = 0;
		for (i = 0; i < NM_BDG_MAXPORTS; i++)
			b->bdg_port_index[i] = i;
		/* set the default function */
		b->nm_bdg_lookup = netmap_bdg_learning;
		/* reset the MAC address table */
		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
	}
	return b;
}
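
/*
 * Worked example for the parsing above (illustrative only):
 *
 *	name = "vale1:em0"	-> ':' at index 5, bridge name "vale1"
 *	name = "vale1:1"	-> bridge "vale1", port "1" (virtual)
 *	name = "vale1"		-> no ':' after NM_NAME, so the bridge
 *				   name falls back to just NM_NAME ("vale")
 */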


/*
 * Free the forwarding tables for rings attached to switch ports.
 */
static void
nm_free_bdgfwd(struct netmap_adapter *na)
{
	int nrings, i;
	struct netmap_kring *kring;

	NMG_LOCK_ASSERT();
	nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings;
	kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings;
	for (i = 0; i < nrings; i++) {
		if (kring[i].nkr_ft) {
			free(kring[i].nkr_ft, M_DEVBUF);
			kring[i].nkr_ft = NULL; /* protect from freeing twice */
		}
	}
}


/*
 * Allocate the forwarding tables for the rings attached to the bridge ports.
 */
static int
nm_alloc_bdgfwd(struct netmap_adapter *na)
{
	int nrings, l, i, num_dstq;
	struct netmap_kring *kring;

	NMG_LOCK_ASSERT();
	/* all port:rings + broadcast */
	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
	l += sizeof(struct nm_bdg_q) * num_dstq;
	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;

	nrings = na->num_tx_rings + 1;
	kring = na->tx_rings;
	for (i = 0; i < nrings; i++) {
		struct nm_bdg_fwd *ft;
		struct nm_bdg_q *dstq;
		int j;

		ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
		if (!ft) {
			nm_free_bdgfwd(na);
			return ENOMEM;
		}
		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
		for (j = 0; j < num_dstq; j++) {
			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
			dstq[j].bq_len = 0;
		}
		kring[i].nkr_ft = ft;
	}
	return 0;
}
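
/*
 * Layout of each per-ring work area allocated above (illustrative):
 *
 *	ft:   struct nm_bdg_fwd [NM_BDG_BATCH_MAX]	the batch itself
 *	dstq: struct nm_bdg_q   [num_dstq]		one queue per
 *							port:ring + broadcast
 *	dsts: uint16_t          [NM_BDG_BATCH_MAX]	indexes of the
 *							active queues
 *
 * nm_bdg_flush() below recovers dstq and dsts from ft with the same
 * pointer arithmetic used here.
 */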


static void
netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
{
	int s_hw = hw, s_sw = sw;
	int i, lim = b->bdg_active_ports;
	uint8_t tmp[NM_BDG_MAXPORTS];

	/*
	New algorithm:
	make a copy of bdg_port_index;
	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
	in the array of bdg_port_index, replacing them with
	entries from the bottom of the array;
	decrement bdg_active_ports;
	acquire BDG_WLOCK() and copy back the array.
	 */

	D("detach %d and %d (lim %d)", hw, sw, lim);
	/* make a copy of the list of active ports, update it,
	 * and then copy back within BDG_WLOCK().
	 */
	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
		if (hw >= 0 && tmp[i] == hw) {
			ND("detach hw %d at %d", hw, i);
			lim--; /* point to last active port */
			tmp[i] = tmp[lim]; /* swap with i */
			tmp[lim] = hw;	/* now this is inactive */
			hw = -1;
		} else if (sw >= 0 && tmp[i] == sw) {
			ND("detach sw %d at %d", sw, i);
			lim--;
			tmp[i] = tmp[lim];
			tmp[lim] = sw;
			sw = -1;
		} else {
			i++;
		}
	}
	if (hw >= 0 || sw >= 0) {
		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
	}

	BDG_WLOCK(b);
	b->bdg_ports[s_hw] = NULL;
	if (s_sw >= 0) {
		b->bdg_ports[s_sw] = NULL;
	}
	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
	b->bdg_active_ports = lim;
	BDG_WUNLOCK(b);

	ND("now %d active ports", lim);
	if (lim == 0) {
		ND("marking bridge %s as free", b->bdg_basename);
		b->nm_bdg_lookup = NULL;
	}
}
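
/*
 * Worked example of the swap-with-last deletion above (illustrative):
 *
 *	bdg_port_index = [3 0 5 2], active_ports = 4, detach hw = 0
 *
 *	i = 1 matches hw: lim becomes 3, tmp[1] = tmp[3] (2), tmp[3] = 0
 *
 *	bdg_port_index = [3 2 5 | 0], active_ports = 3
 *
 * The detached port index is parked past the active region, ready to
 * be reused by the next attach.
 */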

static void
netmap_adapter_vp_dtor(struct netmap_adapter *na)
{
	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
	struct nm_bridge *b = vpna->na_bdg;
	struct ifnet *ifp = na->ifp;

	ND("%s has %d references", NM_IFPNAME(ifp), na->na_refcount);

	if (b) {
		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
	}

	bzero(ifp, sizeof(*ifp));
	free(ifp, M_DEVBUF);
	na->ifp = NULL;
}

int
netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
{
	const char *name = nmr->nr_name;
	struct ifnet *ifp;
	int error = 0;
	struct netmap_adapter *ret;
	struct netmap_vp_adapter *vpna;
	struct nm_bridge *b;
	int i, j, cand = -1, cand2 = -1;
	int needed;

	*na = NULL;     /* default return value */

	/* first try to see if this is a bridge port. */
	NMG_LOCK_ASSERT();
	if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) {
		return 0;  /* no error, but no VALE prefix */
	}

	b = nm_find_bridge(name, create);
	if (b == NULL) {
		D("no bridges available for '%s'", name);
		return (ENXIO);
	}

	/* Now that we are sure the name starts with the bridge's name,
	 * look up the port in the bridge. We need to scan the entire
	 * list. It is not important to hold a WLOCK on the bridge
	 * during the search because NMG_LOCK already guarantees
	 * that there are no other possible writers.
	 */

	/* lookup in the local list of ports */
	for (j = 0; j < b->bdg_active_ports; j++) {
		i = b->bdg_port_index[j];
		vpna = b->bdg_ports[i];
		// KASSERT(na != NULL);
		ifp = vpna->up.ifp;
		/* XXX make sure the name only contains one : */
		if (!strcmp(NM_IFPNAME(ifp), name)) {
			netmap_adapter_get(&vpna->up);
			ND("found existing if %s refs %d", name,
				vpna->na_bdg_refcount);
			*na = (struct netmap_adapter *)vpna;
			return 0;
		}
	}
	/* not found, should we create it? */
	if (!create)
		return ENXIO;
	/* yes we should, see if we have space to attach entries */
	needed = 2; /* in some cases we only need 1 */
	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
		D("bridge full %d, cannot create new port", b->bdg_active_ports);
		return EINVAL;
	}
	/* record the next two ports available, but do not allocate yet */
	cand = b->bdg_port_index[b->bdg_active_ports];
	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
	ND("+++ bridge %s port %s used %d avail %d %d",
		b->bdg_basename, name, b->bdg_active_ports, cand, cand2);

	/*
	 * try to see if there is a matching NIC with this name
	 * (after the bridge's name)
	 */
	ifp = ifunit_ref(name + b->bdg_namelen + 1);
	if (!ifp) { /* this is a virtual port */
		if (nmr->nr_cmd) {
			/* nr_cmd must be 0 for a virtual port */
			return EINVAL;
		}

		/* create a struct ifnet for the new port.
		 * need M_NOWAIT as we are under nma_lock
		 */
		ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
		if (!ifp)
			return ENOMEM;

		strcpy(ifp->if_xname, name);
		/* bdg_netmap_attach creates a struct netmap_adapter */
		error = bdg_netmap_attach(nmr, ifp);
		if (error) {
			D("error %d", error);
			free(ifp, M_DEVBUF);
			return error;
		}
		ret = NA(ifp);
		cand2 = -1;	/* only need one port */
	} else {  /* this is a NIC */
		struct ifnet *fake_ifp;

		error = netmap_get_hw_na(ifp, &ret);
		if (error || ret == NULL)
			goto out;

		/* make sure the NIC is not already in use */
		if (NETMAP_OWNED_BY_ANY(ret)) {
			D("NIC %s busy, cannot attach to bridge",
				NM_IFPNAME(ifp));
			error = EINVAL;
			goto out;
		}
		/* create a fake interface */
		fake_ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
		if (!fake_ifp) {
			error = ENOMEM;
			goto out;
		}
		strcpy(fake_ifp->if_xname, name);
		error = netmap_bwrap_attach(fake_ifp, ifp);
		if (error) {
			free(fake_ifp, M_DEVBUF);
			goto out;
		}
		ret = NA(fake_ifp);
		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
			cand2 = -1; /* only need one port */
		if_rele(ifp);
	}
	vpna = (struct netmap_vp_adapter *)ret;

	BDG_WLOCK(b);
	vpna->bdg_port = cand;
	ND("NIC  %p to bridge port %d", vpna, cand);
	/* bind the port to the bridge (virtual ports are not active) */
	b->bdg_ports[cand] = vpna;
	vpna->na_bdg = b;
	b->bdg_active_ports++;
	if (cand2 >= 0) {
		struct netmap_vp_adapter *hostna = vpna + 1;
		/* also bind the host stack to the bridge */
		b->bdg_ports[cand2] = hostna;
		hostna->bdg_port = cand2;
		hostna->na_bdg = b;
		b->bdg_active_ports++;
		ND("host %p to bridge port %d", hostna, cand2);
	}
	ND("if %s refs %d", name, vpna->up.na_refcount);
	BDG_WUNLOCK(b);
	*na = ret;
	netmap_adapter_get(ret);
	return 0;

out:
	if_rele(ifp);

	return error;
}


/* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */
static int
nm_bdg_attach(struct nmreq *nmr)
{
	struct netmap_adapter *na;
	struct netmap_if *nifp;
	struct netmap_priv_d *npriv;
	struct netmap_bwrap_adapter *bna;
	int error;

	npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
	if (npriv == NULL)
		return ENOMEM;
	NMG_LOCK();
	/* XXX probably netmap_get_bdg_na() */
	error = netmap_get_na(nmr, &na, 1 /* create if not exists */);
	if (error) /* no device, or another bridge or user owns the device */
		goto unlock_exit;
	/* netmap_get_na() sets na_bdg if this is a physical interface
	 * that we can attach to a switch.
	 */
	if (!nma_is_bwrap(na)) {
		/* got reference to a virtual port or direct access to a NIC.
		 * perhaps specified no bridge prefix or wrong NIC name
		 */
		error = EINVAL;
		goto unref_exit;
	}

	if (na->active_fds > 0) { /* already registered */
		error = EBUSY;
		goto unref_exit;
	}

	nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, &error);
	if (!nifp) {
		goto unref_exit;
	}

	bna = (struct netmap_bwrap_adapter*)na;
	bna->na_kpriv = npriv;
	NMG_UNLOCK();
	ND("registered %s to netmap-mode", NM_IFPNAME(na->ifp));
	return 0;

unref_exit:
	netmap_adapter_put(na);
unlock_exit:
	NMG_UNLOCK();
	bzero(npriv, sizeof(*npriv));
	free(npriv, M_DEVBUF);
	return error;
}

static int
nm_bdg_detach(struct nmreq *nmr)
{
	struct netmap_adapter *na;
	int error;
	struct netmap_bwrap_adapter *bna;
	int last_instance;

	NMG_LOCK();
	error = netmap_get_na(nmr, &na, 0 /* don't create */);
	if (error) { /* no device, or another bridge or user owns the device */
		goto unlock_exit;
	}
	if (!nma_is_bwrap(na)) {
		/* got reference to a virtual port or direct access to a NIC.
		 * perhaps no bridge prefix or a wrong NIC name was specified
		 */
		error = EINVAL;
		goto unref_exit;
	}
	bna = (struct netmap_bwrap_adapter *)na;

	if (na->active_fds == 0) { /* not registered */
		error = EINVAL;
		goto unref_exit;
	}

	last_instance = netmap_dtor_locked(bna->na_kpriv); /* unregister */
	if (!last_instance) {
		D("--- error, trying to detach an entry with active mmaps");
		error = EINVAL;
	} else {
		struct netmap_priv_d *npriv = bna->na_kpriv;

		bna->na_kpriv = NULL;
		D("deleting priv");

		bzero(npriv, sizeof(*npriv));
		free(npriv, M_DEVBUF);
	}

unref_exit:
	netmap_adapter_put(na);
unlock_exit:
	NMG_UNLOCK();
	return error;
}
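
/*
 * Hypothetical userspace counterpart of the two functions above
 * (sketch only, error handling omitted; this mirrors how vale-ctl
 * drives them):
 *
 *	struct nmreq nmr;
 *	int fd = open("/dev/netmap", O_RDWR);
 *
 *	bzero(&nmr, sizeof(nmr));
 *	nmr.nr_version = NETMAP_API;
 *	strncpy(nmr.nr_name, "vale0:em0", sizeof(nmr.nr_name));
 *	nmr.nr_cmd = NETMAP_BDG_ATTACH;		// or NETMAP_BDG_DETACH
 *	ioctl(fd, NIOCREGIF, &nmr);
 */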


/* exported to kernel callers, e.g. OVS ?
 * Entry point.
 * Called without NMG_LOCK.
 */
int
netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
{
	struct nm_bridge *b;
	struct netmap_adapter *na;
	struct netmap_vp_adapter *vpna;
	struct ifnet *iter;
	char *name = nmr->nr_name;
	int cmd = nmr->nr_cmd, namelen = strlen(name);
	int error = 0, i, j;

	switch (cmd) {
	case NETMAP_BDG_ATTACH:
		error = nm_bdg_attach(nmr);
		break;

	case NETMAP_BDG_DETACH:
		error = nm_bdg_detach(nmr);
		break;

	case NETMAP_BDG_LIST:
		/* this is used to enumerate bridges and ports */
		if (namelen) { /* look up indexes of bridge and port */
			if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
				error = EINVAL;
				break;
			}
			NMG_LOCK();
			b = nm_find_bridge(name, 0 /* don't create */);
			if (!b) {
				error = ENOENT;
				NMG_UNLOCK();
				break;
			}

			error = ENOENT;
			for (j = 0; j < b->bdg_active_ports; j++) {
				i = b->bdg_port_index[j];
				vpna = b->bdg_ports[i];
				if (vpna == NULL) {
					D("---AAAAAAAAARGH-------");
					continue;
				}
				iter = vpna->up.ifp;
				/* the former and the latter identify a
				 * virtual port and a NIC, respectively
				 */
				if (!strcmp(iter->if_xname, name)) {
					/* bridge index */
					nmr->nr_arg1 = b - nm_bridges;
					nmr->nr_arg2 = i; /* port index */
					error = 0;
					break;
				}
			}
			NMG_UNLOCK();
		} else {
			/* return the first non-empty entry starting from
			 * bridge nr_arg1 and port nr_arg2.
			 *
			 * Users can detect the end of the same bridge by
			 * comparing the new and old value of nr_arg1, and can
			 * detect the end of all the bridges by error != 0
			 */
			i = nmr->nr_arg1;
			j = nmr->nr_arg2;

			NMG_LOCK();
			for (error = ENOENT; i < NM_BRIDGES; i++) {
				b = nm_bridges + i;
				if (j >= b->bdg_active_ports) {
					j = 0; /* following bridges scan from 0 */
					continue;
				}
				nmr->nr_arg1 = i;
				nmr->nr_arg2 = j;
				j = b->bdg_port_index[j];
				vpna = b->bdg_ports[j];
				iter = vpna->up.ifp;
				strncpy(name, iter->if_xname, (size_t)IFNAMSIZ);
				error = 0;
				break;
			}
			NMG_UNLOCK();
		}
		break;

	case NETMAP_BDG_LOOKUP_REG:
		/* register a lookup function for the given bridge.
		 * nmr->nr_name may be just the bridge's name (including ':'
		 * if it is not just NM_NAME).
		 */
		if (!func) {
			error = EINVAL;
			break;
		}
		NMG_LOCK();
		b = nm_find_bridge(name, 0 /* don't create */);
		if (!b) {
			error = EINVAL;
		} else {
			b->nm_bdg_lookup = func;
		}
		NMG_UNLOCK();
		break;

	case NETMAP_BDG_OFFSET:
		NMG_LOCK();
		error = netmap_get_bdg_na(nmr, &na, 0);
		if (!error) {
			vpna = (struct netmap_vp_adapter *)na;
			if (nmr->nr_arg1 > NETMAP_BDG_MAX_OFFSET)
				nmr->nr_arg1 = NETMAP_BDG_MAX_OFFSET;
			vpna->offset = nmr->nr_arg1;
			D("Using offset %d for %p", vpna->offset, vpna);
		}
		NMG_UNLOCK();
		break;

	default:
		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
		error = EINVAL;
		break;
	}
	return error;
}
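
/*
 * Hypothetical enumeration loop for NETMAP_BDG_LIST with an empty name,
 * following the protocol documented above (sketch only; the ioctl used
 * here matches what vale-ctl does):
 *
 *	struct nmreq nmr;
 *	bzero(&nmr, sizeof(nmr));
 *	nmr.nr_version = NETMAP_API;
 *	nmr.nr_cmd = NETMAP_BDG_LIST;
 *	nmr.nr_arg1 = nmr.nr_arg2 = 0;	// start from bridge 0, port 0
 *	while (ioctl(fd, NIOCGINFO, &nmr) == 0) {
 *		printf("bridge %d port %d: %s\n",
 *		    nmr.nr_arg1, nmr.nr_arg2, nmr.nr_name);
 *		nmr.nr_name[0] = '\0';	// keep the name empty for the next query
 *		nmr.nr_arg2++;		// ask for the next port
 *	}
 */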


static int
netmap_vp_krings_create(struct netmap_adapter *na)
{
	u_int ntx, nrx, tailroom;
	int error, i;
	uint32_t *leases;

	/* XXX vps do not need host rings,
	 * but we crash if we don't have one
	 */
	ntx = na->num_tx_rings + 1;
	nrx = na->num_rx_rings + 1;

	/*
	 * Leases are attached to RX rings on vale ports
	 */
	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;

	error = netmap_krings_create(na, ntx, nrx, tailroom);
	if (error)
		return error;

	leases = na->tailroom;

	for (i = 0; i < nrx; i++) { /* Receive rings */
		na->rx_rings[i].nkr_leases = leases;
		leases += na->num_rx_desc;
	}

	error = nm_alloc_bdgfwd(na);
	if (error) {
		netmap_krings_delete(na);
		return error;
	}

	return 0;
}

static void
netmap_vp_krings_delete(struct netmap_adapter *na)
{
	nm_free_bdgfwd(na);
	netmap_krings_delete(na);
}


static int
nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
	struct netmap_vp_adapter *na, u_int ring_nr);


/*
 * Grab packets from a kring, move them into the ft structure
 * associated with the tx (input) port. Max one instance per port,
 * filtered on input (ioctl, poll or XXX).
 * Returns the next position in the ring.
 */
static int
nm_bdg_preflush(struct netmap_vp_adapter *na, u_int ring_nr,
	struct netmap_kring *kring, u_int end)
{
	struct netmap_ring *ring = kring->ring;
	struct nm_bdg_fwd *ft;
	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
	u_int ft_i = 0;	/* start from 0 */
	u_int frags = 1; /* how many frags ? */
	struct nm_bridge *b = na->na_bdg;

	/* To protect against modifications to the bridge we acquire a
	 * shared lock, waiting if we can sleep (if the source port is
	 * attached to a user process) or with a trylock otherwise (NICs).
	 */
	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
		BDG_RLOCK(b);
	else if (!BDG_RTRYLOCK(b))
		return 0;
	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
	ft = kring->nkr_ft;

	for (; likely(j != end); j = nm_next(j, lim)) {
		struct netmap_slot *slot = &ring->slot[j];
		char *buf;

		ft[ft_i].ft_len = slot->len;
		ft[ft_i].ft_flags = slot->flags;

		ND("flags is 0x%x", slot->flags);
		/* this slot goes into a list so initialize the link field */
		ft[ft_i].ft_next = NM_FT_NULL;
		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
			(void *)(uintptr_t)slot->ptr : BDG_NMB(&na->up, slot);
		prefetch(buf);
		++ft_i;
		if (slot->flags & NS_MOREFRAG) {
			frags++;
			continue;
		}
		if (unlikely(netmap_verbose && frags > 1))
			RD(5, "%d frags at %d", frags, ft_i - frags);
		ft[ft_i - frags].ft_frags = frags;
		frags = 1;
		if (unlikely((int)ft_i >= bridge_batch))
			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
	}
	if (frags > 1) {
		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
		// ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
		ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG;
		ft[ft_i - frags].ft_frags = frags - 1;
	}
	if (ft_i)
		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
	BDG_RUNLOCK(b);
	return j;
}


/*
 *---- support for virtual bridge -----
 */

/* ----- FreeBSD if_bridge hash function ------- */

/*
 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
 *
 * http://www.burtleburtle.net/bob/hash/spooky.html
 */
#define mix(a, b, c)                                                    \
do {                                                                    \
        a -= b; a -= c; a ^= (c >> 13);                                 \
        b -= c; b -= a; b ^= (a << 8);                                  \
        c -= a; c -= b; c ^= (b >> 13);                                 \
        a -= b; a -= c; a ^= (c >> 12);                                 \
        b -= c; b -= a; b ^= (a << 16);                                 \
        c -= a; c -= b; c ^= (b >> 5);                                  \
        a -= b; a -= c; a ^= (c >> 3);                                  \
        b -= c; b -= a; b ^= (a << 10);                                 \
        c -= a; c -= b; c ^= (b >> 15);                                 \
} while (/*CONSTCOND*/0)

static __inline uint32_t
nm_bridge_rthash(const uint8_t *addr)
{
        uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hash key

        b += addr[5] << 8;
        b += addr[4];
        a += addr[3] << 24;
        a += addr[2] << 16;
        a += addr[1] << 8;
        a += addr[0];

        mix(a, b, c);
#define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
        return (c & BRIDGE_RTHASH_MASK);
}

#undef mix


static int
bdg_netmap_reg(struct netmap_adapter *na, int onoff)
{
	struct netmap_vp_adapter *vpna =
		(struct netmap_vp_adapter*)na;
	struct ifnet *ifp = na->ifp;

	/* the interface is already attached to the bridge,
	 * so we only need to toggle IFCAP_NETMAP.
	 */
	BDG_WLOCK(vpna->na_bdg);
	if (onoff) {
		ifp->if_capenable |= IFCAP_NETMAP;
	} else {
		ifp->if_capenable &= ~IFCAP_NETMAP;
	}
	BDG_WUNLOCK(vpna->na_bdg);
	return 0;
}


/*
 * Lookup function for a learning bridge.
 * Updates the hash table with the source address, then returns
 * the destination port index, and the ring in *dst_ring
 * (at the moment, always ring 0).
 */
u_int
netmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring,
		struct netmap_vp_adapter *na)
{
	struct nm_hash_ent *ht = na->na_bdg->ht;
	uint32_t sh, dh;
	u_int dst, mysrc = na->bdg_port;
	uint64_t smac, dmac;

	if (buf_len < 14) {
		D("invalid buf length %d", buf_len);
		return NM_BDG_NOPORT;
	}
	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
	smac = le64toh(*(uint64_t *)(buf + 4));
	smac >>= 16;

	/*
	 * The hash is somewhat expensive, there might be some
	 * worthwhile optimizations here.
	 */
	if ((buf[6] & 1) == 0) { /* valid src */
		uint8_t *s = buf+6;
		sh = nm_bridge_rthash(s); // XXX hash of source
		/* update source port forwarding entry */
		ht[sh].mac = smac;	/* XXX expire ? */
		ht[sh].ports = mysrc;
		if (netmap_verbose)
		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
	}
	dst = NM_BDG_BROADCAST;
	if ((buf[0] & 1) == 0) { /* unicast */
		dh = nm_bridge_rthash(buf); // XXX hash of dst
		if (ht[dh].mac == dmac) {	/* found dst */
			dst = ht[dh].ports;
		}
		/* XXX otherwise return NM_BDG_UNKNOWN ? */
	}
	*dst_ring = 0;
	return dst;
}
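
/*
 * A custom lookup function with the same signature can replace the
 * learning bridge via netmap_bdg_ctl(nmr, func) with
 * nr_cmd == NETMAP_BDG_LOOKUP_REG. Minimal hypothetical example that
 * floods every packet:
 *
 *	static u_int
 *	my_flood_lookup(char *buf, u_int buf_len, uint8_t *dst_ring,
 *		struct netmap_vp_adapter *na)
 *	{
 *		(void)buf; (void)buf_len; (void)na;
 *		*dst_ring = 0;
 *		return NM_BDG_BROADCAST;  // all ports but the source
 *	}
 */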


/*
 * This flush routine supports only unicast and broadcast but a large
 * number of ports, and lets us replace the learn and dispatch functions.
 */
int
nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
		u_int ring_nr)
{
	struct nm_bdg_q *dst_ents, *brddst;
	uint16_t num_dsts = 0, *dsts;
	struct nm_bridge *b = na->na_bdg;
	u_int i, j, me = na->bdg_port;

	/*
	 * The work area (pointed by ft) is followed by an array of
	 * pointers to queues, dst_ents; there are NM_BDG_MAXRINGS
	 * queues per port plus one for the broadcast traffic.
	 * Then we have an array of destination indexes.
	 */
	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);

	/* first pass: find a destination for each packet in the batch */
	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
		uint16_t dst_port, d_i;
		struct nm_bdg_q *d;
		uint8_t *buf = ft[i].ft_buf;
		u_int len = ft[i].ft_len;

		ND("slot %d frags %d", i, ft[i].ft_frags);
		/* Drop the packet if the offset is not into the first
		   fragment nor at the very beginning of the second. */
		if (unlikely(na->offset > len))
			continue;
		if (len == na->offset) {
			buf = ft[i+1].ft_buf;
			len = ft[i+1].ft_len;
		} else {
			buf += na->offset;
			len -= na->offset;
		}
		dst_port = b->nm_bdg_lookup(buf, len, &dst_ring, na);
		if (netmap_verbose > 255)
			RD(5, "slot %d port %d -> %d", i, me, dst_port);
		if (dst_port == NM_BDG_NOPORT)
			continue; /* this packet is identified to be dropped */
		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
			continue;
		else if (dst_port == NM_BDG_BROADCAST)
			dst_ring = 0; /* broadcasts always go to ring 0 */
		else if (unlikely(dst_port == me ||
		    !b->bdg_ports[dst_port]))
			continue;

		/* get a position in the scratch pad */
		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
		d = dst_ents + d_i;

		/* append the first fragment to the list */
		if (d->bq_head == NM_FT_NULL) { /* new destination */
			d->bq_head = d->bq_tail = i;
			/* remember this position to be scanned later */
			if (dst_port != NM_BDG_BROADCAST)
				dsts[num_dsts++] = d_i;
		} else {
			ft[d->bq_tail].ft_next = i;
			d->bq_tail = i;
		}
		d->bq_len += ft[i].ft_frags;
	}

	/*
	 * Broadcast traffic goes to ring 0 on all destinations.
	 * So we need to add these rings to the list of ports to scan.
	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
	 * expensive. We should keep a compact list of active destinations
	 * so we could shorten this loop.
	 */
	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
	if (brddst->bq_head != NM_FT_NULL) {
		for (j = 0; likely(j < b->bdg_active_ports); j++) {
			uint16_t d_i;
			i = b->bdg_port_index[j];
			if (unlikely(i == me))
				continue;
			d_i = i * NM_BDG_MAXRINGS;
			if (dst_ents[d_i].bq_head == NM_FT_NULL)
				dsts[num_dsts++] = d_i;
		}
	}

	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
	/* second pass: scan destinations (XXX will be modular somehow) */
	for (i = 0; i < num_dsts; i++) {
		struct ifnet *dst_ifp;
		struct netmap_vp_adapter *dst_na;
		struct netmap_kring *kring;
		struct netmap_ring *ring;
		u_int dst_nr, lim, j, sent = 0, d_i, next, brd_next;
		u_int needed, howmany;
		int retry = netmap_txsync_retry;
		struct nm_bdg_q *d;
		uint32_t my_start = 0, lease_idx = 0;
		int nrings;
		int offset_mismatch;

		d_i = dsts[i];
		ND("second pass %d port %d", i, d_i);
		d = dst_ents + d_i;
		// XXX fix the division
		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
		/* protect from the lookup function returning an inactive
		 * destination port
		 */
		if (unlikely(dst_na == NULL))
			goto cleanup;
		if (dst_na->up.na_flags & NAF_SW_ONLY)
			goto cleanup;
		dst_ifp = dst_na->up.ifp;
		/*
		 * The interface may be in !netmap mode in two cases:
		 * - when na is attached but not activated yet;
		 * - when na is being deactivated but is still attached.
		 */
		if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) {
			ND("not in netmap mode!");
			goto cleanup;
		}

		offset_mismatch = (dst_na->offset != na->offset);

		/* there is at least one either unicast or broadcast packet */
		brd_next = brddst->bq_head;
		next = d->bq_head;
		/* we need to reserve this many slots. If fewer are
		 * available, some packets will be dropped.
		 * Packets may have multiple fragments, so there is a chance
		 * that we may not use all of the slots we have claimed,
		 * and we will need to handle the leftover ones when we
		 * regain the lock.
		 */
		needed = d->bq_len + brddst->bq_len;

		ND(5, "pass 2 dst %d is %x %s",
			i, d_i, is_vp ? "virtual" : "nic/host");
		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
		nrings = dst_na->up.num_rx_rings;
		if (dst_nr >= nrings)
			dst_nr = dst_nr % nrings;
		kring = &dst_na->up.rx_rings[dst_nr];
		ring = kring->ring;
		lim = kring->nkr_num_slots - 1;

retry:

		/* reserve the buffers in the queue and an entry
		 * to report completion, and drop lock.
		 * XXX this might become a helper function.
		 */
		mtx_lock(&kring->q_lock);
		if (kring->nkr_stopped) {
			mtx_unlock(&kring->q_lock);
			goto cleanup;
		}
		if (dst_na->retry) {
			dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
		}
		my_start = j = kring->nkr_hwlease;
		howmany = nm_kr_space(kring, 1);
		if (needed < howmany)
			howmany = needed;
		lease_idx = nm_kr_lease(kring, howmany, 1);
		mtx_unlock(&kring->q_lock);

		/* only retry if we need more than available slots */
		if (retry && needed <= howmany)
			retry = 0;

		/* copy to the destination queue */
		while (howmany > 0) {
			struct netmap_slot *slot;
			struct nm_bdg_fwd *ft_p, *ft_end;
			u_int cnt;
			int fix_mismatch = offset_mismatch;

			/* find the queue from which we pick next packet.
			 * NM_FT_NULL is always higher than valid indexes
			 * so we never dereference it if the other list
			 * has packets (and if both are empty we never
			 * get here).
			 */
			if (next < brd_next) {
				ft_p = ft + next;
				next = ft_p->ft_next;
			} else { /* insert broadcast */
				ft_p = ft + brd_next;
				brd_next = ft_p->ft_next;
			}
			cnt = ft_p->ft_frags; // cnt > 0
			if (unlikely(cnt > howmany))
			    break; /* no more space */
			howmany -= cnt;
			if (netmap_verbose && cnt > 1)
				RD(5, "rx %d frags to %d", cnt, j);
			ft_end = ft_p + cnt;
			do {
			    char *dst, *src = ft_p->ft_buf;
			    size_t copy_len = ft_p->ft_len, dst_len = copy_len;

			    slot = &ring->slot[j];
			    dst = BDG_NMB(&dst_na->up, slot);

			    if (unlikely(fix_mismatch)) {
				if (na->offset > dst_na->offset) {
					src += na->offset - dst_na->offset;
					copy_len -= na->offset - dst_na->offset;
					dst_len = copy_len;
				} else {
					bzero(dst, dst_na->offset - na->offset);
					dst_len += dst_na->offset - na->offset;
					dst += dst_na->offset - na->offset;
				}
				/* fix the first fragment only */
				fix_mismatch = 0;
				/* completely skip a header-only fragment */
				if (copy_len == 0) {
					ft_p++;
					continue;
				}
			    }
			    /* round to a multiple of 64 */
			    copy_len = (copy_len + 63) & ~63;

			    ND("send %d %d bytes at %s:%d",
				i, ft_p->ft_len, NM_IFPNAME(dst_ifp), j);
			    if (ft_p->ft_flags & NS_INDIRECT) {
				if (copyin(src, dst, copy_len)) {
					// invalid user pointer, pretend len is 0
					dst_len = 0;
				}
			    } else {
				//memcpy(dst, src, copy_len);
				pkt_copy(src, dst, (int)copy_len);
			    }
			    slot->len = dst_len;
			    slot->flags = (cnt << 8) | NS_MOREFRAG;
			    j = nm_next(j, lim);
			    ft_p++;
			    sent++;
			} while (ft_p != ft_end);
			slot->flags = (cnt << 8); /* clear flag on last entry */
			/* are we done ? */
			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
				break;
		}
		{
		    /* current position */
		    uint32_t *p = kring->nkr_leases; /* shorthand */
		    uint32_t update_pos;
		    int still_locked = 1;

		    mtx_lock(&kring->q_lock);
		    if (unlikely(howmany > 0)) {
			/* we did not use all the bufs. If I am the last one
			 * I can recover the slots, otherwise I must
			 * fill them with 0 to mark empty packets.
			 */
			ND("leftover %d bufs", howmany);
			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
			    /* yes i am the last one */
			    ND("roll back nkr_hwlease to %d", j);
			    kring->nkr_hwlease = j;
			} else {
			    while (howmany-- > 0) {
				ring->slot[j].len = 0;
				ring->slot[j].flags = 0;
				j = nm_next(j, lim);
			    }
			}
		    }
		    p[lease_idx] = j; /* report I am done */

		    update_pos = nm_kr_rxpos(kring);

		    if (my_start == update_pos) {
			/* all slots before my_start have been reported,
			 * so scan subsequent leases to see if other ranges
			 * have been completed, and do a selwakeup or txsync.
		         */
			while (lease_idx != kring->nkr_lease_idx &&
				p[lease_idx] != NR_NOSLOT) {
			    j = p[lease_idx];
			    p[lease_idx] = NR_NOSLOT;
			    lease_idx = nm_next(lease_idx, lim);
			}
			/* j is the new 'write' position. j != my_start
			 * means there are new buffers to report
			 */
			if (likely(j != my_start)) {
				uint32_t old_avail = kring->nr_hwavail;

				kring->nr_hwavail = (j >= kring->nr_hwcur) ?
					j - kring->nr_hwcur :
					j + lim + 1 - kring->nr_hwcur;
				if (kring->nr_hwavail < old_avail) {
					D("avail shrink %d -> %d",
						old_avail, kring->nr_hwavail);
				}
				dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
				still_locked = 0;
				mtx_unlock(&kring->q_lock);
				if (dst_na->retry && retry--)
					goto retry;
			}
		    }
		    if (still_locked)
			mtx_unlock(&kring->q_lock);
		}
cleanup:
		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
		d->bq_len = 0;
	}
	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
	brddst->bq_len = 0;
	return 0;
}
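
/*
 * Summary of the lease protocol used above, as a sketch (the helpers
 * nm_kr_space()/nm_kr_lease()/nm_kr_rxpos() live in netmap_kern.h):
 *
 *	mtx_lock(&kring->q_lock);
 *	j = kring->nkr_hwlease;			// my first slot
 *	howmany = nm_kr_space(kring, 1);	// slots I may claim
 *	lease_idx = nm_kr_lease(kring, howmany, 1);
 *	mtx_unlock(&kring->q_lock);
 *
 *	... copy packets into slots [j, j + howmany) without the lock ...
 *
 *	mtx_lock(&kring->q_lock);
 *	kring->nkr_leases[lease_idx] = j_final;	// report completion
 *	// whoever completes the oldest lease advances nr_hwavail
 *	mtx_unlock(&kring->q_lock);
 */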

static int
netmap_vp_txsync(struct netmap_vp_adapter *na, u_int ring_nr, int flags)
{
	struct netmap_kring *kring = &na->up.tx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	u_int j, k, lim = kring->nkr_num_slots - 1;

	k = ring->cur;
	if (k > lim)
		return netmap_ring_reinit(kring);

	if (bridge_batch <= 0) { /* testing only */
		j = k; // used all
		goto done;
	}
	if (bridge_batch > NM_BDG_BATCH)
		bridge_batch = NM_BDG_BATCH;

	j = nm_bdg_preflush(na, ring_nr, kring, k);
	if (j != k)
		D("early break at %d/ %d, avail %d", j, k, kring->nr_hwavail);
	/* k-j modulo ring size is the number of slots processed */
	if (k < j)
		k += kring->nkr_num_slots;
	kring->nr_hwavail = lim - (k - j);

done:
	kring->nr_hwcur = j;
	ring->avail = kring->nr_hwavail;
	if (netmap_verbose)
		D("%s ring %d flags %d", NM_IFPNAME(na->up.ifp), ring_nr, flags);
	return 0;
}


/*
 * main dispatch routine for the bridge.
 * We already know that only one thread is running this.
 * We must run nm_bdg_preflush without lock.
 */
static int
bdg_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
	return netmap_vp_txsync(vpna, ring_nr, flags);
}


/*
 * user process reading from a VALE switch.
 * Already protected against concurrent calls from userspace,
 * but we must acquire the queue's lock to protect against
 * writers on the same queue.
 */
static int
bdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
	struct netmap_kring *kring = &na->rx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	u_int j, lim = kring->nkr_num_slots - 1;
	u_int k = ring->cur, resvd = ring->reserved;
	int n;

	mtx_lock(&kring->q_lock);
	if (k > lim) {
		D("ouch dangerous reset!!!");
		n = netmap_ring_reinit(kring);
		goto done;
	}

	/* skip past packets that userspace has released */
	j = kring->nr_hwcur;    /* netmap ring index */
	if (resvd > 0) {
		if (resvd + ring->avail >= lim + 1) {
			D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
			ring->reserved = resvd = 0; // XXX panic...
		}
		k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
	}

	if (j != k) { /* userspace has released some packets. */
		n = k - j;
		if (n < 0)
			n += kring->nkr_num_slots;
		ND("userspace releases %d packets", n);
		for (n = 0; likely(j != k); n++) {
			struct netmap_slot *slot = &ring->slot[j];
			void *addr = BDG_NMB(na, slot);

			if (addr == netmap_buffer_base) { /* bad buf */
				D("bad buffer index %d, ignore ?",
					slot->buf_idx);
			}
			slot->flags &= ~NS_BUF_CHANGED;
			j = nm_next(j, lim);
		}
		kring->nr_hwavail -= n;
		kring->nr_hwcur = k;
	}
	/* tell userspace that there are new packets */
	ring->avail = kring->nr_hwavail - resvd;
	n = 0;
done:
	mtx_unlock(&kring->q_lock);
	return n;
}

static int
bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp)
{
	struct netmap_vp_adapter *vpna;
	struct netmap_adapter *na;
	int error;

	vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (vpna == NULL)
		return ENOMEM;

	na = &vpna->up;

	na->ifp = ifp;

	/* bound checking */
	na->num_tx_rings = nmr->nr_tx_rings;
	nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
	nmr->nr_tx_rings = na->num_tx_rings; // write back
	na->num_rx_rings = nmr->nr_rx_rings;
	nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
	nmr->nr_rx_rings = na->num_rx_rings; // write back
	nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
			1, NM_BDG_MAXSLOTS, NULL);
	na->num_tx_desc = nmr->nr_tx_slots;
	nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
			1, NM_BDG_MAXSLOTS, NULL);
	na->num_rx_desc = nmr->nr_rx_slots;
	vpna->offset = 0;

	na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
	na->nm_txsync = bdg_netmap_txsync;
	na->nm_rxsync = bdg_netmap_rxsync;
	na->nm_register = bdg_netmap_reg;
	na->nm_dtor = netmap_adapter_vp_dtor;
	na->nm_krings_create = netmap_vp_krings_create;
	na->nm_krings_delete = netmap_vp_krings_delete;
	na->nm_mem = netmap_mem_private_new(NM_IFPNAME(na->ifp),
			na->num_tx_rings, na->num_tx_desc,
			na->num_rx_rings, na->num_rx_desc);
	/* other nmd fields are set in the common routine */
	error = netmap_attach_common(na);
	if (error) {
		free(vpna, M_DEVBUF);
		return error;
	}
	return 0;
}

static void
netmap_bwrap_dtor(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct nm_bridge *b = bna->up.na_bdg,
		*bh = bna->host.na_bdg;
	struct ifnet *ifp = na->ifp;

	ND("na %p", na);

	if (b) {
		netmap_bdg_detach_common(b, bna->up.bdg_port,
			(bh ? bna->host.bdg_port : -1));
	}

	hwna->na_private = NULL;
	netmap_adapter_put(hwna);

	bzero(ifp, sizeof(*ifp));
	free(ifp, M_DEVBUF);
	na->ifp = NULL;
}

/*
 * Pass packets from nic to the bridge.
 * XXX TODO check locking: this is called from the interrupt
 * handler so we should make sure that the interface is not
 * disconnected while passing down an interrupt.
 *
 * Note, no user process can access this NIC so we can ignore
 * the info in the 'ring'.
 *
 * This callback overwrites the hwna notify callback.
 * Packets come from the outside or from the host stack and are put on
 * an hwna rx ring. The bridge wrapper then sends the packets through
 * the bridge.
 */
static int
netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
{
	struct ifnet *ifp = na->ifp;
	struct netmap_bwrap_adapter *bna = na->na_private;
	struct netmap_vp_adapter *hostna = &bna->host;
	struct netmap_kring *kring, *bkring;
	struct netmap_ring *ring;
	int is_host_ring = ring_nr == na->num_rx_rings;
	struct netmap_vp_adapter *vpna = &bna->up;
	int error = 0;

	ND("%s[%d] %s %x", NM_IFPNAME(ifp), ring_nr, (tx == NR_TX ? "TX" : "RX"), flags);

	if (flags & NAF_DISABLE_NOTIFY) {
		kring = tx == NR_TX ? na->tx_rings : na->rx_rings;
		bkring = tx == NR_TX ? vpna->up.rx_rings : vpna->up.tx_rings;
		if (kring->nkr_stopped)
			netmap_disable_ring(bkring);
		else
			bkring->nkr_stopped = 0;
		return 0;
	}

	if (ifp == NULL || !(ifp->if_capenable & IFCAP_NETMAP))
		return 0;

	if (tx == NR_TX)
		return 0;

	kring = &na->rx_rings[ring_nr];
	ring = kring->ring;

	/* make sure the ring is not disabled */
	if (nm_kr_tryget(kring))
		return 0;

	if (is_host_ring && hostna->na_bdg == NULL) {
		error = bna->save_notify(na, ring_nr, tx, flags);
		goto put_out;
	}

	if (is_host_ring) {
		vpna = hostna;
		ring_nr = 0;
	} else {
		/* fetch packets that have arrived.
		 * XXX maybe do this in a loop ?
		 */
		error = na->nm_rxsync(na, ring_nr, 0);
		if (error)
			goto put_out;
	}
	if (kring->nr_hwavail == 0 && netmap_verbose) {
		D("how strange, interrupt with no packets on %s",
			NM_IFPNAME(ifp));
		goto put_out;
	}
	/* XXX avail ? */
	ring->cur = nm_kr_rxpos(kring);
	netmap_vp_txsync(vpna, ring_nr, flags);

	if (!is_host_ring)
		error = na->nm_rxsync(na, ring_nr, 0);

put_out:
	nm_kr_put(kring);
	return error;
}

static int
netmap_bwrap_register(struct netmap_adapter *na, int onoff)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct netmap_vp_adapter *hostna = &bna->host;
	int error;

	ND("%s %d", NM_IFPNAME(ifp), onoff);

	if (onoff) {
		int i;

		hwna->na_lut = na->na_lut;
		hwna->na_lut_objtotal = na->na_lut_objtotal;

		if (hostna->na_bdg) {
			hostna->up.na_lut = na->na_lut;
			hostna->up.na_lut_objtotal = na->na_lut_objtotal;
		}

		/* cross-link the netmap rings */
		for (i = 0; i <= na->num_tx_rings; i++) {
			hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
			hwna->tx_rings[i].ring = na->rx_rings[i].ring;
		}
		for (i = 0; i <= na->num_rx_rings; i++) {
			hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
			hwna->rx_rings[i].ring = na->tx_rings[i].ring;
		}
	}

	if (hwna->ifp) {
		error = hwna->nm_register(hwna, onoff);
		if (error)
			return error;
	}

	bdg_netmap_reg(na, onoff);

	if (onoff) {
		bna->save_notify = hwna->nm_notify;
		hwna->nm_notify = netmap_bwrap_intr_notify;
	} else {
		hwna->nm_notify = bna->save_notify;
		hwna->na_lut = NULL;
		hwna->na_lut_objtotal = 0;
	}

	return 0;
}

static int
netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
				    u_int *rxr, u_int *rxd)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;

	/* forward the request */
	netmap_update_config(hwna);
	/* swap the results */
	*txr = hwna->num_rx_rings;
	*txd = hwna->num_rx_desc;
	*rxr = hwna->num_tx_rings;
	*rxd = hwna->num_tx_desc;

	return 0;
}

static int
netmap_bwrap_krings_create(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct netmap_adapter *hostna = &bna->host.up;
	int error;

	ND("%s", NM_IFPNAME(na->ifp));

	error = netmap_vp_krings_create(na);
	if (error)
		return error;

	error = hwna->nm_krings_create(hwna);
	if (error) {
		netmap_vp_krings_delete(na);
		return error;
	}

	hostna->tx_rings = na->tx_rings + na->num_tx_rings;
	hostna->rx_rings = na->rx_rings + na->num_rx_rings;

	return 0;
}

static void
netmap_bwrap_krings_delete(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;

	ND("%s", NM_IFPNAME(na->ifp));

	hwna->nm_krings_delete(hwna);
	netmap_vp_krings_delete(na);
}

/* notify method for the bridge-->hwna direction */
static int
netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct netmap_kring *kring, *hw_kring;
	struct netmap_ring *ring;
	u_int lim, k;
	int error = 0;

	if (tx == NR_TX)
		return ENXIO;

	kring = &na->rx_rings[ring_n];
	hw_kring = &hwna->tx_rings[ring_n];
	ring = kring->ring;

	lim = kring->nkr_num_slots - 1;
	k = nm_kr_rxpos(kring);

	if (hwna->ifp == NULL || !(hwna->ifp->if_capenable & IFCAP_NETMAP))
		return 0;
	ring->cur = k;
	ND("%s[%d] PRE rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
		NM_IFPNAME(na->ifp), ring_n,
		kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
		ring->cur, ring->avail, ring->reserved,
		hw_kring->nr_hwcur, hw_kring->nr_hwavail);
	if (ring_n == na->num_rx_rings) {
		netmap_txsync_to_host(hwna);
	} else {
		error = hwna->nm_txsync(hwna, ring_n, flags);
	}
	kring->nr_hwcur = ring->cur;
	kring->nr_hwavail = 0;
	kring->nr_hwreserved = lim - ring->avail;
	ND("%s[%d] PST rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
		NM_IFPNAME(na->ifp), ring_n,
		kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
		ring->cur, ring->avail, ring->reserved,
		hw_kring->nr_hwcur, hw_kring->nr_hwavail);

	return error;
}

static int
netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
{
	struct netmap_bwrap_adapter *bna = na->na_private;
	struct netmap_adapter *port_na = &bna->up.up;
	if (tx == NR_TX || ring_n != 0)
		return ENXIO;
	return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
}

/* attach a bridge wrapper to the 'real' device */
static int
netmap_bwrap_attach(struct ifnet *fake, struct ifnet *real)
{
	struct netmap_bwrap_adapter *bna;
	struct netmap_adapter *na;
	struct netmap_adapter *hwna = NA(real);
	struct netmap_adapter *hostna;
	int error;


	bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (bna == NULL)
		return ENOMEM;

	na = &bna->up.up;
	na->ifp = fake;
	/* fill the ring data for the bwrap adapter with rx/tx meanings
	 * swapped. The real cross-linking will be done during register,
	 * when all the krings will have been created.
	 */
	na->num_rx_rings = hwna->num_tx_rings;
	na->num_tx_rings = hwna->num_rx_rings;
	na->num_tx_desc = hwna->num_rx_desc;
	na->num_rx_desc = hwna->num_tx_desc;
	na->nm_dtor = netmap_bwrap_dtor;
	na->nm_register = netmap_bwrap_register;
	// na->nm_txsync = netmap_bwrap_txsync;
	// na->nm_rxsync = netmap_bwrap_rxsync;
	na->nm_config = netmap_bwrap_config;
	na->nm_krings_create = netmap_bwrap_krings_create;
	na->nm_krings_delete = netmap_bwrap_krings_delete;
	na->nm_notify = netmap_bwrap_notify;
	na->nm_mem = hwna->nm_mem;
	na->na_private = na; /* prevent NIOCREGIF */
	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */

	bna->hwna = hwna;
	netmap_adapter_get(hwna);
	hwna->na_private = bna; /* weak reference */

	hostna = &bna->host.up;
	hostna->ifp = hwna->ifp;
	hostna->num_tx_rings = 1;
	hostna->num_tx_desc = hwna->num_rx_desc;
	hostna->num_rx_rings = 1;
	hostna->num_rx_desc = hwna->num_tx_desc;
	// hostna->nm_txsync = netmap_bwrap_host_txsync;
	// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
	hostna->nm_notify = netmap_bwrap_host_notify;
	hostna->nm_mem = na->nm_mem;
	hostna->na_private = bna;

	D("%s<->%s txr %d txd %d rxr %d rxd %d", fake->if_xname, real->if_xname,
		na->num_tx_rings, na->num_tx_desc,
		na->num_rx_rings, na->num_rx_desc);

	error = netmap_attach_common(na);
	if (error) {
		netmap_adapter_put(hwna);
		free(bna, M_DEVBUF);
		return error;
	}
	return 0;
}

void
netmap_init_bridges(void)
{
	int i;
	bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
	for (i = 0; i < NM_BRIDGES; i++)
		BDG_RWINIT(&nm_bridges[i]);
}
#endif /* WITH_VALE */