xref: /freebsd-14.2/sys/dev/netmap/netmap_vale.c (revision f9790aeb)
1*f9790aebSLuigi Rizzo /*
2*f9790aebSLuigi Rizzo  * Copyright (C) 2013 Universita` di Pisa. All rights reserved.
3*f9790aebSLuigi Rizzo  *
4*f9790aebSLuigi Rizzo  * Redistribution and use in source and binary forms, with or without
5*f9790aebSLuigi Rizzo  * modification, are permitted provided that the following conditions
6*f9790aebSLuigi Rizzo  * are met:
7*f9790aebSLuigi Rizzo  *   1. Redistributions of source code must retain the above copyright
8*f9790aebSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer.
9*f9790aebSLuigi Rizzo  *   2. Redistributions in binary form must reproduce the above copyright
10*f9790aebSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer in the
11*f9790aebSLuigi Rizzo  *      documentation and/or other materials provided with the distribution.
12*f9790aebSLuigi Rizzo  *
13*f9790aebSLuigi Rizzo  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14*f9790aebSLuigi Rizzo  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15*f9790aebSLuigi Rizzo  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16*f9790aebSLuigi Rizzo  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17*f9790aebSLuigi Rizzo  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18*f9790aebSLuigi Rizzo  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19*f9790aebSLuigi Rizzo  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20*f9790aebSLuigi Rizzo  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21*f9790aebSLuigi Rizzo  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22*f9790aebSLuigi Rizzo  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23*f9790aebSLuigi Rizzo  * SUCH DAMAGE.
24*f9790aebSLuigi Rizzo  */
25*f9790aebSLuigi Rizzo 
26*f9790aebSLuigi Rizzo 
27*f9790aebSLuigi Rizzo /*
28*f9790aebSLuigi Rizzo  * This module implements the VALE switch for netmap
29*f9790aebSLuigi Rizzo 
30*f9790aebSLuigi Rizzo --- VALE SWITCH ---
31*f9790aebSLuigi Rizzo 
32*f9790aebSLuigi Rizzo NMG_LOCK() serializes all modifications to switches and ports.
33*f9790aebSLuigi Rizzo A switch cannot be deleted until all ports are gone.
34*f9790aebSLuigi Rizzo 
35*f9790aebSLuigi Rizzo For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring or deleting a port, the
37*f9790aebSLuigi Rizzo lock is acquired in exclusive mode (after holding NMG_LOCK).
38*f9790aebSLuigi Rizzo When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
39*f9790aebSLuigi Rizzo The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
41*f9790aebSLuigi Rizzo Hence it is important that sleepable shared locks are used.
42*f9790aebSLuigi Rizzo 
43*f9790aebSLuigi Rizzo On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
45*f9790aebSLuigi Rizzo packets are copied from source to destination, and then
46*f9790aebSLuigi Rizzo the lock is acquired again and the receive ring is updated.
47*f9790aebSLuigi Rizzo (A similar thing is done on the tx ring for NIC and host stack
48*f9790aebSLuigi Rizzo ports attached to the switch)
49*f9790aebSLuigi Rizzo 
50*f9790aebSLuigi Rizzo  */
51*f9790aebSLuigi Rizzo 
52*f9790aebSLuigi Rizzo /*
53*f9790aebSLuigi Rizzo  * OS-specific code that is used only within this file.
54*f9790aebSLuigi Rizzo  * Other OS-specific code that must be accessed by drivers
55*f9790aebSLuigi Rizzo  * is present in netmap_kern.h
56*f9790aebSLuigi Rizzo  */
57*f9790aebSLuigi Rizzo 
58*f9790aebSLuigi Rizzo #if defined(__FreeBSD__)
59*f9790aebSLuigi Rizzo #include <sys/cdefs.h> /* prerequisite */
60*f9790aebSLuigi Rizzo __FBSDID("$FreeBSD$");
61*f9790aebSLuigi Rizzo 
62*f9790aebSLuigi Rizzo #include <sys/types.h>
63*f9790aebSLuigi Rizzo #include <sys/errno.h>
64*f9790aebSLuigi Rizzo #include <sys/param.h>	/* defines used in kernel.h */
65*f9790aebSLuigi Rizzo #include <sys/kernel.h>	/* types used in module initialization */
66*f9790aebSLuigi Rizzo #include <sys/conf.h>	/* cdevsw struct, UID, GID */
67*f9790aebSLuigi Rizzo #include <sys/sockio.h>
68*f9790aebSLuigi Rizzo #include <sys/socketvar.h>	/* struct socket */
69*f9790aebSLuigi Rizzo #include <sys/malloc.h>
70*f9790aebSLuigi Rizzo #include <sys/poll.h>
71*f9790aebSLuigi Rizzo #include <sys/rwlock.h>
72*f9790aebSLuigi Rizzo #include <sys/socket.h> /* sockaddrs */
73*f9790aebSLuigi Rizzo #include <sys/selinfo.h>
74*f9790aebSLuigi Rizzo #include <sys/sysctl.h>
75*f9790aebSLuigi Rizzo #include <net/if.h>
76*f9790aebSLuigi Rizzo #include <net/if_var.h>
77*f9790aebSLuigi Rizzo #include <net/bpf.h>		/* BIOCIMMEDIATE */
78*f9790aebSLuigi Rizzo #include <machine/bus.h>	/* bus_dmamap_* */
79*f9790aebSLuigi Rizzo #include <sys/endian.h>
80*f9790aebSLuigi Rizzo #include <sys/refcount.h>
81*f9790aebSLuigi Rizzo 
82*f9790aebSLuigi Rizzo // #define prefetch(x)	__builtin_prefetch(x)
83*f9790aebSLuigi Rizzo 
84*f9790aebSLuigi Rizzo 
85*f9790aebSLuigi Rizzo #define BDG_RWLOCK_T		struct rwlock // struct rwlock
86*f9790aebSLuigi Rizzo 
87*f9790aebSLuigi Rizzo #define	BDG_RWINIT(b)		\
88*f9790aebSLuigi Rizzo 	rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
89*f9790aebSLuigi Rizzo #define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)
90*f9790aebSLuigi Rizzo #define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)
91*f9790aebSLuigi Rizzo #define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)
92*f9790aebSLuigi Rizzo #define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)
93*f9790aebSLuigi Rizzo #define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)
94*f9790aebSLuigi Rizzo #define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)
95*f9790aebSLuigi Rizzo 
96*f9790aebSLuigi Rizzo 
97*f9790aebSLuigi Rizzo #elif defined(linux)
98*f9790aebSLuigi Rizzo 
99*f9790aebSLuigi Rizzo #include "bsd_glue.h"
100*f9790aebSLuigi Rizzo 
101*f9790aebSLuigi Rizzo #elif defined(__APPLE__)
102*f9790aebSLuigi Rizzo 
103*f9790aebSLuigi Rizzo #warning OSX support is only partial
104*f9790aebSLuigi Rizzo #include "osx_glue.h"
105*f9790aebSLuigi Rizzo 
106*f9790aebSLuigi Rizzo #else
107*f9790aebSLuigi Rizzo 
108*f9790aebSLuigi Rizzo #error	Unsupported platform
109*f9790aebSLuigi Rizzo 
110*f9790aebSLuigi Rizzo #endif /* unsupported */
111*f9790aebSLuigi Rizzo 
112*f9790aebSLuigi Rizzo /*
113*f9790aebSLuigi Rizzo  * common headers
114*f9790aebSLuigi Rizzo  */
115*f9790aebSLuigi Rizzo 
116*f9790aebSLuigi Rizzo #include <net/netmap.h>
117*f9790aebSLuigi Rizzo #include <dev/netmap/netmap_kern.h>
118*f9790aebSLuigi Rizzo #include <dev/netmap/netmap_mem2.h>
119*f9790aebSLuigi Rizzo 
120*f9790aebSLuigi Rizzo #ifdef WITH_VALE
121*f9790aebSLuigi Rizzo 
122*f9790aebSLuigi Rizzo /*
123*f9790aebSLuigi Rizzo  * system parameters (most of them in netmap_kern.h)
124*f9790aebSLuigi Rizzo  * NM_NAME	prefix for switch port names, default "vale"
125*f9790aebSLuigi Rizzo  * NM_BDG_MAXPORTS	number of ports
126*f9790aebSLuigi Rizzo  * NM_BRIDGES	max number of switches in the system.
127*f9790aebSLuigi Rizzo  *	XXX should become a sysctl or tunable
128*f9790aebSLuigi Rizzo  *
129*f9790aebSLuigi Rizzo  * Switch ports are named valeX:Y where X is the switch name and Y
130*f9790aebSLuigi Rizzo  * is the port. If Y matches a physical interface name, the port is
131*f9790aebSLuigi Rizzo  * connected to a physical device.
132*f9790aebSLuigi Rizzo  *
133*f9790aebSLuigi Rizzo  * Unlike physical interfaces, switch ports use their own memory region
134*f9790aebSLuigi Rizzo  * for rings and buffers.
135*f9790aebSLuigi Rizzo  * The virtual interfaces use per-queue lock instead of core lock.
136*f9790aebSLuigi Rizzo  * In the tx loop, we aggregate traffic in batches to make all operations
137*f9790aebSLuigi Rizzo  * faster. The batch size is bridge_batch.
138*f9790aebSLuigi Rizzo  */
139*f9790aebSLuigi Rizzo #define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
140*f9790aebSLuigi Rizzo #define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
141*f9790aebSLuigi Rizzo #define NM_BRIDGE_RINGSIZE	1024	/* in the device */
142*f9790aebSLuigi Rizzo #define NM_BDG_HASH		1024	/* forwarding table entries */
143*f9790aebSLuigi Rizzo #define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
144*f9790aebSLuigi Rizzo #define NM_MULTISEG		64	/* max size of a chain of bufs */
145*f9790aebSLuigi Rizzo /* actual size of the tables */
146*f9790aebSLuigi Rizzo #define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
147*f9790aebSLuigi Rizzo /* NM_FT_NULL terminates a list of slots in the ft */
148*f9790aebSLuigi Rizzo #define NM_FT_NULL		NM_BDG_BATCH_MAX
149*f9790aebSLuigi Rizzo #define	NM_BRIDGES		8	/* number of bridges */
150*f9790aebSLuigi Rizzo 
151*f9790aebSLuigi Rizzo 
152*f9790aebSLuigi Rizzo /*
153*f9790aebSLuigi Rizzo  * bridge_batch is set via sysctl to the max batch size to be
154*f9790aebSLuigi Rizzo  * used in the bridge. The actual value may be larger as the
155*f9790aebSLuigi Rizzo  * last packet in the block may overflow the size.
156*f9790aebSLuigi Rizzo  */
int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
SYSCTL_DECL(_dev_netmap);
/* expose the batch size as a read-write sysctl under dev.netmap */
SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
160*f9790aebSLuigi Rizzo 
161*f9790aebSLuigi Rizzo 
162*f9790aebSLuigi Rizzo static int bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp);
163*f9790aebSLuigi Rizzo static int bdg_netmap_reg(struct netmap_adapter *na, int onoff);
164*f9790aebSLuigi Rizzo static int netmap_bwrap_attach(struct ifnet *, struct ifnet *);
165*f9790aebSLuigi Rizzo static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
166*f9790aebSLuigi Rizzo int kern_netmap_regif(struct nmreq *nmr);
167*f9790aebSLuigi Rizzo 
168*f9790aebSLuigi Rizzo /*
169*f9790aebSLuigi Rizzo  * Each transmit queue accumulates a batch of packets into
170*f9790aebSLuigi Rizzo  * a structure before forwarding. Packets to the same
171*f9790aebSLuigi Rizzo  * destination are put in a list using ft_next as a link field.
172*f9790aebSLuigi Rizzo  * ft_frags and ft_next are valid only on the first fragment.
173*f9790aebSLuigi Rizzo  */
struct nm_bdg_fwd {	/* forwarding entry for a bridge */
	void *ft_buf;		/* netmap or indirect buffer */
	uint8_t ft_frags;	/* how many fragments (only on 1st frag) */
	uint8_t _ft_port;	/* dst port (unused) */
	uint16_t ft_flags;	/* flags, e.g. indirect */
	uint16_t ft_len;	/* src fragment len */
	uint16_t ft_next;	/* next packet to same destination;
				 * NM_FT_NULL terminates the list */
};
182*f9790aebSLuigi Rizzo 
183*f9790aebSLuigi Rizzo /*
184*f9790aebSLuigi Rizzo  * For each output interface, nm_bdg_q is used to construct a list.
185*f9790aebSLuigi Rizzo  * bq_len is the number of output buffers (we can have coalescing
186*f9790aebSLuigi Rizzo  * during the copy).
187*f9790aebSLuigi Rizzo  */
struct nm_bdg_q {
	uint16_t bq_head;	/* first ft entry of the list, NM_FT_NULL if empty */
	uint16_t bq_tail;	/* last ft entry of the list, NM_FT_NULL if empty */
	uint32_t bq_len;	/* number of buffers */
};
193*f9790aebSLuigi Rizzo 
194*f9790aebSLuigi Rizzo /* XXX revise this */
/* XXX revise this */
/* one entry of the MAC forwarding table */
struct nm_hash_ent {
	uint64_t	mac;	/* the top 2 bytes are the epoch */
	uint64_t	ports;	/* destination port(s); exact semantics defined
				 * by the lookup function — see nm_bdg_lookup */
};
199*f9790aebSLuigi Rizzo 
200*f9790aebSLuigi Rizzo /*
201*f9790aebSLuigi Rizzo  * nm_bridge is a descriptor for a VALE switch.
202*f9790aebSLuigi Rizzo  * Interfaces for a bridge are all in bdg_ports[].
203*f9790aebSLuigi Rizzo  * The array has fixed size, an empty entry does not terminate
204*f9790aebSLuigi Rizzo  * the search, but lookups only occur on attach/detach so we
205*f9790aebSLuigi Rizzo  * don't mind if they are slow.
206*f9790aebSLuigi Rizzo  *
207*f9790aebSLuigi Rizzo  * The bridge is non blocking on the transmit ports: excess
208*f9790aebSLuigi Rizzo  * packets are dropped if there is no room on the output port.
209*f9790aebSLuigi Rizzo  *
210*f9790aebSLuigi Rizzo  * bdg_lock protects accesses to the bdg_ports array.
211*f9790aebSLuigi Rizzo  * This is a rw lock (or equivalent).
212*f9790aebSLuigi Rizzo  */
struct nm_bridge {
	/* XXX what is the proper alignment/layout ? */
	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
	int		bdg_namelen;	/* length of the name prefix stored
					 * in bdg_basename */
	uint32_t	bdg_active_ports; /* 0 means free */
	char		bdg_basename[IFNAMSIZ];	/* bridge name, e.g. "valeX" */

	/* Indexes of active ports (up to active_ports)
	 * and all other remaining ports.
	 */
	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];

	/* attached ports; detached slots are set to NULL */
	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];


	/*
	 * The function to decide the destination port.
	 * It returns either of an index of the destination port,
	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
	 * forward this packet.  ring_nr is the source ring index, and the
	 * function may overwrite this value to forward this packet to a
	 * different ring index.
	 * This function must be set by netmap_bdgctl().
	 */
	bdg_lookup_fn_t nm_bdg_lookup;

	/* the forwarding table, MAC+ports.
	 * XXX should be changed to an argument to be passed to
	 * the lookup function, and allocated on attach
	 */
	struct nm_hash_ent ht[NM_BDG_HASH];
};
245*f9790aebSLuigi Rizzo 
246*f9790aebSLuigi Rizzo 
247*f9790aebSLuigi Rizzo /*
248*f9790aebSLuigi Rizzo  * XXX in principle nm_bridges could be created dynamically
249*f9790aebSLuigi Rizzo  * Right now we have a static array and deletions are protected
250*f9790aebSLuigi Rizzo  * by an exclusive lock.
251*f9790aebSLuigi Rizzo  */
/* the (static) array of VALE switches; a slot is free when bdg_active_ports == 0 */
struct nm_bridge nm_bridges[NM_BRIDGES];
253*f9790aebSLuigi Rizzo 
254*f9790aebSLuigi Rizzo 
255*f9790aebSLuigi Rizzo /*
 * A few functions to tell which kind of port we are using.
257*f9790aebSLuigi Rizzo  * XXX should we hold a lock ?
258*f9790aebSLuigi Rizzo  *
259*f9790aebSLuigi Rizzo  * nma_is_vp()		virtual port
260*f9790aebSLuigi Rizzo  * nma_is_host()	port connected to the host stack
261*f9790aebSLuigi Rizzo  * nma_is_hw()		port connected to a NIC
262*f9790aebSLuigi Rizzo  * nma_is_generic()	generic netmap adapter XXX stop this madness
263*f9790aebSLuigi Rizzo  */
264*f9790aebSLuigi Rizzo static __inline int
265*f9790aebSLuigi Rizzo nma_is_vp(struct netmap_adapter *na)
266*f9790aebSLuigi Rizzo {
267*f9790aebSLuigi Rizzo 	return na->nm_register == bdg_netmap_reg;
268*f9790aebSLuigi Rizzo }
269*f9790aebSLuigi Rizzo 
270*f9790aebSLuigi Rizzo 
271*f9790aebSLuigi Rizzo static __inline int
272*f9790aebSLuigi Rizzo nma_is_host(struct netmap_adapter *na)
273*f9790aebSLuigi Rizzo {
274*f9790aebSLuigi Rizzo 	return na->nm_register == NULL;
275*f9790aebSLuigi Rizzo }
276*f9790aebSLuigi Rizzo 
277*f9790aebSLuigi Rizzo 
/* true iff the adapter is a NIC: not virtual, not host, not generic */
static __inline int
nma_is_hw(struct netmap_adapter *na)
{
	/* In case of sw adapter, nm_register is NULL */
	if (nma_is_vp(na) || nma_is_host(na) || nma_is_generic(na))
		return 0;
	return 1;
}
284*f9790aebSLuigi Rizzo 
285*f9790aebSLuigi Rizzo static __inline int
286*f9790aebSLuigi Rizzo nma_is_bwrap(struct netmap_adapter *na)
287*f9790aebSLuigi Rizzo {
288*f9790aebSLuigi Rizzo 	return na->nm_register == netmap_bwrap_register;
289*f9790aebSLuigi Rizzo }
290*f9790aebSLuigi Rizzo 
291*f9790aebSLuigi Rizzo 
292*f9790aebSLuigi Rizzo 
293*f9790aebSLuigi Rizzo /*
294*f9790aebSLuigi Rizzo  * this is a slightly optimized copy routine which rounds
295*f9790aebSLuigi Rizzo  * to multiple of 64 bytes and is often faster than dealing
296*f9790aebSLuigi Rizzo  * with other odd sizes. We assume there is enough room
297*f9790aebSLuigi Rizzo  * in the source and destination buffers.
298*f9790aebSLuigi Rizzo  *
299*f9790aebSLuigi Rizzo  * XXX only for multiples of 64 bytes, non overlapped.
300*f9790aebSLuigi Rizzo  */
/*
 * Copy l bytes from _src to _dst, rounding the length up to a
 * multiple of 64 and moving 8 64-bit words per iteration; this is
 * often faster than memcpy for the short packets typical of the
 * bridge. The caller guarantees both buffers have room for the
 * rounded-up length and that they do not overlap.
 * Long buffers (>= 1024 bytes) fall back to memcpy.
 */
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
	uint64_t *src = _src;
	uint64_t *dst = _dst;
	int chunks;

	if (unlikely(l >= 1024)) {
		memcpy(dst, src, l);
		return;
	}
	/* one chunk = 64 bytes = 8 words; round up, no-op for l <= 0 */
	chunks = (l > 0) ? (l + 63) / 64 : 0;
	for (; likely(chunks > 0); chunks--) {
		dst[0] = src[0];
		dst[1] = src[1];
		dst[2] = src[2];
		dst[3] = src[3];
		dst[4] = src[4];
		dst[5] = src[5];
		dst[6] = src[6];
		dst[7] = src[7];
		dst += 8;
		src += 8;
	}
}
321*f9790aebSLuigi Rizzo 
322*f9790aebSLuigi Rizzo 
323*f9790aebSLuigi Rizzo 
324*f9790aebSLuigi Rizzo /*
325*f9790aebSLuigi Rizzo  * locate a bridge among the existing ones.
326*f9790aebSLuigi Rizzo  * MUST BE CALLED WITH NMG_LOCK()
327*f9790aebSLuigi Rizzo  *
328*f9790aebSLuigi Rizzo  * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
329*f9790aebSLuigi Rizzo  * We assume that this is called with a name of at least NM_NAME chars.
330*f9790aebSLuigi Rizzo  */
331*f9790aebSLuigi Rizzo static struct nm_bridge *
332*f9790aebSLuigi Rizzo nm_find_bridge(const char *name, int create)
333*f9790aebSLuigi Rizzo {
334*f9790aebSLuigi Rizzo 	int i, l, namelen;
335*f9790aebSLuigi Rizzo 	struct nm_bridge *b = NULL;
336*f9790aebSLuigi Rizzo 
337*f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
338*f9790aebSLuigi Rizzo 
339*f9790aebSLuigi Rizzo 	namelen = strlen(NM_NAME);	/* base length */
340*f9790aebSLuigi Rizzo 	l = name ? strlen(name) : 0;		/* actual length */
341*f9790aebSLuigi Rizzo 	if (l < namelen) {
342*f9790aebSLuigi Rizzo 		D("invalid bridge name %s", name ? name : NULL);
343*f9790aebSLuigi Rizzo 		return NULL;
344*f9790aebSLuigi Rizzo 	}
345*f9790aebSLuigi Rizzo 	for (i = namelen + 1; i < l; i++) {
346*f9790aebSLuigi Rizzo 		if (name[i] == ':') {
347*f9790aebSLuigi Rizzo 			namelen = i;
348*f9790aebSLuigi Rizzo 			break;
349*f9790aebSLuigi Rizzo 		}
350*f9790aebSLuigi Rizzo 	}
351*f9790aebSLuigi Rizzo 	if (namelen >= IFNAMSIZ)
352*f9790aebSLuigi Rizzo 		namelen = IFNAMSIZ;
353*f9790aebSLuigi Rizzo 	ND("--- prefix is '%.*s' ---", namelen, name);
354*f9790aebSLuigi Rizzo 
355*f9790aebSLuigi Rizzo 	/* lookup the name, remember empty slot if there is one */
356*f9790aebSLuigi Rizzo 	for (i = 0; i < NM_BRIDGES; i++) {
357*f9790aebSLuigi Rizzo 		struct nm_bridge *x = nm_bridges + i;
358*f9790aebSLuigi Rizzo 
359*f9790aebSLuigi Rizzo 		if (x->bdg_active_ports == 0) {
360*f9790aebSLuigi Rizzo 			if (create && b == NULL)
361*f9790aebSLuigi Rizzo 				b = x;	/* record empty slot */
362*f9790aebSLuigi Rizzo 		} else if (x->bdg_namelen != namelen) {
363*f9790aebSLuigi Rizzo 			continue;
364*f9790aebSLuigi Rizzo 		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
365*f9790aebSLuigi Rizzo 			ND("found '%.*s' at %d", namelen, name, i);
366*f9790aebSLuigi Rizzo 			b = x;
367*f9790aebSLuigi Rizzo 			break;
368*f9790aebSLuigi Rizzo 		}
369*f9790aebSLuigi Rizzo 	}
370*f9790aebSLuigi Rizzo 	if (i == NM_BRIDGES && b) { /* name not found, can create entry */
371*f9790aebSLuigi Rizzo 		/* initialize the bridge */
372*f9790aebSLuigi Rizzo 		strncpy(b->bdg_basename, name, namelen);
373*f9790aebSLuigi Rizzo 		ND("create new bridge %s with ports %d", b->bdg_basename,
374*f9790aebSLuigi Rizzo 			b->bdg_active_ports);
375*f9790aebSLuigi Rizzo 		b->bdg_namelen = namelen;
376*f9790aebSLuigi Rizzo 		b->bdg_active_ports = 0;
377*f9790aebSLuigi Rizzo 		for (i = 0; i < NM_BDG_MAXPORTS; i++)
378*f9790aebSLuigi Rizzo 			b->bdg_port_index[i] = i;
379*f9790aebSLuigi Rizzo 		/* set the default function */
380*f9790aebSLuigi Rizzo 		b->nm_bdg_lookup = netmap_bdg_learning;
381*f9790aebSLuigi Rizzo 		/* reset the MAC address table */
382*f9790aebSLuigi Rizzo 		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
383*f9790aebSLuigi Rizzo 	}
384*f9790aebSLuigi Rizzo 	return b;
385*f9790aebSLuigi Rizzo }
386*f9790aebSLuigi Rizzo 
387*f9790aebSLuigi Rizzo 
388*f9790aebSLuigi Rizzo /*
389*f9790aebSLuigi Rizzo  * Free the forwarding tables for rings attached to switch ports.
390*f9790aebSLuigi Rizzo  */
391*f9790aebSLuigi Rizzo static void
392*f9790aebSLuigi Rizzo nm_free_bdgfwd(struct netmap_adapter *na)
393*f9790aebSLuigi Rizzo {
394*f9790aebSLuigi Rizzo 	int nrings, i;
395*f9790aebSLuigi Rizzo 	struct netmap_kring *kring;
396*f9790aebSLuigi Rizzo 
397*f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
398*f9790aebSLuigi Rizzo 	nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings;
399*f9790aebSLuigi Rizzo 	kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings;
400*f9790aebSLuigi Rizzo 	for (i = 0; i < nrings; i++) {
401*f9790aebSLuigi Rizzo 		if (kring[i].nkr_ft) {
402*f9790aebSLuigi Rizzo 			free(kring[i].nkr_ft, M_DEVBUF);
403*f9790aebSLuigi Rizzo 			kring[i].nkr_ft = NULL; /* protect from freeing twice */
404*f9790aebSLuigi Rizzo 		}
405*f9790aebSLuigi Rizzo 	}
406*f9790aebSLuigi Rizzo }
407*f9790aebSLuigi Rizzo 
408*f9790aebSLuigi Rizzo 
409*f9790aebSLuigi Rizzo /*
410*f9790aebSLuigi Rizzo  * Allocate the forwarding tables for the rings attached to the bridge ports.
411*f9790aebSLuigi Rizzo  */
412*f9790aebSLuigi Rizzo static int
413*f9790aebSLuigi Rizzo nm_alloc_bdgfwd(struct netmap_adapter *na)
414*f9790aebSLuigi Rizzo {
415*f9790aebSLuigi Rizzo 	int nrings, l, i, num_dstq;
416*f9790aebSLuigi Rizzo 	struct netmap_kring *kring;
417*f9790aebSLuigi Rizzo 
418*f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
419*f9790aebSLuigi Rizzo 	/* all port:rings + broadcast */
420*f9790aebSLuigi Rizzo 	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
421*f9790aebSLuigi Rizzo 	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
422*f9790aebSLuigi Rizzo 	l += sizeof(struct nm_bdg_q) * num_dstq;
423*f9790aebSLuigi Rizzo 	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
424*f9790aebSLuigi Rizzo 
425*f9790aebSLuigi Rizzo 	nrings = na->num_tx_rings + 1;
426*f9790aebSLuigi Rizzo 	kring = na->tx_rings;
427*f9790aebSLuigi Rizzo 	for (i = 0; i < nrings; i++) {
428*f9790aebSLuigi Rizzo 		struct nm_bdg_fwd *ft;
429*f9790aebSLuigi Rizzo 		struct nm_bdg_q *dstq;
430*f9790aebSLuigi Rizzo 		int j;
431*f9790aebSLuigi Rizzo 
432*f9790aebSLuigi Rizzo 		ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
433*f9790aebSLuigi Rizzo 		if (!ft) {
434*f9790aebSLuigi Rizzo 			nm_free_bdgfwd(na);
435*f9790aebSLuigi Rizzo 			return ENOMEM;
436*f9790aebSLuigi Rizzo 		}
437*f9790aebSLuigi Rizzo 		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
438*f9790aebSLuigi Rizzo 		for (j = 0; j < num_dstq; j++) {
439*f9790aebSLuigi Rizzo 			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
440*f9790aebSLuigi Rizzo 			dstq[j].bq_len = 0;
441*f9790aebSLuigi Rizzo 		}
442*f9790aebSLuigi Rizzo 		kring[i].nkr_ft = ft;
443*f9790aebSLuigi Rizzo 	}
444*f9790aebSLuigi Rizzo 	return 0;
445*f9790aebSLuigi Rizzo }
446*f9790aebSLuigi Rizzo 
447*f9790aebSLuigi Rizzo 
/*
 * Remove ports hw and (if sw >= 0) sw from bridge b.
 * Presumably called under NMG_LOCK (modifications to bridges are
 * serialized by it per the header comment); the port list is updated
 * on a private copy and published under BDG_WLOCK().
 * NOTE(review): b->bdg_ports[s_hw] is cleared without a >= 0 check,
 * unlike s_sw, so callers are assumed to always pass a valid hw index
 * — confirm at call sites.
 */
static void
netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
{
	int s_hw = hw, s_sw = sw;	/* saved copies; hw/sw are consumed below */
	int i, lim =b->bdg_active_ports;
	uint8_t tmp[NM_BDG_MAXPORTS];	/* private copy of bdg_port_index */

	/*
	New algorithm:
	make a copy of bdg_port_index;
	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
	in the array of bdg_port_index, replacing them with
	entries from the bottom of the array;
	decrement bdg_active_ports;
	acquire BDG_WLOCK() and copy back the array.
	 */

	D("detach %d and %d (lim %d)", hw, sw, lim);
	/* make a copy of the list of active ports, update it,
	 * and then copy back within BDG_WLOCK().
	 */
	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
	/* swap each matching entry with the last active one, shrinking lim;
	 * hw/sw are set to -1 once found so each is removed at most once
	 */
	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
		if (hw >= 0 && tmp[i] == hw) {
			ND("detach hw %d at %d", hw, i);
			lim--; /* point to last active port */
			tmp[i] = tmp[lim]; /* swap with i */
			tmp[lim] = hw;	/* now this is inactive */
			hw = -1;
		} else if (sw >= 0 && tmp[i] == sw) {
			ND("detach sw %d at %d", sw, i);
			lim--;
			tmp[i] = tmp[lim];
			tmp[lim] = sw;
			sw = -1;
		} else {
			i++;
		}
	}
	if (hw >= 0 || sw >= 0) {
		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
	}

	/* publish the new port set atomically w.r.t. the forwarding path */
	BDG_WLOCK(b);
	b->bdg_ports[s_hw] = NULL;
	if (s_sw >= 0) {
		b->bdg_ports[s_sw] = NULL;
	}
	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
	b->bdg_active_ports = lim;
	BDG_WUNLOCK(b);

	ND("now %d active ports", lim);
	if (lim == 0) {
		/* last port gone: clear the lookup fn so the slot reads as free */
		ND("marking bridge %s as free", b->bdg_basename);
		b->nm_bdg_lookup = NULL;
	}
}
506*f9790aebSLuigi Rizzo 
507*f9790aebSLuigi Rizzo static void
508*f9790aebSLuigi Rizzo netmap_adapter_vp_dtor(struct netmap_adapter *na)
509*f9790aebSLuigi Rizzo {
510*f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
511*f9790aebSLuigi Rizzo 	struct nm_bridge *b = vpna->na_bdg;
512*f9790aebSLuigi Rizzo 	struct ifnet *ifp = na->ifp;
513*f9790aebSLuigi Rizzo 
514*f9790aebSLuigi Rizzo 	ND("%s has %d references", NM_IFPNAME(ifp), na->na_refcount);
515*f9790aebSLuigi Rizzo 
516*f9790aebSLuigi Rizzo 	if (b) {
517*f9790aebSLuigi Rizzo 		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
518*f9790aebSLuigi Rizzo 	}
519*f9790aebSLuigi Rizzo 
520*f9790aebSLuigi Rizzo 	bzero(ifp, sizeof(*ifp));
521*f9790aebSLuigi Rizzo 	free(ifp, M_DEVBUF);
522*f9790aebSLuigi Rizzo 	na->ifp = NULL;
523*f9790aebSLuigi Rizzo }
524*f9790aebSLuigi Rizzo 
525*f9790aebSLuigi Rizzo int
526*f9790aebSLuigi Rizzo netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
527*f9790aebSLuigi Rizzo {
528*f9790aebSLuigi Rizzo 	const char *name = nmr->nr_name;
529*f9790aebSLuigi Rizzo 	struct ifnet *ifp;
530*f9790aebSLuigi Rizzo 	int error = 0;
531*f9790aebSLuigi Rizzo 	struct netmap_adapter *ret;
532*f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna;
533*f9790aebSLuigi Rizzo 	struct nm_bridge *b;
534*f9790aebSLuigi Rizzo 	int i, j, cand = -1, cand2 = -1;
535*f9790aebSLuigi Rizzo 	int needed;
536*f9790aebSLuigi Rizzo 
537*f9790aebSLuigi Rizzo 	*na = NULL;     /* default return value */
538*f9790aebSLuigi Rizzo 
539*f9790aebSLuigi Rizzo 	/* first try to see if this is a bridge port. */
540*f9790aebSLuigi Rizzo 	NMG_LOCK_ASSERT();
541*f9790aebSLuigi Rizzo 	if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) {
542*f9790aebSLuigi Rizzo 		return 0;  /* no error, but no VALE prefix */
543*f9790aebSLuigi Rizzo 	}
544*f9790aebSLuigi Rizzo 
545*f9790aebSLuigi Rizzo 	b = nm_find_bridge(name, create);
546*f9790aebSLuigi Rizzo 	if (b == NULL) {
547*f9790aebSLuigi Rizzo 		D("no bridges available for '%s'", name);
548*f9790aebSLuigi Rizzo 		return (ENXIO);
549*f9790aebSLuigi Rizzo 	}
550*f9790aebSLuigi Rizzo 
551*f9790aebSLuigi Rizzo 	/* Now we are sure that name starts with the bridge's name,
552*f9790aebSLuigi Rizzo 	 * lookup the port in the bridge. We need to scan the entire
553*f9790aebSLuigi Rizzo 	 * list. It is not important to hold a WLOCK on the bridge
554*f9790aebSLuigi Rizzo 	 * during the search because NMG_LOCK already guarantees
555*f9790aebSLuigi Rizzo 	 * that there are no other possible writers.
556*f9790aebSLuigi Rizzo 	 */
557*f9790aebSLuigi Rizzo 
558*f9790aebSLuigi Rizzo 	/* lookup in the local list of ports */
559*f9790aebSLuigi Rizzo 	for (j = 0; j < b->bdg_active_ports; j++) {
560*f9790aebSLuigi Rizzo 		i = b->bdg_port_index[j];
561*f9790aebSLuigi Rizzo 		vpna = b->bdg_ports[i];
562*f9790aebSLuigi Rizzo 		// KASSERT(na != NULL);
563*f9790aebSLuigi Rizzo 		ifp = vpna->up.ifp;
564*f9790aebSLuigi Rizzo 		/* XXX make sure the name only contains one : */
565*f9790aebSLuigi Rizzo 		if (!strcmp(NM_IFPNAME(ifp), name)) {
566*f9790aebSLuigi Rizzo 			netmap_adapter_get(&vpna->up);
567*f9790aebSLuigi Rizzo 			ND("found existing if %s refs %d", name,
568*f9790aebSLuigi Rizzo 				vpna->na_bdg_refcount);
569*f9790aebSLuigi Rizzo 			*na = (struct netmap_adapter *)vpna;
570*f9790aebSLuigi Rizzo 			return 0;
571*f9790aebSLuigi Rizzo 		}
572*f9790aebSLuigi Rizzo 	}
573*f9790aebSLuigi Rizzo 	/* not found, should we create it? */
574*f9790aebSLuigi Rizzo 	if (!create)
575*f9790aebSLuigi Rizzo 		return ENXIO;
576*f9790aebSLuigi Rizzo 	/* yes we should, see if we have space to attach entries */
577*f9790aebSLuigi Rizzo 	needed = 2; /* in some cases we only need 1 */
578*f9790aebSLuigi Rizzo 	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
579*f9790aebSLuigi Rizzo 		D("bridge full %d, cannot create new port", b->bdg_active_ports);
580*f9790aebSLuigi Rizzo 		return EINVAL;
581*f9790aebSLuigi Rizzo 	}
582*f9790aebSLuigi Rizzo 	/* record the next two ports available, but do not allocate yet */
583*f9790aebSLuigi Rizzo 	cand = b->bdg_port_index[b->bdg_active_ports];
584*f9790aebSLuigi Rizzo 	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
585*f9790aebSLuigi Rizzo 	ND("+++ bridge %s port %s used %d avail %d %d",
586*f9790aebSLuigi Rizzo 		b->bdg_basename, name, b->bdg_active_ports, cand, cand2);
587*f9790aebSLuigi Rizzo 
588*f9790aebSLuigi Rizzo 	/*
589*f9790aebSLuigi Rizzo 	 * try see if there is a matching NIC with this name
590*f9790aebSLuigi Rizzo 	 * (after the bridge's name)
591*f9790aebSLuigi Rizzo 	 */
592*f9790aebSLuigi Rizzo 	ifp = ifunit_ref(name + b->bdg_namelen + 1);
593*f9790aebSLuigi Rizzo 	if (!ifp) { /* this is a virtual port */
594*f9790aebSLuigi Rizzo 		if (nmr->nr_cmd) {
595*f9790aebSLuigi Rizzo 			/* nr_cmd must be 0 for a virtual port */
596*f9790aebSLuigi Rizzo 			return EINVAL;
597*f9790aebSLuigi Rizzo 		}
598*f9790aebSLuigi Rizzo 
599*f9790aebSLuigi Rizzo 	 	/* create a struct ifnet for the new port.
600*f9790aebSLuigi Rizzo 		 * need M_NOWAIT as we are under nma_lock
601*f9790aebSLuigi Rizzo 		 */
602*f9790aebSLuigi Rizzo 		ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
603*f9790aebSLuigi Rizzo 		if (!ifp)
604*f9790aebSLuigi Rizzo 			return ENOMEM;
605*f9790aebSLuigi Rizzo 
606*f9790aebSLuigi Rizzo 		strcpy(ifp->if_xname, name);
607*f9790aebSLuigi Rizzo 		/* bdg_netmap_attach creates a struct netmap_adapter */
608*f9790aebSLuigi Rizzo 		error = bdg_netmap_attach(nmr, ifp);
609*f9790aebSLuigi Rizzo 		if (error) {
610*f9790aebSLuigi Rizzo 			D("error %d", error);
611*f9790aebSLuigi Rizzo 			free(ifp, M_DEVBUF);
612*f9790aebSLuigi Rizzo 			return error;
613*f9790aebSLuigi Rizzo 		}
614*f9790aebSLuigi Rizzo 		ret = NA(ifp);
615*f9790aebSLuigi Rizzo 		cand2 = -1;	/* only need one port */
616*f9790aebSLuigi Rizzo 	} else {  /* this is a NIC */
617*f9790aebSLuigi Rizzo 		struct ifnet *fake_ifp;
618*f9790aebSLuigi Rizzo 
619*f9790aebSLuigi Rizzo 		error = netmap_get_hw_na(ifp, &ret);
620*f9790aebSLuigi Rizzo 		if (error || ret == NULL)
621*f9790aebSLuigi Rizzo 			goto out;
622*f9790aebSLuigi Rizzo 
623*f9790aebSLuigi Rizzo 		/* make sure the NIC is not already in use */
624*f9790aebSLuigi Rizzo 		if (NETMAP_OWNED_BY_ANY(ret)) {
625*f9790aebSLuigi Rizzo 			D("NIC %s busy, cannot attach to bridge",
626*f9790aebSLuigi Rizzo 				NM_IFPNAME(ifp));
627*f9790aebSLuigi Rizzo 			error = EINVAL;
628*f9790aebSLuigi Rizzo 			goto out;
629*f9790aebSLuigi Rizzo 		}
630*f9790aebSLuigi Rizzo 		/* create a fake interface */
631*f9790aebSLuigi Rizzo 		fake_ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
632*f9790aebSLuigi Rizzo 		if (!fake_ifp) {
633*f9790aebSLuigi Rizzo 			error = ENOMEM;
634*f9790aebSLuigi Rizzo 			goto out;
635*f9790aebSLuigi Rizzo 		}
636*f9790aebSLuigi Rizzo 		strcpy(fake_ifp->if_xname, name);
637*f9790aebSLuigi Rizzo 		error = netmap_bwrap_attach(fake_ifp, ifp);
638*f9790aebSLuigi Rizzo 		if (error) {
639*f9790aebSLuigi Rizzo 			free(fake_ifp, M_DEVBUF);
640*f9790aebSLuigi Rizzo 			goto out;
641*f9790aebSLuigi Rizzo 		}
642*f9790aebSLuigi Rizzo 		ret = NA(fake_ifp);
643*f9790aebSLuigi Rizzo 		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
644*f9790aebSLuigi Rizzo 			cand2 = -1; /* only need one port */
645*f9790aebSLuigi Rizzo 		if_rele(ifp);
646*f9790aebSLuigi Rizzo 	}
647*f9790aebSLuigi Rizzo 	vpna = (struct netmap_vp_adapter *)ret;
648*f9790aebSLuigi Rizzo 
649*f9790aebSLuigi Rizzo 	BDG_WLOCK(b);
650*f9790aebSLuigi Rizzo 	vpna->bdg_port = cand;
651*f9790aebSLuigi Rizzo 	ND("NIC  %p to bridge port %d", vpna, cand);
652*f9790aebSLuigi Rizzo 	/* bind the port to the bridge (virtual ports are not active) */
653*f9790aebSLuigi Rizzo 	b->bdg_ports[cand] = vpna;
654*f9790aebSLuigi Rizzo 	vpna->na_bdg = b;
655*f9790aebSLuigi Rizzo 	b->bdg_active_ports++;
656*f9790aebSLuigi Rizzo 	if (cand2 >= 0) {
657*f9790aebSLuigi Rizzo 		struct netmap_vp_adapter *hostna = vpna + 1;
658*f9790aebSLuigi Rizzo 		/* also bind the host stack to the bridge */
659*f9790aebSLuigi Rizzo 		b->bdg_ports[cand2] = hostna;
660*f9790aebSLuigi Rizzo 		hostna->bdg_port = cand2;
661*f9790aebSLuigi Rizzo 		hostna->na_bdg = b;
662*f9790aebSLuigi Rizzo 		b->bdg_active_ports++;
663*f9790aebSLuigi Rizzo 		ND("host %p to bridge port %d", hostna, cand2);
664*f9790aebSLuigi Rizzo 	}
665*f9790aebSLuigi Rizzo 	ND("if %s refs %d", name, vpna->up.na_refcount);
666*f9790aebSLuigi Rizzo 	BDG_WUNLOCK(b);
667*f9790aebSLuigi Rizzo 	*na = ret;
668*f9790aebSLuigi Rizzo 	netmap_adapter_get(ret);
669*f9790aebSLuigi Rizzo 	return 0;
670*f9790aebSLuigi Rizzo 
671*f9790aebSLuigi Rizzo out:
672*f9790aebSLuigi Rizzo 	if_rele(ifp);
673*f9790aebSLuigi Rizzo 
674*f9790aebSLuigi Rizzo 	return error;
675*f9790aebSLuigi Rizzo }
676*f9790aebSLuigi Rizzo 
677*f9790aebSLuigi Rizzo 
678*f9790aebSLuigi Rizzo /* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */
679*f9790aebSLuigi Rizzo static int
680*f9790aebSLuigi Rizzo nm_bdg_attach(struct nmreq *nmr)
681*f9790aebSLuigi Rizzo {
682*f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
683*f9790aebSLuigi Rizzo 	struct netmap_if *nifp;
684*f9790aebSLuigi Rizzo 	struct netmap_priv_d *npriv;
685*f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna;
686*f9790aebSLuigi Rizzo 	int error;
687*f9790aebSLuigi Rizzo 
688*f9790aebSLuigi Rizzo 	npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
689*f9790aebSLuigi Rizzo 	if (npriv == NULL)
690*f9790aebSLuigi Rizzo 		return ENOMEM;
691*f9790aebSLuigi Rizzo 	NMG_LOCK();
692*f9790aebSLuigi Rizzo 	/* XXX probably netmap_get_bdg_na() */
693*f9790aebSLuigi Rizzo 	error = netmap_get_na(nmr, &na, 1 /* create if not exists */);
694*f9790aebSLuigi Rizzo 	if (error) /* no device, or another bridge or user owns the device */
695*f9790aebSLuigi Rizzo 		goto unlock_exit;
696*f9790aebSLuigi Rizzo 	/* netmap_get_na() sets na_bdg if this is a physical interface
697*f9790aebSLuigi Rizzo 	 * that we can attach to a switch.
698*f9790aebSLuigi Rizzo 	 */
699*f9790aebSLuigi Rizzo 	if (!nma_is_bwrap(na)) {
700*f9790aebSLuigi Rizzo 		/* got reference to a virtual port or direct access to a NIC.
701*f9790aebSLuigi Rizzo 		 * perhaps specified no bridge prefix or wrong NIC name
702*f9790aebSLuigi Rizzo 		 */
703*f9790aebSLuigi Rizzo 		error = EINVAL;
704*f9790aebSLuigi Rizzo 		goto unref_exit;
705*f9790aebSLuigi Rizzo 	}
706*f9790aebSLuigi Rizzo 
707*f9790aebSLuigi Rizzo 	if (na->active_fds > 0) { /* already registered */
708*f9790aebSLuigi Rizzo 		error = EBUSY;
709*f9790aebSLuigi Rizzo 		goto unref_exit;
710*f9790aebSLuigi Rizzo 	}
711*f9790aebSLuigi Rizzo 
712*f9790aebSLuigi Rizzo 	nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, &error);
713*f9790aebSLuigi Rizzo 	if (!nifp) {
714*f9790aebSLuigi Rizzo 		goto unref_exit;
715*f9790aebSLuigi Rizzo 	}
716*f9790aebSLuigi Rizzo 
717*f9790aebSLuigi Rizzo 	bna = (struct netmap_bwrap_adapter*)na;
718*f9790aebSLuigi Rizzo 	bna->na_kpriv = npriv;
719*f9790aebSLuigi Rizzo 	NMG_UNLOCK();
720*f9790aebSLuigi Rizzo 	ND("registered %s to netmap-mode", NM_IFPNAME(na->ifp));
721*f9790aebSLuigi Rizzo 	return 0;
722*f9790aebSLuigi Rizzo 
723*f9790aebSLuigi Rizzo unref_exit:
724*f9790aebSLuigi Rizzo 	netmap_adapter_put(na);
725*f9790aebSLuigi Rizzo unlock_exit:
726*f9790aebSLuigi Rizzo 	NMG_UNLOCK();
727*f9790aebSLuigi Rizzo 	bzero(npriv, sizeof(*npriv));
728*f9790aebSLuigi Rizzo 	free(npriv, M_DEVBUF);
729*f9790aebSLuigi Rizzo 	return error;
730*f9790aebSLuigi Rizzo }
731*f9790aebSLuigi Rizzo 
732*f9790aebSLuigi Rizzo static int
733*f9790aebSLuigi Rizzo nm_bdg_detach(struct nmreq *nmr)
734*f9790aebSLuigi Rizzo {
735*f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
736*f9790aebSLuigi Rizzo 	int error;
737*f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna;
738*f9790aebSLuigi Rizzo 	int last_instance;
739*f9790aebSLuigi Rizzo 
740*f9790aebSLuigi Rizzo 	NMG_LOCK();
741*f9790aebSLuigi Rizzo 	error = netmap_get_na(nmr, &na, 0 /* don't create */);
742*f9790aebSLuigi Rizzo 	if (error) { /* no device, or another bridge or user owns the device */
743*f9790aebSLuigi Rizzo 		goto unlock_exit;
744*f9790aebSLuigi Rizzo 	}
745*f9790aebSLuigi Rizzo 	if (!nma_is_bwrap(na)) {
746*f9790aebSLuigi Rizzo 		/* got reference to a virtual port or direct access to a NIC.
747*f9790aebSLuigi Rizzo 		 * perhaps specified no bridge's prefix or wrong NIC's name
748*f9790aebSLuigi Rizzo 		 */
749*f9790aebSLuigi Rizzo 		error = EINVAL;
750*f9790aebSLuigi Rizzo 		goto unref_exit;
751*f9790aebSLuigi Rizzo 	}
752*f9790aebSLuigi Rizzo 	bna = (struct netmap_bwrap_adapter *)na;
753*f9790aebSLuigi Rizzo 
754*f9790aebSLuigi Rizzo 	if (na->active_fds == 0) { /* not registered */
755*f9790aebSLuigi Rizzo 		error = EINVAL;
756*f9790aebSLuigi Rizzo 		goto unref_exit;
757*f9790aebSLuigi Rizzo 	}
758*f9790aebSLuigi Rizzo 
759*f9790aebSLuigi Rizzo 	last_instance = netmap_dtor_locked(bna->na_kpriv); /* unregister */
760*f9790aebSLuigi Rizzo 	if (!last_instance) {
761*f9790aebSLuigi Rizzo 		D("--- error, trying to detach an entry with active mmaps");
762*f9790aebSLuigi Rizzo 		error = EINVAL;
763*f9790aebSLuigi Rizzo 	} else {
764*f9790aebSLuigi Rizzo 		struct netmap_priv_d *npriv = bna->na_kpriv;
765*f9790aebSLuigi Rizzo 
766*f9790aebSLuigi Rizzo 		bna->na_kpriv = NULL;
767*f9790aebSLuigi Rizzo 		D("deleting priv");
768*f9790aebSLuigi Rizzo 
769*f9790aebSLuigi Rizzo 		bzero(npriv, sizeof(*npriv));
770*f9790aebSLuigi Rizzo 		free(npriv, M_DEVBUF);
771*f9790aebSLuigi Rizzo 	}
772*f9790aebSLuigi Rizzo 
773*f9790aebSLuigi Rizzo unref_exit:
774*f9790aebSLuigi Rizzo 	netmap_adapter_put(na);
775*f9790aebSLuigi Rizzo unlock_exit:
776*f9790aebSLuigi Rizzo 	NMG_UNLOCK();
777*f9790aebSLuigi Rizzo 	return error;
778*f9790aebSLuigi Rizzo 
779*f9790aebSLuigi Rizzo }
780*f9790aebSLuigi Rizzo 
781*f9790aebSLuigi Rizzo 
/* Control operations on VALE bridges (the NETMAP_BDG_* subcommands).
 * Exported to kernel callers, e.g. OVS ?
 * Entry point.
 * Called without NMG_LOCK; each branch acquires the locks it needs.
 */
int
netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
{
	struct nm_bridge *b;
	struct netmap_adapter *na;
	struct netmap_vp_adapter *vpna;
	struct ifnet *iter;
	char *name = nmr->nr_name;
	int cmd = nmr->nr_cmd, namelen = strlen(name);
	int error = 0, i, j;

	switch (cmd) {
	case NETMAP_BDG_ATTACH:
		/* attach an existing or newly-created port to a bridge */
		error = nm_bdg_attach(nmr);
		break;

	case NETMAP_BDG_DETACH:
		/* detach a previously attached port */
		error = nm_bdg_detach(nmr);
		break;

	case NETMAP_BDG_LIST:
		/* this is used to enumerate bridges and ports */
		if (namelen) { /* look up indexes of bridge and port */
			/* names of VALE ports must start with the NM_NAME
			 * ("vale") prefix
			 */
			if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
				error = EINVAL;
				break;
			}
			NMG_LOCK();
			b = nm_find_bridge(name, 0 /* don't create */);
			if (!b) {
				error = ENOENT;
				NMG_UNLOCK();
				break;
			}

			/* scan the active ports of this bridge for a
			 * matching interface name; ENOENT if none matches
			 */
			error = ENOENT;
			for (j = 0; j < b->bdg_active_ports; j++) {
				i = b->bdg_port_index[j];
				vpna = b->bdg_ports[i];
				if (vpna == NULL) {
					/* active slot with no adapter:
					 * should not happen
					 */
					D("---AAAAAAAAARGH-------");
					continue;
				}
				iter = vpna->up.ifp;
				/* the former and the latter identify a
				 * virtual port and a NIC, respectively
				 */
				if (!strcmp(iter->if_xname, name)) {
					/* bridge index */
					nmr->nr_arg1 = b - nm_bridges;
					nmr->nr_arg2 = i; /* port index */
					error = 0;
					break;
				}
			}
			NMG_UNLOCK();
		} else {
			/* return the first non-empty entry starting from
			 * bridge nr_arg1 and port nr_arg2.
			 *
			 * Users can detect the end of the same bridge by
			 * seeing the new and old value of nr_arg1, and can
			 * detect the end of all the bridge by error != 0
			 */
			i = nmr->nr_arg1;
			j = nmr->nr_arg2;

			NMG_LOCK();
			for (error = ENOENT; i < NM_BRIDGES; i++) {
				b = nm_bridges + i;
				if (j >= b->bdg_active_ports) {
					j = 0; /* following bridges scan from 0 */
					continue;
				}
				nmr->nr_arg1 = i;
				nmr->nr_arg2 = j;
				j = b->bdg_port_index[j];
				vpna = b->bdg_ports[j];
				iter = vpna->up.ifp;
				/* NOTE(review): strncpy does not guarantee
				 * NUL-termination in general; this relies on
				 * if_xname always being NUL-terminated within
				 * IFNAMSIZ — confirm that kernel invariant.
				 */
				strncpy(name, iter->if_xname, (size_t)IFNAMSIZ);
				error = 0;
				break;
			}
			NMG_UNLOCK();
		}
		break;

	case NETMAP_BDG_LOOKUP_REG:
		/* register a lookup function to the given bridge.
		 * nmr->nr_name may be just bridge's name (including ':'
		 * if it is not just NM_NAME).
		 */
		if (!func) {
			error = EINVAL;
			break;
		}
		NMG_LOCK();
		b = nm_find_bridge(name, 0 /* don't create */);
		if (!b) {
			error = EINVAL;
		} else {
			b->nm_bdg_lookup = func;
		}
		NMG_UNLOCK();
		break;

	case NETMAP_BDG_OFFSET:
		/* set the payload offset (clamped to the maximum) used
		 * when looking up the destination of frames from this port
		 */
		NMG_LOCK();
		error = netmap_get_bdg_na(nmr, &na, 0);
		if (!error) {
			vpna = (struct netmap_vp_adapter *)na;
			if (nmr->nr_arg1 > NETMAP_BDG_MAX_OFFSET)
				nmr->nr_arg1 = NETMAP_BDG_MAX_OFFSET;
			vpna->offset = nmr->nr_arg1;
			D("Using offset %d for %p", vpna->offset, vpna);
		}
		NMG_UNLOCK();
		break;

	default:
		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
		error = EINVAL;
		break;
	}
	return error;
}
912*f9790aebSLuigi Rizzo 
913*f9790aebSLuigi Rizzo 
914*f9790aebSLuigi Rizzo static int
915*f9790aebSLuigi Rizzo netmap_vp_krings_create(struct netmap_adapter *na)
916*f9790aebSLuigi Rizzo {
917*f9790aebSLuigi Rizzo 	u_int ntx, nrx, tailroom;
918*f9790aebSLuigi Rizzo 	int error, i;
919*f9790aebSLuigi Rizzo 	uint32_t *leases;
920*f9790aebSLuigi Rizzo 
921*f9790aebSLuigi Rizzo 	/* XXX vps do not need host rings,
922*f9790aebSLuigi Rizzo 	 * but we crash if we don't have one
923*f9790aebSLuigi Rizzo 	 */
924*f9790aebSLuigi Rizzo 	ntx = na->num_tx_rings + 1;
925*f9790aebSLuigi Rizzo 	nrx = na->num_rx_rings + 1;
926*f9790aebSLuigi Rizzo 
927*f9790aebSLuigi Rizzo 	/*
928*f9790aebSLuigi Rizzo 	 * Leases are attached to RX rings on vale ports
929*f9790aebSLuigi Rizzo 	 */
930*f9790aebSLuigi Rizzo 	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
931*f9790aebSLuigi Rizzo 
932*f9790aebSLuigi Rizzo 	error = netmap_krings_create(na, ntx, nrx, tailroom);
933*f9790aebSLuigi Rizzo 	if (error)
934*f9790aebSLuigi Rizzo 		return error;
935*f9790aebSLuigi Rizzo 
936*f9790aebSLuigi Rizzo 	leases = na->tailroom;
937*f9790aebSLuigi Rizzo 
938*f9790aebSLuigi Rizzo 	for (i = 0; i < nrx; i++) { /* Receive rings */
939*f9790aebSLuigi Rizzo 		na->rx_rings[i].nkr_leases = leases;
940*f9790aebSLuigi Rizzo 		leases += na->num_rx_desc;
941*f9790aebSLuigi Rizzo 	}
942*f9790aebSLuigi Rizzo 
943*f9790aebSLuigi Rizzo 	error = nm_alloc_bdgfwd(na);
944*f9790aebSLuigi Rizzo 	if (error) {
945*f9790aebSLuigi Rizzo 		netmap_krings_delete(na);
946*f9790aebSLuigi Rizzo 		return error;
947*f9790aebSLuigi Rizzo 	}
948*f9790aebSLuigi Rizzo 
949*f9790aebSLuigi Rizzo 	return 0;
950*f9790aebSLuigi Rizzo }
951*f9790aebSLuigi Rizzo 
/* Undo netmap_vp_krings_create(): release the forwarding work areas
 * first, then the krings (the lease arrays live in the krings'
 * tailroom, so they are freed by netmap_krings_delete()).
 */
static void
netmap_vp_krings_delete(struct netmap_adapter *na)
{
	nm_free_bdgfwd(na);
	netmap_krings_delete(na);
}
958*f9790aebSLuigi Rizzo 
959*f9790aebSLuigi Rizzo 
960*f9790aebSLuigi Rizzo static int
961*f9790aebSLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
962*f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *na, u_int ring_nr);
963*f9790aebSLuigi Rizzo 
964*f9790aebSLuigi Rizzo 
965*f9790aebSLuigi Rizzo /*
966*f9790aebSLuigi Rizzo  * Grab packets from a kring, move them into the ft structure
967*f9790aebSLuigi Rizzo  * associated to the tx (input) port. Max one instance per port,
968*f9790aebSLuigi Rizzo  * filtered on input (ioctl, poll or XXX).
969*f9790aebSLuigi Rizzo  * Returns the next position in the ring.
970*f9790aebSLuigi Rizzo  */
971*f9790aebSLuigi Rizzo static int
972*f9790aebSLuigi Rizzo nm_bdg_preflush(struct netmap_vp_adapter *na, u_int ring_nr,
973*f9790aebSLuigi Rizzo 	struct netmap_kring *kring, u_int end)
974*f9790aebSLuigi Rizzo {
975*f9790aebSLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
976*f9790aebSLuigi Rizzo 	struct nm_bdg_fwd *ft;
977*f9790aebSLuigi Rizzo 	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
978*f9790aebSLuigi Rizzo 	u_int ft_i = 0;	/* start from 0 */
979*f9790aebSLuigi Rizzo 	u_int frags = 1; /* how many frags ? */
980*f9790aebSLuigi Rizzo 	struct nm_bridge *b = na->na_bdg;
981*f9790aebSLuigi Rizzo 
982*f9790aebSLuigi Rizzo 	/* To protect against modifications to the bridge we acquire a
983*f9790aebSLuigi Rizzo 	 * shared lock, waiting if we can sleep (if the source port is
984*f9790aebSLuigi Rizzo 	 * attached to a user process) or with a trylock otherwise (NICs).
985*f9790aebSLuigi Rizzo 	 */
986*f9790aebSLuigi Rizzo 	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
987*f9790aebSLuigi Rizzo 	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
988*f9790aebSLuigi Rizzo 		BDG_RLOCK(b);
989*f9790aebSLuigi Rizzo 	else if (!BDG_RTRYLOCK(b))
990*f9790aebSLuigi Rizzo 		return 0;
991*f9790aebSLuigi Rizzo 	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
992*f9790aebSLuigi Rizzo 	ft = kring->nkr_ft;
993*f9790aebSLuigi Rizzo 
994*f9790aebSLuigi Rizzo 	for (; likely(j != end); j = nm_next(j, lim)) {
995*f9790aebSLuigi Rizzo 		struct netmap_slot *slot = &ring->slot[j];
996*f9790aebSLuigi Rizzo 		char *buf;
997*f9790aebSLuigi Rizzo 
998*f9790aebSLuigi Rizzo 		ft[ft_i].ft_len = slot->len;
999*f9790aebSLuigi Rizzo 		ft[ft_i].ft_flags = slot->flags;
1000*f9790aebSLuigi Rizzo 
1001*f9790aebSLuigi Rizzo 		ND("flags is 0x%x", slot->flags);
1002*f9790aebSLuigi Rizzo 		/* this slot goes into a list so initialize the link field */
1003*f9790aebSLuigi Rizzo 		ft[ft_i].ft_next = NM_FT_NULL;
1004*f9790aebSLuigi Rizzo 		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
1005*f9790aebSLuigi Rizzo 			(void *)(uintptr_t)slot->ptr : BDG_NMB(&na->up, slot);
1006*f9790aebSLuigi Rizzo 		prefetch(buf);
1007*f9790aebSLuigi Rizzo 		++ft_i;
1008*f9790aebSLuigi Rizzo 		if (slot->flags & NS_MOREFRAG) {
1009*f9790aebSLuigi Rizzo 			frags++;
1010*f9790aebSLuigi Rizzo 			continue;
1011*f9790aebSLuigi Rizzo 		}
1012*f9790aebSLuigi Rizzo 		if (unlikely(netmap_verbose && frags > 1))
1013*f9790aebSLuigi Rizzo 			RD(5, "%d frags at %d", frags, ft_i - frags);
1014*f9790aebSLuigi Rizzo 		ft[ft_i - frags].ft_frags = frags;
1015*f9790aebSLuigi Rizzo 		frags = 1;
1016*f9790aebSLuigi Rizzo 		if (unlikely((int)ft_i >= bridge_batch))
1017*f9790aebSLuigi Rizzo 			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1018*f9790aebSLuigi Rizzo 	}
1019*f9790aebSLuigi Rizzo 	if (frags > 1) {
1020*f9790aebSLuigi Rizzo 		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
1021*f9790aebSLuigi Rizzo 		// ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
1022*f9790aebSLuigi Rizzo 		ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
1023*f9790aebSLuigi Rizzo 		ft[ft_i - frags].ft_frags = frags - 1;
1024*f9790aebSLuigi Rizzo 	}
1025*f9790aebSLuigi Rizzo 	if (ft_i)
1026*f9790aebSLuigi Rizzo 		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1027*f9790aebSLuigi Rizzo 	BDG_RUNLOCK(b);
1028*f9790aebSLuigi Rizzo 	return j;
1029*f9790aebSLuigi Rizzo }
1030*f9790aebSLuigi Rizzo 
1031*f9790aebSLuigi Rizzo 
1032*f9790aebSLuigi Rizzo /*
1033*f9790aebSLuigi Rizzo  *---- support for virtual bridge -----
1034*f9790aebSLuigi Rizzo  */
1035*f9790aebSLuigi Rizzo 
1036*f9790aebSLuigi Rizzo /* ----- FreeBSD if_bridge hash function ------- */
1037*f9790aebSLuigi Rizzo 
1038*f9790aebSLuigi Rizzo /*
1039*f9790aebSLuigi Rizzo  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
1040*f9790aebSLuigi Rizzo  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
1041*f9790aebSLuigi Rizzo  *
1042*f9790aebSLuigi Rizzo  * http://www.burtleburtle.net/bob/hash/spooky.html
1043*f9790aebSLuigi Rizzo  */
/* One round of the Jenkins mixing step: reversibly scrambles the three
 * 32-bit state words so each input bit influences many output bits.
 * NOTE: every argument is evaluated multiple times, so callers must
 * pass plain lvalues with no side effects (as nm_bridge_rthash() does).
 */
#define mix(a, b, c)                                                    \
do {                                                                    \
        a -= b; a -= c; a ^= (c >> 13);                                 \
        b -= c; b -= a; b ^= (a << 8);                                  \
        c -= a; c -= b; c ^= (b >> 13);                                 \
        a -= b; a -= c; a ^= (c >> 12);                                 \
        b -= c; b -= a; b ^= (a << 16);                                 \
        c -= a; c -= b; c ^= (b >> 5);                                  \
        a -= b; a -= c; a ^= (c >> 3);                                  \
        b -= c; b -= a; b ^= (a << 10);                                 \
        c -= a; c -= b; c ^= (b >> 15);                                 \
} while (/*CONSTCOND*/0)
1056*f9790aebSLuigi Rizzo 
1057*f9790aebSLuigi Rizzo static __inline uint32_t
1058*f9790aebSLuigi Rizzo nm_bridge_rthash(const uint8_t *addr)
1059*f9790aebSLuigi Rizzo {
1060*f9790aebSLuigi Rizzo         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key
1061*f9790aebSLuigi Rizzo 
1062*f9790aebSLuigi Rizzo         b += addr[5] << 8;
1063*f9790aebSLuigi Rizzo         b += addr[4];
1064*f9790aebSLuigi Rizzo         a += addr[3] << 24;
1065*f9790aebSLuigi Rizzo         a += addr[2] << 16;
1066*f9790aebSLuigi Rizzo         a += addr[1] << 8;
1067*f9790aebSLuigi Rizzo         a += addr[0];
1068*f9790aebSLuigi Rizzo 
1069*f9790aebSLuigi Rizzo         mix(a, b, c);
1070*f9790aebSLuigi Rizzo #define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
1071*f9790aebSLuigi Rizzo         return (c & BRIDGE_RTHASH_MASK);
1072*f9790aebSLuigi Rizzo }
1073*f9790aebSLuigi Rizzo 
1074*f9790aebSLuigi Rizzo #undef mix
1075*f9790aebSLuigi Rizzo 
1076*f9790aebSLuigi Rizzo 
1077*f9790aebSLuigi Rizzo static int
1078*f9790aebSLuigi Rizzo bdg_netmap_reg(struct netmap_adapter *na, int onoff)
1079*f9790aebSLuigi Rizzo {
1080*f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna =
1081*f9790aebSLuigi Rizzo 		(struct netmap_vp_adapter*)na;
1082*f9790aebSLuigi Rizzo 	struct ifnet *ifp = na->ifp;
1083*f9790aebSLuigi Rizzo 
1084*f9790aebSLuigi Rizzo 	/* the interface is already attached to the bridge,
1085*f9790aebSLuigi Rizzo 	 * so we only need to toggle IFCAP_NETMAP.
1086*f9790aebSLuigi Rizzo 	 */
1087*f9790aebSLuigi Rizzo 	BDG_WLOCK(vpna->na_bdg);
1088*f9790aebSLuigi Rizzo 	if (onoff) {
1089*f9790aebSLuigi Rizzo 		ifp->if_capenable |= IFCAP_NETMAP;
1090*f9790aebSLuigi Rizzo 	} else {
1091*f9790aebSLuigi Rizzo 		ifp->if_capenable &= ~IFCAP_NETMAP;
1092*f9790aebSLuigi Rizzo 	}
1093*f9790aebSLuigi Rizzo 	BDG_WUNLOCK(vpna->na_bdg);
1094*f9790aebSLuigi Rizzo 	return 0;
1095*f9790aebSLuigi Rizzo }
1096*f9790aebSLuigi Rizzo 
1097*f9790aebSLuigi Rizzo 
1098*f9790aebSLuigi Rizzo /*
1099*f9790aebSLuigi Rizzo  * Lookup function for a learning bridge.
1100*f9790aebSLuigi Rizzo  * Update the hash table with the source address,
1101*f9790aebSLuigi Rizzo  * and then returns the destination port index, and the
1102*f9790aebSLuigi Rizzo  * ring in *dst_ring (at the moment, always use ring 0)
1103*f9790aebSLuigi Rizzo  */
u_int
netmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring,
		struct netmap_vp_adapter *na)
{
	struct nm_hash_ent *ht = na->na_bdg->ht;
	uint32_t sh, dh;
	u_int dst, mysrc = na->bdg_port;
	uint64_t smac, dmac;

	/* need at least a full ethernet header: 2 MACs + ethertype */
	if (buf_len < 14) {
		D("invalid buf length %d", buf_len);
		return NM_BDG_NOPORT;
	}
	/* Load the destination MAC (bytes 0..5) and source MAC
	 * (bytes 6..11) as little-endian integers.
	 * NOTE(review): these are unaligned 64-bit loads through a
	 * cast — assumes the platform tolerates misaligned access;
	 * confirm on strict-alignment targets.
	 */
	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
	smac = le64toh(*(uint64_t *)(buf + 4));
	smac >>= 16;	/* drop bytes 4..5, keeping source bytes 6..11 */

	/*
	 * The hash is somewhat expensive, there might be some
	 * worthwhile optimizations here.
	 */
	if ((buf[6] & 1) == 0) { /* valid src */
		/* buf[6] is the first source-MAC byte; a set LSB would
		 * mean a multicast source, which we do not learn
		 */
		uint8_t *s = buf+6;
		sh = nm_bridge_rthash(s); // XXX hash of source
		/* update source port forwarding entry */
		ht[sh].mac = smac;	/* XXX expire ? */
		ht[sh].ports = mysrc;
		if (netmap_verbose)
		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
	}
	/* default: flood; overridden below on a unicast table hit */
	dst = NM_BDG_BROADCAST;
	if ((buf[0] & 1) == 0) { /* unicast */
		dh = nm_bridge_rthash(buf); // XXX hash of dst
		if (ht[dh].mac == dmac) {	/* found dst */
			dst = ht[dh].ports;
		}
		/* XXX otherwise return NM_BDG_UNKNOWN ? */
	}
	/* at the moment, always deliver to ring 0 of the destination */
	*dst_ring = 0;
	return dst;
}
1146*f9790aebSLuigi Rizzo 
1147*f9790aebSLuigi Rizzo 
1148*f9790aebSLuigi Rizzo /*
1149*f9790aebSLuigi Rizzo  * This flush routine supports only unicast and broadcast but a large
1150*f9790aebSLuigi Rizzo  * number of ports, and lets us replace the learn and dispatch functions.
1151*f9790aebSLuigi Rizzo  */
1152*f9790aebSLuigi Rizzo int
1153*f9790aebSLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
1154*f9790aebSLuigi Rizzo 		u_int ring_nr)
1155*f9790aebSLuigi Rizzo {
1156*f9790aebSLuigi Rizzo 	struct nm_bdg_q *dst_ents, *brddst;
1157*f9790aebSLuigi Rizzo 	uint16_t num_dsts = 0, *dsts;
1158*f9790aebSLuigi Rizzo 	struct nm_bridge *b = na->na_bdg;
1159*f9790aebSLuigi Rizzo 	u_int i, j, me = na->bdg_port;
1160*f9790aebSLuigi Rizzo 
1161*f9790aebSLuigi Rizzo 	/*
1162*f9790aebSLuigi Rizzo 	 * The work area (pointed by ft) is followed by an array of
1163*f9790aebSLuigi Rizzo 	 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
1164*f9790aebSLuigi Rizzo 	 * queues per port plus one for the broadcast traffic.
1165*f9790aebSLuigi Rizzo 	 * Then we have an array of destination indexes.
1166*f9790aebSLuigi Rizzo 	 */
1167*f9790aebSLuigi Rizzo 	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
1168*f9790aebSLuigi Rizzo 	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
1169*f9790aebSLuigi Rizzo 
1170*f9790aebSLuigi Rizzo 	/* first pass: find a destination for each packet in the batch */
1171*f9790aebSLuigi Rizzo 	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
1172*f9790aebSLuigi Rizzo 		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
1173*f9790aebSLuigi Rizzo 		uint16_t dst_port, d_i;
1174*f9790aebSLuigi Rizzo 		struct nm_bdg_q *d;
1175*f9790aebSLuigi Rizzo 		uint8_t *buf = ft[i].ft_buf;
1176*f9790aebSLuigi Rizzo 		u_int len = ft[i].ft_len;
1177*f9790aebSLuigi Rizzo 
1178*f9790aebSLuigi Rizzo 		ND("slot %d frags %d", i, ft[i].ft_frags);
1179*f9790aebSLuigi Rizzo 		/* Drop the packet if the offset is not into the first
1180*f9790aebSLuigi Rizzo 		   fragment nor at the very beginning of the second. */
1181*f9790aebSLuigi Rizzo 		if (unlikely(na->offset > len))
1182*f9790aebSLuigi Rizzo 			continue;
1183*f9790aebSLuigi Rizzo 		if (len == na->offset) {
1184*f9790aebSLuigi Rizzo 			buf = ft[i+1].ft_buf;
1185*f9790aebSLuigi Rizzo 			len = ft[i+1].ft_len;
1186*f9790aebSLuigi Rizzo 		} else {
1187*f9790aebSLuigi Rizzo 			buf += na->offset;
1188*f9790aebSLuigi Rizzo 			len -= na->offset;
1189*f9790aebSLuigi Rizzo 		}
1190*f9790aebSLuigi Rizzo 		dst_port = b->nm_bdg_lookup(buf, len, &dst_ring, na);
1191*f9790aebSLuigi Rizzo 		if (netmap_verbose > 255)
1192*f9790aebSLuigi Rizzo 			RD(5, "slot %d port %d -> %d", i, me, dst_port);
1193*f9790aebSLuigi Rizzo 		if (dst_port == NM_BDG_NOPORT)
1194*f9790aebSLuigi Rizzo 			continue; /* this packet is identified to be dropped */
1195*f9790aebSLuigi Rizzo 		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
1196*f9790aebSLuigi Rizzo 			continue;
1197*f9790aebSLuigi Rizzo 		else if (dst_port == NM_BDG_BROADCAST)
1198*f9790aebSLuigi Rizzo 			dst_ring = 0; /* broadcasts always go to ring 0 */
1199*f9790aebSLuigi Rizzo 		else if (unlikely(dst_port == me ||
1200*f9790aebSLuigi Rizzo 		    !b->bdg_ports[dst_port]))
1201*f9790aebSLuigi Rizzo 			continue;
1202*f9790aebSLuigi Rizzo 
1203*f9790aebSLuigi Rizzo 		/* get a position in the scratch pad */
1204*f9790aebSLuigi Rizzo 		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
1205*f9790aebSLuigi Rizzo 		d = dst_ents + d_i;
1206*f9790aebSLuigi Rizzo 
1207*f9790aebSLuigi Rizzo 		/* append the first fragment to the list */
1208*f9790aebSLuigi Rizzo 		if (d->bq_head == NM_FT_NULL) { /* new destination */
1209*f9790aebSLuigi Rizzo 			d->bq_head = d->bq_tail = i;
1210*f9790aebSLuigi Rizzo 			/* remember this position to be scanned later */
1211*f9790aebSLuigi Rizzo 			if (dst_port != NM_BDG_BROADCAST)
1212*f9790aebSLuigi Rizzo 				dsts[num_dsts++] = d_i;
1213*f9790aebSLuigi Rizzo 		} else {
1214*f9790aebSLuigi Rizzo 			ft[d->bq_tail].ft_next = i;
1215*f9790aebSLuigi Rizzo 			d->bq_tail = i;
1216*f9790aebSLuigi Rizzo 		}
1217*f9790aebSLuigi Rizzo 		d->bq_len += ft[i].ft_frags;
1218*f9790aebSLuigi Rizzo 	}
1219*f9790aebSLuigi Rizzo 
1220*f9790aebSLuigi Rizzo 	/*
1221*f9790aebSLuigi Rizzo 	 * Broadcast traffic goes to ring 0 on all destinations.
1222*f9790aebSLuigi Rizzo 	 * So we need to add these rings to the list of ports to scan.
1223*f9790aebSLuigi Rizzo 	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
1224*f9790aebSLuigi Rizzo 	 * expensive. We should keep a compact list of active destinations
1225*f9790aebSLuigi Rizzo 	 * so we could shorten this loop.
1226*f9790aebSLuigi Rizzo 	 */
1227*f9790aebSLuigi Rizzo 	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
1228*f9790aebSLuigi Rizzo 	if (brddst->bq_head != NM_FT_NULL) {
1229*f9790aebSLuigi Rizzo 		for (j = 0; likely(j < b->bdg_active_ports); j++) {
1230*f9790aebSLuigi Rizzo 			uint16_t d_i;
1231*f9790aebSLuigi Rizzo 			i = b->bdg_port_index[j];
1232*f9790aebSLuigi Rizzo 			if (unlikely(i == me))
1233*f9790aebSLuigi Rizzo 				continue;
1234*f9790aebSLuigi Rizzo 			d_i = i * NM_BDG_MAXRINGS;
1235*f9790aebSLuigi Rizzo 			if (dst_ents[d_i].bq_head == NM_FT_NULL)
1236*f9790aebSLuigi Rizzo 				dsts[num_dsts++] = d_i;
1237*f9790aebSLuigi Rizzo 		}
1238*f9790aebSLuigi Rizzo 	}
1239*f9790aebSLuigi Rizzo 
1240*f9790aebSLuigi Rizzo 	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
1241*f9790aebSLuigi Rizzo 	/* second pass: scan destinations (XXX will be modular somehow) */
1242*f9790aebSLuigi Rizzo 	for (i = 0; i < num_dsts; i++) {
1243*f9790aebSLuigi Rizzo 		struct ifnet *dst_ifp;
1244*f9790aebSLuigi Rizzo 		struct netmap_vp_adapter *dst_na;
1245*f9790aebSLuigi Rizzo 		struct netmap_kring *kring;
1246*f9790aebSLuigi Rizzo 		struct netmap_ring *ring;
1247*f9790aebSLuigi Rizzo 		u_int dst_nr, lim, j, sent = 0, d_i, next, brd_next;
1248*f9790aebSLuigi Rizzo 		u_int needed, howmany;
1249*f9790aebSLuigi Rizzo 		int retry = netmap_txsync_retry;
1250*f9790aebSLuigi Rizzo 		struct nm_bdg_q *d;
1251*f9790aebSLuigi Rizzo 		uint32_t my_start = 0, lease_idx = 0;
1252*f9790aebSLuigi Rizzo 		int nrings;
1253*f9790aebSLuigi Rizzo 		int offset_mismatch;
1254*f9790aebSLuigi Rizzo 
1255*f9790aebSLuigi Rizzo 		d_i = dsts[i];
1256*f9790aebSLuigi Rizzo 		ND("second pass %d port %d", i, d_i);
1257*f9790aebSLuigi Rizzo 		d = dst_ents + d_i;
1258*f9790aebSLuigi Rizzo 		// XXX fix the division
1259*f9790aebSLuigi Rizzo 		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
1260*f9790aebSLuigi Rizzo 		/* protect from the lookup function returning an inactive
1261*f9790aebSLuigi Rizzo 		 * destination port
1262*f9790aebSLuigi Rizzo 		 */
1263*f9790aebSLuigi Rizzo 		if (unlikely(dst_na == NULL))
1264*f9790aebSLuigi Rizzo 			goto cleanup;
1265*f9790aebSLuigi Rizzo 		if (dst_na->up.na_flags & NAF_SW_ONLY)
1266*f9790aebSLuigi Rizzo 			goto cleanup;
1267*f9790aebSLuigi Rizzo 		dst_ifp = dst_na->up.ifp;
1268*f9790aebSLuigi Rizzo 		/*
1269*f9790aebSLuigi Rizzo 		 * The interface may be in !netmap mode in two cases:
1270*f9790aebSLuigi Rizzo 		 * - when na is attached but not activated yet;
1271*f9790aebSLuigi Rizzo 		 * - when na is being deactivated but is still attached.
1272*f9790aebSLuigi Rizzo 		 */
1273*f9790aebSLuigi Rizzo 		if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) {
1274*f9790aebSLuigi Rizzo 			ND("not in netmap mode!");
1275*f9790aebSLuigi Rizzo 			goto cleanup;
1276*f9790aebSLuigi Rizzo 		}
1277*f9790aebSLuigi Rizzo 
1278*f9790aebSLuigi Rizzo 		offset_mismatch = (dst_na->offset != na->offset);
1279*f9790aebSLuigi Rizzo 
1280*f9790aebSLuigi Rizzo 		/* there is at least one either unicast or broadcast packet */
1281*f9790aebSLuigi Rizzo 		brd_next = brddst->bq_head;
1282*f9790aebSLuigi Rizzo 		next = d->bq_head;
1283*f9790aebSLuigi Rizzo 		/* we need to reserve this many slots. If fewer are
1284*f9790aebSLuigi Rizzo 		 * available, some packets will be dropped.
		 * Packets may have multiple fragments, so there is a
		 * chance that we may not use all of the slots
		 * we have claimed, so we will need to handle the leftover
1288*f9790aebSLuigi Rizzo 		 * ones when we regain the lock.
1289*f9790aebSLuigi Rizzo 		 */
1290*f9790aebSLuigi Rizzo 		needed = d->bq_len + brddst->bq_len;
1291*f9790aebSLuigi Rizzo 
1292*f9790aebSLuigi Rizzo 		ND(5, "pass 2 dst %d is %x %s",
1293*f9790aebSLuigi Rizzo 			i, d_i, is_vp ? "virtual" : "nic/host");
1294*f9790aebSLuigi Rizzo 		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
1295*f9790aebSLuigi Rizzo 		nrings = dst_na->up.num_rx_rings;
1296*f9790aebSLuigi Rizzo 		if (dst_nr >= nrings)
1297*f9790aebSLuigi Rizzo 			dst_nr = dst_nr % nrings;
1298*f9790aebSLuigi Rizzo 		kring = &dst_na->up.rx_rings[dst_nr];
1299*f9790aebSLuigi Rizzo 		ring = kring->ring;
1300*f9790aebSLuigi Rizzo 		lim = kring->nkr_num_slots - 1;
1301*f9790aebSLuigi Rizzo 
1302*f9790aebSLuigi Rizzo retry:
1303*f9790aebSLuigi Rizzo 
1304*f9790aebSLuigi Rizzo 		/* reserve the buffers in the queue and an entry
1305*f9790aebSLuigi Rizzo 		 * to report completion, and drop lock.
1306*f9790aebSLuigi Rizzo 		 * XXX this might become a helper function.
1307*f9790aebSLuigi Rizzo 		 */
1308*f9790aebSLuigi Rizzo 		mtx_lock(&kring->q_lock);
1309*f9790aebSLuigi Rizzo 		if (kring->nkr_stopped) {
1310*f9790aebSLuigi Rizzo 			mtx_unlock(&kring->q_lock);
1311*f9790aebSLuigi Rizzo 			goto cleanup;
1312*f9790aebSLuigi Rizzo 		}
1313*f9790aebSLuigi Rizzo 		if (dst_na->retry) {
1314*f9790aebSLuigi Rizzo 			dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1315*f9790aebSLuigi Rizzo 		}
1316*f9790aebSLuigi Rizzo 		my_start = j = kring->nkr_hwlease;
1317*f9790aebSLuigi Rizzo 		howmany = nm_kr_space(kring, 1);
1318*f9790aebSLuigi Rizzo 		if (needed < howmany)
1319*f9790aebSLuigi Rizzo 			howmany = needed;
1320*f9790aebSLuigi Rizzo 		lease_idx = nm_kr_lease(kring, howmany, 1);
1321*f9790aebSLuigi Rizzo 		mtx_unlock(&kring->q_lock);
1322*f9790aebSLuigi Rizzo 
1323*f9790aebSLuigi Rizzo 		/* only retry if we need more than available slots */
1324*f9790aebSLuigi Rizzo 		if (retry && needed <= howmany)
1325*f9790aebSLuigi Rizzo 			retry = 0;
1326*f9790aebSLuigi Rizzo 
1327*f9790aebSLuigi Rizzo 		/* copy to the destination queue */
1328*f9790aebSLuigi Rizzo 		while (howmany > 0) {
1329*f9790aebSLuigi Rizzo 			struct netmap_slot *slot;
1330*f9790aebSLuigi Rizzo 			struct nm_bdg_fwd *ft_p, *ft_end;
1331*f9790aebSLuigi Rizzo 			u_int cnt;
1332*f9790aebSLuigi Rizzo 			int fix_mismatch = offset_mismatch;
1333*f9790aebSLuigi Rizzo 
1334*f9790aebSLuigi Rizzo 			/* find the queue from which we pick next packet.
1335*f9790aebSLuigi Rizzo 			 * NM_FT_NULL is always higher than valid indexes
1336*f9790aebSLuigi Rizzo 			 * so we never dereference it if the other list
1337*f9790aebSLuigi Rizzo 			 * has packets (and if both are empty we never
1338*f9790aebSLuigi Rizzo 			 * get here).
1339*f9790aebSLuigi Rizzo 			 */
1340*f9790aebSLuigi Rizzo 			if (next < brd_next) {
1341*f9790aebSLuigi Rizzo 				ft_p = ft + next;
1342*f9790aebSLuigi Rizzo 				next = ft_p->ft_next;
1343*f9790aebSLuigi Rizzo 			} else { /* insert broadcast */
1344*f9790aebSLuigi Rizzo 				ft_p = ft + brd_next;
1345*f9790aebSLuigi Rizzo 				brd_next = ft_p->ft_next;
1346*f9790aebSLuigi Rizzo 			}
1347*f9790aebSLuigi Rizzo 			cnt = ft_p->ft_frags; // cnt > 0
1348*f9790aebSLuigi Rizzo 			if (unlikely(cnt > howmany))
1349*f9790aebSLuigi Rizzo 			    break; /* no more space */
1350*f9790aebSLuigi Rizzo 			howmany -= cnt;
1351*f9790aebSLuigi Rizzo 			if (netmap_verbose && cnt > 1)
1352*f9790aebSLuigi Rizzo 				RD(5, "rx %d frags to %d", cnt, j);
1353*f9790aebSLuigi Rizzo 			ft_end = ft_p + cnt;
1354*f9790aebSLuigi Rizzo 			do {
1355*f9790aebSLuigi Rizzo 			    char *dst, *src = ft_p->ft_buf;
1356*f9790aebSLuigi Rizzo 			    size_t copy_len = ft_p->ft_len, dst_len = copy_len;
1357*f9790aebSLuigi Rizzo 
1358*f9790aebSLuigi Rizzo 			    slot = &ring->slot[j];
1359*f9790aebSLuigi Rizzo 			    dst = BDG_NMB(&dst_na->up, slot);
1360*f9790aebSLuigi Rizzo 
1361*f9790aebSLuigi Rizzo 			    if (unlikely(fix_mismatch)) {
1362*f9790aebSLuigi Rizzo 				if (na->offset > dst_na->offset) {
1363*f9790aebSLuigi Rizzo 					src += na->offset - dst_na->offset;
1364*f9790aebSLuigi Rizzo 					copy_len -= na->offset - dst_na->offset;
1365*f9790aebSLuigi Rizzo 					dst_len = copy_len;
1366*f9790aebSLuigi Rizzo 				} else {
1367*f9790aebSLuigi Rizzo 					bzero(dst, dst_na->offset - na->offset);
1368*f9790aebSLuigi Rizzo 					dst_len += dst_na->offset - na->offset;
1369*f9790aebSLuigi Rizzo 					dst += dst_na->offset - na->offset;
1370*f9790aebSLuigi Rizzo 				}
1371*f9790aebSLuigi Rizzo 				/* fix the first fragment only */
1372*f9790aebSLuigi Rizzo 				fix_mismatch = 0;
				/* completely skip a header-only fragment */
1374*f9790aebSLuigi Rizzo 				if (copy_len == 0) {
1375*f9790aebSLuigi Rizzo 					ft_p++;
1376*f9790aebSLuigi Rizzo 					continue;
1377*f9790aebSLuigi Rizzo 				}
1378*f9790aebSLuigi Rizzo 			    }
1379*f9790aebSLuigi Rizzo 			    /* round to a multiple of 64 */
1380*f9790aebSLuigi Rizzo 			    copy_len = (copy_len + 63) & ~63;
1381*f9790aebSLuigi Rizzo 
1382*f9790aebSLuigi Rizzo 			    ND("send %d %d bytes at %s:%d",
1383*f9790aebSLuigi Rizzo 				i, ft_p->ft_len, NM_IFPNAME(dst_ifp), j);
1384*f9790aebSLuigi Rizzo 			    if (ft_p->ft_flags & NS_INDIRECT) {
1385*f9790aebSLuigi Rizzo 				if (copyin(src, dst, copy_len)) {
1386*f9790aebSLuigi Rizzo 					// invalid user pointer, pretend len is 0
1387*f9790aebSLuigi Rizzo 					dst_len = 0;
1388*f9790aebSLuigi Rizzo 				}
1389*f9790aebSLuigi Rizzo 			    } else {
1390*f9790aebSLuigi Rizzo 				//memcpy(dst, src, copy_len);
1391*f9790aebSLuigi Rizzo 				pkt_copy(src, dst, (int)copy_len);
1392*f9790aebSLuigi Rizzo 			    }
1393*f9790aebSLuigi Rizzo 			    slot->len = dst_len;
1394*f9790aebSLuigi Rizzo 			    slot->flags = (cnt << 8)| NS_MOREFRAG;
1395*f9790aebSLuigi Rizzo 			    j = nm_next(j, lim);
1396*f9790aebSLuigi Rizzo 			    ft_p++;
1397*f9790aebSLuigi Rizzo 			    sent++;
1398*f9790aebSLuigi Rizzo 			} while (ft_p != ft_end);
1399*f9790aebSLuigi Rizzo 			slot->flags = (cnt << 8); /* clear flag on last entry */
1400*f9790aebSLuigi Rizzo 			/* are we done ? */
1401*f9790aebSLuigi Rizzo 			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
1402*f9790aebSLuigi Rizzo 				break;
1403*f9790aebSLuigi Rizzo 		}
1404*f9790aebSLuigi Rizzo 		{
1405*f9790aebSLuigi Rizzo 		    /* current position */
1406*f9790aebSLuigi Rizzo 		    uint32_t *p = kring->nkr_leases; /* shorthand */
1407*f9790aebSLuigi Rizzo 		    uint32_t update_pos;
1408*f9790aebSLuigi Rizzo 		    int still_locked = 1;
1409*f9790aebSLuigi Rizzo 
1410*f9790aebSLuigi Rizzo 		    mtx_lock(&kring->q_lock);
1411*f9790aebSLuigi Rizzo 		    if (unlikely(howmany > 0)) {
1412*f9790aebSLuigi Rizzo 			/* not used all bufs. If i am the last one
1413*f9790aebSLuigi Rizzo 			 * i can recover the slots, otherwise must
1414*f9790aebSLuigi Rizzo 			 * fill them with 0 to mark empty packets.
1415*f9790aebSLuigi Rizzo 			 */
1416*f9790aebSLuigi Rizzo 			ND("leftover %d bufs", howmany);
1417*f9790aebSLuigi Rizzo 			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
1418*f9790aebSLuigi Rizzo 			    /* yes i am the last one */
1419*f9790aebSLuigi Rizzo 			    ND("roll back nkr_hwlease to %d", j);
1420*f9790aebSLuigi Rizzo 			    kring->nkr_hwlease = j;
1421*f9790aebSLuigi Rizzo 			} else {
1422*f9790aebSLuigi Rizzo 			    while (howmany-- > 0) {
1423*f9790aebSLuigi Rizzo 				ring->slot[j].len = 0;
1424*f9790aebSLuigi Rizzo 				ring->slot[j].flags = 0;
1425*f9790aebSLuigi Rizzo 				j = nm_next(j, lim);
1426*f9790aebSLuigi Rizzo 			    }
1427*f9790aebSLuigi Rizzo 			}
1428*f9790aebSLuigi Rizzo 		    }
1429*f9790aebSLuigi Rizzo 		    p[lease_idx] = j; /* report I am done */
1430*f9790aebSLuigi Rizzo 
1431*f9790aebSLuigi Rizzo 		    update_pos = nm_kr_rxpos(kring);
1432*f9790aebSLuigi Rizzo 
1433*f9790aebSLuigi Rizzo 		    if (my_start == update_pos) {
1434*f9790aebSLuigi Rizzo 			/* all slots before my_start have been reported,
1435*f9790aebSLuigi Rizzo 			 * so scan subsequent leases to see if other ranges
			 * have been completed, and do a selwakeup or txsync.
1437*f9790aebSLuigi Rizzo 		         */
1438*f9790aebSLuigi Rizzo 			while (lease_idx != kring->nkr_lease_idx &&
1439*f9790aebSLuigi Rizzo 				p[lease_idx] != NR_NOSLOT) {
1440*f9790aebSLuigi Rizzo 			    j = p[lease_idx];
1441*f9790aebSLuigi Rizzo 			    p[lease_idx] = NR_NOSLOT;
1442*f9790aebSLuigi Rizzo 			    lease_idx = nm_next(lease_idx, lim);
1443*f9790aebSLuigi Rizzo 			}
1444*f9790aebSLuigi Rizzo 			/* j is the new 'write' position. j != my_start
1445*f9790aebSLuigi Rizzo 			 * means there are new buffers to report
1446*f9790aebSLuigi Rizzo 			 */
1447*f9790aebSLuigi Rizzo 			if (likely(j != my_start)) {
1448*f9790aebSLuigi Rizzo 				uint32_t old_avail = kring->nr_hwavail;
1449*f9790aebSLuigi Rizzo 
1450*f9790aebSLuigi Rizzo 				kring->nr_hwavail = (j >= kring->nr_hwcur) ?
1451*f9790aebSLuigi Rizzo 					j - kring->nr_hwcur :
1452*f9790aebSLuigi Rizzo 					j + lim + 1 - kring->nr_hwcur;
1453*f9790aebSLuigi Rizzo 				if (kring->nr_hwavail < old_avail) {
1454*f9790aebSLuigi Rizzo 					D("avail shrink %d -> %d",
1455*f9790aebSLuigi Rizzo 						old_avail, kring->nr_hwavail);
1456*f9790aebSLuigi Rizzo 				}
1457*f9790aebSLuigi Rizzo 				dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1458*f9790aebSLuigi Rizzo 				still_locked = 0;
1459*f9790aebSLuigi Rizzo 				mtx_unlock(&kring->q_lock);
1460*f9790aebSLuigi Rizzo 				if (dst_na->retry && retry--)
1461*f9790aebSLuigi Rizzo 					goto retry;
1462*f9790aebSLuigi Rizzo 			}
1463*f9790aebSLuigi Rizzo 		    }
1464*f9790aebSLuigi Rizzo 		    if (still_locked)
1465*f9790aebSLuigi Rizzo 			mtx_unlock(&kring->q_lock);
1466*f9790aebSLuigi Rizzo 		}
1467*f9790aebSLuigi Rizzo cleanup:
1468*f9790aebSLuigi Rizzo 		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
1469*f9790aebSLuigi Rizzo 		d->bq_len = 0;
1470*f9790aebSLuigi Rizzo 	}
1471*f9790aebSLuigi Rizzo 	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
1472*f9790aebSLuigi Rizzo 	brddst->bq_len = 0;
1473*f9790aebSLuigi Rizzo 	return 0;
1474*f9790aebSLuigi Rizzo }
1475*f9790aebSLuigi Rizzo 
1476*f9790aebSLuigi Rizzo static int
1477*f9790aebSLuigi Rizzo netmap_vp_txsync(struct netmap_vp_adapter *na, u_int ring_nr, int flags)
1478*f9790aebSLuigi Rizzo {
1479*f9790aebSLuigi Rizzo 	struct netmap_kring *kring = &na->up.tx_rings[ring_nr];
1480*f9790aebSLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
1481*f9790aebSLuigi Rizzo 	u_int j, k, lim = kring->nkr_num_slots - 1;
1482*f9790aebSLuigi Rizzo 
1483*f9790aebSLuigi Rizzo 	k = ring->cur;
1484*f9790aebSLuigi Rizzo 	if (k > lim)
1485*f9790aebSLuigi Rizzo 		return netmap_ring_reinit(kring);
1486*f9790aebSLuigi Rizzo 
1487*f9790aebSLuigi Rizzo 	if (bridge_batch <= 0) { /* testing only */
1488*f9790aebSLuigi Rizzo 		j = k; // used all
1489*f9790aebSLuigi Rizzo 		goto done;
1490*f9790aebSLuigi Rizzo 	}
1491*f9790aebSLuigi Rizzo 	if (bridge_batch > NM_BDG_BATCH)
1492*f9790aebSLuigi Rizzo 		bridge_batch = NM_BDG_BATCH;
1493*f9790aebSLuigi Rizzo 
1494*f9790aebSLuigi Rizzo 	j = nm_bdg_preflush(na, ring_nr, kring, k);
1495*f9790aebSLuigi Rizzo 	if (j != k)
1496*f9790aebSLuigi Rizzo 		D("early break at %d/ %d, avail %d", j, k, kring->nr_hwavail);
1497*f9790aebSLuigi Rizzo 	/* k-j modulo ring size is the number of slots processed */
1498*f9790aebSLuigi Rizzo 	if (k < j)
1499*f9790aebSLuigi Rizzo 		k += kring->nkr_num_slots;
1500*f9790aebSLuigi Rizzo 	kring->nr_hwavail = lim - (k - j);
1501*f9790aebSLuigi Rizzo 
1502*f9790aebSLuigi Rizzo done:
1503*f9790aebSLuigi Rizzo 	kring->nr_hwcur = j;
1504*f9790aebSLuigi Rizzo 	ring->avail = kring->nr_hwavail;
1505*f9790aebSLuigi Rizzo 	if (netmap_verbose)
1506*f9790aebSLuigi Rizzo 		D("%s ring %d flags %d", NM_IFPNAME(na->up.ifp), ring_nr, flags);
1507*f9790aebSLuigi Rizzo 	return 0;
1508*f9790aebSLuigi Rizzo }
1509*f9790aebSLuigi Rizzo 
1510*f9790aebSLuigi Rizzo 
1511*f9790aebSLuigi Rizzo /*
1512*f9790aebSLuigi Rizzo  * main dispatch routine for the bridge.
1513*f9790aebSLuigi Rizzo  * We already know that only one thread is running this.
1514*f9790aebSLuigi Rizzo  * we must run nm_bdg_preflush without lock.
1515*f9790aebSLuigi Rizzo  */
1516*f9790aebSLuigi Rizzo static int
1517*f9790aebSLuigi Rizzo bdg_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
1518*f9790aebSLuigi Rizzo {
1519*f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
1520*f9790aebSLuigi Rizzo 	return netmap_vp_txsync(vpna, ring_nr, flags);
1521*f9790aebSLuigi Rizzo }
1522*f9790aebSLuigi Rizzo 
1523*f9790aebSLuigi Rizzo 
1524*f9790aebSLuigi Rizzo /*
1525*f9790aebSLuigi Rizzo  * user process reading from a VALE switch.
1526*f9790aebSLuigi Rizzo  * Already protected against concurrent calls from userspace,
1527*f9790aebSLuigi Rizzo  * but we must acquire the queue's lock to protect against
1528*f9790aebSLuigi Rizzo  * writers on the same queue.
1529*f9790aebSLuigi Rizzo  */
static int
bdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
	struct netmap_kring *kring = &na->rx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	u_int j, lim = kring->nkr_num_slots - 1;
	u_int k = ring->cur, resvd = ring->reserved;
	int n;	/* return value; also reused as a slot counter below */

	/* take the queue lock: bridge senders may be concurrently
	 * appending packets to this rx ring.
	 */
	mtx_lock(&kring->q_lock);
	if (k > lim) {
		D("ouch dangerous reset!!!");
		n = netmap_ring_reinit(kring);
		goto done;
	}

	/* skip past packets that userspace has released */
	j = kring->nr_hwcur;    /* netmap ring index */
	if (resvd > 0) {
		/* sanity-check reserved, then move k back by that many slots
		 * so the reserved range is not released yet.
		 */
		if (resvd + ring->avail >= lim + 1) {
			D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
			ring->reserved = resvd = 0; // XXX panic...
		}
		k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
	}

	if (j != k) { /* userspace has released some packets. */
		/* this first value of n only feeds the ND() trace;
		 * the loop below recounts from zero.
		 */
		n = k - j;
		if (n < 0)
			n += kring->nkr_num_slots;
		ND("userspace releases %d packets", n);
		for (n = 0; likely(j != k); n++) {
			struct netmap_slot *slot = &ring->slot[j];
			void *addr = BDG_NMB(na, slot);

			/* NOTE(review): comparing against netmap_buffer_base
			 * apparently detects an out-of-range buf_idx; the slot
			 * is only reported, not repaired — confirm intent.
			 */
			if (addr == netmap_buffer_base) { /* bad buf */
				D("bad buffer index %d, ignore ?",
					slot->buf_idx);
			}
			slot->flags &= ~NS_BUF_CHANGED;
			j = nm_next(j, lim);
		}
		kring->nr_hwavail -= n;
		kring->nr_hwcur = k;
	}
	/* tell userspace that there are new packets */
	ring->avail = kring->nr_hwavail - resvd;
	n = 0;
done:
	mtx_unlock(&kring->q_lock);
	return n;
}
1582*f9790aebSLuigi Rizzo 
1583*f9790aebSLuigi Rizzo static int
1584*f9790aebSLuigi Rizzo bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp)
1585*f9790aebSLuigi Rizzo {
1586*f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *vpna;
1587*f9790aebSLuigi Rizzo 	struct netmap_adapter *na;
1588*f9790aebSLuigi Rizzo 	int error;
1589*f9790aebSLuigi Rizzo 
1590*f9790aebSLuigi Rizzo 	vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
1591*f9790aebSLuigi Rizzo 	if (vpna == NULL)
1592*f9790aebSLuigi Rizzo 		return ENOMEM;
1593*f9790aebSLuigi Rizzo 
1594*f9790aebSLuigi Rizzo  	na = &vpna->up;
1595*f9790aebSLuigi Rizzo 
1596*f9790aebSLuigi Rizzo 	na->ifp = ifp;
1597*f9790aebSLuigi Rizzo 
1598*f9790aebSLuigi Rizzo 	/* bound checking */
1599*f9790aebSLuigi Rizzo 	na->num_tx_rings = nmr->nr_tx_rings;
1600*f9790aebSLuigi Rizzo 	nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1601*f9790aebSLuigi Rizzo 	nmr->nr_tx_rings = na->num_tx_rings; // write back
1602*f9790aebSLuigi Rizzo 	na->num_rx_rings = nmr->nr_rx_rings;
1603*f9790aebSLuigi Rizzo 	nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1604*f9790aebSLuigi Rizzo 	nmr->nr_rx_rings = na->num_rx_rings; // write back
1605*f9790aebSLuigi Rizzo 	nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
1606*f9790aebSLuigi Rizzo 			1, NM_BDG_MAXSLOTS, NULL);
1607*f9790aebSLuigi Rizzo 	na->num_tx_desc = nmr->nr_tx_slots;
1608*f9790aebSLuigi Rizzo 	nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
1609*f9790aebSLuigi Rizzo 			1, NM_BDG_MAXSLOTS, NULL);
1610*f9790aebSLuigi Rizzo 	na->num_rx_desc = nmr->nr_rx_slots;
1611*f9790aebSLuigi Rizzo 	vpna->offset = 0;
1612*f9790aebSLuigi Rizzo 
1613*f9790aebSLuigi Rizzo 	na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
1614*f9790aebSLuigi Rizzo 	na->nm_txsync = bdg_netmap_txsync;
1615*f9790aebSLuigi Rizzo 	na->nm_rxsync = bdg_netmap_rxsync;
1616*f9790aebSLuigi Rizzo 	na->nm_register = bdg_netmap_reg;
1617*f9790aebSLuigi Rizzo 	na->nm_dtor = netmap_adapter_vp_dtor;
1618*f9790aebSLuigi Rizzo 	na->nm_krings_create = netmap_vp_krings_create;
1619*f9790aebSLuigi Rizzo 	na->nm_krings_delete = netmap_vp_krings_delete;
1620*f9790aebSLuigi Rizzo 	na->nm_mem = netmap_mem_private_new(NM_IFPNAME(na->ifp),
1621*f9790aebSLuigi Rizzo 			na->num_tx_rings, na->num_tx_desc,
1622*f9790aebSLuigi Rizzo 			na->num_rx_rings, na->num_rx_desc);
1623*f9790aebSLuigi Rizzo 	/* other nmd fields are set in the common routine */
1624*f9790aebSLuigi Rizzo 	error = netmap_attach_common(na);
1625*f9790aebSLuigi Rizzo 	if (error) {
1626*f9790aebSLuigi Rizzo 		free(vpna, M_DEVBUF);
1627*f9790aebSLuigi Rizzo 		return error;
1628*f9790aebSLuigi Rizzo 	}
1629*f9790aebSLuigi Rizzo 	return 0;
1630*f9790aebSLuigi Rizzo }
1631*f9790aebSLuigi Rizzo 
1632*f9790aebSLuigi Rizzo static void
1633*f9790aebSLuigi Rizzo netmap_bwrap_dtor(struct netmap_adapter *na)
1634*f9790aebSLuigi Rizzo {
1635*f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1636*f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
1637*f9790aebSLuigi Rizzo 	struct nm_bridge *b = bna->up.na_bdg,
1638*f9790aebSLuigi Rizzo 		*bh = bna->host.na_bdg;
1639*f9790aebSLuigi Rizzo 	struct ifnet *ifp = na->ifp;
1640*f9790aebSLuigi Rizzo 
1641*f9790aebSLuigi Rizzo 	ND("na %p", na);
1642*f9790aebSLuigi Rizzo 
1643*f9790aebSLuigi Rizzo 	if (b) {
1644*f9790aebSLuigi Rizzo 		netmap_bdg_detach_common(b, bna->up.bdg_port,
1645*f9790aebSLuigi Rizzo 			(bh ? bna->host.bdg_port : -1));
1646*f9790aebSLuigi Rizzo 	}
1647*f9790aebSLuigi Rizzo 
1648*f9790aebSLuigi Rizzo 	hwna->na_private = NULL;
1649*f9790aebSLuigi Rizzo 	netmap_adapter_put(hwna);
1650*f9790aebSLuigi Rizzo 
1651*f9790aebSLuigi Rizzo 	bzero(ifp, sizeof(*ifp));
1652*f9790aebSLuigi Rizzo 	free(ifp, M_DEVBUF);
1653*f9790aebSLuigi Rizzo 	na->ifp = NULL;
1654*f9790aebSLuigi Rizzo 
1655*f9790aebSLuigi Rizzo }
1656*f9790aebSLuigi Rizzo 
1657*f9790aebSLuigi Rizzo /*
1658*f9790aebSLuigi Rizzo  * Pass packets from nic to the bridge.
1659*f9790aebSLuigi Rizzo  * XXX TODO check locking: this is called from the interrupt
1660*f9790aebSLuigi Rizzo  * handler so we should make sure that the interface is not
1661*f9790aebSLuigi Rizzo  * disconnected while passing down an interrupt.
1662*f9790aebSLuigi Rizzo  *
1663*f9790aebSLuigi Rizzo  * Note, no user process can access this NIC so we can ignore
1664*f9790aebSLuigi Rizzo  * the info in the 'ring'.
1665*f9790aebSLuigi Rizzo  */
1666*f9790aebSLuigi Rizzo /* callback that overwrites the hwna notify callback.
1667*f9790aebSLuigi Rizzo  * Packets come from the outside or from the host stack and are put on an hwna rx ring.
1668*f9790aebSLuigi Rizzo  * The bridge wrapper then sends the packets through the bridge.
1669*f9790aebSLuigi Rizzo  */
static int
netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
{
	struct ifnet *ifp = na->ifp;
	struct netmap_bwrap_adapter *bna = na->na_private;
	struct netmap_vp_adapter *hostna = &bna->host;
	struct netmap_kring *kring, *bkring;
	struct netmap_ring *ring;
	/* the ring one past the last hw rx ring is the host stack ring */
	int is_host_ring = ring_nr == na->num_rx_rings;
	struct netmap_vp_adapter *vpna = &bna->up;
	int error = 0;

	ND("%s[%d] %s %x", NM_IFPNAME(ifp), ring_nr, (tx == NR_TX ? "TX" : "RX"), flags);

	/* enable/disable request: mirror the stop state of the hw ring
	 * onto the paired bridge-port ring (tx and rx are swapped
	 * across the wrapper).
	 */
	if (flags & NAF_DISABLE_NOTIFY) {
		kring = tx == NR_TX ? na->tx_rings : na->rx_rings;
		bkring = tx == NR_TX ? vpna->up.rx_rings : vpna->up.tx_rings;
		if (kring->nkr_stopped)
			netmap_disable_ring(bkring);
		else
			bkring->nkr_stopped = 0;
		return 0;
	}

	/* nothing to do if the interface is gone or left netmap mode */
	if (ifp == NULL || !(ifp->if_capenable & IFCAP_NETMAP))
		return 0;

	/* only rx interrupts carry packets to forward into the bridge */
	if (tx == NR_TX)
		return 0;

	kring = &na->rx_rings[ring_nr];
	ring = kring->ring;

	/* make sure the ring is not disabled */
	if (nm_kr_tryget(kring))
		return 0;

	/* host ring, but the host port is not bridged: fall back to the
	 * notify callback saved at register time.
	 */
	if (is_host_ring && hostna->na_bdg == NULL) {
		error = bna->save_notify(na, ring_nr, tx, flags);
		goto put_out;
	}

	if (is_host_ring) {
		/* host stack traffic enters the bridge via the host
		 * port, which has a single ring (ring 0)
		 */
		vpna = hostna;
		ring_nr = 0;
	} else {
		/* fetch packets that have arrived.
		 * XXX maybe do this in a loop ?
		 */
		error = na->nm_rxsync(na, ring_nr, 0);
		if (error)
			goto put_out;
	}
	if (kring->nr_hwavail == 0 && netmap_verbose) {
		D("how strange, interrupt with no packets on %s",
			NM_IFPNAME(ifp));
		goto put_out;
	}
	/* XXX avail ? */
	ring->cur = nm_kr_rxpos(kring);
	/* push the received packets through the bridge: the rx ring of
	 * the wrapper acts as the tx ring of the bridge port
	 */
	netmap_vp_txsync(vpna, ring_nr, flags);

	/* second rxsync — presumably to return the slots just consumed
	 * by the bridge to the NIC; TODO confirm against nm_rxsync
	 * semantics for this driver.
	 */
	if (!is_host_ring)
		error = na->nm_rxsync(na, ring_nr, 0);

put_out:
	nm_kr_put(kring);
	return error;
}
1739*f9790aebSLuigi Rizzo 
1740*f9790aebSLuigi Rizzo static int
1741*f9790aebSLuigi Rizzo netmap_bwrap_register(struct netmap_adapter *na, int onoff)
1742*f9790aebSLuigi Rizzo {
1743*f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
1744*f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
1745*f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
1746*f9790aebSLuigi Rizzo 	struct netmap_vp_adapter *hostna = &bna->host;
1747*f9790aebSLuigi Rizzo 	int error;
1748*f9790aebSLuigi Rizzo 
1749*f9790aebSLuigi Rizzo 	ND("%s %d", NM_IFPNAME(ifp), onoff);
1750*f9790aebSLuigi Rizzo 
1751*f9790aebSLuigi Rizzo 	if (onoff) {
1752*f9790aebSLuigi Rizzo 		int i;
1753*f9790aebSLuigi Rizzo 
1754*f9790aebSLuigi Rizzo 		hwna->na_lut = na->na_lut;
1755*f9790aebSLuigi Rizzo 		hwna->na_lut_objtotal = na->na_lut_objtotal;
1756*f9790aebSLuigi Rizzo 
1757*f9790aebSLuigi Rizzo 		if (hostna->na_bdg) {
1758*f9790aebSLuigi Rizzo 			hostna->up.na_lut = na->na_lut;
1759*f9790aebSLuigi Rizzo 			hostna->up.na_lut_objtotal = na->na_lut_objtotal;
1760*f9790aebSLuigi Rizzo 		}
1761*f9790aebSLuigi Rizzo 
1762*f9790aebSLuigi Rizzo 		/* cross-link the netmap rings */
1763*f9790aebSLuigi Rizzo 		for (i = 0; i <= na->num_tx_rings; i++) {
1764*f9790aebSLuigi Rizzo 			hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
1765*f9790aebSLuigi Rizzo 			hwna->tx_rings[i].ring = na->rx_rings[i].ring;
1766*f9790aebSLuigi Rizzo 		}
1767*f9790aebSLuigi Rizzo 		for (i = 0; i <= na->num_rx_rings; i++) {
1768*f9790aebSLuigi Rizzo 			hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
1769*f9790aebSLuigi Rizzo 			hwna->rx_rings[i].ring = na->tx_rings[i].ring;
1770*f9790aebSLuigi Rizzo 		}
1771*f9790aebSLuigi Rizzo 	}
1772*f9790aebSLuigi Rizzo 
1773*f9790aebSLuigi Rizzo 	if (hwna->ifp) {
1774*f9790aebSLuigi Rizzo 		error = hwna->nm_register(hwna, onoff);
1775*f9790aebSLuigi Rizzo 		if (error)
1776*f9790aebSLuigi Rizzo 			return error;
1777*f9790aebSLuigi Rizzo 	}
1778*f9790aebSLuigi Rizzo 
1779*f9790aebSLuigi Rizzo 	bdg_netmap_reg(na, onoff);
1780*f9790aebSLuigi Rizzo 
1781*f9790aebSLuigi Rizzo 	if (onoff) {
1782*f9790aebSLuigi Rizzo 		bna->save_notify = hwna->nm_notify;
1783*f9790aebSLuigi Rizzo 		hwna->nm_notify = netmap_bwrap_intr_notify;
1784*f9790aebSLuigi Rizzo 	} else {
1785*f9790aebSLuigi Rizzo 		hwna->nm_notify = bna->save_notify;
1786*f9790aebSLuigi Rizzo 		hwna->na_lut = NULL;
1787*f9790aebSLuigi Rizzo 		hwna->na_lut_objtotal = 0;
1788*f9790aebSLuigi Rizzo 	}
1789*f9790aebSLuigi Rizzo 
1790*f9790aebSLuigi Rizzo 	return 0;
1791*f9790aebSLuigi Rizzo }
1792*f9790aebSLuigi Rizzo 
1793*f9790aebSLuigi Rizzo static int
1794*f9790aebSLuigi Rizzo netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
1795*f9790aebSLuigi Rizzo 				    u_int *rxr, u_int *rxd)
1796*f9790aebSLuigi Rizzo {
1797*f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
1798*f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
1799*f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
1800*f9790aebSLuigi Rizzo 
1801*f9790aebSLuigi Rizzo 	/* forward the request */
1802*f9790aebSLuigi Rizzo 	netmap_update_config(hwna);
1803*f9790aebSLuigi Rizzo 	/* swap the results */
1804*f9790aebSLuigi Rizzo 	*txr = hwna->num_rx_rings;
1805*f9790aebSLuigi Rizzo 	*txd = hwna->num_rx_desc;
1806*f9790aebSLuigi Rizzo 	*rxr = hwna->num_tx_rings;
1807*f9790aebSLuigi Rizzo 	*rxd = hwna->num_rx_desc;
1808*f9790aebSLuigi Rizzo 
1809*f9790aebSLuigi Rizzo 	return 0;
1810*f9790aebSLuigi Rizzo }
1811*f9790aebSLuigi Rizzo 
1812*f9790aebSLuigi Rizzo static int
1813*f9790aebSLuigi Rizzo netmap_bwrap_krings_create(struct netmap_adapter *na)
1814*f9790aebSLuigi Rizzo {
1815*f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
1816*f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
1817*f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
1818*f9790aebSLuigi Rizzo 	struct netmap_adapter *hostna = &bna->host.up;
1819*f9790aebSLuigi Rizzo 	int error;
1820*f9790aebSLuigi Rizzo 
1821*f9790aebSLuigi Rizzo 	ND("%s", NM_IFPNAME(na->ifp));
1822*f9790aebSLuigi Rizzo 
1823*f9790aebSLuigi Rizzo 	error = netmap_vp_krings_create(na);
1824*f9790aebSLuigi Rizzo 	if (error)
1825*f9790aebSLuigi Rizzo 		return error;
1826*f9790aebSLuigi Rizzo 
1827*f9790aebSLuigi Rizzo 	error = hwna->nm_krings_create(hwna);
1828*f9790aebSLuigi Rizzo 	if (error) {
1829*f9790aebSLuigi Rizzo 		netmap_vp_krings_delete(na);
1830*f9790aebSLuigi Rizzo 		return error;
1831*f9790aebSLuigi Rizzo 	}
1832*f9790aebSLuigi Rizzo 
1833*f9790aebSLuigi Rizzo 	hostna->tx_rings = na->tx_rings + na->num_tx_rings;
1834*f9790aebSLuigi Rizzo 	hostna->rx_rings = na->rx_rings + na->num_rx_rings;
1835*f9790aebSLuigi Rizzo 
1836*f9790aebSLuigi Rizzo 	return 0;
1837*f9790aebSLuigi Rizzo }
1838*f9790aebSLuigi Rizzo 
1839*f9790aebSLuigi Rizzo static void
1840*f9790aebSLuigi Rizzo netmap_bwrap_krings_delete(struct netmap_adapter *na)
1841*f9790aebSLuigi Rizzo {
1842*f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna =
1843*f9790aebSLuigi Rizzo 		(struct netmap_bwrap_adapter *)na;
1844*f9790aebSLuigi Rizzo 	struct netmap_adapter *hwna = bna->hwna;
1845*f9790aebSLuigi Rizzo 
1846*f9790aebSLuigi Rizzo 	ND("%s", NM_IFPNAME(na->ifp));
1847*f9790aebSLuigi Rizzo 
1848*f9790aebSLuigi Rizzo 	hwna->nm_krings_delete(hwna);
1849*f9790aebSLuigi Rizzo 	netmap_vp_krings_delete(na);
1850*f9790aebSLuigi Rizzo }
1851*f9790aebSLuigi Rizzo 
/* notify method for the bridge-->hwna direction.
 *
 * Called when the bridge has placed frames on one of our rx rings;
 * pushes them out through the corresponding hardware tx ring (or the
 * host stack ring, for the extra ring past num_rx_rings).
 *
 * Returns 0 on success, ENXIO on a tx-side notify (not supported here),
 * or the error from the hardware txsync.
 */
static int
netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct netmap_kring *kring, *hw_kring;
	struct netmap_ring *ring;
	u_int lim, k;
	int error = 0;

	/* only rx-ring notifications make sense in this direction */
	if (tx == NR_TX)
	        return ENXIO;

	/* our rx ring ring_n pairs with the hw tx ring of the same index */
	kring = &na->rx_rings[ring_n];
	hw_kring = &hwna->tx_rings[ring_n];
	ring = kring->ring;

	lim = kring->nkr_num_slots - 1;
	k = nm_kr_rxpos(kring);

	/* nothing to do if the hw interface is gone or not in netmap mode */
	if (hwna->ifp == NULL || !(hwna->ifp->if_capenable & IFCAP_NETMAP))
		return 0;
	/* advance cur to what the bridge produced, then sync */
	ring->cur = k;
	ND("%s[%d] PRE rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
		NM_IFPNAME(na->ifp), ring_n,
		kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
		ring->cur, ring->avail, ring->reserved,
		hw_kring->nr_hwcur, hw_kring->nr_hwavail);
	/* the ring past num_rx_rings is the host stack ring */
	if (ring_n == na->num_rx_rings) {
		netmap_txsync_to_host(hwna);
	} else {
		error = hwna->nm_txsync(hwna, ring_n, flags);
	}
	/* update our rx-ring bookkeeping to reflect what was consumed */
	kring->nr_hwcur = ring->cur;
	kring->nr_hwavail = 0;
	kring->nr_hwreserved = lim - ring->avail;
	ND("%s[%d] PST rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
		NM_IFPNAME(na->ifp), ring_n,
		kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
		ring->cur, ring->avail, ring->reserved,
		hw_kring->nr_hwcur, hw_kring->nr_hwavail);

	return error;
}
1898*f9790aebSLuigi Rizzo 
1899*f9790aebSLuigi Rizzo static int
1900*f9790aebSLuigi Rizzo netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
1901*f9790aebSLuigi Rizzo {
1902*f9790aebSLuigi Rizzo 	struct netmap_bwrap_adapter *bna = na->na_private;
1903*f9790aebSLuigi Rizzo 	struct netmap_adapter *port_na = &bna->up.up;
1904*f9790aebSLuigi Rizzo 	if (tx == NR_TX || ring_n != 0)
1905*f9790aebSLuigi Rizzo 		return ENXIO;
1906*f9790aebSLuigi Rizzo 	return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
1907*f9790aebSLuigi Rizzo }
1908*f9790aebSLuigi Rizzo 
/* attach a bridge wrapper to the 'real' device.
 *
 * Allocates and initializes a netmap_bwrap_adapter that presents the
 * hardware adapter of 'real' to a VALE bridge, using 'fake' as the
 * wrapper's ifnet. The wrapper's tx/rx geometry is the hardware's with
 * the directions swapped. Also sets up the companion host-rings adapter
 * (bna->host) with a single ring pair for the host stack path.
 *
 * Returns 0 on success, ENOMEM if the wrapper cannot be allocated, or
 * the error from netmap_attach_common() (after releasing the reference
 * taken on the hardware adapter).
 */
static int
netmap_bwrap_attach(struct ifnet *fake, struct ifnet *real)
{
	struct netmap_bwrap_adapter *bna;
	struct netmap_adapter *na;
	struct netmap_adapter *hwna = NA(real);
	struct netmap_adapter *hostna;
	int error;


	bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (bna == NULL)
		return ENOMEM;

	na = &bna->up.up;
	na->ifp = fake;
	/* fill the ring data for the bwrap adapter with rx/tx meanings
	 * swapped. The real cross-linking will be done during register,
	 * when all the krings will have been created.
	 */
	na->num_rx_rings = hwna->num_tx_rings;
	na->num_tx_rings = hwna->num_rx_rings;
	na->num_tx_desc = hwna->num_rx_desc;
	na->num_rx_desc = hwna->num_tx_desc;
	na->nm_dtor = netmap_bwrap_dtor;
	na->nm_register = netmap_bwrap_register;
	// na->nm_txsync = netmap_bwrap_txsync;
	// na->nm_rxsync = netmap_bwrap_rxsync;
	na->nm_config = netmap_bwrap_config;
	na->nm_krings_create = netmap_bwrap_krings_create;
	na->nm_krings_delete = netmap_bwrap_krings_delete;
	na->nm_notify = netmap_bwrap_notify;
	na->nm_mem = hwna->nm_mem;
	na->na_private = na; /* prevent NIOCREGIF */
	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */

	/* take a strong reference on the hardware adapter; released in
	 * the error path below and, presumably, in the dtor */
	bna->hwna = hwna;
	netmap_adapter_get(hwna);
	hwna->na_private = bna; /* weak reference */

	/* companion adapter for the hardware's host (stack) rings:
	 * a single tx/rx ring pair, again with desc counts swapped */
	hostna = &bna->host.up;
	hostna->ifp = hwna->ifp;
	hostna->num_tx_rings = 1;
	hostna->num_tx_desc = hwna->num_rx_desc;
	hostna->num_rx_rings = 1;
	hostna->num_rx_desc = hwna->num_tx_desc;
	// hostna->nm_txsync = netmap_bwrap_host_txsync;
	// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
	hostna->nm_notify = netmap_bwrap_host_notify;
	hostna->nm_mem = na->nm_mem;
	hostna->na_private = bna;

	D("%s<->%s txr %d txd %d rxr %d rxd %d", fake->if_xname, real->if_xname,
		na->num_tx_rings, na->num_tx_desc,
		na->num_rx_rings, na->num_rx_desc);

	error = netmap_attach_common(na);
	if (error) {
		/* drop the reference taken above and free the wrapper */
		netmap_adapter_put(hwna);
		free(bna, M_DEVBUF);
		return error;
	}
	return 0;
}
1974*f9790aebSLuigi Rizzo 
1975*f9790aebSLuigi Rizzo void
1976*f9790aebSLuigi Rizzo netmap_init_bridges(void)
1977*f9790aebSLuigi Rizzo {
1978*f9790aebSLuigi Rizzo 	int i;
1979*f9790aebSLuigi Rizzo 	bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
1980*f9790aebSLuigi Rizzo 	for (i = 0; i < NM_BRIDGES; i++)
1981*f9790aebSLuigi Rizzo 		BDG_RWINIT(&nm_bridges[i]);
1982*f9790aebSLuigi Rizzo }
1983*f9790aebSLuigi Rizzo #endif /* WITH_VALE */
1984