1f9790aebSLuigi Rizzo /* 2*37e3a6d3SLuigi Rizzo * Copyright (C) 2013-2016 Universita` di Pisa 3*37e3a6d3SLuigi Rizzo * All rights reserved. 4f9790aebSLuigi Rizzo * 5f9790aebSLuigi Rizzo * Redistribution and use in source and binary forms, with or without 6f9790aebSLuigi Rizzo * modification, are permitted provided that the following conditions 7f9790aebSLuigi Rizzo * are met: 8f9790aebSLuigi Rizzo * 1. Redistributions of source code must retain the above copyright 9f9790aebSLuigi Rizzo * notice, this list of conditions and the following disclaimer. 10f9790aebSLuigi Rizzo * 2. Redistributions in binary form must reproduce the above copyright 11f9790aebSLuigi Rizzo * notice, this list of conditions and the following disclaimer in the 12f9790aebSLuigi Rizzo * documentation and/or other materials provided with the distribution. 13f9790aebSLuigi Rizzo * 14f9790aebSLuigi Rizzo * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15f9790aebSLuigi Rizzo * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16f9790aebSLuigi Rizzo * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17f9790aebSLuigi Rizzo * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18f9790aebSLuigi Rizzo * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19f9790aebSLuigi Rizzo * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20f9790aebSLuigi Rizzo * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21f9790aebSLuigi Rizzo * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22f9790aebSLuigi Rizzo * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23f9790aebSLuigi Rizzo * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24f9790aebSLuigi Rizzo * SUCH DAMAGE. 
 */


/*
 * This module implements the VALE switch for netmap

--- VALE SWITCH ---

NMG_LOCK() serializes all modifications to switches and ports.
A switch cannot be deleted until all ports are gone.

For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring a new port or deleting an
existing one, the lock is acquired in exclusive mode (after
holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.

On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch)

 */

/*
 * OS-specific code that is used only within this file.
55f9790aebSLuigi Rizzo * Other OS-specific code that must be accessed by drivers 56f9790aebSLuigi Rizzo * is present in netmap_kern.h 57f9790aebSLuigi Rizzo */ 58f9790aebSLuigi Rizzo 59f9790aebSLuigi Rizzo #if defined(__FreeBSD__) 60f9790aebSLuigi Rizzo #include <sys/cdefs.h> /* prerequisite */ 61f9790aebSLuigi Rizzo __FBSDID("$FreeBSD$"); 62f9790aebSLuigi Rizzo 63f9790aebSLuigi Rizzo #include <sys/types.h> 64f9790aebSLuigi Rizzo #include <sys/errno.h> 65f9790aebSLuigi Rizzo #include <sys/param.h> /* defines used in kernel.h */ 66f9790aebSLuigi Rizzo #include <sys/kernel.h> /* types used in module initialization */ 67f9790aebSLuigi Rizzo #include <sys/conf.h> /* cdevsw struct, UID, GID */ 68f9790aebSLuigi Rizzo #include <sys/sockio.h> 69f9790aebSLuigi Rizzo #include <sys/socketvar.h> /* struct socket */ 70f9790aebSLuigi Rizzo #include <sys/malloc.h> 71f9790aebSLuigi Rizzo #include <sys/poll.h> 72f9790aebSLuigi Rizzo #include <sys/rwlock.h> 73f9790aebSLuigi Rizzo #include <sys/socket.h> /* sockaddrs */ 74f9790aebSLuigi Rizzo #include <sys/selinfo.h> 75f9790aebSLuigi Rizzo #include <sys/sysctl.h> 76f9790aebSLuigi Rizzo #include <net/if.h> 77f9790aebSLuigi Rizzo #include <net/if_var.h> 78f9790aebSLuigi Rizzo #include <net/bpf.h> /* BIOCIMMEDIATE */ 79f9790aebSLuigi Rizzo #include <machine/bus.h> /* bus_dmamap_* */ 80f9790aebSLuigi Rizzo #include <sys/endian.h> 81f9790aebSLuigi Rizzo #include <sys/refcount.h> 82f9790aebSLuigi Rizzo 83f9790aebSLuigi Rizzo 84f9790aebSLuigi Rizzo #define BDG_RWLOCK_T struct rwlock // struct rwlock 85f9790aebSLuigi Rizzo 86f9790aebSLuigi Rizzo #define BDG_RWINIT(b) \ 87f9790aebSLuigi Rizzo rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS) 88f9790aebSLuigi Rizzo #define BDG_WLOCK(b) rw_wlock(&(b)->bdg_lock) 89f9790aebSLuigi Rizzo #define BDG_WUNLOCK(b) rw_wunlock(&(b)->bdg_lock) 90f9790aebSLuigi Rizzo #define BDG_RLOCK(b) rw_rlock(&(b)->bdg_lock) 91f9790aebSLuigi Rizzo #define BDG_RTRYLOCK(b) rw_try_rlock(&(b)->bdg_lock) 
92f9790aebSLuigi Rizzo #define BDG_RUNLOCK(b) rw_runlock(&(b)->bdg_lock) 93f9790aebSLuigi Rizzo #define BDG_RWDESTROY(b) rw_destroy(&(b)->bdg_lock) 94f9790aebSLuigi Rizzo 95f9790aebSLuigi Rizzo 96f9790aebSLuigi Rizzo #elif defined(linux) 97f9790aebSLuigi Rizzo 98f9790aebSLuigi Rizzo #include "bsd_glue.h" 99f9790aebSLuigi Rizzo 100f9790aebSLuigi Rizzo #elif defined(__APPLE__) 101f9790aebSLuigi Rizzo 102f9790aebSLuigi Rizzo #warning OSX support is only partial 103f9790aebSLuigi Rizzo #include "osx_glue.h" 104f9790aebSLuigi Rizzo 105*37e3a6d3SLuigi Rizzo #elif defined(_WIN32) 106*37e3a6d3SLuigi Rizzo #include "win_glue.h" 107*37e3a6d3SLuigi Rizzo 108f9790aebSLuigi Rizzo #else 109f9790aebSLuigi Rizzo 110f9790aebSLuigi Rizzo #error Unsupported platform 111f9790aebSLuigi Rizzo 112f9790aebSLuigi Rizzo #endif /* unsupported */ 113f9790aebSLuigi Rizzo 114f9790aebSLuigi Rizzo /* 115f9790aebSLuigi Rizzo * common headers 116f9790aebSLuigi Rizzo */ 117f9790aebSLuigi Rizzo 118f9790aebSLuigi Rizzo #include <net/netmap.h> 119f9790aebSLuigi Rizzo #include <dev/netmap/netmap_kern.h> 120f9790aebSLuigi Rizzo #include <dev/netmap/netmap_mem2.h> 121f9790aebSLuigi Rizzo 122f9790aebSLuigi Rizzo #ifdef WITH_VALE 123f9790aebSLuigi Rizzo 124f9790aebSLuigi Rizzo /* 125f9790aebSLuigi Rizzo * system parameters (most of them in netmap_kern.h) 126*37e3a6d3SLuigi Rizzo * NM_BDG_NAME prefix for switch port names, default "vale" 127f9790aebSLuigi Rizzo * NM_BDG_MAXPORTS number of ports 128f9790aebSLuigi Rizzo * NM_BRIDGES max number of switches in the system. 129f9790aebSLuigi Rizzo * XXX should become a sysctl or tunable 130f9790aebSLuigi Rizzo * 131f9790aebSLuigi Rizzo * Switch ports are named valeX:Y where X is the switch name and Y 132f9790aebSLuigi Rizzo * is the port. If Y matches a physical interface name, the port is 133f9790aebSLuigi Rizzo * connected to a physical device. 
134f9790aebSLuigi Rizzo * 135f9790aebSLuigi Rizzo * Unlike physical interfaces, switch ports use their own memory region 136f9790aebSLuigi Rizzo * for rings and buffers. 137f9790aebSLuigi Rizzo * The virtual interfaces use per-queue lock instead of core lock. 138f9790aebSLuigi Rizzo * In the tx loop, we aggregate traffic in batches to make all operations 139f9790aebSLuigi Rizzo * faster. The batch size is bridge_batch. 140f9790aebSLuigi Rizzo */ 141f9790aebSLuigi Rizzo #define NM_BDG_MAXRINGS 16 /* XXX unclear how many. */ 142f9790aebSLuigi Rizzo #define NM_BDG_MAXSLOTS 4096 /* XXX same as above */ 143f9790aebSLuigi Rizzo #define NM_BRIDGE_RINGSIZE 1024 /* in the device */ 144f9790aebSLuigi Rizzo #define NM_BDG_HASH 1024 /* forwarding table entries */ 145f9790aebSLuigi Rizzo #define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */ 146f9790aebSLuigi Rizzo #define NM_MULTISEG 64 /* max size of a chain of bufs */ 147f9790aebSLuigi Rizzo /* actual size of the tables */ 148f9790aebSLuigi Rizzo #define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NM_MULTISEG) 149f9790aebSLuigi Rizzo /* NM_FT_NULL terminates a list of slots in the ft */ 150f9790aebSLuigi Rizzo #define NM_FT_NULL NM_BDG_BATCH_MAX 151f9790aebSLuigi Rizzo 152f9790aebSLuigi Rizzo 153f9790aebSLuigi Rizzo /* 154f9790aebSLuigi Rizzo * bridge_batch is set via sysctl to the max batch size to be 155f9790aebSLuigi Rizzo * used in the bridge. The actual value may be larger as the 156f9790aebSLuigi Rizzo * last packet in the block may overflow the size. 
157f9790aebSLuigi Rizzo */ 158*37e3a6d3SLuigi Rizzo static int bridge_batch = NM_BDG_BATCH; /* bridge batch size */ 159*37e3a6d3SLuigi Rizzo SYSBEGIN(vars_vale); 160f9790aebSLuigi Rizzo SYSCTL_DECL(_dev_netmap); 161f9790aebSLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , ""); 162*37e3a6d3SLuigi Rizzo SYSEND; 163f9790aebSLuigi Rizzo 1644bf50f18SLuigi Rizzo static int netmap_vp_create(struct nmreq *, struct ifnet *, struct netmap_vp_adapter **); 1654bf50f18SLuigi Rizzo static int netmap_vp_reg(struct netmap_adapter *na, int onoff); 166*37e3a6d3SLuigi Rizzo static int netmap_bwrap_reg(struct netmap_adapter *, int onoff); 167f9790aebSLuigi Rizzo 168f9790aebSLuigi Rizzo /* 169f9790aebSLuigi Rizzo * For each output interface, nm_bdg_q is used to construct a list. 170f9790aebSLuigi Rizzo * bq_len is the number of output buffers (we can have coalescing 171f9790aebSLuigi Rizzo * during the copy). 172f9790aebSLuigi Rizzo */ 173f9790aebSLuigi Rizzo struct nm_bdg_q { 174f9790aebSLuigi Rizzo uint16_t bq_head; 175f9790aebSLuigi Rizzo uint16_t bq_tail; 176f9790aebSLuigi Rizzo uint32_t bq_len; /* number of buffers */ 177f9790aebSLuigi Rizzo }; 178f9790aebSLuigi Rizzo 179f9790aebSLuigi Rizzo /* XXX revise this */ 180f9790aebSLuigi Rizzo struct nm_hash_ent { 181f9790aebSLuigi Rizzo uint64_t mac; /* the top 2 bytes are the epoch */ 182f9790aebSLuigi Rizzo uint64_t ports; 183f9790aebSLuigi Rizzo }; 184f9790aebSLuigi Rizzo 185f9790aebSLuigi Rizzo /* 186f9790aebSLuigi Rizzo * nm_bridge is a descriptor for a VALE switch. 187f9790aebSLuigi Rizzo * Interfaces for a bridge are all in bdg_ports[]. 188f9790aebSLuigi Rizzo * The array has fixed size, an empty entry does not terminate 189f9790aebSLuigi Rizzo * the search, but lookups only occur on attach/detach so we 190f9790aebSLuigi Rizzo * don't mind if they are slow. 
191f9790aebSLuigi Rizzo * 192f9790aebSLuigi Rizzo * The bridge is non blocking on the transmit ports: excess 193f9790aebSLuigi Rizzo * packets are dropped if there is no room on the output port. 194f9790aebSLuigi Rizzo * 195f9790aebSLuigi Rizzo * bdg_lock protects accesses to the bdg_ports array. 196f9790aebSLuigi Rizzo * This is a rw lock (or equivalent). 197f9790aebSLuigi Rizzo */ 198f9790aebSLuigi Rizzo struct nm_bridge { 199f9790aebSLuigi Rizzo /* XXX what is the proper alignment/layout ? */ 200f9790aebSLuigi Rizzo BDG_RWLOCK_T bdg_lock; /* protects bdg_ports */ 201f9790aebSLuigi Rizzo int bdg_namelen; 202f9790aebSLuigi Rizzo uint32_t bdg_active_ports; /* 0 means free */ 203f9790aebSLuigi Rizzo char bdg_basename[IFNAMSIZ]; 204f9790aebSLuigi Rizzo 205f9790aebSLuigi Rizzo /* Indexes of active ports (up to active_ports) 206f9790aebSLuigi Rizzo * and all other remaining ports. 207f9790aebSLuigi Rizzo */ 208f9790aebSLuigi Rizzo uint8_t bdg_port_index[NM_BDG_MAXPORTS]; 209f9790aebSLuigi Rizzo 210f9790aebSLuigi Rizzo struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS]; 211f9790aebSLuigi Rizzo 212f9790aebSLuigi Rizzo 213f9790aebSLuigi Rizzo /* 214f9790aebSLuigi Rizzo * The function to decide the destination port. 215f9790aebSLuigi Rizzo * It returns either of an index of the destination port, 216f9790aebSLuigi Rizzo * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to 217f9790aebSLuigi Rizzo * forward this packet. ring_nr is the source ring index, and the 218f9790aebSLuigi Rizzo * function may overwrite this value to forward this packet to a 219f9790aebSLuigi Rizzo * different ring index. 220*37e3a6d3SLuigi Rizzo * This function must be set by netmap_bdg_ctl(). 221f9790aebSLuigi Rizzo */ 2224bf50f18SLuigi Rizzo struct netmap_bdg_ops bdg_ops; 223f9790aebSLuigi Rizzo 224f9790aebSLuigi Rizzo /* the forwarding table, MAC+ports. 
225f9790aebSLuigi Rizzo * XXX should be changed to an argument to be passed to 226f9790aebSLuigi Rizzo * the lookup function, and allocated on attach 227f9790aebSLuigi Rizzo */ 228f9790aebSLuigi Rizzo struct nm_hash_ent ht[NM_BDG_HASH]; 229847bf383SLuigi Rizzo 230847bf383SLuigi Rizzo #ifdef CONFIG_NET_NS 231847bf383SLuigi Rizzo struct net *ns; 232847bf383SLuigi Rizzo #endif /* CONFIG_NET_NS */ 233f9790aebSLuigi Rizzo }; 234f9790aebSLuigi Rizzo 2354bf50f18SLuigi Rizzo const char* 2364bf50f18SLuigi Rizzo netmap_bdg_name(struct netmap_vp_adapter *vp) 2374bf50f18SLuigi Rizzo { 2384bf50f18SLuigi Rizzo struct nm_bridge *b = vp->na_bdg; 2394bf50f18SLuigi Rizzo if (b == NULL) 2404bf50f18SLuigi Rizzo return NULL; 2414bf50f18SLuigi Rizzo return b->bdg_basename; 2424bf50f18SLuigi Rizzo } 2434bf50f18SLuigi Rizzo 244f9790aebSLuigi Rizzo 245847bf383SLuigi Rizzo #ifndef CONFIG_NET_NS 246f9790aebSLuigi Rizzo /* 247f9790aebSLuigi Rizzo * XXX in principle nm_bridges could be created dynamically 248f9790aebSLuigi Rizzo * Right now we have a static array and deletions are protected 249f9790aebSLuigi Rizzo * by an exclusive lock. 250f9790aebSLuigi Rizzo */ 251*37e3a6d3SLuigi Rizzo static struct nm_bridge *nm_bridges; 252847bf383SLuigi Rizzo #endif /* !CONFIG_NET_NS */ 253f9790aebSLuigi Rizzo 254f9790aebSLuigi Rizzo 255f9790aebSLuigi Rizzo /* 256f9790aebSLuigi Rizzo * this is a slightly optimized copy routine which rounds 257f9790aebSLuigi Rizzo * to multiple of 64 bytes and is often faster than dealing 258f9790aebSLuigi Rizzo * with other odd sizes. We assume there is enough room 259f9790aebSLuigi Rizzo * in the source and destination buffers. 260f9790aebSLuigi Rizzo * 261f9790aebSLuigi Rizzo * XXX only for multiples of 64 bytes, non overlapped. 
262f9790aebSLuigi Rizzo */ 263f9790aebSLuigi Rizzo static inline void 264f9790aebSLuigi Rizzo pkt_copy(void *_src, void *_dst, int l) 265f9790aebSLuigi Rizzo { 266f9790aebSLuigi Rizzo uint64_t *src = _src; 267f9790aebSLuigi Rizzo uint64_t *dst = _dst; 268f9790aebSLuigi Rizzo if (unlikely(l >= 1024)) { 269f9790aebSLuigi Rizzo memcpy(dst, src, l); 270f9790aebSLuigi Rizzo return; 271f9790aebSLuigi Rizzo } 272f9790aebSLuigi Rizzo for (; likely(l > 0); l-=64) { 273f9790aebSLuigi Rizzo *dst++ = *src++; 274f9790aebSLuigi Rizzo *dst++ = *src++; 275f9790aebSLuigi Rizzo *dst++ = *src++; 276f9790aebSLuigi Rizzo *dst++ = *src++; 277f9790aebSLuigi Rizzo *dst++ = *src++; 278f9790aebSLuigi Rizzo *dst++ = *src++; 279f9790aebSLuigi Rizzo *dst++ = *src++; 280f9790aebSLuigi Rizzo *dst++ = *src++; 281f9790aebSLuigi Rizzo } 282f9790aebSLuigi Rizzo } 283f9790aebSLuigi Rizzo 284f9790aebSLuigi Rizzo 285*37e3a6d3SLuigi Rizzo static int 286*37e3a6d3SLuigi Rizzo nm_is_id_char(const char c) 287*37e3a6d3SLuigi Rizzo { 288*37e3a6d3SLuigi Rizzo return (c >= 'a' && c <= 'z') || 289*37e3a6d3SLuigi Rizzo (c >= 'A' && c <= 'Z') || 290*37e3a6d3SLuigi Rizzo (c >= '0' && c <= '9') || 291*37e3a6d3SLuigi Rizzo (c == '_'); 292*37e3a6d3SLuigi Rizzo } 293*37e3a6d3SLuigi Rizzo 294*37e3a6d3SLuigi Rizzo /* Validate the name of a VALE bridge port and return the 295*37e3a6d3SLuigi Rizzo * position of the ":" character. 
*/ 296*37e3a6d3SLuigi Rizzo static int 297*37e3a6d3SLuigi Rizzo nm_vale_name_validate(const char *name) 298*37e3a6d3SLuigi Rizzo { 299*37e3a6d3SLuigi Rizzo int colon_pos = -1; 300*37e3a6d3SLuigi Rizzo int i; 301*37e3a6d3SLuigi Rizzo 302*37e3a6d3SLuigi Rizzo if (!name || strlen(name) < strlen(NM_BDG_NAME)) { 303*37e3a6d3SLuigi Rizzo return -1; 304*37e3a6d3SLuigi Rizzo } 305*37e3a6d3SLuigi Rizzo 306*37e3a6d3SLuigi Rizzo for (i = 0; name[i]; i++) { 307*37e3a6d3SLuigi Rizzo if (name[i] == ':') { 308*37e3a6d3SLuigi Rizzo if (colon_pos != -1) { 309*37e3a6d3SLuigi Rizzo return -1; 310*37e3a6d3SLuigi Rizzo } 311*37e3a6d3SLuigi Rizzo colon_pos = i; 312*37e3a6d3SLuigi Rizzo } else if (!nm_is_id_char(name[i])) { 313*37e3a6d3SLuigi Rizzo return -1; 314*37e3a6d3SLuigi Rizzo } 315*37e3a6d3SLuigi Rizzo } 316*37e3a6d3SLuigi Rizzo 317*37e3a6d3SLuigi Rizzo if (i >= IFNAMSIZ) { 318*37e3a6d3SLuigi Rizzo return -1; 319*37e3a6d3SLuigi Rizzo } 320*37e3a6d3SLuigi Rizzo 321*37e3a6d3SLuigi Rizzo return colon_pos; 322*37e3a6d3SLuigi Rizzo } 323*37e3a6d3SLuigi Rizzo 324f9790aebSLuigi Rizzo /* 325f9790aebSLuigi Rizzo * locate a bridge among the existing ones. 326f9790aebSLuigi Rizzo * MUST BE CALLED WITH NMG_LOCK() 327f9790aebSLuigi Rizzo * 328f9790aebSLuigi Rizzo * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME. 329f9790aebSLuigi Rizzo * We assume that this is called with a name of at least NM_NAME chars. 
330f9790aebSLuigi Rizzo */ 331f9790aebSLuigi Rizzo static struct nm_bridge * 332f9790aebSLuigi Rizzo nm_find_bridge(const char *name, int create) 333f9790aebSLuigi Rizzo { 334*37e3a6d3SLuigi Rizzo int i, namelen; 335847bf383SLuigi Rizzo struct nm_bridge *b = NULL, *bridges; 336847bf383SLuigi Rizzo u_int num_bridges; 337f9790aebSLuigi Rizzo 338f9790aebSLuigi Rizzo NMG_LOCK_ASSERT(); 339f9790aebSLuigi Rizzo 340847bf383SLuigi Rizzo netmap_bns_getbridges(&bridges, &num_bridges); 341847bf383SLuigi Rizzo 342*37e3a6d3SLuigi Rizzo namelen = nm_vale_name_validate(name); 343*37e3a6d3SLuigi Rizzo if (namelen < 0) { 344f9790aebSLuigi Rizzo D("invalid bridge name %s", name ? name : NULL); 345f9790aebSLuigi Rizzo return NULL; 346f9790aebSLuigi Rizzo } 347f9790aebSLuigi Rizzo 348f9790aebSLuigi Rizzo /* lookup the name, remember empty slot if there is one */ 349847bf383SLuigi Rizzo for (i = 0; i < num_bridges; i++) { 350847bf383SLuigi Rizzo struct nm_bridge *x = bridges + i; 351f9790aebSLuigi Rizzo 352f9790aebSLuigi Rizzo if (x->bdg_active_ports == 0) { 353f9790aebSLuigi Rizzo if (create && b == NULL) 354f9790aebSLuigi Rizzo b = x; /* record empty slot */ 355f9790aebSLuigi Rizzo } else if (x->bdg_namelen != namelen) { 356f9790aebSLuigi Rizzo continue; 357f9790aebSLuigi Rizzo } else if (strncmp(name, x->bdg_basename, namelen) == 0) { 358f9790aebSLuigi Rizzo ND("found '%.*s' at %d", namelen, name, i); 359f9790aebSLuigi Rizzo b = x; 360f9790aebSLuigi Rizzo break; 361f9790aebSLuigi Rizzo } 362f9790aebSLuigi Rizzo } 363847bf383SLuigi Rizzo if (i == num_bridges && b) { /* name not found, can create entry */ 364f9790aebSLuigi Rizzo /* initialize the bridge */ 365f9790aebSLuigi Rizzo strncpy(b->bdg_basename, name, namelen); 366f9790aebSLuigi Rizzo ND("create new bridge %s with ports %d", b->bdg_basename, 367f9790aebSLuigi Rizzo b->bdg_active_ports); 368f9790aebSLuigi Rizzo b->bdg_namelen = namelen; 369f9790aebSLuigi Rizzo b->bdg_active_ports = 0; 370f9790aebSLuigi Rizzo for (i = 0; i < 
NM_BDG_MAXPORTS; i++) 371f9790aebSLuigi Rizzo b->bdg_port_index[i] = i; 372f9790aebSLuigi Rizzo /* set the default function */ 3734bf50f18SLuigi Rizzo b->bdg_ops.lookup = netmap_bdg_learning; 374f9790aebSLuigi Rizzo /* reset the MAC address table */ 375f9790aebSLuigi Rizzo bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH); 376847bf383SLuigi Rizzo NM_BNS_GET(b); 377f9790aebSLuigi Rizzo } 378f9790aebSLuigi Rizzo return b; 379f9790aebSLuigi Rizzo } 380f9790aebSLuigi Rizzo 381f9790aebSLuigi Rizzo 382f9790aebSLuigi Rizzo /* 383f9790aebSLuigi Rizzo * Free the forwarding tables for rings attached to switch ports. 384f9790aebSLuigi Rizzo */ 385f9790aebSLuigi Rizzo static void 386f9790aebSLuigi Rizzo nm_free_bdgfwd(struct netmap_adapter *na) 387f9790aebSLuigi Rizzo { 388f9790aebSLuigi Rizzo int nrings, i; 389f9790aebSLuigi Rizzo struct netmap_kring *kring; 390f9790aebSLuigi Rizzo 391f9790aebSLuigi Rizzo NMG_LOCK_ASSERT(); 39217885a7bSLuigi Rizzo nrings = na->num_tx_rings; 39317885a7bSLuigi Rizzo kring = na->tx_rings; 394f9790aebSLuigi Rizzo for (i = 0; i < nrings; i++) { 395f9790aebSLuigi Rizzo if (kring[i].nkr_ft) { 396f9790aebSLuigi Rizzo free(kring[i].nkr_ft, M_DEVBUF); 397f9790aebSLuigi Rizzo kring[i].nkr_ft = NULL; /* protect from freeing twice */ 398f9790aebSLuigi Rizzo } 399f9790aebSLuigi Rizzo } 400f9790aebSLuigi Rizzo } 401f9790aebSLuigi Rizzo 402f9790aebSLuigi Rizzo 403f9790aebSLuigi Rizzo /* 404f9790aebSLuigi Rizzo * Allocate the forwarding tables for the rings attached to the bridge ports. 
405f9790aebSLuigi Rizzo */ 406f9790aebSLuigi Rizzo static int 407f9790aebSLuigi Rizzo nm_alloc_bdgfwd(struct netmap_adapter *na) 408f9790aebSLuigi Rizzo { 409f9790aebSLuigi Rizzo int nrings, l, i, num_dstq; 410f9790aebSLuigi Rizzo struct netmap_kring *kring; 411f9790aebSLuigi Rizzo 412f9790aebSLuigi Rizzo NMG_LOCK_ASSERT(); 413f9790aebSLuigi Rizzo /* all port:rings + broadcast */ 414f9790aebSLuigi Rizzo num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1; 415f9790aebSLuigi Rizzo l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX; 416f9790aebSLuigi Rizzo l += sizeof(struct nm_bdg_q) * num_dstq; 417f9790aebSLuigi Rizzo l += sizeof(uint16_t) * NM_BDG_BATCH_MAX; 418f9790aebSLuigi Rizzo 419847bf383SLuigi Rizzo nrings = netmap_real_rings(na, NR_TX); 420f9790aebSLuigi Rizzo kring = na->tx_rings; 421f9790aebSLuigi Rizzo for (i = 0; i < nrings; i++) { 422f9790aebSLuigi Rizzo struct nm_bdg_fwd *ft; 423f9790aebSLuigi Rizzo struct nm_bdg_q *dstq; 424f9790aebSLuigi Rizzo int j; 425f9790aebSLuigi Rizzo 426f9790aebSLuigi Rizzo ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO); 427f9790aebSLuigi Rizzo if (!ft) { 428f9790aebSLuigi Rizzo nm_free_bdgfwd(na); 429f9790aebSLuigi Rizzo return ENOMEM; 430f9790aebSLuigi Rizzo } 431f9790aebSLuigi Rizzo dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX); 432f9790aebSLuigi Rizzo for (j = 0; j < num_dstq; j++) { 433f9790aebSLuigi Rizzo dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL; 434f9790aebSLuigi Rizzo dstq[j].bq_len = 0; 435f9790aebSLuigi Rizzo } 436f9790aebSLuigi Rizzo kring[i].nkr_ft = ft; 437f9790aebSLuigi Rizzo } 438f9790aebSLuigi Rizzo return 0; 439f9790aebSLuigi Rizzo } 440f9790aebSLuigi Rizzo 441f9790aebSLuigi Rizzo 4424bf50f18SLuigi Rizzo /* remove from bridge b the ports in slots hw and sw 4434bf50f18SLuigi Rizzo * (sw can be -1 if not needed) 4444bf50f18SLuigi Rizzo */ 445f9790aebSLuigi Rizzo static void 446f9790aebSLuigi Rizzo netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw) 447f9790aebSLuigi Rizzo { 448f9790aebSLuigi Rizzo 
int s_hw = hw, s_sw = sw; 449f9790aebSLuigi Rizzo int i, lim =b->bdg_active_ports; 450f9790aebSLuigi Rizzo uint8_t tmp[NM_BDG_MAXPORTS]; 451f9790aebSLuigi Rizzo 452f9790aebSLuigi Rizzo /* 453f9790aebSLuigi Rizzo New algorithm: 454f9790aebSLuigi Rizzo make a copy of bdg_port_index; 455f9790aebSLuigi Rizzo lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port 456f9790aebSLuigi Rizzo in the array of bdg_port_index, replacing them with 457f9790aebSLuigi Rizzo entries from the bottom of the array; 458f9790aebSLuigi Rizzo decrement bdg_active_ports; 459f9790aebSLuigi Rizzo acquire BDG_WLOCK() and copy back the array. 460f9790aebSLuigi Rizzo */ 461f9790aebSLuigi Rizzo 462f0ea3689SLuigi Rizzo if (netmap_verbose) 463f9790aebSLuigi Rizzo D("detach %d and %d (lim %d)", hw, sw, lim); 464f9790aebSLuigi Rizzo /* make a copy of the list of active ports, update it, 465f9790aebSLuigi Rizzo * and then copy back within BDG_WLOCK(). 466f9790aebSLuigi Rizzo */ 467f9790aebSLuigi Rizzo memcpy(tmp, b->bdg_port_index, sizeof(tmp)); 468f9790aebSLuigi Rizzo for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) { 469f9790aebSLuigi Rizzo if (hw >= 0 && tmp[i] == hw) { 470f9790aebSLuigi Rizzo ND("detach hw %d at %d", hw, i); 471f9790aebSLuigi Rizzo lim--; /* point to last active port */ 472f9790aebSLuigi Rizzo tmp[i] = tmp[lim]; /* swap with i */ 473f9790aebSLuigi Rizzo tmp[lim] = hw; /* now this is inactive */ 474f9790aebSLuigi Rizzo hw = -1; 475f9790aebSLuigi Rizzo } else if (sw >= 0 && tmp[i] == sw) { 476f9790aebSLuigi Rizzo ND("detach sw %d at %d", sw, i); 477f9790aebSLuigi Rizzo lim--; 478f9790aebSLuigi Rizzo tmp[i] = tmp[lim]; 479f9790aebSLuigi Rizzo tmp[lim] = sw; 480f9790aebSLuigi Rizzo sw = -1; 481f9790aebSLuigi Rizzo } else { 482f9790aebSLuigi Rizzo i++; 483f9790aebSLuigi Rizzo } 484f9790aebSLuigi Rizzo } 485f9790aebSLuigi Rizzo if (hw >= 0 || sw >= 0) { 486f9790aebSLuigi Rizzo D("XXX delete failed hw %d sw %d, should panic...", hw, sw); 487f9790aebSLuigi Rizzo } 488f9790aebSLuigi Rizzo 
489f9790aebSLuigi Rizzo BDG_WLOCK(b); 4904bf50f18SLuigi Rizzo if (b->bdg_ops.dtor) 4914bf50f18SLuigi Rizzo b->bdg_ops.dtor(b->bdg_ports[s_hw]); 492f9790aebSLuigi Rizzo b->bdg_ports[s_hw] = NULL; 493f9790aebSLuigi Rizzo if (s_sw >= 0) { 494f9790aebSLuigi Rizzo b->bdg_ports[s_sw] = NULL; 495f9790aebSLuigi Rizzo } 496f9790aebSLuigi Rizzo memcpy(b->bdg_port_index, tmp, sizeof(tmp)); 497f9790aebSLuigi Rizzo b->bdg_active_ports = lim; 498f9790aebSLuigi Rizzo BDG_WUNLOCK(b); 499f9790aebSLuigi Rizzo 500f9790aebSLuigi Rizzo ND("now %d active ports", lim); 501f9790aebSLuigi Rizzo if (lim == 0) { 502f9790aebSLuigi Rizzo ND("marking bridge %s as free", b->bdg_basename); 5034bf50f18SLuigi Rizzo bzero(&b->bdg_ops, sizeof(b->bdg_ops)); 504847bf383SLuigi Rizzo NM_BNS_PUT(b); 505f9790aebSLuigi Rizzo } 506f9790aebSLuigi Rizzo } 507f9790aebSLuigi Rizzo 5084bf50f18SLuigi Rizzo /* nm_bdg_ctl callback for VALE ports */ 5094bf50f18SLuigi Rizzo static int 5104bf50f18SLuigi Rizzo netmap_vp_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) 511f9790aebSLuigi Rizzo { 512f9790aebSLuigi Rizzo struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; 513f9790aebSLuigi Rizzo struct nm_bridge *b = vpna->na_bdg; 514f9790aebSLuigi Rizzo 515*37e3a6d3SLuigi Rizzo (void)nmr; // XXX merge ? 
5164bf50f18SLuigi Rizzo if (attach) 5174bf50f18SLuigi Rizzo return 0; /* nothing to do */ 5184bf50f18SLuigi Rizzo if (b) { 5194bf50f18SLuigi Rizzo netmap_set_all_rings(na, 0 /* disable */); 5204bf50f18SLuigi Rizzo netmap_bdg_detach_common(b, vpna->bdg_port, -1); 5214bf50f18SLuigi Rizzo vpna->na_bdg = NULL; 5224bf50f18SLuigi Rizzo netmap_set_all_rings(na, 1 /* enable */); 5234bf50f18SLuigi Rizzo } 5244bf50f18SLuigi Rizzo /* I have took reference just for attach */ 5254bf50f18SLuigi Rizzo netmap_adapter_put(na); 5264bf50f18SLuigi Rizzo return 0; 5274bf50f18SLuigi Rizzo } 5284bf50f18SLuigi Rizzo 5294bf50f18SLuigi Rizzo /* nm_dtor callback for ephemeral VALE ports */ 5304bf50f18SLuigi Rizzo static void 5314bf50f18SLuigi Rizzo netmap_vp_dtor(struct netmap_adapter *na) 5324bf50f18SLuigi Rizzo { 5334bf50f18SLuigi Rizzo struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na; 5344bf50f18SLuigi Rizzo struct nm_bridge *b = vpna->na_bdg; 5354bf50f18SLuigi Rizzo 5364bf50f18SLuigi Rizzo ND("%s has %d references", na->name, na->na_refcount); 537f9790aebSLuigi Rizzo 538f9790aebSLuigi Rizzo if (b) { 539f9790aebSLuigi Rizzo netmap_bdg_detach_common(b, vpna->bdg_port, -1); 540f9790aebSLuigi Rizzo } 541f9790aebSLuigi Rizzo } 542f9790aebSLuigi Rizzo 5434bf50f18SLuigi Rizzo /* remove a persistent VALE port from the system */ 5444bf50f18SLuigi Rizzo static int 5454bf50f18SLuigi Rizzo nm_vi_destroy(const char *name) 5464bf50f18SLuigi Rizzo { 5474bf50f18SLuigi Rizzo struct ifnet *ifp; 5484bf50f18SLuigi Rizzo int error; 5494bf50f18SLuigi Rizzo 5504bf50f18SLuigi Rizzo ifp = ifunit_ref(name); 5514bf50f18SLuigi Rizzo if (!ifp) 5524bf50f18SLuigi Rizzo return ENXIO; 5534bf50f18SLuigi Rizzo NMG_LOCK(); 5544bf50f18SLuigi Rizzo /* make sure this is actually a VALE port */ 555*37e3a6d3SLuigi Rizzo if (!NM_NA_VALID(ifp) || NA(ifp)->nm_register != netmap_vp_reg) { 5564bf50f18SLuigi Rizzo error = EINVAL; 5574bf50f18SLuigi Rizzo goto err; 5584bf50f18SLuigi Rizzo } 5594bf50f18SLuigi Rizzo 
5604bf50f18SLuigi Rizzo if (NA(ifp)->na_refcount > 1) { 5614bf50f18SLuigi Rizzo error = EBUSY; 5624bf50f18SLuigi Rizzo goto err; 5634bf50f18SLuigi Rizzo } 5644bf50f18SLuigi Rizzo NMG_UNLOCK(); 5654bf50f18SLuigi Rizzo 5664bf50f18SLuigi Rizzo D("destroying a persistent vale interface %s", ifp->if_xname); 5674bf50f18SLuigi Rizzo /* Linux requires all the references are released 5684bf50f18SLuigi Rizzo * before unregister 5694bf50f18SLuigi Rizzo */ 5704bf50f18SLuigi Rizzo if_rele(ifp); 5714bf50f18SLuigi Rizzo netmap_detach(ifp); 572*37e3a6d3SLuigi Rizzo nm_os_vi_detach(ifp); 5734bf50f18SLuigi Rizzo return 0; 5744bf50f18SLuigi Rizzo 5754bf50f18SLuigi Rizzo err: 5764bf50f18SLuigi Rizzo NMG_UNLOCK(); 5774bf50f18SLuigi Rizzo if_rele(ifp); 5784bf50f18SLuigi Rizzo return error; 5794bf50f18SLuigi Rizzo } 5804bf50f18SLuigi Rizzo 5814bf50f18SLuigi Rizzo /* 5824bf50f18SLuigi Rizzo * Create a virtual interface registered to the system. 5834bf50f18SLuigi Rizzo * The interface will be attached to a bridge later. 
5844bf50f18SLuigi Rizzo */ 5854bf50f18SLuigi Rizzo static int 5864bf50f18SLuigi Rizzo nm_vi_create(struct nmreq *nmr) 5874bf50f18SLuigi Rizzo { 5884bf50f18SLuigi Rizzo struct ifnet *ifp; 5894bf50f18SLuigi Rizzo struct netmap_vp_adapter *vpna; 5904bf50f18SLuigi Rizzo int error; 5914bf50f18SLuigi Rizzo 5924bf50f18SLuigi Rizzo /* don't include VALE prefix */ 593*37e3a6d3SLuigi Rizzo if (!strncmp(nmr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME))) 5944bf50f18SLuigi Rizzo return EINVAL; 5954bf50f18SLuigi Rizzo ifp = ifunit_ref(nmr->nr_name); 5964bf50f18SLuigi Rizzo if (ifp) { /* already exist, cannot create new one */ 5974bf50f18SLuigi Rizzo if_rele(ifp); 5984bf50f18SLuigi Rizzo return EEXIST; 5994bf50f18SLuigi Rizzo } 600*37e3a6d3SLuigi Rizzo error = nm_os_vi_persist(nmr->nr_name, &ifp); 6014bf50f18SLuigi Rizzo if (error) 6024bf50f18SLuigi Rizzo return error; 6034bf50f18SLuigi Rizzo 6044bf50f18SLuigi Rizzo NMG_LOCK(); 6054bf50f18SLuigi Rizzo /* netmap_vp_create creates a struct netmap_vp_adapter */ 6064bf50f18SLuigi Rizzo error = netmap_vp_create(nmr, ifp, &vpna); 6074bf50f18SLuigi Rizzo if (error) { 6084bf50f18SLuigi Rizzo D("error %d", error); 609*37e3a6d3SLuigi Rizzo nm_os_vi_detach(ifp); 6104bf50f18SLuigi Rizzo return error; 6114bf50f18SLuigi Rizzo } 6124bf50f18SLuigi Rizzo /* persist-specific routines */ 6134bf50f18SLuigi Rizzo vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl; 6144bf50f18SLuigi Rizzo netmap_adapter_get(&vpna->up); 615*37e3a6d3SLuigi Rizzo NM_ATTACH_NA(ifp, &vpna->up); 6164bf50f18SLuigi Rizzo NMG_UNLOCK(); 6174bf50f18SLuigi Rizzo D("created %s", ifp->if_xname); 6184bf50f18SLuigi Rizzo return 0; 6194bf50f18SLuigi Rizzo } 62017885a7bSLuigi Rizzo 62117885a7bSLuigi Rizzo /* Try to get a reference to a netmap adapter attached to a VALE switch. 
62217885a7bSLuigi Rizzo * If the adapter is found (or is created), this function returns 0, a 62317885a7bSLuigi Rizzo * non NULL pointer is returned into *na, and the caller holds a 62417885a7bSLuigi Rizzo * reference to the adapter. 62517885a7bSLuigi Rizzo * If an adapter is not found, then no reference is grabbed and the 62617885a7bSLuigi Rizzo * function returns an error code, or 0 if there is just a VALE prefix 62717885a7bSLuigi Rizzo * mismatch. Therefore the caller holds a reference when 62817885a7bSLuigi Rizzo * (*na != NULL && return == 0). 62917885a7bSLuigi Rizzo */ 630f9790aebSLuigi Rizzo int 631f9790aebSLuigi Rizzo netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create) 632f9790aebSLuigi Rizzo { 6334bf50f18SLuigi Rizzo char *nr_name = nmr->nr_name; 6344bf50f18SLuigi Rizzo const char *ifname; 635f9790aebSLuigi Rizzo struct ifnet *ifp; 636f9790aebSLuigi Rizzo int error = 0; 6374bf50f18SLuigi Rizzo struct netmap_vp_adapter *vpna, *hostna = NULL; 638f9790aebSLuigi Rizzo struct nm_bridge *b; 639f9790aebSLuigi Rizzo int i, j, cand = -1, cand2 = -1; 640f9790aebSLuigi Rizzo int needed; 641f9790aebSLuigi Rizzo 642f9790aebSLuigi Rizzo *na = NULL; /* default return value */ 643f9790aebSLuigi Rizzo 644f9790aebSLuigi Rizzo /* first try to see if this is a bridge port. */ 645f9790aebSLuigi Rizzo NMG_LOCK_ASSERT(); 646*37e3a6d3SLuigi Rizzo if (strncmp(nr_name, NM_BDG_NAME, sizeof(NM_BDG_NAME) - 1)) { 647f9790aebSLuigi Rizzo return 0; /* no error, but no VALE prefix */ 648f9790aebSLuigi Rizzo } 649f9790aebSLuigi Rizzo 6504bf50f18SLuigi Rizzo b = nm_find_bridge(nr_name, create); 651f9790aebSLuigi Rizzo if (b == NULL) { 6524bf50f18SLuigi Rizzo D("no bridges available for '%s'", nr_name); 653f2637526SLuigi Rizzo return (create ? 
ENOMEM : ENXIO); 654f9790aebSLuigi Rizzo } 6554bf50f18SLuigi Rizzo if (strlen(nr_name) < b->bdg_namelen) /* impossible */ 6564bf50f18SLuigi Rizzo panic("x"); 657f9790aebSLuigi Rizzo 658f9790aebSLuigi Rizzo /* Now we are sure that name starts with the bridge's name, 659f9790aebSLuigi Rizzo * lookup the port in the bridge. We need to scan the entire 660f9790aebSLuigi Rizzo * list. It is not important to hold a WLOCK on the bridge 661f9790aebSLuigi Rizzo * during the search because NMG_LOCK already guarantees 662f9790aebSLuigi Rizzo * that there are no other possible writers. 663f9790aebSLuigi Rizzo */ 664f9790aebSLuigi Rizzo 665f9790aebSLuigi Rizzo /* lookup in the local list of ports */ 666f9790aebSLuigi Rizzo for (j = 0; j < b->bdg_active_ports; j++) { 667f9790aebSLuigi Rizzo i = b->bdg_port_index[j]; 668f9790aebSLuigi Rizzo vpna = b->bdg_ports[i]; 669f9790aebSLuigi Rizzo // KASSERT(na != NULL); 670847bf383SLuigi Rizzo ND("checking %s", vpna->up.name); 6714bf50f18SLuigi Rizzo if (!strcmp(vpna->up.name, nr_name)) { 672f9790aebSLuigi Rizzo netmap_adapter_get(&vpna->up); 6734bf50f18SLuigi Rizzo ND("found existing if %s refs %d", nr_name) 6744bf50f18SLuigi Rizzo *na = &vpna->up; 675f9790aebSLuigi Rizzo return 0; 676f9790aebSLuigi Rizzo } 677f9790aebSLuigi Rizzo } 678f9790aebSLuigi Rizzo /* not found, should we create it? 
*/ 679f9790aebSLuigi Rizzo if (!create) 680f9790aebSLuigi Rizzo return ENXIO; 681f9790aebSLuigi Rizzo /* yes we should, see if we have space to attach entries */ 682f9790aebSLuigi Rizzo needed = 2; /* in some cases we only need 1 */ 683f9790aebSLuigi Rizzo if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) { 684f9790aebSLuigi Rizzo D("bridge full %d, cannot create new port", b->bdg_active_ports); 685f2637526SLuigi Rizzo return ENOMEM; 686f9790aebSLuigi Rizzo } 687f9790aebSLuigi Rizzo /* record the next two ports available, but do not allocate yet */ 688f9790aebSLuigi Rizzo cand = b->bdg_port_index[b->bdg_active_ports]; 689f9790aebSLuigi Rizzo cand2 = b->bdg_port_index[b->bdg_active_ports + 1]; 690f9790aebSLuigi Rizzo ND("+++ bridge %s port %s used %d avail %d %d", 6914bf50f18SLuigi Rizzo b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2); 692f9790aebSLuigi Rizzo 693f9790aebSLuigi Rizzo /* 694f9790aebSLuigi Rizzo * try see if there is a matching NIC with this name 695f9790aebSLuigi Rizzo * (after the bridge's name) 696f9790aebSLuigi Rizzo */ 6974bf50f18SLuigi Rizzo ifname = nr_name + b->bdg_namelen + 1; 6984bf50f18SLuigi Rizzo ifp = ifunit_ref(ifname); 6994bf50f18SLuigi Rizzo if (!ifp) { 7004bf50f18SLuigi Rizzo /* Create an ephemeral virtual port 7014bf50f18SLuigi Rizzo * This block contains all the ephemeral-specific logics 7024bf50f18SLuigi Rizzo */ 703f9790aebSLuigi Rizzo if (nmr->nr_cmd) { 704f9790aebSLuigi Rizzo /* nr_cmd must be 0 for a virtual port */ 705f9790aebSLuigi Rizzo return EINVAL; 706f9790aebSLuigi Rizzo } 707f9790aebSLuigi Rizzo 708f9790aebSLuigi Rizzo /* bdg_netmap_attach creates a struct netmap_adapter */ 7094bf50f18SLuigi Rizzo error = netmap_vp_create(nmr, NULL, &vpna); 710f9790aebSLuigi Rizzo if (error) { 711f9790aebSLuigi Rizzo D("error %d", error); 712f9790aebSLuigi Rizzo free(ifp, M_DEVBUF); 713f9790aebSLuigi Rizzo return error; 714f9790aebSLuigi Rizzo } 7154bf50f18SLuigi Rizzo /* shortcut - we can skip get_hw_na(), 
7164bf50f18SLuigi Rizzo * ownership check and nm_bdg_attach() 7174bf50f18SLuigi Rizzo */ 7184bf50f18SLuigi Rizzo } else { 7194bf50f18SLuigi Rizzo struct netmap_adapter *hw; 720f9790aebSLuigi Rizzo 7214bf50f18SLuigi Rizzo error = netmap_get_hw_na(ifp, &hw); 7224bf50f18SLuigi Rizzo if (error || hw == NULL) 723f9790aebSLuigi Rizzo goto out; 724f9790aebSLuigi Rizzo 7254bf50f18SLuigi Rizzo /* host adapter might not be created */ 7264bf50f18SLuigi Rizzo error = hw->nm_bdg_attach(nr_name, hw); 7274bf50f18SLuigi Rizzo if (error) 728f9790aebSLuigi Rizzo goto out; 7294bf50f18SLuigi Rizzo vpna = hw->na_vp; 7304bf50f18SLuigi Rizzo hostna = hw->na_hostvp; 7314bf50f18SLuigi Rizzo if (nmr->nr_arg1 != NETMAP_BDG_HOST) 7324bf50f18SLuigi Rizzo hostna = NULL; 733f9790aebSLuigi Rizzo } 734f9790aebSLuigi Rizzo 735f9790aebSLuigi Rizzo BDG_WLOCK(b); 736f9790aebSLuigi Rizzo vpna->bdg_port = cand; 737f9790aebSLuigi Rizzo ND("NIC %p to bridge port %d", vpna, cand); 738f9790aebSLuigi Rizzo /* bind the port to the bridge (virtual ports are not active) */ 739f9790aebSLuigi Rizzo b->bdg_ports[cand] = vpna; 740f9790aebSLuigi Rizzo vpna->na_bdg = b; 741f9790aebSLuigi Rizzo b->bdg_active_ports++; 7424bf50f18SLuigi Rizzo if (hostna != NULL) { 743f9790aebSLuigi Rizzo /* also bind the host stack to the bridge */ 744f9790aebSLuigi Rizzo b->bdg_ports[cand2] = hostna; 745f9790aebSLuigi Rizzo hostna->bdg_port = cand2; 746f9790aebSLuigi Rizzo hostna->na_bdg = b; 747f9790aebSLuigi Rizzo b->bdg_active_ports++; 748f9790aebSLuigi Rizzo ND("host %p to bridge port %d", hostna, cand2); 749f9790aebSLuigi Rizzo } 7504bf50f18SLuigi Rizzo ND("if %s refs %d", ifname, vpna->up.na_refcount); 751f9790aebSLuigi Rizzo BDG_WUNLOCK(b); 7524bf50f18SLuigi Rizzo *na = &vpna->up; 7534bf50f18SLuigi Rizzo netmap_adapter_get(*na); 754f9790aebSLuigi Rizzo return 0; 755f9790aebSLuigi Rizzo 756f9790aebSLuigi Rizzo out: 757f9790aebSLuigi Rizzo if_rele(ifp); 758f9790aebSLuigi Rizzo 759f9790aebSLuigi Rizzo return error; 
760f9790aebSLuigi Rizzo } 761f9790aebSLuigi Rizzo 762f9790aebSLuigi Rizzo 7634bf50f18SLuigi Rizzo /* Process NETMAP_BDG_ATTACH */ 764f9790aebSLuigi Rizzo static int 7654bf50f18SLuigi Rizzo nm_bdg_ctl_attach(struct nmreq *nmr) 766f9790aebSLuigi Rizzo { 767f9790aebSLuigi Rizzo struct netmap_adapter *na; 768f9790aebSLuigi Rizzo int error; 769f9790aebSLuigi Rizzo 770f9790aebSLuigi Rizzo NMG_LOCK(); 771f2637526SLuigi Rizzo 77217885a7bSLuigi Rizzo error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */); 7734bf50f18SLuigi Rizzo if (error) /* no device */ 774f9790aebSLuigi Rizzo goto unlock_exit; 775f2637526SLuigi Rizzo 77617885a7bSLuigi Rizzo if (na == NULL) { /* VALE prefix missing */ 777f9790aebSLuigi Rizzo error = EINVAL; 77817885a7bSLuigi Rizzo goto unlock_exit; 779f9790aebSLuigi Rizzo } 780f9790aebSLuigi Rizzo 7814bf50f18SLuigi Rizzo if (NETMAP_OWNED_BY_ANY(na)) { 782f9790aebSLuigi Rizzo error = EBUSY; 783f9790aebSLuigi Rizzo goto unref_exit; 784f9790aebSLuigi Rizzo } 785f9790aebSLuigi Rizzo 7864bf50f18SLuigi Rizzo if (na->nm_bdg_ctl) { 7874bf50f18SLuigi Rizzo /* nop for VALE ports. 
The bwrap needs to put the hwna 7884bf50f18SLuigi Rizzo * in netmap mode (see netmap_bwrap_bdg_ctl) 7894bf50f18SLuigi Rizzo */ 7904bf50f18SLuigi Rizzo error = na->nm_bdg_ctl(na, nmr, 1); 7914bf50f18SLuigi Rizzo if (error) 792f9790aebSLuigi Rizzo goto unref_exit; 7934bf50f18SLuigi Rizzo ND("registered %s to netmap-mode", na->name); 794f9790aebSLuigi Rizzo } 795f9790aebSLuigi Rizzo NMG_UNLOCK(); 796f9790aebSLuigi Rizzo return 0; 797f9790aebSLuigi Rizzo 798f9790aebSLuigi Rizzo unref_exit: 799f9790aebSLuigi Rizzo netmap_adapter_put(na); 800f9790aebSLuigi Rizzo unlock_exit: 801f9790aebSLuigi Rizzo NMG_UNLOCK(); 802f9790aebSLuigi Rizzo return error; 803f9790aebSLuigi Rizzo } 804f9790aebSLuigi Rizzo 805*37e3a6d3SLuigi Rizzo static inline int 806*37e3a6d3SLuigi Rizzo nm_is_bwrap(struct netmap_adapter *na) 807*37e3a6d3SLuigi Rizzo { 808*37e3a6d3SLuigi Rizzo return na->nm_register == netmap_bwrap_reg; 809*37e3a6d3SLuigi Rizzo } 81017885a7bSLuigi Rizzo 8114bf50f18SLuigi Rizzo /* process NETMAP_BDG_DETACH */ 812f9790aebSLuigi Rizzo static int 8134bf50f18SLuigi Rizzo nm_bdg_ctl_detach(struct nmreq *nmr) 814f9790aebSLuigi Rizzo { 815f9790aebSLuigi Rizzo struct netmap_adapter *na; 816f9790aebSLuigi Rizzo int error; 817f9790aebSLuigi Rizzo 818f9790aebSLuigi Rizzo NMG_LOCK(); 81917885a7bSLuigi Rizzo error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */); 820f9790aebSLuigi Rizzo if (error) { /* no device, or another bridge or user owns the device */ 821f9790aebSLuigi Rizzo goto unlock_exit; 822f9790aebSLuigi Rizzo } 823f2637526SLuigi Rizzo 82417885a7bSLuigi Rizzo if (na == NULL) { /* VALE prefix missing */ 825f9790aebSLuigi Rizzo error = EINVAL; 82617885a7bSLuigi Rizzo goto unlock_exit; 827*37e3a6d3SLuigi Rizzo } else if (nm_is_bwrap(na) && 828*37e3a6d3SLuigi Rizzo ((struct netmap_bwrap_adapter *)na)->na_polling_state) { 829*37e3a6d3SLuigi Rizzo /* Don't detach a NIC with polling */ 830*37e3a6d3SLuigi Rizzo error = EBUSY; 831*37e3a6d3SLuigi Rizzo netmap_adapter_put(na); 
832*37e3a6d3SLuigi Rizzo goto unlock_exit; 833f9790aebSLuigi Rizzo } 8344bf50f18SLuigi Rizzo if (na->nm_bdg_ctl) { 8354bf50f18SLuigi Rizzo /* remove the port from bridge. The bwrap 8364bf50f18SLuigi Rizzo * also needs to put the hwna in normal mode 8374bf50f18SLuigi Rizzo */ 8384bf50f18SLuigi Rizzo error = na->nm_bdg_ctl(na, nmr, 0); 839f9790aebSLuigi Rizzo } 840f9790aebSLuigi Rizzo 841f9790aebSLuigi Rizzo netmap_adapter_put(na); 842f9790aebSLuigi Rizzo unlock_exit: 843f9790aebSLuigi Rizzo NMG_UNLOCK(); 844f9790aebSLuigi Rizzo return error; 845f9790aebSLuigi Rizzo 846f9790aebSLuigi Rizzo } 847f9790aebSLuigi Rizzo 848*37e3a6d3SLuigi Rizzo struct nm_bdg_polling_state; 849*37e3a6d3SLuigi Rizzo struct 850*37e3a6d3SLuigi Rizzo nm_bdg_kthread { 851*37e3a6d3SLuigi Rizzo struct nm_kthread *nmk; 852*37e3a6d3SLuigi Rizzo u_int qfirst; 853*37e3a6d3SLuigi Rizzo u_int qlast; 854*37e3a6d3SLuigi Rizzo struct nm_bdg_polling_state *bps; 855*37e3a6d3SLuigi Rizzo }; 856*37e3a6d3SLuigi Rizzo 857*37e3a6d3SLuigi Rizzo struct nm_bdg_polling_state { 858*37e3a6d3SLuigi Rizzo bool configured; 859*37e3a6d3SLuigi Rizzo bool stopped; 860*37e3a6d3SLuigi Rizzo struct netmap_bwrap_adapter *bna; 861*37e3a6d3SLuigi Rizzo u_int reg; 862*37e3a6d3SLuigi Rizzo u_int qfirst; 863*37e3a6d3SLuigi Rizzo u_int qlast; 864*37e3a6d3SLuigi Rizzo u_int cpu_from; 865*37e3a6d3SLuigi Rizzo u_int ncpus; 866*37e3a6d3SLuigi Rizzo struct nm_bdg_kthread *kthreads; 867*37e3a6d3SLuigi Rizzo }; 868*37e3a6d3SLuigi Rizzo 869*37e3a6d3SLuigi Rizzo static void 870*37e3a6d3SLuigi Rizzo netmap_bwrap_polling(void *data) 871*37e3a6d3SLuigi Rizzo { 872*37e3a6d3SLuigi Rizzo struct nm_bdg_kthread *nbk = data; 873*37e3a6d3SLuigi Rizzo struct netmap_bwrap_adapter *bna; 874*37e3a6d3SLuigi Rizzo u_int qfirst, qlast, i; 875*37e3a6d3SLuigi Rizzo struct netmap_kring *kring0, *kring; 876*37e3a6d3SLuigi Rizzo 877*37e3a6d3SLuigi Rizzo if (!nbk) 878*37e3a6d3SLuigi Rizzo return; 879*37e3a6d3SLuigi Rizzo qfirst = nbk->qfirst; 880*37e3a6d3SLuigi 
Rizzo qlast = nbk->qlast; 881*37e3a6d3SLuigi Rizzo bna = nbk->bps->bna; 882*37e3a6d3SLuigi Rizzo kring0 = NMR(bna->hwna, NR_RX); 883*37e3a6d3SLuigi Rizzo 884*37e3a6d3SLuigi Rizzo for (i = qfirst; i < qlast; i++) { 885*37e3a6d3SLuigi Rizzo kring = kring0 + i; 886*37e3a6d3SLuigi Rizzo kring->nm_notify(kring, 0); 887*37e3a6d3SLuigi Rizzo } 888*37e3a6d3SLuigi Rizzo } 889*37e3a6d3SLuigi Rizzo 890*37e3a6d3SLuigi Rizzo static int 891*37e3a6d3SLuigi Rizzo nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps) 892*37e3a6d3SLuigi Rizzo { 893*37e3a6d3SLuigi Rizzo struct nm_kthread_cfg kcfg; 894*37e3a6d3SLuigi Rizzo int i, j; 895*37e3a6d3SLuigi Rizzo 896*37e3a6d3SLuigi Rizzo bps->kthreads = malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus, 897*37e3a6d3SLuigi Rizzo M_DEVBUF, M_NOWAIT | M_ZERO); 898*37e3a6d3SLuigi Rizzo if (bps->kthreads == NULL) 899*37e3a6d3SLuigi Rizzo return ENOMEM; 900*37e3a6d3SLuigi Rizzo 901*37e3a6d3SLuigi Rizzo bzero(&kcfg, sizeof(kcfg)); 902*37e3a6d3SLuigi Rizzo kcfg.worker_fn = netmap_bwrap_polling; 903*37e3a6d3SLuigi Rizzo for (i = 0; i < bps->ncpus; i++) { 904*37e3a6d3SLuigi Rizzo struct nm_bdg_kthread *t = bps->kthreads + i; 905*37e3a6d3SLuigi Rizzo int all = (bps->ncpus == 1 && bps->reg == NR_REG_ALL_NIC); 906*37e3a6d3SLuigi Rizzo int affinity = bps->cpu_from + i; 907*37e3a6d3SLuigi Rizzo 908*37e3a6d3SLuigi Rizzo t->bps = bps; 909*37e3a6d3SLuigi Rizzo t->qfirst = all ? bps->qfirst /* must be 0 */: affinity; 910*37e3a6d3SLuigi Rizzo t->qlast = all ? 
bps->qlast : t->qfirst + 1; 911*37e3a6d3SLuigi Rizzo D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst, 912*37e3a6d3SLuigi Rizzo t->qlast); 913*37e3a6d3SLuigi Rizzo 914*37e3a6d3SLuigi Rizzo kcfg.type = i; 915*37e3a6d3SLuigi Rizzo kcfg.worker_private = t; 916*37e3a6d3SLuigi Rizzo t->nmk = nm_os_kthread_create(&kcfg); 917*37e3a6d3SLuigi Rizzo if (t->nmk == NULL) { 918*37e3a6d3SLuigi Rizzo goto cleanup; 919*37e3a6d3SLuigi Rizzo } 920*37e3a6d3SLuigi Rizzo nm_os_kthread_set_affinity(t->nmk, affinity); 921*37e3a6d3SLuigi Rizzo } 922*37e3a6d3SLuigi Rizzo return 0; 923*37e3a6d3SLuigi Rizzo 924*37e3a6d3SLuigi Rizzo cleanup: 925*37e3a6d3SLuigi Rizzo for (j = 0; j < i; j++) { 926*37e3a6d3SLuigi Rizzo struct nm_bdg_kthread *t = bps->kthreads + i; 927*37e3a6d3SLuigi Rizzo nm_os_kthread_delete(t->nmk); 928*37e3a6d3SLuigi Rizzo } 929*37e3a6d3SLuigi Rizzo free(bps->kthreads, M_DEVBUF); 930*37e3a6d3SLuigi Rizzo return EFAULT; 931*37e3a6d3SLuigi Rizzo } 932*37e3a6d3SLuigi Rizzo 933*37e3a6d3SLuigi Rizzo /* a version of ptnetmap_start_kthreads() */ 934*37e3a6d3SLuigi Rizzo static int 935*37e3a6d3SLuigi Rizzo nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps) 936*37e3a6d3SLuigi Rizzo { 937*37e3a6d3SLuigi Rizzo int error, i, j; 938*37e3a6d3SLuigi Rizzo 939*37e3a6d3SLuigi Rizzo if (!bps) { 940*37e3a6d3SLuigi Rizzo D("polling is not configured"); 941*37e3a6d3SLuigi Rizzo return EFAULT; 942*37e3a6d3SLuigi Rizzo } 943*37e3a6d3SLuigi Rizzo bps->stopped = false; 944*37e3a6d3SLuigi Rizzo 945*37e3a6d3SLuigi Rizzo for (i = 0; i < bps->ncpus; i++) { 946*37e3a6d3SLuigi Rizzo struct nm_bdg_kthread *t = bps->kthreads + i; 947*37e3a6d3SLuigi Rizzo error = nm_os_kthread_start(t->nmk); 948*37e3a6d3SLuigi Rizzo if (error) { 949*37e3a6d3SLuigi Rizzo D("error in nm_kthread_start()"); 950*37e3a6d3SLuigi Rizzo goto cleanup; 951*37e3a6d3SLuigi Rizzo } 952*37e3a6d3SLuigi Rizzo } 953*37e3a6d3SLuigi Rizzo return 0; 954*37e3a6d3SLuigi Rizzo 955*37e3a6d3SLuigi Rizzo cleanup: 956*37e3a6d3SLuigi 
Rizzo for (j = 0; j < i; j++) { 957*37e3a6d3SLuigi Rizzo struct nm_bdg_kthread *t = bps->kthreads + i; 958*37e3a6d3SLuigi Rizzo nm_os_kthread_stop(t->nmk); 959*37e3a6d3SLuigi Rizzo } 960*37e3a6d3SLuigi Rizzo bps->stopped = true; 961*37e3a6d3SLuigi Rizzo return error; 962*37e3a6d3SLuigi Rizzo } 963*37e3a6d3SLuigi Rizzo 964*37e3a6d3SLuigi Rizzo static void 965*37e3a6d3SLuigi Rizzo nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps) 966*37e3a6d3SLuigi Rizzo { 967*37e3a6d3SLuigi Rizzo int i; 968*37e3a6d3SLuigi Rizzo 969*37e3a6d3SLuigi Rizzo if (!bps) 970*37e3a6d3SLuigi Rizzo return; 971*37e3a6d3SLuigi Rizzo 972*37e3a6d3SLuigi Rizzo for (i = 0; i < bps->ncpus; i++) { 973*37e3a6d3SLuigi Rizzo struct nm_bdg_kthread *t = bps->kthreads + i; 974*37e3a6d3SLuigi Rizzo nm_os_kthread_stop(t->nmk); 975*37e3a6d3SLuigi Rizzo nm_os_kthread_delete(t->nmk); 976*37e3a6d3SLuigi Rizzo } 977*37e3a6d3SLuigi Rizzo bps->stopped = true; 978*37e3a6d3SLuigi Rizzo } 979*37e3a6d3SLuigi Rizzo 980*37e3a6d3SLuigi Rizzo static int 981*37e3a6d3SLuigi Rizzo get_polling_cfg(struct nmreq *nmr, struct netmap_adapter *na, 982*37e3a6d3SLuigi Rizzo struct nm_bdg_polling_state *bps) 983*37e3a6d3SLuigi Rizzo { 984*37e3a6d3SLuigi Rizzo int req_cpus, avail_cpus, core_from; 985*37e3a6d3SLuigi Rizzo u_int reg, i, qfirst, qlast; 986*37e3a6d3SLuigi Rizzo 987*37e3a6d3SLuigi Rizzo avail_cpus = nm_os_ncpus(); 988*37e3a6d3SLuigi Rizzo req_cpus = nmr->nr_arg1; 989*37e3a6d3SLuigi Rizzo 990*37e3a6d3SLuigi Rizzo if (req_cpus == 0) { 991*37e3a6d3SLuigi Rizzo D("req_cpus must be > 0"); 992*37e3a6d3SLuigi Rizzo return EINVAL; 993*37e3a6d3SLuigi Rizzo } else if (req_cpus >= avail_cpus) { 994*37e3a6d3SLuigi Rizzo D("for safety, we need at least one core left in the system"); 995*37e3a6d3SLuigi Rizzo return EINVAL; 996*37e3a6d3SLuigi Rizzo } 997*37e3a6d3SLuigi Rizzo reg = nmr->nr_flags & NR_REG_MASK; 998*37e3a6d3SLuigi Rizzo i = nmr->nr_ringid & NETMAP_RING_MASK; 999*37e3a6d3SLuigi Rizzo /* 1000*37e3a6d3SLuigi 
Rizzo * ONE_NIC: dedicate one core to one ring. If multiple cores 1001*37e3a6d3SLuigi Rizzo * are specified, consecutive rings are also polled. 1002*37e3a6d3SLuigi Rizzo * For example, if ringid=2 and 2 cores are given, 1003*37e3a6d3SLuigi Rizzo * ring 2 and 3 are polled by core 2 and 3, respectively. 1004*37e3a6d3SLuigi Rizzo * ALL_NIC: poll all the rings using a core specified by ringid. 1005*37e3a6d3SLuigi Rizzo * the number of cores must be 1. 1006*37e3a6d3SLuigi Rizzo */ 1007*37e3a6d3SLuigi Rizzo if (reg == NR_REG_ONE_NIC) { 1008*37e3a6d3SLuigi Rizzo if (i + req_cpus > nma_get_nrings(na, NR_RX)) { 1009*37e3a6d3SLuigi Rizzo D("only %d rings exist (ring %u-%u is given)", 1010*37e3a6d3SLuigi Rizzo nma_get_nrings(na, NR_RX), i, i+req_cpus); 1011*37e3a6d3SLuigi Rizzo return EINVAL; 1012*37e3a6d3SLuigi Rizzo } 1013*37e3a6d3SLuigi Rizzo qfirst = i; 1014*37e3a6d3SLuigi Rizzo qlast = qfirst + req_cpus; 1015*37e3a6d3SLuigi Rizzo core_from = qfirst; 1016*37e3a6d3SLuigi Rizzo } else if (reg == NR_REG_ALL_NIC) { 1017*37e3a6d3SLuigi Rizzo if (req_cpus != 1) { 1018*37e3a6d3SLuigi Rizzo D("ncpus must be 1 not %d for REG_ALL_NIC", req_cpus); 1019*37e3a6d3SLuigi Rizzo return EINVAL; 1020*37e3a6d3SLuigi Rizzo } 1021*37e3a6d3SLuigi Rizzo qfirst = 0; 1022*37e3a6d3SLuigi Rizzo qlast = nma_get_nrings(na, NR_RX); 1023*37e3a6d3SLuigi Rizzo core_from = i; 1024*37e3a6d3SLuigi Rizzo } else { 1025*37e3a6d3SLuigi Rizzo D("reg must be ALL_NIC or ONE_NIC"); 1026*37e3a6d3SLuigi Rizzo return EINVAL; 1027*37e3a6d3SLuigi Rizzo } 1028*37e3a6d3SLuigi Rizzo 1029*37e3a6d3SLuigi Rizzo bps->reg = reg; 1030*37e3a6d3SLuigi Rizzo bps->qfirst = qfirst; 1031*37e3a6d3SLuigi Rizzo bps->qlast = qlast; 1032*37e3a6d3SLuigi Rizzo bps->cpu_from = core_from; 1033*37e3a6d3SLuigi Rizzo bps->ncpus = req_cpus; 1034*37e3a6d3SLuigi Rizzo D("%s qfirst %u qlast %u cpu_from %u ncpus %u", 1035*37e3a6d3SLuigi Rizzo reg == NR_REG_ALL_NIC ? 
"REG_ALL_NIC" : "REG_ONE_NIC", 1036*37e3a6d3SLuigi Rizzo qfirst, qlast, core_from, req_cpus); 1037*37e3a6d3SLuigi Rizzo return 0; 1038*37e3a6d3SLuigi Rizzo } 1039*37e3a6d3SLuigi Rizzo 1040*37e3a6d3SLuigi Rizzo static int 1041*37e3a6d3SLuigi Rizzo nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na) 1042*37e3a6d3SLuigi Rizzo { 1043*37e3a6d3SLuigi Rizzo struct nm_bdg_polling_state *bps; 1044*37e3a6d3SLuigi Rizzo struct netmap_bwrap_adapter *bna; 1045*37e3a6d3SLuigi Rizzo int error; 1046*37e3a6d3SLuigi Rizzo 1047*37e3a6d3SLuigi Rizzo bna = (struct netmap_bwrap_adapter *)na; 1048*37e3a6d3SLuigi Rizzo if (bna->na_polling_state) { 1049*37e3a6d3SLuigi Rizzo D("ERROR adapter already in polling mode"); 1050*37e3a6d3SLuigi Rizzo return EFAULT; 1051*37e3a6d3SLuigi Rizzo } 1052*37e3a6d3SLuigi Rizzo 1053*37e3a6d3SLuigi Rizzo bps = malloc(sizeof(*bps), M_DEVBUF, M_NOWAIT | M_ZERO); 1054*37e3a6d3SLuigi Rizzo if (!bps) 1055*37e3a6d3SLuigi Rizzo return ENOMEM; 1056*37e3a6d3SLuigi Rizzo bps->configured = false; 1057*37e3a6d3SLuigi Rizzo bps->stopped = true; 1058*37e3a6d3SLuigi Rizzo 1059*37e3a6d3SLuigi Rizzo if (get_polling_cfg(nmr, na, bps)) { 1060*37e3a6d3SLuigi Rizzo free(bps, M_DEVBUF); 1061*37e3a6d3SLuigi Rizzo return EINVAL; 1062*37e3a6d3SLuigi Rizzo } 1063*37e3a6d3SLuigi Rizzo 1064*37e3a6d3SLuigi Rizzo if (nm_bdg_create_kthreads(bps)) { 1065*37e3a6d3SLuigi Rizzo free(bps, M_DEVBUF); 1066*37e3a6d3SLuigi Rizzo return EFAULT; 1067*37e3a6d3SLuigi Rizzo } 1068*37e3a6d3SLuigi Rizzo 1069*37e3a6d3SLuigi Rizzo bps->configured = true; 1070*37e3a6d3SLuigi Rizzo bna->na_polling_state = bps; 1071*37e3a6d3SLuigi Rizzo bps->bna = bna; 1072*37e3a6d3SLuigi Rizzo 1073*37e3a6d3SLuigi Rizzo /* disable interrupt if possible */ 1074*37e3a6d3SLuigi Rizzo if (bna->hwna->nm_intr) 1075*37e3a6d3SLuigi Rizzo bna->hwna->nm_intr(bna->hwna, 0); 1076*37e3a6d3SLuigi Rizzo /* start kthread now */ 1077*37e3a6d3SLuigi Rizzo error = nm_bdg_polling_start_kthreads(bps); 1078*37e3a6d3SLuigi Rizzo 
if (error) { 1079*37e3a6d3SLuigi Rizzo D("ERROR nm_bdg_polling_start_kthread()"); 1080*37e3a6d3SLuigi Rizzo free(bps->kthreads, M_DEVBUF); 1081*37e3a6d3SLuigi Rizzo free(bps, M_DEVBUF); 1082*37e3a6d3SLuigi Rizzo bna->na_polling_state = NULL; 1083*37e3a6d3SLuigi Rizzo if (bna->hwna->nm_intr) 1084*37e3a6d3SLuigi Rizzo bna->hwna->nm_intr(bna->hwna, 1); 1085*37e3a6d3SLuigi Rizzo } 1086*37e3a6d3SLuigi Rizzo return error; 1087*37e3a6d3SLuigi Rizzo } 1088*37e3a6d3SLuigi Rizzo 1089*37e3a6d3SLuigi Rizzo static int 1090*37e3a6d3SLuigi Rizzo nm_bdg_ctl_polling_stop(struct nmreq *nmr, struct netmap_adapter *na) 1091*37e3a6d3SLuigi Rizzo { 1092*37e3a6d3SLuigi Rizzo struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na; 1093*37e3a6d3SLuigi Rizzo struct nm_bdg_polling_state *bps; 1094*37e3a6d3SLuigi Rizzo 1095*37e3a6d3SLuigi Rizzo if (!bna->na_polling_state) { 1096*37e3a6d3SLuigi Rizzo D("ERROR adapter is not in polling mode"); 1097*37e3a6d3SLuigi Rizzo return EFAULT; 1098*37e3a6d3SLuigi Rizzo } 1099*37e3a6d3SLuigi Rizzo bps = bna->na_polling_state; 1100*37e3a6d3SLuigi Rizzo nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state); 1101*37e3a6d3SLuigi Rizzo bps->configured = false; 1102*37e3a6d3SLuigi Rizzo free(bps, M_DEVBUF); 1103*37e3a6d3SLuigi Rizzo bna->na_polling_state = NULL; 1104*37e3a6d3SLuigi Rizzo /* reenable interrupt */ 1105*37e3a6d3SLuigi Rizzo if (bna->hwna->nm_intr) 1106*37e3a6d3SLuigi Rizzo bna->hwna->nm_intr(bna->hwna, 1); 1107*37e3a6d3SLuigi Rizzo return 0; 1108*37e3a6d3SLuigi Rizzo } 1109f9790aebSLuigi Rizzo 11104bf50f18SLuigi Rizzo /* Called by either user's context (netmap_ioctl()) 11114bf50f18SLuigi Rizzo * or external kernel modules (e.g., Openvswitch). 11124bf50f18SLuigi Rizzo * Operation is indicated in nmr->nr_cmd. 11134bf50f18SLuigi Rizzo * NETMAP_BDG_OPS that sets configure/lookup/dtor functions to the bridge 11144bf50f18SLuigi Rizzo * requires bdg_ops argument; the other commands ignore this argument. 
11154bf50f18SLuigi Rizzo * 1116f9790aebSLuigi Rizzo * Called without NMG_LOCK. 1117f9790aebSLuigi Rizzo */ 1118f9790aebSLuigi Rizzo int 11194bf50f18SLuigi Rizzo netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops) 1120f9790aebSLuigi Rizzo { 1121847bf383SLuigi Rizzo struct nm_bridge *b, *bridges; 1122f9790aebSLuigi Rizzo struct netmap_adapter *na; 1123f9790aebSLuigi Rizzo struct netmap_vp_adapter *vpna; 1124f9790aebSLuigi Rizzo char *name = nmr->nr_name; 1125f9790aebSLuigi Rizzo int cmd = nmr->nr_cmd, namelen = strlen(name); 1126f9790aebSLuigi Rizzo int error = 0, i, j; 1127847bf383SLuigi Rizzo u_int num_bridges; 1128847bf383SLuigi Rizzo 1129847bf383SLuigi Rizzo netmap_bns_getbridges(&bridges, &num_bridges); 1130f9790aebSLuigi Rizzo 1131f9790aebSLuigi Rizzo switch (cmd) { 11324bf50f18SLuigi Rizzo case NETMAP_BDG_NEWIF: 11334bf50f18SLuigi Rizzo error = nm_vi_create(nmr); 11344bf50f18SLuigi Rizzo break; 11354bf50f18SLuigi Rizzo 11364bf50f18SLuigi Rizzo case NETMAP_BDG_DELIF: 11374bf50f18SLuigi Rizzo error = nm_vi_destroy(nmr->nr_name); 11384bf50f18SLuigi Rizzo break; 11394bf50f18SLuigi Rizzo 1140f9790aebSLuigi Rizzo case NETMAP_BDG_ATTACH: 11414bf50f18SLuigi Rizzo error = nm_bdg_ctl_attach(nmr); 1142f9790aebSLuigi Rizzo break; 1143f9790aebSLuigi Rizzo 1144f9790aebSLuigi Rizzo case NETMAP_BDG_DETACH: 11454bf50f18SLuigi Rizzo error = nm_bdg_ctl_detach(nmr); 1146f9790aebSLuigi Rizzo break; 1147f9790aebSLuigi Rizzo 1148f9790aebSLuigi Rizzo case NETMAP_BDG_LIST: 1149f9790aebSLuigi Rizzo /* this is used to enumerate bridges and ports */ 1150f9790aebSLuigi Rizzo if (namelen) { /* look up indexes of bridge and port */ 1151*37e3a6d3SLuigi Rizzo if (strncmp(name, NM_BDG_NAME, strlen(NM_BDG_NAME))) { 1152f9790aebSLuigi Rizzo error = EINVAL; 1153f9790aebSLuigi Rizzo break; 1154f9790aebSLuigi Rizzo } 1155f9790aebSLuigi Rizzo NMG_LOCK(); 1156f9790aebSLuigi Rizzo b = nm_find_bridge(name, 0 /* don't create */); 1157f9790aebSLuigi Rizzo if (!b) { 1158f9790aebSLuigi Rizzo 
error = ENOENT; 1159f9790aebSLuigi Rizzo NMG_UNLOCK(); 1160f9790aebSLuigi Rizzo break; 1161f9790aebSLuigi Rizzo } 1162f9790aebSLuigi Rizzo 1163*37e3a6d3SLuigi Rizzo error = 0; 1164*37e3a6d3SLuigi Rizzo nmr->nr_arg1 = b - bridges; /* bridge index */ 1165*37e3a6d3SLuigi Rizzo nmr->nr_arg2 = NM_BDG_NOPORT; 1166f9790aebSLuigi Rizzo for (j = 0; j < b->bdg_active_ports; j++) { 1167f9790aebSLuigi Rizzo i = b->bdg_port_index[j]; 1168f9790aebSLuigi Rizzo vpna = b->bdg_ports[i]; 1169f9790aebSLuigi Rizzo if (vpna == NULL) { 1170f9790aebSLuigi Rizzo D("---AAAAAAAAARGH-------"); 1171f9790aebSLuigi Rizzo continue; 1172f9790aebSLuigi Rizzo } 1173f9790aebSLuigi Rizzo /* the former and the latter identify a 1174f9790aebSLuigi Rizzo * virtual port and a NIC, respectively 1175f9790aebSLuigi Rizzo */ 11764bf50f18SLuigi Rizzo if (!strcmp(vpna->up.name, name)) { 1177f9790aebSLuigi Rizzo nmr->nr_arg2 = i; /* port index */ 1178f9790aebSLuigi Rizzo break; 1179f9790aebSLuigi Rizzo } 1180f9790aebSLuigi Rizzo } 1181f9790aebSLuigi Rizzo NMG_UNLOCK(); 1182f9790aebSLuigi Rizzo } else { 1183f9790aebSLuigi Rizzo /* return the first non-empty entry starting from 1184f9790aebSLuigi Rizzo * bridge nr_arg1 and port nr_arg2. 
1185f9790aebSLuigi Rizzo * 1186f9790aebSLuigi Rizzo * Users can detect the end of the same bridge by 1187f9790aebSLuigi Rizzo * seeing the new and old value of nr_arg1, and can 1188f9790aebSLuigi Rizzo * detect the end of all the bridge by error != 0 1189f9790aebSLuigi Rizzo */ 1190f9790aebSLuigi Rizzo i = nmr->nr_arg1; 1191f9790aebSLuigi Rizzo j = nmr->nr_arg2; 1192f9790aebSLuigi Rizzo 1193f9790aebSLuigi Rizzo NMG_LOCK(); 1194f9790aebSLuigi Rizzo for (error = ENOENT; i < NM_BRIDGES; i++) { 1195847bf383SLuigi Rizzo b = bridges + i; 1196f9790aebSLuigi Rizzo if (j >= b->bdg_active_ports) { 1197f9790aebSLuigi Rizzo j = 0; /* following bridges scan from 0 */ 1198f9790aebSLuigi Rizzo continue; 1199f9790aebSLuigi Rizzo } 1200f9790aebSLuigi Rizzo nmr->nr_arg1 = i; 1201f9790aebSLuigi Rizzo nmr->nr_arg2 = j; 1202f9790aebSLuigi Rizzo j = b->bdg_port_index[j]; 1203f9790aebSLuigi Rizzo vpna = b->bdg_ports[j]; 12044bf50f18SLuigi Rizzo strncpy(name, vpna->up.name, (size_t)IFNAMSIZ); 1205f9790aebSLuigi Rizzo error = 0; 1206f9790aebSLuigi Rizzo break; 1207f9790aebSLuigi Rizzo } 1208f9790aebSLuigi Rizzo NMG_UNLOCK(); 1209f9790aebSLuigi Rizzo } 1210f9790aebSLuigi Rizzo break; 1211f9790aebSLuigi Rizzo 12124bf50f18SLuigi Rizzo case NETMAP_BDG_REGOPS: /* XXX this should not be available from userspace */ 12134bf50f18SLuigi Rizzo /* register callbacks to the given bridge. 1214f9790aebSLuigi Rizzo * nmr->nr_name may be just bridge's name (including ':' 1215f9790aebSLuigi Rizzo * if it is not just NM_NAME). 
1216f9790aebSLuigi Rizzo */ 12174bf50f18SLuigi Rizzo if (!bdg_ops) { 1218f9790aebSLuigi Rizzo error = EINVAL; 1219f9790aebSLuigi Rizzo break; 1220f9790aebSLuigi Rizzo } 1221f9790aebSLuigi Rizzo NMG_LOCK(); 1222f9790aebSLuigi Rizzo b = nm_find_bridge(name, 0 /* don't create */); 1223f9790aebSLuigi Rizzo if (!b) { 1224f9790aebSLuigi Rizzo error = EINVAL; 1225f9790aebSLuigi Rizzo } else { 12264bf50f18SLuigi Rizzo b->bdg_ops = *bdg_ops; 1227f9790aebSLuigi Rizzo } 1228f9790aebSLuigi Rizzo NMG_UNLOCK(); 1229f9790aebSLuigi Rizzo break; 1230f9790aebSLuigi Rizzo 1231f0ea3689SLuigi Rizzo case NETMAP_BDG_VNET_HDR: 1232f0ea3689SLuigi Rizzo /* Valid lengths for the virtio-net header are 0 (no header), 1233f0ea3689SLuigi Rizzo 10 and 12. */ 1234f0ea3689SLuigi Rizzo if (nmr->nr_arg1 != 0 && 1235f0ea3689SLuigi Rizzo nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) && 1236f0ea3689SLuigi Rizzo nmr->nr_arg1 != 12) { 1237f0ea3689SLuigi Rizzo error = EINVAL; 1238f0ea3689SLuigi Rizzo break; 1239f0ea3689SLuigi Rizzo } 1240f9790aebSLuigi Rizzo NMG_LOCK(); 1241f9790aebSLuigi Rizzo error = netmap_get_bdg_na(nmr, &na, 0); 124217885a7bSLuigi Rizzo if (na && !error) { 1243f9790aebSLuigi Rizzo vpna = (struct netmap_vp_adapter *)na; 1244*37e3a6d3SLuigi Rizzo na->virt_hdr_len = nmr->nr_arg1; 1245*37e3a6d3SLuigi Rizzo if (na->virt_hdr_len) { 12464bf50f18SLuigi Rizzo vpna->mfs = NETMAP_BUF_SIZE(na); 1247*37e3a6d3SLuigi Rizzo } 1248*37e3a6d3SLuigi Rizzo D("Using vnet_hdr_len %d for %p", na->virt_hdr_len, na); 1249*37e3a6d3SLuigi Rizzo netmap_adapter_put(na); 1250*37e3a6d3SLuigi Rizzo } else if (!na) { 1251*37e3a6d3SLuigi Rizzo error = ENXIO; 1252*37e3a6d3SLuigi Rizzo } 1253*37e3a6d3SLuigi Rizzo NMG_UNLOCK(); 1254*37e3a6d3SLuigi Rizzo break; 1255*37e3a6d3SLuigi Rizzo 1256*37e3a6d3SLuigi Rizzo case NETMAP_BDG_POLLING_ON: 1257*37e3a6d3SLuigi Rizzo case NETMAP_BDG_POLLING_OFF: 1258*37e3a6d3SLuigi Rizzo NMG_LOCK(); 1259*37e3a6d3SLuigi Rizzo error = netmap_get_bdg_na(nmr, &na, 0); 1260*37e3a6d3SLuigi Rizzo if 
(na && !error) { 1261*37e3a6d3SLuigi Rizzo if (!nm_is_bwrap(na)) { 1262*37e3a6d3SLuigi Rizzo error = EOPNOTSUPP; 1263*37e3a6d3SLuigi Rizzo } else if (cmd == NETMAP_BDG_POLLING_ON) { 1264*37e3a6d3SLuigi Rizzo error = nm_bdg_ctl_polling_start(nmr, na); 1265*37e3a6d3SLuigi Rizzo if (!error) 1266*37e3a6d3SLuigi Rizzo netmap_adapter_get(na); 1267*37e3a6d3SLuigi Rizzo } else { 1268*37e3a6d3SLuigi Rizzo error = nm_bdg_ctl_polling_stop(nmr, na); 1269*37e3a6d3SLuigi Rizzo if (!error) 1270*37e3a6d3SLuigi Rizzo netmap_adapter_put(na); 1271*37e3a6d3SLuigi Rizzo } 127217885a7bSLuigi Rizzo netmap_adapter_put(na); 1273f9790aebSLuigi Rizzo } 1274f9790aebSLuigi Rizzo NMG_UNLOCK(); 1275f9790aebSLuigi Rizzo break; 1276f9790aebSLuigi Rizzo 1277f9790aebSLuigi Rizzo default: 1278f9790aebSLuigi Rizzo D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd); 1279f9790aebSLuigi Rizzo error = EINVAL; 1280f9790aebSLuigi Rizzo break; 1281f9790aebSLuigi Rizzo } 1282f9790aebSLuigi Rizzo return error; 1283f9790aebSLuigi Rizzo } 1284f9790aebSLuigi Rizzo 12854bf50f18SLuigi Rizzo int 12864bf50f18SLuigi Rizzo netmap_bdg_config(struct nmreq *nmr) 12874bf50f18SLuigi Rizzo { 12884bf50f18SLuigi Rizzo struct nm_bridge *b; 12894bf50f18SLuigi Rizzo int error = EINVAL; 12904bf50f18SLuigi Rizzo 12914bf50f18SLuigi Rizzo NMG_LOCK(); 12924bf50f18SLuigi Rizzo b = nm_find_bridge(nmr->nr_name, 0); 12934bf50f18SLuigi Rizzo if (!b) { 12944bf50f18SLuigi Rizzo NMG_UNLOCK(); 12954bf50f18SLuigi Rizzo return error; 12964bf50f18SLuigi Rizzo } 12974bf50f18SLuigi Rizzo NMG_UNLOCK(); 12984bf50f18SLuigi Rizzo /* Don't call config() with NMG_LOCK() held */ 12994bf50f18SLuigi Rizzo BDG_RLOCK(b); 13004bf50f18SLuigi Rizzo if (b->bdg_ops.config != NULL) 13014bf50f18SLuigi Rizzo error = b->bdg_ops.config((struct nm_ifreq *)nmr); 13024bf50f18SLuigi Rizzo BDG_RUNLOCK(b); 13034bf50f18SLuigi Rizzo return error; 13044bf50f18SLuigi Rizzo } 13054bf50f18SLuigi Rizzo 13064bf50f18SLuigi Rizzo 13074bf50f18SLuigi Rizzo /* nm_krings_create callback for VALE 
ports. 13084bf50f18SLuigi Rizzo * Calls the standard netmap_krings_create, then adds leases on rx 13094bf50f18SLuigi Rizzo * rings and bdgfwd on tx rings. 13104bf50f18SLuigi Rizzo */ 1311f9790aebSLuigi Rizzo static int 1312f9790aebSLuigi Rizzo netmap_vp_krings_create(struct netmap_adapter *na) 1313f9790aebSLuigi Rizzo { 1314f0ea3689SLuigi Rizzo u_int tailroom; 1315f9790aebSLuigi Rizzo int error, i; 1316f9790aebSLuigi Rizzo uint32_t *leases; 1317847bf383SLuigi Rizzo u_int nrx = netmap_real_rings(na, NR_RX); 1318f9790aebSLuigi Rizzo 1319f9790aebSLuigi Rizzo /* 1320f9790aebSLuigi Rizzo * Leases are attached to RX rings on vale ports 1321f9790aebSLuigi Rizzo */ 1322f9790aebSLuigi Rizzo tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx; 1323f9790aebSLuigi Rizzo 1324f0ea3689SLuigi Rizzo error = netmap_krings_create(na, tailroom); 1325f9790aebSLuigi Rizzo if (error) 1326f9790aebSLuigi Rizzo return error; 1327f9790aebSLuigi Rizzo 1328f9790aebSLuigi Rizzo leases = na->tailroom; 1329f9790aebSLuigi Rizzo 1330f9790aebSLuigi Rizzo for (i = 0; i < nrx; i++) { /* Receive rings */ 1331f9790aebSLuigi Rizzo na->rx_rings[i].nkr_leases = leases; 1332f9790aebSLuigi Rizzo leases += na->num_rx_desc; 1333f9790aebSLuigi Rizzo } 1334f9790aebSLuigi Rizzo 1335f9790aebSLuigi Rizzo error = nm_alloc_bdgfwd(na); 1336f9790aebSLuigi Rizzo if (error) { 1337f9790aebSLuigi Rizzo netmap_krings_delete(na); 1338f9790aebSLuigi Rizzo return error; 1339f9790aebSLuigi Rizzo } 1340f9790aebSLuigi Rizzo 1341f9790aebSLuigi Rizzo return 0; 1342f9790aebSLuigi Rizzo } 1343f9790aebSLuigi Rizzo 134417885a7bSLuigi Rizzo 13454bf50f18SLuigi Rizzo /* nm_krings_delete callback for VALE ports. 
*/ 1346f9790aebSLuigi Rizzo static void 1347f9790aebSLuigi Rizzo netmap_vp_krings_delete(struct netmap_adapter *na) 1348f9790aebSLuigi Rizzo { 1349f9790aebSLuigi Rizzo nm_free_bdgfwd(na); 1350f9790aebSLuigi Rizzo netmap_krings_delete(na); 1351f9790aebSLuigi Rizzo } 1352f9790aebSLuigi Rizzo 1353f9790aebSLuigi Rizzo 1354f9790aebSLuigi Rizzo static int 1355f9790aebSLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, 1356f9790aebSLuigi Rizzo struct netmap_vp_adapter *na, u_int ring_nr); 1357f9790aebSLuigi Rizzo 1358f9790aebSLuigi Rizzo 1359f9790aebSLuigi Rizzo /* 13604bf50f18SLuigi Rizzo * main dispatch routine for the bridge. 1361f9790aebSLuigi Rizzo * Grab packets from a kring, move them into the ft structure 1362f9790aebSLuigi Rizzo * associated to the tx (input) port. Max one instance per port, 1363f9790aebSLuigi Rizzo * filtered on input (ioctl, poll or XXX). 1364f9790aebSLuigi Rizzo * Returns the next position in the ring. 1365f9790aebSLuigi Rizzo */ 1366f9790aebSLuigi Rizzo static int 13674bf50f18SLuigi Rizzo nm_bdg_preflush(struct netmap_kring *kring, u_int end) 1368f9790aebSLuigi Rizzo { 13694bf50f18SLuigi Rizzo struct netmap_vp_adapter *na = 13704bf50f18SLuigi Rizzo (struct netmap_vp_adapter*)kring->na; 1371f9790aebSLuigi Rizzo struct netmap_ring *ring = kring->ring; 1372f9790aebSLuigi Rizzo struct nm_bdg_fwd *ft; 13734bf50f18SLuigi Rizzo u_int ring_nr = kring->ring_id; 1374f9790aebSLuigi Rizzo u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1; 1375f9790aebSLuigi Rizzo u_int ft_i = 0; /* start from 0 */ 1376f9790aebSLuigi Rizzo u_int frags = 1; /* how many frags ? */ 1377f9790aebSLuigi Rizzo struct nm_bridge *b = na->na_bdg; 1378f9790aebSLuigi Rizzo 1379f9790aebSLuigi Rizzo /* To protect against modifications to the bridge we acquire a 1380f9790aebSLuigi Rizzo * shared lock, waiting if we can sleep (if the source port is 1381f9790aebSLuigi Rizzo * attached to a user process) or with a trylock otherwise (NICs). 
1382f9790aebSLuigi Rizzo */ 1383f9790aebSLuigi Rizzo ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j); 1384f9790aebSLuigi Rizzo if (na->up.na_flags & NAF_BDG_MAYSLEEP) 1385f9790aebSLuigi Rizzo BDG_RLOCK(b); 1386f9790aebSLuigi Rizzo else if (!BDG_RTRYLOCK(b)) 1387f9790aebSLuigi Rizzo return 0; 1388f9790aebSLuigi Rizzo ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j); 1389f9790aebSLuigi Rizzo ft = kring->nkr_ft; 1390f9790aebSLuigi Rizzo 1391f9790aebSLuigi Rizzo for (; likely(j != end); j = nm_next(j, lim)) { 1392f9790aebSLuigi Rizzo struct netmap_slot *slot = &ring->slot[j]; 1393f9790aebSLuigi Rizzo char *buf; 1394f9790aebSLuigi Rizzo 1395f9790aebSLuigi Rizzo ft[ft_i].ft_len = slot->len; 1396f9790aebSLuigi Rizzo ft[ft_i].ft_flags = slot->flags; 1397f9790aebSLuigi Rizzo 1398f9790aebSLuigi Rizzo ND("flags is 0x%x", slot->flags); 1399847bf383SLuigi Rizzo /* we do not use the buf changed flag, but we still need to reset it */ 1400847bf383SLuigi Rizzo slot->flags &= ~NS_BUF_CHANGED; 1401847bf383SLuigi Rizzo 1402f9790aebSLuigi Rizzo /* this slot goes into a list so initialize the link field */ 1403f9790aebSLuigi Rizzo ft[ft_i].ft_next = NM_FT_NULL; 1404f9790aebSLuigi Rizzo buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ? 14054bf50f18SLuigi Rizzo (void *)(uintptr_t)slot->ptr : NMB(&na->up, slot); 1406e31c6ec7SLuigi Rizzo if (unlikely(buf == NULL)) { 1407e31c6ec7SLuigi Rizzo RD(5, "NULL %s buffer pointer from %s slot %d len %d", 1408e31c6ec7SLuigi Rizzo (slot->flags & NS_INDIRECT) ? 
"INDIRECT" : "DIRECT", 1409e31c6ec7SLuigi Rizzo kring->name, j, ft[ft_i].ft_len); 14104bf50f18SLuigi Rizzo buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up); 1411e31c6ec7SLuigi Rizzo ft[ft_i].ft_len = 0; 1412e31c6ec7SLuigi Rizzo ft[ft_i].ft_flags = 0; 1413e31c6ec7SLuigi Rizzo } 14142e159ef0SLuigi Rizzo __builtin_prefetch(buf); 1415f9790aebSLuigi Rizzo ++ft_i; 1416f9790aebSLuigi Rizzo if (slot->flags & NS_MOREFRAG) { 1417f9790aebSLuigi Rizzo frags++; 1418f9790aebSLuigi Rizzo continue; 1419f9790aebSLuigi Rizzo } 1420f9790aebSLuigi Rizzo if (unlikely(netmap_verbose && frags > 1)) 1421f9790aebSLuigi Rizzo RD(5, "%d frags at %d", frags, ft_i - frags); 1422f9790aebSLuigi Rizzo ft[ft_i - frags].ft_frags = frags; 1423f9790aebSLuigi Rizzo frags = 1; 1424f9790aebSLuigi Rizzo if (unlikely((int)ft_i >= bridge_batch)) 1425f9790aebSLuigi Rizzo ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); 1426f9790aebSLuigi Rizzo } 1427f9790aebSLuigi Rizzo if (frags > 1) { 1428*37e3a6d3SLuigi Rizzo /* Here ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG, and we 1429*37e3a6d3SLuigi Rizzo * have to fix frags count. */ 1430*37e3a6d3SLuigi Rizzo frags--; 1431*37e3a6d3SLuigi Rizzo ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG; 1432*37e3a6d3SLuigi Rizzo ft[ft_i - frags].ft_frags = frags; 1433*37e3a6d3SLuigi Rizzo D("Truncate incomplete fragment at %d (%d frags)", ft_i, frags); 1434f9790aebSLuigi Rizzo } 1435f9790aebSLuigi Rizzo if (ft_i) 1436f9790aebSLuigi Rizzo ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); 1437f9790aebSLuigi Rizzo BDG_RUNLOCK(b); 1438f9790aebSLuigi Rizzo return j; 1439f9790aebSLuigi Rizzo } 1440f9790aebSLuigi Rizzo 1441f9790aebSLuigi Rizzo 1442f9790aebSLuigi Rizzo /* ----- FreeBSD if_bridge hash function ------- */ 1443f9790aebSLuigi Rizzo 1444f9790aebSLuigi Rizzo /* 1445f9790aebSLuigi Rizzo * The following hash function is adapted from "Hash Functions" by Bob Jenkins 1446f9790aebSLuigi Rizzo * ("Algorithm Alley", Dr. Dobbs Journal, September 1997). 
1447f9790aebSLuigi Rizzo * 1448f9790aebSLuigi Rizzo * http://www.burtleburtle.net/bob/hash/spooky.html 1449f9790aebSLuigi Rizzo */ 1450f9790aebSLuigi Rizzo #define mix(a, b, c) \ 1451f9790aebSLuigi Rizzo do { \ 1452f9790aebSLuigi Rizzo a -= b; a -= c; a ^= (c >> 13); \ 1453f9790aebSLuigi Rizzo b -= c; b -= a; b ^= (a << 8); \ 1454f9790aebSLuigi Rizzo c -= a; c -= b; c ^= (b >> 13); \ 1455f9790aebSLuigi Rizzo a -= b; a -= c; a ^= (c >> 12); \ 1456f9790aebSLuigi Rizzo b -= c; b -= a; b ^= (a << 16); \ 1457f9790aebSLuigi Rizzo c -= a; c -= b; c ^= (b >> 5); \ 1458f9790aebSLuigi Rizzo a -= b; a -= c; a ^= (c >> 3); \ 1459f9790aebSLuigi Rizzo b -= c; b -= a; b ^= (a << 10); \ 1460f9790aebSLuigi Rizzo c -= a; c -= b; c ^= (b >> 15); \ 1461f9790aebSLuigi Rizzo } while (/*CONSTCOND*/0) 1462f9790aebSLuigi Rizzo 146317885a7bSLuigi Rizzo 1464f9790aebSLuigi Rizzo static __inline uint32_t 1465f9790aebSLuigi Rizzo nm_bridge_rthash(const uint8_t *addr) 1466f9790aebSLuigi Rizzo { 1467f9790aebSLuigi Rizzo uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key 1468f9790aebSLuigi Rizzo 1469f9790aebSLuigi Rizzo b += addr[5] << 8; 1470f9790aebSLuigi Rizzo b += addr[4]; 1471f9790aebSLuigi Rizzo a += addr[3] << 24; 1472f9790aebSLuigi Rizzo a += addr[2] << 16; 1473f9790aebSLuigi Rizzo a += addr[1] << 8; 1474f9790aebSLuigi Rizzo a += addr[0]; 1475f9790aebSLuigi Rizzo 1476f9790aebSLuigi Rizzo mix(a, b, c); 1477f9790aebSLuigi Rizzo #define BRIDGE_RTHASH_MASK (NM_BDG_HASH-1) 1478f9790aebSLuigi Rizzo return (c & BRIDGE_RTHASH_MASK); 1479f9790aebSLuigi Rizzo } 1480f9790aebSLuigi Rizzo 1481f9790aebSLuigi Rizzo #undef mix 1482f9790aebSLuigi Rizzo 1483f9790aebSLuigi Rizzo 14844bf50f18SLuigi Rizzo /* nm_register callback for VALE ports */ 1485f9790aebSLuigi Rizzo static int 14864bf50f18SLuigi Rizzo netmap_vp_reg(struct netmap_adapter *na, int onoff) 1487f9790aebSLuigi Rizzo { 1488f9790aebSLuigi Rizzo struct netmap_vp_adapter *vpna = 1489f9790aebSLuigi Rizzo (struct netmap_vp_adapter*)na; 
1490*37e3a6d3SLuigi Rizzo enum txrx t; 1491*37e3a6d3SLuigi Rizzo int i; 1492f9790aebSLuigi Rizzo 14934bf50f18SLuigi Rizzo /* persistent ports may be put in netmap mode 14944bf50f18SLuigi Rizzo * before being attached to a bridge 1495f9790aebSLuigi Rizzo */ 14964bf50f18SLuigi Rizzo if (vpna->na_bdg) 1497f9790aebSLuigi Rizzo BDG_WLOCK(vpna->na_bdg); 1498f9790aebSLuigi Rizzo if (onoff) { 1499*37e3a6d3SLuigi Rizzo for_rx_tx(t) { 1500*37e3a6d3SLuigi Rizzo for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { 1501*37e3a6d3SLuigi Rizzo struct netmap_kring *kring = &NMR(na, t)[i]; 1502*37e3a6d3SLuigi Rizzo 1503*37e3a6d3SLuigi Rizzo if (nm_kring_pending_on(kring)) 1504*37e3a6d3SLuigi Rizzo kring->nr_mode = NKR_NETMAP_ON; 1505*37e3a6d3SLuigi Rizzo } 1506*37e3a6d3SLuigi Rizzo } 1507*37e3a6d3SLuigi Rizzo if (na->active_fds == 0) 15084bf50f18SLuigi Rizzo na->na_flags |= NAF_NETMAP_ON; 15094bf50f18SLuigi Rizzo /* XXX on FreeBSD, persistent VALE ports should also 15104bf50f18SLuigi Rizzo * toggle IFCAP_NETMAP in na->ifp (2014-03-16) 15114bf50f18SLuigi Rizzo */ 1512f9790aebSLuigi Rizzo } else { 1513*37e3a6d3SLuigi Rizzo if (na->active_fds == 0) 15144bf50f18SLuigi Rizzo na->na_flags &= ~NAF_NETMAP_ON; 1515*37e3a6d3SLuigi Rizzo for_rx_tx(t) { 1516*37e3a6d3SLuigi Rizzo for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { 1517*37e3a6d3SLuigi Rizzo struct netmap_kring *kring = &NMR(na, t)[i]; 1518*37e3a6d3SLuigi Rizzo 1519*37e3a6d3SLuigi Rizzo if (nm_kring_pending_off(kring)) 1520*37e3a6d3SLuigi Rizzo kring->nr_mode = NKR_NETMAP_OFF; 1521*37e3a6d3SLuigi Rizzo } 1522*37e3a6d3SLuigi Rizzo } 1523f9790aebSLuigi Rizzo } 15244bf50f18SLuigi Rizzo if (vpna->na_bdg) 1525f9790aebSLuigi Rizzo BDG_WUNLOCK(vpna->na_bdg); 1526f9790aebSLuigi Rizzo return 0; 1527f9790aebSLuigi Rizzo } 1528f9790aebSLuigi Rizzo 1529f9790aebSLuigi Rizzo 1530f9790aebSLuigi Rizzo /* 1531f9790aebSLuigi Rizzo * Lookup function for a learning bridge. 
1532f9790aebSLuigi Rizzo * Update the hash table with the source address, 1533f9790aebSLuigi Rizzo * and then returns the destination port index, and the 1534f9790aebSLuigi Rizzo * ring in *dst_ring (at the moment, always use ring 0) 1535f9790aebSLuigi Rizzo */ 1536f9790aebSLuigi Rizzo u_int 15374bf50f18SLuigi Rizzo netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, 1538847bf383SLuigi Rizzo struct netmap_vp_adapter *na) 1539f9790aebSLuigi Rizzo { 15404bf50f18SLuigi Rizzo uint8_t *buf = ft->ft_buf; 15414bf50f18SLuigi Rizzo u_int buf_len = ft->ft_len; 1542f9790aebSLuigi Rizzo struct nm_hash_ent *ht = na->na_bdg->ht; 1543f9790aebSLuigi Rizzo uint32_t sh, dh; 1544f9790aebSLuigi Rizzo u_int dst, mysrc = na->bdg_port; 1545f9790aebSLuigi Rizzo uint64_t smac, dmac; 1546*37e3a6d3SLuigi Rizzo uint8_t indbuf[12]; 1547f9790aebSLuigi Rizzo 15484bf50f18SLuigi Rizzo /* safety check, unfortunately we have many cases */ 1549*37e3a6d3SLuigi Rizzo if (buf_len >= 14 + na->up.virt_hdr_len) { 15504bf50f18SLuigi Rizzo /* virthdr + mac_hdr in the same slot */ 1551*37e3a6d3SLuigi Rizzo buf += na->up.virt_hdr_len; 1552*37e3a6d3SLuigi Rizzo buf_len -= na->up.virt_hdr_len; 1553*37e3a6d3SLuigi Rizzo } else if (buf_len == na->up.virt_hdr_len && ft->ft_flags & NS_MOREFRAG) { 15544bf50f18SLuigi Rizzo /* only header in first fragment */ 15554bf50f18SLuigi Rizzo ft++; 15564bf50f18SLuigi Rizzo buf = ft->ft_buf; 15574bf50f18SLuigi Rizzo buf_len = ft->ft_len; 15584bf50f18SLuigi Rizzo } else { 15594bf50f18SLuigi Rizzo RD(5, "invalid buf format, length %d", buf_len); 1560f9790aebSLuigi Rizzo return NM_BDG_NOPORT; 1561f9790aebSLuigi Rizzo } 1562*37e3a6d3SLuigi Rizzo 1563*37e3a6d3SLuigi Rizzo if (ft->ft_flags & NS_INDIRECT) { 1564*37e3a6d3SLuigi Rizzo if (copyin(buf, indbuf, sizeof(indbuf))) { 1565*37e3a6d3SLuigi Rizzo return NM_BDG_NOPORT; 1566*37e3a6d3SLuigi Rizzo } 1567*37e3a6d3SLuigi Rizzo buf = indbuf; 1568*37e3a6d3SLuigi Rizzo } 1569*37e3a6d3SLuigi Rizzo 1570f9790aebSLuigi Rizzo dmac = 
le64toh(*(uint64_t *)(buf)) & 0xffffffffffff; 1571f9790aebSLuigi Rizzo smac = le64toh(*(uint64_t *)(buf + 4)); 1572f9790aebSLuigi Rizzo smac >>= 16; 1573f9790aebSLuigi Rizzo 1574f9790aebSLuigi Rizzo /* 1575f9790aebSLuigi Rizzo * The hash is somewhat expensive, there might be some 1576f9790aebSLuigi Rizzo * worthwhile optimizations here. 1577f9790aebSLuigi Rizzo */ 1578847bf383SLuigi Rizzo if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */ 1579f9790aebSLuigi Rizzo uint8_t *s = buf+6; 1580f9790aebSLuigi Rizzo sh = nm_bridge_rthash(s); // XXX hash of source 1581f9790aebSLuigi Rizzo /* update source port forwarding entry */ 1582847bf383SLuigi Rizzo na->last_smac = ht[sh].mac = smac; /* XXX expire ? */ 1583f9790aebSLuigi Rizzo ht[sh].ports = mysrc; 1584f9790aebSLuigi Rizzo if (netmap_verbose) 1585f9790aebSLuigi Rizzo D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d", 1586f9790aebSLuigi Rizzo s[0], s[1], s[2], s[3], s[4], s[5], mysrc); 1587f9790aebSLuigi Rizzo } 1588f9790aebSLuigi Rizzo dst = NM_BDG_BROADCAST; 1589f9790aebSLuigi Rizzo if ((buf[0] & 1) == 0) { /* unicast */ 1590f9790aebSLuigi Rizzo dh = nm_bridge_rthash(buf); // XXX hash of dst 1591f9790aebSLuigi Rizzo if (ht[dh].mac == dmac) { /* found dst */ 1592f9790aebSLuigi Rizzo dst = ht[dh].ports; 1593f9790aebSLuigi Rizzo } 1594f9790aebSLuigi Rizzo /* XXX otherwise return NM_BDG_UNKNOWN ? */ 1595f9790aebSLuigi Rizzo } 1596f9790aebSLuigi Rizzo return dst; 1597f9790aebSLuigi Rizzo } 1598f9790aebSLuigi Rizzo 1599f9790aebSLuigi Rizzo 1600f9790aebSLuigi Rizzo /* 160117885a7bSLuigi Rizzo * Available space in the ring. 
Only used in VALE code 160217885a7bSLuigi Rizzo * and only with is_rx = 1 160317885a7bSLuigi Rizzo */ 160417885a7bSLuigi Rizzo static inline uint32_t 160517885a7bSLuigi Rizzo nm_kr_space(struct netmap_kring *k, int is_rx) 160617885a7bSLuigi Rizzo { 160717885a7bSLuigi Rizzo int space; 160817885a7bSLuigi Rizzo 160917885a7bSLuigi Rizzo if (is_rx) { 161017885a7bSLuigi Rizzo int busy = k->nkr_hwlease - k->nr_hwcur; 161117885a7bSLuigi Rizzo if (busy < 0) 161217885a7bSLuigi Rizzo busy += k->nkr_num_slots; 161317885a7bSLuigi Rizzo space = k->nkr_num_slots - 1 - busy; 161417885a7bSLuigi Rizzo } else { 161517885a7bSLuigi Rizzo /* XXX never used in this branch */ 161617885a7bSLuigi Rizzo space = k->nr_hwtail - k->nkr_hwlease; 161717885a7bSLuigi Rizzo if (space < 0) 161817885a7bSLuigi Rizzo space += k->nkr_num_slots; 161917885a7bSLuigi Rizzo } 162017885a7bSLuigi Rizzo #if 0 162117885a7bSLuigi Rizzo // sanity check 162217885a7bSLuigi Rizzo if (k->nkr_hwlease >= k->nkr_num_slots || 162317885a7bSLuigi Rizzo k->nr_hwcur >= k->nkr_num_slots || 162417885a7bSLuigi Rizzo k->nr_tail >= k->nkr_num_slots || 162517885a7bSLuigi Rizzo busy < 0 || 162617885a7bSLuigi Rizzo busy >= k->nkr_num_slots) { 162717885a7bSLuigi Rizzo D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease, 162817885a7bSLuigi Rizzo k->nkr_lease_idx, k->nkr_num_slots); 162917885a7bSLuigi Rizzo } 163017885a7bSLuigi Rizzo #endif 163117885a7bSLuigi Rizzo return space; 163217885a7bSLuigi Rizzo } 163317885a7bSLuigi Rizzo 163417885a7bSLuigi Rizzo 163517885a7bSLuigi Rizzo 163617885a7bSLuigi Rizzo 163717885a7bSLuigi Rizzo /* make a lease on the kring for N positions. 
return the 163817885a7bSLuigi Rizzo * lease index 163917885a7bSLuigi Rizzo * XXX only used in VALE code and with is_rx = 1 164017885a7bSLuigi Rizzo */ 164117885a7bSLuigi Rizzo static inline uint32_t 164217885a7bSLuigi Rizzo nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx) 164317885a7bSLuigi Rizzo { 164417885a7bSLuigi Rizzo uint32_t lim = k->nkr_num_slots - 1; 164517885a7bSLuigi Rizzo uint32_t lease_idx = k->nkr_lease_idx; 164617885a7bSLuigi Rizzo 164717885a7bSLuigi Rizzo k->nkr_leases[lease_idx] = NR_NOSLOT; 164817885a7bSLuigi Rizzo k->nkr_lease_idx = nm_next(lease_idx, lim); 164917885a7bSLuigi Rizzo 165017885a7bSLuigi Rizzo if (n > nm_kr_space(k, is_rx)) { 165117885a7bSLuigi Rizzo D("invalid request for %d slots", n); 165217885a7bSLuigi Rizzo panic("x"); 165317885a7bSLuigi Rizzo } 165417885a7bSLuigi Rizzo /* XXX verify that there are n slots */ 165517885a7bSLuigi Rizzo k->nkr_hwlease += n; 165617885a7bSLuigi Rizzo if (k->nkr_hwlease > lim) 165717885a7bSLuigi Rizzo k->nkr_hwlease -= lim + 1; 165817885a7bSLuigi Rizzo 165917885a7bSLuigi Rizzo if (k->nkr_hwlease >= k->nkr_num_slots || 166017885a7bSLuigi Rizzo k->nr_hwcur >= k->nkr_num_slots || 166117885a7bSLuigi Rizzo k->nr_hwtail >= k->nkr_num_slots || 166217885a7bSLuigi Rizzo k->nkr_lease_idx >= k->nkr_num_slots) { 166317885a7bSLuigi Rizzo D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d", 16644bf50f18SLuigi Rizzo k->na->name, 166517885a7bSLuigi Rizzo k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease, 166617885a7bSLuigi Rizzo k->nkr_lease_idx, k->nkr_num_slots); 166717885a7bSLuigi Rizzo } 166817885a7bSLuigi Rizzo return lease_idx; 166917885a7bSLuigi Rizzo } 167017885a7bSLuigi Rizzo 167117885a7bSLuigi Rizzo /* 16724bf50f18SLuigi Rizzo * 1673f9790aebSLuigi Rizzo * This flush routine supports only unicast and broadcast but a large 1674f9790aebSLuigi Rizzo * number of ports, and lets us replace the learn and dispatch functions. 
 *
 * ft:      work area holding the batch; entries 0..n-1 are valid and the
 *          first entry of each packet carries its fragment count.
 * n:       number of valid entries in ft.
 * na:      source port (never chosen as a destination).
 * ring_nr: source ring, used as the default destination ring.
 *
 * Pass 1 asks the bridge lookup callback for the destination of every
 * packet and links the packets into per-destination queues. Pass 2 visits
 * each destination: it leases slots on the destination rx kring under
 * q_lock, copies the packets with the lock released, then re-takes the
 * lock to report completion and possibly advance nr_hwtail.
 * Always returns 0.
 */
int
nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
		u_int ring_nr)
{
	struct nm_bdg_q *dst_ents, *brddst;
	uint16_t num_dsts = 0, *dsts;
	struct nm_bridge *b = na->na_bdg;
	u_int i, me = na->bdg_port;

	/*
	 * The work area (pointed by ft) is followed by an array of
	 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
	 * queues per port plus one for the broadcast traffic.
	 * Then we have an array of destination indexes.
	 */
	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);

	/* first pass: find a destination for each packet in the batch */
	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
		uint16_t dst_port, d_i;
		struct nm_bdg_q *d;

		ND("slot %d frags %d", i, ft[i].ft_frags);
		/* Drop the packet if the virtio-net header is neither entirely
		 * in the first fragment nor at the very beginning of the second.
		 */
		if (unlikely(na->up.virt_hdr_len > ft[i].ft_len))
			continue;
		dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na);
		if (netmap_verbose > 255)
			RD(5, "slot %d port %d -> %d", i, me, dst_port);
		if (dst_port == NM_BDG_NOPORT)
			continue; /* this packet is identified to be dropped */
		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
			continue;
		else if (dst_port == NM_BDG_BROADCAST)
			dst_ring = 0; /* broadcasts always go to ring 0 */
		else if (unlikely(dst_port == me ||
		    !b->bdg_ports[dst_port]))
			continue;

		/* get a position in the scratch pad */
		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
		d = dst_ents + d_i;

		/* append the first fragment to the list */
		if (d->bq_head == NM_FT_NULL) { /* new destination */
			d->bq_head = d->bq_tail = i;
			/* remember this position to be scanned later */
			if (dst_port != NM_BDG_BROADCAST)
				dsts[num_dsts++] = d_i;
		} else {
			ft[d->bq_tail].ft_next = i;
			d->bq_tail = i;
		}
		d->bq_len += ft[i].ft_frags;
	}

	/*
	 * Broadcast traffic goes to ring 0 on all destinations.
	 * So we need to add these rings to the list of ports to scan.
	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
	 * expensive. We should keep a compact list of active destinations
	 * so we could shorten this loop.
	 */
	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
	if (brddst->bq_head != NM_FT_NULL) {
		u_int j;
		for (j = 0; likely(j < b->bdg_active_ports); j++) {
			uint16_t d_i;
			i = b->bdg_port_index[j];
			if (unlikely(i == me))
				continue;
			d_i = i * NM_BDG_MAXRINGS;
			/* ring 0 of this port is added only if pass 1 did
			 * not already queue unicast traffic for it
			 */
			if (dst_ents[d_i].bq_head == NM_FT_NULL)
				dsts[num_dsts++] = d_i;
		}
	}

	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
	/* second pass: scan destinations */
	for (i = 0; i < num_dsts; i++) {
		struct netmap_vp_adapter *dst_na;
		struct netmap_kring *kring;
		struct netmap_ring *ring;
		u_int dst_nr, lim, j, d_i, next, brd_next;
		u_int needed, howmany;
		int retry = netmap_txsync_retry;
		struct nm_bdg_q *d;
		uint32_t my_start = 0, lease_idx = 0;
		int nrings;
		int virt_hdr_mismatch = 0;

		d_i = dsts[i];
		ND("second pass %d port %d", i, d_i);
		d = dst_ents + d_i;
		// XXX fix the division
		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
		/* protect from the lookup function returning an inactive
		 * destination port
		 */
		if (unlikely(dst_na == NULL))
			goto cleanup;
		if (dst_na->up.na_flags & NAF_SW_ONLY)
			goto cleanup;
		/*
		 * The interface may be in !netmap mode in two cases:
		 * - when na is attached but not activated yet;
		 * - when na is being deactivated but is still attached.
		 */
		if (unlikely(!nm_netmap_on(&dst_na->up))) {
			ND("not in netmap mode!");
			goto cleanup;
		}

		/* there is at least one either unicast or broadcast packet */
		brd_next = brddst->bq_head;
		next = d->bq_head;
		/* we need to reserve this many slots. If fewer are
		 * available, some packets will be dropped.
		 * Packets may have multiple fragments, so there is a
		 * chance that we may not use all of the slots we have
		 * claimed; we will need to handle the leftover ones
		 * when we regain the lock.
		 */
		needed = d->bq_len + brddst->bq_len;

		if (unlikely(dst_na->up.virt_hdr_len != na->up.virt_hdr_len)) {
			RD(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len,
			      dst_na->up.virt_hdr_len);
			/* There is a virtio-net header/offloadings mismatch between
			 * source and destination. The slower mismatch datapath will
			 * be used to cope with all the mismatches.
			 */
			virt_hdr_mismatch = 1;
			if (dst_na->mfs < na->mfs) {
				/* We may need to do segmentation offloadings, and so
				 * we may need a number of destination slots greater
				 * than the number of input slots ('needed').
				 * We look for the smallest integer 'x' which satisfies:
				 *	needed * na->mfs + x * H <= x * dst_na->mfs
				 * where 'H' is the length of the longest header that may
				 * be replicated in the segmentation process (e.g. for
				 * TCPv4 we must account for ethernet header, IP header
				 * and TCPv4 header).
				 */
				needed = (needed * na->mfs) /
						(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
				ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
			}
		}

		ND(5, "pass 2 dst %d is %x %s",
		    i, d_i, is_vp ? "virtual" : "nic/host");
		/* the low bits of d_i select the ring; fold it into the
		 * rings the destination actually has
		 */
		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
		nrings = dst_na->up.num_rx_rings;
		if (dst_nr >= nrings)
			dst_nr = dst_nr % nrings;
		kring = &dst_na->up.rx_rings[dst_nr];
		ring = kring->ring;
		lim = kring->nkr_num_slots - 1;

retry:

		if (dst_na->retry && retry) {
			/* try to get some free slot from the previous run */
			kring->nm_notify(kring, 0);
			/* actually useful only for bwraps, since there
			 * the notify will trigger a txsync on the hwna. VALE ports
			 * have dst_na->retry == 0
			 */
		}
		/* reserve the buffers in the queue and an entry
		 * to report completion, and drop lock.
		 * XXX this might become a helper function.
		 */
		mtx_lock(&kring->q_lock);
		if (kring->nkr_stopped) {
			mtx_unlock(&kring->q_lock);
			goto cleanup;
		}
		my_start = j = kring->nkr_hwlease;
		howmany = nm_kr_space(kring, 1);
		if (needed < howmany)
			howmany = needed;
		lease_idx = nm_kr_lease(kring, howmany, 1);
		mtx_unlock(&kring->q_lock);

		/* only retry if we need more than available slots */
		if (retry && needed <= howmany)
			retry = 0;

		/* copy to the destination queue */
		while (howmany > 0) {
			struct netmap_slot *slot;
			struct nm_bdg_fwd *ft_p, *ft_end;
			u_int cnt;

			/* find the queue from which we pick next packet.
			 * NM_FT_NULL is always higher than valid indexes
			 * so we never dereference it if the other list
			 * has packets (and if both are empty we never
			 * get here).
			 */
			if (next < brd_next) {
				ft_p = ft + next;
				next = ft_p->ft_next;
			} else { /* insert broadcast */
				ft_p = ft + brd_next;
				brd_next = ft_p->ft_next;
			}
			cnt = ft_p->ft_frags; // cnt > 0
			if (unlikely(cnt > howmany))
			    break; /* no more space */
			if (netmap_verbose && cnt > 1)
				RD(5, "rx %d frags to %d", cnt, j);
			ft_end = ft_p + cnt;
			if (unlikely(virt_hdr_mismatch)) {
				/* the helper receives &j and &howmany and is
				 * expected to update them itself
				 */
				bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
			} else {
				howmany -= cnt;
				do {
					char *dst, *src = ft_p->ft_buf;
					size_t copy_len = ft_p->ft_len, dst_len = copy_len;

					slot = &ring->slot[j];
					dst = NMB(&dst_na->up, slot);

					ND("send [%d] %d(%d) bytes at %s:%d",
							i, (int)copy_len, (int)dst_len,
							NM_IFPNAME(dst_ifp), j);
					/* round to a multiple of 64 */
					copy_len = (copy_len + 63) & ~63;

					if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) ||
						     copy_len > NETMAP_BUF_SIZE(&na->up))) {
						RD(5, "invalid len %d, down to 64", (int)copy_len);
						copy_len = dst_len = 64; // XXX
					}
					if (ft_p->ft_flags & NS_INDIRECT) {
						if (copyin(src, dst, copy_len)) {
							// invalid user pointer, pretend len is 0
							dst_len = 0;
						}
					} else {
						//memcpy(dst, src, copy_len);
						pkt_copy(src, dst, (int)copy_len);
					}
					slot->len = dst_len;
					/* bits 8 and up carry the fragment count */
					slot->flags = (cnt << 8)| NS_MOREFRAG;
					j = nm_next(j, lim);
					needed--;
					ft_p++;
				} while (ft_p != ft_end);
				slot->flags = (cnt << 8); /* clear flag on last entry */
			}
			/* are we done ? */
			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
				break;
		}
		{
			/* current position */
			uint32_t *p = kring->nkr_leases; /* shorthand */
			uint32_t update_pos;
			int still_locked = 1;

			mtx_lock(&kring->q_lock);
			if (unlikely(howmany > 0)) {
				/* not used all bufs. If i am the last one
				 * i can recover the slots, otherwise must
				 * fill them with 0 to mark empty packets.
				 */
				ND("leftover %d bufs", howmany);
				if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
				    /* yes i am the last one */
				    ND("roll back nkr_hwlease to %d", j);
				    kring->nkr_hwlease = j;
				} else {
				    while (howmany-- > 0) {
					ring->slot[j].len = 0;
					ring->slot[j].flags = 0;
					j = nm_next(j, lim);
				    }
				}
			}
			p[lease_idx] = j; /* report I am done */

			update_pos = kring->nr_hwtail;

			if (my_start == update_pos) {
				/* all slots before my_start have been reported,
				 * so scan subsequent leases to see if other ranges
				 * have been completed, and do a selwakeup or txsync.
				 */
				while (lease_idx != kring->nkr_lease_idx &&
					p[lease_idx] != NR_NOSLOT) {
					j = p[lease_idx];
					p[lease_idx] = NR_NOSLOT;
					lease_idx = nm_next(lease_idx, lim);
				}
				/* j is the new 'write' position. j != my_start
				 * means there are new buffers to report
				 */
				if (likely(j != my_start)) {
					kring->nr_hwtail = j;
					still_locked = 0;
					mtx_unlock(&kring->q_lock);
					kring->nm_notify(kring, 0);
					/* this is netmap_notify for VALE ports and
					 * netmap_bwrap_notify for bwrap. The latter will
					 * trigger a txsync on the underlying hwna
					 */
					if (dst_na->retry && retry--) {
						/* XXX this is going to call nm_notify again.
						 * Only useful for bwrap in virtual machines
						 */
						goto retry;
					}
				}
			}
			if (still_locked)
				mtx_unlock(&kring->q_lock);
		}
cleanup:
		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
		d->bq_len = 0;
	}
	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
	brddst->bq_len = 0;
	return 0;
}

/* nm_txsync callback for VALE ports
 *
 * Hands the slots between nr_hwcur and kring->rhead to nm_bdg_preflush()
 * and records how far it got. With bridge_batch <= 0, or when the port is
 * not attached to a bridge, every slot up to rhead is marked as consumed
 * without being forwarded. Always returns 0.
 */
static int
netmap_vp_txsync(struct netmap_kring *kring, int flags)
{
	struct netmap_vp_adapter *na =
		(struct netmap_vp_adapter *)kring->na;
	u_int done;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;

	if (bridge_batch <= 0) { /* testing only */
		done = head; // used all
		goto done;
	}
	if (!na->na_bdg) {
		done = head;
		goto done;
	}
	/* clamp the (global) batch size to the supported maximum */
	if (bridge_batch > NM_BDG_BATCH)
		bridge_batch = NM_BDG_BATCH;

	done = nm_bdg_preflush(kring, head);
done:
	if (done != head)
		D("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail);
	/*
	 * packets between 'done' and 'cur' are left unsent.
	 */
	kring->nr_hwcur = done;
	/* the tail is placed right behind the new hwcur */
	kring->nr_hwtail = nm_prev(done, lim);
	if (netmap_verbose)
		D("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
	return 0;
}


/* rxsync code used by VALE ports nm_rxsync callback and also
 * internally by the brwap
 */
static int
netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i, lim = kring->nkr_num_slots - 1;
	u_int head = kring->rhead;
	int n;

	if (head > lim) {
		D("ouch dangerous reset!!!");
		n = netmap_ring_reinit(kring);
		goto done;
	}

	/* First part, import newly received packets. */
	/* actually nothing to do here, they are already in the kring */

	/* Second part, skip past packets that userspace has released.
*/ 206717885a7bSLuigi Rizzo nm_i = kring->nr_hwcur; 206817885a7bSLuigi Rizzo if (nm_i != head) { 206917885a7bSLuigi Rizzo /* consistency check, but nothing really important here */ 207017885a7bSLuigi Rizzo for (n = 0; likely(nm_i != head); n++) { 207117885a7bSLuigi Rizzo struct netmap_slot *slot = &ring->slot[nm_i]; 20724bf50f18SLuigi Rizzo void *addr = NMB(na, slot); 207317885a7bSLuigi Rizzo 20744bf50f18SLuigi Rizzo if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */ 207517885a7bSLuigi Rizzo D("bad buffer index %d, ignore ?", 207617885a7bSLuigi Rizzo slot->buf_idx); 207717885a7bSLuigi Rizzo } 207817885a7bSLuigi Rizzo slot->flags &= ~NS_BUF_CHANGED; 207917885a7bSLuigi Rizzo nm_i = nm_next(nm_i, lim); 208017885a7bSLuigi Rizzo } 208117885a7bSLuigi Rizzo kring->nr_hwcur = head; 208217885a7bSLuigi Rizzo } 208317885a7bSLuigi Rizzo 208417885a7bSLuigi Rizzo n = 0; 208517885a7bSLuigi Rizzo done: 208617885a7bSLuigi Rizzo return n; 208717885a7bSLuigi Rizzo } 2088f9790aebSLuigi Rizzo 2089f9790aebSLuigi Rizzo /* 20904bf50f18SLuigi Rizzo * nm_rxsync callback for VALE ports 2091f9790aebSLuigi Rizzo * user process reading from a VALE switch. 2092f9790aebSLuigi Rizzo * Already protected against concurrent calls from userspace, 2093f9790aebSLuigi Rizzo * but we must acquire the queue's lock to protect against 2094f9790aebSLuigi Rizzo * writers on the same queue. 
2095f9790aebSLuigi Rizzo */ 2096f9790aebSLuigi Rizzo static int 20974bf50f18SLuigi Rizzo netmap_vp_rxsync(struct netmap_kring *kring, int flags) 2098f9790aebSLuigi Rizzo { 2099f9790aebSLuigi Rizzo int n; 2100f9790aebSLuigi Rizzo 2101f9790aebSLuigi Rizzo mtx_lock(&kring->q_lock); 21024bf50f18SLuigi Rizzo n = netmap_vp_rxsync_locked(kring, flags); 2103f9790aebSLuigi Rizzo mtx_unlock(&kring->q_lock); 2104f9790aebSLuigi Rizzo return n; 2105f9790aebSLuigi Rizzo } 2106f9790aebSLuigi Rizzo 210717885a7bSLuigi Rizzo 21084bf50f18SLuigi Rizzo /* nm_bdg_attach callback for VALE ports 21094bf50f18SLuigi Rizzo * The na_vp port is this same netmap_adapter. There is no host port. 21104bf50f18SLuigi Rizzo */ 2111f9790aebSLuigi Rizzo static int 21124bf50f18SLuigi Rizzo netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na) 21134bf50f18SLuigi Rizzo { 21144bf50f18SLuigi Rizzo struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; 21154bf50f18SLuigi Rizzo 21164bf50f18SLuigi Rizzo if (vpna->na_bdg) 21174bf50f18SLuigi Rizzo return EBUSY; 21184bf50f18SLuigi Rizzo na->na_vp = vpna; 21194bf50f18SLuigi Rizzo strncpy(na->name, name, sizeof(na->name)); 21204bf50f18SLuigi Rizzo na->na_hostvp = NULL; 21214bf50f18SLuigi Rizzo return 0; 21224bf50f18SLuigi Rizzo } 21234bf50f18SLuigi Rizzo 21244bf50f18SLuigi Rizzo /* create a netmap_vp_adapter that describes a VALE port. 21254bf50f18SLuigi Rizzo * Only persistent VALE ports have a non-null ifp. 
21264bf50f18SLuigi Rizzo */ 21274bf50f18SLuigi Rizzo static int 21284bf50f18SLuigi Rizzo netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter **ret) 2129f9790aebSLuigi Rizzo { 2130f9790aebSLuigi Rizzo struct netmap_vp_adapter *vpna; 2131f9790aebSLuigi Rizzo struct netmap_adapter *na; 2132f9790aebSLuigi Rizzo int error; 2133f0ea3689SLuigi Rizzo u_int npipes = 0; 2134f9790aebSLuigi Rizzo 2135f9790aebSLuigi Rizzo vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO); 2136f9790aebSLuigi Rizzo if (vpna == NULL) 2137f9790aebSLuigi Rizzo return ENOMEM; 2138f9790aebSLuigi Rizzo 2139f9790aebSLuigi Rizzo na = &vpna->up; 2140f9790aebSLuigi Rizzo 2141f9790aebSLuigi Rizzo na->ifp = ifp; 21424bf50f18SLuigi Rizzo strncpy(na->name, nmr->nr_name, sizeof(na->name)); 2143f9790aebSLuigi Rizzo 2144f9790aebSLuigi Rizzo /* bound checking */ 2145f9790aebSLuigi Rizzo na->num_tx_rings = nmr->nr_tx_rings; 2146f9790aebSLuigi Rizzo nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); 2147f9790aebSLuigi Rizzo nmr->nr_tx_rings = na->num_tx_rings; // write back 2148f9790aebSLuigi Rizzo na->num_rx_rings = nmr->nr_rx_rings; 2149f9790aebSLuigi Rizzo nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); 2150f9790aebSLuigi Rizzo nmr->nr_rx_rings = na->num_rx_rings; // write back 2151f9790aebSLuigi Rizzo nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE, 2152f9790aebSLuigi Rizzo 1, NM_BDG_MAXSLOTS, NULL); 2153f9790aebSLuigi Rizzo na->num_tx_desc = nmr->nr_tx_slots; 2154f9790aebSLuigi Rizzo nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE, 2155f9790aebSLuigi Rizzo 1, NM_BDG_MAXSLOTS, NULL); 2156f0ea3689SLuigi Rizzo /* validate number of pipes. We want at least 1, 2157f0ea3689SLuigi Rizzo * but probably can do with some more. 
2158f0ea3689SLuigi Rizzo * So let's use 2 as default (when 0 is supplied) 2159f0ea3689SLuigi Rizzo */ 2160f0ea3689SLuigi Rizzo npipes = nmr->nr_arg1; 2161f0ea3689SLuigi Rizzo nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL); 2162f0ea3689SLuigi Rizzo nmr->nr_arg1 = npipes; /* write back */ 2163f0ea3689SLuigi Rizzo /* validate extra bufs */ 2164f0ea3689SLuigi Rizzo nm_bound_var(&nmr->nr_arg3, 0, 0, 2165f0ea3689SLuigi Rizzo 128*NM_BDG_MAXSLOTS, NULL); 2166f9790aebSLuigi Rizzo na->num_rx_desc = nmr->nr_rx_slots; 2167f0ea3689SLuigi Rizzo vpna->mfs = 1514; 2168847bf383SLuigi Rizzo vpna->last_smac = ~0llu; 2169f0ea3689SLuigi Rizzo /*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero?? 2170f0ea3689SLuigi Rizzo vpna->mfs = netmap_buf_size; */ 2171f0ea3689SLuigi Rizzo if (netmap_verbose) 2172f0ea3689SLuigi Rizzo D("max frame size %u", vpna->mfs); 2173f9790aebSLuigi Rizzo 2174847bf383SLuigi Rizzo na->na_flags |= NAF_BDG_MAYSLEEP; 217510b8ef3dSLuigi Rizzo /* persistent VALE ports look like hw devices 217610b8ef3dSLuigi Rizzo * with a native netmap adapter 217710b8ef3dSLuigi Rizzo */ 217810b8ef3dSLuigi Rizzo if (ifp) 217910b8ef3dSLuigi Rizzo na->na_flags |= NAF_NATIVE; 21804bf50f18SLuigi Rizzo na->nm_txsync = netmap_vp_txsync; 21814bf50f18SLuigi Rizzo na->nm_rxsync = netmap_vp_rxsync; 21824bf50f18SLuigi Rizzo na->nm_register = netmap_vp_reg; 2183f9790aebSLuigi Rizzo na->nm_krings_create = netmap_vp_krings_create; 2184f9790aebSLuigi Rizzo na->nm_krings_delete = netmap_vp_krings_delete; 21854bf50f18SLuigi Rizzo na->nm_dtor = netmap_vp_dtor; 21864bf50f18SLuigi Rizzo na->nm_mem = netmap_mem_private_new(na->name, 2187f9790aebSLuigi Rizzo na->num_tx_rings, na->num_tx_desc, 2188f0ea3689SLuigi Rizzo na->num_rx_rings, na->num_rx_desc, 2189f0ea3689SLuigi Rizzo nmr->nr_arg3, npipes, &error); 2190f0ea3689SLuigi Rizzo if (na->nm_mem == NULL) 2191f0ea3689SLuigi Rizzo goto err; 21924bf50f18SLuigi Rizzo na->nm_bdg_attach = netmap_vp_bdg_attach; 2193f9790aebSLuigi Rizzo /* other nmd 
fields are set in the common routine */ 2194f9790aebSLuigi Rizzo error = netmap_attach_common(na); 2195f0ea3689SLuigi Rizzo if (error) 2196f0ea3689SLuigi Rizzo goto err; 21974bf50f18SLuigi Rizzo *ret = vpna; 2198f0ea3689SLuigi Rizzo return 0; 2199f0ea3689SLuigi Rizzo 2200f0ea3689SLuigi Rizzo err: 2201f0ea3689SLuigi Rizzo if (na->nm_mem != NULL) 2202847bf383SLuigi Rizzo netmap_mem_delete(na->nm_mem); 2203f9790aebSLuigi Rizzo free(vpna, M_DEVBUF); 2204f9790aebSLuigi Rizzo return error; 2205f9790aebSLuigi Rizzo } 2206f9790aebSLuigi Rizzo 22074bf50f18SLuigi Rizzo /* Bridge wrapper code (bwrap). 22084bf50f18SLuigi Rizzo * This is used to connect a non-VALE-port netmap_adapter (hwna) to a 22094bf50f18SLuigi Rizzo * VALE switch. 22104bf50f18SLuigi Rizzo * The main task is to swap the meaning of tx and rx rings to match the 22114bf50f18SLuigi Rizzo * expectations of the VALE switch code (see nm_bdg_flush). 22124bf50f18SLuigi Rizzo * 22134bf50f18SLuigi Rizzo * The bwrap works by interposing a netmap_bwrap_adapter between the 22144bf50f18SLuigi Rizzo * rest of the system and the hwna. The netmap_bwrap_adapter looks like 22154bf50f18SLuigi Rizzo * a netmap_vp_adapter to the rest the system, but, internally, it 22164bf50f18SLuigi Rizzo * translates all callbacks to what the hwna expects. 22174bf50f18SLuigi Rizzo * 22184bf50f18SLuigi Rizzo * Note that we have to intercept callbacks coming from two sides: 22194bf50f18SLuigi Rizzo * 22204bf50f18SLuigi Rizzo * - callbacks coming from the netmap module are intercepted by 22214bf50f18SLuigi Rizzo * passing around the netmap_bwrap_adapter instead of the hwna 22224bf50f18SLuigi Rizzo * 22234bf50f18SLuigi Rizzo * - callbacks coming from outside of the netmap module only know 22244bf50f18SLuigi Rizzo * about the hwna. This, however, only happens in interrupt 22254bf50f18SLuigi Rizzo * handlers, where only the hwna->nm_notify callback is called. 
22264bf50f18SLuigi Rizzo * What the bwrap does is to overwrite the hwna->nm_notify callback 22274bf50f18SLuigi Rizzo * with its own netmap_bwrap_intr_notify. 22284bf50f18SLuigi Rizzo * XXX This assumes that the hwna->nm_notify callback was the 22294bf50f18SLuigi Rizzo * standard netmap_notify(), as it is the case for nic adapters. 22304bf50f18SLuigi Rizzo * Any additional action performed by hwna->nm_notify will not be 22314bf50f18SLuigi Rizzo * performed by netmap_bwrap_intr_notify. 22324bf50f18SLuigi Rizzo * 22334bf50f18SLuigi Rizzo * Additionally, the bwrap can optionally attach the host rings pair 22344bf50f18SLuigi Rizzo * of the wrapped adapter to a different port of the switch. 22354bf50f18SLuigi Rizzo */ 22364bf50f18SLuigi Rizzo 223717885a7bSLuigi Rizzo 2238f9790aebSLuigi Rizzo static void 2239f9790aebSLuigi Rizzo netmap_bwrap_dtor(struct netmap_adapter *na) 2240f9790aebSLuigi Rizzo { 2241f9790aebSLuigi Rizzo struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; 2242f9790aebSLuigi Rizzo struct netmap_adapter *hwna = bna->hwna; 2243*37e3a6d3SLuigi Rizzo struct nm_bridge *b = bna->up.na_bdg, 2244*37e3a6d3SLuigi Rizzo *bh = bna->host.na_bdg; 2245*37e3a6d3SLuigi Rizzo 2246*37e3a6d3SLuigi Rizzo if (b) { 2247*37e3a6d3SLuigi Rizzo netmap_bdg_detach_common(b, bna->up.bdg_port, 2248*37e3a6d3SLuigi Rizzo (bh ? bna->host.bdg_port : -1)); 2249*37e3a6d3SLuigi Rizzo } 2250f9790aebSLuigi Rizzo 2251f9790aebSLuigi Rizzo ND("na %p", na); 2252f9790aebSLuigi Rizzo na->ifp = NULL; 22534bf50f18SLuigi Rizzo bna->host.up.ifp = NULL; 22544bf50f18SLuigi Rizzo hwna->na_private = NULL; 22554bf50f18SLuigi Rizzo hwna->na_vp = hwna->na_hostvp = NULL; 22564bf50f18SLuigi Rizzo hwna->na_flags &= ~NAF_BUSY; 22574bf50f18SLuigi Rizzo netmap_adapter_put(hwna); 2258f9790aebSLuigi Rizzo 2259f9790aebSLuigi Rizzo } 2260f9790aebSLuigi Rizzo 226117885a7bSLuigi Rizzo 2262f9790aebSLuigi Rizzo /* 226317885a7bSLuigi Rizzo * Intr callback for NICs connected to a bridge. 
226417885a7bSLuigi Rizzo * Simply ignore tx interrupts (maybe we could try to recover space ?) 226517885a7bSLuigi Rizzo * and pass received packets from nic to the bridge. 226617885a7bSLuigi Rizzo * 2267f9790aebSLuigi Rizzo * XXX TODO check locking: this is called from the interrupt 2268f9790aebSLuigi Rizzo * handler so we should make sure that the interface is not 2269f9790aebSLuigi Rizzo * disconnected while passing down an interrupt. 2270f9790aebSLuigi Rizzo * 227117885a7bSLuigi Rizzo * Note, no user process can access this NIC or the host stack. 227217885a7bSLuigi Rizzo * The only part of the ring that is significant are the slots, 227317885a7bSLuigi Rizzo * and head/cur/tail are set from the kring as needed 227417885a7bSLuigi Rizzo * (part as a receive ring, part as a transmit ring). 227517885a7bSLuigi Rizzo * 227617885a7bSLuigi Rizzo * callback that overwrites the hwna notify callback. 2277*37e3a6d3SLuigi Rizzo * Packets come from the outside or from the host stack and are put on an 2278*37e3a6d3SLuigi Rizzo * hwna rx ring. 2279f9790aebSLuigi Rizzo * The bridge wrapper then sends the packets through the bridge. 
2280f9790aebSLuigi Rizzo */ 2281f9790aebSLuigi Rizzo static int 2282847bf383SLuigi Rizzo netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags) 2283f9790aebSLuigi Rizzo { 2284847bf383SLuigi Rizzo struct netmap_adapter *na = kring->na; 2285f9790aebSLuigi Rizzo struct netmap_bwrap_adapter *bna = na->na_private; 2286847bf383SLuigi Rizzo struct netmap_kring *bkring; 2287f9790aebSLuigi Rizzo struct netmap_vp_adapter *vpna = &bna->up; 2288847bf383SLuigi Rizzo u_int ring_nr = kring->ring_id; 2289*37e3a6d3SLuigi Rizzo int ret = NM_IRQ_COMPLETED; 2290*37e3a6d3SLuigi Rizzo int error; 2291f9790aebSLuigi Rizzo 229217885a7bSLuigi Rizzo if (netmap_verbose) 2293847bf383SLuigi Rizzo D("%s %s 0x%x", na->name, kring->name, flags); 2294f9790aebSLuigi Rizzo 2295847bf383SLuigi Rizzo bkring = &vpna->up.tx_rings[ring_nr]; 2296f9790aebSLuigi Rizzo 2297f9790aebSLuigi Rizzo /* make sure the ring is not disabled */ 2298*37e3a6d3SLuigi Rizzo if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) { 2299*37e3a6d3SLuigi Rizzo return EIO; 2300*37e3a6d3SLuigi Rizzo } 2301f9790aebSLuigi Rizzo 230217885a7bSLuigi Rizzo if (netmap_verbose) 2303847bf383SLuigi Rizzo D("%s head %d cur %d tail %d", na->name, 230417885a7bSLuigi Rizzo kring->rhead, kring->rcur, kring->rtail); 230517885a7bSLuigi Rizzo 2306847bf383SLuigi Rizzo /* simulate a user wakeup on the rx ring 2307847bf383SLuigi Rizzo * fetch packets that have arrived. 
2308f9790aebSLuigi Rizzo */ 2309f0ea3689SLuigi Rizzo error = kring->nm_sync(kring, 0); 2310f9790aebSLuigi Rizzo if (error) 2311f9790aebSLuigi Rizzo goto put_out; 2312*37e3a6d3SLuigi Rizzo if (kring->nr_hwcur == kring->nr_hwtail) { 2313*37e3a6d3SLuigi Rizzo if (netmap_verbose) 2314f9790aebSLuigi Rizzo D("how strange, interrupt with no packets on %s", 23154bf50f18SLuigi Rizzo na->name); 2316f9790aebSLuigi Rizzo goto put_out; 2317f9790aebSLuigi Rizzo } 231817885a7bSLuigi Rizzo 2319847bf383SLuigi Rizzo /* new packets are kring->rcur to kring->nr_hwtail, and the bkring 2320847bf383SLuigi Rizzo * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail 232117885a7bSLuigi Rizzo * to push all packets out. 232217885a7bSLuigi Rizzo */ 2323847bf383SLuigi Rizzo bkring->rhead = bkring->rcur = kring->nr_hwtail; 232417885a7bSLuigi Rizzo 23254bf50f18SLuigi Rizzo netmap_vp_txsync(bkring, flags); 2326f9790aebSLuigi Rizzo 232717885a7bSLuigi Rizzo /* mark all buffers as released on this ring */ 2328847bf383SLuigi Rizzo kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail; 232917885a7bSLuigi Rizzo /* another call to actually release the buffers */ 2330f0ea3689SLuigi Rizzo error = kring->nm_sync(kring, 0); 2331f9790aebSLuigi Rizzo 2332*37e3a6d3SLuigi Rizzo /* The second rxsync may have further advanced hwtail. If this happens, 2333*37e3a6d3SLuigi Rizzo * return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */ 2334*37e3a6d3SLuigi Rizzo if (kring->rcur != kring->nr_hwtail) { 2335*37e3a6d3SLuigi Rizzo ret = NM_IRQ_RESCHED; 2336*37e3a6d3SLuigi Rizzo } 2337f9790aebSLuigi Rizzo put_out: 2338f9790aebSLuigi Rizzo nm_kr_put(kring); 2339*37e3a6d3SLuigi Rizzo 2340*37e3a6d3SLuigi Rizzo return error ? 
error : ret; 2341f9790aebSLuigi Rizzo } 2342f9790aebSLuigi Rizzo 234317885a7bSLuigi Rizzo 23444bf50f18SLuigi Rizzo /* nm_register callback for bwrap */ 2345f9790aebSLuigi Rizzo static int 2346*37e3a6d3SLuigi Rizzo netmap_bwrap_reg(struct netmap_adapter *na, int onoff) 2347f9790aebSLuigi Rizzo { 2348f9790aebSLuigi Rizzo struct netmap_bwrap_adapter *bna = 2349f9790aebSLuigi Rizzo (struct netmap_bwrap_adapter *)na; 2350f9790aebSLuigi Rizzo struct netmap_adapter *hwna = bna->hwna; 2351f9790aebSLuigi Rizzo struct netmap_vp_adapter *hostna = &bna->host; 2352*37e3a6d3SLuigi Rizzo int error, i; 2353847bf383SLuigi Rizzo enum txrx t; 2354f9790aebSLuigi Rizzo 23554bf50f18SLuigi Rizzo ND("%s %s", na->name, onoff ? "on" : "off"); 2356f9790aebSLuigi Rizzo 2357f9790aebSLuigi Rizzo if (onoff) { 23584bf50f18SLuigi Rizzo /* netmap_do_regif has been called on the bwrap na. 23594bf50f18SLuigi Rizzo * We need to pass the information about the 23604bf50f18SLuigi Rizzo * memory allocator down to the hwna before 23614bf50f18SLuigi Rizzo * putting it in netmap mode 23624bf50f18SLuigi Rizzo */ 2363f9790aebSLuigi Rizzo hwna->na_lut = na->na_lut; 2364f9790aebSLuigi Rizzo 2365f9790aebSLuigi Rizzo if (hostna->na_bdg) { 23664bf50f18SLuigi Rizzo /* if the host rings have been attached to switch, 23674bf50f18SLuigi Rizzo * we need to copy the memory allocator information 23684bf50f18SLuigi Rizzo * in the hostna also 23694bf50f18SLuigi Rizzo */ 2370f9790aebSLuigi Rizzo hostna->up.na_lut = na->na_lut; 2371f9790aebSLuigi Rizzo } 2372f9790aebSLuigi Rizzo 23730c7ba37eSLuigi Rizzo /* cross-link the netmap rings 23740c7ba37eSLuigi Rizzo * The original number of rings comes from hwna, 23750c7ba37eSLuigi Rizzo * rx rings on one side equals tx rings on the other. 
23760c7ba37eSLuigi Rizzo */ 2377847bf383SLuigi Rizzo for_rx_tx(t) { 2378847bf383SLuigi Rizzo enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ 2379*37e3a6d3SLuigi Rizzo for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) { 2380*37e3a6d3SLuigi Rizzo NMR(hwna, r)[i].ring = NMR(na, t)[i].ring; 2381f9790aebSLuigi Rizzo } 2382f9790aebSLuigi Rizzo } 2383*37e3a6d3SLuigi Rizzo 2384*37e3a6d3SLuigi Rizzo if (na->na_flags & NAF_HOST_RINGS) { 2385*37e3a6d3SLuigi Rizzo struct netmap_adapter *hna = &hostna->up; 2386*37e3a6d3SLuigi Rizzo /* the hostna rings are the host rings of the bwrap. 2387*37e3a6d3SLuigi Rizzo * The corresponding krings must point back to the 2388*37e3a6d3SLuigi Rizzo * hostna 2389*37e3a6d3SLuigi Rizzo */ 2390*37e3a6d3SLuigi Rizzo hna->tx_rings = &na->tx_rings[na->num_tx_rings]; 2391*37e3a6d3SLuigi Rizzo hna->tx_rings[0].na = hna; 2392*37e3a6d3SLuigi Rizzo hna->rx_rings = &na->rx_rings[na->num_rx_rings]; 2393*37e3a6d3SLuigi Rizzo hna->rx_rings[0].na = hna; 2394*37e3a6d3SLuigi Rizzo } 2395*37e3a6d3SLuigi Rizzo } 2396*37e3a6d3SLuigi Rizzo 2397*37e3a6d3SLuigi Rizzo /* pass down the pending ring state information */ 2398*37e3a6d3SLuigi Rizzo for_rx_tx(t) { 2399*37e3a6d3SLuigi Rizzo for (i = 0; i < nma_get_nrings(na, t) + 1; i++) 2400*37e3a6d3SLuigi Rizzo NMR(hwna, t)[i].nr_pending_mode = 2401*37e3a6d3SLuigi Rizzo NMR(na, t)[i].nr_pending_mode; 2402f9790aebSLuigi Rizzo } 2403f9790aebSLuigi Rizzo 24044bf50f18SLuigi Rizzo /* forward the request to the hwna */ 2405f9790aebSLuigi Rizzo error = hwna->nm_register(hwna, onoff); 2406f9790aebSLuigi Rizzo if (error) 2407f9790aebSLuigi Rizzo return error; 2408f9790aebSLuigi Rizzo 2409*37e3a6d3SLuigi Rizzo /* copy up the current ring state information */ 2410*37e3a6d3SLuigi Rizzo for_rx_tx(t) { 2411*37e3a6d3SLuigi Rizzo for (i = 0; i < nma_get_nrings(na, t) + 1; i++) 2412*37e3a6d3SLuigi Rizzo NMR(na, t)[i].nr_mode = 2413*37e3a6d3SLuigi Rizzo NMR(hwna, t)[i].nr_mode; 2414*37e3a6d3SLuigi Rizzo } 2415*37e3a6d3SLuigi Rizzo 
24164bf50f18SLuigi Rizzo /* impersonate a netmap_vp_adapter */ 24174bf50f18SLuigi Rizzo netmap_vp_reg(na, onoff); 24184bf50f18SLuigi Rizzo if (hostna->na_bdg) 24194bf50f18SLuigi Rizzo netmap_vp_reg(&hostna->up, onoff); 2420f9790aebSLuigi Rizzo 2421f9790aebSLuigi Rizzo if (onoff) { 2422847bf383SLuigi Rizzo u_int i; 2423847bf383SLuigi Rizzo /* intercept the hwna nm_nofify callback on the hw rings */ 2424847bf383SLuigi Rizzo for (i = 0; i < hwna->num_rx_rings; i++) { 2425847bf383SLuigi Rizzo hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify; 2426847bf383SLuigi Rizzo hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify; 2427847bf383SLuigi Rizzo } 2428847bf383SLuigi Rizzo i = hwna->num_rx_rings; /* for safety */ 2429847bf383SLuigi Rizzo /* save the host ring notify unconditionally */ 2430847bf383SLuigi Rizzo hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify; 2431847bf383SLuigi Rizzo if (hostna->na_bdg) { 2432847bf383SLuigi Rizzo /* also intercept the host ring notify */ 2433847bf383SLuigi Rizzo hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify; 2434847bf383SLuigi Rizzo } 2435*37e3a6d3SLuigi Rizzo if (na->active_fds == 0) 2436*37e3a6d3SLuigi Rizzo na->na_flags |= NAF_NETMAP_ON; 2437f9790aebSLuigi Rizzo } else { 2438847bf383SLuigi Rizzo u_int i; 2439*37e3a6d3SLuigi Rizzo 2440*37e3a6d3SLuigi Rizzo if (na->active_fds == 0) 2441*37e3a6d3SLuigi Rizzo na->na_flags &= ~NAF_NETMAP_ON; 2442*37e3a6d3SLuigi Rizzo 2443847bf383SLuigi Rizzo /* reset all notify callbacks (including host ring) */ 2444847bf383SLuigi Rizzo for (i = 0; i <= hwna->num_rx_rings; i++) { 2445847bf383SLuigi Rizzo hwna->rx_rings[i].nm_notify = hwna->rx_rings[i].save_notify; 2446847bf383SLuigi Rizzo hwna->rx_rings[i].save_notify = NULL; 2447847bf383SLuigi Rizzo } 2448847bf383SLuigi Rizzo hwna->na_lut.lut = NULL; 2449847bf383SLuigi Rizzo hwna->na_lut.objtotal = 0; 2450847bf383SLuigi Rizzo hwna->na_lut.objsize = 0; 2451f9790aebSLuigi Rizzo } 2452f9790aebSLuigi Rizzo 2453f9790aebSLuigi 
Rizzo return 0; 2454f9790aebSLuigi Rizzo } 2455f9790aebSLuigi Rizzo 24564bf50f18SLuigi Rizzo /* nm_config callback for bwrap */ 2457f9790aebSLuigi Rizzo static int 2458f9790aebSLuigi Rizzo netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd, 2459f9790aebSLuigi Rizzo u_int *rxr, u_int *rxd) 2460f9790aebSLuigi Rizzo { 2461f9790aebSLuigi Rizzo struct netmap_bwrap_adapter *bna = 2462f9790aebSLuigi Rizzo (struct netmap_bwrap_adapter *)na; 2463f9790aebSLuigi Rizzo struct netmap_adapter *hwna = bna->hwna; 2464f9790aebSLuigi Rizzo 2465f9790aebSLuigi Rizzo /* forward the request */ 2466f9790aebSLuigi Rizzo netmap_update_config(hwna); 2467f9790aebSLuigi Rizzo /* swap the results */ 2468f9790aebSLuigi Rizzo *txr = hwna->num_rx_rings; 2469f9790aebSLuigi Rizzo *txd = hwna->num_rx_desc; 2470f9790aebSLuigi Rizzo *rxr = hwna->num_tx_rings; 2471f9790aebSLuigi Rizzo *rxd = hwna->num_rx_desc; 2472f9790aebSLuigi Rizzo 2473f9790aebSLuigi Rizzo return 0; 2474f9790aebSLuigi Rizzo } 2475f9790aebSLuigi Rizzo 247617885a7bSLuigi Rizzo 24774bf50f18SLuigi Rizzo /* nm_krings_create callback for bwrap */ 2478f9790aebSLuigi Rizzo static int 2479f9790aebSLuigi Rizzo netmap_bwrap_krings_create(struct netmap_adapter *na) 2480f9790aebSLuigi Rizzo { 2481f9790aebSLuigi Rizzo struct netmap_bwrap_adapter *bna = 2482f9790aebSLuigi Rizzo (struct netmap_bwrap_adapter *)na; 2483f9790aebSLuigi Rizzo struct netmap_adapter *hwna = bna->hwna; 2484*37e3a6d3SLuigi Rizzo int i, error = 0; 2485*37e3a6d3SLuigi Rizzo enum txrx t; 2486f9790aebSLuigi Rizzo 24874bf50f18SLuigi Rizzo ND("%s", na->name); 2488f9790aebSLuigi Rizzo 24894bf50f18SLuigi Rizzo /* impersonate a netmap_vp_adapter */ 2490f9790aebSLuigi Rizzo error = netmap_vp_krings_create(na); 2491f9790aebSLuigi Rizzo if (error) 2492f9790aebSLuigi Rizzo return error; 2493f9790aebSLuigi Rizzo 24944bf50f18SLuigi Rizzo /* also create the hwna krings */ 2495f9790aebSLuigi Rizzo error = hwna->nm_krings_create(hwna); 2496f9790aebSLuigi Rizzo if (error) { 
2497*37e3a6d3SLuigi Rizzo goto err_del_vp_rings; 2498f9790aebSLuigi Rizzo } 2499f9790aebSLuigi Rizzo 2500*37e3a6d3SLuigi Rizzo /* get each ring slot number from the corresponding hwna ring */ 2501*37e3a6d3SLuigi Rizzo for_rx_tx(t) { 2502*37e3a6d3SLuigi Rizzo enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ 2503*37e3a6d3SLuigi Rizzo for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) { 2504*37e3a6d3SLuigi Rizzo NMR(na, t)[i].nkr_num_slots = NMR(hwna, r)[i].nkr_num_slots; 2505*37e3a6d3SLuigi Rizzo } 2506f0ea3689SLuigi Rizzo } 2507f9790aebSLuigi Rizzo 2508f9790aebSLuigi Rizzo return 0; 2509*37e3a6d3SLuigi Rizzo 2510*37e3a6d3SLuigi Rizzo err_del_vp_rings: 2511*37e3a6d3SLuigi Rizzo netmap_vp_krings_delete(na); 2512*37e3a6d3SLuigi Rizzo 2513*37e3a6d3SLuigi Rizzo return error; 2514f9790aebSLuigi Rizzo } 2515f9790aebSLuigi Rizzo 251617885a7bSLuigi Rizzo 2517f9790aebSLuigi Rizzo static void 2518f9790aebSLuigi Rizzo netmap_bwrap_krings_delete(struct netmap_adapter *na) 2519f9790aebSLuigi Rizzo { 2520f9790aebSLuigi Rizzo struct netmap_bwrap_adapter *bna = 2521f9790aebSLuigi Rizzo (struct netmap_bwrap_adapter *)na; 2522f9790aebSLuigi Rizzo struct netmap_adapter *hwna = bna->hwna; 2523f9790aebSLuigi Rizzo 25244bf50f18SLuigi Rizzo ND("%s", na->name); 2525f9790aebSLuigi Rizzo 2526f9790aebSLuigi Rizzo hwna->nm_krings_delete(hwna); 2527f9790aebSLuigi Rizzo netmap_vp_krings_delete(na); 2528f9790aebSLuigi Rizzo } 2529f9790aebSLuigi Rizzo 253017885a7bSLuigi Rizzo 2531f9790aebSLuigi Rizzo /* notify method for the bridge-->hwna direction */ 2532f9790aebSLuigi Rizzo static int 2533847bf383SLuigi Rizzo netmap_bwrap_notify(struct netmap_kring *kring, int flags) 2534f9790aebSLuigi Rizzo { 2535847bf383SLuigi Rizzo struct netmap_adapter *na = kring->na; 2536847bf383SLuigi Rizzo struct netmap_bwrap_adapter *bna = na->na_private; 2537f9790aebSLuigi Rizzo struct netmap_adapter *hwna = bna->hwna; 2538847bf383SLuigi Rizzo u_int ring_n = kring->ring_id; 2539847bf383SLuigi Rizzo u_int lim = 
kring->nkr_num_slots - 1; 2540847bf383SLuigi Rizzo struct netmap_kring *hw_kring; 2541*37e3a6d3SLuigi Rizzo int error; 2542f9790aebSLuigi Rizzo 2543847bf383SLuigi Rizzo ND("%s: na %s hwna %s", 2544847bf383SLuigi Rizzo (kring ? kring->name : "NULL!"), 2545847bf383SLuigi Rizzo (na ? na->name : "NULL!"), 2546847bf383SLuigi Rizzo (hwna ? hwna->name : "NULL!")); 2547f9790aebSLuigi Rizzo hw_kring = &hwna->tx_rings[ring_n]; 2548847bf383SLuigi Rizzo 2549*37e3a6d3SLuigi Rizzo if (nm_kr_tryget(hw_kring, 0, NULL)) { 2550*37e3a6d3SLuigi Rizzo return ENXIO; 2551*37e3a6d3SLuigi Rizzo } 2552f9790aebSLuigi Rizzo 255317885a7bSLuigi Rizzo /* first step: simulate a user wakeup on the rx ring */ 2554847bf383SLuigi Rizzo netmap_vp_rxsync(kring, flags); 255517885a7bSLuigi Rizzo ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", 25564bf50f18SLuigi Rizzo na->name, ring_n, 255717885a7bSLuigi Rizzo kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, 255817885a7bSLuigi Rizzo ring->head, ring->cur, ring->tail, 255917885a7bSLuigi Rizzo hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail); 2560847bf383SLuigi Rizzo /* second step: the new packets are sent on the tx ring 256117885a7bSLuigi Rizzo * (which is actually the same ring) 256217885a7bSLuigi Rizzo */ 2563847bf383SLuigi Rizzo hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail; 2564f0ea3689SLuigi Rizzo error = hw_kring->nm_sync(hw_kring, flags); 2565847bf383SLuigi Rizzo if (error) 2566*37e3a6d3SLuigi Rizzo goto put_out; 256717885a7bSLuigi Rizzo 2568847bf383SLuigi Rizzo /* third step: now we are back the rx ring */ 256917885a7bSLuigi Rizzo /* claim ownership on all hw owned bufs */ 2570847bf383SLuigi Rizzo kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */ 257117885a7bSLuigi Rizzo 2572847bf383SLuigi Rizzo /* fourth step: the user goes to sleep again, causing another rxsync */ 2573847bf383SLuigi Rizzo netmap_vp_rxsync(kring, flags); 257417885a7bSLuigi Rizzo 
ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", 25754bf50f18SLuigi Rizzo na->name, ring_n, 257617885a7bSLuigi Rizzo kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, 257717885a7bSLuigi Rizzo ring->head, ring->cur, ring->tail, 257817885a7bSLuigi Rizzo hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail); 2579*37e3a6d3SLuigi Rizzo put_out: 2580847bf383SLuigi Rizzo nm_kr_put(hw_kring); 2581*37e3a6d3SLuigi Rizzo 2582*37e3a6d3SLuigi Rizzo return error ? error : NM_IRQ_COMPLETED; 2583f9790aebSLuigi Rizzo } 2584f9790aebSLuigi Rizzo 258517885a7bSLuigi Rizzo 25864bf50f18SLuigi Rizzo /* nm_bdg_ctl callback for the bwrap. 25874bf50f18SLuigi Rizzo * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd]. 25884bf50f18SLuigi Rizzo * On attach, it needs to provide a fake netmap_priv_d structure and 25894bf50f18SLuigi Rizzo * perform a netmap_do_regif() on the bwrap. This will put both the 25904bf50f18SLuigi Rizzo * bwrap and the hwna in netmap mode, with the netmap rings shared 25914bf50f18SLuigi Rizzo * and cross linked. Moroever, it will start intercepting interrupts 25924bf50f18SLuigi Rizzo * directed to hwna. 
25934bf50f18SLuigi Rizzo */ 2594f9790aebSLuigi Rizzo static int 25954bf50f18SLuigi Rizzo netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) 25964bf50f18SLuigi Rizzo { 25974bf50f18SLuigi Rizzo struct netmap_priv_d *npriv; 25984bf50f18SLuigi Rizzo struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; 25994bf50f18SLuigi Rizzo int error = 0; 26004bf50f18SLuigi Rizzo 26014bf50f18SLuigi Rizzo if (attach) { 26024bf50f18SLuigi Rizzo if (NETMAP_OWNED_BY_ANY(na)) { 26034bf50f18SLuigi Rizzo return EBUSY; 26044bf50f18SLuigi Rizzo } 26054bf50f18SLuigi Rizzo if (bna->na_kpriv) { 26064bf50f18SLuigi Rizzo /* nothing to do */ 26074bf50f18SLuigi Rizzo return 0; 26084bf50f18SLuigi Rizzo } 2609*37e3a6d3SLuigi Rizzo npriv = netmap_priv_new(); 26104bf50f18SLuigi Rizzo if (npriv == NULL) 26114bf50f18SLuigi Rizzo return ENOMEM; 2612*37e3a6d3SLuigi Rizzo npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */ 2613*37e3a6d3SLuigi Rizzo error = netmap_do_regif(npriv, na, 0, NR_REG_NIC_SW); 2614847bf383SLuigi Rizzo if (error) { 2615*37e3a6d3SLuigi Rizzo netmap_priv_delete(npriv); 26164bf50f18SLuigi Rizzo return error; 26174bf50f18SLuigi Rizzo } 26184bf50f18SLuigi Rizzo bna->na_kpriv = npriv; 26194bf50f18SLuigi Rizzo na->na_flags |= NAF_BUSY; 26204bf50f18SLuigi Rizzo } else { 26214bf50f18SLuigi Rizzo if (na->active_fds == 0) /* not registered */ 26224bf50f18SLuigi Rizzo return EINVAL; 2623*37e3a6d3SLuigi Rizzo netmap_priv_delete(bna->na_kpriv); 26244bf50f18SLuigi Rizzo bna->na_kpriv = NULL; 26254bf50f18SLuigi Rizzo na->na_flags &= ~NAF_BUSY; 26264bf50f18SLuigi Rizzo } 26274bf50f18SLuigi Rizzo return error; 26284bf50f18SLuigi Rizzo 26294bf50f18SLuigi Rizzo } 26304bf50f18SLuigi Rizzo 26314bf50f18SLuigi Rizzo /* attach a bridge wrapper to the 'real' device */ 26324bf50f18SLuigi Rizzo int 26334bf50f18SLuigi Rizzo netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) 2634f9790aebSLuigi Rizzo { 2635f9790aebSLuigi Rizzo struct 
netmap_bwrap_adapter *bna; 26364bf50f18SLuigi Rizzo struct netmap_adapter *na = NULL; 26374bf50f18SLuigi Rizzo struct netmap_adapter *hostna = NULL; 26384bf50f18SLuigi Rizzo int error = 0; 2639847bf383SLuigi Rizzo enum txrx t; 2640f9790aebSLuigi Rizzo 26414bf50f18SLuigi Rizzo /* make sure the NIC is not already in use */ 26424bf50f18SLuigi Rizzo if (NETMAP_OWNED_BY_ANY(hwna)) { 26434bf50f18SLuigi Rizzo D("NIC %s busy, cannot attach to bridge", hwna->name); 26444bf50f18SLuigi Rizzo return EBUSY; 26454bf50f18SLuigi Rizzo } 2646f9790aebSLuigi Rizzo 2647f9790aebSLuigi Rizzo bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO); 26484bf50f18SLuigi Rizzo if (bna == NULL) { 2649f9790aebSLuigi Rizzo return ENOMEM; 26504bf50f18SLuigi Rizzo } 2651f9790aebSLuigi Rizzo 2652f9790aebSLuigi Rizzo na = &bna->up.up; 2653*37e3a6d3SLuigi Rizzo /* make bwrap ifp point to the real ifp */ 2654*37e3a6d3SLuigi Rizzo na->ifp = hwna->ifp; 2655847bf383SLuigi Rizzo na->na_private = bna; 26564bf50f18SLuigi Rizzo strncpy(na->name, nr_name, sizeof(na->name)); 2657f9790aebSLuigi Rizzo /* fill the ring data for the bwrap adapter with rx/tx meanings 2658f9790aebSLuigi Rizzo * swapped. The real cross-linking will be done during register, 2659f9790aebSLuigi Rizzo * when all the krings will have been created. 
2660f9790aebSLuigi Rizzo */ 2661847bf383SLuigi Rizzo for_rx_tx(t) { 2662847bf383SLuigi Rizzo enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ 2663847bf383SLuigi Rizzo nma_set_nrings(na, t, nma_get_nrings(hwna, r)); 2664847bf383SLuigi Rizzo nma_set_ndesc(na, t, nma_get_ndesc(hwna, r)); 2665847bf383SLuigi Rizzo } 2666f9790aebSLuigi Rizzo na->nm_dtor = netmap_bwrap_dtor; 2667*37e3a6d3SLuigi Rizzo na->nm_register = netmap_bwrap_reg; 2668f9790aebSLuigi Rizzo // na->nm_txsync = netmap_bwrap_txsync; 2669f9790aebSLuigi Rizzo // na->nm_rxsync = netmap_bwrap_rxsync; 2670f9790aebSLuigi Rizzo na->nm_config = netmap_bwrap_config; 2671f9790aebSLuigi Rizzo na->nm_krings_create = netmap_bwrap_krings_create; 2672f9790aebSLuigi Rizzo na->nm_krings_delete = netmap_bwrap_krings_delete; 2673f9790aebSLuigi Rizzo na->nm_notify = netmap_bwrap_notify; 26744bf50f18SLuigi Rizzo na->nm_bdg_ctl = netmap_bwrap_bdg_ctl; 26754bf50f18SLuigi Rizzo na->pdev = hwna->pdev; 2676*37e3a6d3SLuigi Rizzo na->nm_mem = hwna->nm_mem; 2677*37e3a6d3SLuigi Rizzo na->virt_hdr_len = hwna->virt_hdr_len; 2678f9790aebSLuigi Rizzo bna->up.retry = 1; /* XXX maybe this should depend on the hwna */ 2679f9790aebSLuigi Rizzo 2680f9790aebSLuigi Rizzo bna->hwna = hwna; 2681f9790aebSLuigi Rizzo netmap_adapter_get(hwna); 2682f9790aebSLuigi Rizzo hwna->na_private = bna; /* weak reference */ 26834bf50f18SLuigi Rizzo hwna->na_vp = &bna->up; 2684f9790aebSLuigi Rizzo 2685f0ea3689SLuigi Rizzo if (hwna->na_flags & NAF_HOST_RINGS) { 26864bf50f18SLuigi Rizzo if (hwna->na_flags & NAF_SW_ONLY) 26874bf50f18SLuigi Rizzo na->na_flags |= NAF_SW_ONLY; 2688f0ea3689SLuigi Rizzo na->na_flags |= NAF_HOST_RINGS; 2689f9790aebSLuigi Rizzo hostna = &bna->host.up; 26904bf50f18SLuigi Rizzo snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name); 2691f9790aebSLuigi Rizzo hostna->ifp = hwna->ifp; 2692847bf383SLuigi Rizzo for_rx_tx(t) { 2693847bf383SLuigi Rizzo enum txrx r = nm_txrx_swap(t); 2694847bf383SLuigi Rizzo nma_set_nrings(hostna, t, 
1); 2695847bf383SLuigi Rizzo nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r)); 2696847bf383SLuigi Rizzo } 2697f9790aebSLuigi Rizzo // hostna->nm_txsync = netmap_bwrap_host_txsync; 2698f9790aebSLuigi Rizzo // hostna->nm_rxsync = netmap_bwrap_host_rxsync; 2699847bf383SLuigi Rizzo hostna->nm_notify = netmap_bwrap_notify; 2700f9790aebSLuigi Rizzo hostna->nm_mem = na->nm_mem; 2701f9790aebSLuigi Rizzo hostna->na_private = bna; 27024bf50f18SLuigi Rizzo hostna->na_vp = &bna->up; 27034bf50f18SLuigi Rizzo na->na_hostvp = hwna->na_hostvp = 27044bf50f18SLuigi Rizzo hostna->na_hostvp = &bna->host; 27054bf50f18SLuigi Rizzo hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */ 2706f0ea3689SLuigi Rizzo } 2707f9790aebSLuigi Rizzo 270817885a7bSLuigi Rizzo ND("%s<->%s txr %d txd %d rxr %d rxd %d", 27094bf50f18SLuigi Rizzo na->name, ifp->if_xname, 2710f9790aebSLuigi Rizzo na->num_tx_rings, na->num_tx_desc, 2711f9790aebSLuigi Rizzo na->num_rx_rings, na->num_rx_desc); 2712f9790aebSLuigi Rizzo 2713f9790aebSLuigi Rizzo error = netmap_attach_common(na); 2714f9790aebSLuigi Rizzo if (error) { 27154bf50f18SLuigi Rizzo goto err_free; 27164bf50f18SLuigi Rizzo } 27174bf50f18SLuigi Rizzo hwna->na_flags |= NAF_BUSY; 27184bf50f18SLuigi Rizzo return 0; 27194bf50f18SLuigi Rizzo 27204bf50f18SLuigi Rizzo err_free: 27214bf50f18SLuigi Rizzo hwna->na_vp = hwna->na_hostvp = NULL; 2722f9790aebSLuigi Rizzo netmap_adapter_put(hwna); 2723f9790aebSLuigi Rizzo free(bna, M_DEVBUF); 2724f9790aebSLuigi Rizzo return error; 27254bf50f18SLuigi Rizzo 2726f9790aebSLuigi Rizzo } 2727f9790aebSLuigi Rizzo 2728847bf383SLuigi Rizzo struct nm_bridge * 2729847bf383SLuigi Rizzo netmap_init_bridges2(u_int n) 2730f9790aebSLuigi Rizzo { 2731f9790aebSLuigi Rizzo int i; 2732847bf383SLuigi Rizzo struct nm_bridge *b; 2733847bf383SLuigi Rizzo 2734847bf383SLuigi Rizzo b = malloc(sizeof(struct nm_bridge) * n, M_DEVBUF, 2735847bf383SLuigi Rizzo M_NOWAIT | M_ZERO); 2736847bf383SLuigi Rizzo if (b == NULL) 2737847bf383SLuigi Rizzo return 
NULL; 2738847bf383SLuigi Rizzo for (i = 0; i < n; i++) 2739847bf383SLuigi Rizzo BDG_RWINIT(&b[i]); 2740847bf383SLuigi Rizzo return b; 2741847bf383SLuigi Rizzo } 2742847bf383SLuigi Rizzo 2743847bf383SLuigi Rizzo void 2744847bf383SLuigi Rizzo netmap_uninit_bridges2(struct nm_bridge *b, u_int n) 2745847bf383SLuigi Rizzo { 2746847bf383SLuigi Rizzo int i; 2747847bf383SLuigi Rizzo 2748847bf383SLuigi Rizzo if (b == NULL) 2749847bf383SLuigi Rizzo return; 2750847bf383SLuigi Rizzo 2751847bf383SLuigi Rizzo for (i = 0; i < n; i++) 2752847bf383SLuigi Rizzo BDG_RWDESTROY(&b[i]); 2753847bf383SLuigi Rizzo free(b, M_DEVBUF); 2754847bf383SLuigi Rizzo } 2755847bf383SLuigi Rizzo 2756847bf383SLuigi Rizzo int 2757847bf383SLuigi Rizzo netmap_init_bridges(void) 2758847bf383SLuigi Rizzo { 2759847bf383SLuigi Rizzo #ifdef CONFIG_NET_NS 2760847bf383SLuigi Rizzo return netmap_bns_register(); 2761847bf383SLuigi Rizzo #else 2762847bf383SLuigi Rizzo nm_bridges = netmap_init_bridges2(NM_BRIDGES); 2763847bf383SLuigi Rizzo if (nm_bridges == NULL) 2764847bf383SLuigi Rizzo return ENOMEM; 2765847bf383SLuigi Rizzo return 0; 2766847bf383SLuigi Rizzo #endif 2767847bf383SLuigi Rizzo } 2768847bf383SLuigi Rizzo 2769847bf383SLuigi Rizzo void 2770847bf383SLuigi Rizzo netmap_uninit_bridges(void) 2771847bf383SLuigi Rizzo { 2772847bf383SLuigi Rizzo #ifdef CONFIG_NET_NS 2773847bf383SLuigi Rizzo netmap_bns_unregister(); 2774847bf383SLuigi Rizzo #else 2775847bf383SLuigi Rizzo netmap_uninit_bridges2(nm_bridges, NM_BRIDGES); 2776847bf383SLuigi Rizzo #endif 2777f9790aebSLuigi Rizzo } 2778f9790aebSLuigi Rizzo #endif /* WITH_VALE */ 2779