1*718cf2ccSPedro F. Giffuni /*- 2*718cf2ccSPedro F. Giffuni * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3*718cf2ccSPedro F. Giffuni * 437e3a6d3SLuigi Rizzo * Copyright (C) 2013-2016 Universita` di Pisa 537e3a6d3SLuigi Rizzo * All rights reserved. 6f9790aebSLuigi Rizzo * 7f9790aebSLuigi Rizzo * Redistribution and use in source and binary forms, with or without 8f9790aebSLuigi Rizzo * modification, are permitted provided that the following conditions 9f9790aebSLuigi Rizzo * are met: 10f9790aebSLuigi Rizzo * 1. Redistributions of source code must retain the above copyright 11f9790aebSLuigi Rizzo * notice, this list of conditions and the following disclaimer. 12f9790aebSLuigi Rizzo * 2. Redistributions in binary form must reproduce the above copyright 13f9790aebSLuigi Rizzo * notice, this list of conditions and the following disclaimer in the 14f9790aebSLuigi Rizzo * documentation and/or other materials provided with the distribution. 15f9790aebSLuigi Rizzo * 16f9790aebSLuigi Rizzo * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17f9790aebSLuigi Rizzo * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18f9790aebSLuigi Rizzo * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19f9790aebSLuigi Rizzo * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20f9790aebSLuigi Rizzo * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21f9790aebSLuigi Rizzo * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22f9790aebSLuigi Rizzo * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23f9790aebSLuigi Rizzo * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24f9790aebSLuigi Rizzo * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25f9790aebSLuigi Rizzo * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26f9790aebSLuigi Rizzo * SUCH DAMAGE. 
 */


/*
 * This module implements the VALE switch for netmap

--- VALE SWITCH ---

NMG_LOCK() serializes all modifications to switches and ports.
A switch cannot be deleted until all ports are gone.

For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring a new port or deleting an existing
one, the lock is acquired in exclusive mode (after holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.

On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch)

 */

/*
 * OS-specific code that is used only within this file.
57f9790aebSLuigi Rizzo * Other OS-specific code that must be accessed by drivers 58f9790aebSLuigi Rizzo * is present in netmap_kern.h 59f9790aebSLuigi Rizzo */ 60f9790aebSLuigi Rizzo 61f9790aebSLuigi Rizzo #if defined(__FreeBSD__) 62f9790aebSLuigi Rizzo #include <sys/cdefs.h> /* prerequisite */ 63f9790aebSLuigi Rizzo __FBSDID("$FreeBSD$"); 64f9790aebSLuigi Rizzo 65f9790aebSLuigi Rizzo #include <sys/types.h> 66f9790aebSLuigi Rizzo #include <sys/errno.h> 67f9790aebSLuigi Rizzo #include <sys/param.h> /* defines used in kernel.h */ 68f9790aebSLuigi Rizzo #include <sys/kernel.h> /* types used in module initialization */ 69f9790aebSLuigi Rizzo #include <sys/conf.h> /* cdevsw struct, UID, GID */ 70f9790aebSLuigi Rizzo #include <sys/sockio.h> 71f9790aebSLuigi Rizzo #include <sys/socketvar.h> /* struct socket */ 72f9790aebSLuigi Rizzo #include <sys/malloc.h> 73f9790aebSLuigi Rizzo #include <sys/poll.h> 74f9790aebSLuigi Rizzo #include <sys/rwlock.h> 75f9790aebSLuigi Rizzo #include <sys/socket.h> /* sockaddrs */ 76f9790aebSLuigi Rizzo #include <sys/selinfo.h> 77f9790aebSLuigi Rizzo #include <sys/sysctl.h> 78f9790aebSLuigi Rizzo #include <net/if.h> 79f9790aebSLuigi Rizzo #include <net/if_var.h> 80f9790aebSLuigi Rizzo #include <net/bpf.h> /* BIOCIMMEDIATE */ 81f9790aebSLuigi Rizzo #include <machine/bus.h> /* bus_dmamap_* */ 82f9790aebSLuigi Rizzo #include <sys/endian.h> 83f9790aebSLuigi Rizzo #include <sys/refcount.h> 84f9790aebSLuigi Rizzo 85f9790aebSLuigi Rizzo 86f9790aebSLuigi Rizzo #define BDG_RWLOCK_T struct rwlock // struct rwlock 87f9790aebSLuigi Rizzo 88f9790aebSLuigi Rizzo #define BDG_RWINIT(b) \ 89f9790aebSLuigi Rizzo rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS) 90f9790aebSLuigi Rizzo #define BDG_WLOCK(b) rw_wlock(&(b)->bdg_lock) 91f9790aebSLuigi Rizzo #define BDG_WUNLOCK(b) rw_wunlock(&(b)->bdg_lock) 92f9790aebSLuigi Rizzo #define BDG_RLOCK(b) rw_rlock(&(b)->bdg_lock) 93f9790aebSLuigi Rizzo #define BDG_RTRYLOCK(b) rw_try_rlock(&(b)->bdg_lock) 
94f9790aebSLuigi Rizzo #define BDG_RUNLOCK(b) rw_runlock(&(b)->bdg_lock) 95f9790aebSLuigi Rizzo #define BDG_RWDESTROY(b) rw_destroy(&(b)->bdg_lock) 96f9790aebSLuigi Rizzo 97f9790aebSLuigi Rizzo 98f9790aebSLuigi Rizzo #elif defined(linux) 99f9790aebSLuigi Rizzo 100f9790aebSLuigi Rizzo #include "bsd_glue.h" 101f9790aebSLuigi Rizzo 102f9790aebSLuigi Rizzo #elif defined(__APPLE__) 103f9790aebSLuigi Rizzo 104f9790aebSLuigi Rizzo #warning OSX support is only partial 105f9790aebSLuigi Rizzo #include "osx_glue.h" 106f9790aebSLuigi Rizzo 10737e3a6d3SLuigi Rizzo #elif defined(_WIN32) 10837e3a6d3SLuigi Rizzo #include "win_glue.h" 10937e3a6d3SLuigi Rizzo 110f9790aebSLuigi Rizzo #else 111f9790aebSLuigi Rizzo 112f9790aebSLuigi Rizzo #error Unsupported platform 113f9790aebSLuigi Rizzo 114f9790aebSLuigi Rizzo #endif /* unsupported */ 115f9790aebSLuigi Rizzo 116f9790aebSLuigi Rizzo /* 117f9790aebSLuigi Rizzo * common headers 118f9790aebSLuigi Rizzo */ 119f9790aebSLuigi Rizzo 120f9790aebSLuigi Rizzo #include <net/netmap.h> 121f9790aebSLuigi Rizzo #include <dev/netmap/netmap_kern.h> 122f9790aebSLuigi Rizzo #include <dev/netmap/netmap_mem2.h> 123f9790aebSLuigi Rizzo 124f9790aebSLuigi Rizzo #ifdef WITH_VALE 125f9790aebSLuigi Rizzo 126f9790aebSLuigi Rizzo /* 127f9790aebSLuigi Rizzo * system parameters (most of them in netmap_kern.h) 12837e3a6d3SLuigi Rizzo * NM_BDG_NAME prefix for switch port names, default "vale" 129f9790aebSLuigi Rizzo * NM_BDG_MAXPORTS number of ports 130f9790aebSLuigi Rizzo * NM_BRIDGES max number of switches in the system. 131f9790aebSLuigi Rizzo * XXX should become a sysctl or tunable 132f9790aebSLuigi Rizzo * 133f9790aebSLuigi Rizzo * Switch ports are named valeX:Y where X is the switch name and Y 134f9790aebSLuigi Rizzo * is the port. If Y matches a physical interface name, the port is 135f9790aebSLuigi Rizzo * connected to a physical device. 
136f9790aebSLuigi Rizzo * 137f9790aebSLuigi Rizzo * Unlike physical interfaces, switch ports use their own memory region 138f9790aebSLuigi Rizzo * for rings and buffers. 139f9790aebSLuigi Rizzo * The virtual interfaces use per-queue lock instead of core lock. 140f9790aebSLuigi Rizzo * In the tx loop, we aggregate traffic in batches to make all operations 141f9790aebSLuigi Rizzo * faster. The batch size is bridge_batch. 142f9790aebSLuigi Rizzo */ 143f9790aebSLuigi Rizzo #define NM_BDG_MAXRINGS 16 /* XXX unclear how many. */ 144f9790aebSLuigi Rizzo #define NM_BDG_MAXSLOTS 4096 /* XXX same as above */ 145f9790aebSLuigi Rizzo #define NM_BRIDGE_RINGSIZE 1024 /* in the device */ 146f9790aebSLuigi Rizzo #define NM_BDG_HASH 1024 /* forwarding table entries */ 147f9790aebSLuigi Rizzo #define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */ 148f9790aebSLuigi Rizzo #define NM_MULTISEG 64 /* max size of a chain of bufs */ 149f9790aebSLuigi Rizzo /* actual size of the tables */ 150f9790aebSLuigi Rizzo #define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NM_MULTISEG) 151f9790aebSLuigi Rizzo /* NM_FT_NULL terminates a list of slots in the ft */ 152f9790aebSLuigi Rizzo #define NM_FT_NULL NM_BDG_BATCH_MAX 153f9790aebSLuigi Rizzo 154f9790aebSLuigi Rizzo 155f9790aebSLuigi Rizzo /* 156f9790aebSLuigi Rizzo * bridge_batch is set via sysctl to the max batch size to be 157f9790aebSLuigi Rizzo * used in the bridge. The actual value may be larger as the 158f9790aebSLuigi Rizzo * last packet in the block may overflow the size. 
159f9790aebSLuigi Rizzo */ 16037e3a6d3SLuigi Rizzo static int bridge_batch = NM_BDG_BATCH; /* bridge batch size */ 16137e3a6d3SLuigi Rizzo SYSBEGIN(vars_vale); 162f9790aebSLuigi Rizzo SYSCTL_DECL(_dev_netmap); 163f9790aebSLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , ""); 16437e3a6d3SLuigi Rizzo SYSEND; 165f9790aebSLuigi Rizzo 166c3e9b4dbSLuiz Otavio O Souza static int netmap_vp_create(struct nmreq *, struct ifnet *, 167c3e9b4dbSLuiz Otavio O Souza struct netmap_mem_d *nmd, struct netmap_vp_adapter **); 1684bf50f18SLuigi Rizzo static int netmap_vp_reg(struct netmap_adapter *na, int onoff); 16937e3a6d3SLuigi Rizzo static int netmap_bwrap_reg(struct netmap_adapter *, int onoff); 170f9790aebSLuigi Rizzo 171f9790aebSLuigi Rizzo /* 172f9790aebSLuigi Rizzo * For each output interface, nm_bdg_q is used to construct a list. 173f9790aebSLuigi Rizzo * bq_len is the number of output buffers (we can have coalescing 174f9790aebSLuigi Rizzo * during the copy). 175f9790aebSLuigi Rizzo */ 176f9790aebSLuigi Rizzo struct nm_bdg_q { 177f9790aebSLuigi Rizzo uint16_t bq_head; 178f9790aebSLuigi Rizzo uint16_t bq_tail; 179f9790aebSLuigi Rizzo uint32_t bq_len; /* number of buffers */ 180f9790aebSLuigi Rizzo }; 181f9790aebSLuigi Rizzo 182f9790aebSLuigi Rizzo /* XXX revise this */ 183f9790aebSLuigi Rizzo struct nm_hash_ent { 184f9790aebSLuigi Rizzo uint64_t mac; /* the top 2 bytes are the epoch */ 185f9790aebSLuigi Rizzo uint64_t ports; 186f9790aebSLuigi Rizzo }; 187f9790aebSLuigi Rizzo 188f9790aebSLuigi Rizzo /* 189f9790aebSLuigi Rizzo * nm_bridge is a descriptor for a VALE switch. 190f9790aebSLuigi Rizzo * Interfaces for a bridge are all in bdg_ports[]. 191f9790aebSLuigi Rizzo * The array has fixed size, an empty entry does not terminate 192f9790aebSLuigi Rizzo * the search, but lookups only occur on attach/detach so we 193f9790aebSLuigi Rizzo * don't mind if they are slow. 
194f9790aebSLuigi Rizzo * 195f9790aebSLuigi Rizzo * The bridge is non blocking on the transmit ports: excess 196f9790aebSLuigi Rizzo * packets are dropped if there is no room on the output port. 197f9790aebSLuigi Rizzo * 198f9790aebSLuigi Rizzo * bdg_lock protects accesses to the bdg_ports array. 199f9790aebSLuigi Rizzo * This is a rw lock (or equivalent). 200f9790aebSLuigi Rizzo */ 201f9790aebSLuigi Rizzo struct nm_bridge { 202f9790aebSLuigi Rizzo /* XXX what is the proper alignment/layout ? */ 203f9790aebSLuigi Rizzo BDG_RWLOCK_T bdg_lock; /* protects bdg_ports */ 204f9790aebSLuigi Rizzo int bdg_namelen; 205f9790aebSLuigi Rizzo uint32_t bdg_active_ports; /* 0 means free */ 206f9790aebSLuigi Rizzo char bdg_basename[IFNAMSIZ]; 207f9790aebSLuigi Rizzo 208f9790aebSLuigi Rizzo /* Indexes of active ports (up to active_ports) 209f9790aebSLuigi Rizzo * and all other remaining ports. 210f9790aebSLuigi Rizzo */ 211f9790aebSLuigi Rizzo uint8_t bdg_port_index[NM_BDG_MAXPORTS]; 212f9790aebSLuigi Rizzo 213f9790aebSLuigi Rizzo struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS]; 214f9790aebSLuigi Rizzo 215f9790aebSLuigi Rizzo 216f9790aebSLuigi Rizzo /* 217f9790aebSLuigi Rizzo * The function to decide the destination port. 218f9790aebSLuigi Rizzo * It returns either of an index of the destination port, 219f9790aebSLuigi Rizzo * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to 220f9790aebSLuigi Rizzo * forward this packet. ring_nr is the source ring index, and the 221f9790aebSLuigi Rizzo * function may overwrite this value to forward this packet to a 222f9790aebSLuigi Rizzo * different ring index. 22337e3a6d3SLuigi Rizzo * This function must be set by netmap_bdg_ctl(). 224f9790aebSLuigi Rizzo */ 2254bf50f18SLuigi Rizzo struct netmap_bdg_ops bdg_ops; 226f9790aebSLuigi Rizzo 227f9790aebSLuigi Rizzo /* the forwarding table, MAC+ports. 
228f9790aebSLuigi Rizzo * XXX should be changed to an argument to be passed to 229f9790aebSLuigi Rizzo * the lookup function, and allocated on attach 230f9790aebSLuigi Rizzo */ 231f9790aebSLuigi Rizzo struct nm_hash_ent ht[NM_BDG_HASH]; 232847bf383SLuigi Rizzo 233847bf383SLuigi Rizzo #ifdef CONFIG_NET_NS 234847bf383SLuigi Rizzo struct net *ns; 235847bf383SLuigi Rizzo #endif /* CONFIG_NET_NS */ 236f9790aebSLuigi Rizzo }; 237f9790aebSLuigi Rizzo 2384bf50f18SLuigi Rizzo const char* 2394bf50f18SLuigi Rizzo netmap_bdg_name(struct netmap_vp_adapter *vp) 2404bf50f18SLuigi Rizzo { 2414bf50f18SLuigi Rizzo struct nm_bridge *b = vp->na_bdg; 2424bf50f18SLuigi Rizzo if (b == NULL) 2434bf50f18SLuigi Rizzo return NULL; 2444bf50f18SLuigi Rizzo return b->bdg_basename; 2454bf50f18SLuigi Rizzo } 2464bf50f18SLuigi Rizzo 247f9790aebSLuigi Rizzo 248847bf383SLuigi Rizzo #ifndef CONFIG_NET_NS 249f9790aebSLuigi Rizzo /* 250f9790aebSLuigi Rizzo * XXX in principle nm_bridges could be created dynamically 251f9790aebSLuigi Rizzo * Right now we have a static array and deletions are protected 252f9790aebSLuigi Rizzo * by an exclusive lock. 253f9790aebSLuigi Rizzo */ 25437e3a6d3SLuigi Rizzo static struct nm_bridge *nm_bridges; 255847bf383SLuigi Rizzo #endif /* !CONFIG_NET_NS */ 256f9790aebSLuigi Rizzo 257f9790aebSLuigi Rizzo 258f9790aebSLuigi Rizzo /* 259f9790aebSLuigi Rizzo * this is a slightly optimized copy routine which rounds 260f9790aebSLuigi Rizzo * to multiple of 64 bytes and is often faster than dealing 261f9790aebSLuigi Rizzo * with other odd sizes. We assume there is enough room 262f9790aebSLuigi Rizzo * in the source and destination buffers. 263f9790aebSLuigi Rizzo * 264f9790aebSLuigi Rizzo * XXX only for multiples of 64 bytes, non overlapped. 
/*
 * pkt_copy: copy l bytes from _src to _dst in blocks of eight 64-bit
 * words, i.e. the length is rounded up to the next multiple of 64.
 * Lengths of 1024 bytes or more are handed to memcpy() and copied
 * exactly. Nothing is copied when l <= 0.
 */
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
	uint64_t *from = _src;
	uint64_t *to = _dst;

	if (unlikely(l >= 1024)) {
		memcpy(to, from, l);
		return;
	}
	while (likely(l > 0)) {
		/* one 64-byte block, kept unrolled */
		to[0] = from[0];
		to[1] = from[1];
		to[2] = from[2];
		to[3] = from[3];
		to[4] = from[4];
		to[5] = from[5];
		to[6] = from[6];
		to[7] = from[7];
		to += 8;
		from += 8;
		l -= 64;
	}
}


/*
 * Return 1 if c may appear in a bridge or port identifier
 * (ASCII letter, decimal digit or underscore), 0 otherwise.
 */
static int
nm_is_id_char(const char c)
{
	if (c == '_')
		return 1;
	if (c >= '0' && c <= '9')
		return 1;
	if (c >= 'a' && c <= 'z')
		return 1;
	return (c >= 'A' && c <= 'Z');
}

/* Validate the name of a VALE bridge port and return the
 * position of the ":" character. */
*/ 29937e3a6d3SLuigi Rizzo static int 30037e3a6d3SLuigi Rizzo nm_vale_name_validate(const char *name) 30137e3a6d3SLuigi Rizzo { 30237e3a6d3SLuigi Rizzo int colon_pos = -1; 30337e3a6d3SLuigi Rizzo int i; 30437e3a6d3SLuigi Rizzo 30537e3a6d3SLuigi Rizzo if (!name || strlen(name) < strlen(NM_BDG_NAME)) { 30637e3a6d3SLuigi Rizzo return -1; 30737e3a6d3SLuigi Rizzo } 30837e3a6d3SLuigi Rizzo 30937e3a6d3SLuigi Rizzo for (i = 0; name[i]; i++) { 31037e3a6d3SLuigi Rizzo if (name[i] == ':') { 31137e3a6d3SLuigi Rizzo if (colon_pos != -1) { 31237e3a6d3SLuigi Rizzo return -1; 31337e3a6d3SLuigi Rizzo } 31437e3a6d3SLuigi Rizzo colon_pos = i; 31537e3a6d3SLuigi Rizzo } else if (!nm_is_id_char(name[i])) { 31637e3a6d3SLuigi Rizzo return -1; 31737e3a6d3SLuigi Rizzo } 31837e3a6d3SLuigi Rizzo } 31937e3a6d3SLuigi Rizzo 32037e3a6d3SLuigi Rizzo if (i >= IFNAMSIZ) { 32137e3a6d3SLuigi Rizzo return -1; 32237e3a6d3SLuigi Rizzo } 32337e3a6d3SLuigi Rizzo 32437e3a6d3SLuigi Rizzo return colon_pos; 32537e3a6d3SLuigi Rizzo } 32637e3a6d3SLuigi Rizzo 327f9790aebSLuigi Rizzo /* 328f9790aebSLuigi Rizzo * locate a bridge among the existing ones. 329f9790aebSLuigi Rizzo * MUST BE CALLED WITH NMG_LOCK() 330f9790aebSLuigi Rizzo * 331f9790aebSLuigi Rizzo * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME. 332f9790aebSLuigi Rizzo * We assume that this is called with a name of at least NM_NAME chars. 
333f9790aebSLuigi Rizzo */ 334f9790aebSLuigi Rizzo static struct nm_bridge * 335f9790aebSLuigi Rizzo nm_find_bridge(const char *name, int create) 336f9790aebSLuigi Rizzo { 33737e3a6d3SLuigi Rizzo int i, namelen; 338847bf383SLuigi Rizzo struct nm_bridge *b = NULL, *bridges; 339847bf383SLuigi Rizzo u_int num_bridges; 340f9790aebSLuigi Rizzo 341f9790aebSLuigi Rizzo NMG_LOCK_ASSERT(); 342f9790aebSLuigi Rizzo 343847bf383SLuigi Rizzo netmap_bns_getbridges(&bridges, &num_bridges); 344847bf383SLuigi Rizzo 34537e3a6d3SLuigi Rizzo namelen = nm_vale_name_validate(name); 34637e3a6d3SLuigi Rizzo if (namelen < 0) { 347f9790aebSLuigi Rizzo D("invalid bridge name %s", name ? name : NULL); 348f9790aebSLuigi Rizzo return NULL; 349f9790aebSLuigi Rizzo } 350f9790aebSLuigi Rizzo 351f9790aebSLuigi Rizzo /* lookup the name, remember empty slot if there is one */ 352847bf383SLuigi Rizzo for (i = 0; i < num_bridges; i++) { 353847bf383SLuigi Rizzo struct nm_bridge *x = bridges + i; 354f9790aebSLuigi Rizzo 355f9790aebSLuigi Rizzo if (x->bdg_active_ports == 0) { 356f9790aebSLuigi Rizzo if (create && b == NULL) 357f9790aebSLuigi Rizzo b = x; /* record empty slot */ 358f9790aebSLuigi Rizzo } else if (x->bdg_namelen != namelen) { 359f9790aebSLuigi Rizzo continue; 360f9790aebSLuigi Rizzo } else if (strncmp(name, x->bdg_basename, namelen) == 0) { 361f9790aebSLuigi Rizzo ND("found '%.*s' at %d", namelen, name, i); 362f9790aebSLuigi Rizzo b = x; 363f9790aebSLuigi Rizzo break; 364f9790aebSLuigi Rizzo } 365f9790aebSLuigi Rizzo } 366847bf383SLuigi Rizzo if (i == num_bridges && b) { /* name not found, can create entry */ 367f9790aebSLuigi Rizzo /* initialize the bridge */ 368f9790aebSLuigi Rizzo strncpy(b->bdg_basename, name, namelen); 369f9790aebSLuigi Rizzo ND("create new bridge %s with ports %d", b->bdg_basename, 370f9790aebSLuigi Rizzo b->bdg_active_ports); 371f9790aebSLuigi Rizzo b->bdg_namelen = namelen; 372f9790aebSLuigi Rizzo b->bdg_active_ports = 0; 373f9790aebSLuigi Rizzo for (i = 0; i < 
NM_BDG_MAXPORTS; i++) 374f9790aebSLuigi Rizzo b->bdg_port_index[i] = i; 375f9790aebSLuigi Rizzo /* set the default function */ 3764bf50f18SLuigi Rizzo b->bdg_ops.lookup = netmap_bdg_learning; 377f9790aebSLuigi Rizzo /* reset the MAC address table */ 378f9790aebSLuigi Rizzo bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH); 379847bf383SLuigi Rizzo NM_BNS_GET(b); 380f9790aebSLuigi Rizzo } 381f9790aebSLuigi Rizzo return b; 382f9790aebSLuigi Rizzo } 383f9790aebSLuigi Rizzo 384f9790aebSLuigi Rizzo 385f9790aebSLuigi Rizzo /* 386f9790aebSLuigi Rizzo * Free the forwarding tables for rings attached to switch ports. 387f9790aebSLuigi Rizzo */ 388f9790aebSLuigi Rizzo static void 389f9790aebSLuigi Rizzo nm_free_bdgfwd(struct netmap_adapter *na) 390f9790aebSLuigi Rizzo { 391f9790aebSLuigi Rizzo int nrings, i; 392f9790aebSLuigi Rizzo struct netmap_kring *kring; 393f9790aebSLuigi Rizzo 394f9790aebSLuigi Rizzo NMG_LOCK_ASSERT(); 39517885a7bSLuigi Rizzo nrings = na->num_tx_rings; 39617885a7bSLuigi Rizzo kring = na->tx_rings; 397f9790aebSLuigi Rizzo for (i = 0; i < nrings; i++) { 398f9790aebSLuigi Rizzo if (kring[i].nkr_ft) { 399c3e9b4dbSLuiz Otavio O Souza nm_os_free(kring[i].nkr_ft); 400f9790aebSLuigi Rizzo kring[i].nkr_ft = NULL; /* protect from freeing twice */ 401f9790aebSLuigi Rizzo } 402f9790aebSLuigi Rizzo } 403f9790aebSLuigi Rizzo } 404f9790aebSLuigi Rizzo 405f9790aebSLuigi Rizzo 406f9790aebSLuigi Rizzo /* 407f9790aebSLuigi Rizzo * Allocate the forwarding tables for the rings attached to the bridge ports. 
408f9790aebSLuigi Rizzo */ 409f9790aebSLuigi Rizzo static int 410f9790aebSLuigi Rizzo nm_alloc_bdgfwd(struct netmap_adapter *na) 411f9790aebSLuigi Rizzo { 412f9790aebSLuigi Rizzo int nrings, l, i, num_dstq; 413f9790aebSLuigi Rizzo struct netmap_kring *kring; 414f9790aebSLuigi Rizzo 415f9790aebSLuigi Rizzo NMG_LOCK_ASSERT(); 416f9790aebSLuigi Rizzo /* all port:rings + broadcast */ 417f9790aebSLuigi Rizzo num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1; 418f9790aebSLuigi Rizzo l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX; 419f9790aebSLuigi Rizzo l += sizeof(struct nm_bdg_q) * num_dstq; 420f9790aebSLuigi Rizzo l += sizeof(uint16_t) * NM_BDG_BATCH_MAX; 421f9790aebSLuigi Rizzo 422847bf383SLuigi Rizzo nrings = netmap_real_rings(na, NR_TX); 423f9790aebSLuigi Rizzo kring = na->tx_rings; 424f9790aebSLuigi Rizzo for (i = 0; i < nrings; i++) { 425f9790aebSLuigi Rizzo struct nm_bdg_fwd *ft; 426f9790aebSLuigi Rizzo struct nm_bdg_q *dstq; 427f9790aebSLuigi Rizzo int j; 428f9790aebSLuigi Rizzo 429c3e9b4dbSLuiz Otavio O Souza ft = nm_os_malloc(l); 430f9790aebSLuigi Rizzo if (!ft) { 431f9790aebSLuigi Rizzo nm_free_bdgfwd(na); 432f9790aebSLuigi Rizzo return ENOMEM; 433f9790aebSLuigi Rizzo } 434f9790aebSLuigi Rizzo dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX); 435f9790aebSLuigi Rizzo for (j = 0; j < num_dstq; j++) { 436f9790aebSLuigi Rizzo dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL; 437f9790aebSLuigi Rizzo dstq[j].bq_len = 0; 438f9790aebSLuigi Rizzo } 439f9790aebSLuigi Rizzo kring[i].nkr_ft = ft; 440f9790aebSLuigi Rizzo } 441f9790aebSLuigi Rizzo return 0; 442f9790aebSLuigi Rizzo } 443f9790aebSLuigi Rizzo 444f9790aebSLuigi Rizzo 4454bf50f18SLuigi Rizzo /* remove from bridge b the ports in slots hw and sw 4464bf50f18SLuigi Rizzo * (sw can be -1 if not needed) 4474bf50f18SLuigi Rizzo */ 448f9790aebSLuigi Rizzo static void 449f9790aebSLuigi Rizzo netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw) 450f9790aebSLuigi Rizzo { 451f9790aebSLuigi Rizzo int s_hw = hw, 
s_sw = sw; 452f9790aebSLuigi Rizzo int i, lim =b->bdg_active_ports; 453f9790aebSLuigi Rizzo uint8_t tmp[NM_BDG_MAXPORTS]; 454f9790aebSLuigi Rizzo 455f9790aebSLuigi Rizzo /* 456f9790aebSLuigi Rizzo New algorithm: 457f9790aebSLuigi Rizzo make a copy of bdg_port_index; 458f9790aebSLuigi Rizzo lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port 459f9790aebSLuigi Rizzo in the array of bdg_port_index, replacing them with 460f9790aebSLuigi Rizzo entries from the bottom of the array; 461f9790aebSLuigi Rizzo decrement bdg_active_ports; 462f9790aebSLuigi Rizzo acquire BDG_WLOCK() and copy back the array. 463f9790aebSLuigi Rizzo */ 464f9790aebSLuigi Rizzo 465f0ea3689SLuigi Rizzo if (netmap_verbose) 466f9790aebSLuigi Rizzo D("detach %d and %d (lim %d)", hw, sw, lim); 467f9790aebSLuigi Rizzo /* make a copy of the list of active ports, update it, 468f9790aebSLuigi Rizzo * and then copy back within BDG_WLOCK(). 469f9790aebSLuigi Rizzo */ 470f9790aebSLuigi Rizzo memcpy(tmp, b->bdg_port_index, sizeof(tmp)); 471f9790aebSLuigi Rizzo for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) { 472f9790aebSLuigi Rizzo if (hw >= 0 && tmp[i] == hw) { 473f9790aebSLuigi Rizzo ND("detach hw %d at %d", hw, i); 474f9790aebSLuigi Rizzo lim--; /* point to last active port */ 475f9790aebSLuigi Rizzo tmp[i] = tmp[lim]; /* swap with i */ 476f9790aebSLuigi Rizzo tmp[lim] = hw; /* now this is inactive */ 477f9790aebSLuigi Rizzo hw = -1; 478f9790aebSLuigi Rizzo } else if (sw >= 0 && tmp[i] == sw) { 479f9790aebSLuigi Rizzo ND("detach sw %d at %d", sw, i); 480f9790aebSLuigi Rizzo lim--; 481f9790aebSLuigi Rizzo tmp[i] = tmp[lim]; 482f9790aebSLuigi Rizzo tmp[lim] = sw; 483f9790aebSLuigi Rizzo sw = -1; 484f9790aebSLuigi Rizzo } else { 485f9790aebSLuigi Rizzo i++; 486f9790aebSLuigi Rizzo } 487f9790aebSLuigi Rizzo } 488f9790aebSLuigi Rizzo if (hw >= 0 || sw >= 0) { 489f9790aebSLuigi Rizzo D("XXX delete failed hw %d sw %d, should panic...", hw, sw); 490f9790aebSLuigi Rizzo } 491f9790aebSLuigi Rizzo 492f9790aebSLuigi Rizzo 
BDG_WLOCK(b); 4934bf50f18SLuigi Rizzo if (b->bdg_ops.dtor) 4944bf50f18SLuigi Rizzo b->bdg_ops.dtor(b->bdg_ports[s_hw]); 495f9790aebSLuigi Rizzo b->bdg_ports[s_hw] = NULL; 496f9790aebSLuigi Rizzo if (s_sw >= 0) { 497f9790aebSLuigi Rizzo b->bdg_ports[s_sw] = NULL; 498f9790aebSLuigi Rizzo } 499f9790aebSLuigi Rizzo memcpy(b->bdg_port_index, tmp, sizeof(tmp)); 500f9790aebSLuigi Rizzo b->bdg_active_ports = lim; 501f9790aebSLuigi Rizzo BDG_WUNLOCK(b); 502f9790aebSLuigi Rizzo 503f9790aebSLuigi Rizzo ND("now %d active ports", lim); 504f9790aebSLuigi Rizzo if (lim == 0) { 505f9790aebSLuigi Rizzo ND("marking bridge %s as free", b->bdg_basename); 5064bf50f18SLuigi Rizzo bzero(&b->bdg_ops, sizeof(b->bdg_ops)); 507847bf383SLuigi Rizzo NM_BNS_PUT(b); 508f9790aebSLuigi Rizzo } 509f9790aebSLuigi Rizzo } 510f9790aebSLuigi Rizzo 5114bf50f18SLuigi Rizzo /* nm_bdg_ctl callback for VALE ports */ 5124bf50f18SLuigi Rizzo static int 5134bf50f18SLuigi Rizzo netmap_vp_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) 514f9790aebSLuigi Rizzo { 515f9790aebSLuigi Rizzo struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; 516f9790aebSLuigi Rizzo struct nm_bridge *b = vpna->na_bdg; 517f9790aebSLuigi Rizzo 51837e3a6d3SLuigi Rizzo (void)nmr; // XXX merge ? 
5194bf50f18SLuigi Rizzo if (attach) 5204bf50f18SLuigi Rizzo return 0; /* nothing to do */ 5214bf50f18SLuigi Rizzo if (b) { 5224bf50f18SLuigi Rizzo netmap_set_all_rings(na, 0 /* disable */); 5234bf50f18SLuigi Rizzo netmap_bdg_detach_common(b, vpna->bdg_port, -1); 5244bf50f18SLuigi Rizzo vpna->na_bdg = NULL; 5254bf50f18SLuigi Rizzo netmap_set_all_rings(na, 1 /* enable */); 5264bf50f18SLuigi Rizzo } 5274bf50f18SLuigi Rizzo /* I have took reference just for attach */ 5284bf50f18SLuigi Rizzo netmap_adapter_put(na); 5294bf50f18SLuigi Rizzo return 0; 5304bf50f18SLuigi Rizzo } 5314bf50f18SLuigi Rizzo 5324bf50f18SLuigi Rizzo /* nm_dtor callback for ephemeral VALE ports */ 5334bf50f18SLuigi Rizzo static void 5344bf50f18SLuigi Rizzo netmap_vp_dtor(struct netmap_adapter *na) 5354bf50f18SLuigi Rizzo { 5364bf50f18SLuigi Rizzo struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na; 5374bf50f18SLuigi Rizzo struct nm_bridge *b = vpna->na_bdg; 5384bf50f18SLuigi Rizzo 5394bf50f18SLuigi Rizzo ND("%s has %d references", na->name, na->na_refcount); 540f9790aebSLuigi Rizzo 541f9790aebSLuigi Rizzo if (b) { 542f9790aebSLuigi Rizzo netmap_bdg_detach_common(b, vpna->bdg_port, -1); 543f9790aebSLuigi Rizzo } 544c3e9b4dbSLuiz Otavio O Souza 545c3e9b4dbSLuiz Otavio O Souza if (vpna->autodelete && na->ifp != NULL) { 546c3e9b4dbSLuiz Otavio O Souza ND("releasing %s", na->ifp->if_xname); 547c3e9b4dbSLuiz Otavio O Souza NMG_UNLOCK(); 548c3e9b4dbSLuiz Otavio O Souza nm_os_vi_detach(na->ifp); 549c3e9b4dbSLuiz Otavio O Souza NMG_LOCK(); 550c3e9b4dbSLuiz Otavio O Souza } 551f9790aebSLuigi Rizzo } 552f9790aebSLuigi Rizzo 5534bf50f18SLuigi Rizzo /* remove a persistent VALE port from the system */ 5544bf50f18SLuigi Rizzo static int 5554bf50f18SLuigi Rizzo nm_vi_destroy(const char *name) 5564bf50f18SLuigi Rizzo { 5574bf50f18SLuigi Rizzo struct ifnet *ifp; 558c3e9b4dbSLuiz Otavio O Souza struct netmap_vp_adapter *vpna; 5594bf50f18SLuigi Rizzo int error; 5604bf50f18SLuigi Rizzo 5614bf50f18SLuigi Rizzo 
ifp = ifunit_ref(name); 5624bf50f18SLuigi Rizzo if (!ifp) 5634bf50f18SLuigi Rizzo return ENXIO; 5644bf50f18SLuigi Rizzo NMG_LOCK(); 5654bf50f18SLuigi Rizzo /* make sure this is actually a VALE port */ 56637e3a6d3SLuigi Rizzo if (!NM_NA_VALID(ifp) || NA(ifp)->nm_register != netmap_vp_reg) { 5674bf50f18SLuigi Rizzo error = EINVAL; 5684bf50f18SLuigi Rizzo goto err; 5694bf50f18SLuigi Rizzo } 5704bf50f18SLuigi Rizzo 571c3e9b4dbSLuiz Otavio O Souza vpna = (struct netmap_vp_adapter *)NA(ifp); 572c3e9b4dbSLuiz Otavio O Souza 573c3e9b4dbSLuiz Otavio O Souza /* we can only destroy ports that were created via NETMAP_BDG_NEWIF */ 574c3e9b4dbSLuiz Otavio O Souza if (vpna->autodelete) { 575c3e9b4dbSLuiz Otavio O Souza error = EINVAL; 576c3e9b4dbSLuiz Otavio O Souza goto err; 577c3e9b4dbSLuiz Otavio O Souza } 578c3e9b4dbSLuiz Otavio O Souza 579c3e9b4dbSLuiz Otavio O Souza /* also make sure that nobody is using the inferface */ 580c3e9b4dbSLuiz Otavio O Souza if (NETMAP_OWNED_BY_ANY(&vpna->up) || 581c3e9b4dbSLuiz Otavio O Souza vpna->up.na_refcount > 1 /* any ref besides the one in nm_vi_create()? 
*/) { 5824bf50f18SLuigi Rizzo error = EBUSY; 5834bf50f18SLuigi Rizzo goto err; 5844bf50f18SLuigi Rizzo } 585c3e9b4dbSLuiz Otavio O Souza 5864bf50f18SLuigi Rizzo NMG_UNLOCK(); 5874bf50f18SLuigi Rizzo 5884bf50f18SLuigi Rizzo D("destroying a persistent vale interface %s", ifp->if_xname); 5894bf50f18SLuigi Rizzo /* Linux requires all the references are released 5904bf50f18SLuigi Rizzo * before unregister 5914bf50f18SLuigi Rizzo */ 5924bf50f18SLuigi Rizzo netmap_detach(ifp); 593c3e9b4dbSLuiz Otavio O Souza if_rele(ifp); 59437e3a6d3SLuigi Rizzo nm_os_vi_detach(ifp); 5954bf50f18SLuigi Rizzo return 0; 5964bf50f18SLuigi Rizzo 5974bf50f18SLuigi Rizzo err: 5984bf50f18SLuigi Rizzo NMG_UNLOCK(); 5994bf50f18SLuigi Rizzo if_rele(ifp); 6004bf50f18SLuigi Rizzo return error; 6014bf50f18SLuigi Rizzo } 6024bf50f18SLuigi Rizzo 603c3e9b4dbSLuiz Otavio O Souza static int 604c3e9b4dbSLuiz Otavio O Souza nm_update_info(struct nmreq *nmr, struct netmap_adapter *na) 605c3e9b4dbSLuiz Otavio O Souza { 606c3e9b4dbSLuiz Otavio O Souza nmr->nr_rx_rings = na->num_rx_rings; 607c3e9b4dbSLuiz Otavio O Souza nmr->nr_tx_rings = na->num_tx_rings; 608c3e9b4dbSLuiz Otavio O Souza nmr->nr_rx_slots = na->num_rx_desc; 609c3e9b4dbSLuiz Otavio O Souza nmr->nr_tx_slots = na->num_tx_desc; 610c3e9b4dbSLuiz Otavio O Souza return netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, NULL, &nmr->nr_arg2); 611c3e9b4dbSLuiz Otavio O Souza } 612c3e9b4dbSLuiz Otavio O Souza 6134bf50f18SLuigi Rizzo /* 6144bf50f18SLuigi Rizzo * Create a virtual interface registered to the system. 6154bf50f18SLuigi Rizzo * The interface will be attached to a bridge later. 
6164bf50f18SLuigi Rizzo */ 617c3e9b4dbSLuiz Otavio O Souza int 618c3e9b4dbSLuiz Otavio O Souza netmap_vi_create(struct nmreq *nmr, int autodelete) 6194bf50f18SLuigi Rizzo { 6204bf50f18SLuigi Rizzo struct ifnet *ifp; 6214bf50f18SLuigi Rizzo struct netmap_vp_adapter *vpna; 622c3e9b4dbSLuiz Otavio O Souza struct netmap_mem_d *nmd = NULL; 6234bf50f18SLuigi Rizzo int error; 6244bf50f18SLuigi Rizzo 6254bf50f18SLuigi Rizzo /* don't include VALE prefix */ 62637e3a6d3SLuigi Rizzo if (!strncmp(nmr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME))) 6274bf50f18SLuigi Rizzo return EINVAL; 6284bf50f18SLuigi Rizzo ifp = ifunit_ref(nmr->nr_name); 6294bf50f18SLuigi Rizzo if (ifp) { /* already exist, cannot create new one */ 630c3e9b4dbSLuiz Otavio O Souza error = EEXIST; 631c3e9b4dbSLuiz Otavio O Souza NMG_LOCK(); 632c3e9b4dbSLuiz Otavio O Souza if (NM_NA_VALID(ifp)) { 633c3e9b4dbSLuiz Otavio O Souza int update_err = nm_update_info(nmr, NA(ifp)); 634c3e9b4dbSLuiz Otavio O Souza if (update_err) 635c3e9b4dbSLuiz Otavio O Souza error = update_err; 636c3e9b4dbSLuiz Otavio O Souza } 637c3e9b4dbSLuiz Otavio O Souza NMG_UNLOCK(); 6384bf50f18SLuigi Rizzo if_rele(ifp); 639c3e9b4dbSLuiz Otavio O Souza return error; 6404bf50f18SLuigi Rizzo } 64137e3a6d3SLuigi Rizzo error = nm_os_vi_persist(nmr->nr_name, &ifp); 6424bf50f18SLuigi Rizzo if (error) 6434bf50f18SLuigi Rizzo return error; 6444bf50f18SLuigi Rizzo 6454bf50f18SLuigi Rizzo NMG_LOCK(); 646c3e9b4dbSLuiz Otavio O Souza if (nmr->nr_arg2) { 647c3e9b4dbSLuiz Otavio O Souza nmd = netmap_mem_find(nmr->nr_arg2); 648c3e9b4dbSLuiz Otavio O Souza if (nmd == NULL) { 649c3e9b4dbSLuiz Otavio O Souza error = EINVAL; 650c3e9b4dbSLuiz Otavio O Souza goto err_1; 651c3e9b4dbSLuiz Otavio O Souza } 652c3e9b4dbSLuiz Otavio O Souza } 6534bf50f18SLuigi Rizzo /* netmap_vp_create creates a struct netmap_vp_adapter */ 654c3e9b4dbSLuiz Otavio O Souza error = netmap_vp_create(nmr, ifp, nmd, &vpna); 6554bf50f18SLuigi Rizzo if (error) { 6564bf50f18SLuigi Rizzo D("error %d", 
error); 657c3e9b4dbSLuiz Otavio O Souza goto err_1; 6584bf50f18SLuigi Rizzo } 6594bf50f18SLuigi Rizzo /* persist-specific routines */ 6604bf50f18SLuigi Rizzo vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl; 661c3e9b4dbSLuiz Otavio O Souza if (!autodelete) { 6624bf50f18SLuigi Rizzo netmap_adapter_get(&vpna->up); 663c3e9b4dbSLuiz Otavio O Souza } else { 664c3e9b4dbSLuiz Otavio O Souza vpna->autodelete = 1; 665c3e9b4dbSLuiz Otavio O Souza } 66637e3a6d3SLuigi Rizzo NM_ATTACH_NA(ifp, &vpna->up); 667c3e9b4dbSLuiz Otavio O Souza /* return the updated info */ 668c3e9b4dbSLuiz Otavio O Souza error = nm_update_info(nmr, &vpna->up); 669c3e9b4dbSLuiz Otavio O Souza if (error) { 670c3e9b4dbSLuiz Otavio O Souza goto err_2; 671c3e9b4dbSLuiz Otavio O Souza } 672c3e9b4dbSLuiz Otavio O Souza D("returning nr_arg2 %d", nmr->nr_arg2); 673c3e9b4dbSLuiz Otavio O Souza if (nmd) 674c3e9b4dbSLuiz Otavio O Souza netmap_mem_put(nmd); 6754bf50f18SLuigi Rizzo NMG_UNLOCK(); 6764bf50f18SLuigi Rizzo D("created %s", ifp->if_xname); 6774bf50f18SLuigi Rizzo return 0; 678c3e9b4dbSLuiz Otavio O Souza 679c3e9b4dbSLuiz Otavio O Souza err_2: 680c3e9b4dbSLuiz Otavio O Souza netmap_detach(ifp); 681c3e9b4dbSLuiz Otavio O Souza err_1: 682c3e9b4dbSLuiz Otavio O Souza if (nmd) 683c3e9b4dbSLuiz Otavio O Souza netmap_mem_put(nmd); 684c3e9b4dbSLuiz Otavio O Souza NMG_UNLOCK(); 685c3e9b4dbSLuiz Otavio O Souza nm_os_vi_detach(ifp); 686c3e9b4dbSLuiz Otavio O Souza 687c3e9b4dbSLuiz Otavio O Souza return error; 6884bf50f18SLuigi Rizzo } 68917885a7bSLuigi Rizzo 69017885a7bSLuigi Rizzo /* Try to get a reference to a netmap adapter attached to a VALE switch. 69117885a7bSLuigi Rizzo * If the adapter is found (or is created), this function returns 0, a 69217885a7bSLuigi Rizzo * non NULL pointer is returned into *na, and the caller holds a 69317885a7bSLuigi Rizzo * reference to the adapter. 
69417885a7bSLuigi Rizzo * If an adapter is not found, then no reference is grabbed and the 69517885a7bSLuigi Rizzo * function returns an error code, or 0 if there is just a VALE prefix 69617885a7bSLuigi Rizzo * mismatch. Therefore the caller holds a reference when 69717885a7bSLuigi Rizzo * (*na != NULL && return == 0). 69817885a7bSLuigi Rizzo */ 699f9790aebSLuigi Rizzo int 700c3e9b4dbSLuiz Otavio O Souza netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, 701c3e9b4dbSLuiz Otavio O Souza struct netmap_mem_d *nmd, int create) 702f9790aebSLuigi Rizzo { 7034bf50f18SLuigi Rizzo char *nr_name = nmr->nr_name; 7044bf50f18SLuigi Rizzo const char *ifname; 705c3e9b4dbSLuiz Otavio O Souza struct ifnet *ifp = NULL; 706f9790aebSLuigi Rizzo int error = 0; 7074bf50f18SLuigi Rizzo struct netmap_vp_adapter *vpna, *hostna = NULL; 708f9790aebSLuigi Rizzo struct nm_bridge *b; 709f9790aebSLuigi Rizzo int i, j, cand = -1, cand2 = -1; 710f9790aebSLuigi Rizzo int needed; 711f9790aebSLuigi Rizzo 712f9790aebSLuigi Rizzo *na = NULL; /* default return value */ 713f9790aebSLuigi Rizzo 714f9790aebSLuigi Rizzo /* first try to see if this is a bridge port. */ 715f9790aebSLuigi Rizzo NMG_LOCK_ASSERT(); 71637e3a6d3SLuigi Rizzo if (strncmp(nr_name, NM_BDG_NAME, sizeof(NM_BDG_NAME) - 1)) { 717f9790aebSLuigi Rizzo return 0; /* no error, but no VALE prefix */ 718f9790aebSLuigi Rizzo } 719f9790aebSLuigi Rizzo 7204bf50f18SLuigi Rizzo b = nm_find_bridge(nr_name, create); 721f9790aebSLuigi Rizzo if (b == NULL) { 7224bf50f18SLuigi Rizzo D("no bridges available for '%s'", nr_name); 723f2637526SLuigi Rizzo return (create ? ENOMEM : ENXIO); 724f9790aebSLuigi Rizzo } 7254bf50f18SLuigi Rizzo if (strlen(nr_name) < b->bdg_namelen) /* impossible */ 7264bf50f18SLuigi Rizzo panic("x"); 727f9790aebSLuigi Rizzo 728f9790aebSLuigi Rizzo /* Now we are sure that name starts with the bridge's name, 729f9790aebSLuigi Rizzo * lookup the port in the bridge. We need to scan the entire 730f9790aebSLuigi Rizzo * list. 
It is not important to hold a WLOCK on the bridge 731f9790aebSLuigi Rizzo * during the search because NMG_LOCK already guarantees 732f9790aebSLuigi Rizzo * that there are no other possible writers. 733f9790aebSLuigi Rizzo */ 734f9790aebSLuigi Rizzo 735f9790aebSLuigi Rizzo /* lookup in the local list of ports */ 736f9790aebSLuigi Rizzo for (j = 0; j < b->bdg_active_ports; j++) { 737f9790aebSLuigi Rizzo i = b->bdg_port_index[j]; 738f9790aebSLuigi Rizzo vpna = b->bdg_ports[i]; 739f9790aebSLuigi Rizzo // KASSERT(na != NULL); 740847bf383SLuigi Rizzo ND("checking %s", vpna->up.name); 7414bf50f18SLuigi Rizzo if (!strcmp(vpna->up.name, nr_name)) { 742f9790aebSLuigi Rizzo netmap_adapter_get(&vpna->up); 7434bf50f18SLuigi Rizzo ND("found existing if %s refs %d", nr_name) 7444bf50f18SLuigi Rizzo *na = &vpna->up; 745f9790aebSLuigi Rizzo return 0; 746f9790aebSLuigi Rizzo } 747f9790aebSLuigi Rizzo } 748f9790aebSLuigi Rizzo /* not found, should we create it? */ 749f9790aebSLuigi Rizzo if (!create) 750f9790aebSLuigi Rizzo return ENXIO; 751f9790aebSLuigi Rizzo /* yes we should, see if we have space to attach entries */ 752f9790aebSLuigi Rizzo needed = 2; /* in some cases we only need 1 */ 753f9790aebSLuigi Rizzo if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) { 754f9790aebSLuigi Rizzo D("bridge full %d, cannot create new port", b->bdg_active_ports); 755f2637526SLuigi Rizzo return ENOMEM; 756f9790aebSLuigi Rizzo } 757f9790aebSLuigi Rizzo /* record the next two ports available, but do not allocate yet */ 758f9790aebSLuigi Rizzo cand = b->bdg_port_index[b->bdg_active_ports]; 759f9790aebSLuigi Rizzo cand2 = b->bdg_port_index[b->bdg_active_ports + 1]; 760f9790aebSLuigi Rizzo ND("+++ bridge %s port %s used %d avail %d %d", 7614bf50f18SLuigi Rizzo b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2); 762f9790aebSLuigi Rizzo 763f9790aebSLuigi Rizzo /* 764f9790aebSLuigi Rizzo * try see if there is a matching NIC with this name 765f9790aebSLuigi Rizzo * (after the bridge's name) 
766f9790aebSLuigi Rizzo */ 7674bf50f18SLuigi Rizzo ifname = nr_name + b->bdg_namelen + 1; 7684bf50f18SLuigi Rizzo ifp = ifunit_ref(ifname); 7694bf50f18SLuigi Rizzo if (!ifp) { 7704bf50f18SLuigi Rizzo /* Create an ephemeral virtual port 7714bf50f18SLuigi Rizzo * This block contains all the ephemeral-specific logics 7724bf50f18SLuigi Rizzo */ 773f9790aebSLuigi Rizzo if (nmr->nr_cmd) { 774f9790aebSLuigi Rizzo /* nr_cmd must be 0 for a virtual port */ 775c3e9b4dbSLuiz Otavio O Souza error = EINVAL; 776c3e9b4dbSLuiz Otavio O Souza goto out; 777f9790aebSLuigi Rizzo } 778f9790aebSLuigi Rizzo 779f9790aebSLuigi Rizzo /* bdg_netmap_attach creates a struct netmap_adapter */ 780c3e9b4dbSLuiz Otavio O Souza error = netmap_vp_create(nmr, NULL, nmd, &vpna); 781f9790aebSLuigi Rizzo if (error) { 782f9790aebSLuigi Rizzo D("error %d", error); 783c3e9b4dbSLuiz Otavio O Souza goto out; 784f9790aebSLuigi Rizzo } 7854bf50f18SLuigi Rizzo /* shortcut - we can skip get_hw_na(), 7864bf50f18SLuigi Rizzo * ownership check and nm_bdg_attach() 7874bf50f18SLuigi Rizzo */ 7884bf50f18SLuigi Rizzo } else { 7894bf50f18SLuigi Rizzo struct netmap_adapter *hw; 790f9790aebSLuigi Rizzo 791c3e9b4dbSLuiz Otavio O Souza error = netmap_get_hw_na(ifp, nmd, &hw); 7924bf50f18SLuigi Rizzo if (error || hw == NULL) 793f9790aebSLuigi Rizzo goto out; 794f9790aebSLuigi Rizzo 7954bf50f18SLuigi Rizzo /* host adapter might not be created */ 7964bf50f18SLuigi Rizzo error = hw->nm_bdg_attach(nr_name, hw); 7974bf50f18SLuigi Rizzo if (error) 798f9790aebSLuigi Rizzo goto out; 7994bf50f18SLuigi Rizzo vpna = hw->na_vp; 8004bf50f18SLuigi Rizzo hostna = hw->na_hostvp; 8014bf50f18SLuigi Rizzo if (nmr->nr_arg1 != NETMAP_BDG_HOST) 8024bf50f18SLuigi Rizzo hostna = NULL; 803f9790aebSLuigi Rizzo } 804f9790aebSLuigi Rizzo 805f9790aebSLuigi Rizzo BDG_WLOCK(b); 806f9790aebSLuigi Rizzo vpna->bdg_port = cand; 807f9790aebSLuigi Rizzo ND("NIC %p to bridge port %d", vpna, cand); 808f9790aebSLuigi Rizzo /* bind the port to the bridge (virtual 
ports are not active) */ 809f9790aebSLuigi Rizzo b->bdg_ports[cand] = vpna; 810f9790aebSLuigi Rizzo vpna->na_bdg = b; 811f9790aebSLuigi Rizzo b->bdg_active_ports++; 8124bf50f18SLuigi Rizzo if (hostna != NULL) { 813f9790aebSLuigi Rizzo /* also bind the host stack to the bridge */ 814f9790aebSLuigi Rizzo b->bdg_ports[cand2] = hostna; 815f9790aebSLuigi Rizzo hostna->bdg_port = cand2; 816f9790aebSLuigi Rizzo hostna->na_bdg = b; 817f9790aebSLuigi Rizzo b->bdg_active_ports++; 818f9790aebSLuigi Rizzo ND("host %p to bridge port %d", hostna, cand2); 819f9790aebSLuigi Rizzo } 8204bf50f18SLuigi Rizzo ND("if %s refs %d", ifname, vpna->up.na_refcount); 821f9790aebSLuigi Rizzo BDG_WUNLOCK(b); 8224bf50f18SLuigi Rizzo *na = &vpna->up; 8234bf50f18SLuigi Rizzo netmap_adapter_get(*na); 824f9790aebSLuigi Rizzo 825f9790aebSLuigi Rizzo out: 826c3e9b4dbSLuiz Otavio O Souza if (ifp) 827f9790aebSLuigi Rizzo if_rele(ifp); 828f9790aebSLuigi Rizzo 829f9790aebSLuigi Rizzo return error; 830f9790aebSLuigi Rizzo } 831f9790aebSLuigi Rizzo 832f9790aebSLuigi Rizzo 8334bf50f18SLuigi Rizzo /* Process NETMAP_BDG_ATTACH */ 834f9790aebSLuigi Rizzo static int 8354bf50f18SLuigi Rizzo nm_bdg_ctl_attach(struct nmreq *nmr) 836f9790aebSLuigi Rizzo { 837f9790aebSLuigi Rizzo struct netmap_adapter *na; 838c3e9b4dbSLuiz Otavio O Souza struct netmap_mem_d *nmd = NULL; 839f9790aebSLuigi Rizzo int error; 840f9790aebSLuigi Rizzo 841f9790aebSLuigi Rizzo NMG_LOCK(); 842f2637526SLuigi Rizzo 843c3e9b4dbSLuiz Otavio O Souza if (nmr->nr_arg2) { 844c3e9b4dbSLuiz Otavio O Souza nmd = netmap_mem_find(nmr->nr_arg2); 845c3e9b4dbSLuiz Otavio O Souza if (nmd == NULL) { 846c3e9b4dbSLuiz Otavio O Souza error = EINVAL; 847c3e9b4dbSLuiz Otavio O Souza goto unlock_exit; 848c3e9b4dbSLuiz Otavio O Souza } 849c3e9b4dbSLuiz Otavio O Souza } 850c3e9b4dbSLuiz Otavio O Souza 851c3e9b4dbSLuiz Otavio O Souza error = netmap_get_bdg_na(nmr, &na, nmd, 1 /* create if not exists */); 8524bf50f18SLuigi Rizzo if (error) /* no device */ 
853f9790aebSLuigi Rizzo goto unlock_exit; 854f2637526SLuigi Rizzo 85517885a7bSLuigi Rizzo if (na == NULL) { /* VALE prefix missing */ 856f9790aebSLuigi Rizzo error = EINVAL; 85717885a7bSLuigi Rizzo goto unlock_exit; 858f9790aebSLuigi Rizzo } 859f9790aebSLuigi Rizzo 8604bf50f18SLuigi Rizzo if (NETMAP_OWNED_BY_ANY(na)) { 861f9790aebSLuigi Rizzo error = EBUSY; 862f9790aebSLuigi Rizzo goto unref_exit; 863f9790aebSLuigi Rizzo } 864f9790aebSLuigi Rizzo 8654bf50f18SLuigi Rizzo if (na->nm_bdg_ctl) { 8664bf50f18SLuigi Rizzo /* nop for VALE ports. The bwrap needs to put the hwna 8674bf50f18SLuigi Rizzo * in netmap mode (see netmap_bwrap_bdg_ctl) 8684bf50f18SLuigi Rizzo */ 8694bf50f18SLuigi Rizzo error = na->nm_bdg_ctl(na, nmr, 1); 8704bf50f18SLuigi Rizzo if (error) 871f9790aebSLuigi Rizzo goto unref_exit; 8724bf50f18SLuigi Rizzo ND("registered %s to netmap-mode", na->name); 873f9790aebSLuigi Rizzo } 874f9790aebSLuigi Rizzo NMG_UNLOCK(); 875f9790aebSLuigi Rizzo return 0; 876f9790aebSLuigi Rizzo 877f9790aebSLuigi Rizzo unref_exit: 878f9790aebSLuigi Rizzo netmap_adapter_put(na); 879f9790aebSLuigi Rizzo unlock_exit: 880f9790aebSLuigi Rizzo NMG_UNLOCK(); 881f9790aebSLuigi Rizzo return error; 882f9790aebSLuigi Rizzo } 883f9790aebSLuigi Rizzo 88437e3a6d3SLuigi Rizzo static inline int 88537e3a6d3SLuigi Rizzo nm_is_bwrap(struct netmap_adapter *na) 88637e3a6d3SLuigi Rizzo { 88737e3a6d3SLuigi Rizzo return na->nm_register == netmap_bwrap_reg; 88837e3a6d3SLuigi Rizzo } 88917885a7bSLuigi Rizzo 8904bf50f18SLuigi Rizzo /* process NETMAP_BDG_DETACH */ 891f9790aebSLuigi Rizzo static int 8924bf50f18SLuigi Rizzo nm_bdg_ctl_detach(struct nmreq *nmr) 893f9790aebSLuigi Rizzo { 894f9790aebSLuigi Rizzo struct netmap_adapter *na; 895f9790aebSLuigi Rizzo int error; 896f9790aebSLuigi Rizzo 897f9790aebSLuigi Rizzo NMG_LOCK(); 898c3e9b4dbSLuiz Otavio O Souza error = netmap_get_bdg_na(nmr, &na, NULL, 0 /* don't create */); 899f9790aebSLuigi Rizzo if (error) { /* no device, or another bridge or user owns 
the device */ 900f9790aebSLuigi Rizzo goto unlock_exit; 901f9790aebSLuigi Rizzo } 902f2637526SLuigi Rizzo 90317885a7bSLuigi Rizzo if (na == NULL) { /* VALE prefix missing */ 904f9790aebSLuigi Rizzo error = EINVAL; 90517885a7bSLuigi Rizzo goto unlock_exit; 90637e3a6d3SLuigi Rizzo } else if (nm_is_bwrap(na) && 90737e3a6d3SLuigi Rizzo ((struct netmap_bwrap_adapter *)na)->na_polling_state) { 90837e3a6d3SLuigi Rizzo /* Don't detach a NIC with polling */ 90937e3a6d3SLuigi Rizzo error = EBUSY; 91037e3a6d3SLuigi Rizzo netmap_adapter_put(na); 91137e3a6d3SLuigi Rizzo goto unlock_exit; 912f9790aebSLuigi Rizzo } 9134bf50f18SLuigi Rizzo if (na->nm_bdg_ctl) { 9144bf50f18SLuigi Rizzo /* remove the port from bridge. The bwrap 9154bf50f18SLuigi Rizzo * also needs to put the hwna in normal mode 9164bf50f18SLuigi Rizzo */ 9174bf50f18SLuigi Rizzo error = na->nm_bdg_ctl(na, nmr, 0); 918f9790aebSLuigi Rizzo } 919f9790aebSLuigi Rizzo 920f9790aebSLuigi Rizzo netmap_adapter_put(na); 921f9790aebSLuigi Rizzo unlock_exit: 922f9790aebSLuigi Rizzo NMG_UNLOCK(); 923f9790aebSLuigi Rizzo return error; 924f9790aebSLuigi Rizzo 925f9790aebSLuigi Rizzo } 926f9790aebSLuigi Rizzo 92737e3a6d3SLuigi Rizzo struct nm_bdg_polling_state; 92837e3a6d3SLuigi Rizzo struct 92937e3a6d3SLuigi Rizzo nm_bdg_kthread { 930c3e9b4dbSLuiz Otavio O Souza struct nm_kctx *nmk; 93137e3a6d3SLuigi Rizzo u_int qfirst; 93237e3a6d3SLuigi Rizzo u_int qlast; 93337e3a6d3SLuigi Rizzo struct nm_bdg_polling_state *bps; 93437e3a6d3SLuigi Rizzo }; 93537e3a6d3SLuigi Rizzo 93637e3a6d3SLuigi Rizzo struct nm_bdg_polling_state { 93737e3a6d3SLuigi Rizzo bool configured; 93837e3a6d3SLuigi Rizzo bool stopped; 93937e3a6d3SLuigi Rizzo struct netmap_bwrap_adapter *bna; 94037e3a6d3SLuigi Rizzo u_int reg; 94137e3a6d3SLuigi Rizzo u_int qfirst; 94237e3a6d3SLuigi Rizzo u_int qlast; 94337e3a6d3SLuigi Rizzo u_int cpu_from; 94437e3a6d3SLuigi Rizzo u_int ncpus; 94537e3a6d3SLuigi Rizzo struct nm_bdg_kthread *kthreads; 94637e3a6d3SLuigi Rizzo }; 
94737e3a6d3SLuigi Rizzo 94837e3a6d3SLuigi Rizzo static void 949c3e9b4dbSLuiz Otavio O Souza netmap_bwrap_polling(void *data, int is_kthread) 95037e3a6d3SLuigi Rizzo { 95137e3a6d3SLuigi Rizzo struct nm_bdg_kthread *nbk = data; 95237e3a6d3SLuigi Rizzo struct netmap_bwrap_adapter *bna; 95337e3a6d3SLuigi Rizzo u_int qfirst, qlast, i; 95437e3a6d3SLuigi Rizzo struct netmap_kring *kring0, *kring; 95537e3a6d3SLuigi Rizzo 95637e3a6d3SLuigi Rizzo if (!nbk) 95737e3a6d3SLuigi Rizzo return; 95837e3a6d3SLuigi Rizzo qfirst = nbk->qfirst; 95937e3a6d3SLuigi Rizzo qlast = nbk->qlast; 96037e3a6d3SLuigi Rizzo bna = nbk->bps->bna; 96137e3a6d3SLuigi Rizzo kring0 = NMR(bna->hwna, NR_RX); 96237e3a6d3SLuigi Rizzo 96337e3a6d3SLuigi Rizzo for (i = qfirst; i < qlast; i++) { 96437e3a6d3SLuigi Rizzo kring = kring0 + i; 96537e3a6d3SLuigi Rizzo kring->nm_notify(kring, 0); 96637e3a6d3SLuigi Rizzo } 96737e3a6d3SLuigi Rizzo } 96837e3a6d3SLuigi Rizzo 96937e3a6d3SLuigi Rizzo static int 97037e3a6d3SLuigi Rizzo nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps) 97137e3a6d3SLuigi Rizzo { 972c3e9b4dbSLuiz Otavio O Souza struct nm_kctx_cfg kcfg; 97337e3a6d3SLuigi Rizzo int i, j; 97437e3a6d3SLuigi Rizzo 975c3e9b4dbSLuiz Otavio O Souza bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus); 97637e3a6d3SLuigi Rizzo if (bps->kthreads == NULL) 97737e3a6d3SLuigi Rizzo return ENOMEM; 97837e3a6d3SLuigi Rizzo 97937e3a6d3SLuigi Rizzo bzero(&kcfg, sizeof(kcfg)); 98037e3a6d3SLuigi Rizzo kcfg.worker_fn = netmap_bwrap_polling; 981c3e9b4dbSLuiz Otavio O Souza kcfg.use_kthread = 1; 98237e3a6d3SLuigi Rizzo for (i = 0; i < bps->ncpus; i++) { 98337e3a6d3SLuigi Rizzo struct nm_bdg_kthread *t = bps->kthreads + i; 98437e3a6d3SLuigi Rizzo int all = (bps->ncpus == 1 && bps->reg == NR_REG_ALL_NIC); 98537e3a6d3SLuigi Rizzo int affinity = bps->cpu_from + i; 98637e3a6d3SLuigi Rizzo 98737e3a6d3SLuigi Rizzo t->bps = bps; 98837e3a6d3SLuigi Rizzo t->qfirst = all ? 
bps->qfirst /* must be 0 */: affinity; 98937e3a6d3SLuigi Rizzo t->qlast = all ? bps->qlast : t->qfirst + 1; 99037e3a6d3SLuigi Rizzo D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst, 99137e3a6d3SLuigi Rizzo t->qlast); 99237e3a6d3SLuigi Rizzo 99337e3a6d3SLuigi Rizzo kcfg.type = i; 99437e3a6d3SLuigi Rizzo kcfg.worker_private = t; 995c3e9b4dbSLuiz Otavio O Souza t->nmk = nm_os_kctx_create(&kcfg, 0, NULL); 99637e3a6d3SLuigi Rizzo if (t->nmk == NULL) { 99737e3a6d3SLuigi Rizzo goto cleanup; 99837e3a6d3SLuigi Rizzo } 999c3e9b4dbSLuiz Otavio O Souza nm_os_kctx_worker_setaff(t->nmk, affinity); 100037e3a6d3SLuigi Rizzo } 100137e3a6d3SLuigi Rizzo return 0; 100237e3a6d3SLuigi Rizzo 100337e3a6d3SLuigi Rizzo cleanup: 100437e3a6d3SLuigi Rizzo for (j = 0; j < i; j++) { 100537e3a6d3SLuigi Rizzo struct nm_bdg_kthread *t = bps->kthreads + i; 1006c3e9b4dbSLuiz Otavio O Souza nm_os_kctx_destroy(t->nmk); 100737e3a6d3SLuigi Rizzo } 1008c3e9b4dbSLuiz Otavio O Souza nm_os_free(bps->kthreads); 100937e3a6d3SLuigi Rizzo return EFAULT; 101037e3a6d3SLuigi Rizzo } 101137e3a6d3SLuigi Rizzo 1012c3e9b4dbSLuiz Otavio O Souza /* A variant of ptnetmap_start_kthreads() */ 101337e3a6d3SLuigi Rizzo static int 101437e3a6d3SLuigi Rizzo nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps) 101537e3a6d3SLuigi Rizzo { 101637e3a6d3SLuigi Rizzo int error, i, j; 101737e3a6d3SLuigi Rizzo 101837e3a6d3SLuigi Rizzo if (!bps) { 101937e3a6d3SLuigi Rizzo D("polling is not configured"); 102037e3a6d3SLuigi Rizzo return EFAULT; 102137e3a6d3SLuigi Rizzo } 102237e3a6d3SLuigi Rizzo bps->stopped = false; 102337e3a6d3SLuigi Rizzo 102437e3a6d3SLuigi Rizzo for (i = 0; i < bps->ncpus; i++) { 102537e3a6d3SLuigi Rizzo struct nm_bdg_kthread *t = bps->kthreads + i; 1026c3e9b4dbSLuiz Otavio O Souza error = nm_os_kctx_worker_start(t->nmk); 102737e3a6d3SLuigi Rizzo if (error) { 102837e3a6d3SLuigi Rizzo D("error in nm_kthread_start()"); 102937e3a6d3SLuigi Rizzo goto cleanup; 103037e3a6d3SLuigi Rizzo } 103137e3a6d3SLuigi 
Rizzo } 103237e3a6d3SLuigi Rizzo return 0; 103337e3a6d3SLuigi Rizzo 103437e3a6d3SLuigi Rizzo cleanup: 103537e3a6d3SLuigi Rizzo for (j = 0; j < i; j++) { 103637e3a6d3SLuigi Rizzo struct nm_bdg_kthread *t = bps->kthreads + i; 1037c3e9b4dbSLuiz Otavio O Souza nm_os_kctx_worker_stop(t->nmk); 103837e3a6d3SLuigi Rizzo } 103937e3a6d3SLuigi Rizzo bps->stopped = true; 104037e3a6d3SLuigi Rizzo return error; 104137e3a6d3SLuigi Rizzo } 104237e3a6d3SLuigi Rizzo 104337e3a6d3SLuigi Rizzo static void 104437e3a6d3SLuigi Rizzo nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps) 104537e3a6d3SLuigi Rizzo { 104637e3a6d3SLuigi Rizzo int i; 104737e3a6d3SLuigi Rizzo 104837e3a6d3SLuigi Rizzo if (!bps) 104937e3a6d3SLuigi Rizzo return; 105037e3a6d3SLuigi Rizzo 105137e3a6d3SLuigi Rizzo for (i = 0; i < bps->ncpus; i++) { 105237e3a6d3SLuigi Rizzo struct nm_bdg_kthread *t = bps->kthreads + i; 1053c3e9b4dbSLuiz Otavio O Souza nm_os_kctx_worker_stop(t->nmk); 1054c3e9b4dbSLuiz Otavio O Souza nm_os_kctx_destroy(t->nmk); 105537e3a6d3SLuigi Rizzo } 105637e3a6d3SLuigi Rizzo bps->stopped = true; 105737e3a6d3SLuigi Rizzo } 105837e3a6d3SLuigi Rizzo 105937e3a6d3SLuigi Rizzo static int 106037e3a6d3SLuigi Rizzo get_polling_cfg(struct nmreq *nmr, struct netmap_adapter *na, 106137e3a6d3SLuigi Rizzo struct nm_bdg_polling_state *bps) 106237e3a6d3SLuigi Rizzo { 106337e3a6d3SLuigi Rizzo int req_cpus, avail_cpus, core_from; 106437e3a6d3SLuigi Rizzo u_int reg, i, qfirst, qlast; 106537e3a6d3SLuigi Rizzo 106637e3a6d3SLuigi Rizzo avail_cpus = nm_os_ncpus(); 106737e3a6d3SLuigi Rizzo req_cpus = nmr->nr_arg1; 106837e3a6d3SLuigi Rizzo 106937e3a6d3SLuigi Rizzo if (req_cpus == 0) { 107037e3a6d3SLuigi Rizzo D("req_cpus must be > 0"); 107137e3a6d3SLuigi Rizzo return EINVAL; 107237e3a6d3SLuigi Rizzo } else if (req_cpus >= avail_cpus) { 107337e3a6d3SLuigi Rizzo D("for safety, we need at least one core left in the system"); 107437e3a6d3SLuigi Rizzo return EINVAL; 107537e3a6d3SLuigi Rizzo } 107637e3a6d3SLuigi 
Rizzo reg = nmr->nr_flags & NR_REG_MASK; 107737e3a6d3SLuigi Rizzo i = nmr->nr_ringid & NETMAP_RING_MASK; 107837e3a6d3SLuigi Rizzo /* 107937e3a6d3SLuigi Rizzo * ONE_NIC: dedicate one core to one ring. If multiple cores 108037e3a6d3SLuigi Rizzo * are specified, consecutive rings are also polled. 108137e3a6d3SLuigi Rizzo * For example, if ringid=2 and 2 cores are given, 108237e3a6d3SLuigi Rizzo * ring 2 and 3 are polled by core 2 and 3, respectively. 108337e3a6d3SLuigi Rizzo * ALL_NIC: poll all the rings using a core specified by ringid. 108437e3a6d3SLuigi Rizzo * the number of cores must be 1. 108537e3a6d3SLuigi Rizzo */ 108637e3a6d3SLuigi Rizzo if (reg == NR_REG_ONE_NIC) { 108737e3a6d3SLuigi Rizzo if (i + req_cpus > nma_get_nrings(na, NR_RX)) { 108837e3a6d3SLuigi Rizzo D("only %d rings exist (ring %u-%u is given)", 108937e3a6d3SLuigi Rizzo nma_get_nrings(na, NR_RX), i, i+req_cpus); 109037e3a6d3SLuigi Rizzo return EINVAL; 109137e3a6d3SLuigi Rizzo } 109237e3a6d3SLuigi Rizzo qfirst = i; 109337e3a6d3SLuigi Rizzo qlast = qfirst + req_cpus; 109437e3a6d3SLuigi Rizzo core_from = qfirst; 109537e3a6d3SLuigi Rizzo } else if (reg == NR_REG_ALL_NIC) { 109637e3a6d3SLuigi Rizzo if (req_cpus != 1) { 109737e3a6d3SLuigi Rizzo D("ncpus must be 1 not %d for REG_ALL_NIC", req_cpus); 109837e3a6d3SLuigi Rizzo return EINVAL; 109937e3a6d3SLuigi Rizzo } 110037e3a6d3SLuigi Rizzo qfirst = 0; 110137e3a6d3SLuigi Rizzo qlast = nma_get_nrings(na, NR_RX); 110237e3a6d3SLuigi Rizzo core_from = i; 110337e3a6d3SLuigi Rizzo } else { 110437e3a6d3SLuigi Rizzo D("reg must be ALL_NIC or ONE_NIC"); 110537e3a6d3SLuigi Rizzo return EINVAL; 110637e3a6d3SLuigi Rizzo } 110737e3a6d3SLuigi Rizzo 110837e3a6d3SLuigi Rizzo bps->reg = reg; 110937e3a6d3SLuigi Rizzo bps->qfirst = qfirst; 111037e3a6d3SLuigi Rizzo bps->qlast = qlast; 111137e3a6d3SLuigi Rizzo bps->cpu_from = core_from; 111237e3a6d3SLuigi Rizzo bps->ncpus = req_cpus; 111337e3a6d3SLuigi Rizzo D("%s qfirst %u qlast %u cpu_from %u ncpus %u", 111437e3a6d3SLuigi 
Rizzo reg == NR_REG_ALL_NIC ? "REG_ALL_NIC" : "REG_ONE_NIC", 111537e3a6d3SLuigi Rizzo qfirst, qlast, core_from, req_cpus); 111637e3a6d3SLuigi Rizzo return 0; 111737e3a6d3SLuigi Rizzo } 111837e3a6d3SLuigi Rizzo 111937e3a6d3SLuigi Rizzo static int 112037e3a6d3SLuigi Rizzo nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na) 112137e3a6d3SLuigi Rizzo { 112237e3a6d3SLuigi Rizzo struct nm_bdg_polling_state *bps; 112337e3a6d3SLuigi Rizzo struct netmap_bwrap_adapter *bna; 112437e3a6d3SLuigi Rizzo int error; 112537e3a6d3SLuigi Rizzo 112637e3a6d3SLuigi Rizzo bna = (struct netmap_bwrap_adapter *)na; 112737e3a6d3SLuigi Rizzo if (bna->na_polling_state) { 112837e3a6d3SLuigi Rizzo D("ERROR adapter already in polling mode"); 112937e3a6d3SLuigi Rizzo return EFAULT; 113037e3a6d3SLuigi Rizzo } 113137e3a6d3SLuigi Rizzo 1132c3e9b4dbSLuiz Otavio O Souza bps = nm_os_malloc(sizeof(*bps)); 113337e3a6d3SLuigi Rizzo if (!bps) 113437e3a6d3SLuigi Rizzo return ENOMEM; 113537e3a6d3SLuigi Rizzo bps->configured = false; 113637e3a6d3SLuigi Rizzo bps->stopped = true; 113737e3a6d3SLuigi Rizzo 113837e3a6d3SLuigi Rizzo if (get_polling_cfg(nmr, na, bps)) { 1139c3e9b4dbSLuiz Otavio O Souza nm_os_free(bps); 114037e3a6d3SLuigi Rizzo return EINVAL; 114137e3a6d3SLuigi Rizzo } 114237e3a6d3SLuigi Rizzo 114337e3a6d3SLuigi Rizzo if (nm_bdg_create_kthreads(bps)) { 1144c3e9b4dbSLuiz Otavio O Souza nm_os_free(bps); 114537e3a6d3SLuigi Rizzo return EFAULT; 114637e3a6d3SLuigi Rizzo } 114737e3a6d3SLuigi Rizzo 114837e3a6d3SLuigi Rizzo bps->configured = true; 114937e3a6d3SLuigi Rizzo bna->na_polling_state = bps; 115037e3a6d3SLuigi Rizzo bps->bna = bna; 115137e3a6d3SLuigi Rizzo 115237e3a6d3SLuigi Rizzo /* disable interrupt if possible */ 115337e3a6d3SLuigi Rizzo if (bna->hwna->nm_intr) 115437e3a6d3SLuigi Rizzo bna->hwna->nm_intr(bna->hwna, 0); 115537e3a6d3SLuigi Rizzo /* start kthread now */ 115637e3a6d3SLuigi Rizzo error = nm_bdg_polling_start_kthreads(bps); 115737e3a6d3SLuigi Rizzo if (error) { 
115837e3a6d3SLuigi Rizzo D("ERROR nm_bdg_polling_start_kthread()"); 1159c3e9b4dbSLuiz Otavio O Souza nm_os_free(bps->kthreads); 1160c3e9b4dbSLuiz Otavio O Souza nm_os_free(bps); 116137e3a6d3SLuigi Rizzo bna->na_polling_state = NULL; 116237e3a6d3SLuigi Rizzo if (bna->hwna->nm_intr) 116337e3a6d3SLuigi Rizzo bna->hwna->nm_intr(bna->hwna, 1); 116437e3a6d3SLuigi Rizzo } 116537e3a6d3SLuigi Rizzo return error; 116637e3a6d3SLuigi Rizzo } 116737e3a6d3SLuigi Rizzo 116837e3a6d3SLuigi Rizzo static int 116937e3a6d3SLuigi Rizzo nm_bdg_ctl_polling_stop(struct nmreq *nmr, struct netmap_adapter *na) 117037e3a6d3SLuigi Rizzo { 117137e3a6d3SLuigi Rizzo struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na; 117237e3a6d3SLuigi Rizzo struct nm_bdg_polling_state *bps; 117337e3a6d3SLuigi Rizzo 117437e3a6d3SLuigi Rizzo if (!bna->na_polling_state) { 117537e3a6d3SLuigi Rizzo D("ERROR adapter is not in polling mode"); 117637e3a6d3SLuigi Rizzo return EFAULT; 117737e3a6d3SLuigi Rizzo } 117837e3a6d3SLuigi Rizzo bps = bna->na_polling_state; 117937e3a6d3SLuigi Rizzo nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state); 118037e3a6d3SLuigi Rizzo bps->configured = false; 1181c3e9b4dbSLuiz Otavio O Souza nm_os_free(bps); 118237e3a6d3SLuigi Rizzo bna->na_polling_state = NULL; 118337e3a6d3SLuigi Rizzo /* reenable interrupt */ 118437e3a6d3SLuigi Rizzo if (bna->hwna->nm_intr) 118537e3a6d3SLuigi Rizzo bna->hwna->nm_intr(bna->hwna, 1); 118637e3a6d3SLuigi Rizzo return 0; 118737e3a6d3SLuigi Rizzo } 1188f9790aebSLuigi Rizzo 11894bf50f18SLuigi Rizzo /* Called by either user's context (netmap_ioctl()) 11904bf50f18SLuigi Rizzo * or external kernel modules (e.g., Openvswitch). 11914bf50f18SLuigi Rizzo * Operation is indicated in nmr->nr_cmd. 11924bf50f18SLuigi Rizzo * NETMAP_BDG_OPS that sets configure/lookup/dtor functions to the bridge 11934bf50f18SLuigi Rizzo * requires bdg_ops argument; the other commands ignore this argument. 
11944bf50f18SLuigi Rizzo * 1195f9790aebSLuigi Rizzo * Called without NMG_LOCK. 1196f9790aebSLuigi Rizzo */ 1197f9790aebSLuigi Rizzo int 11984bf50f18SLuigi Rizzo netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops) 1199f9790aebSLuigi Rizzo { 1200847bf383SLuigi Rizzo struct nm_bridge *b, *bridges; 1201f9790aebSLuigi Rizzo struct netmap_adapter *na; 1202f9790aebSLuigi Rizzo struct netmap_vp_adapter *vpna; 1203f9790aebSLuigi Rizzo char *name = nmr->nr_name; 1204f9790aebSLuigi Rizzo int cmd = nmr->nr_cmd, namelen = strlen(name); 1205f9790aebSLuigi Rizzo int error = 0, i, j; 1206847bf383SLuigi Rizzo u_int num_bridges; 1207847bf383SLuigi Rizzo 1208847bf383SLuigi Rizzo netmap_bns_getbridges(&bridges, &num_bridges); 1209f9790aebSLuigi Rizzo 1210f9790aebSLuigi Rizzo switch (cmd) { 12114bf50f18SLuigi Rizzo case NETMAP_BDG_NEWIF: 1212c3e9b4dbSLuiz Otavio O Souza error = netmap_vi_create(nmr, 0 /* no autodelete */); 12134bf50f18SLuigi Rizzo break; 12144bf50f18SLuigi Rizzo 12154bf50f18SLuigi Rizzo case NETMAP_BDG_DELIF: 12164bf50f18SLuigi Rizzo error = nm_vi_destroy(nmr->nr_name); 12174bf50f18SLuigi Rizzo break; 12184bf50f18SLuigi Rizzo 1219f9790aebSLuigi Rizzo case NETMAP_BDG_ATTACH: 12204bf50f18SLuigi Rizzo error = nm_bdg_ctl_attach(nmr); 1221f9790aebSLuigi Rizzo break; 1222f9790aebSLuigi Rizzo 1223f9790aebSLuigi Rizzo case NETMAP_BDG_DETACH: 12244bf50f18SLuigi Rizzo error = nm_bdg_ctl_detach(nmr); 1225f9790aebSLuigi Rizzo break; 1226f9790aebSLuigi Rizzo 1227f9790aebSLuigi Rizzo case NETMAP_BDG_LIST: 1228f9790aebSLuigi Rizzo /* this is used to enumerate bridges and ports */ 1229f9790aebSLuigi Rizzo if (namelen) { /* look up indexes of bridge and port */ 123037e3a6d3SLuigi Rizzo if (strncmp(name, NM_BDG_NAME, strlen(NM_BDG_NAME))) { 1231f9790aebSLuigi Rizzo error = EINVAL; 1232f9790aebSLuigi Rizzo break; 1233f9790aebSLuigi Rizzo } 1234f9790aebSLuigi Rizzo NMG_LOCK(); 1235f9790aebSLuigi Rizzo b = nm_find_bridge(name, 0 /* don't create */); 1236f9790aebSLuigi Rizzo 
if (!b) { 1237f9790aebSLuigi Rizzo error = ENOENT; 1238f9790aebSLuigi Rizzo NMG_UNLOCK(); 1239f9790aebSLuigi Rizzo break; 1240f9790aebSLuigi Rizzo } 1241f9790aebSLuigi Rizzo 124237e3a6d3SLuigi Rizzo error = 0; 124337e3a6d3SLuigi Rizzo nmr->nr_arg1 = b - bridges; /* bridge index */ 124437e3a6d3SLuigi Rizzo nmr->nr_arg2 = NM_BDG_NOPORT; 1245f9790aebSLuigi Rizzo for (j = 0; j < b->bdg_active_ports; j++) { 1246f9790aebSLuigi Rizzo i = b->bdg_port_index[j]; 1247f9790aebSLuigi Rizzo vpna = b->bdg_ports[i]; 1248f9790aebSLuigi Rizzo if (vpna == NULL) { 1249f9790aebSLuigi Rizzo D("---AAAAAAAAARGH-------"); 1250f9790aebSLuigi Rizzo continue; 1251f9790aebSLuigi Rizzo } 1252f9790aebSLuigi Rizzo /* the former and the latter identify a 1253f9790aebSLuigi Rizzo * virtual port and a NIC, respectively 1254f9790aebSLuigi Rizzo */ 12554bf50f18SLuigi Rizzo if (!strcmp(vpna->up.name, name)) { 1256f9790aebSLuigi Rizzo nmr->nr_arg2 = i; /* port index */ 1257f9790aebSLuigi Rizzo break; 1258f9790aebSLuigi Rizzo } 1259f9790aebSLuigi Rizzo } 1260f9790aebSLuigi Rizzo NMG_UNLOCK(); 1261f9790aebSLuigi Rizzo } else { 1262f9790aebSLuigi Rizzo /* return the first non-empty entry starting from 1263f9790aebSLuigi Rizzo * bridge nr_arg1 and port nr_arg2. 
1264f9790aebSLuigi Rizzo * 1265f9790aebSLuigi Rizzo * Users can detect the end of the same bridge by 1266f9790aebSLuigi Rizzo * seeing the new and old value of nr_arg1, and can 1267f9790aebSLuigi Rizzo * detect the end of all the bridge by error != 0 1268f9790aebSLuigi Rizzo */ 1269f9790aebSLuigi Rizzo i = nmr->nr_arg1; 1270f9790aebSLuigi Rizzo j = nmr->nr_arg2; 1271f9790aebSLuigi Rizzo 1272f9790aebSLuigi Rizzo NMG_LOCK(); 1273f9790aebSLuigi Rizzo for (error = ENOENT; i < NM_BRIDGES; i++) { 1274847bf383SLuigi Rizzo b = bridges + i; 1275c3e9b4dbSLuiz Otavio O Souza for ( ; j < NM_BDG_MAXPORTS; j++) { 1276c3e9b4dbSLuiz Otavio O Souza if (b->bdg_ports[j] == NULL) 1277f9790aebSLuigi Rizzo continue; 1278f9790aebSLuigi Rizzo vpna = b->bdg_ports[j]; 12794bf50f18SLuigi Rizzo strncpy(name, vpna->up.name, (size_t)IFNAMSIZ); 1280f9790aebSLuigi Rizzo error = 0; 1281c3e9b4dbSLuiz Otavio O Souza goto out; 1282f9790aebSLuigi Rizzo } 1283c3e9b4dbSLuiz Otavio O Souza j = 0; /* following bridges scan from 0 */ 1284c3e9b4dbSLuiz Otavio O Souza } 1285c3e9b4dbSLuiz Otavio O Souza out: 1286c3e9b4dbSLuiz Otavio O Souza nmr->nr_arg1 = i; 1287c3e9b4dbSLuiz Otavio O Souza nmr->nr_arg2 = j; 1288f9790aebSLuigi Rizzo NMG_UNLOCK(); 1289f9790aebSLuigi Rizzo } 1290f9790aebSLuigi Rizzo break; 1291f9790aebSLuigi Rizzo 12924bf50f18SLuigi Rizzo case NETMAP_BDG_REGOPS: /* XXX this should not be available from userspace */ 12934bf50f18SLuigi Rizzo /* register callbacks to the given bridge. 1294f9790aebSLuigi Rizzo * nmr->nr_name may be just bridge's name (including ':' 1295f9790aebSLuigi Rizzo * if it is not just NM_NAME). 
1296f9790aebSLuigi Rizzo */ 12974bf50f18SLuigi Rizzo if (!bdg_ops) { 1298f9790aebSLuigi Rizzo error = EINVAL; 1299f9790aebSLuigi Rizzo break; 1300f9790aebSLuigi Rizzo } 1301f9790aebSLuigi Rizzo NMG_LOCK(); 1302f9790aebSLuigi Rizzo b = nm_find_bridge(name, 0 /* don't create */); 1303f9790aebSLuigi Rizzo if (!b) { 1304f9790aebSLuigi Rizzo error = EINVAL; 1305f9790aebSLuigi Rizzo } else { 13064bf50f18SLuigi Rizzo b->bdg_ops = *bdg_ops; 1307f9790aebSLuigi Rizzo } 1308f9790aebSLuigi Rizzo NMG_UNLOCK(); 1309f9790aebSLuigi Rizzo break; 1310f9790aebSLuigi Rizzo 1311f0ea3689SLuigi Rizzo case NETMAP_BDG_VNET_HDR: 1312f0ea3689SLuigi Rizzo /* Valid lengths for the virtio-net header are 0 (no header), 1313f0ea3689SLuigi Rizzo 10 and 12. */ 1314f0ea3689SLuigi Rizzo if (nmr->nr_arg1 != 0 && 1315f0ea3689SLuigi Rizzo nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) && 1316f0ea3689SLuigi Rizzo nmr->nr_arg1 != 12) { 1317f0ea3689SLuigi Rizzo error = EINVAL; 1318f0ea3689SLuigi Rizzo break; 1319f0ea3689SLuigi Rizzo } 1320f9790aebSLuigi Rizzo NMG_LOCK(); 1321c3e9b4dbSLuiz Otavio O Souza error = netmap_get_bdg_na(nmr, &na, NULL, 0); 132217885a7bSLuigi Rizzo if (na && !error) { 1323f9790aebSLuigi Rizzo vpna = (struct netmap_vp_adapter *)na; 132437e3a6d3SLuigi Rizzo na->virt_hdr_len = nmr->nr_arg1; 132537e3a6d3SLuigi Rizzo if (na->virt_hdr_len) { 13264bf50f18SLuigi Rizzo vpna->mfs = NETMAP_BUF_SIZE(na); 132737e3a6d3SLuigi Rizzo } 132837e3a6d3SLuigi Rizzo D("Using vnet_hdr_len %d for %p", na->virt_hdr_len, na); 132937e3a6d3SLuigi Rizzo netmap_adapter_put(na); 133037e3a6d3SLuigi Rizzo } else if (!na) { 133137e3a6d3SLuigi Rizzo error = ENXIO; 133237e3a6d3SLuigi Rizzo } 133337e3a6d3SLuigi Rizzo NMG_UNLOCK(); 133437e3a6d3SLuigi Rizzo break; 133537e3a6d3SLuigi Rizzo 133637e3a6d3SLuigi Rizzo case NETMAP_BDG_POLLING_ON: 133737e3a6d3SLuigi Rizzo case NETMAP_BDG_POLLING_OFF: 133837e3a6d3SLuigi Rizzo NMG_LOCK(); 1339c3e9b4dbSLuiz Otavio O Souza error = netmap_get_bdg_na(nmr, &na, NULL, 0); 
134037e3a6d3SLuigi Rizzo if (na && !error) { 134137e3a6d3SLuigi Rizzo if (!nm_is_bwrap(na)) { 134237e3a6d3SLuigi Rizzo error = EOPNOTSUPP; 134337e3a6d3SLuigi Rizzo } else if (cmd == NETMAP_BDG_POLLING_ON) { 134437e3a6d3SLuigi Rizzo error = nm_bdg_ctl_polling_start(nmr, na); 134537e3a6d3SLuigi Rizzo if (!error) 134637e3a6d3SLuigi Rizzo netmap_adapter_get(na); 134737e3a6d3SLuigi Rizzo } else { 134837e3a6d3SLuigi Rizzo error = nm_bdg_ctl_polling_stop(nmr, na); 134937e3a6d3SLuigi Rizzo if (!error) 135037e3a6d3SLuigi Rizzo netmap_adapter_put(na); 135137e3a6d3SLuigi Rizzo } 135217885a7bSLuigi Rizzo netmap_adapter_put(na); 1353f9790aebSLuigi Rizzo } 1354f9790aebSLuigi Rizzo NMG_UNLOCK(); 1355f9790aebSLuigi Rizzo break; 1356f9790aebSLuigi Rizzo 1357f9790aebSLuigi Rizzo default: 1358f9790aebSLuigi Rizzo D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd); 1359f9790aebSLuigi Rizzo error = EINVAL; 1360f9790aebSLuigi Rizzo break; 1361f9790aebSLuigi Rizzo } 1362f9790aebSLuigi Rizzo return error; 1363f9790aebSLuigi Rizzo } 1364f9790aebSLuigi Rizzo 13654bf50f18SLuigi Rizzo int 13664bf50f18SLuigi Rizzo netmap_bdg_config(struct nmreq *nmr) 13674bf50f18SLuigi Rizzo { 13684bf50f18SLuigi Rizzo struct nm_bridge *b; 13694bf50f18SLuigi Rizzo int error = EINVAL; 13704bf50f18SLuigi Rizzo 13714bf50f18SLuigi Rizzo NMG_LOCK(); 13724bf50f18SLuigi Rizzo b = nm_find_bridge(nmr->nr_name, 0); 13734bf50f18SLuigi Rizzo if (!b) { 13744bf50f18SLuigi Rizzo NMG_UNLOCK(); 13754bf50f18SLuigi Rizzo return error; 13764bf50f18SLuigi Rizzo } 13774bf50f18SLuigi Rizzo NMG_UNLOCK(); 13784bf50f18SLuigi Rizzo /* Don't call config() with NMG_LOCK() held */ 13794bf50f18SLuigi Rizzo BDG_RLOCK(b); 13804bf50f18SLuigi Rizzo if (b->bdg_ops.config != NULL) 13814bf50f18SLuigi Rizzo error = b->bdg_ops.config((struct nm_ifreq *)nmr); 13824bf50f18SLuigi Rizzo BDG_RUNLOCK(b); 13834bf50f18SLuigi Rizzo return error; 13844bf50f18SLuigi Rizzo } 13854bf50f18SLuigi Rizzo 13864bf50f18SLuigi Rizzo 13874bf50f18SLuigi Rizzo /* nm_krings_create 
callback for VALE ports. 13884bf50f18SLuigi Rizzo * Calls the standard netmap_krings_create, then adds leases on rx 13894bf50f18SLuigi Rizzo * rings and bdgfwd on tx rings. 13904bf50f18SLuigi Rizzo */ 1391f9790aebSLuigi Rizzo static int 1392f9790aebSLuigi Rizzo netmap_vp_krings_create(struct netmap_adapter *na) 1393f9790aebSLuigi Rizzo { 1394f0ea3689SLuigi Rizzo u_int tailroom; 1395f9790aebSLuigi Rizzo int error, i; 1396f9790aebSLuigi Rizzo uint32_t *leases; 1397847bf383SLuigi Rizzo u_int nrx = netmap_real_rings(na, NR_RX); 1398f9790aebSLuigi Rizzo 1399f9790aebSLuigi Rizzo /* 1400f9790aebSLuigi Rizzo * Leases are attached to RX rings on vale ports 1401f9790aebSLuigi Rizzo */ 1402f9790aebSLuigi Rizzo tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx; 1403f9790aebSLuigi Rizzo 1404f0ea3689SLuigi Rizzo error = netmap_krings_create(na, tailroom); 1405f9790aebSLuigi Rizzo if (error) 1406f9790aebSLuigi Rizzo return error; 1407f9790aebSLuigi Rizzo 1408f9790aebSLuigi Rizzo leases = na->tailroom; 1409f9790aebSLuigi Rizzo 1410f9790aebSLuigi Rizzo for (i = 0; i < nrx; i++) { /* Receive rings */ 1411f9790aebSLuigi Rizzo na->rx_rings[i].nkr_leases = leases; 1412f9790aebSLuigi Rizzo leases += na->num_rx_desc; 1413f9790aebSLuigi Rizzo } 1414f9790aebSLuigi Rizzo 1415f9790aebSLuigi Rizzo error = nm_alloc_bdgfwd(na); 1416f9790aebSLuigi Rizzo if (error) { 1417f9790aebSLuigi Rizzo netmap_krings_delete(na); 1418f9790aebSLuigi Rizzo return error; 1419f9790aebSLuigi Rizzo } 1420f9790aebSLuigi Rizzo 1421f9790aebSLuigi Rizzo return 0; 1422f9790aebSLuigi Rizzo } 1423f9790aebSLuigi Rizzo 142417885a7bSLuigi Rizzo 14254bf50f18SLuigi Rizzo /* nm_krings_delete callback for VALE ports. 
*/ 1426f9790aebSLuigi Rizzo static void 1427f9790aebSLuigi Rizzo netmap_vp_krings_delete(struct netmap_adapter *na) 1428f9790aebSLuigi Rizzo { 1429f9790aebSLuigi Rizzo nm_free_bdgfwd(na); 1430f9790aebSLuigi Rizzo netmap_krings_delete(na); 1431f9790aebSLuigi Rizzo } 1432f9790aebSLuigi Rizzo 1433f9790aebSLuigi Rizzo 1434f9790aebSLuigi Rizzo static int 1435f9790aebSLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, 1436f9790aebSLuigi Rizzo struct netmap_vp_adapter *na, u_int ring_nr); 1437f9790aebSLuigi Rizzo 1438f9790aebSLuigi Rizzo 1439f9790aebSLuigi Rizzo /* 14404bf50f18SLuigi Rizzo * main dispatch routine for the bridge. 1441f9790aebSLuigi Rizzo * Grab packets from a kring, move them into the ft structure 1442f9790aebSLuigi Rizzo * associated to the tx (input) port. Max one instance per port, 1443f9790aebSLuigi Rizzo * filtered on input (ioctl, poll or XXX). 1444f9790aebSLuigi Rizzo * Returns the next position in the ring. 1445f9790aebSLuigi Rizzo */ 1446f9790aebSLuigi Rizzo static int 14474bf50f18SLuigi Rizzo nm_bdg_preflush(struct netmap_kring *kring, u_int end) 1448f9790aebSLuigi Rizzo { 14494bf50f18SLuigi Rizzo struct netmap_vp_adapter *na = 14504bf50f18SLuigi Rizzo (struct netmap_vp_adapter*)kring->na; 1451f9790aebSLuigi Rizzo struct netmap_ring *ring = kring->ring; 1452f9790aebSLuigi Rizzo struct nm_bdg_fwd *ft; 14534bf50f18SLuigi Rizzo u_int ring_nr = kring->ring_id; 1454f9790aebSLuigi Rizzo u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1; 1455f9790aebSLuigi Rizzo u_int ft_i = 0; /* start from 0 */ 1456f9790aebSLuigi Rizzo u_int frags = 1; /* how many frags ? */ 1457f9790aebSLuigi Rizzo struct nm_bridge *b = na->na_bdg; 1458f9790aebSLuigi Rizzo 1459f9790aebSLuigi Rizzo /* To protect against modifications to the bridge we acquire a 1460f9790aebSLuigi Rizzo * shared lock, waiting if we can sleep (if the source port is 1461f9790aebSLuigi Rizzo * attached to a user process) or with a trylock otherwise (NICs). 
1462f9790aebSLuigi Rizzo */ 1463f9790aebSLuigi Rizzo ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j); 1464f9790aebSLuigi Rizzo if (na->up.na_flags & NAF_BDG_MAYSLEEP) 1465f9790aebSLuigi Rizzo BDG_RLOCK(b); 1466f9790aebSLuigi Rizzo else if (!BDG_RTRYLOCK(b)) 1467c3e9b4dbSLuiz Otavio O Souza return j; 1468f9790aebSLuigi Rizzo ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j); 1469f9790aebSLuigi Rizzo ft = kring->nkr_ft; 1470f9790aebSLuigi Rizzo 1471f9790aebSLuigi Rizzo for (; likely(j != end); j = nm_next(j, lim)) { 1472f9790aebSLuigi Rizzo struct netmap_slot *slot = &ring->slot[j]; 1473f9790aebSLuigi Rizzo char *buf; 1474f9790aebSLuigi Rizzo 1475f9790aebSLuigi Rizzo ft[ft_i].ft_len = slot->len; 1476f9790aebSLuigi Rizzo ft[ft_i].ft_flags = slot->flags; 1477f9790aebSLuigi Rizzo 1478f9790aebSLuigi Rizzo ND("flags is 0x%x", slot->flags); 1479847bf383SLuigi Rizzo /* we do not use the buf changed flag, but we still need to reset it */ 1480847bf383SLuigi Rizzo slot->flags &= ~NS_BUF_CHANGED; 1481847bf383SLuigi Rizzo 1482f9790aebSLuigi Rizzo /* this slot goes into a list so initialize the link field */ 1483f9790aebSLuigi Rizzo ft[ft_i].ft_next = NM_FT_NULL; 1484f9790aebSLuigi Rizzo buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ? 14854bf50f18SLuigi Rizzo (void *)(uintptr_t)slot->ptr : NMB(&na->up, slot); 1486e31c6ec7SLuigi Rizzo if (unlikely(buf == NULL)) { 1487e31c6ec7SLuigi Rizzo RD(5, "NULL %s buffer pointer from %s slot %d len %d", 1488e31c6ec7SLuigi Rizzo (slot->flags & NS_INDIRECT) ? 
"INDIRECT" : "DIRECT", 1489e31c6ec7SLuigi Rizzo kring->name, j, ft[ft_i].ft_len); 14904bf50f18SLuigi Rizzo buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up); 1491e31c6ec7SLuigi Rizzo ft[ft_i].ft_len = 0; 1492e31c6ec7SLuigi Rizzo ft[ft_i].ft_flags = 0; 1493e31c6ec7SLuigi Rizzo } 14942e159ef0SLuigi Rizzo __builtin_prefetch(buf); 1495f9790aebSLuigi Rizzo ++ft_i; 1496f9790aebSLuigi Rizzo if (slot->flags & NS_MOREFRAG) { 1497f9790aebSLuigi Rizzo frags++; 1498f9790aebSLuigi Rizzo continue; 1499f9790aebSLuigi Rizzo } 1500f9790aebSLuigi Rizzo if (unlikely(netmap_verbose && frags > 1)) 1501f9790aebSLuigi Rizzo RD(5, "%d frags at %d", frags, ft_i - frags); 1502f9790aebSLuigi Rizzo ft[ft_i - frags].ft_frags = frags; 1503f9790aebSLuigi Rizzo frags = 1; 1504f9790aebSLuigi Rizzo if (unlikely((int)ft_i >= bridge_batch)) 1505f9790aebSLuigi Rizzo ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); 1506f9790aebSLuigi Rizzo } 1507f9790aebSLuigi Rizzo if (frags > 1) { 150837e3a6d3SLuigi Rizzo /* Here ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG, and we 150937e3a6d3SLuigi Rizzo * have to fix frags count. */ 151037e3a6d3SLuigi Rizzo frags--; 151137e3a6d3SLuigi Rizzo ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG; 151237e3a6d3SLuigi Rizzo ft[ft_i - frags].ft_frags = frags; 151337e3a6d3SLuigi Rizzo D("Truncate incomplete fragment at %d (%d frags)", ft_i, frags); 1514f9790aebSLuigi Rizzo } 1515f9790aebSLuigi Rizzo if (ft_i) 1516f9790aebSLuigi Rizzo ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); 1517f9790aebSLuigi Rizzo BDG_RUNLOCK(b); 1518f9790aebSLuigi Rizzo return j; 1519f9790aebSLuigi Rizzo } 1520f9790aebSLuigi Rizzo 1521f9790aebSLuigi Rizzo 1522f9790aebSLuigi Rizzo /* ----- FreeBSD if_bridge hash function ------- */ 1523f9790aebSLuigi Rizzo 1524f9790aebSLuigi Rizzo /* 1525f9790aebSLuigi Rizzo * The following hash function is adapted from "Hash Functions" by Bob Jenkins 1526f9790aebSLuigi Rizzo * ("Algorithm Alley", Dr. Dobbs Journal, September 1997). 
1527f9790aebSLuigi Rizzo * 1528f9790aebSLuigi Rizzo * http://www.burtleburtle.net/bob/hash/spooky.html 1529f9790aebSLuigi Rizzo */ 1530f9790aebSLuigi Rizzo #define mix(a, b, c) \ 1531f9790aebSLuigi Rizzo do { \ 1532f9790aebSLuigi Rizzo a -= b; a -= c; a ^= (c >> 13); \ 1533f9790aebSLuigi Rizzo b -= c; b -= a; b ^= (a << 8); \ 1534f9790aebSLuigi Rizzo c -= a; c -= b; c ^= (b >> 13); \ 1535f9790aebSLuigi Rizzo a -= b; a -= c; a ^= (c >> 12); \ 1536f9790aebSLuigi Rizzo b -= c; b -= a; b ^= (a << 16); \ 1537f9790aebSLuigi Rizzo c -= a; c -= b; c ^= (b >> 5); \ 1538f9790aebSLuigi Rizzo a -= b; a -= c; a ^= (c >> 3); \ 1539f9790aebSLuigi Rizzo b -= c; b -= a; b ^= (a << 10); \ 1540f9790aebSLuigi Rizzo c -= a; c -= b; c ^= (b >> 15); \ 1541f9790aebSLuigi Rizzo } while (/*CONSTCOND*/0) 1542f9790aebSLuigi Rizzo 154317885a7bSLuigi Rizzo 1544f9790aebSLuigi Rizzo static __inline uint32_t 1545f9790aebSLuigi Rizzo nm_bridge_rthash(const uint8_t *addr) 1546f9790aebSLuigi Rizzo { 1547f9790aebSLuigi Rizzo uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key 1548f9790aebSLuigi Rizzo 1549f9790aebSLuigi Rizzo b += addr[5] << 8; 1550f9790aebSLuigi Rizzo b += addr[4]; 1551f9790aebSLuigi Rizzo a += addr[3] << 24; 1552f9790aebSLuigi Rizzo a += addr[2] << 16; 1553f9790aebSLuigi Rizzo a += addr[1] << 8; 1554f9790aebSLuigi Rizzo a += addr[0]; 1555f9790aebSLuigi Rizzo 1556f9790aebSLuigi Rizzo mix(a, b, c); 1557f9790aebSLuigi Rizzo #define BRIDGE_RTHASH_MASK (NM_BDG_HASH-1) 1558f9790aebSLuigi Rizzo return (c & BRIDGE_RTHASH_MASK); 1559f9790aebSLuigi Rizzo } 1560f9790aebSLuigi Rizzo 1561f9790aebSLuigi Rizzo #undef mix 1562f9790aebSLuigi Rizzo 1563f9790aebSLuigi Rizzo 15644bf50f18SLuigi Rizzo /* nm_register callback for VALE ports */ 1565f9790aebSLuigi Rizzo static int 15664bf50f18SLuigi Rizzo netmap_vp_reg(struct netmap_adapter *na, int onoff) 1567f9790aebSLuigi Rizzo { 1568f9790aebSLuigi Rizzo struct netmap_vp_adapter *vpna = 1569f9790aebSLuigi Rizzo (struct netmap_vp_adapter*)na; 
157037e3a6d3SLuigi Rizzo enum txrx t; 157137e3a6d3SLuigi Rizzo int i; 1572f9790aebSLuigi Rizzo 15734bf50f18SLuigi Rizzo /* persistent ports may be put in netmap mode 15744bf50f18SLuigi Rizzo * before being attached to a bridge 1575f9790aebSLuigi Rizzo */ 15764bf50f18SLuigi Rizzo if (vpna->na_bdg) 1577f9790aebSLuigi Rizzo BDG_WLOCK(vpna->na_bdg); 1578f9790aebSLuigi Rizzo if (onoff) { 157937e3a6d3SLuigi Rizzo for_rx_tx(t) { 158037e3a6d3SLuigi Rizzo for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { 158137e3a6d3SLuigi Rizzo struct netmap_kring *kring = &NMR(na, t)[i]; 158237e3a6d3SLuigi Rizzo 158337e3a6d3SLuigi Rizzo if (nm_kring_pending_on(kring)) 158437e3a6d3SLuigi Rizzo kring->nr_mode = NKR_NETMAP_ON; 158537e3a6d3SLuigi Rizzo } 158637e3a6d3SLuigi Rizzo } 158737e3a6d3SLuigi Rizzo if (na->active_fds == 0) 15884bf50f18SLuigi Rizzo na->na_flags |= NAF_NETMAP_ON; 15894bf50f18SLuigi Rizzo /* XXX on FreeBSD, persistent VALE ports should also 15904bf50f18SLuigi Rizzo * toggle IFCAP_NETMAP in na->ifp (2014-03-16) 15914bf50f18SLuigi Rizzo */ 1592f9790aebSLuigi Rizzo } else { 159337e3a6d3SLuigi Rizzo if (na->active_fds == 0) 15944bf50f18SLuigi Rizzo na->na_flags &= ~NAF_NETMAP_ON; 159537e3a6d3SLuigi Rizzo for_rx_tx(t) { 159637e3a6d3SLuigi Rizzo for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { 159737e3a6d3SLuigi Rizzo struct netmap_kring *kring = &NMR(na, t)[i]; 159837e3a6d3SLuigi Rizzo 159937e3a6d3SLuigi Rizzo if (nm_kring_pending_off(kring)) 160037e3a6d3SLuigi Rizzo kring->nr_mode = NKR_NETMAP_OFF; 160137e3a6d3SLuigi Rizzo } 160237e3a6d3SLuigi Rizzo } 1603f9790aebSLuigi Rizzo } 16044bf50f18SLuigi Rizzo if (vpna->na_bdg) 1605f9790aebSLuigi Rizzo BDG_WUNLOCK(vpna->na_bdg); 1606f9790aebSLuigi Rizzo return 0; 1607f9790aebSLuigi Rizzo } 1608f9790aebSLuigi Rizzo 1609f9790aebSLuigi Rizzo 1610f9790aebSLuigi Rizzo /* 1611f9790aebSLuigi Rizzo * Lookup function for a learning bridge. 
1612f9790aebSLuigi Rizzo * Update the hash table with the source address, 1613f9790aebSLuigi Rizzo * and then returns the destination port index, and the 1614f9790aebSLuigi Rizzo * ring in *dst_ring (at the moment, always use ring 0) 1615f9790aebSLuigi Rizzo */ 1616f9790aebSLuigi Rizzo u_int 16174bf50f18SLuigi Rizzo netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, 1618847bf383SLuigi Rizzo struct netmap_vp_adapter *na) 1619f9790aebSLuigi Rizzo { 16204bf50f18SLuigi Rizzo uint8_t *buf = ft->ft_buf; 16214bf50f18SLuigi Rizzo u_int buf_len = ft->ft_len; 1622f9790aebSLuigi Rizzo struct nm_hash_ent *ht = na->na_bdg->ht; 1623f9790aebSLuigi Rizzo uint32_t sh, dh; 1624f9790aebSLuigi Rizzo u_int dst, mysrc = na->bdg_port; 1625f9790aebSLuigi Rizzo uint64_t smac, dmac; 162637e3a6d3SLuigi Rizzo uint8_t indbuf[12]; 1627f9790aebSLuigi Rizzo 16284bf50f18SLuigi Rizzo /* safety check, unfortunately we have many cases */ 162937e3a6d3SLuigi Rizzo if (buf_len >= 14 + na->up.virt_hdr_len) { 16304bf50f18SLuigi Rizzo /* virthdr + mac_hdr in the same slot */ 163137e3a6d3SLuigi Rizzo buf += na->up.virt_hdr_len; 163237e3a6d3SLuigi Rizzo buf_len -= na->up.virt_hdr_len; 163337e3a6d3SLuigi Rizzo } else if (buf_len == na->up.virt_hdr_len && ft->ft_flags & NS_MOREFRAG) { 16344bf50f18SLuigi Rizzo /* only header in first fragment */ 16354bf50f18SLuigi Rizzo ft++; 16364bf50f18SLuigi Rizzo buf = ft->ft_buf; 16374bf50f18SLuigi Rizzo buf_len = ft->ft_len; 16384bf50f18SLuigi Rizzo } else { 16394bf50f18SLuigi Rizzo RD(5, "invalid buf format, length %d", buf_len); 1640f9790aebSLuigi Rizzo return NM_BDG_NOPORT; 1641f9790aebSLuigi Rizzo } 164237e3a6d3SLuigi Rizzo 164337e3a6d3SLuigi Rizzo if (ft->ft_flags & NS_INDIRECT) { 164437e3a6d3SLuigi Rizzo if (copyin(buf, indbuf, sizeof(indbuf))) { 164537e3a6d3SLuigi Rizzo return NM_BDG_NOPORT; 164637e3a6d3SLuigi Rizzo } 164737e3a6d3SLuigi Rizzo buf = indbuf; 164837e3a6d3SLuigi Rizzo } 164937e3a6d3SLuigi Rizzo 1650f9790aebSLuigi Rizzo dmac = 
le64toh(*(uint64_t *)(buf)) & 0xffffffffffff; 1651f9790aebSLuigi Rizzo smac = le64toh(*(uint64_t *)(buf + 4)); 1652f9790aebSLuigi Rizzo smac >>= 16; 1653f9790aebSLuigi Rizzo 1654f9790aebSLuigi Rizzo /* 1655f9790aebSLuigi Rizzo * The hash is somewhat expensive, there might be some 1656f9790aebSLuigi Rizzo * worthwhile optimizations here. 1657f9790aebSLuigi Rizzo */ 1658847bf383SLuigi Rizzo if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */ 1659f9790aebSLuigi Rizzo uint8_t *s = buf+6; 1660f9790aebSLuigi Rizzo sh = nm_bridge_rthash(s); // XXX hash of source 1661f9790aebSLuigi Rizzo /* update source port forwarding entry */ 1662847bf383SLuigi Rizzo na->last_smac = ht[sh].mac = smac; /* XXX expire ? */ 1663f9790aebSLuigi Rizzo ht[sh].ports = mysrc; 1664f9790aebSLuigi Rizzo if (netmap_verbose) 1665f9790aebSLuigi Rizzo D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d", 1666f9790aebSLuigi Rizzo s[0], s[1], s[2], s[3], s[4], s[5], mysrc); 1667f9790aebSLuigi Rizzo } 1668f9790aebSLuigi Rizzo dst = NM_BDG_BROADCAST; 1669f9790aebSLuigi Rizzo if ((buf[0] & 1) == 0) { /* unicast */ 1670f9790aebSLuigi Rizzo dh = nm_bridge_rthash(buf); // XXX hash of dst 1671f9790aebSLuigi Rizzo if (ht[dh].mac == dmac) { /* found dst */ 1672f9790aebSLuigi Rizzo dst = ht[dh].ports; 1673f9790aebSLuigi Rizzo } 1674f9790aebSLuigi Rizzo /* XXX otherwise return NM_BDG_UNKNOWN ? */ 1675f9790aebSLuigi Rizzo } 1676f9790aebSLuigi Rizzo return dst; 1677f9790aebSLuigi Rizzo } 1678f9790aebSLuigi Rizzo 1679f9790aebSLuigi Rizzo 1680f9790aebSLuigi Rizzo /* 168117885a7bSLuigi Rizzo * Available space in the ring. 
Only used in VALE code 168217885a7bSLuigi Rizzo * and only with is_rx = 1 168317885a7bSLuigi Rizzo */ 168417885a7bSLuigi Rizzo static inline uint32_t 168517885a7bSLuigi Rizzo nm_kr_space(struct netmap_kring *k, int is_rx) 168617885a7bSLuigi Rizzo { 168717885a7bSLuigi Rizzo int space; 168817885a7bSLuigi Rizzo 168917885a7bSLuigi Rizzo if (is_rx) { 169017885a7bSLuigi Rizzo int busy = k->nkr_hwlease - k->nr_hwcur; 169117885a7bSLuigi Rizzo if (busy < 0) 169217885a7bSLuigi Rizzo busy += k->nkr_num_slots; 169317885a7bSLuigi Rizzo space = k->nkr_num_slots - 1 - busy; 169417885a7bSLuigi Rizzo } else { 169517885a7bSLuigi Rizzo /* XXX never used in this branch */ 169617885a7bSLuigi Rizzo space = k->nr_hwtail - k->nkr_hwlease; 169717885a7bSLuigi Rizzo if (space < 0) 169817885a7bSLuigi Rizzo space += k->nkr_num_slots; 169917885a7bSLuigi Rizzo } 170017885a7bSLuigi Rizzo #if 0 170117885a7bSLuigi Rizzo // sanity check 170217885a7bSLuigi Rizzo if (k->nkr_hwlease >= k->nkr_num_slots || 170317885a7bSLuigi Rizzo k->nr_hwcur >= k->nkr_num_slots || 170417885a7bSLuigi Rizzo k->nr_tail >= k->nkr_num_slots || 170517885a7bSLuigi Rizzo busy < 0 || 170617885a7bSLuigi Rizzo busy >= k->nkr_num_slots) { 170717885a7bSLuigi Rizzo D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease, 170817885a7bSLuigi Rizzo k->nkr_lease_idx, k->nkr_num_slots); 170917885a7bSLuigi Rizzo } 171017885a7bSLuigi Rizzo #endif 171117885a7bSLuigi Rizzo return space; 171217885a7bSLuigi Rizzo } 171317885a7bSLuigi Rizzo 171417885a7bSLuigi Rizzo 171517885a7bSLuigi Rizzo 171617885a7bSLuigi Rizzo 171717885a7bSLuigi Rizzo /* make a lease on the kring for N positions. 
return the 171817885a7bSLuigi Rizzo * lease index 171917885a7bSLuigi Rizzo * XXX only used in VALE code and with is_rx = 1 172017885a7bSLuigi Rizzo */ 172117885a7bSLuigi Rizzo static inline uint32_t 172217885a7bSLuigi Rizzo nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx) 172317885a7bSLuigi Rizzo { 172417885a7bSLuigi Rizzo uint32_t lim = k->nkr_num_slots - 1; 172517885a7bSLuigi Rizzo uint32_t lease_idx = k->nkr_lease_idx; 172617885a7bSLuigi Rizzo 172717885a7bSLuigi Rizzo k->nkr_leases[lease_idx] = NR_NOSLOT; 172817885a7bSLuigi Rizzo k->nkr_lease_idx = nm_next(lease_idx, lim); 172917885a7bSLuigi Rizzo 173017885a7bSLuigi Rizzo if (n > nm_kr_space(k, is_rx)) { 173117885a7bSLuigi Rizzo D("invalid request for %d slots", n); 173217885a7bSLuigi Rizzo panic("x"); 173317885a7bSLuigi Rizzo } 173417885a7bSLuigi Rizzo /* XXX verify that there are n slots */ 173517885a7bSLuigi Rizzo k->nkr_hwlease += n; 173617885a7bSLuigi Rizzo if (k->nkr_hwlease > lim) 173717885a7bSLuigi Rizzo k->nkr_hwlease -= lim + 1; 173817885a7bSLuigi Rizzo 173917885a7bSLuigi Rizzo if (k->nkr_hwlease >= k->nkr_num_slots || 174017885a7bSLuigi Rizzo k->nr_hwcur >= k->nkr_num_slots || 174117885a7bSLuigi Rizzo k->nr_hwtail >= k->nkr_num_slots || 174217885a7bSLuigi Rizzo k->nkr_lease_idx >= k->nkr_num_slots) { 174317885a7bSLuigi Rizzo D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d", 17444bf50f18SLuigi Rizzo k->na->name, 174517885a7bSLuigi Rizzo k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease, 174617885a7bSLuigi Rizzo k->nkr_lease_idx, k->nkr_num_slots); 174717885a7bSLuigi Rizzo } 174817885a7bSLuigi Rizzo return lease_idx; 174917885a7bSLuigi Rizzo } 175017885a7bSLuigi Rizzo 175117885a7bSLuigi Rizzo /* 17524bf50f18SLuigi Rizzo * 1753f9790aebSLuigi Rizzo * This flush routine supports only unicast and broadcast but a large 1754f9790aebSLuigi Rizzo * number of ports, and lets us replace the learn and dispatch functions. 
1755f9790aebSLuigi Rizzo */ 1756f9790aebSLuigi Rizzo int 1757f9790aebSLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, 1758f9790aebSLuigi Rizzo u_int ring_nr) 1759f9790aebSLuigi Rizzo { 1760f9790aebSLuigi Rizzo struct nm_bdg_q *dst_ents, *brddst; 1761f9790aebSLuigi Rizzo uint16_t num_dsts = 0, *dsts; 1762f9790aebSLuigi Rizzo struct nm_bridge *b = na->na_bdg; 176337e3a6d3SLuigi Rizzo u_int i, me = na->bdg_port; 1764f9790aebSLuigi Rizzo 1765f9790aebSLuigi Rizzo /* 1766f9790aebSLuigi Rizzo * The work area (pointed by ft) is followed by an array of 1767f9790aebSLuigi Rizzo * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS 1768f9790aebSLuigi Rizzo * queues per port plus one for the broadcast traffic. 1769f9790aebSLuigi Rizzo * Then we have an array of destination indexes. 1770f9790aebSLuigi Rizzo */ 1771f9790aebSLuigi Rizzo dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX); 1772f9790aebSLuigi Rizzo dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1); 1773f9790aebSLuigi Rizzo 1774f9790aebSLuigi Rizzo /* first pass: find a destination for each packet in the batch */ 1775f9790aebSLuigi Rizzo for (i = 0; likely(i < n); i += ft[i].ft_frags) { 1776f9790aebSLuigi Rizzo uint8_t dst_ring = ring_nr; /* default, same ring as origin */ 1777f9790aebSLuigi Rizzo uint16_t dst_port, d_i; 1778f9790aebSLuigi Rizzo struct nm_bdg_q *d; 1779f9790aebSLuigi Rizzo 1780f9790aebSLuigi Rizzo ND("slot %d frags %d", i, ft[i].ft_frags); 1781f0ea3689SLuigi Rizzo /* Drop the packet if the virtio-net header is not into the first 1782f9790aebSLuigi Rizzo fragment nor at the very beginning of the second. 
*/ 178337e3a6d3SLuigi Rizzo if (unlikely(na->up.virt_hdr_len > ft[i].ft_len)) 1784f9790aebSLuigi Rizzo continue; 17854bf50f18SLuigi Rizzo dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na); 1786f9790aebSLuigi Rizzo if (netmap_verbose > 255) 1787f9790aebSLuigi Rizzo RD(5, "slot %d port %d -> %d", i, me, dst_port); 1788f9790aebSLuigi Rizzo if (dst_port == NM_BDG_NOPORT) 1789f9790aebSLuigi Rizzo continue; /* this packet is identified to be dropped */ 1790f9790aebSLuigi Rizzo else if (unlikely(dst_port > NM_BDG_MAXPORTS)) 1791f9790aebSLuigi Rizzo continue; 1792f9790aebSLuigi Rizzo else if (dst_port == NM_BDG_BROADCAST) 1793f9790aebSLuigi Rizzo dst_ring = 0; /* broadcasts always go to ring 0 */ 1794f9790aebSLuigi Rizzo else if (unlikely(dst_port == me || 1795f9790aebSLuigi Rizzo !b->bdg_ports[dst_port])) 1796f9790aebSLuigi Rizzo continue; 1797f9790aebSLuigi Rizzo 1798f9790aebSLuigi Rizzo /* get a position in the scratch pad */ 1799f9790aebSLuigi Rizzo d_i = dst_port * NM_BDG_MAXRINGS + dst_ring; 1800f9790aebSLuigi Rizzo d = dst_ents + d_i; 1801f9790aebSLuigi Rizzo 1802f9790aebSLuigi Rizzo /* append the first fragment to the list */ 1803f9790aebSLuigi Rizzo if (d->bq_head == NM_FT_NULL) { /* new destination */ 1804f9790aebSLuigi Rizzo d->bq_head = d->bq_tail = i; 1805f9790aebSLuigi Rizzo /* remember this position to be scanned later */ 1806f9790aebSLuigi Rizzo if (dst_port != NM_BDG_BROADCAST) 1807f9790aebSLuigi Rizzo dsts[num_dsts++] = d_i; 1808f9790aebSLuigi Rizzo } else { 1809f9790aebSLuigi Rizzo ft[d->bq_tail].ft_next = i; 1810f9790aebSLuigi Rizzo d->bq_tail = i; 1811f9790aebSLuigi Rizzo } 1812f9790aebSLuigi Rizzo d->bq_len += ft[i].ft_frags; 1813f9790aebSLuigi Rizzo } 1814f9790aebSLuigi Rizzo 1815f9790aebSLuigi Rizzo /* 1816f9790aebSLuigi Rizzo * Broadcast traffic goes to ring 0 on all destinations. 1817f9790aebSLuigi Rizzo * So we need to add these rings to the list of ports to scan. 
1818f9790aebSLuigi Rizzo * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is 1819f9790aebSLuigi Rizzo * expensive. We should keep a compact list of active destinations 1820f9790aebSLuigi Rizzo * so we could shorten this loop. 1821f9790aebSLuigi Rizzo */ 1822f9790aebSLuigi Rizzo brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS; 1823f9790aebSLuigi Rizzo if (brddst->bq_head != NM_FT_NULL) { 182437e3a6d3SLuigi Rizzo u_int j; 1825f9790aebSLuigi Rizzo for (j = 0; likely(j < b->bdg_active_ports); j++) { 1826f9790aebSLuigi Rizzo uint16_t d_i; 1827f9790aebSLuigi Rizzo i = b->bdg_port_index[j]; 1828f9790aebSLuigi Rizzo if (unlikely(i == me)) 1829f9790aebSLuigi Rizzo continue; 1830f9790aebSLuigi Rizzo d_i = i * NM_BDG_MAXRINGS; 1831f9790aebSLuigi Rizzo if (dst_ents[d_i].bq_head == NM_FT_NULL) 1832f9790aebSLuigi Rizzo dsts[num_dsts++] = d_i; 1833f9790aebSLuigi Rizzo } 1834f9790aebSLuigi Rizzo } 1835f9790aebSLuigi Rizzo 1836f9790aebSLuigi Rizzo ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts); 18374bf50f18SLuigi Rizzo /* second pass: scan destinations */ 1838f9790aebSLuigi Rizzo for (i = 0; i < num_dsts; i++) { 1839f9790aebSLuigi Rizzo struct netmap_vp_adapter *dst_na; 1840f9790aebSLuigi Rizzo struct netmap_kring *kring; 1841f9790aebSLuigi Rizzo struct netmap_ring *ring; 1842f0ea3689SLuigi Rizzo u_int dst_nr, lim, j, d_i, next, brd_next; 1843f9790aebSLuigi Rizzo u_int needed, howmany; 1844f9790aebSLuigi Rizzo int retry = netmap_txsync_retry; 1845f9790aebSLuigi Rizzo struct nm_bdg_q *d; 1846f9790aebSLuigi Rizzo uint32_t my_start = 0, lease_idx = 0; 1847f9790aebSLuigi Rizzo int nrings; 1848f0ea3689SLuigi Rizzo int virt_hdr_mismatch = 0; 1849f9790aebSLuigi Rizzo 1850f9790aebSLuigi Rizzo d_i = dsts[i]; 1851f9790aebSLuigi Rizzo ND("second pass %d port %d", i, d_i); 1852f9790aebSLuigi Rizzo d = dst_ents + d_i; 1853f9790aebSLuigi Rizzo // XXX fix the division 1854f9790aebSLuigi Rizzo dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS]; 1855f9790aebSLuigi Rizzo /* protect 
from the lookup function returning an inactive 1856f9790aebSLuigi Rizzo * destination port 1857f9790aebSLuigi Rizzo */ 1858f9790aebSLuigi Rizzo if (unlikely(dst_na == NULL)) 1859f9790aebSLuigi Rizzo goto cleanup; 1860f9790aebSLuigi Rizzo if (dst_na->up.na_flags & NAF_SW_ONLY) 1861f9790aebSLuigi Rizzo goto cleanup; 1862f9790aebSLuigi Rizzo /* 1863f9790aebSLuigi Rizzo * The interface may be in !netmap mode in two cases: 1864f9790aebSLuigi Rizzo * - when na is attached but not activated yet; 1865f9790aebSLuigi Rizzo * - when na is being deactivated but is still attached. 1866f9790aebSLuigi Rizzo */ 18674bf50f18SLuigi Rizzo if (unlikely(!nm_netmap_on(&dst_na->up))) { 1868f9790aebSLuigi Rizzo ND("not in netmap mode!"); 1869f9790aebSLuigi Rizzo goto cleanup; 1870f9790aebSLuigi Rizzo } 1871f9790aebSLuigi Rizzo 1872f9790aebSLuigi Rizzo /* there is at least one either unicast or broadcast packet */ 1873f9790aebSLuigi Rizzo brd_next = brddst->bq_head; 1874f9790aebSLuigi Rizzo next = d->bq_head; 1875f9790aebSLuigi Rizzo /* we need to reserve this many slots. If fewer are 1876f9790aebSLuigi Rizzo * available, some packets will be dropped. 1877f9790aebSLuigi Rizzo * Packets may have multiple fragments, so we may not use 1878f9790aebSLuigi Rizzo * there is a chance that we may not use all of the slots 1879f9790aebSLuigi Rizzo * we have claimed, so we will need to handle the leftover 1880f9790aebSLuigi Rizzo * ones when we regain the lock. 
1881f9790aebSLuigi Rizzo */ 1882f9790aebSLuigi Rizzo needed = d->bq_len + brddst->bq_len; 1883f9790aebSLuigi Rizzo 188437e3a6d3SLuigi Rizzo if (unlikely(dst_na->up.virt_hdr_len != na->up.virt_hdr_len)) { 1885c3e9b4dbSLuiz Otavio O Souza if (netmap_verbose) { 188637e3a6d3SLuigi Rizzo RD(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len, 188737e3a6d3SLuigi Rizzo dst_na->up.virt_hdr_len); 1888c3e9b4dbSLuiz Otavio O Souza } 1889f0ea3689SLuigi Rizzo /* There is a virtio-net header/offloadings mismatch between 1890f0ea3689SLuigi Rizzo * source and destination. The slower mismatch datapath will 1891f0ea3689SLuigi Rizzo * be used to cope with all the mismatches. 1892f0ea3689SLuigi Rizzo */ 1893f0ea3689SLuigi Rizzo virt_hdr_mismatch = 1; 1894f0ea3689SLuigi Rizzo if (dst_na->mfs < na->mfs) { 1895f0ea3689SLuigi Rizzo /* We may need to do segmentation offloadings, and so 1896f0ea3689SLuigi Rizzo * we may need a number of destination slots greater 1897f0ea3689SLuigi Rizzo * than the number of input slots ('needed'). 1898f0ea3689SLuigi Rizzo * We look for the smallest integer 'x' which satisfies: 1899f0ea3689SLuigi Rizzo * needed * na->mfs + x * H <= x * na->mfs 1900f0ea3689SLuigi Rizzo * where 'H' is the length of the longest header that may 1901f0ea3689SLuigi Rizzo * be replicated in the segmentation process (e.g. for 1902f0ea3689SLuigi Rizzo * TCPv4 we must account for ethernet header, IP header 1903f0ea3689SLuigi Rizzo * and TCPv4 header). 1904f0ea3689SLuigi Rizzo */ 1905f0ea3689SLuigi Rizzo needed = (needed * na->mfs) / 1906f0ea3689SLuigi Rizzo (dst_na->mfs - WORST_CASE_GSO_HEADER) + 1; 1907f0ea3689SLuigi Rizzo ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed); 1908f0ea3689SLuigi Rizzo } 1909f0ea3689SLuigi Rizzo } 1910f0ea3689SLuigi Rizzo 1911f9790aebSLuigi Rizzo ND(5, "pass 2 dst %d is %x %s", 1912f9790aebSLuigi Rizzo i, d_i, is_vp ? 
"virtual" : "nic/host"); 1913f9790aebSLuigi Rizzo dst_nr = d_i & (NM_BDG_MAXRINGS-1); 1914f9790aebSLuigi Rizzo nrings = dst_na->up.num_rx_rings; 1915f9790aebSLuigi Rizzo if (dst_nr >= nrings) 1916f9790aebSLuigi Rizzo dst_nr = dst_nr % nrings; 1917f9790aebSLuigi Rizzo kring = &dst_na->up.rx_rings[dst_nr]; 1918f9790aebSLuigi Rizzo ring = kring->ring; 1919f9790aebSLuigi Rizzo lim = kring->nkr_num_slots - 1; 1920f9790aebSLuigi Rizzo 1921f9790aebSLuigi Rizzo retry: 1922f9790aebSLuigi Rizzo 1923f0ea3689SLuigi Rizzo if (dst_na->retry && retry) { 1924f0ea3689SLuigi Rizzo /* try to get some free slot from the previous run */ 1925847bf383SLuigi Rizzo kring->nm_notify(kring, 0); 19264bf50f18SLuigi Rizzo /* actually useful only for bwraps, since there 19274bf50f18SLuigi Rizzo * the notify will trigger a txsync on the hwna. VALE ports 19284bf50f18SLuigi Rizzo * have dst_na->retry == 0 19294bf50f18SLuigi Rizzo */ 1930f0ea3689SLuigi Rizzo } 1931f9790aebSLuigi Rizzo /* reserve the buffers in the queue and an entry 1932f9790aebSLuigi Rizzo * to report completion, and drop lock. 1933f9790aebSLuigi Rizzo * XXX this might become a helper function. 
1934f9790aebSLuigi Rizzo */ 1935f9790aebSLuigi Rizzo mtx_lock(&kring->q_lock); 1936f9790aebSLuigi Rizzo if (kring->nkr_stopped) { 1937f9790aebSLuigi Rizzo mtx_unlock(&kring->q_lock); 1938f9790aebSLuigi Rizzo goto cleanup; 1939f9790aebSLuigi Rizzo } 1940f9790aebSLuigi Rizzo my_start = j = kring->nkr_hwlease; 1941f9790aebSLuigi Rizzo howmany = nm_kr_space(kring, 1); 1942f9790aebSLuigi Rizzo if (needed < howmany) 1943f9790aebSLuigi Rizzo howmany = needed; 1944f9790aebSLuigi Rizzo lease_idx = nm_kr_lease(kring, howmany, 1); 1945f9790aebSLuigi Rizzo mtx_unlock(&kring->q_lock); 1946f9790aebSLuigi Rizzo 1947f9790aebSLuigi Rizzo /* only retry if we need more than available slots */ 1948f9790aebSLuigi Rizzo if (retry && needed <= howmany) 1949f9790aebSLuigi Rizzo retry = 0; 1950f9790aebSLuigi Rizzo 1951f9790aebSLuigi Rizzo /* copy to the destination queue */ 1952f9790aebSLuigi Rizzo while (howmany > 0) { 1953f9790aebSLuigi Rizzo struct netmap_slot *slot; 1954f9790aebSLuigi Rizzo struct nm_bdg_fwd *ft_p, *ft_end; 1955f9790aebSLuigi Rizzo u_int cnt; 1956f9790aebSLuigi Rizzo 1957f9790aebSLuigi Rizzo /* find the queue from which we pick next packet. 1958f9790aebSLuigi Rizzo * NM_FT_NULL is always higher than valid indexes 1959f9790aebSLuigi Rizzo * so we never dereference it if the other list 1960f9790aebSLuigi Rizzo * has packets (and if both are empty we never 1961f9790aebSLuigi Rizzo * get here). 
1962f9790aebSLuigi Rizzo */ 1963f9790aebSLuigi Rizzo if (next < brd_next) { 1964f9790aebSLuigi Rizzo ft_p = ft + next; 1965f9790aebSLuigi Rizzo next = ft_p->ft_next; 1966f9790aebSLuigi Rizzo } else { /* insert broadcast */ 1967f9790aebSLuigi Rizzo ft_p = ft + brd_next; 1968f9790aebSLuigi Rizzo brd_next = ft_p->ft_next; 1969f9790aebSLuigi Rizzo } 1970f9790aebSLuigi Rizzo cnt = ft_p->ft_frags; // cnt > 0 1971f9790aebSLuigi Rizzo if (unlikely(cnt > howmany)) 1972f9790aebSLuigi Rizzo break; /* no more space */ 1973f9790aebSLuigi Rizzo if (netmap_verbose && cnt > 1) 1974f9790aebSLuigi Rizzo RD(5, "rx %d frags to %d", cnt, j); 1975f9790aebSLuigi Rizzo ft_end = ft_p + cnt; 1976f0ea3689SLuigi Rizzo if (unlikely(virt_hdr_mismatch)) { 1977f0ea3689SLuigi Rizzo bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany); 1978f0ea3689SLuigi Rizzo } else { 1979f0ea3689SLuigi Rizzo howmany -= cnt; 1980f9790aebSLuigi Rizzo do { 1981f9790aebSLuigi Rizzo char *dst, *src = ft_p->ft_buf; 1982f9790aebSLuigi Rizzo size_t copy_len = ft_p->ft_len, dst_len = copy_len; 1983f9790aebSLuigi Rizzo 1984f9790aebSLuigi Rizzo slot = &ring->slot[j]; 19854bf50f18SLuigi Rizzo dst = NMB(&dst_na->up, slot); 1986f9790aebSLuigi Rizzo 198717885a7bSLuigi Rizzo ND("send [%d] %d(%d) bytes at %s:%d", 198817885a7bSLuigi Rizzo i, (int)copy_len, (int)dst_len, 198917885a7bSLuigi Rizzo NM_IFPNAME(dst_ifp), j); 1990f9790aebSLuigi Rizzo /* round to a multiple of 64 */ 1991f9790aebSLuigi Rizzo copy_len = (copy_len + 63) & ~63; 1992f9790aebSLuigi Rizzo 19934bf50f18SLuigi Rizzo if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) || 19944bf50f18SLuigi Rizzo copy_len > NETMAP_BUF_SIZE(&na->up))) { 1995e31c6ec7SLuigi Rizzo RD(5, "invalid len %d, down to 64", (int)copy_len); 1996e31c6ec7SLuigi Rizzo copy_len = dst_len = 64; // XXX 1997e31c6ec7SLuigi Rizzo } 1998f9790aebSLuigi Rizzo if (ft_p->ft_flags & NS_INDIRECT) { 1999f9790aebSLuigi Rizzo if (copyin(src, dst, copy_len)) { 2000f9790aebSLuigi Rizzo // invalid user 
pointer, pretend len is 0 2001f9790aebSLuigi Rizzo dst_len = 0; 2002f9790aebSLuigi Rizzo } 2003f9790aebSLuigi Rizzo } else { 2004f9790aebSLuigi Rizzo //memcpy(dst, src, copy_len); 2005f9790aebSLuigi Rizzo pkt_copy(src, dst, (int)copy_len); 2006f9790aebSLuigi Rizzo } 2007f9790aebSLuigi Rizzo slot->len = dst_len; 2008f9790aebSLuigi Rizzo slot->flags = (cnt << 8)| NS_MOREFRAG; 2009f9790aebSLuigi Rizzo j = nm_next(j, lim); 2010f0ea3689SLuigi Rizzo needed--; 2011f9790aebSLuigi Rizzo ft_p++; 2012f9790aebSLuigi Rizzo } while (ft_p != ft_end); 2013f9790aebSLuigi Rizzo slot->flags = (cnt << 8); /* clear flag on last entry */ 2014f0ea3689SLuigi Rizzo } 2015f9790aebSLuigi Rizzo /* are we done ? */ 2016f9790aebSLuigi Rizzo if (next == NM_FT_NULL && brd_next == NM_FT_NULL) 2017f9790aebSLuigi Rizzo break; 2018f9790aebSLuigi Rizzo } 2019f9790aebSLuigi Rizzo { 2020f9790aebSLuigi Rizzo /* current position */ 2021f9790aebSLuigi Rizzo uint32_t *p = kring->nkr_leases; /* shorthand */ 2022f9790aebSLuigi Rizzo uint32_t update_pos; 2023f9790aebSLuigi Rizzo int still_locked = 1; 2024f9790aebSLuigi Rizzo 2025f9790aebSLuigi Rizzo mtx_lock(&kring->q_lock); 2026f9790aebSLuigi Rizzo if (unlikely(howmany > 0)) { 2027f9790aebSLuigi Rizzo /* not used all bufs. If i am the last one 2028f9790aebSLuigi Rizzo * i can recover the slots, otherwise must 2029f9790aebSLuigi Rizzo * fill them with 0 to mark empty packets. 
2030f9790aebSLuigi Rizzo */ 2031f9790aebSLuigi Rizzo ND("leftover %d bufs", howmany); 2032f9790aebSLuigi Rizzo if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) { 2033f9790aebSLuigi Rizzo /* yes i am the last one */ 2034f9790aebSLuigi Rizzo ND("roll back nkr_hwlease to %d", j); 2035f9790aebSLuigi Rizzo kring->nkr_hwlease = j; 2036f9790aebSLuigi Rizzo } else { 2037f9790aebSLuigi Rizzo while (howmany-- > 0) { 2038f9790aebSLuigi Rizzo ring->slot[j].len = 0; 2039f9790aebSLuigi Rizzo ring->slot[j].flags = 0; 2040f9790aebSLuigi Rizzo j = nm_next(j, lim); 2041f9790aebSLuigi Rizzo } 2042f9790aebSLuigi Rizzo } 2043f9790aebSLuigi Rizzo } 2044f9790aebSLuigi Rizzo p[lease_idx] = j; /* report I am done */ 2045f9790aebSLuigi Rizzo 204617885a7bSLuigi Rizzo update_pos = kring->nr_hwtail; 2047f9790aebSLuigi Rizzo 2048f9790aebSLuigi Rizzo if (my_start == update_pos) { 2049f9790aebSLuigi Rizzo /* all slots before my_start have been reported, 2050f9790aebSLuigi Rizzo * so scan subsequent leases to see if other ranges 2051f9790aebSLuigi Rizzo * have been completed, and to a selwakeup or txsync. 2052f9790aebSLuigi Rizzo */ 2053f9790aebSLuigi Rizzo while (lease_idx != kring->nkr_lease_idx && 2054f9790aebSLuigi Rizzo p[lease_idx] != NR_NOSLOT) { 2055f9790aebSLuigi Rizzo j = p[lease_idx]; 2056f9790aebSLuigi Rizzo p[lease_idx] = NR_NOSLOT; 2057f9790aebSLuigi Rizzo lease_idx = nm_next(lease_idx, lim); 2058f9790aebSLuigi Rizzo } 2059f9790aebSLuigi Rizzo /* j is the new 'write' position. j != my_start 2060f9790aebSLuigi Rizzo * means there are new buffers to report 2061f9790aebSLuigi Rizzo */ 2062f9790aebSLuigi Rizzo if (likely(j != my_start)) { 206317885a7bSLuigi Rizzo kring->nr_hwtail = j; 2064f9790aebSLuigi Rizzo still_locked = 0; 2065f9790aebSLuigi Rizzo mtx_unlock(&kring->q_lock); 2066847bf383SLuigi Rizzo kring->nm_notify(kring, 0); 20674bf50f18SLuigi Rizzo /* this is netmap_notify for VALE ports and 20684bf50f18SLuigi Rizzo * netmap_bwrap_notify for bwrap. 
The latter will 20694bf50f18SLuigi Rizzo * trigger a txsync on the underlying hwna 20704bf50f18SLuigi Rizzo */ 20714bf50f18SLuigi Rizzo if (dst_na->retry && retry--) { 20724bf50f18SLuigi Rizzo /* XXX this is going to call nm_notify again. 20734bf50f18SLuigi Rizzo * Only useful for bwrap in virtual machines 20744bf50f18SLuigi Rizzo */ 2075f9790aebSLuigi Rizzo goto retry; 2076f9790aebSLuigi Rizzo } 2077f9790aebSLuigi Rizzo } 20784bf50f18SLuigi Rizzo } 2079f9790aebSLuigi Rizzo if (still_locked) 2080f9790aebSLuigi Rizzo mtx_unlock(&kring->q_lock); 2081f9790aebSLuigi Rizzo } 2082f9790aebSLuigi Rizzo cleanup: 2083f9790aebSLuigi Rizzo d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */ 2084f9790aebSLuigi Rizzo d->bq_len = 0; 2085f9790aebSLuigi Rizzo } 2086f9790aebSLuigi Rizzo brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */ 2087f9790aebSLuigi Rizzo brddst->bq_len = 0; 2088f9790aebSLuigi Rizzo return 0; 2089f9790aebSLuigi Rizzo } 2090f9790aebSLuigi Rizzo 20914bf50f18SLuigi Rizzo /* nm_txsync callback for VALE ports */ 2092f9790aebSLuigi Rizzo static int 20934bf50f18SLuigi Rizzo netmap_vp_txsync(struct netmap_kring *kring, int flags) 2094f9790aebSLuigi Rizzo { 20954bf50f18SLuigi Rizzo struct netmap_vp_adapter *na = 20964bf50f18SLuigi Rizzo (struct netmap_vp_adapter *)kring->na; 209717885a7bSLuigi Rizzo u_int done; 209817885a7bSLuigi Rizzo u_int const lim = kring->nkr_num_slots - 1; 2099847bf383SLuigi Rizzo u_int const head = kring->rhead; 2100f9790aebSLuigi Rizzo 2101f9790aebSLuigi Rizzo if (bridge_batch <= 0) { /* testing only */ 2102847bf383SLuigi Rizzo done = head; // used all 2103f9790aebSLuigi Rizzo goto done; 2104f9790aebSLuigi Rizzo } 21054bf50f18SLuigi Rizzo if (!na->na_bdg) { 2106847bf383SLuigi Rizzo done = head; 21074bf50f18SLuigi Rizzo goto done; 21084bf50f18SLuigi Rizzo } 2109f9790aebSLuigi Rizzo if (bridge_batch > NM_BDG_BATCH) 2110f9790aebSLuigi Rizzo bridge_batch = NM_BDG_BATCH; 2111f9790aebSLuigi Rizzo 2112847bf383SLuigi Rizzo done = 
nm_bdg_preflush(kring, head); 2113f9790aebSLuigi Rizzo done: 2114847bf383SLuigi Rizzo if (done != head) 2115847bf383SLuigi Rizzo D("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail); 211617885a7bSLuigi Rizzo /* 211717885a7bSLuigi Rizzo * packets between 'done' and 'cur' are left unsent. 211817885a7bSLuigi Rizzo */ 211917885a7bSLuigi Rizzo kring->nr_hwcur = done; 212017885a7bSLuigi Rizzo kring->nr_hwtail = nm_prev(done, lim); 2121f9790aebSLuigi Rizzo if (netmap_verbose) 21224bf50f18SLuigi Rizzo D("%s ring %d flags %d", na->up.name, kring->ring_id, flags); 2123f9790aebSLuigi Rizzo return 0; 2124f9790aebSLuigi Rizzo } 2125f9790aebSLuigi Rizzo 2126f9790aebSLuigi Rizzo 21274bf50f18SLuigi Rizzo /* rxsync code used by VALE ports nm_rxsync callback and also 21284bf50f18SLuigi Rizzo * internally by the brwap 2129f9790aebSLuigi Rizzo */ 2130f9790aebSLuigi Rizzo static int 21314bf50f18SLuigi Rizzo netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags) 2132f9790aebSLuigi Rizzo { 21334bf50f18SLuigi Rizzo struct netmap_adapter *na = kring->na; 213417885a7bSLuigi Rizzo struct netmap_ring *ring = kring->ring; 213517885a7bSLuigi Rizzo u_int nm_i, lim = kring->nkr_num_slots - 1; 2136847bf383SLuigi Rizzo u_int head = kring->rhead; 213717885a7bSLuigi Rizzo int n; 213817885a7bSLuigi Rizzo 213917885a7bSLuigi Rizzo if (head > lim) { 214017885a7bSLuigi Rizzo D("ouch dangerous reset!!!"); 214117885a7bSLuigi Rizzo n = netmap_ring_reinit(kring); 214217885a7bSLuigi Rizzo goto done; 214317885a7bSLuigi Rizzo } 214417885a7bSLuigi Rizzo 214517885a7bSLuigi Rizzo /* First part, import newly received packets. */ 214617885a7bSLuigi Rizzo /* actually nothing to do here, they are already in the kring */ 214717885a7bSLuigi Rizzo 214817885a7bSLuigi Rizzo /* Second part, skip past packets that userspace has released. 
*/ 214917885a7bSLuigi Rizzo nm_i = kring->nr_hwcur; 215017885a7bSLuigi Rizzo if (nm_i != head) { 215117885a7bSLuigi Rizzo /* consistency check, but nothing really important here */ 215217885a7bSLuigi Rizzo for (n = 0; likely(nm_i != head); n++) { 215317885a7bSLuigi Rizzo struct netmap_slot *slot = &ring->slot[nm_i]; 21544bf50f18SLuigi Rizzo void *addr = NMB(na, slot); 215517885a7bSLuigi Rizzo 21564bf50f18SLuigi Rizzo if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */ 215717885a7bSLuigi Rizzo D("bad buffer index %d, ignore ?", 215817885a7bSLuigi Rizzo slot->buf_idx); 215917885a7bSLuigi Rizzo } 216017885a7bSLuigi Rizzo slot->flags &= ~NS_BUF_CHANGED; 216117885a7bSLuigi Rizzo nm_i = nm_next(nm_i, lim); 216217885a7bSLuigi Rizzo } 216317885a7bSLuigi Rizzo kring->nr_hwcur = head; 216417885a7bSLuigi Rizzo } 216517885a7bSLuigi Rizzo 216617885a7bSLuigi Rizzo n = 0; 216717885a7bSLuigi Rizzo done: 216817885a7bSLuigi Rizzo return n; 216917885a7bSLuigi Rizzo } 2170f9790aebSLuigi Rizzo 2171f9790aebSLuigi Rizzo /* 21724bf50f18SLuigi Rizzo * nm_rxsync callback for VALE ports 2173f9790aebSLuigi Rizzo * user process reading from a VALE switch. 2174f9790aebSLuigi Rizzo * Already protected against concurrent calls from userspace, 2175f9790aebSLuigi Rizzo * but we must acquire the queue's lock to protect against 2176f9790aebSLuigi Rizzo * writers on the same queue. 
2177f9790aebSLuigi Rizzo */ 2178f9790aebSLuigi Rizzo static int 21794bf50f18SLuigi Rizzo netmap_vp_rxsync(struct netmap_kring *kring, int flags) 2180f9790aebSLuigi Rizzo { 2181f9790aebSLuigi Rizzo int n; 2182f9790aebSLuigi Rizzo 2183f9790aebSLuigi Rizzo mtx_lock(&kring->q_lock); 21844bf50f18SLuigi Rizzo n = netmap_vp_rxsync_locked(kring, flags); 2185f9790aebSLuigi Rizzo mtx_unlock(&kring->q_lock); 2186f9790aebSLuigi Rizzo return n; 2187f9790aebSLuigi Rizzo } 2188f9790aebSLuigi Rizzo 218917885a7bSLuigi Rizzo 21904bf50f18SLuigi Rizzo /* nm_bdg_attach callback for VALE ports 21914bf50f18SLuigi Rizzo * The na_vp port is this same netmap_adapter. There is no host port. 21924bf50f18SLuigi Rizzo */ 2193f9790aebSLuigi Rizzo static int 21944bf50f18SLuigi Rizzo netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na) 21954bf50f18SLuigi Rizzo { 21964bf50f18SLuigi Rizzo struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; 21974bf50f18SLuigi Rizzo 21984bf50f18SLuigi Rizzo if (vpna->na_bdg) 21994bf50f18SLuigi Rizzo return EBUSY; 22004bf50f18SLuigi Rizzo na->na_vp = vpna; 22014bf50f18SLuigi Rizzo strncpy(na->name, name, sizeof(na->name)); 22024bf50f18SLuigi Rizzo na->na_hostvp = NULL; 22034bf50f18SLuigi Rizzo return 0; 22044bf50f18SLuigi Rizzo } 22054bf50f18SLuigi Rizzo 22064bf50f18SLuigi Rizzo /* create a netmap_vp_adapter that describes a VALE port. 22074bf50f18SLuigi Rizzo * Only persistent VALE ports have a non-null ifp. 
22084bf50f18SLuigi Rizzo */ 22094bf50f18SLuigi Rizzo static int 2210c3e9b4dbSLuiz Otavio O Souza netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, 2211c3e9b4dbSLuiz Otavio O Souza struct netmap_mem_d *nmd, 2212c3e9b4dbSLuiz Otavio O Souza struct netmap_vp_adapter **ret) 2213f9790aebSLuigi Rizzo { 2214f9790aebSLuigi Rizzo struct netmap_vp_adapter *vpna; 2215f9790aebSLuigi Rizzo struct netmap_adapter *na; 2216c3e9b4dbSLuiz Otavio O Souza int error = 0; 2217f0ea3689SLuigi Rizzo u_int npipes = 0; 2218f9790aebSLuigi Rizzo 2219c3e9b4dbSLuiz Otavio O Souza vpna = nm_os_malloc(sizeof(*vpna)); 2220f9790aebSLuigi Rizzo if (vpna == NULL) 2221f9790aebSLuigi Rizzo return ENOMEM; 2222f9790aebSLuigi Rizzo 2223f9790aebSLuigi Rizzo na = &vpna->up; 2224f9790aebSLuigi Rizzo 2225f9790aebSLuigi Rizzo na->ifp = ifp; 22264bf50f18SLuigi Rizzo strncpy(na->name, nmr->nr_name, sizeof(na->name)); 2227f9790aebSLuigi Rizzo 2228f9790aebSLuigi Rizzo /* bound checking */ 2229f9790aebSLuigi Rizzo na->num_tx_rings = nmr->nr_tx_rings; 2230f9790aebSLuigi Rizzo nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); 2231f9790aebSLuigi Rizzo nmr->nr_tx_rings = na->num_tx_rings; // write back 2232f9790aebSLuigi Rizzo na->num_rx_rings = nmr->nr_rx_rings; 2233f9790aebSLuigi Rizzo nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); 2234f9790aebSLuigi Rizzo nmr->nr_rx_rings = na->num_rx_rings; // write back 2235f9790aebSLuigi Rizzo nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE, 2236f9790aebSLuigi Rizzo 1, NM_BDG_MAXSLOTS, NULL); 2237f9790aebSLuigi Rizzo na->num_tx_desc = nmr->nr_tx_slots; 2238f9790aebSLuigi Rizzo nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE, 2239f9790aebSLuigi Rizzo 1, NM_BDG_MAXSLOTS, NULL); 2240f0ea3689SLuigi Rizzo /* validate number of pipes. We want at least 1, 2241f0ea3689SLuigi Rizzo * but probably can do with some more. 
2242f0ea3689SLuigi Rizzo * So let's use 2 as default (when 0 is supplied) 2243f0ea3689SLuigi Rizzo */ 2244f0ea3689SLuigi Rizzo npipes = nmr->nr_arg1; 2245f0ea3689SLuigi Rizzo nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL); 2246f0ea3689SLuigi Rizzo nmr->nr_arg1 = npipes; /* write back */ 2247f0ea3689SLuigi Rizzo /* validate extra bufs */ 2248f0ea3689SLuigi Rizzo nm_bound_var(&nmr->nr_arg3, 0, 0, 2249f0ea3689SLuigi Rizzo 128*NM_BDG_MAXSLOTS, NULL); 2250f9790aebSLuigi Rizzo na->num_rx_desc = nmr->nr_rx_slots; 2251f0ea3689SLuigi Rizzo vpna->mfs = 1514; 2252847bf383SLuigi Rizzo vpna->last_smac = ~0llu; 2253f0ea3689SLuigi Rizzo /*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero?? 2254f0ea3689SLuigi Rizzo vpna->mfs = netmap_buf_size; */ 2255f0ea3689SLuigi Rizzo if (netmap_verbose) 2256f0ea3689SLuigi Rizzo D("max frame size %u", vpna->mfs); 2257f9790aebSLuigi Rizzo 2258847bf383SLuigi Rizzo na->na_flags |= NAF_BDG_MAYSLEEP; 225910b8ef3dSLuigi Rizzo /* persistent VALE ports look like hw devices 226010b8ef3dSLuigi Rizzo * with a native netmap adapter 226110b8ef3dSLuigi Rizzo */ 226210b8ef3dSLuigi Rizzo if (ifp) 226310b8ef3dSLuigi Rizzo na->na_flags |= NAF_NATIVE; 22644bf50f18SLuigi Rizzo na->nm_txsync = netmap_vp_txsync; 22654bf50f18SLuigi Rizzo na->nm_rxsync = netmap_vp_rxsync; 22664bf50f18SLuigi Rizzo na->nm_register = netmap_vp_reg; 2267f9790aebSLuigi Rizzo na->nm_krings_create = netmap_vp_krings_create; 2268f9790aebSLuigi Rizzo na->nm_krings_delete = netmap_vp_krings_delete; 22694bf50f18SLuigi Rizzo na->nm_dtor = netmap_vp_dtor; 2270c3e9b4dbSLuiz Otavio O Souza D("nr_arg2 %d", nmr->nr_arg2); 2271c3e9b4dbSLuiz Otavio O Souza na->nm_mem = nmd ? 
2272c3e9b4dbSLuiz Otavio O Souza netmap_mem_get(nmd): 2273c3e9b4dbSLuiz Otavio O Souza netmap_mem_private_new( 2274f9790aebSLuigi Rizzo na->num_tx_rings, na->num_tx_desc, 2275f0ea3689SLuigi Rizzo na->num_rx_rings, na->num_rx_desc, 2276f0ea3689SLuigi Rizzo nmr->nr_arg3, npipes, &error); 2277f0ea3689SLuigi Rizzo if (na->nm_mem == NULL) 2278f0ea3689SLuigi Rizzo goto err; 22794bf50f18SLuigi Rizzo na->nm_bdg_attach = netmap_vp_bdg_attach; 2280f9790aebSLuigi Rizzo /* other nmd fields are set in the common routine */ 2281f9790aebSLuigi Rizzo error = netmap_attach_common(na); 2282f0ea3689SLuigi Rizzo if (error) 2283f0ea3689SLuigi Rizzo goto err; 22844bf50f18SLuigi Rizzo *ret = vpna; 2285f0ea3689SLuigi Rizzo return 0; 2286f0ea3689SLuigi Rizzo 2287f0ea3689SLuigi Rizzo err: 2288f0ea3689SLuigi Rizzo if (na->nm_mem != NULL) 2289c3e9b4dbSLuiz Otavio O Souza netmap_mem_put(na->nm_mem); 2290c3e9b4dbSLuiz Otavio O Souza nm_os_free(vpna); 2291f9790aebSLuigi Rizzo return error; 2292f9790aebSLuigi Rizzo } 2293f9790aebSLuigi Rizzo 22944bf50f18SLuigi Rizzo /* Bridge wrapper code (bwrap). 22954bf50f18SLuigi Rizzo * This is used to connect a non-VALE-port netmap_adapter (hwna) to a 22964bf50f18SLuigi Rizzo * VALE switch. 22974bf50f18SLuigi Rizzo * The main task is to swap the meaning of tx and rx rings to match the 22984bf50f18SLuigi Rizzo * expectations of the VALE switch code (see nm_bdg_flush). 22994bf50f18SLuigi Rizzo * 23004bf50f18SLuigi Rizzo * The bwrap works by interposing a netmap_bwrap_adapter between the 23014bf50f18SLuigi Rizzo * rest of the system and the hwna. The netmap_bwrap_adapter looks like 23024bf50f18SLuigi Rizzo * a netmap_vp_adapter to the rest the system, but, internally, it 23034bf50f18SLuigi Rizzo * translates all callbacks to what the hwna expects. 
23044bf50f18SLuigi Rizzo * 23054bf50f18SLuigi Rizzo * Note that we have to intercept callbacks coming from two sides: 23064bf50f18SLuigi Rizzo * 23074bf50f18SLuigi Rizzo * - callbacks coming from the netmap module are intercepted by 23084bf50f18SLuigi Rizzo * passing around the netmap_bwrap_adapter instead of the hwna 23094bf50f18SLuigi Rizzo * 23104bf50f18SLuigi Rizzo * - callbacks coming from outside of the netmap module only know 23114bf50f18SLuigi Rizzo * about the hwna. This, however, only happens in interrupt 23124bf50f18SLuigi Rizzo * handlers, where only the hwna->nm_notify callback is called. 23134bf50f18SLuigi Rizzo * What the bwrap does is to overwrite the hwna->nm_notify callback 23144bf50f18SLuigi Rizzo * with its own netmap_bwrap_intr_notify. 23154bf50f18SLuigi Rizzo * XXX This assumes that the hwna->nm_notify callback was the 23164bf50f18SLuigi Rizzo * standard netmap_notify(), as it is the case for nic adapters. 23174bf50f18SLuigi Rizzo * Any additional action performed by hwna->nm_notify will not be 23184bf50f18SLuigi Rizzo * performed by netmap_bwrap_intr_notify. 23194bf50f18SLuigi Rizzo * 23204bf50f18SLuigi Rizzo * Additionally, the bwrap can optionally attach the host rings pair 23214bf50f18SLuigi Rizzo * of the wrapped adapter to a different port of the switch. 
23224bf50f18SLuigi Rizzo */ 23234bf50f18SLuigi Rizzo 232417885a7bSLuigi Rizzo 2325f9790aebSLuigi Rizzo static void 2326f9790aebSLuigi Rizzo netmap_bwrap_dtor(struct netmap_adapter *na) 2327f9790aebSLuigi Rizzo { 2328f9790aebSLuigi Rizzo struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; 2329f9790aebSLuigi Rizzo struct netmap_adapter *hwna = bna->hwna; 233037e3a6d3SLuigi Rizzo struct nm_bridge *b = bna->up.na_bdg, 233137e3a6d3SLuigi Rizzo *bh = bna->host.na_bdg; 233237e3a6d3SLuigi Rizzo 2333c3e9b4dbSLuiz Otavio O Souza netmap_mem_put(bna->host.up.nm_mem); 2334c3e9b4dbSLuiz Otavio O Souza 233537e3a6d3SLuigi Rizzo if (b) { 233637e3a6d3SLuigi Rizzo netmap_bdg_detach_common(b, bna->up.bdg_port, 233737e3a6d3SLuigi Rizzo (bh ? bna->host.bdg_port : -1)); 233837e3a6d3SLuigi Rizzo } 2339f9790aebSLuigi Rizzo 2340f9790aebSLuigi Rizzo ND("na %p", na); 2341f9790aebSLuigi Rizzo na->ifp = NULL; 23424bf50f18SLuigi Rizzo bna->host.up.ifp = NULL; 23434bf50f18SLuigi Rizzo hwna->na_private = NULL; 23444bf50f18SLuigi Rizzo hwna->na_vp = hwna->na_hostvp = NULL; 23454bf50f18SLuigi Rizzo hwna->na_flags &= ~NAF_BUSY; 23464bf50f18SLuigi Rizzo netmap_adapter_put(hwna); 2347f9790aebSLuigi Rizzo 2348f9790aebSLuigi Rizzo } 2349f9790aebSLuigi Rizzo 235017885a7bSLuigi Rizzo 2351f9790aebSLuigi Rizzo /* 235217885a7bSLuigi Rizzo * Intr callback for NICs connected to a bridge. 235317885a7bSLuigi Rizzo * Simply ignore tx interrupts (maybe we could try to recover space ?) 235417885a7bSLuigi Rizzo * and pass received packets from nic to the bridge. 235517885a7bSLuigi Rizzo * 2356f9790aebSLuigi Rizzo * XXX TODO check locking: this is called from the interrupt 2357f9790aebSLuigi Rizzo * handler so we should make sure that the interface is not 2358f9790aebSLuigi Rizzo * disconnected while passing down an interrupt. 2359f9790aebSLuigi Rizzo * 236017885a7bSLuigi Rizzo * Note, no user process can access this NIC or the host stack. 
236117885a7bSLuigi Rizzo * The only part of the ring that is significant are the slots, 236217885a7bSLuigi Rizzo * and head/cur/tail are set from the kring as needed 236317885a7bSLuigi Rizzo * (part as a receive ring, part as a transmit ring). 236417885a7bSLuigi Rizzo * 236517885a7bSLuigi Rizzo * callback that overwrites the hwna notify callback. 236637e3a6d3SLuigi Rizzo * Packets come from the outside or from the host stack and are put on an 236737e3a6d3SLuigi Rizzo * hwna rx ring. 2368f9790aebSLuigi Rizzo * The bridge wrapper then sends the packets through the bridge. 2369f9790aebSLuigi Rizzo */ 2370f9790aebSLuigi Rizzo static int 2371847bf383SLuigi Rizzo netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags) 2372f9790aebSLuigi Rizzo { 2373847bf383SLuigi Rizzo struct netmap_adapter *na = kring->na; 2374f9790aebSLuigi Rizzo struct netmap_bwrap_adapter *bna = na->na_private; 2375847bf383SLuigi Rizzo struct netmap_kring *bkring; 2376f9790aebSLuigi Rizzo struct netmap_vp_adapter *vpna = &bna->up; 2377847bf383SLuigi Rizzo u_int ring_nr = kring->ring_id; 237837e3a6d3SLuigi Rizzo int ret = NM_IRQ_COMPLETED; 237937e3a6d3SLuigi Rizzo int error; 2380f9790aebSLuigi Rizzo 238117885a7bSLuigi Rizzo if (netmap_verbose) 2382847bf383SLuigi Rizzo D("%s %s 0x%x", na->name, kring->name, flags); 2383f9790aebSLuigi Rizzo 2384847bf383SLuigi Rizzo bkring = &vpna->up.tx_rings[ring_nr]; 2385f9790aebSLuigi Rizzo 2386f9790aebSLuigi Rizzo /* make sure the ring is not disabled */ 238737e3a6d3SLuigi Rizzo if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) { 238837e3a6d3SLuigi Rizzo return EIO; 238937e3a6d3SLuigi Rizzo } 2390f9790aebSLuigi Rizzo 239117885a7bSLuigi Rizzo if (netmap_verbose) 2392847bf383SLuigi Rizzo D("%s head %d cur %d tail %d", na->name, 239317885a7bSLuigi Rizzo kring->rhead, kring->rcur, kring->rtail); 239417885a7bSLuigi Rizzo 2395847bf383SLuigi Rizzo /* simulate a user wakeup on the rx ring 2396847bf383SLuigi Rizzo * fetch packets that have arrived. 
2397f9790aebSLuigi Rizzo */ 2398f0ea3689SLuigi Rizzo error = kring->nm_sync(kring, 0); 2399f9790aebSLuigi Rizzo if (error) 2400f9790aebSLuigi Rizzo goto put_out; 240137e3a6d3SLuigi Rizzo if (kring->nr_hwcur == kring->nr_hwtail) { 240237e3a6d3SLuigi Rizzo if (netmap_verbose) 2403f9790aebSLuigi Rizzo D("how strange, interrupt with no packets on %s", 24044bf50f18SLuigi Rizzo na->name); 2405f9790aebSLuigi Rizzo goto put_out; 2406f9790aebSLuigi Rizzo } 240717885a7bSLuigi Rizzo 2408847bf383SLuigi Rizzo /* new packets are kring->rcur to kring->nr_hwtail, and the bkring 2409847bf383SLuigi Rizzo * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail 241017885a7bSLuigi Rizzo * to push all packets out. 241117885a7bSLuigi Rizzo */ 2412847bf383SLuigi Rizzo bkring->rhead = bkring->rcur = kring->nr_hwtail; 241317885a7bSLuigi Rizzo 24144bf50f18SLuigi Rizzo netmap_vp_txsync(bkring, flags); 2415f9790aebSLuigi Rizzo 241617885a7bSLuigi Rizzo /* mark all buffers as released on this ring */ 2417847bf383SLuigi Rizzo kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail; 241817885a7bSLuigi Rizzo /* another call to actually release the buffers */ 2419f0ea3689SLuigi Rizzo error = kring->nm_sync(kring, 0); 2420f9790aebSLuigi Rizzo 242137e3a6d3SLuigi Rizzo /* The second rxsync may have further advanced hwtail. If this happens, 242237e3a6d3SLuigi Rizzo * return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */ 242337e3a6d3SLuigi Rizzo if (kring->rcur != kring->nr_hwtail) { 242437e3a6d3SLuigi Rizzo ret = NM_IRQ_RESCHED; 242537e3a6d3SLuigi Rizzo } 2426f9790aebSLuigi Rizzo put_out: 2427f9790aebSLuigi Rizzo nm_kr_put(kring); 242837e3a6d3SLuigi Rizzo 242937e3a6d3SLuigi Rizzo return error ? 
error : ret; 2430f9790aebSLuigi Rizzo } 2431f9790aebSLuigi Rizzo 243217885a7bSLuigi Rizzo 24334bf50f18SLuigi Rizzo /* nm_register callback for bwrap */ 2434f9790aebSLuigi Rizzo static int 243537e3a6d3SLuigi Rizzo netmap_bwrap_reg(struct netmap_adapter *na, int onoff) 2436f9790aebSLuigi Rizzo { 2437f9790aebSLuigi Rizzo struct netmap_bwrap_adapter *bna = 2438f9790aebSLuigi Rizzo (struct netmap_bwrap_adapter *)na; 2439f9790aebSLuigi Rizzo struct netmap_adapter *hwna = bna->hwna; 2440f9790aebSLuigi Rizzo struct netmap_vp_adapter *hostna = &bna->host; 244137e3a6d3SLuigi Rizzo int error, i; 2442847bf383SLuigi Rizzo enum txrx t; 2443f9790aebSLuigi Rizzo 24444bf50f18SLuigi Rizzo ND("%s %s", na->name, onoff ? "on" : "off"); 2445f9790aebSLuigi Rizzo 2446f9790aebSLuigi Rizzo if (onoff) { 24474bf50f18SLuigi Rizzo /* netmap_do_regif has been called on the bwrap na. 24484bf50f18SLuigi Rizzo * We need to pass the information about the 24494bf50f18SLuigi Rizzo * memory allocator down to the hwna before 24504bf50f18SLuigi Rizzo * putting it in netmap mode 24514bf50f18SLuigi Rizzo */ 2452f9790aebSLuigi Rizzo hwna->na_lut = na->na_lut; 2453f9790aebSLuigi Rizzo 2454f9790aebSLuigi Rizzo if (hostna->na_bdg) { 24554bf50f18SLuigi Rizzo /* if the host rings have been attached to switch, 24564bf50f18SLuigi Rizzo * we need to copy the memory allocator information 24574bf50f18SLuigi Rizzo * in the hostna also 24584bf50f18SLuigi Rizzo */ 2459f9790aebSLuigi Rizzo hostna->up.na_lut = na->na_lut; 2460f9790aebSLuigi Rizzo } 2461f9790aebSLuigi Rizzo 24620c7ba37eSLuigi Rizzo /* cross-link the netmap rings 24630c7ba37eSLuigi Rizzo * The original number of rings comes from hwna, 24640c7ba37eSLuigi Rizzo * rx rings on one side equals tx rings on the other. 
24650c7ba37eSLuigi Rizzo */ 2466847bf383SLuigi Rizzo for_rx_tx(t) { 2467847bf383SLuigi Rizzo enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ 246837e3a6d3SLuigi Rizzo for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) { 246937e3a6d3SLuigi Rizzo NMR(hwna, r)[i].ring = NMR(na, t)[i].ring; 2470f9790aebSLuigi Rizzo } 2471f9790aebSLuigi Rizzo } 247237e3a6d3SLuigi Rizzo 247337e3a6d3SLuigi Rizzo if (na->na_flags & NAF_HOST_RINGS) { 247437e3a6d3SLuigi Rizzo struct netmap_adapter *hna = &hostna->up; 247537e3a6d3SLuigi Rizzo /* the hostna rings are the host rings of the bwrap. 247637e3a6d3SLuigi Rizzo * The corresponding krings must point back to the 247737e3a6d3SLuigi Rizzo * hostna 247837e3a6d3SLuigi Rizzo */ 247937e3a6d3SLuigi Rizzo hna->tx_rings = &na->tx_rings[na->num_tx_rings]; 248037e3a6d3SLuigi Rizzo hna->tx_rings[0].na = hna; 248137e3a6d3SLuigi Rizzo hna->rx_rings = &na->rx_rings[na->num_rx_rings]; 248237e3a6d3SLuigi Rizzo hna->rx_rings[0].na = hna; 248337e3a6d3SLuigi Rizzo } 248437e3a6d3SLuigi Rizzo } 248537e3a6d3SLuigi Rizzo 248637e3a6d3SLuigi Rizzo /* pass down the pending ring state information */ 248737e3a6d3SLuigi Rizzo for_rx_tx(t) { 248837e3a6d3SLuigi Rizzo for (i = 0; i < nma_get_nrings(na, t) + 1; i++) 248937e3a6d3SLuigi Rizzo NMR(hwna, t)[i].nr_pending_mode = 249037e3a6d3SLuigi Rizzo NMR(na, t)[i].nr_pending_mode; 2491f9790aebSLuigi Rizzo } 2492f9790aebSLuigi Rizzo 24934bf50f18SLuigi Rizzo /* forward the request to the hwna */ 2494f9790aebSLuigi Rizzo error = hwna->nm_register(hwna, onoff); 2495f9790aebSLuigi Rizzo if (error) 2496f9790aebSLuigi Rizzo return error; 2497f9790aebSLuigi Rizzo 249837e3a6d3SLuigi Rizzo /* copy up the current ring state information */ 249937e3a6d3SLuigi Rizzo for_rx_tx(t) { 250037e3a6d3SLuigi Rizzo for (i = 0; i < nma_get_nrings(na, t) + 1; i++) 250137e3a6d3SLuigi Rizzo NMR(na, t)[i].nr_mode = 250237e3a6d3SLuigi Rizzo NMR(hwna, t)[i].nr_mode; 250337e3a6d3SLuigi Rizzo } 250437e3a6d3SLuigi Rizzo 25054bf50f18SLuigi Rizzo /* 
impersonate a netmap_vp_adapter */ 25064bf50f18SLuigi Rizzo netmap_vp_reg(na, onoff); 25074bf50f18SLuigi Rizzo if (hostna->na_bdg) 25084bf50f18SLuigi Rizzo netmap_vp_reg(&hostna->up, onoff); 2509f9790aebSLuigi Rizzo 2510f9790aebSLuigi Rizzo if (onoff) { 2511847bf383SLuigi Rizzo u_int i; 2512847bf383SLuigi Rizzo /* intercept the hwna nm_nofify callback on the hw rings */ 2513847bf383SLuigi Rizzo for (i = 0; i < hwna->num_rx_rings; i++) { 2514847bf383SLuigi Rizzo hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify; 2515847bf383SLuigi Rizzo hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify; 2516847bf383SLuigi Rizzo } 2517847bf383SLuigi Rizzo i = hwna->num_rx_rings; /* for safety */ 2518847bf383SLuigi Rizzo /* save the host ring notify unconditionally */ 2519847bf383SLuigi Rizzo hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify; 2520847bf383SLuigi Rizzo if (hostna->na_bdg) { 2521847bf383SLuigi Rizzo /* also intercept the host ring notify */ 2522847bf383SLuigi Rizzo hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify; 2523847bf383SLuigi Rizzo } 252437e3a6d3SLuigi Rizzo if (na->active_fds == 0) 252537e3a6d3SLuigi Rizzo na->na_flags |= NAF_NETMAP_ON; 2526f9790aebSLuigi Rizzo } else { 2527847bf383SLuigi Rizzo u_int i; 252837e3a6d3SLuigi Rizzo 252937e3a6d3SLuigi Rizzo if (na->active_fds == 0) 253037e3a6d3SLuigi Rizzo na->na_flags &= ~NAF_NETMAP_ON; 253137e3a6d3SLuigi Rizzo 2532847bf383SLuigi Rizzo /* reset all notify callbacks (including host ring) */ 2533847bf383SLuigi Rizzo for (i = 0; i <= hwna->num_rx_rings; i++) { 2534847bf383SLuigi Rizzo hwna->rx_rings[i].nm_notify = hwna->rx_rings[i].save_notify; 2535847bf383SLuigi Rizzo hwna->rx_rings[i].save_notify = NULL; 2536847bf383SLuigi Rizzo } 2537847bf383SLuigi Rizzo hwna->na_lut.lut = NULL; 2538847bf383SLuigi Rizzo hwna->na_lut.objtotal = 0; 2539847bf383SLuigi Rizzo hwna->na_lut.objsize = 0; 2540f9790aebSLuigi Rizzo } 2541f9790aebSLuigi Rizzo 2542f9790aebSLuigi Rizzo return 0; 2543f9790aebSLuigi 
Rizzo } 2544f9790aebSLuigi Rizzo 25454bf50f18SLuigi Rizzo /* nm_config callback for bwrap */ 2546f9790aebSLuigi Rizzo static int 2547f9790aebSLuigi Rizzo netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd, 2548f9790aebSLuigi Rizzo u_int *rxr, u_int *rxd) 2549f9790aebSLuigi Rizzo { 2550f9790aebSLuigi Rizzo struct netmap_bwrap_adapter *bna = 2551f9790aebSLuigi Rizzo (struct netmap_bwrap_adapter *)na; 2552f9790aebSLuigi Rizzo struct netmap_adapter *hwna = bna->hwna; 2553f9790aebSLuigi Rizzo 2554f9790aebSLuigi Rizzo /* forward the request */ 2555f9790aebSLuigi Rizzo netmap_update_config(hwna); 2556f9790aebSLuigi Rizzo /* swap the results */ 2557f9790aebSLuigi Rizzo *txr = hwna->num_rx_rings; 2558f9790aebSLuigi Rizzo *txd = hwna->num_rx_desc; 2559f9790aebSLuigi Rizzo *rxr = hwna->num_tx_rings; 2560f9790aebSLuigi Rizzo *rxd = hwna->num_rx_desc; 2561f9790aebSLuigi Rizzo 2562f9790aebSLuigi Rizzo return 0; 2563f9790aebSLuigi Rizzo } 2564f9790aebSLuigi Rizzo 256517885a7bSLuigi Rizzo 25664bf50f18SLuigi Rizzo /* nm_krings_create callback for bwrap */ 2567f9790aebSLuigi Rizzo static int 2568f9790aebSLuigi Rizzo netmap_bwrap_krings_create(struct netmap_adapter *na) 2569f9790aebSLuigi Rizzo { 2570f9790aebSLuigi Rizzo struct netmap_bwrap_adapter *bna = 2571f9790aebSLuigi Rizzo (struct netmap_bwrap_adapter *)na; 2572f9790aebSLuigi Rizzo struct netmap_adapter *hwna = bna->hwna; 257337e3a6d3SLuigi Rizzo int i, error = 0; 257437e3a6d3SLuigi Rizzo enum txrx t; 2575f9790aebSLuigi Rizzo 25764bf50f18SLuigi Rizzo ND("%s", na->name); 2577f9790aebSLuigi Rizzo 25784bf50f18SLuigi Rizzo /* impersonate a netmap_vp_adapter */ 2579f9790aebSLuigi Rizzo error = netmap_vp_krings_create(na); 2580f9790aebSLuigi Rizzo if (error) 2581f9790aebSLuigi Rizzo return error; 2582f9790aebSLuigi Rizzo 25834bf50f18SLuigi Rizzo /* also create the hwna krings */ 2584f9790aebSLuigi Rizzo error = hwna->nm_krings_create(hwna); 2585f9790aebSLuigi Rizzo if (error) { 258637e3a6d3SLuigi Rizzo goto 
err_del_vp_rings; 2587f9790aebSLuigi Rizzo } 2588f9790aebSLuigi Rizzo 258937e3a6d3SLuigi Rizzo /* get each ring slot number from the corresponding hwna ring */ 259037e3a6d3SLuigi Rizzo for_rx_tx(t) { 259137e3a6d3SLuigi Rizzo enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ 259237e3a6d3SLuigi Rizzo for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) { 259337e3a6d3SLuigi Rizzo NMR(na, t)[i].nkr_num_slots = NMR(hwna, r)[i].nkr_num_slots; 259437e3a6d3SLuigi Rizzo } 2595f0ea3689SLuigi Rizzo } 2596f9790aebSLuigi Rizzo 2597f9790aebSLuigi Rizzo return 0; 259837e3a6d3SLuigi Rizzo 259937e3a6d3SLuigi Rizzo err_del_vp_rings: 260037e3a6d3SLuigi Rizzo netmap_vp_krings_delete(na); 260137e3a6d3SLuigi Rizzo 260237e3a6d3SLuigi Rizzo return error; 2603f9790aebSLuigi Rizzo } 2604f9790aebSLuigi Rizzo 260517885a7bSLuigi Rizzo 2606f9790aebSLuigi Rizzo static void 2607f9790aebSLuigi Rizzo netmap_bwrap_krings_delete(struct netmap_adapter *na) 2608f9790aebSLuigi Rizzo { 2609f9790aebSLuigi Rizzo struct netmap_bwrap_adapter *bna = 2610f9790aebSLuigi Rizzo (struct netmap_bwrap_adapter *)na; 2611f9790aebSLuigi Rizzo struct netmap_adapter *hwna = bna->hwna; 2612f9790aebSLuigi Rizzo 26134bf50f18SLuigi Rizzo ND("%s", na->name); 2614f9790aebSLuigi Rizzo 2615f9790aebSLuigi Rizzo hwna->nm_krings_delete(hwna); 2616f9790aebSLuigi Rizzo netmap_vp_krings_delete(na); 2617f9790aebSLuigi Rizzo } 2618f9790aebSLuigi Rizzo 261917885a7bSLuigi Rizzo 2620f9790aebSLuigi Rizzo /* notify method for the bridge-->hwna direction */ 2621f9790aebSLuigi Rizzo static int 2622847bf383SLuigi Rizzo netmap_bwrap_notify(struct netmap_kring *kring, int flags) 2623f9790aebSLuigi Rizzo { 2624847bf383SLuigi Rizzo struct netmap_adapter *na = kring->na; 2625847bf383SLuigi Rizzo struct netmap_bwrap_adapter *bna = na->na_private; 2626f9790aebSLuigi Rizzo struct netmap_adapter *hwna = bna->hwna; 2627847bf383SLuigi Rizzo u_int ring_n = kring->ring_id; 2628847bf383SLuigi Rizzo u_int lim = kring->nkr_num_slots - 1; 
2629847bf383SLuigi Rizzo struct netmap_kring *hw_kring; 263037e3a6d3SLuigi Rizzo int error; 2631f9790aebSLuigi Rizzo 2632847bf383SLuigi Rizzo ND("%s: na %s hwna %s", 2633847bf383SLuigi Rizzo (kring ? kring->name : "NULL!"), 2634847bf383SLuigi Rizzo (na ? na->name : "NULL!"), 2635847bf383SLuigi Rizzo (hwna ? hwna->name : "NULL!")); 2636f9790aebSLuigi Rizzo hw_kring = &hwna->tx_rings[ring_n]; 2637847bf383SLuigi Rizzo 263837e3a6d3SLuigi Rizzo if (nm_kr_tryget(hw_kring, 0, NULL)) { 263937e3a6d3SLuigi Rizzo return ENXIO; 264037e3a6d3SLuigi Rizzo } 2641f9790aebSLuigi Rizzo 264217885a7bSLuigi Rizzo /* first step: simulate a user wakeup on the rx ring */ 2643847bf383SLuigi Rizzo netmap_vp_rxsync(kring, flags); 264417885a7bSLuigi Rizzo ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", 26454bf50f18SLuigi Rizzo na->name, ring_n, 264617885a7bSLuigi Rizzo kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, 264717885a7bSLuigi Rizzo ring->head, ring->cur, ring->tail, 264817885a7bSLuigi Rizzo hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail); 2649847bf383SLuigi Rizzo /* second step: the new packets are sent on the tx ring 265017885a7bSLuigi Rizzo * (which is actually the same ring) 265117885a7bSLuigi Rizzo */ 2652847bf383SLuigi Rizzo hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail; 2653f0ea3689SLuigi Rizzo error = hw_kring->nm_sync(hw_kring, flags); 2654847bf383SLuigi Rizzo if (error) 265537e3a6d3SLuigi Rizzo goto put_out; 265617885a7bSLuigi Rizzo 2657847bf383SLuigi Rizzo /* third step: now we are back the rx ring */ 265817885a7bSLuigi Rizzo /* claim ownership on all hw owned bufs */ 2659847bf383SLuigi Rizzo kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */ 266017885a7bSLuigi Rizzo 2661847bf383SLuigi Rizzo /* fourth step: the user goes to sleep again, causing another rxsync */ 2662847bf383SLuigi Rizzo netmap_vp_rxsync(kring, flags); 266317885a7bSLuigi Rizzo ND("%s[%d] PST rx(c%3d t%3d l%3d) 
ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", 26644bf50f18SLuigi Rizzo na->name, ring_n, 266517885a7bSLuigi Rizzo kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, 266617885a7bSLuigi Rizzo ring->head, ring->cur, ring->tail, 266717885a7bSLuigi Rizzo hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail); 266837e3a6d3SLuigi Rizzo put_out: 2669847bf383SLuigi Rizzo nm_kr_put(hw_kring); 267037e3a6d3SLuigi Rizzo 267137e3a6d3SLuigi Rizzo return error ? error : NM_IRQ_COMPLETED; 2672f9790aebSLuigi Rizzo } 2673f9790aebSLuigi Rizzo 267417885a7bSLuigi Rizzo 26754bf50f18SLuigi Rizzo /* nm_bdg_ctl callback for the bwrap. 26764bf50f18SLuigi Rizzo * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd]. 26774bf50f18SLuigi Rizzo * On attach, it needs to provide a fake netmap_priv_d structure and 26784bf50f18SLuigi Rizzo * perform a netmap_do_regif() on the bwrap. This will put both the 26794bf50f18SLuigi Rizzo * bwrap and the hwna in netmap mode, with the netmap rings shared 26804bf50f18SLuigi Rizzo * and cross linked. Moroever, it will start intercepting interrupts 26814bf50f18SLuigi Rizzo * directed to hwna. 
26824bf50f18SLuigi Rizzo */ 2683f9790aebSLuigi Rizzo static int 26844bf50f18SLuigi Rizzo netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) 26854bf50f18SLuigi Rizzo { 26864bf50f18SLuigi Rizzo struct netmap_priv_d *npriv; 26874bf50f18SLuigi Rizzo struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; 26884bf50f18SLuigi Rizzo int error = 0; 26894bf50f18SLuigi Rizzo 26904bf50f18SLuigi Rizzo if (attach) { 26914bf50f18SLuigi Rizzo if (NETMAP_OWNED_BY_ANY(na)) { 26924bf50f18SLuigi Rizzo return EBUSY; 26934bf50f18SLuigi Rizzo } 26944bf50f18SLuigi Rizzo if (bna->na_kpriv) { 26954bf50f18SLuigi Rizzo /* nothing to do */ 26964bf50f18SLuigi Rizzo return 0; 26974bf50f18SLuigi Rizzo } 269837e3a6d3SLuigi Rizzo npriv = netmap_priv_new(); 26994bf50f18SLuigi Rizzo if (npriv == NULL) 27004bf50f18SLuigi Rizzo return ENOMEM; 270137e3a6d3SLuigi Rizzo npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */ 270237e3a6d3SLuigi Rizzo error = netmap_do_regif(npriv, na, 0, NR_REG_NIC_SW); 2703847bf383SLuigi Rizzo if (error) { 270437e3a6d3SLuigi Rizzo netmap_priv_delete(npriv); 27054bf50f18SLuigi Rizzo return error; 27064bf50f18SLuigi Rizzo } 27074bf50f18SLuigi Rizzo bna->na_kpriv = npriv; 27084bf50f18SLuigi Rizzo na->na_flags |= NAF_BUSY; 27094bf50f18SLuigi Rizzo } else { 27104bf50f18SLuigi Rizzo if (na->active_fds == 0) /* not registered */ 27114bf50f18SLuigi Rizzo return EINVAL; 271237e3a6d3SLuigi Rizzo netmap_priv_delete(bna->na_kpriv); 27134bf50f18SLuigi Rizzo bna->na_kpriv = NULL; 27144bf50f18SLuigi Rizzo na->na_flags &= ~NAF_BUSY; 27154bf50f18SLuigi Rizzo } 27164bf50f18SLuigi Rizzo return error; 27174bf50f18SLuigi Rizzo 27184bf50f18SLuigi Rizzo } 27194bf50f18SLuigi Rizzo 27204bf50f18SLuigi Rizzo /* attach a bridge wrapper to the 'real' device */ 27214bf50f18SLuigi Rizzo int 27224bf50f18SLuigi Rizzo netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) 2723f9790aebSLuigi Rizzo { 2724f9790aebSLuigi Rizzo struct 
netmap_bwrap_adapter *bna; 27254bf50f18SLuigi Rizzo struct netmap_adapter *na = NULL; 27264bf50f18SLuigi Rizzo struct netmap_adapter *hostna = NULL; 27274bf50f18SLuigi Rizzo int error = 0; 2728847bf383SLuigi Rizzo enum txrx t; 2729f9790aebSLuigi Rizzo 27304bf50f18SLuigi Rizzo /* make sure the NIC is not already in use */ 27314bf50f18SLuigi Rizzo if (NETMAP_OWNED_BY_ANY(hwna)) { 27324bf50f18SLuigi Rizzo D("NIC %s busy, cannot attach to bridge", hwna->name); 27334bf50f18SLuigi Rizzo return EBUSY; 27344bf50f18SLuigi Rizzo } 2735f9790aebSLuigi Rizzo 2736c3e9b4dbSLuiz Otavio O Souza bna = nm_os_malloc(sizeof(*bna)); 27374bf50f18SLuigi Rizzo if (bna == NULL) { 2738f9790aebSLuigi Rizzo return ENOMEM; 27394bf50f18SLuigi Rizzo } 2740f9790aebSLuigi Rizzo 2741f9790aebSLuigi Rizzo na = &bna->up.up; 274237e3a6d3SLuigi Rizzo /* make bwrap ifp point to the real ifp */ 274337e3a6d3SLuigi Rizzo na->ifp = hwna->ifp; 2744c3e9b4dbSLuiz Otavio O Souza if_ref(na->ifp); 2745847bf383SLuigi Rizzo na->na_private = bna; 27464bf50f18SLuigi Rizzo strncpy(na->name, nr_name, sizeof(na->name)); 2747f9790aebSLuigi Rizzo /* fill the ring data for the bwrap adapter with rx/tx meanings 2748f9790aebSLuigi Rizzo * swapped. The real cross-linking will be done during register, 2749f9790aebSLuigi Rizzo * when all the krings will have been created. 
2750f9790aebSLuigi Rizzo */ 2751847bf383SLuigi Rizzo for_rx_tx(t) { 2752847bf383SLuigi Rizzo enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ 2753847bf383SLuigi Rizzo nma_set_nrings(na, t, nma_get_nrings(hwna, r)); 2754847bf383SLuigi Rizzo nma_set_ndesc(na, t, nma_get_ndesc(hwna, r)); 2755847bf383SLuigi Rizzo } 2756f9790aebSLuigi Rizzo na->nm_dtor = netmap_bwrap_dtor; 275737e3a6d3SLuigi Rizzo na->nm_register = netmap_bwrap_reg; 2758f9790aebSLuigi Rizzo // na->nm_txsync = netmap_bwrap_txsync; 2759f9790aebSLuigi Rizzo // na->nm_rxsync = netmap_bwrap_rxsync; 2760f9790aebSLuigi Rizzo na->nm_config = netmap_bwrap_config; 2761f9790aebSLuigi Rizzo na->nm_krings_create = netmap_bwrap_krings_create; 2762f9790aebSLuigi Rizzo na->nm_krings_delete = netmap_bwrap_krings_delete; 2763f9790aebSLuigi Rizzo na->nm_notify = netmap_bwrap_notify; 27644bf50f18SLuigi Rizzo na->nm_bdg_ctl = netmap_bwrap_bdg_ctl; 27654bf50f18SLuigi Rizzo na->pdev = hwna->pdev; 2766c3e9b4dbSLuiz Otavio O Souza na->nm_mem = netmap_mem_get(hwna->nm_mem); 276737e3a6d3SLuigi Rizzo na->virt_hdr_len = hwna->virt_hdr_len; 2768f9790aebSLuigi Rizzo bna->up.retry = 1; /* XXX maybe this should depend on the hwna */ 2769f9790aebSLuigi Rizzo 2770f9790aebSLuigi Rizzo bna->hwna = hwna; 2771f9790aebSLuigi Rizzo netmap_adapter_get(hwna); 2772f9790aebSLuigi Rizzo hwna->na_private = bna; /* weak reference */ 27734bf50f18SLuigi Rizzo hwna->na_vp = &bna->up; 2774f9790aebSLuigi Rizzo 2775f0ea3689SLuigi Rizzo if (hwna->na_flags & NAF_HOST_RINGS) { 27764bf50f18SLuigi Rizzo if (hwna->na_flags & NAF_SW_ONLY) 27774bf50f18SLuigi Rizzo na->na_flags |= NAF_SW_ONLY; 2778f0ea3689SLuigi Rizzo na->na_flags |= NAF_HOST_RINGS; 2779f9790aebSLuigi Rizzo hostna = &bna->host.up; 27804bf50f18SLuigi Rizzo snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name); 2781f9790aebSLuigi Rizzo hostna->ifp = hwna->ifp; 2782847bf383SLuigi Rizzo for_rx_tx(t) { 2783847bf383SLuigi Rizzo enum txrx r = nm_txrx_swap(t); 2784847bf383SLuigi Rizzo 
nma_set_nrings(hostna, t, 1); 2785847bf383SLuigi Rizzo nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r)); 2786847bf383SLuigi Rizzo } 2787f9790aebSLuigi Rizzo // hostna->nm_txsync = netmap_bwrap_host_txsync; 2788f9790aebSLuigi Rizzo // hostna->nm_rxsync = netmap_bwrap_host_rxsync; 2789847bf383SLuigi Rizzo hostna->nm_notify = netmap_bwrap_notify; 2790c3e9b4dbSLuiz Otavio O Souza hostna->nm_mem = netmap_mem_get(na->nm_mem); 2791f9790aebSLuigi Rizzo hostna->na_private = bna; 27924bf50f18SLuigi Rizzo hostna->na_vp = &bna->up; 27934bf50f18SLuigi Rizzo na->na_hostvp = hwna->na_hostvp = 27944bf50f18SLuigi Rizzo hostna->na_hostvp = &bna->host; 27954bf50f18SLuigi Rizzo hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */ 2796f0ea3689SLuigi Rizzo } 2797f9790aebSLuigi Rizzo 279817885a7bSLuigi Rizzo ND("%s<->%s txr %d txd %d rxr %d rxd %d", 27994bf50f18SLuigi Rizzo na->name, ifp->if_xname, 2800f9790aebSLuigi Rizzo na->num_tx_rings, na->num_tx_desc, 2801f9790aebSLuigi Rizzo na->num_rx_rings, na->num_rx_desc); 2802f9790aebSLuigi Rizzo 2803f9790aebSLuigi Rizzo error = netmap_attach_common(na); 2804f9790aebSLuigi Rizzo if (error) { 28054bf50f18SLuigi Rizzo goto err_free; 28064bf50f18SLuigi Rizzo } 28074bf50f18SLuigi Rizzo hwna->na_flags |= NAF_BUSY; 28084bf50f18SLuigi Rizzo return 0; 28094bf50f18SLuigi Rizzo 28104bf50f18SLuigi Rizzo err_free: 28114bf50f18SLuigi Rizzo hwna->na_vp = hwna->na_hostvp = NULL; 2812f9790aebSLuigi Rizzo netmap_adapter_put(hwna); 2813c3e9b4dbSLuiz Otavio O Souza nm_os_free(bna); 2814f9790aebSLuigi Rizzo return error; 28154bf50f18SLuigi Rizzo 2816f9790aebSLuigi Rizzo } 2817f9790aebSLuigi Rizzo 2818847bf383SLuigi Rizzo struct nm_bridge * 2819847bf383SLuigi Rizzo netmap_init_bridges2(u_int n) 2820f9790aebSLuigi Rizzo { 2821f9790aebSLuigi Rizzo int i; 2822847bf383SLuigi Rizzo struct nm_bridge *b; 2823847bf383SLuigi Rizzo 2824c3e9b4dbSLuiz Otavio O Souza b = nm_os_malloc(sizeof(struct nm_bridge) * n); 2825847bf383SLuigi Rizzo if (b == NULL) 2826847bf383SLuigi 
Rizzo return NULL; 2827847bf383SLuigi Rizzo for (i = 0; i < n; i++) 2828847bf383SLuigi Rizzo BDG_RWINIT(&b[i]); 2829847bf383SLuigi Rizzo return b; 2830847bf383SLuigi Rizzo } 2831847bf383SLuigi Rizzo 2832847bf383SLuigi Rizzo void 2833847bf383SLuigi Rizzo netmap_uninit_bridges2(struct nm_bridge *b, u_int n) 2834847bf383SLuigi Rizzo { 2835847bf383SLuigi Rizzo int i; 2836847bf383SLuigi Rizzo 2837847bf383SLuigi Rizzo if (b == NULL) 2838847bf383SLuigi Rizzo return; 2839847bf383SLuigi Rizzo 2840847bf383SLuigi Rizzo for (i = 0; i < n; i++) 2841847bf383SLuigi Rizzo BDG_RWDESTROY(&b[i]); 2842c3e9b4dbSLuiz Otavio O Souza nm_os_free(b); 2843847bf383SLuigi Rizzo } 2844847bf383SLuigi Rizzo 2845847bf383SLuigi Rizzo int 2846847bf383SLuigi Rizzo netmap_init_bridges(void) 2847847bf383SLuigi Rizzo { 2848847bf383SLuigi Rizzo #ifdef CONFIG_NET_NS 2849847bf383SLuigi Rizzo return netmap_bns_register(); 2850847bf383SLuigi Rizzo #else 2851847bf383SLuigi Rizzo nm_bridges = netmap_init_bridges2(NM_BRIDGES); 2852847bf383SLuigi Rizzo if (nm_bridges == NULL) 2853847bf383SLuigi Rizzo return ENOMEM; 2854847bf383SLuigi Rizzo return 0; 2855847bf383SLuigi Rizzo #endif 2856847bf383SLuigi Rizzo } 2857847bf383SLuigi Rizzo 2858847bf383SLuigi Rizzo void 2859847bf383SLuigi Rizzo netmap_uninit_bridges(void) 2860847bf383SLuigi Rizzo { 2861847bf383SLuigi Rizzo #ifdef CONFIG_NET_NS 2862847bf383SLuigi Rizzo netmap_bns_unregister(); 2863847bf383SLuigi Rizzo #else 2864847bf383SLuigi Rizzo netmap_uninit_bridges2(nm_bridges, NM_BRIDGES); 2865847bf383SLuigi Rizzo #endif 2866f9790aebSLuigi Rizzo } 2867f9790aebSLuigi Rizzo #endif /* WITH_VALE */ 2868