1718cf2ccSPedro F. Giffuni /*- 2718cf2ccSPedro F. Giffuni * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3718cf2ccSPedro F. Giffuni * 437e3a6d3SLuigi Rizzo * Copyright (C) 2013-2016 Universita` di Pisa 537e3a6d3SLuigi Rizzo * All rights reserved. 6f9790aebSLuigi Rizzo * 7f9790aebSLuigi Rizzo * Redistribution and use in source and binary forms, with or without 8f9790aebSLuigi Rizzo * modification, are permitted provided that the following conditions 9f9790aebSLuigi Rizzo * are met: 10f9790aebSLuigi Rizzo * 1. Redistributions of source code must retain the above copyright 11f9790aebSLuigi Rizzo * notice, this list of conditions and the following disclaimer. 12f9790aebSLuigi Rizzo * 2. Redistributions in binary form must reproduce the above copyright 13f9790aebSLuigi Rizzo * notice, this list of conditions and the following disclaimer in the 14f9790aebSLuigi Rizzo * documentation and/or other materials provided with the distribution. 15f9790aebSLuigi Rizzo * 16f9790aebSLuigi Rizzo * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17f9790aebSLuigi Rizzo * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18f9790aebSLuigi Rizzo * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19f9790aebSLuigi Rizzo * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20f9790aebSLuigi Rizzo * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21f9790aebSLuigi Rizzo * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22f9790aebSLuigi Rizzo * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23f9790aebSLuigi Rizzo * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24f9790aebSLuigi Rizzo * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25f9790aebSLuigi Rizzo * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26f9790aebSLuigi Rizzo * SUCH DAMAGE. 
*/


/*
 * This module implements the VALE switch for netmap

--- VALE SWITCH ---

NMG_LOCK() serializes all modifications to switches and ports.
A switch cannot be deleted until all ports are gone.

For each switch, an SX lock (RWlock on Linux) protects
deletion of ports. When configuring or deleting a port, the
lock is acquired in exclusive mode (after holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.

On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch)

 */

/*
 * OS-specific code that is used only within this file.
57f9790aebSLuigi Rizzo * Other OS-specific code that must be accessed by drivers 58f9790aebSLuigi Rizzo * is present in netmap_kern.h 59f9790aebSLuigi Rizzo */ 60f9790aebSLuigi Rizzo 61f9790aebSLuigi Rizzo #if defined(__FreeBSD__) 62f9790aebSLuigi Rizzo #include <sys/cdefs.h> /* prerequisite */ 63f9790aebSLuigi Rizzo __FBSDID("$FreeBSD$"); 64f9790aebSLuigi Rizzo 65f9790aebSLuigi Rizzo #include <sys/types.h> 66f9790aebSLuigi Rizzo #include <sys/errno.h> 67f9790aebSLuigi Rizzo #include <sys/param.h> /* defines used in kernel.h */ 68f9790aebSLuigi Rizzo #include <sys/kernel.h> /* types used in module initialization */ 69f9790aebSLuigi Rizzo #include <sys/conf.h> /* cdevsw struct, UID, GID */ 70f9790aebSLuigi Rizzo #include <sys/sockio.h> 71f9790aebSLuigi Rizzo #include <sys/socketvar.h> /* struct socket */ 72f9790aebSLuigi Rizzo #include <sys/malloc.h> 73f9790aebSLuigi Rizzo #include <sys/poll.h> 74f9790aebSLuigi Rizzo #include <sys/rwlock.h> 75f9790aebSLuigi Rizzo #include <sys/socket.h> /* sockaddrs */ 76f9790aebSLuigi Rizzo #include <sys/selinfo.h> 77f9790aebSLuigi Rizzo #include <sys/sysctl.h> 78f9790aebSLuigi Rizzo #include <net/if.h> 79f9790aebSLuigi Rizzo #include <net/if_var.h> 80f9790aebSLuigi Rizzo #include <net/bpf.h> /* BIOCIMMEDIATE */ 81f9790aebSLuigi Rizzo #include <machine/bus.h> /* bus_dmamap_* */ 82f9790aebSLuigi Rizzo #include <sys/endian.h> 83f9790aebSLuigi Rizzo #include <sys/refcount.h> 84f9790aebSLuigi Rizzo 85f9790aebSLuigi Rizzo 86f9790aebSLuigi Rizzo #define BDG_RWLOCK_T struct rwlock // struct rwlock 87f9790aebSLuigi Rizzo 88f9790aebSLuigi Rizzo #define BDG_RWINIT(b) \ 89f9790aebSLuigi Rizzo rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS) 90f9790aebSLuigi Rizzo #define BDG_WLOCK(b) rw_wlock(&(b)->bdg_lock) 91f9790aebSLuigi Rizzo #define BDG_WUNLOCK(b) rw_wunlock(&(b)->bdg_lock) 92f9790aebSLuigi Rizzo #define BDG_RLOCK(b) rw_rlock(&(b)->bdg_lock) 93f9790aebSLuigi Rizzo #define BDG_RTRYLOCK(b) rw_try_rlock(&(b)->bdg_lock) 
94f9790aebSLuigi Rizzo #define BDG_RUNLOCK(b) rw_runlock(&(b)->bdg_lock) 95f9790aebSLuigi Rizzo #define BDG_RWDESTROY(b) rw_destroy(&(b)->bdg_lock) 96f9790aebSLuigi Rizzo 97f9790aebSLuigi Rizzo 98f9790aebSLuigi Rizzo #elif defined(linux) 99f9790aebSLuigi Rizzo 100f9790aebSLuigi Rizzo #include "bsd_glue.h" 101f9790aebSLuigi Rizzo 102f9790aebSLuigi Rizzo #elif defined(__APPLE__) 103f9790aebSLuigi Rizzo 104f9790aebSLuigi Rizzo #warning OSX support is only partial 105f9790aebSLuigi Rizzo #include "osx_glue.h" 106f9790aebSLuigi Rizzo 10737e3a6d3SLuigi Rizzo #elif defined(_WIN32) 10837e3a6d3SLuigi Rizzo #include "win_glue.h" 10937e3a6d3SLuigi Rizzo 110f9790aebSLuigi Rizzo #else 111f9790aebSLuigi Rizzo 112f9790aebSLuigi Rizzo #error Unsupported platform 113f9790aebSLuigi Rizzo 114f9790aebSLuigi Rizzo #endif /* unsupported */ 115f9790aebSLuigi Rizzo 116f9790aebSLuigi Rizzo /* 117f9790aebSLuigi Rizzo * common headers 118f9790aebSLuigi Rizzo */ 119f9790aebSLuigi Rizzo 120f9790aebSLuigi Rizzo #include <net/netmap.h> 121f9790aebSLuigi Rizzo #include <dev/netmap/netmap_kern.h> 122f9790aebSLuigi Rizzo #include <dev/netmap/netmap_mem2.h> 123f9790aebSLuigi Rizzo 124f9790aebSLuigi Rizzo #ifdef WITH_VALE 125f9790aebSLuigi Rizzo 126f9790aebSLuigi Rizzo /* 127f9790aebSLuigi Rizzo * system parameters (most of them in netmap_kern.h) 12837e3a6d3SLuigi Rizzo * NM_BDG_NAME prefix for switch port names, default "vale" 129f9790aebSLuigi Rizzo * NM_BDG_MAXPORTS number of ports 130f9790aebSLuigi Rizzo * NM_BRIDGES max number of switches in the system. 131f9790aebSLuigi Rizzo * XXX should become a sysctl or tunable 132f9790aebSLuigi Rizzo * 133f9790aebSLuigi Rizzo * Switch ports are named valeX:Y where X is the switch name and Y 134f9790aebSLuigi Rizzo * is the port. If Y matches a physical interface name, the port is 135f9790aebSLuigi Rizzo * connected to a physical device. 
*/
/*
 * Unlike physical interfaces, switch ports use their own memory region
 * for rings and buffers.
 * The virtual interfaces use per-queue lock instead of core lock.
 * In the tx loop, we aggregate traffic in batches to make all operations
 * faster. The batch size is bridge_batch.
 */
/* multiplied by NM_BDG_MAXPORTS in nm_alloc_bdgfwd() to size the
 * per-ring array of destination queues
 */
#define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
#define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
#define NM_BRIDGE_RINGSIZE	1024	/* in the device */
/* number of struct nm_hash_ent allocated for each bridge
 * by nm_find_bridge()
 */
#define NM_BDG_HASH		1024	/* forwarding table entries */
/* also the initial value of bridge_batch */
#define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
#define NM_MULTISEG		64	/* max size of a chain of bufs */
/* actual size of the tables: a full batch plus one maximum-length chain;
 * nm_alloc_bdgfwd() allocates this many struct nm_bdg_fwd per ring
 */
#define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
/* NM_FT_NULL terminates a list of slots in the ft; it is also the
 * initial bq_head/bq_tail of every destination queue
 */
#define NM_FT_NULL		NM_BDG_BATCH_MAX
/* Default size for the Maximum Frame Size. */
#define NM_BDG_MFS_DEFAULT	1514


/*
 * bridge_batch is set via sysctl to the max batch size to be
 * used in the bridge. The actual value may be larger as the
 * last packet in the block may overflow the size.
*/
static int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
SYSBEGIN(vars_vale);
SYSCTL_DECL(_dev_netmap);
/* registers bridge_batch as a read-write (CTLFLAG_RW) integer
 * under the _dev_netmap node
 */
SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0,
		"Max batch size to be used in the bridge");
SYSEND;

/* forward declarations of routines that are used before their
 * definition; the definitions are not in this part of the file
 */
static int netmap_vp_create(struct nmreq *, struct ifnet *,
		struct netmap_mem_d *nmd, struct netmap_vp_adapter **);
static int netmap_vp_reg(struct netmap_adapter *na, int onoff);
static int netmap_bwrap_reg(struct netmap_adapter *, int onoff);

/*
 * For each output interface, nm_bdg_q is used to construct a list.
 * bq_len is the number of output buffers (we can have coalescing
 * during the copy).
 *
 * nm_alloc_bdgfwd() initializes every queue as empty:
 * bq_head = bq_tail = NM_FT_NULL and bq_len = 0.
 */
struct nm_bdg_q {
	uint16_t bq_head;	/* NM_FT_NULL when the list is empty */
	uint16_t bq_tail;	/* NM_FT_NULL when the list is empty */
	uint32_t bq_len;	/* number of buffers */
};

/* XXX revise this */
/* One entry of the per-bridge forwarding table (struct nm_bridge.ht);
 * nm_find_bridge() allocates NM_BDG_HASH of these for each new bridge.
 */
struct nm_hash_ent {
	uint64_t	mac;	/* the top 2 bytes are the epoch */
	uint64_t	ports;
};

/*
 * nm_bridge is a descriptor for a VALE switch.
 * Interfaces for a bridge are all in bdg_ports[].
*/
/*
 * The bdg_ports[] array has fixed size, an empty entry does not terminate
 * the search, but lookups only occur on attach/detach so we
 * don't mind if they are slow.
 *
 * The bridge is non blocking on the transmit ports: excess
 * packets are dropped if there is no room on the output port.
 *
 * bdg_lock protects accesses to the bdg_ports array.
 * This is a rw lock (or equivalent).
 */
struct nm_bridge {
	/* XXX what is the proper alignment/layout ? */
	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
	/* number of characters of bdg_basename that form the bridge name;
	 * nm_find_bridge() sets it to the position of the ':' in the
	 * port name
	 */
	int		bdg_namelen;
	uint32_t	bdg_active_ports; /* 0 means free */
	/* bridge name, copied from the part of the port name that
	 * precedes the ':'
	 */
	char		bdg_basename[IFNAMSIZ];

	/* Indexes of active ports (up to active_ports)
	 * and all other remaining ports.
	 * nm_find_bridge() initializes it to the identity mapping.
	 */
	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];

	/* port adapters, indexed by port number; an entry is reset to
	 * NULL by netmap_bdg_detach_common() when the port goes away
	 */
	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];


	/*
	 * The function to decide the destination port.
	 * It returns either of an index of the destination port,
	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
	 * forward this packet.  ring_nr is the source ring index, and the
	 * function may overwrite this value to forward this packet to a
	 * different ring index.
	 * This function must be set by netmap_bdg_ctl().
	 * (nm_find_bridge() installs netmap_bdg_learning as the default.)
	 */
	struct netmap_bdg_ops bdg_ops;

	/* the forwarding table, MAC+ports.
	 * XXX should be changed to an argument to be passed to
	 * the lookup function
	 */
	/* NM_BDG_HASH entries, allocated by nm_find_bridge() when the
	 * bridge is created and released by netmap_bdg_detach_common()
	 * when the last port is removed
	 */
	struct nm_hash_ent *ht; // allocated on attach

#ifdef CONFIG_NET_NS
	struct net *ns;
#endif /* CONFIG_NET_NS */
};

/*
 * Return the base name of the bridge the port vp is attached to,
 * or NULL if vp is not attached to any bridge (vp->na_bdg == NULL).
 * The returned pointer refers to storage inside the bridge descriptor.
 */
const char*
netmap_bdg_name(struct netmap_vp_adapter *vp)
{
	struct nm_bridge *b = vp->na_bdg;
	if (b == NULL)
		return NULL;
	return b->bdg_basename;
}


#ifndef CONFIG_NET_NS
/*
 * XXX in principle nm_bridges could be created dynamically
 * Right now we have a static array and deletions are protected
 * by an exclusive lock.
 *
 * NOTE(review): the variable is declared as a pointer; where the
 * array it points to is allocated is not visible in this part of
 * the file.
 */
static struct nm_bridge *nm_bridges;
#endif /* !CONFIG_NET_NS */


/*
 * this is a slightly optimized copy routine which rounds
 * to multiple of 64 bytes and is often faster than dealing
 * with other odd sizes. We assume there is enough room
 * in the source and destination buffers.
 *
 * XXX only for multiples of 64 bytes, non overlapped.
268f9790aebSLuigi Rizzo */ 269f9790aebSLuigi Rizzo static inline void 270f9790aebSLuigi Rizzo pkt_copy(void *_src, void *_dst, int l) 271f9790aebSLuigi Rizzo { 272f9790aebSLuigi Rizzo uint64_t *src = _src; 273f9790aebSLuigi Rizzo uint64_t *dst = _dst; 274f9790aebSLuigi Rizzo if (unlikely(l >= 1024)) { 275f9790aebSLuigi Rizzo memcpy(dst, src, l); 276f9790aebSLuigi Rizzo return; 277f9790aebSLuigi Rizzo } 278f9790aebSLuigi Rizzo for (; likely(l > 0); l-=64) { 279f9790aebSLuigi Rizzo *dst++ = *src++; 280f9790aebSLuigi Rizzo *dst++ = *src++; 281f9790aebSLuigi Rizzo *dst++ = *src++; 282f9790aebSLuigi Rizzo *dst++ = *src++; 283f9790aebSLuigi Rizzo *dst++ = *src++; 284f9790aebSLuigi Rizzo *dst++ = *src++; 285f9790aebSLuigi Rizzo *dst++ = *src++; 286f9790aebSLuigi Rizzo *dst++ = *src++; 287f9790aebSLuigi Rizzo } 288f9790aebSLuigi Rizzo } 289f9790aebSLuigi Rizzo 290f9790aebSLuigi Rizzo 29137e3a6d3SLuigi Rizzo static int 29237e3a6d3SLuigi Rizzo nm_is_id_char(const char c) 29337e3a6d3SLuigi Rizzo { 29437e3a6d3SLuigi Rizzo return (c >= 'a' && c <= 'z') || 29537e3a6d3SLuigi Rizzo (c >= 'A' && c <= 'Z') || 29637e3a6d3SLuigi Rizzo (c >= '0' && c <= '9') || 29737e3a6d3SLuigi Rizzo (c == '_'); 29837e3a6d3SLuigi Rizzo } 29937e3a6d3SLuigi Rizzo 30037e3a6d3SLuigi Rizzo /* Validate the name of a VALE bridge port and return the 30137e3a6d3SLuigi Rizzo * position of the ":" character. 
*/ 30237e3a6d3SLuigi Rizzo static int 30337e3a6d3SLuigi Rizzo nm_vale_name_validate(const char *name) 30437e3a6d3SLuigi Rizzo { 30537e3a6d3SLuigi Rizzo int colon_pos = -1; 30637e3a6d3SLuigi Rizzo int i; 30737e3a6d3SLuigi Rizzo 30837e3a6d3SLuigi Rizzo if (!name || strlen(name) < strlen(NM_BDG_NAME)) { 30937e3a6d3SLuigi Rizzo return -1; 31037e3a6d3SLuigi Rizzo } 31137e3a6d3SLuigi Rizzo 31237e3a6d3SLuigi Rizzo for (i = 0; name[i]; i++) { 31337e3a6d3SLuigi Rizzo if (name[i] == ':') { 31437e3a6d3SLuigi Rizzo if (colon_pos != -1) { 31537e3a6d3SLuigi Rizzo return -1; 31637e3a6d3SLuigi Rizzo } 31737e3a6d3SLuigi Rizzo colon_pos = i; 31837e3a6d3SLuigi Rizzo } else if (!nm_is_id_char(name[i])) { 31937e3a6d3SLuigi Rizzo return -1; 32037e3a6d3SLuigi Rizzo } 32137e3a6d3SLuigi Rizzo } 32237e3a6d3SLuigi Rizzo 32337e3a6d3SLuigi Rizzo if (i >= IFNAMSIZ) { 32437e3a6d3SLuigi Rizzo return -1; 32537e3a6d3SLuigi Rizzo } 32637e3a6d3SLuigi Rizzo 32737e3a6d3SLuigi Rizzo return colon_pos; 32837e3a6d3SLuigi Rizzo } 32937e3a6d3SLuigi Rizzo 330f9790aebSLuigi Rizzo /* 331f9790aebSLuigi Rizzo * locate a bridge among the existing ones. 332f9790aebSLuigi Rizzo * MUST BE CALLED WITH NMG_LOCK() 333f9790aebSLuigi Rizzo * 334f9790aebSLuigi Rizzo * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME. 335f9790aebSLuigi Rizzo * We assume that this is called with a name of at least NM_NAME chars. 
336f9790aebSLuigi Rizzo */ 337f9790aebSLuigi Rizzo static struct nm_bridge * 338f9790aebSLuigi Rizzo nm_find_bridge(const char *name, int create) 339f9790aebSLuigi Rizzo { 34037e3a6d3SLuigi Rizzo int i, namelen; 341847bf383SLuigi Rizzo struct nm_bridge *b = NULL, *bridges; 342847bf383SLuigi Rizzo u_int num_bridges; 343f9790aebSLuigi Rizzo 344f9790aebSLuigi Rizzo NMG_LOCK_ASSERT(); 345f9790aebSLuigi Rizzo 346847bf383SLuigi Rizzo netmap_bns_getbridges(&bridges, &num_bridges); 347847bf383SLuigi Rizzo 34837e3a6d3SLuigi Rizzo namelen = nm_vale_name_validate(name); 34937e3a6d3SLuigi Rizzo if (namelen < 0) { 350f9790aebSLuigi Rizzo D("invalid bridge name %s", name ? name : NULL); 351f9790aebSLuigi Rizzo return NULL; 352f9790aebSLuigi Rizzo } 353f9790aebSLuigi Rizzo 354f9790aebSLuigi Rizzo /* lookup the name, remember empty slot if there is one */ 355847bf383SLuigi Rizzo for (i = 0; i < num_bridges; i++) { 356847bf383SLuigi Rizzo struct nm_bridge *x = bridges + i; 357f9790aebSLuigi Rizzo 358f9790aebSLuigi Rizzo if (x->bdg_active_ports == 0) { 359f9790aebSLuigi Rizzo if (create && b == NULL) 360f9790aebSLuigi Rizzo b = x; /* record empty slot */ 361f9790aebSLuigi Rizzo } else if (x->bdg_namelen != namelen) { 362f9790aebSLuigi Rizzo continue; 363f9790aebSLuigi Rizzo } else if (strncmp(name, x->bdg_basename, namelen) == 0) { 364f9790aebSLuigi Rizzo ND("found '%.*s' at %d", namelen, name, i); 365f9790aebSLuigi Rizzo b = x; 366f9790aebSLuigi Rizzo break; 367f9790aebSLuigi Rizzo } 368f9790aebSLuigi Rizzo } 369847bf383SLuigi Rizzo if (i == num_bridges && b) { /* name not found, can create entry */ 370f9790aebSLuigi Rizzo /* initialize the bridge */ 371f9790aebSLuigi Rizzo ND("create new bridge %s with ports %d", b->bdg_basename, 372f9790aebSLuigi Rizzo b->bdg_active_ports); 373*4f80b14cSVincenzo Maffione b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH); 374*4f80b14cSVincenzo Maffione if (b->ht == NULL) { 375*4f80b14cSVincenzo Maffione D("failed to allocate hash 
table"); 376*4f80b14cSVincenzo Maffione return NULL; 377*4f80b14cSVincenzo Maffione } 378*4f80b14cSVincenzo Maffione strncpy(b->bdg_basename, name, namelen); 379f9790aebSLuigi Rizzo b->bdg_namelen = namelen; 380f9790aebSLuigi Rizzo b->bdg_active_ports = 0; 381f9790aebSLuigi Rizzo for (i = 0; i < NM_BDG_MAXPORTS; i++) 382f9790aebSLuigi Rizzo b->bdg_port_index[i] = i; 383f9790aebSLuigi Rizzo /* set the default function */ 3844bf50f18SLuigi Rizzo b->bdg_ops.lookup = netmap_bdg_learning; 385847bf383SLuigi Rizzo NM_BNS_GET(b); 386f9790aebSLuigi Rizzo } 387f9790aebSLuigi Rizzo return b; 388f9790aebSLuigi Rizzo } 389f9790aebSLuigi Rizzo 390f9790aebSLuigi Rizzo 391f9790aebSLuigi Rizzo /* 392f9790aebSLuigi Rizzo * Free the forwarding tables for rings attached to switch ports. 393f9790aebSLuigi Rizzo */ 394f9790aebSLuigi Rizzo static void 395f9790aebSLuigi Rizzo nm_free_bdgfwd(struct netmap_adapter *na) 396f9790aebSLuigi Rizzo { 397f9790aebSLuigi Rizzo int nrings, i; 398f9790aebSLuigi Rizzo struct netmap_kring *kring; 399f9790aebSLuigi Rizzo 400f9790aebSLuigi Rizzo NMG_LOCK_ASSERT(); 40117885a7bSLuigi Rizzo nrings = na->num_tx_rings; 40217885a7bSLuigi Rizzo kring = na->tx_rings; 403f9790aebSLuigi Rizzo for (i = 0; i < nrings; i++) { 404f9790aebSLuigi Rizzo if (kring[i].nkr_ft) { 405c3e9b4dbSLuiz Otavio O Souza nm_os_free(kring[i].nkr_ft); 406f9790aebSLuigi Rizzo kring[i].nkr_ft = NULL; /* protect from freeing twice */ 407f9790aebSLuigi Rizzo } 408f9790aebSLuigi Rizzo } 409f9790aebSLuigi Rizzo } 410f9790aebSLuigi Rizzo 411f9790aebSLuigi Rizzo 412f9790aebSLuigi Rizzo /* 413f9790aebSLuigi Rizzo * Allocate the forwarding tables for the rings attached to the bridge ports. 
414f9790aebSLuigi Rizzo */ 415f9790aebSLuigi Rizzo static int 416f9790aebSLuigi Rizzo nm_alloc_bdgfwd(struct netmap_adapter *na) 417f9790aebSLuigi Rizzo { 418f9790aebSLuigi Rizzo int nrings, l, i, num_dstq; 419f9790aebSLuigi Rizzo struct netmap_kring *kring; 420f9790aebSLuigi Rizzo 421f9790aebSLuigi Rizzo NMG_LOCK_ASSERT(); 422f9790aebSLuigi Rizzo /* all port:rings + broadcast */ 423f9790aebSLuigi Rizzo num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1; 424f9790aebSLuigi Rizzo l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX; 425f9790aebSLuigi Rizzo l += sizeof(struct nm_bdg_q) * num_dstq; 426f9790aebSLuigi Rizzo l += sizeof(uint16_t) * NM_BDG_BATCH_MAX; 427f9790aebSLuigi Rizzo 428847bf383SLuigi Rizzo nrings = netmap_real_rings(na, NR_TX); 429f9790aebSLuigi Rizzo kring = na->tx_rings; 430f9790aebSLuigi Rizzo for (i = 0; i < nrings; i++) { 431f9790aebSLuigi Rizzo struct nm_bdg_fwd *ft; 432f9790aebSLuigi Rizzo struct nm_bdg_q *dstq; 433f9790aebSLuigi Rizzo int j; 434f9790aebSLuigi Rizzo 435c3e9b4dbSLuiz Otavio O Souza ft = nm_os_malloc(l); 436f9790aebSLuigi Rizzo if (!ft) { 437f9790aebSLuigi Rizzo nm_free_bdgfwd(na); 438f9790aebSLuigi Rizzo return ENOMEM; 439f9790aebSLuigi Rizzo } 440f9790aebSLuigi Rizzo dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX); 441f9790aebSLuigi Rizzo for (j = 0; j < num_dstq; j++) { 442f9790aebSLuigi Rizzo dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL; 443f9790aebSLuigi Rizzo dstq[j].bq_len = 0; 444f9790aebSLuigi Rizzo } 445f9790aebSLuigi Rizzo kring[i].nkr_ft = ft; 446f9790aebSLuigi Rizzo } 447f9790aebSLuigi Rizzo return 0; 448f9790aebSLuigi Rizzo } 449f9790aebSLuigi Rizzo 450f9790aebSLuigi Rizzo 4514bf50f18SLuigi Rizzo /* remove from bridge b the ports in slots hw and sw 4524bf50f18SLuigi Rizzo * (sw can be -1 if not needed) 4534bf50f18SLuigi Rizzo */ 454f9790aebSLuigi Rizzo static void 455f9790aebSLuigi Rizzo netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw) 456f9790aebSLuigi Rizzo { 457f9790aebSLuigi Rizzo int s_hw = hw, 
s_sw = sw; 458f9790aebSLuigi Rizzo int i, lim =b->bdg_active_ports; 459f9790aebSLuigi Rizzo uint8_t tmp[NM_BDG_MAXPORTS]; 460f9790aebSLuigi Rizzo 461f9790aebSLuigi Rizzo /* 462f9790aebSLuigi Rizzo New algorithm: 463f9790aebSLuigi Rizzo make a copy of bdg_port_index; 464f9790aebSLuigi Rizzo lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port 465f9790aebSLuigi Rizzo in the array of bdg_port_index, replacing them with 466f9790aebSLuigi Rizzo entries from the bottom of the array; 467f9790aebSLuigi Rizzo decrement bdg_active_ports; 468f9790aebSLuigi Rizzo acquire BDG_WLOCK() and copy back the array. 469f9790aebSLuigi Rizzo */ 470f9790aebSLuigi Rizzo 471f0ea3689SLuigi Rizzo if (netmap_verbose) 472f9790aebSLuigi Rizzo D("detach %d and %d (lim %d)", hw, sw, lim); 473f9790aebSLuigi Rizzo /* make a copy of the list of active ports, update it, 474f9790aebSLuigi Rizzo * and then copy back within BDG_WLOCK(). 475f9790aebSLuigi Rizzo */ 476f9790aebSLuigi Rizzo memcpy(tmp, b->bdg_port_index, sizeof(tmp)); 477f9790aebSLuigi Rizzo for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) { 478f9790aebSLuigi Rizzo if (hw >= 0 && tmp[i] == hw) { 479f9790aebSLuigi Rizzo ND("detach hw %d at %d", hw, i); 480f9790aebSLuigi Rizzo lim--; /* point to last active port */ 481f9790aebSLuigi Rizzo tmp[i] = tmp[lim]; /* swap with i */ 482f9790aebSLuigi Rizzo tmp[lim] = hw; /* now this is inactive */ 483f9790aebSLuigi Rizzo hw = -1; 484f9790aebSLuigi Rizzo } else if (sw >= 0 && tmp[i] == sw) { 485f9790aebSLuigi Rizzo ND("detach sw %d at %d", sw, i); 486f9790aebSLuigi Rizzo lim--; 487f9790aebSLuigi Rizzo tmp[i] = tmp[lim]; 488f9790aebSLuigi Rizzo tmp[lim] = sw; 489f9790aebSLuigi Rizzo sw = -1; 490f9790aebSLuigi Rizzo } else { 491f9790aebSLuigi Rizzo i++; 492f9790aebSLuigi Rizzo } 493f9790aebSLuigi Rizzo } 494f9790aebSLuigi Rizzo if (hw >= 0 || sw >= 0) { 495f9790aebSLuigi Rizzo D("XXX delete failed hw %d sw %d, should panic...", hw, sw); 496f9790aebSLuigi Rizzo } 497f9790aebSLuigi Rizzo 498f9790aebSLuigi Rizzo 
BDG_WLOCK(b); 4994bf50f18SLuigi Rizzo if (b->bdg_ops.dtor) 5004bf50f18SLuigi Rizzo b->bdg_ops.dtor(b->bdg_ports[s_hw]); 501f9790aebSLuigi Rizzo b->bdg_ports[s_hw] = NULL; 502f9790aebSLuigi Rizzo if (s_sw >= 0) { 503f9790aebSLuigi Rizzo b->bdg_ports[s_sw] = NULL; 504f9790aebSLuigi Rizzo } 505f9790aebSLuigi Rizzo memcpy(b->bdg_port_index, tmp, sizeof(tmp)); 506f9790aebSLuigi Rizzo b->bdg_active_ports = lim; 507f9790aebSLuigi Rizzo BDG_WUNLOCK(b); 508f9790aebSLuigi Rizzo 509f9790aebSLuigi Rizzo ND("now %d active ports", lim); 510f9790aebSLuigi Rizzo if (lim == 0) { 511f9790aebSLuigi Rizzo ND("marking bridge %s as free", b->bdg_basename); 512*4f80b14cSVincenzo Maffione nm_os_free(b->ht); 5134bf50f18SLuigi Rizzo bzero(&b->bdg_ops, sizeof(b->bdg_ops)); 514847bf383SLuigi Rizzo NM_BNS_PUT(b); 515f9790aebSLuigi Rizzo } 516f9790aebSLuigi Rizzo } 517f9790aebSLuigi Rizzo 5184bf50f18SLuigi Rizzo /* nm_bdg_ctl callback for VALE ports */ 5194bf50f18SLuigi Rizzo static int 5204bf50f18SLuigi Rizzo netmap_vp_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) 521f9790aebSLuigi Rizzo { 522f9790aebSLuigi Rizzo struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; 523f9790aebSLuigi Rizzo struct nm_bridge *b = vpna->na_bdg; 524f9790aebSLuigi Rizzo 52537e3a6d3SLuigi Rizzo (void)nmr; // XXX merge ? 
5264bf50f18SLuigi Rizzo if (attach) 5274bf50f18SLuigi Rizzo return 0; /* nothing to do */ 5284bf50f18SLuigi Rizzo if (b) { 5294bf50f18SLuigi Rizzo netmap_set_all_rings(na, 0 /* disable */); 5304bf50f18SLuigi Rizzo netmap_bdg_detach_common(b, vpna->bdg_port, -1); 5314bf50f18SLuigi Rizzo vpna->na_bdg = NULL; 5324bf50f18SLuigi Rizzo netmap_set_all_rings(na, 1 /* enable */); 5334bf50f18SLuigi Rizzo } 5344bf50f18SLuigi Rizzo /* I have took reference just for attach */ 5354bf50f18SLuigi Rizzo netmap_adapter_put(na); 5364bf50f18SLuigi Rizzo return 0; 5374bf50f18SLuigi Rizzo } 5384bf50f18SLuigi Rizzo 5394bf50f18SLuigi Rizzo /* nm_dtor callback for ephemeral VALE ports */ 5404bf50f18SLuigi Rizzo static void 5414bf50f18SLuigi Rizzo netmap_vp_dtor(struct netmap_adapter *na) 5424bf50f18SLuigi Rizzo { 5434bf50f18SLuigi Rizzo struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na; 5444bf50f18SLuigi Rizzo struct nm_bridge *b = vpna->na_bdg; 5454bf50f18SLuigi Rizzo 5464bf50f18SLuigi Rizzo ND("%s has %d references", na->name, na->na_refcount); 547f9790aebSLuigi Rizzo 548f9790aebSLuigi Rizzo if (b) { 549f9790aebSLuigi Rizzo netmap_bdg_detach_common(b, vpna->bdg_port, -1); 550f9790aebSLuigi Rizzo } 551c3e9b4dbSLuiz Otavio O Souza 552*4f80b14cSVincenzo Maffione if (na->ifp != NULL && !nm_iszombie(na)) { 553*4f80b14cSVincenzo Maffione WNA(na->ifp) = NULL; 554*4f80b14cSVincenzo Maffione if (vpna->autodelete) { 555c3e9b4dbSLuiz Otavio O Souza ND("releasing %s", na->ifp->if_xname); 556c3e9b4dbSLuiz Otavio O Souza NMG_UNLOCK(); 557c3e9b4dbSLuiz Otavio O Souza nm_os_vi_detach(na->ifp); 558c3e9b4dbSLuiz Otavio O Souza NMG_LOCK(); 559c3e9b4dbSLuiz Otavio O Souza } 560f9790aebSLuigi Rizzo } 561*4f80b14cSVincenzo Maffione } 562f9790aebSLuigi Rizzo 5634bf50f18SLuigi Rizzo /* remove a persistent VALE port from the system */ 5644bf50f18SLuigi Rizzo static int 5654bf50f18SLuigi Rizzo nm_vi_destroy(const char *name) 5664bf50f18SLuigi Rizzo { 5674bf50f18SLuigi Rizzo struct ifnet *ifp; 
568c3e9b4dbSLuiz Otavio O Souza struct netmap_vp_adapter *vpna; 5694bf50f18SLuigi Rizzo int error; 5704bf50f18SLuigi Rizzo 5714bf50f18SLuigi Rizzo ifp = ifunit_ref(name); 5724bf50f18SLuigi Rizzo if (!ifp) 5734bf50f18SLuigi Rizzo return ENXIO; 5744bf50f18SLuigi Rizzo NMG_LOCK(); 5754bf50f18SLuigi Rizzo /* make sure this is actually a VALE port */ 57637e3a6d3SLuigi Rizzo if (!NM_NA_VALID(ifp) || NA(ifp)->nm_register != netmap_vp_reg) { 5774bf50f18SLuigi Rizzo error = EINVAL; 5784bf50f18SLuigi Rizzo goto err; 5794bf50f18SLuigi Rizzo } 5804bf50f18SLuigi Rizzo 581c3e9b4dbSLuiz Otavio O Souza vpna = (struct netmap_vp_adapter *)NA(ifp); 582c3e9b4dbSLuiz Otavio O Souza 583c3e9b4dbSLuiz Otavio O Souza /* we can only destroy ports that were created via NETMAP_BDG_NEWIF */ 584c3e9b4dbSLuiz Otavio O Souza if (vpna->autodelete) { 585c3e9b4dbSLuiz Otavio O Souza error = EINVAL; 586c3e9b4dbSLuiz Otavio O Souza goto err; 587c3e9b4dbSLuiz Otavio O Souza } 588c3e9b4dbSLuiz Otavio O Souza 589c3e9b4dbSLuiz Otavio O Souza /* also make sure that nobody is using the inferface */ 590c3e9b4dbSLuiz Otavio O Souza if (NETMAP_OWNED_BY_ANY(&vpna->up) || 591c3e9b4dbSLuiz Otavio O Souza vpna->up.na_refcount > 1 /* any ref besides the one in nm_vi_create()? 
*/) { 5924bf50f18SLuigi Rizzo error = EBUSY; 5934bf50f18SLuigi Rizzo goto err; 5944bf50f18SLuigi Rizzo } 595c3e9b4dbSLuiz Otavio O Souza 5964bf50f18SLuigi Rizzo NMG_UNLOCK(); 5974bf50f18SLuigi Rizzo 5984bf50f18SLuigi Rizzo D("destroying a persistent vale interface %s", ifp->if_xname); 5994bf50f18SLuigi Rizzo /* Linux requires all the references are released 6004bf50f18SLuigi Rizzo * before unregister 6014bf50f18SLuigi Rizzo */ 6024bf50f18SLuigi Rizzo netmap_detach(ifp); 603c3e9b4dbSLuiz Otavio O Souza if_rele(ifp); 60437e3a6d3SLuigi Rizzo nm_os_vi_detach(ifp); 6054bf50f18SLuigi Rizzo return 0; 6064bf50f18SLuigi Rizzo 6074bf50f18SLuigi Rizzo err: 6084bf50f18SLuigi Rizzo NMG_UNLOCK(); 6094bf50f18SLuigi Rizzo if_rele(ifp); 6104bf50f18SLuigi Rizzo return error; 6114bf50f18SLuigi Rizzo } 6124bf50f18SLuigi Rizzo 613c3e9b4dbSLuiz Otavio O Souza static int 614c3e9b4dbSLuiz Otavio O Souza nm_update_info(struct nmreq *nmr, struct netmap_adapter *na) 615c3e9b4dbSLuiz Otavio O Souza { 616*4f80b14cSVincenzo Maffione uint64_t memsize; 617*4f80b14cSVincenzo Maffione int ret; 618c3e9b4dbSLuiz Otavio O Souza nmr->nr_rx_rings = na->num_rx_rings; 619c3e9b4dbSLuiz Otavio O Souza nmr->nr_tx_rings = na->num_tx_rings; 620c3e9b4dbSLuiz Otavio O Souza nmr->nr_rx_slots = na->num_rx_desc; 621c3e9b4dbSLuiz Otavio O Souza nmr->nr_tx_slots = na->num_tx_desc; 622*4f80b14cSVincenzo Maffione ret = netmap_mem_get_info(na->nm_mem, &memsize, NULL, &nmr->nr_arg2); 623*4f80b14cSVincenzo Maffione nmr->nr_memsize = (uint32_t)memsize; 624*4f80b14cSVincenzo Maffione return ret; 625c3e9b4dbSLuiz Otavio O Souza } 626c3e9b4dbSLuiz Otavio O Souza 6274bf50f18SLuigi Rizzo /* 6284bf50f18SLuigi Rizzo * Create a virtual interface registered to the system. 6294bf50f18SLuigi Rizzo * The interface will be attached to a bridge later. 
*/
/*
 * netmap_vi_create() details:
 *  - nmr->nr_name is the interface name and must not start with
 *    NM_BDG_NAME;
 *  - a non-zero nmr->nr_arg2 selects an existing memory allocator,
 *    looked up with netmap_mem_find();
 *  - when autodelete is zero an extra reference to the new adapter is
 *    taken, otherwise the adapter is flagged as autodelete;
 *  - on success nmr is refreshed through nm_update_info().
 * Returns 0 on success, or:
 *  EINVAL if the name starts with NM_BDG_NAME or the requested memory
 *         allocator does not exist;
 *  EEXIST if an interface with that name already exists (in that case
 *         nmr is still refreshed if the interface has a valid netmap
 *         adapter, and a failure of the refresh replaces EEXIST);
 *  any error returned by nm_os_vi_persist(), netmap_vp_create() or
 *  nm_update_info().
 */
int
netmap_vi_create(struct nmreq *nmr, int autodelete)
{
	struct ifnet *ifp;
	struct netmap_vp_adapter *vpna;
	struct netmap_mem_d *nmd = NULL;
	int error;

	/* don't include VALE prefix */
	if (!strncmp(nmr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME)))
		return EINVAL;
	ifp = ifunit_ref(nmr->nr_name);
	if (ifp) { /* already exists, cannot create a new one */
		error = EEXIST;
		NMG_LOCK();
		if (NM_NA_VALID(ifp)) {
			int update_err = nm_update_info(nmr, NA(ifp));
			if (update_err)
				error = update_err;
		}
		NMG_UNLOCK();
		/* drop the reference obtained from ifunit_ref() */
		if_rele(ifp);
		return error;
	}
	error = nm_os_vi_persist(nmr->nr_name, &ifp);
	if (error)
		return error;

	NMG_LOCK();
	if (nmr->nr_arg2) {
		nmd = netmap_mem_find(nmr->nr_arg2);
		if (nmd == NULL) {
			error = EINVAL;
			goto err_1;
		}
	}
	/* netmap_vp_create creates a struct netmap_vp_adapter */
	error = netmap_vp_create(nmr, ifp, nmd, &vpna);
	if (error) {
		D("error %d", error);
		goto err_1;
	}
	/* persist-specific routines */
	vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
	if (!autodelete) {
		/* nm_vi_destroy() expects this to be the only reference
		 * left when the port is destroyed
		 */
		netmap_adapter_get(&vpna->up);
	} else {
		vpna->autodelete = 1;
	}
	NM_ATTACH_NA(ifp, &vpna->up);
	/* return the updated info */
	error = nm_update_info(nmr, &vpna->up);
	if (error) {
		goto err_2;
	}
	D("returning nr_arg2 %d", nmr->nr_arg2);
	/* release the nmd obtained from netmap_mem_find() above */
	if (nmd)
		netmap_mem_put(nmd);
	NMG_UNLOCK();
	D("created %s", ifp->if_xname);
	return 0;

err_2:
	/* NOTE(review): netmap_detach() is called here with NMG_LOCK held,
	 * while nm_vi_destroy() releases the lock before calling it.
	 * Confirm that netmap_detach() does not acquire NMG_LOCK itself.
	 */
	netmap_detach(ifp);
err_1:
	if (nmd)
		netmap_mem_put(nmd);
	NMG_UNLOCK();
	/* undo nm_os_vi_persist() */
	nm_os_vi_detach(ifp);

	return error;
}

/* Try to get a reference to a netmap adapter attached to a VALE switch.
 * If the adapter is found (or is created), this function returns 0, a
 * non NULL pointer is returned into *na, and the caller holds a
 * reference to the adapter.
70817885a7bSLuigi Rizzo * If an adapter is not found, then no reference is grabbed and the 70917885a7bSLuigi Rizzo * function returns an error code, or 0 if there is just a VALE prefix 71017885a7bSLuigi Rizzo * mismatch. Therefore the caller holds a reference when 71117885a7bSLuigi Rizzo * (*na != NULL && return == 0). 71217885a7bSLuigi Rizzo */ 713f9790aebSLuigi Rizzo int 714c3e9b4dbSLuiz Otavio O Souza netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, 715c3e9b4dbSLuiz Otavio O Souza struct netmap_mem_d *nmd, int create) 716f9790aebSLuigi Rizzo { 7174bf50f18SLuigi Rizzo char *nr_name = nmr->nr_name; 7184bf50f18SLuigi Rizzo const char *ifname; 719c3e9b4dbSLuiz Otavio O Souza struct ifnet *ifp = NULL; 720f9790aebSLuigi Rizzo int error = 0; 7214bf50f18SLuigi Rizzo struct netmap_vp_adapter *vpna, *hostna = NULL; 722f9790aebSLuigi Rizzo struct nm_bridge *b; 723f9790aebSLuigi Rizzo int i, j, cand = -1, cand2 = -1; 724f9790aebSLuigi Rizzo int needed; 725f9790aebSLuigi Rizzo 726f9790aebSLuigi Rizzo *na = NULL; /* default return value */ 727f9790aebSLuigi Rizzo 728f9790aebSLuigi Rizzo /* first try to see if this is a bridge port. */ 729f9790aebSLuigi Rizzo NMG_LOCK_ASSERT(); 73037e3a6d3SLuigi Rizzo if (strncmp(nr_name, NM_BDG_NAME, sizeof(NM_BDG_NAME) - 1)) { 731f9790aebSLuigi Rizzo return 0; /* no error, but no VALE prefix */ 732f9790aebSLuigi Rizzo } 733f9790aebSLuigi Rizzo 7344bf50f18SLuigi Rizzo b = nm_find_bridge(nr_name, create); 735f9790aebSLuigi Rizzo if (b == NULL) { 7364bf50f18SLuigi Rizzo D("no bridges available for '%s'", nr_name); 737f2637526SLuigi Rizzo return (create ? ENOMEM : ENXIO); 738f9790aebSLuigi Rizzo } 7394bf50f18SLuigi Rizzo if (strlen(nr_name) < b->bdg_namelen) /* impossible */ 7404bf50f18SLuigi Rizzo panic("x"); 741f9790aebSLuigi Rizzo 742f9790aebSLuigi Rizzo /* Now we are sure that name starts with the bridge's name, 743f9790aebSLuigi Rizzo * lookup the port in the bridge. We need to scan the entire 744f9790aebSLuigi Rizzo * list. 
It is not important to hold a WLOCK on the bridge 745f9790aebSLuigi Rizzo * during the search because NMG_LOCK already guarantees 746f9790aebSLuigi Rizzo * that there are no other possible writers. 747f9790aebSLuigi Rizzo */ 748f9790aebSLuigi Rizzo 749f9790aebSLuigi Rizzo /* lookup in the local list of ports */ 750f9790aebSLuigi Rizzo for (j = 0; j < b->bdg_active_ports; j++) { 751f9790aebSLuigi Rizzo i = b->bdg_port_index[j]; 752f9790aebSLuigi Rizzo vpna = b->bdg_ports[i]; 753847bf383SLuigi Rizzo ND("checking %s", vpna->up.name); 7544bf50f18SLuigi Rizzo if (!strcmp(vpna->up.name, nr_name)) { 755f9790aebSLuigi Rizzo netmap_adapter_get(&vpna->up); 7564bf50f18SLuigi Rizzo ND("found existing if %s refs %d", nr_name) 7574bf50f18SLuigi Rizzo *na = &vpna->up; 758f9790aebSLuigi Rizzo return 0; 759f9790aebSLuigi Rizzo } 760f9790aebSLuigi Rizzo } 761f9790aebSLuigi Rizzo /* not found, should we create it? */ 762f9790aebSLuigi Rizzo if (!create) 763f9790aebSLuigi Rizzo return ENXIO; 764f9790aebSLuigi Rizzo /* yes we should, see if we have space to attach entries */ 765f9790aebSLuigi Rizzo needed = 2; /* in some cases we only need 1 */ 766f9790aebSLuigi Rizzo if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) { 767f9790aebSLuigi Rizzo D("bridge full %d, cannot create new port", b->bdg_active_ports); 768f2637526SLuigi Rizzo return ENOMEM; 769f9790aebSLuigi Rizzo } 770f9790aebSLuigi Rizzo /* record the next two ports available, but do not allocate yet */ 771f9790aebSLuigi Rizzo cand = b->bdg_port_index[b->bdg_active_ports]; 772f9790aebSLuigi Rizzo cand2 = b->bdg_port_index[b->bdg_active_ports + 1]; 773f9790aebSLuigi Rizzo ND("+++ bridge %s port %s used %d avail %d %d", 7744bf50f18SLuigi Rizzo b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2); 775f9790aebSLuigi Rizzo 776f9790aebSLuigi Rizzo /* 777f9790aebSLuigi Rizzo * try see if there is a matching NIC with this name 778f9790aebSLuigi Rizzo * (after the bridge's name) 779f9790aebSLuigi Rizzo */ 7804bf50f18SLuigi Rizzo 
ifname = nr_name + b->bdg_namelen + 1; 7814bf50f18SLuigi Rizzo ifp = ifunit_ref(ifname); 7824bf50f18SLuigi Rizzo if (!ifp) { 7834bf50f18SLuigi Rizzo /* Create an ephemeral virtual port 7844bf50f18SLuigi Rizzo * This block contains all the ephemeral-specific logics 7854bf50f18SLuigi Rizzo */ 786f9790aebSLuigi Rizzo if (nmr->nr_cmd) { 787f9790aebSLuigi Rizzo /* nr_cmd must be 0 for a virtual port */ 788c3e9b4dbSLuiz Otavio O Souza error = EINVAL; 789c3e9b4dbSLuiz Otavio O Souza goto out; 790f9790aebSLuigi Rizzo } 791f9790aebSLuigi Rizzo 792f9790aebSLuigi Rizzo /* bdg_netmap_attach creates a struct netmap_adapter */ 793c3e9b4dbSLuiz Otavio O Souza error = netmap_vp_create(nmr, NULL, nmd, &vpna); 794f9790aebSLuigi Rizzo if (error) { 795f9790aebSLuigi Rizzo D("error %d", error); 796c3e9b4dbSLuiz Otavio O Souza goto out; 797f9790aebSLuigi Rizzo } 7984bf50f18SLuigi Rizzo /* shortcut - we can skip get_hw_na(), 7994bf50f18SLuigi Rizzo * ownership check and nm_bdg_attach() 8004bf50f18SLuigi Rizzo */ 8014bf50f18SLuigi Rizzo } else { 8024bf50f18SLuigi Rizzo struct netmap_adapter *hw; 803f9790aebSLuigi Rizzo 804*4f80b14cSVincenzo Maffione /* the vale:nic syntax is only valid for some commands */ 805*4f80b14cSVincenzo Maffione switch (nmr->nr_cmd) { 806*4f80b14cSVincenzo Maffione case NETMAP_BDG_ATTACH: 807*4f80b14cSVincenzo Maffione case NETMAP_BDG_DETACH: 808*4f80b14cSVincenzo Maffione case NETMAP_BDG_POLLING_ON: 809*4f80b14cSVincenzo Maffione case NETMAP_BDG_POLLING_OFF: 810*4f80b14cSVincenzo Maffione break; /* ok */ 811*4f80b14cSVincenzo Maffione default: 812*4f80b14cSVincenzo Maffione error = EINVAL; 813*4f80b14cSVincenzo Maffione goto out; 814*4f80b14cSVincenzo Maffione } 815*4f80b14cSVincenzo Maffione 816c3e9b4dbSLuiz Otavio O Souza error = netmap_get_hw_na(ifp, nmd, &hw); 8174bf50f18SLuigi Rizzo if (error || hw == NULL) 818f9790aebSLuigi Rizzo goto out; 819f9790aebSLuigi Rizzo 8204bf50f18SLuigi Rizzo /* host adapter might not be created */ 8214bf50f18SLuigi Rizzo error = 
hw->nm_bdg_attach(nr_name, hw); 8224bf50f18SLuigi Rizzo if (error) 823f9790aebSLuigi Rizzo goto out; 8244bf50f18SLuigi Rizzo vpna = hw->na_vp; 8254bf50f18SLuigi Rizzo hostna = hw->na_hostvp; 8264bf50f18SLuigi Rizzo if (nmr->nr_arg1 != NETMAP_BDG_HOST) 8274bf50f18SLuigi Rizzo hostna = NULL; 828f9790aebSLuigi Rizzo } 829f9790aebSLuigi Rizzo 830f9790aebSLuigi Rizzo BDG_WLOCK(b); 831f9790aebSLuigi Rizzo vpna->bdg_port = cand; 832f9790aebSLuigi Rizzo ND("NIC %p to bridge port %d", vpna, cand); 833f9790aebSLuigi Rizzo /* bind the port to the bridge (virtual ports are not active) */ 834f9790aebSLuigi Rizzo b->bdg_ports[cand] = vpna; 835f9790aebSLuigi Rizzo vpna->na_bdg = b; 836f9790aebSLuigi Rizzo b->bdg_active_ports++; 8374bf50f18SLuigi Rizzo if (hostna != NULL) { 838f9790aebSLuigi Rizzo /* also bind the host stack to the bridge */ 839f9790aebSLuigi Rizzo b->bdg_ports[cand2] = hostna; 840f9790aebSLuigi Rizzo hostna->bdg_port = cand2; 841f9790aebSLuigi Rizzo hostna->na_bdg = b; 842f9790aebSLuigi Rizzo b->bdg_active_ports++; 843f9790aebSLuigi Rizzo ND("host %p to bridge port %d", hostna, cand2); 844f9790aebSLuigi Rizzo } 8454bf50f18SLuigi Rizzo ND("if %s refs %d", ifname, vpna->up.na_refcount); 846f9790aebSLuigi Rizzo BDG_WUNLOCK(b); 8474bf50f18SLuigi Rizzo *na = &vpna->up; 8484bf50f18SLuigi Rizzo netmap_adapter_get(*na); 849f9790aebSLuigi Rizzo 850f9790aebSLuigi Rizzo out: 851c3e9b4dbSLuiz Otavio O Souza if (ifp) 852f9790aebSLuigi Rizzo if_rele(ifp); 853f9790aebSLuigi Rizzo 854f9790aebSLuigi Rizzo return error; 855f9790aebSLuigi Rizzo } 856f9790aebSLuigi Rizzo 857f9790aebSLuigi Rizzo 8584bf50f18SLuigi Rizzo /* Process NETMAP_BDG_ATTACH */ 859f9790aebSLuigi Rizzo static int 8604bf50f18SLuigi Rizzo nm_bdg_ctl_attach(struct nmreq *nmr) 861f9790aebSLuigi Rizzo { 862f9790aebSLuigi Rizzo struct netmap_adapter *na; 863c3e9b4dbSLuiz Otavio O Souza struct netmap_mem_d *nmd = NULL; 864f9790aebSLuigi Rizzo int error; 865f9790aebSLuigi Rizzo 866f9790aebSLuigi Rizzo NMG_LOCK(); 
867f2637526SLuigi Rizzo 868c3e9b4dbSLuiz Otavio O Souza if (nmr->nr_arg2) { 869c3e9b4dbSLuiz Otavio O Souza nmd = netmap_mem_find(nmr->nr_arg2); 870c3e9b4dbSLuiz Otavio O Souza if (nmd == NULL) { 871c3e9b4dbSLuiz Otavio O Souza error = EINVAL; 872c3e9b4dbSLuiz Otavio O Souza goto unlock_exit; 873c3e9b4dbSLuiz Otavio O Souza } 874c3e9b4dbSLuiz Otavio O Souza } 875c3e9b4dbSLuiz Otavio O Souza 876*4f80b14cSVincenzo Maffione /* XXX check existing one */ 877*4f80b14cSVincenzo Maffione error = netmap_get_bdg_na(nmr, &na, nmd, 0); 878*4f80b14cSVincenzo Maffione if (!error) { 879*4f80b14cSVincenzo Maffione error = EBUSY; 880*4f80b14cSVincenzo Maffione goto unref_exit; 881*4f80b14cSVincenzo Maffione } 882c3e9b4dbSLuiz Otavio O Souza error = netmap_get_bdg_na(nmr, &na, nmd, 1 /* create if not exists */); 8834bf50f18SLuigi Rizzo if (error) /* no device */ 884f9790aebSLuigi Rizzo goto unlock_exit; 885f2637526SLuigi Rizzo 88617885a7bSLuigi Rizzo if (na == NULL) { /* VALE prefix missing */ 887f9790aebSLuigi Rizzo error = EINVAL; 88817885a7bSLuigi Rizzo goto unlock_exit; 889f9790aebSLuigi Rizzo } 890f9790aebSLuigi Rizzo 8914bf50f18SLuigi Rizzo if (NETMAP_OWNED_BY_ANY(na)) { 892f9790aebSLuigi Rizzo error = EBUSY; 893f9790aebSLuigi Rizzo goto unref_exit; 894f9790aebSLuigi Rizzo } 895f9790aebSLuigi Rizzo 8964bf50f18SLuigi Rizzo if (na->nm_bdg_ctl) { 8974bf50f18SLuigi Rizzo /* nop for VALE ports. 
The bwrap needs to put the hwna 8984bf50f18SLuigi Rizzo * in netmap mode (see netmap_bwrap_bdg_ctl) 8994bf50f18SLuigi Rizzo */ 9004bf50f18SLuigi Rizzo error = na->nm_bdg_ctl(na, nmr, 1); 9014bf50f18SLuigi Rizzo if (error) 902f9790aebSLuigi Rizzo goto unref_exit; 9034bf50f18SLuigi Rizzo ND("registered %s to netmap-mode", na->name); 904f9790aebSLuigi Rizzo } 905f9790aebSLuigi Rizzo NMG_UNLOCK(); 906f9790aebSLuigi Rizzo return 0; 907f9790aebSLuigi Rizzo 908f9790aebSLuigi Rizzo unref_exit: 909f9790aebSLuigi Rizzo netmap_adapter_put(na); 910f9790aebSLuigi Rizzo unlock_exit: 911f9790aebSLuigi Rizzo NMG_UNLOCK(); 912f9790aebSLuigi Rizzo return error; 913f9790aebSLuigi Rizzo } 914f9790aebSLuigi Rizzo 91537e3a6d3SLuigi Rizzo static inline int 91637e3a6d3SLuigi Rizzo nm_is_bwrap(struct netmap_adapter *na) 91737e3a6d3SLuigi Rizzo { 91837e3a6d3SLuigi Rizzo return na->nm_register == netmap_bwrap_reg; 91937e3a6d3SLuigi Rizzo } 92017885a7bSLuigi Rizzo 9214bf50f18SLuigi Rizzo /* process NETMAP_BDG_DETACH */ 922f9790aebSLuigi Rizzo static int 9234bf50f18SLuigi Rizzo nm_bdg_ctl_detach(struct nmreq *nmr) 924f9790aebSLuigi Rizzo { 925f9790aebSLuigi Rizzo struct netmap_adapter *na; 926f9790aebSLuigi Rizzo int error; 927f9790aebSLuigi Rizzo 928f9790aebSLuigi Rizzo NMG_LOCK(); 929c3e9b4dbSLuiz Otavio O Souza error = netmap_get_bdg_na(nmr, &na, NULL, 0 /* don't create */); 930f9790aebSLuigi Rizzo if (error) { /* no device, or another bridge or user owns the device */ 931f9790aebSLuigi Rizzo goto unlock_exit; 932f9790aebSLuigi Rizzo } 933f2637526SLuigi Rizzo 93417885a7bSLuigi Rizzo if (na == NULL) { /* VALE prefix missing */ 935f9790aebSLuigi Rizzo error = EINVAL; 93617885a7bSLuigi Rizzo goto unlock_exit; 93737e3a6d3SLuigi Rizzo } else if (nm_is_bwrap(na) && 93837e3a6d3SLuigi Rizzo ((struct netmap_bwrap_adapter *)na)->na_polling_state) { 93937e3a6d3SLuigi Rizzo /* Don't detach a NIC with polling */ 94037e3a6d3SLuigi Rizzo error = EBUSY; 94137e3a6d3SLuigi Rizzo netmap_adapter_put(na); 
94237e3a6d3SLuigi Rizzo goto unlock_exit; 943f9790aebSLuigi Rizzo } 9444bf50f18SLuigi Rizzo if (na->nm_bdg_ctl) { 9454bf50f18SLuigi Rizzo /* remove the port from bridge. The bwrap 9464bf50f18SLuigi Rizzo * also needs to put the hwna in normal mode 9474bf50f18SLuigi Rizzo */ 9484bf50f18SLuigi Rizzo error = na->nm_bdg_ctl(na, nmr, 0); 949f9790aebSLuigi Rizzo } 950f9790aebSLuigi Rizzo 951f9790aebSLuigi Rizzo netmap_adapter_put(na); 952f9790aebSLuigi Rizzo unlock_exit: 953f9790aebSLuigi Rizzo NMG_UNLOCK(); 954f9790aebSLuigi Rizzo return error; 955f9790aebSLuigi Rizzo 956f9790aebSLuigi Rizzo } 957f9790aebSLuigi Rizzo 95837e3a6d3SLuigi Rizzo struct nm_bdg_polling_state; 95937e3a6d3SLuigi Rizzo struct 96037e3a6d3SLuigi Rizzo nm_bdg_kthread { 961c3e9b4dbSLuiz Otavio O Souza struct nm_kctx *nmk; 96237e3a6d3SLuigi Rizzo u_int qfirst; 96337e3a6d3SLuigi Rizzo u_int qlast; 96437e3a6d3SLuigi Rizzo struct nm_bdg_polling_state *bps; 96537e3a6d3SLuigi Rizzo }; 96637e3a6d3SLuigi Rizzo 96737e3a6d3SLuigi Rizzo struct nm_bdg_polling_state { 96837e3a6d3SLuigi Rizzo bool configured; 96937e3a6d3SLuigi Rizzo bool stopped; 97037e3a6d3SLuigi Rizzo struct netmap_bwrap_adapter *bna; 97137e3a6d3SLuigi Rizzo u_int reg; 97237e3a6d3SLuigi Rizzo u_int qfirst; 97337e3a6d3SLuigi Rizzo u_int qlast; 97437e3a6d3SLuigi Rizzo u_int cpu_from; 97537e3a6d3SLuigi Rizzo u_int ncpus; 97637e3a6d3SLuigi Rizzo struct nm_bdg_kthread *kthreads; 97737e3a6d3SLuigi Rizzo }; 97837e3a6d3SLuigi Rizzo 97937e3a6d3SLuigi Rizzo static void 980c3e9b4dbSLuiz Otavio O Souza netmap_bwrap_polling(void *data, int is_kthread) 98137e3a6d3SLuigi Rizzo { 98237e3a6d3SLuigi Rizzo struct nm_bdg_kthread *nbk = data; 98337e3a6d3SLuigi Rizzo struct netmap_bwrap_adapter *bna; 98437e3a6d3SLuigi Rizzo u_int qfirst, qlast, i; 98537e3a6d3SLuigi Rizzo struct netmap_kring *kring0, *kring; 98637e3a6d3SLuigi Rizzo 98737e3a6d3SLuigi Rizzo if (!nbk) 98837e3a6d3SLuigi Rizzo return; 98937e3a6d3SLuigi Rizzo qfirst = nbk->qfirst; 99037e3a6d3SLuigi Rizzo 
qlast = nbk->qlast; 99137e3a6d3SLuigi Rizzo bna = nbk->bps->bna; 99237e3a6d3SLuigi Rizzo kring0 = NMR(bna->hwna, NR_RX); 99337e3a6d3SLuigi Rizzo 99437e3a6d3SLuigi Rizzo for (i = qfirst; i < qlast; i++) { 99537e3a6d3SLuigi Rizzo kring = kring0 + i; 99637e3a6d3SLuigi Rizzo kring->nm_notify(kring, 0); 99737e3a6d3SLuigi Rizzo } 99837e3a6d3SLuigi Rizzo } 99937e3a6d3SLuigi Rizzo 100037e3a6d3SLuigi Rizzo static int 100137e3a6d3SLuigi Rizzo nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps) 100237e3a6d3SLuigi Rizzo { 1003c3e9b4dbSLuiz Otavio O Souza struct nm_kctx_cfg kcfg; 100437e3a6d3SLuigi Rizzo int i, j; 100537e3a6d3SLuigi Rizzo 1006c3e9b4dbSLuiz Otavio O Souza bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus); 100737e3a6d3SLuigi Rizzo if (bps->kthreads == NULL) 100837e3a6d3SLuigi Rizzo return ENOMEM; 100937e3a6d3SLuigi Rizzo 101037e3a6d3SLuigi Rizzo bzero(&kcfg, sizeof(kcfg)); 101137e3a6d3SLuigi Rizzo kcfg.worker_fn = netmap_bwrap_polling; 1012c3e9b4dbSLuiz Otavio O Souza kcfg.use_kthread = 1; 101337e3a6d3SLuigi Rizzo for (i = 0; i < bps->ncpus; i++) { 101437e3a6d3SLuigi Rizzo struct nm_bdg_kthread *t = bps->kthreads + i; 101537e3a6d3SLuigi Rizzo int all = (bps->ncpus == 1 && bps->reg == NR_REG_ALL_NIC); 101637e3a6d3SLuigi Rizzo int affinity = bps->cpu_from + i; 101737e3a6d3SLuigi Rizzo 101837e3a6d3SLuigi Rizzo t->bps = bps; 101937e3a6d3SLuigi Rizzo t->qfirst = all ? bps->qfirst /* must be 0 */: affinity; 102037e3a6d3SLuigi Rizzo t->qlast = all ? 
bps->qlast : t->qfirst + 1; 102137e3a6d3SLuigi Rizzo D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst, 102237e3a6d3SLuigi Rizzo t->qlast); 102337e3a6d3SLuigi Rizzo 102437e3a6d3SLuigi Rizzo kcfg.type = i; 102537e3a6d3SLuigi Rizzo kcfg.worker_private = t; 1026c3e9b4dbSLuiz Otavio O Souza t->nmk = nm_os_kctx_create(&kcfg, 0, NULL); 102737e3a6d3SLuigi Rizzo if (t->nmk == NULL) { 102837e3a6d3SLuigi Rizzo goto cleanup; 102937e3a6d3SLuigi Rizzo } 1030c3e9b4dbSLuiz Otavio O Souza nm_os_kctx_worker_setaff(t->nmk, affinity); 103137e3a6d3SLuigi Rizzo } 103237e3a6d3SLuigi Rizzo return 0; 103337e3a6d3SLuigi Rizzo 103437e3a6d3SLuigi Rizzo cleanup: 103537e3a6d3SLuigi Rizzo for (j = 0; j < i; j++) { 103637e3a6d3SLuigi Rizzo struct nm_bdg_kthread *t = bps->kthreads + i; 1037c3e9b4dbSLuiz Otavio O Souza nm_os_kctx_destroy(t->nmk); 103837e3a6d3SLuigi Rizzo } 1039c3e9b4dbSLuiz Otavio O Souza nm_os_free(bps->kthreads); 104037e3a6d3SLuigi Rizzo return EFAULT; 104137e3a6d3SLuigi Rizzo } 104237e3a6d3SLuigi Rizzo 1043c3e9b4dbSLuiz Otavio O Souza /* A variant of ptnetmap_start_kthreads() */ 104437e3a6d3SLuigi Rizzo static int 104537e3a6d3SLuigi Rizzo nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps) 104637e3a6d3SLuigi Rizzo { 104737e3a6d3SLuigi Rizzo int error, i, j; 104837e3a6d3SLuigi Rizzo 104937e3a6d3SLuigi Rizzo if (!bps) { 105037e3a6d3SLuigi Rizzo D("polling is not configured"); 105137e3a6d3SLuigi Rizzo return EFAULT; 105237e3a6d3SLuigi Rizzo } 105337e3a6d3SLuigi Rizzo bps->stopped = false; 105437e3a6d3SLuigi Rizzo 105537e3a6d3SLuigi Rizzo for (i = 0; i < bps->ncpus; i++) { 105637e3a6d3SLuigi Rizzo struct nm_bdg_kthread *t = bps->kthreads + i; 1057c3e9b4dbSLuiz Otavio O Souza error = nm_os_kctx_worker_start(t->nmk); 105837e3a6d3SLuigi Rizzo if (error) { 105937e3a6d3SLuigi Rizzo D("error in nm_kthread_start()"); 106037e3a6d3SLuigi Rizzo goto cleanup; 106137e3a6d3SLuigi Rizzo } 106237e3a6d3SLuigi Rizzo } 106337e3a6d3SLuigi Rizzo return 0; 106437e3a6d3SLuigi Rizzo 
106537e3a6d3SLuigi Rizzo cleanup: 106637e3a6d3SLuigi Rizzo for (j = 0; j < i; j++) { 106737e3a6d3SLuigi Rizzo struct nm_bdg_kthread *t = bps->kthreads + i; 1068c3e9b4dbSLuiz Otavio O Souza nm_os_kctx_worker_stop(t->nmk); 106937e3a6d3SLuigi Rizzo } 107037e3a6d3SLuigi Rizzo bps->stopped = true; 107137e3a6d3SLuigi Rizzo return error; 107237e3a6d3SLuigi Rizzo } 107337e3a6d3SLuigi Rizzo 107437e3a6d3SLuigi Rizzo static void 107537e3a6d3SLuigi Rizzo nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps) 107637e3a6d3SLuigi Rizzo { 107737e3a6d3SLuigi Rizzo int i; 107837e3a6d3SLuigi Rizzo 107937e3a6d3SLuigi Rizzo if (!bps) 108037e3a6d3SLuigi Rizzo return; 108137e3a6d3SLuigi Rizzo 108237e3a6d3SLuigi Rizzo for (i = 0; i < bps->ncpus; i++) { 108337e3a6d3SLuigi Rizzo struct nm_bdg_kthread *t = bps->kthreads + i; 1084c3e9b4dbSLuiz Otavio O Souza nm_os_kctx_worker_stop(t->nmk); 1085c3e9b4dbSLuiz Otavio O Souza nm_os_kctx_destroy(t->nmk); 108637e3a6d3SLuigi Rizzo } 108737e3a6d3SLuigi Rizzo bps->stopped = true; 108837e3a6d3SLuigi Rizzo } 108937e3a6d3SLuigi Rizzo 109037e3a6d3SLuigi Rizzo static int 109137e3a6d3SLuigi Rizzo get_polling_cfg(struct nmreq *nmr, struct netmap_adapter *na, 109237e3a6d3SLuigi Rizzo struct nm_bdg_polling_state *bps) 109337e3a6d3SLuigi Rizzo { 109437e3a6d3SLuigi Rizzo int req_cpus, avail_cpus, core_from; 109537e3a6d3SLuigi Rizzo u_int reg, i, qfirst, qlast; 109637e3a6d3SLuigi Rizzo 109737e3a6d3SLuigi Rizzo avail_cpus = nm_os_ncpus(); 109837e3a6d3SLuigi Rizzo req_cpus = nmr->nr_arg1; 109937e3a6d3SLuigi Rizzo 110037e3a6d3SLuigi Rizzo if (req_cpus == 0) { 110137e3a6d3SLuigi Rizzo D("req_cpus must be > 0"); 110237e3a6d3SLuigi Rizzo return EINVAL; 110337e3a6d3SLuigi Rizzo } else if (req_cpus >= avail_cpus) { 110437e3a6d3SLuigi Rizzo D("for safety, we need at least one core left in the system"); 110537e3a6d3SLuigi Rizzo return EINVAL; 110637e3a6d3SLuigi Rizzo } 110737e3a6d3SLuigi Rizzo reg = nmr->nr_flags & NR_REG_MASK; 110837e3a6d3SLuigi Rizzo i = 
nmr->nr_ringid & NETMAP_RING_MASK; 110937e3a6d3SLuigi Rizzo /* 111037e3a6d3SLuigi Rizzo * ONE_NIC: dedicate one core to one ring. If multiple cores 111137e3a6d3SLuigi Rizzo * are specified, consecutive rings are also polled. 111237e3a6d3SLuigi Rizzo * For example, if ringid=2 and 2 cores are given, 111337e3a6d3SLuigi Rizzo * ring 2 and 3 are polled by core 2 and 3, respectively. 111437e3a6d3SLuigi Rizzo * ALL_NIC: poll all the rings using a core specified by ringid. 111537e3a6d3SLuigi Rizzo * the number of cores must be 1. 111637e3a6d3SLuigi Rizzo */ 111737e3a6d3SLuigi Rizzo if (reg == NR_REG_ONE_NIC) { 111837e3a6d3SLuigi Rizzo if (i + req_cpus > nma_get_nrings(na, NR_RX)) { 111937e3a6d3SLuigi Rizzo D("only %d rings exist (ring %u-%u is given)", 112037e3a6d3SLuigi Rizzo nma_get_nrings(na, NR_RX), i, i+req_cpus); 112137e3a6d3SLuigi Rizzo return EINVAL; 112237e3a6d3SLuigi Rizzo } 112337e3a6d3SLuigi Rizzo qfirst = i; 112437e3a6d3SLuigi Rizzo qlast = qfirst + req_cpus; 112537e3a6d3SLuigi Rizzo core_from = qfirst; 112637e3a6d3SLuigi Rizzo } else if (reg == NR_REG_ALL_NIC) { 112737e3a6d3SLuigi Rizzo if (req_cpus != 1) { 112837e3a6d3SLuigi Rizzo D("ncpus must be 1 not %d for REG_ALL_NIC", req_cpus); 112937e3a6d3SLuigi Rizzo return EINVAL; 113037e3a6d3SLuigi Rizzo } 113137e3a6d3SLuigi Rizzo qfirst = 0; 113237e3a6d3SLuigi Rizzo qlast = nma_get_nrings(na, NR_RX); 113337e3a6d3SLuigi Rizzo core_from = i; 113437e3a6d3SLuigi Rizzo } else { 113537e3a6d3SLuigi Rizzo D("reg must be ALL_NIC or ONE_NIC"); 113637e3a6d3SLuigi Rizzo return EINVAL; 113737e3a6d3SLuigi Rizzo } 113837e3a6d3SLuigi Rizzo 113937e3a6d3SLuigi Rizzo bps->reg = reg; 114037e3a6d3SLuigi Rizzo bps->qfirst = qfirst; 114137e3a6d3SLuigi Rizzo bps->qlast = qlast; 114237e3a6d3SLuigi Rizzo bps->cpu_from = core_from; 114337e3a6d3SLuigi Rizzo bps->ncpus = req_cpus; 114437e3a6d3SLuigi Rizzo D("%s qfirst %u qlast %u cpu_from %u ncpus %u", 114537e3a6d3SLuigi Rizzo reg == NR_REG_ALL_NIC ? 
"REG_ALL_NIC" : "REG_ONE_NIC", 114637e3a6d3SLuigi Rizzo qfirst, qlast, core_from, req_cpus); 114737e3a6d3SLuigi Rizzo return 0; 114837e3a6d3SLuigi Rizzo } 114937e3a6d3SLuigi Rizzo 115037e3a6d3SLuigi Rizzo static int 115137e3a6d3SLuigi Rizzo nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na) 115237e3a6d3SLuigi Rizzo { 115337e3a6d3SLuigi Rizzo struct nm_bdg_polling_state *bps; 115437e3a6d3SLuigi Rizzo struct netmap_bwrap_adapter *bna; 115537e3a6d3SLuigi Rizzo int error; 115637e3a6d3SLuigi Rizzo 115737e3a6d3SLuigi Rizzo bna = (struct netmap_bwrap_adapter *)na; 115837e3a6d3SLuigi Rizzo if (bna->na_polling_state) { 115937e3a6d3SLuigi Rizzo D("ERROR adapter already in polling mode"); 116037e3a6d3SLuigi Rizzo return EFAULT; 116137e3a6d3SLuigi Rizzo } 116237e3a6d3SLuigi Rizzo 1163c3e9b4dbSLuiz Otavio O Souza bps = nm_os_malloc(sizeof(*bps)); 116437e3a6d3SLuigi Rizzo if (!bps) 116537e3a6d3SLuigi Rizzo return ENOMEM; 116637e3a6d3SLuigi Rizzo bps->configured = false; 116737e3a6d3SLuigi Rizzo bps->stopped = true; 116837e3a6d3SLuigi Rizzo 116937e3a6d3SLuigi Rizzo if (get_polling_cfg(nmr, na, bps)) { 1170c3e9b4dbSLuiz Otavio O Souza nm_os_free(bps); 117137e3a6d3SLuigi Rizzo return EINVAL; 117237e3a6d3SLuigi Rizzo } 117337e3a6d3SLuigi Rizzo 117437e3a6d3SLuigi Rizzo if (nm_bdg_create_kthreads(bps)) { 1175c3e9b4dbSLuiz Otavio O Souza nm_os_free(bps); 117637e3a6d3SLuigi Rizzo return EFAULT; 117737e3a6d3SLuigi Rizzo } 117837e3a6d3SLuigi Rizzo 117937e3a6d3SLuigi Rizzo bps->configured = true; 118037e3a6d3SLuigi Rizzo bna->na_polling_state = bps; 118137e3a6d3SLuigi Rizzo bps->bna = bna; 118237e3a6d3SLuigi Rizzo 1183*4f80b14cSVincenzo Maffione /* disable interrupts if possible */ 1184*4f80b14cSVincenzo Maffione nma_intr_enable(bna->hwna, 0); 118537e3a6d3SLuigi Rizzo /* start kthread now */ 118637e3a6d3SLuigi Rizzo error = nm_bdg_polling_start_kthreads(bps); 118737e3a6d3SLuigi Rizzo if (error) { 118837e3a6d3SLuigi Rizzo D("ERROR nm_bdg_polling_start_kthread()"); 
1189c3e9b4dbSLuiz Otavio O Souza nm_os_free(bps->kthreads); 1190c3e9b4dbSLuiz Otavio O Souza nm_os_free(bps); 119137e3a6d3SLuigi Rizzo bna->na_polling_state = NULL; 1192*4f80b14cSVincenzo Maffione nma_intr_enable(bna->hwna, 1); 119337e3a6d3SLuigi Rizzo } 119437e3a6d3SLuigi Rizzo return error; 119537e3a6d3SLuigi Rizzo } 119637e3a6d3SLuigi Rizzo 119737e3a6d3SLuigi Rizzo static int 119837e3a6d3SLuigi Rizzo nm_bdg_ctl_polling_stop(struct nmreq *nmr, struct netmap_adapter *na) 119937e3a6d3SLuigi Rizzo { 120037e3a6d3SLuigi Rizzo struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na; 120137e3a6d3SLuigi Rizzo struct nm_bdg_polling_state *bps; 120237e3a6d3SLuigi Rizzo 120337e3a6d3SLuigi Rizzo if (!bna->na_polling_state) { 120437e3a6d3SLuigi Rizzo D("ERROR adapter is not in polling mode"); 120537e3a6d3SLuigi Rizzo return EFAULT; 120637e3a6d3SLuigi Rizzo } 120737e3a6d3SLuigi Rizzo bps = bna->na_polling_state; 120837e3a6d3SLuigi Rizzo nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state); 120937e3a6d3SLuigi Rizzo bps->configured = false; 1210c3e9b4dbSLuiz Otavio O Souza nm_os_free(bps); 121137e3a6d3SLuigi Rizzo bna->na_polling_state = NULL; 1212*4f80b14cSVincenzo Maffione /* reenable interrupts */ 1213*4f80b14cSVincenzo Maffione nma_intr_enable(bna->hwna, 1); 121437e3a6d3SLuigi Rizzo return 0; 121537e3a6d3SLuigi Rizzo } 1216f9790aebSLuigi Rizzo 12174bf50f18SLuigi Rizzo /* Called by either user's context (netmap_ioctl()) 12184bf50f18SLuigi Rizzo * or external kernel modules (e.g., Openvswitch). 12194bf50f18SLuigi Rizzo * Operation is indicated in nmr->nr_cmd. 12204bf50f18SLuigi Rizzo * NETMAP_BDG_OPS that sets configure/lookup/dtor functions to the bridge 12214bf50f18SLuigi Rizzo * requires bdg_ops argument; the other commands ignore this argument. 12224bf50f18SLuigi Rizzo * 1223f9790aebSLuigi Rizzo * Called without NMG_LOCK. 
1224f9790aebSLuigi Rizzo */ 1225f9790aebSLuigi Rizzo int 12264bf50f18SLuigi Rizzo netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops) 1227f9790aebSLuigi Rizzo { 1228847bf383SLuigi Rizzo struct nm_bridge *b, *bridges; 1229f9790aebSLuigi Rizzo struct netmap_adapter *na; 1230f9790aebSLuigi Rizzo struct netmap_vp_adapter *vpna; 1231f9790aebSLuigi Rizzo char *name = nmr->nr_name; 1232f9790aebSLuigi Rizzo int cmd = nmr->nr_cmd, namelen = strlen(name); 1233f9790aebSLuigi Rizzo int error = 0, i, j; 1234847bf383SLuigi Rizzo u_int num_bridges; 1235847bf383SLuigi Rizzo 1236847bf383SLuigi Rizzo netmap_bns_getbridges(&bridges, &num_bridges); 1237f9790aebSLuigi Rizzo 1238f9790aebSLuigi Rizzo switch (cmd) { 12394bf50f18SLuigi Rizzo case NETMAP_BDG_NEWIF: 1240c3e9b4dbSLuiz Otavio O Souza error = netmap_vi_create(nmr, 0 /* no autodelete */); 12414bf50f18SLuigi Rizzo break; 12424bf50f18SLuigi Rizzo 12434bf50f18SLuigi Rizzo case NETMAP_BDG_DELIF: 12444bf50f18SLuigi Rizzo error = nm_vi_destroy(nmr->nr_name); 12454bf50f18SLuigi Rizzo break; 12464bf50f18SLuigi Rizzo 1247f9790aebSLuigi Rizzo case NETMAP_BDG_ATTACH: 12484bf50f18SLuigi Rizzo error = nm_bdg_ctl_attach(nmr); 1249f9790aebSLuigi Rizzo break; 1250f9790aebSLuigi Rizzo 1251f9790aebSLuigi Rizzo case NETMAP_BDG_DETACH: 12524bf50f18SLuigi Rizzo error = nm_bdg_ctl_detach(nmr); 1253f9790aebSLuigi Rizzo break; 1254f9790aebSLuigi Rizzo 1255f9790aebSLuigi Rizzo case NETMAP_BDG_LIST: 1256f9790aebSLuigi Rizzo /* this is used to enumerate bridges and ports */ 1257f9790aebSLuigi Rizzo if (namelen) { /* look up indexes of bridge and port */ 125837e3a6d3SLuigi Rizzo if (strncmp(name, NM_BDG_NAME, strlen(NM_BDG_NAME))) { 1259f9790aebSLuigi Rizzo error = EINVAL; 1260f9790aebSLuigi Rizzo break; 1261f9790aebSLuigi Rizzo } 1262f9790aebSLuigi Rizzo NMG_LOCK(); 1263f9790aebSLuigi Rizzo b = nm_find_bridge(name, 0 /* don't create */); 1264f9790aebSLuigi Rizzo if (!b) { 1265f9790aebSLuigi Rizzo error = ENOENT; 1266f9790aebSLuigi Rizzo 
NMG_UNLOCK(); 1267f9790aebSLuigi Rizzo break; 1268f9790aebSLuigi Rizzo } 1269f9790aebSLuigi Rizzo 127037e3a6d3SLuigi Rizzo error = 0; 127137e3a6d3SLuigi Rizzo nmr->nr_arg1 = b - bridges; /* bridge index */ 127237e3a6d3SLuigi Rizzo nmr->nr_arg2 = NM_BDG_NOPORT; 1273f9790aebSLuigi Rizzo for (j = 0; j < b->bdg_active_ports; j++) { 1274f9790aebSLuigi Rizzo i = b->bdg_port_index[j]; 1275f9790aebSLuigi Rizzo vpna = b->bdg_ports[i]; 1276f9790aebSLuigi Rizzo if (vpna == NULL) { 1277f9790aebSLuigi Rizzo D("---AAAAAAAAARGH-------"); 1278f9790aebSLuigi Rizzo continue; 1279f9790aebSLuigi Rizzo } 1280f9790aebSLuigi Rizzo /* the former and the latter identify a 1281f9790aebSLuigi Rizzo * virtual port and a NIC, respectively 1282f9790aebSLuigi Rizzo */ 12834bf50f18SLuigi Rizzo if (!strcmp(vpna->up.name, name)) { 1284f9790aebSLuigi Rizzo nmr->nr_arg2 = i; /* port index */ 1285f9790aebSLuigi Rizzo break; 1286f9790aebSLuigi Rizzo } 1287f9790aebSLuigi Rizzo } 1288f9790aebSLuigi Rizzo NMG_UNLOCK(); 1289f9790aebSLuigi Rizzo } else { 1290f9790aebSLuigi Rizzo /* return the first non-empty entry starting from 1291f9790aebSLuigi Rizzo * bridge nr_arg1 and port nr_arg2. 
1292f9790aebSLuigi Rizzo * 1293f9790aebSLuigi Rizzo * Users can detect the end of the same bridge by 1294f9790aebSLuigi Rizzo * seeing the new and old value of nr_arg1, and can 1295f9790aebSLuigi Rizzo * detect the end of all the bridge by error != 0 1296f9790aebSLuigi Rizzo */ 1297f9790aebSLuigi Rizzo i = nmr->nr_arg1; 1298f9790aebSLuigi Rizzo j = nmr->nr_arg2; 1299f9790aebSLuigi Rizzo 1300f9790aebSLuigi Rizzo NMG_LOCK(); 1301f9790aebSLuigi Rizzo for (error = ENOENT; i < NM_BRIDGES; i++) { 1302847bf383SLuigi Rizzo b = bridges + i; 1303c3e9b4dbSLuiz Otavio O Souza for ( ; j < NM_BDG_MAXPORTS; j++) { 1304c3e9b4dbSLuiz Otavio O Souza if (b->bdg_ports[j] == NULL) 1305f9790aebSLuigi Rizzo continue; 1306f9790aebSLuigi Rizzo vpna = b->bdg_ports[j]; 13074bf50f18SLuigi Rizzo strncpy(name, vpna->up.name, (size_t)IFNAMSIZ); 1308f9790aebSLuigi Rizzo error = 0; 1309c3e9b4dbSLuiz Otavio O Souza goto out; 1310f9790aebSLuigi Rizzo } 1311c3e9b4dbSLuiz Otavio O Souza j = 0; /* following bridges scan from 0 */ 1312c3e9b4dbSLuiz Otavio O Souza } 1313c3e9b4dbSLuiz Otavio O Souza out: 1314c3e9b4dbSLuiz Otavio O Souza nmr->nr_arg1 = i; 1315c3e9b4dbSLuiz Otavio O Souza nmr->nr_arg2 = j; 1316f9790aebSLuigi Rizzo NMG_UNLOCK(); 1317f9790aebSLuigi Rizzo } 1318f9790aebSLuigi Rizzo break; 1319f9790aebSLuigi Rizzo 13204bf50f18SLuigi Rizzo case NETMAP_BDG_REGOPS: /* XXX this should not be available from userspace */ 13214bf50f18SLuigi Rizzo /* register callbacks to the given bridge. 1322f9790aebSLuigi Rizzo * nmr->nr_name may be just bridge's name (including ':' 1323f9790aebSLuigi Rizzo * if it is not just NM_NAME). 
1324f9790aebSLuigi Rizzo */ 13254bf50f18SLuigi Rizzo if (!bdg_ops) { 1326f9790aebSLuigi Rizzo error = EINVAL; 1327f9790aebSLuigi Rizzo break; 1328f9790aebSLuigi Rizzo } 1329f9790aebSLuigi Rizzo NMG_LOCK(); 1330f9790aebSLuigi Rizzo b = nm_find_bridge(name, 0 /* don't create */); 1331f9790aebSLuigi Rizzo if (!b) { 1332f9790aebSLuigi Rizzo error = EINVAL; 1333f9790aebSLuigi Rizzo } else { 13344bf50f18SLuigi Rizzo b->bdg_ops = *bdg_ops; 1335f9790aebSLuigi Rizzo } 1336f9790aebSLuigi Rizzo NMG_UNLOCK(); 1337f9790aebSLuigi Rizzo break; 1338f9790aebSLuigi Rizzo 1339f0ea3689SLuigi Rizzo case NETMAP_BDG_VNET_HDR: 1340f0ea3689SLuigi Rizzo /* Valid lengths for the virtio-net header are 0 (no header), 1341f0ea3689SLuigi Rizzo 10 and 12. */ 1342f0ea3689SLuigi Rizzo if (nmr->nr_arg1 != 0 && 1343f0ea3689SLuigi Rizzo nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) && 1344f0ea3689SLuigi Rizzo nmr->nr_arg1 != 12) { 1345f0ea3689SLuigi Rizzo error = EINVAL; 1346f0ea3689SLuigi Rizzo break; 1347f0ea3689SLuigi Rizzo } 1348f9790aebSLuigi Rizzo NMG_LOCK(); 1349c3e9b4dbSLuiz Otavio O Souza error = netmap_get_bdg_na(nmr, &na, NULL, 0); 135017885a7bSLuigi Rizzo if (na && !error) { 1351f9790aebSLuigi Rizzo vpna = (struct netmap_vp_adapter *)na; 135237e3a6d3SLuigi Rizzo na->virt_hdr_len = nmr->nr_arg1; 135337e3a6d3SLuigi Rizzo if (na->virt_hdr_len) { 13544bf50f18SLuigi Rizzo vpna->mfs = NETMAP_BUF_SIZE(na); 135537e3a6d3SLuigi Rizzo } 135637e3a6d3SLuigi Rizzo D("Using vnet_hdr_len %d for %p", na->virt_hdr_len, na); 135737e3a6d3SLuigi Rizzo netmap_adapter_put(na); 135837e3a6d3SLuigi Rizzo } else if (!na) { 135937e3a6d3SLuigi Rizzo error = ENXIO; 136037e3a6d3SLuigi Rizzo } 136137e3a6d3SLuigi Rizzo NMG_UNLOCK(); 136237e3a6d3SLuigi Rizzo break; 136337e3a6d3SLuigi Rizzo 136437e3a6d3SLuigi Rizzo case NETMAP_BDG_POLLING_ON: 136537e3a6d3SLuigi Rizzo case NETMAP_BDG_POLLING_OFF: 136637e3a6d3SLuigi Rizzo NMG_LOCK(); 1367c3e9b4dbSLuiz Otavio O Souza error = netmap_get_bdg_na(nmr, &na, NULL, 0); 
136837e3a6d3SLuigi Rizzo if (na && !error) { 136937e3a6d3SLuigi Rizzo if (!nm_is_bwrap(na)) { 137037e3a6d3SLuigi Rizzo error = EOPNOTSUPP; 137137e3a6d3SLuigi Rizzo } else if (cmd == NETMAP_BDG_POLLING_ON) { 137237e3a6d3SLuigi Rizzo error = nm_bdg_ctl_polling_start(nmr, na); 137337e3a6d3SLuigi Rizzo if (!error) 137437e3a6d3SLuigi Rizzo netmap_adapter_get(na); 137537e3a6d3SLuigi Rizzo } else { 137637e3a6d3SLuigi Rizzo error = nm_bdg_ctl_polling_stop(nmr, na); 137737e3a6d3SLuigi Rizzo if (!error) 137837e3a6d3SLuigi Rizzo netmap_adapter_put(na); 137937e3a6d3SLuigi Rizzo } 138017885a7bSLuigi Rizzo netmap_adapter_put(na); 1381f9790aebSLuigi Rizzo } 1382f9790aebSLuigi Rizzo NMG_UNLOCK(); 1383f9790aebSLuigi Rizzo break; 1384f9790aebSLuigi Rizzo 1385f9790aebSLuigi Rizzo default: 1386f9790aebSLuigi Rizzo D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd); 1387f9790aebSLuigi Rizzo error = EINVAL; 1388f9790aebSLuigi Rizzo break; 1389f9790aebSLuigi Rizzo } 1390f9790aebSLuigi Rizzo return error; 1391f9790aebSLuigi Rizzo } 1392f9790aebSLuigi Rizzo 13934bf50f18SLuigi Rizzo int 13944bf50f18SLuigi Rizzo netmap_bdg_config(struct nmreq *nmr) 13954bf50f18SLuigi Rizzo { 13964bf50f18SLuigi Rizzo struct nm_bridge *b; 13974bf50f18SLuigi Rizzo int error = EINVAL; 13984bf50f18SLuigi Rizzo 13994bf50f18SLuigi Rizzo NMG_LOCK(); 14004bf50f18SLuigi Rizzo b = nm_find_bridge(nmr->nr_name, 0); 14014bf50f18SLuigi Rizzo if (!b) { 14024bf50f18SLuigi Rizzo NMG_UNLOCK(); 14034bf50f18SLuigi Rizzo return error; 14044bf50f18SLuigi Rizzo } 14054bf50f18SLuigi Rizzo NMG_UNLOCK(); 14064bf50f18SLuigi Rizzo /* Don't call config() with NMG_LOCK() held */ 14074bf50f18SLuigi Rizzo BDG_RLOCK(b); 14084bf50f18SLuigi Rizzo if (b->bdg_ops.config != NULL) 14094bf50f18SLuigi Rizzo error = b->bdg_ops.config((struct nm_ifreq *)nmr); 14104bf50f18SLuigi Rizzo BDG_RUNLOCK(b); 14114bf50f18SLuigi Rizzo return error; 14124bf50f18SLuigi Rizzo } 14134bf50f18SLuigi Rizzo 14144bf50f18SLuigi Rizzo 14154bf50f18SLuigi Rizzo /* nm_krings_create 
callback for VALE ports. 14164bf50f18SLuigi Rizzo * Calls the standard netmap_krings_create, then adds leases on rx 14174bf50f18SLuigi Rizzo * rings and bdgfwd on tx rings. 14184bf50f18SLuigi Rizzo */ 1419f9790aebSLuigi Rizzo static int 1420f9790aebSLuigi Rizzo netmap_vp_krings_create(struct netmap_adapter *na) 1421f9790aebSLuigi Rizzo { 1422f0ea3689SLuigi Rizzo u_int tailroom; 1423f9790aebSLuigi Rizzo int error, i; 1424f9790aebSLuigi Rizzo uint32_t *leases; 1425847bf383SLuigi Rizzo u_int nrx = netmap_real_rings(na, NR_RX); 1426f9790aebSLuigi Rizzo 1427f9790aebSLuigi Rizzo /* 1428f9790aebSLuigi Rizzo * Leases are attached to RX rings on vale ports 1429f9790aebSLuigi Rizzo */ 1430f9790aebSLuigi Rizzo tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx; 1431f9790aebSLuigi Rizzo 1432f0ea3689SLuigi Rizzo error = netmap_krings_create(na, tailroom); 1433f9790aebSLuigi Rizzo if (error) 1434f9790aebSLuigi Rizzo return error; 1435f9790aebSLuigi Rizzo 1436f9790aebSLuigi Rizzo leases = na->tailroom; 1437f9790aebSLuigi Rizzo 1438f9790aebSLuigi Rizzo for (i = 0; i < nrx; i++) { /* Receive rings */ 1439f9790aebSLuigi Rizzo na->rx_rings[i].nkr_leases = leases; 1440f9790aebSLuigi Rizzo leases += na->num_rx_desc; 1441f9790aebSLuigi Rizzo } 1442f9790aebSLuigi Rizzo 1443f9790aebSLuigi Rizzo error = nm_alloc_bdgfwd(na); 1444f9790aebSLuigi Rizzo if (error) { 1445f9790aebSLuigi Rizzo netmap_krings_delete(na); 1446f9790aebSLuigi Rizzo return error; 1447f9790aebSLuigi Rizzo } 1448f9790aebSLuigi Rizzo 1449f9790aebSLuigi Rizzo return 0; 1450f9790aebSLuigi Rizzo } 1451f9790aebSLuigi Rizzo 145217885a7bSLuigi Rizzo 14534bf50f18SLuigi Rizzo /* nm_krings_delete callback for VALE ports. 
*/ 1454f9790aebSLuigi Rizzo static void 1455f9790aebSLuigi Rizzo netmap_vp_krings_delete(struct netmap_adapter *na) 1456f9790aebSLuigi Rizzo { 1457f9790aebSLuigi Rizzo nm_free_bdgfwd(na); 1458f9790aebSLuigi Rizzo netmap_krings_delete(na); 1459f9790aebSLuigi Rizzo } 1460f9790aebSLuigi Rizzo 1461f9790aebSLuigi Rizzo 1462f9790aebSLuigi Rizzo static int 1463f9790aebSLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, 1464f9790aebSLuigi Rizzo struct netmap_vp_adapter *na, u_int ring_nr); 1465f9790aebSLuigi Rizzo 1466f9790aebSLuigi Rizzo 1467f9790aebSLuigi Rizzo /* 14684bf50f18SLuigi Rizzo * main dispatch routine for the bridge. 1469f9790aebSLuigi Rizzo * Grab packets from a kring, move them into the ft structure 1470f9790aebSLuigi Rizzo * associated to the tx (input) port. Max one instance per port, 1471f9790aebSLuigi Rizzo * filtered on input (ioctl, poll or XXX). 1472f9790aebSLuigi Rizzo * Returns the next position in the ring. 1473f9790aebSLuigi Rizzo */ 1474f9790aebSLuigi Rizzo static int 14754bf50f18SLuigi Rizzo nm_bdg_preflush(struct netmap_kring *kring, u_int end) 1476f9790aebSLuigi Rizzo { 14774bf50f18SLuigi Rizzo struct netmap_vp_adapter *na = 14784bf50f18SLuigi Rizzo (struct netmap_vp_adapter*)kring->na; 1479f9790aebSLuigi Rizzo struct netmap_ring *ring = kring->ring; 1480f9790aebSLuigi Rizzo struct nm_bdg_fwd *ft; 14814bf50f18SLuigi Rizzo u_int ring_nr = kring->ring_id; 1482f9790aebSLuigi Rizzo u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1; 1483f9790aebSLuigi Rizzo u_int ft_i = 0; /* start from 0 */ 1484f9790aebSLuigi Rizzo u_int frags = 1; /* how many frags ? */ 1485f9790aebSLuigi Rizzo struct nm_bridge *b = na->na_bdg; 1486f9790aebSLuigi Rizzo 1487f9790aebSLuigi Rizzo /* To protect against modifications to the bridge we acquire a 1488f9790aebSLuigi Rizzo * shared lock, waiting if we can sleep (if the source port is 1489f9790aebSLuigi Rizzo * attached to a user process) or with a trylock otherwise (NICs). 
1490f9790aebSLuigi Rizzo */ 1491f9790aebSLuigi Rizzo ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j); 1492f9790aebSLuigi Rizzo if (na->up.na_flags & NAF_BDG_MAYSLEEP) 1493f9790aebSLuigi Rizzo BDG_RLOCK(b); 1494f9790aebSLuigi Rizzo else if (!BDG_RTRYLOCK(b)) 1495c3e9b4dbSLuiz Otavio O Souza return j; 1496f9790aebSLuigi Rizzo ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j); 1497f9790aebSLuigi Rizzo ft = kring->nkr_ft; 1498f9790aebSLuigi Rizzo 1499f9790aebSLuigi Rizzo for (; likely(j != end); j = nm_next(j, lim)) { 1500f9790aebSLuigi Rizzo struct netmap_slot *slot = &ring->slot[j]; 1501f9790aebSLuigi Rizzo char *buf; 1502f9790aebSLuigi Rizzo 1503f9790aebSLuigi Rizzo ft[ft_i].ft_len = slot->len; 1504f9790aebSLuigi Rizzo ft[ft_i].ft_flags = slot->flags; 1505f9790aebSLuigi Rizzo 1506f9790aebSLuigi Rizzo ND("flags is 0x%x", slot->flags); 1507847bf383SLuigi Rizzo /* we do not use the buf changed flag, but we still need to reset it */ 1508847bf383SLuigi Rizzo slot->flags &= ~NS_BUF_CHANGED; 1509847bf383SLuigi Rizzo 1510f9790aebSLuigi Rizzo /* this slot goes into a list so initialize the link field */ 1511f9790aebSLuigi Rizzo ft[ft_i].ft_next = NM_FT_NULL; 1512f9790aebSLuigi Rizzo buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ? 15134bf50f18SLuigi Rizzo (void *)(uintptr_t)slot->ptr : NMB(&na->up, slot); 1514e31c6ec7SLuigi Rizzo if (unlikely(buf == NULL)) { 1515e31c6ec7SLuigi Rizzo RD(5, "NULL %s buffer pointer from %s slot %d len %d", 1516e31c6ec7SLuigi Rizzo (slot->flags & NS_INDIRECT) ? 
"INDIRECT" : "DIRECT", 1517e31c6ec7SLuigi Rizzo kring->name, j, ft[ft_i].ft_len); 15184bf50f18SLuigi Rizzo buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up); 1519e31c6ec7SLuigi Rizzo ft[ft_i].ft_len = 0; 1520e31c6ec7SLuigi Rizzo ft[ft_i].ft_flags = 0; 1521e31c6ec7SLuigi Rizzo } 15222e159ef0SLuigi Rizzo __builtin_prefetch(buf); 1523f9790aebSLuigi Rizzo ++ft_i; 1524f9790aebSLuigi Rizzo if (slot->flags & NS_MOREFRAG) { 1525f9790aebSLuigi Rizzo frags++; 1526f9790aebSLuigi Rizzo continue; 1527f9790aebSLuigi Rizzo } 1528f9790aebSLuigi Rizzo if (unlikely(netmap_verbose && frags > 1)) 1529f9790aebSLuigi Rizzo RD(5, "%d frags at %d", frags, ft_i - frags); 1530f9790aebSLuigi Rizzo ft[ft_i - frags].ft_frags = frags; 1531f9790aebSLuigi Rizzo frags = 1; 1532f9790aebSLuigi Rizzo if (unlikely((int)ft_i >= bridge_batch)) 1533f9790aebSLuigi Rizzo ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); 1534f9790aebSLuigi Rizzo } 1535f9790aebSLuigi Rizzo if (frags > 1) { 153637e3a6d3SLuigi Rizzo /* Here ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG, and we 153737e3a6d3SLuigi Rizzo * have to fix frags count. */ 153837e3a6d3SLuigi Rizzo frags--; 153937e3a6d3SLuigi Rizzo ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG; 154037e3a6d3SLuigi Rizzo ft[ft_i - frags].ft_frags = frags; 154137e3a6d3SLuigi Rizzo D("Truncate incomplete fragment at %d (%d frags)", ft_i, frags); 1542f9790aebSLuigi Rizzo } 1543f9790aebSLuigi Rizzo if (ft_i) 1544f9790aebSLuigi Rizzo ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); 1545f9790aebSLuigi Rizzo BDG_RUNLOCK(b); 1546f9790aebSLuigi Rizzo return j; 1547f9790aebSLuigi Rizzo } 1548f9790aebSLuigi Rizzo 1549f9790aebSLuigi Rizzo 1550f9790aebSLuigi Rizzo /* ----- FreeBSD if_bridge hash function ------- */ 1551f9790aebSLuigi Rizzo 1552f9790aebSLuigi Rizzo /* 1553f9790aebSLuigi Rizzo * The following hash function is adapted from "Hash Functions" by Bob Jenkins 1554f9790aebSLuigi Rizzo * ("Algorithm Alley", Dr. Dobbs Journal, September 1997). 
1555f9790aebSLuigi Rizzo * 1556f9790aebSLuigi Rizzo * http://www.burtleburtle.net/bob/hash/spooky.html 1557f9790aebSLuigi Rizzo */ 1558f9790aebSLuigi Rizzo #define mix(a, b, c) \ 1559f9790aebSLuigi Rizzo do { \ 1560f9790aebSLuigi Rizzo a -= b; a -= c; a ^= (c >> 13); \ 1561f9790aebSLuigi Rizzo b -= c; b -= a; b ^= (a << 8); \ 1562f9790aebSLuigi Rizzo c -= a; c -= b; c ^= (b >> 13); \ 1563f9790aebSLuigi Rizzo a -= b; a -= c; a ^= (c >> 12); \ 1564f9790aebSLuigi Rizzo b -= c; b -= a; b ^= (a << 16); \ 1565f9790aebSLuigi Rizzo c -= a; c -= b; c ^= (b >> 5); \ 1566f9790aebSLuigi Rizzo a -= b; a -= c; a ^= (c >> 3); \ 1567f9790aebSLuigi Rizzo b -= c; b -= a; b ^= (a << 10); \ 1568f9790aebSLuigi Rizzo c -= a; c -= b; c ^= (b >> 15); \ 1569f9790aebSLuigi Rizzo } while (/*CONSTCOND*/0) 1570f9790aebSLuigi Rizzo 157117885a7bSLuigi Rizzo 1572f9790aebSLuigi Rizzo static __inline uint32_t 1573f9790aebSLuigi Rizzo nm_bridge_rthash(const uint8_t *addr) 1574f9790aebSLuigi Rizzo { 1575f9790aebSLuigi Rizzo uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key 1576f9790aebSLuigi Rizzo 1577f9790aebSLuigi Rizzo b += addr[5] << 8; 1578f9790aebSLuigi Rizzo b += addr[4]; 1579f9790aebSLuigi Rizzo a += addr[3] << 24; 1580f9790aebSLuigi Rizzo a += addr[2] << 16; 1581f9790aebSLuigi Rizzo a += addr[1] << 8; 1582f9790aebSLuigi Rizzo a += addr[0]; 1583f9790aebSLuigi Rizzo 1584f9790aebSLuigi Rizzo mix(a, b, c); 1585f9790aebSLuigi Rizzo #define BRIDGE_RTHASH_MASK (NM_BDG_HASH-1) 1586f9790aebSLuigi Rizzo return (c & BRIDGE_RTHASH_MASK); 1587f9790aebSLuigi Rizzo } 1588f9790aebSLuigi Rizzo 1589f9790aebSLuigi Rizzo #undef mix 1590f9790aebSLuigi Rizzo 1591f9790aebSLuigi Rizzo 15924bf50f18SLuigi Rizzo /* nm_register callback for VALE ports */ 1593f9790aebSLuigi Rizzo static int 15944bf50f18SLuigi Rizzo netmap_vp_reg(struct netmap_adapter *na, int onoff) 1595f9790aebSLuigi Rizzo { 1596f9790aebSLuigi Rizzo struct netmap_vp_adapter *vpna = 1597f9790aebSLuigi Rizzo (struct netmap_vp_adapter*)na; 
159837e3a6d3SLuigi Rizzo enum txrx t; 159937e3a6d3SLuigi Rizzo int i; 1600f9790aebSLuigi Rizzo 16014bf50f18SLuigi Rizzo /* persistent ports may be put in netmap mode 16024bf50f18SLuigi Rizzo * before being attached to a bridge 1603f9790aebSLuigi Rizzo */ 16044bf50f18SLuigi Rizzo if (vpna->na_bdg) 1605f9790aebSLuigi Rizzo BDG_WLOCK(vpna->na_bdg); 1606f9790aebSLuigi Rizzo if (onoff) { 160737e3a6d3SLuigi Rizzo for_rx_tx(t) { 1608*4f80b14cSVincenzo Maffione for (i = 0; i < netmap_real_rings(na, t); i++) { 160937e3a6d3SLuigi Rizzo struct netmap_kring *kring = &NMR(na, t)[i]; 161037e3a6d3SLuigi Rizzo 161137e3a6d3SLuigi Rizzo if (nm_kring_pending_on(kring)) 161237e3a6d3SLuigi Rizzo kring->nr_mode = NKR_NETMAP_ON; 161337e3a6d3SLuigi Rizzo } 161437e3a6d3SLuigi Rizzo } 161537e3a6d3SLuigi Rizzo if (na->active_fds == 0) 16164bf50f18SLuigi Rizzo na->na_flags |= NAF_NETMAP_ON; 16174bf50f18SLuigi Rizzo /* XXX on FreeBSD, persistent VALE ports should also 16184bf50f18SLuigi Rizzo * toggle IFCAP_NETMAP in na->ifp (2014-03-16) 16194bf50f18SLuigi Rizzo */ 1620f9790aebSLuigi Rizzo } else { 162137e3a6d3SLuigi Rizzo if (na->active_fds == 0) 16224bf50f18SLuigi Rizzo na->na_flags &= ~NAF_NETMAP_ON; 162337e3a6d3SLuigi Rizzo for_rx_tx(t) { 1624*4f80b14cSVincenzo Maffione for (i = 0; i < netmap_real_rings(na, t); i++) { 162537e3a6d3SLuigi Rizzo struct netmap_kring *kring = &NMR(na, t)[i]; 162637e3a6d3SLuigi Rizzo 162737e3a6d3SLuigi Rizzo if (nm_kring_pending_off(kring)) 162837e3a6d3SLuigi Rizzo kring->nr_mode = NKR_NETMAP_OFF; 162937e3a6d3SLuigi Rizzo } 163037e3a6d3SLuigi Rizzo } 1631f9790aebSLuigi Rizzo } 16324bf50f18SLuigi Rizzo if (vpna->na_bdg) 1633f9790aebSLuigi Rizzo BDG_WUNLOCK(vpna->na_bdg); 1634f9790aebSLuigi Rizzo return 0; 1635f9790aebSLuigi Rizzo } 1636f9790aebSLuigi Rizzo 1637f9790aebSLuigi Rizzo 1638f9790aebSLuigi Rizzo /* 1639f9790aebSLuigi Rizzo * Lookup function for a learning bridge. 
1640f9790aebSLuigi Rizzo * Update the hash table with the source address, 1641f9790aebSLuigi Rizzo * and then returns the destination port index, and the 1642f9790aebSLuigi Rizzo * ring in *dst_ring (at the moment, always use ring 0) 1643f9790aebSLuigi Rizzo */ 1644f9790aebSLuigi Rizzo u_int 16454bf50f18SLuigi Rizzo netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, 1646847bf383SLuigi Rizzo struct netmap_vp_adapter *na) 1647f9790aebSLuigi Rizzo { 16484bf50f18SLuigi Rizzo uint8_t *buf = ft->ft_buf; 16494bf50f18SLuigi Rizzo u_int buf_len = ft->ft_len; 1650f9790aebSLuigi Rizzo struct nm_hash_ent *ht = na->na_bdg->ht; 1651f9790aebSLuigi Rizzo uint32_t sh, dh; 1652f9790aebSLuigi Rizzo u_int dst, mysrc = na->bdg_port; 1653f9790aebSLuigi Rizzo uint64_t smac, dmac; 165437e3a6d3SLuigi Rizzo uint8_t indbuf[12]; 1655f9790aebSLuigi Rizzo 16564bf50f18SLuigi Rizzo /* safety check, unfortunately we have many cases */ 165737e3a6d3SLuigi Rizzo if (buf_len >= 14 + na->up.virt_hdr_len) { 16584bf50f18SLuigi Rizzo /* virthdr + mac_hdr in the same slot */ 165937e3a6d3SLuigi Rizzo buf += na->up.virt_hdr_len; 166037e3a6d3SLuigi Rizzo buf_len -= na->up.virt_hdr_len; 166137e3a6d3SLuigi Rizzo } else if (buf_len == na->up.virt_hdr_len && ft->ft_flags & NS_MOREFRAG) { 16624bf50f18SLuigi Rizzo /* only header in first fragment */ 16634bf50f18SLuigi Rizzo ft++; 16644bf50f18SLuigi Rizzo buf = ft->ft_buf; 16654bf50f18SLuigi Rizzo buf_len = ft->ft_len; 16664bf50f18SLuigi Rizzo } else { 16674bf50f18SLuigi Rizzo RD(5, "invalid buf format, length %d", buf_len); 1668f9790aebSLuigi Rizzo return NM_BDG_NOPORT; 1669f9790aebSLuigi Rizzo } 167037e3a6d3SLuigi Rizzo 167137e3a6d3SLuigi Rizzo if (ft->ft_flags & NS_INDIRECT) { 167237e3a6d3SLuigi Rizzo if (copyin(buf, indbuf, sizeof(indbuf))) { 167337e3a6d3SLuigi Rizzo return NM_BDG_NOPORT; 167437e3a6d3SLuigi Rizzo } 167537e3a6d3SLuigi Rizzo buf = indbuf; 167637e3a6d3SLuigi Rizzo } 167737e3a6d3SLuigi Rizzo 1678f9790aebSLuigi Rizzo dmac = 
le64toh(*(uint64_t *)(buf)) & 0xffffffffffff; 1679f9790aebSLuigi Rizzo smac = le64toh(*(uint64_t *)(buf + 4)); 1680f9790aebSLuigi Rizzo smac >>= 16; 1681f9790aebSLuigi Rizzo 1682f9790aebSLuigi Rizzo /* 1683f9790aebSLuigi Rizzo * The hash is somewhat expensive, there might be some 1684f9790aebSLuigi Rizzo * worthwhile optimizations here. 1685f9790aebSLuigi Rizzo */ 1686847bf383SLuigi Rizzo if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */ 1687f9790aebSLuigi Rizzo uint8_t *s = buf+6; 1688*4f80b14cSVincenzo Maffione sh = nm_bridge_rthash(s); /* hash of source */ 1689f9790aebSLuigi Rizzo /* update source port forwarding entry */ 1690847bf383SLuigi Rizzo na->last_smac = ht[sh].mac = smac; /* XXX expire ? */ 1691f9790aebSLuigi Rizzo ht[sh].ports = mysrc; 1692f9790aebSLuigi Rizzo if (netmap_verbose) 1693f9790aebSLuigi Rizzo D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d", 1694f9790aebSLuigi Rizzo s[0], s[1], s[2], s[3], s[4], s[5], mysrc); 1695f9790aebSLuigi Rizzo } 1696f9790aebSLuigi Rizzo dst = NM_BDG_BROADCAST; 1697f9790aebSLuigi Rizzo if ((buf[0] & 1) == 0) { /* unicast */ 1698*4f80b14cSVincenzo Maffione dh = nm_bridge_rthash(buf); /* hash of dst */ 1699f9790aebSLuigi Rizzo if (ht[dh].mac == dmac) { /* found dst */ 1700f9790aebSLuigi Rizzo dst = ht[dh].ports; 1701f9790aebSLuigi Rizzo } 1702f9790aebSLuigi Rizzo } 1703f9790aebSLuigi Rizzo return dst; 1704f9790aebSLuigi Rizzo } 1705f9790aebSLuigi Rizzo 1706f9790aebSLuigi Rizzo 1707f9790aebSLuigi Rizzo /* 170817885a7bSLuigi Rizzo * Available space in the ring. 
Only used in VALE code 170917885a7bSLuigi Rizzo * and only with is_rx = 1 171017885a7bSLuigi Rizzo */ 171117885a7bSLuigi Rizzo static inline uint32_t 171217885a7bSLuigi Rizzo nm_kr_space(struct netmap_kring *k, int is_rx) 171317885a7bSLuigi Rizzo { 171417885a7bSLuigi Rizzo int space; 171517885a7bSLuigi Rizzo 171617885a7bSLuigi Rizzo if (is_rx) { 171717885a7bSLuigi Rizzo int busy = k->nkr_hwlease - k->nr_hwcur; 171817885a7bSLuigi Rizzo if (busy < 0) 171917885a7bSLuigi Rizzo busy += k->nkr_num_slots; 172017885a7bSLuigi Rizzo space = k->nkr_num_slots - 1 - busy; 172117885a7bSLuigi Rizzo } else { 172217885a7bSLuigi Rizzo /* XXX never used in this branch */ 172317885a7bSLuigi Rizzo space = k->nr_hwtail - k->nkr_hwlease; 172417885a7bSLuigi Rizzo if (space < 0) 172517885a7bSLuigi Rizzo space += k->nkr_num_slots; 172617885a7bSLuigi Rizzo } 172717885a7bSLuigi Rizzo #if 0 172817885a7bSLuigi Rizzo // sanity check 172917885a7bSLuigi Rizzo if (k->nkr_hwlease >= k->nkr_num_slots || 173017885a7bSLuigi Rizzo k->nr_hwcur >= k->nkr_num_slots || 173117885a7bSLuigi Rizzo k->nr_tail >= k->nkr_num_slots || 173217885a7bSLuigi Rizzo busy < 0 || 173317885a7bSLuigi Rizzo busy >= k->nkr_num_slots) { 173417885a7bSLuigi Rizzo D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease, 173517885a7bSLuigi Rizzo k->nkr_lease_idx, k->nkr_num_slots); 173617885a7bSLuigi Rizzo } 173717885a7bSLuigi Rizzo #endif 173817885a7bSLuigi Rizzo return space; 173917885a7bSLuigi Rizzo } 174017885a7bSLuigi Rizzo 174117885a7bSLuigi Rizzo 174217885a7bSLuigi Rizzo 174317885a7bSLuigi Rizzo 174417885a7bSLuigi Rizzo /* make a lease on the kring for N positions. 
return the 174517885a7bSLuigi Rizzo * lease index 174617885a7bSLuigi Rizzo * XXX only used in VALE code and with is_rx = 1 174717885a7bSLuigi Rizzo */ 174817885a7bSLuigi Rizzo static inline uint32_t 174917885a7bSLuigi Rizzo nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx) 175017885a7bSLuigi Rizzo { 175117885a7bSLuigi Rizzo uint32_t lim = k->nkr_num_slots - 1; 175217885a7bSLuigi Rizzo uint32_t lease_idx = k->nkr_lease_idx; 175317885a7bSLuigi Rizzo 175417885a7bSLuigi Rizzo k->nkr_leases[lease_idx] = NR_NOSLOT; 175517885a7bSLuigi Rizzo k->nkr_lease_idx = nm_next(lease_idx, lim); 175617885a7bSLuigi Rizzo 175717885a7bSLuigi Rizzo if (n > nm_kr_space(k, is_rx)) { 175817885a7bSLuigi Rizzo D("invalid request for %d slots", n); 175917885a7bSLuigi Rizzo panic("x"); 176017885a7bSLuigi Rizzo } 176117885a7bSLuigi Rizzo /* XXX verify that there are n slots */ 176217885a7bSLuigi Rizzo k->nkr_hwlease += n; 176317885a7bSLuigi Rizzo if (k->nkr_hwlease > lim) 176417885a7bSLuigi Rizzo k->nkr_hwlease -= lim + 1; 176517885a7bSLuigi Rizzo 176617885a7bSLuigi Rizzo if (k->nkr_hwlease >= k->nkr_num_slots || 176717885a7bSLuigi Rizzo k->nr_hwcur >= k->nkr_num_slots || 176817885a7bSLuigi Rizzo k->nr_hwtail >= k->nkr_num_slots || 176917885a7bSLuigi Rizzo k->nkr_lease_idx >= k->nkr_num_slots) { 177017885a7bSLuigi Rizzo D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d", 17714bf50f18SLuigi Rizzo k->na->name, 177217885a7bSLuigi Rizzo k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease, 177317885a7bSLuigi Rizzo k->nkr_lease_idx, k->nkr_num_slots); 177417885a7bSLuigi Rizzo } 177517885a7bSLuigi Rizzo return lease_idx; 177617885a7bSLuigi Rizzo } 177717885a7bSLuigi Rizzo 177817885a7bSLuigi Rizzo /* 17794bf50f18SLuigi Rizzo * 1780f9790aebSLuigi Rizzo * This flush routine supports only unicast and broadcast but a large 1781f9790aebSLuigi Rizzo * number of ports, and lets us replace the learn and dispatch functions. 
1782f9790aebSLuigi Rizzo */ 1783f9790aebSLuigi Rizzo int 1784f9790aebSLuigi Rizzo nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, 1785f9790aebSLuigi Rizzo u_int ring_nr) 1786f9790aebSLuigi Rizzo { 1787f9790aebSLuigi Rizzo struct nm_bdg_q *dst_ents, *brddst; 1788f9790aebSLuigi Rizzo uint16_t num_dsts = 0, *dsts; 1789f9790aebSLuigi Rizzo struct nm_bridge *b = na->na_bdg; 179037e3a6d3SLuigi Rizzo u_int i, me = na->bdg_port; 1791f9790aebSLuigi Rizzo 1792f9790aebSLuigi Rizzo /* 1793f9790aebSLuigi Rizzo * The work area (pointed by ft) is followed by an array of 1794f9790aebSLuigi Rizzo * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS 1795f9790aebSLuigi Rizzo * queues per port plus one for the broadcast traffic. 1796f9790aebSLuigi Rizzo * Then we have an array of destination indexes. 1797f9790aebSLuigi Rizzo */ 1798f9790aebSLuigi Rizzo dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX); 1799f9790aebSLuigi Rizzo dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1); 1800f9790aebSLuigi Rizzo 1801f9790aebSLuigi Rizzo /* first pass: find a destination for each packet in the batch */ 1802f9790aebSLuigi Rizzo for (i = 0; likely(i < n); i += ft[i].ft_frags) { 1803f9790aebSLuigi Rizzo uint8_t dst_ring = ring_nr; /* default, same ring as origin */ 1804f9790aebSLuigi Rizzo uint16_t dst_port, d_i; 1805f9790aebSLuigi Rizzo struct nm_bdg_q *d; 1806f9790aebSLuigi Rizzo 1807f9790aebSLuigi Rizzo ND("slot %d frags %d", i, ft[i].ft_frags); 1808f0ea3689SLuigi Rizzo /* Drop the packet if the virtio-net header is not into the first 1809f9790aebSLuigi Rizzo fragment nor at the very beginning of the second. 
*/ 181037e3a6d3SLuigi Rizzo if (unlikely(na->up.virt_hdr_len > ft[i].ft_len)) 1811f9790aebSLuigi Rizzo continue; 18124bf50f18SLuigi Rizzo dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na); 1813f9790aebSLuigi Rizzo if (netmap_verbose > 255) 1814f9790aebSLuigi Rizzo RD(5, "slot %d port %d -> %d", i, me, dst_port); 1815*4f80b14cSVincenzo Maffione if (dst_port >= NM_BDG_NOPORT) 1816f9790aebSLuigi Rizzo continue; /* this packet is identified to be dropped */ 1817f9790aebSLuigi Rizzo else if (dst_port == NM_BDG_BROADCAST) 1818f9790aebSLuigi Rizzo dst_ring = 0; /* broadcasts always go to ring 0 */ 1819f9790aebSLuigi Rizzo else if (unlikely(dst_port == me || 1820f9790aebSLuigi Rizzo !b->bdg_ports[dst_port])) 1821f9790aebSLuigi Rizzo continue; 1822f9790aebSLuigi Rizzo 1823f9790aebSLuigi Rizzo /* get a position in the scratch pad */ 1824f9790aebSLuigi Rizzo d_i = dst_port * NM_BDG_MAXRINGS + dst_ring; 1825f9790aebSLuigi Rizzo d = dst_ents + d_i; 1826f9790aebSLuigi Rizzo 1827f9790aebSLuigi Rizzo /* append the first fragment to the list */ 1828f9790aebSLuigi Rizzo if (d->bq_head == NM_FT_NULL) { /* new destination */ 1829f9790aebSLuigi Rizzo d->bq_head = d->bq_tail = i; 1830f9790aebSLuigi Rizzo /* remember this position to be scanned later */ 1831f9790aebSLuigi Rizzo if (dst_port != NM_BDG_BROADCAST) 1832f9790aebSLuigi Rizzo dsts[num_dsts++] = d_i; 1833f9790aebSLuigi Rizzo } else { 1834f9790aebSLuigi Rizzo ft[d->bq_tail].ft_next = i; 1835f9790aebSLuigi Rizzo d->bq_tail = i; 1836f9790aebSLuigi Rizzo } 1837f9790aebSLuigi Rizzo d->bq_len += ft[i].ft_frags; 1838f9790aebSLuigi Rizzo } 1839f9790aebSLuigi Rizzo 1840f9790aebSLuigi Rizzo /* 1841f9790aebSLuigi Rizzo * Broadcast traffic goes to ring 0 on all destinations. 1842f9790aebSLuigi Rizzo * So we need to add these rings to the list of ports to scan. 1843f9790aebSLuigi Rizzo * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is 1844f9790aebSLuigi Rizzo * expensive. 
We should keep a compact list of active destinations 1845f9790aebSLuigi Rizzo * so we could shorten this loop. 1846f9790aebSLuigi Rizzo */ 1847f9790aebSLuigi Rizzo brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS; 1848f9790aebSLuigi Rizzo if (brddst->bq_head != NM_FT_NULL) { 184937e3a6d3SLuigi Rizzo u_int j; 1850f9790aebSLuigi Rizzo for (j = 0; likely(j < b->bdg_active_ports); j++) { 1851f9790aebSLuigi Rizzo uint16_t d_i; 1852f9790aebSLuigi Rizzo i = b->bdg_port_index[j]; 1853f9790aebSLuigi Rizzo if (unlikely(i == me)) 1854f9790aebSLuigi Rizzo continue; 1855f9790aebSLuigi Rizzo d_i = i * NM_BDG_MAXRINGS; 1856f9790aebSLuigi Rizzo if (dst_ents[d_i].bq_head == NM_FT_NULL) 1857f9790aebSLuigi Rizzo dsts[num_dsts++] = d_i; 1858f9790aebSLuigi Rizzo } 1859f9790aebSLuigi Rizzo } 1860f9790aebSLuigi Rizzo 1861f9790aebSLuigi Rizzo ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts); 18624bf50f18SLuigi Rizzo /* second pass: scan destinations */ 1863f9790aebSLuigi Rizzo for (i = 0; i < num_dsts; i++) { 1864f9790aebSLuigi Rizzo struct netmap_vp_adapter *dst_na; 1865f9790aebSLuigi Rizzo struct netmap_kring *kring; 1866f9790aebSLuigi Rizzo struct netmap_ring *ring; 1867f0ea3689SLuigi Rizzo u_int dst_nr, lim, j, d_i, next, brd_next; 1868f9790aebSLuigi Rizzo u_int needed, howmany; 1869f9790aebSLuigi Rizzo int retry = netmap_txsync_retry; 1870f9790aebSLuigi Rizzo struct nm_bdg_q *d; 1871f9790aebSLuigi Rizzo uint32_t my_start = 0, lease_idx = 0; 1872f9790aebSLuigi Rizzo int nrings; 1873f0ea3689SLuigi Rizzo int virt_hdr_mismatch = 0; 1874f9790aebSLuigi Rizzo 1875f9790aebSLuigi Rizzo d_i = dsts[i]; 1876f9790aebSLuigi Rizzo ND("second pass %d port %d", i, d_i); 1877f9790aebSLuigi Rizzo d = dst_ents + d_i; 1878f9790aebSLuigi Rizzo // XXX fix the division 1879f9790aebSLuigi Rizzo dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS]; 1880f9790aebSLuigi Rizzo /* protect from the lookup function returning an inactive 1881f9790aebSLuigi Rizzo * destination port 1882f9790aebSLuigi Rizzo */ 
1883f9790aebSLuigi Rizzo if (unlikely(dst_na == NULL)) 1884f9790aebSLuigi Rizzo goto cleanup; 1885f9790aebSLuigi Rizzo if (dst_na->up.na_flags & NAF_SW_ONLY) 1886f9790aebSLuigi Rizzo goto cleanup; 1887f9790aebSLuigi Rizzo /* 1888f9790aebSLuigi Rizzo * The interface may be in !netmap mode in two cases: 1889f9790aebSLuigi Rizzo * - when na is attached but not activated yet; 1890f9790aebSLuigi Rizzo * - when na is being deactivated but is still attached. 1891f9790aebSLuigi Rizzo */ 18924bf50f18SLuigi Rizzo if (unlikely(!nm_netmap_on(&dst_na->up))) { 1893f9790aebSLuigi Rizzo ND("not in netmap mode!"); 1894f9790aebSLuigi Rizzo goto cleanup; 1895f9790aebSLuigi Rizzo } 1896f9790aebSLuigi Rizzo 1897f9790aebSLuigi Rizzo /* there is at least one either unicast or broadcast packet */ 1898f9790aebSLuigi Rizzo brd_next = brddst->bq_head; 1899f9790aebSLuigi Rizzo next = d->bq_head; 1900f9790aebSLuigi Rizzo /* we need to reserve this many slots. If fewer are 1901f9790aebSLuigi Rizzo * available, some packets will be dropped. 1902f9790aebSLuigi Rizzo * Packets may have multiple fragments, so we may not use 1903f9790aebSLuigi Rizzo * there is a chance that we may not use all of the slots 1904f9790aebSLuigi Rizzo * we have claimed, so we will need to handle the leftover 1905f9790aebSLuigi Rizzo * ones when we regain the lock. 1906f9790aebSLuigi Rizzo */ 1907f9790aebSLuigi Rizzo needed = d->bq_len + brddst->bq_len; 1908f9790aebSLuigi Rizzo 190937e3a6d3SLuigi Rizzo if (unlikely(dst_na->up.virt_hdr_len != na->up.virt_hdr_len)) { 1910c3e9b4dbSLuiz Otavio O Souza if (netmap_verbose) { 191137e3a6d3SLuigi Rizzo RD(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len, 191237e3a6d3SLuigi Rizzo dst_na->up.virt_hdr_len); 1913c3e9b4dbSLuiz Otavio O Souza } 1914f0ea3689SLuigi Rizzo /* There is a virtio-net header/offloadings mismatch between 1915f0ea3689SLuigi Rizzo * source and destination. 
The slower mismatch datapath will 1916f0ea3689SLuigi Rizzo * be used to cope with all the mismatches. 1917f0ea3689SLuigi Rizzo */ 1918f0ea3689SLuigi Rizzo virt_hdr_mismatch = 1; 1919f0ea3689SLuigi Rizzo if (dst_na->mfs < na->mfs) { 1920f0ea3689SLuigi Rizzo /* We may need to do segmentation offloadings, and so 1921f0ea3689SLuigi Rizzo * we may need a number of destination slots greater 1922f0ea3689SLuigi Rizzo * than the number of input slots ('needed'). 1923f0ea3689SLuigi Rizzo * We look for the smallest integer 'x' which satisfies: 1924f0ea3689SLuigi Rizzo * needed * na->mfs + x * H <= x * na->mfs 1925f0ea3689SLuigi Rizzo * where 'H' is the length of the longest header that may 1926f0ea3689SLuigi Rizzo * be replicated in the segmentation process (e.g. for 1927f0ea3689SLuigi Rizzo * TCPv4 we must account for ethernet header, IP header 1928f0ea3689SLuigi Rizzo * and TCPv4 header). 1929f0ea3689SLuigi Rizzo */ 1930*4f80b14cSVincenzo Maffione KASSERT(dst_na->mfs > 0, ("vpna->mfs is 0")); 1931f0ea3689SLuigi Rizzo needed = (needed * na->mfs) / 1932f0ea3689SLuigi Rizzo (dst_na->mfs - WORST_CASE_GSO_HEADER) + 1; 1933f0ea3689SLuigi Rizzo ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed); 1934f0ea3689SLuigi Rizzo } 1935f0ea3689SLuigi Rizzo } 1936f0ea3689SLuigi Rizzo 1937f9790aebSLuigi Rizzo ND(5, "pass 2 dst %d is %x %s", 1938f9790aebSLuigi Rizzo i, d_i, is_vp ? 
"virtual" : "nic/host"); 1939f9790aebSLuigi Rizzo dst_nr = d_i & (NM_BDG_MAXRINGS-1); 1940f9790aebSLuigi Rizzo nrings = dst_na->up.num_rx_rings; 1941f9790aebSLuigi Rizzo if (dst_nr >= nrings) 1942f9790aebSLuigi Rizzo dst_nr = dst_nr % nrings; 1943f9790aebSLuigi Rizzo kring = &dst_na->up.rx_rings[dst_nr]; 1944f9790aebSLuigi Rizzo ring = kring->ring; 1945*4f80b14cSVincenzo Maffione /* the destination ring may have not been opened for RX */ 1946*4f80b14cSVincenzo Maffione if (unlikely(ring == NULL || kring->nr_mode != NKR_NETMAP_ON)) 1947*4f80b14cSVincenzo Maffione goto cleanup; 1948f9790aebSLuigi Rizzo lim = kring->nkr_num_slots - 1; 1949f9790aebSLuigi Rizzo 1950f9790aebSLuigi Rizzo retry: 1951f9790aebSLuigi Rizzo 1952f0ea3689SLuigi Rizzo if (dst_na->retry && retry) { 1953f0ea3689SLuigi Rizzo /* try to get some free slot from the previous run */ 1954847bf383SLuigi Rizzo kring->nm_notify(kring, 0); 19554bf50f18SLuigi Rizzo /* actually useful only for bwraps, since there 19564bf50f18SLuigi Rizzo * the notify will trigger a txsync on the hwna. VALE ports 19574bf50f18SLuigi Rizzo * have dst_na->retry == 0 19584bf50f18SLuigi Rizzo */ 1959f0ea3689SLuigi Rizzo } 1960f9790aebSLuigi Rizzo /* reserve the buffers in the queue and an entry 1961f9790aebSLuigi Rizzo * to report completion, and drop lock. 1962f9790aebSLuigi Rizzo * XXX this might become a helper function. 
1963f9790aebSLuigi Rizzo */ 1964f9790aebSLuigi Rizzo mtx_lock(&kring->q_lock); 1965f9790aebSLuigi Rizzo if (kring->nkr_stopped) { 1966f9790aebSLuigi Rizzo mtx_unlock(&kring->q_lock); 1967f9790aebSLuigi Rizzo goto cleanup; 1968f9790aebSLuigi Rizzo } 1969f9790aebSLuigi Rizzo my_start = j = kring->nkr_hwlease; 1970f9790aebSLuigi Rizzo howmany = nm_kr_space(kring, 1); 1971f9790aebSLuigi Rizzo if (needed < howmany) 1972f9790aebSLuigi Rizzo howmany = needed; 1973f9790aebSLuigi Rizzo lease_idx = nm_kr_lease(kring, howmany, 1); 1974f9790aebSLuigi Rizzo mtx_unlock(&kring->q_lock); 1975f9790aebSLuigi Rizzo 1976f9790aebSLuigi Rizzo /* only retry if we need more than available slots */ 1977f9790aebSLuigi Rizzo if (retry && needed <= howmany) 1978f9790aebSLuigi Rizzo retry = 0; 1979f9790aebSLuigi Rizzo 1980f9790aebSLuigi Rizzo /* copy to the destination queue */ 1981f9790aebSLuigi Rizzo while (howmany > 0) { 1982f9790aebSLuigi Rizzo struct netmap_slot *slot; 1983f9790aebSLuigi Rizzo struct nm_bdg_fwd *ft_p, *ft_end; 1984f9790aebSLuigi Rizzo u_int cnt; 1985f9790aebSLuigi Rizzo 1986f9790aebSLuigi Rizzo /* find the queue from which we pick next packet. 1987f9790aebSLuigi Rizzo * NM_FT_NULL is always higher than valid indexes 1988f9790aebSLuigi Rizzo * so we never dereference it if the other list 1989f9790aebSLuigi Rizzo * has packets (and if both are empty we never 1990f9790aebSLuigi Rizzo * get here). 
1991f9790aebSLuigi Rizzo */ 1992f9790aebSLuigi Rizzo if (next < brd_next) { 1993f9790aebSLuigi Rizzo ft_p = ft + next; 1994f9790aebSLuigi Rizzo next = ft_p->ft_next; 1995f9790aebSLuigi Rizzo } else { /* insert broadcast */ 1996f9790aebSLuigi Rizzo ft_p = ft + brd_next; 1997f9790aebSLuigi Rizzo brd_next = ft_p->ft_next; 1998f9790aebSLuigi Rizzo } 1999f9790aebSLuigi Rizzo cnt = ft_p->ft_frags; // cnt > 0 2000f9790aebSLuigi Rizzo if (unlikely(cnt > howmany)) 2001f9790aebSLuigi Rizzo break; /* no more space */ 2002f9790aebSLuigi Rizzo if (netmap_verbose && cnt > 1) 2003f9790aebSLuigi Rizzo RD(5, "rx %d frags to %d", cnt, j); 2004f9790aebSLuigi Rizzo ft_end = ft_p + cnt; 2005f0ea3689SLuigi Rizzo if (unlikely(virt_hdr_mismatch)) { 2006f0ea3689SLuigi Rizzo bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany); 2007f0ea3689SLuigi Rizzo } else { 2008f0ea3689SLuigi Rizzo howmany -= cnt; 2009f9790aebSLuigi Rizzo do { 2010f9790aebSLuigi Rizzo char *dst, *src = ft_p->ft_buf; 2011f9790aebSLuigi Rizzo size_t copy_len = ft_p->ft_len, dst_len = copy_len; 2012f9790aebSLuigi Rizzo 2013f9790aebSLuigi Rizzo slot = &ring->slot[j]; 20144bf50f18SLuigi Rizzo dst = NMB(&dst_na->up, slot); 2015f9790aebSLuigi Rizzo 201617885a7bSLuigi Rizzo ND("send [%d] %d(%d) bytes at %s:%d", 201717885a7bSLuigi Rizzo i, (int)copy_len, (int)dst_len, 201817885a7bSLuigi Rizzo NM_IFPNAME(dst_ifp), j); 2019f9790aebSLuigi Rizzo /* round to a multiple of 64 */ 2020f9790aebSLuigi Rizzo copy_len = (copy_len + 63) & ~63; 2021f9790aebSLuigi Rizzo 20224bf50f18SLuigi Rizzo if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) || 20234bf50f18SLuigi Rizzo copy_len > NETMAP_BUF_SIZE(&na->up))) { 2024e31c6ec7SLuigi Rizzo RD(5, "invalid len %d, down to 64", (int)copy_len); 2025e31c6ec7SLuigi Rizzo copy_len = dst_len = 64; // XXX 2026e31c6ec7SLuigi Rizzo } 2027f9790aebSLuigi Rizzo if (ft_p->ft_flags & NS_INDIRECT) { 2028f9790aebSLuigi Rizzo if (copyin(src, dst, copy_len)) { 2029f9790aebSLuigi Rizzo // invalid user 
pointer, pretend len is 0 2030f9790aebSLuigi Rizzo dst_len = 0; 2031f9790aebSLuigi Rizzo } 2032f9790aebSLuigi Rizzo } else { 2033f9790aebSLuigi Rizzo //memcpy(dst, src, copy_len); 2034f9790aebSLuigi Rizzo pkt_copy(src, dst, (int)copy_len); 2035f9790aebSLuigi Rizzo } 2036f9790aebSLuigi Rizzo slot->len = dst_len; 2037f9790aebSLuigi Rizzo slot->flags = (cnt << 8)| NS_MOREFRAG; 2038f9790aebSLuigi Rizzo j = nm_next(j, lim); 2039f0ea3689SLuigi Rizzo needed--; 2040f9790aebSLuigi Rizzo ft_p++; 2041f9790aebSLuigi Rizzo } while (ft_p != ft_end); 2042f9790aebSLuigi Rizzo slot->flags = (cnt << 8); /* clear flag on last entry */ 2043f0ea3689SLuigi Rizzo } 2044f9790aebSLuigi Rizzo /* are we done ? */ 2045f9790aebSLuigi Rizzo if (next == NM_FT_NULL && brd_next == NM_FT_NULL) 2046f9790aebSLuigi Rizzo break; 2047f9790aebSLuigi Rizzo } 2048f9790aebSLuigi Rizzo { 2049f9790aebSLuigi Rizzo /* current position */ 2050f9790aebSLuigi Rizzo uint32_t *p = kring->nkr_leases; /* shorthand */ 2051f9790aebSLuigi Rizzo uint32_t update_pos; 2052f9790aebSLuigi Rizzo int still_locked = 1; 2053f9790aebSLuigi Rizzo 2054f9790aebSLuigi Rizzo mtx_lock(&kring->q_lock); 2055f9790aebSLuigi Rizzo if (unlikely(howmany > 0)) { 2056f9790aebSLuigi Rizzo /* not used all bufs. If i am the last one 2057f9790aebSLuigi Rizzo * i can recover the slots, otherwise must 2058f9790aebSLuigi Rizzo * fill them with 0 to mark empty packets. 
2059f9790aebSLuigi Rizzo */ 2060f9790aebSLuigi Rizzo ND("leftover %d bufs", howmany); 2061f9790aebSLuigi Rizzo if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) { 2062f9790aebSLuigi Rizzo /* yes i am the last one */ 2063f9790aebSLuigi Rizzo ND("roll back nkr_hwlease to %d", j); 2064f9790aebSLuigi Rizzo kring->nkr_hwlease = j; 2065f9790aebSLuigi Rizzo } else { 2066f9790aebSLuigi Rizzo while (howmany-- > 0) { 2067f9790aebSLuigi Rizzo ring->slot[j].len = 0; 2068f9790aebSLuigi Rizzo ring->slot[j].flags = 0; 2069f9790aebSLuigi Rizzo j = nm_next(j, lim); 2070f9790aebSLuigi Rizzo } 2071f9790aebSLuigi Rizzo } 2072f9790aebSLuigi Rizzo } 2073f9790aebSLuigi Rizzo p[lease_idx] = j; /* report I am done */ 2074f9790aebSLuigi Rizzo 207517885a7bSLuigi Rizzo update_pos = kring->nr_hwtail; 2076f9790aebSLuigi Rizzo 2077f9790aebSLuigi Rizzo if (my_start == update_pos) { 2078f9790aebSLuigi Rizzo /* all slots before my_start have been reported, 2079f9790aebSLuigi Rizzo * so scan subsequent leases to see if other ranges 2080f9790aebSLuigi Rizzo * have been completed, and to a selwakeup or txsync. 2081f9790aebSLuigi Rizzo */ 2082f9790aebSLuigi Rizzo while (lease_idx != kring->nkr_lease_idx && 2083f9790aebSLuigi Rizzo p[lease_idx] != NR_NOSLOT) { 2084f9790aebSLuigi Rizzo j = p[lease_idx]; 2085f9790aebSLuigi Rizzo p[lease_idx] = NR_NOSLOT; 2086f9790aebSLuigi Rizzo lease_idx = nm_next(lease_idx, lim); 2087f9790aebSLuigi Rizzo } 2088f9790aebSLuigi Rizzo /* j is the new 'write' position. j != my_start 2089f9790aebSLuigi Rizzo * means there are new buffers to report 2090f9790aebSLuigi Rizzo */ 2091f9790aebSLuigi Rizzo if (likely(j != my_start)) { 209217885a7bSLuigi Rizzo kring->nr_hwtail = j; 2093f9790aebSLuigi Rizzo still_locked = 0; 2094f9790aebSLuigi Rizzo mtx_unlock(&kring->q_lock); 2095847bf383SLuigi Rizzo kring->nm_notify(kring, 0); 20964bf50f18SLuigi Rizzo /* this is netmap_notify for VALE ports and 20974bf50f18SLuigi Rizzo * netmap_bwrap_notify for bwrap. 
The latter will 20984bf50f18SLuigi Rizzo * trigger a txsync on the underlying hwna 20994bf50f18SLuigi Rizzo */ 21004bf50f18SLuigi Rizzo if (dst_na->retry && retry--) { 21014bf50f18SLuigi Rizzo /* XXX this is going to call nm_notify again. 21024bf50f18SLuigi Rizzo * Only useful for bwrap in virtual machines 21034bf50f18SLuigi Rizzo */ 2104f9790aebSLuigi Rizzo goto retry; 2105f9790aebSLuigi Rizzo } 2106f9790aebSLuigi Rizzo } 21074bf50f18SLuigi Rizzo } 2108f9790aebSLuigi Rizzo if (still_locked) 2109f9790aebSLuigi Rizzo mtx_unlock(&kring->q_lock); 2110f9790aebSLuigi Rizzo } 2111f9790aebSLuigi Rizzo cleanup: 2112f9790aebSLuigi Rizzo d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */ 2113f9790aebSLuigi Rizzo d->bq_len = 0; 2114f9790aebSLuigi Rizzo } 2115f9790aebSLuigi Rizzo brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */ 2116f9790aebSLuigi Rizzo brddst->bq_len = 0; 2117f9790aebSLuigi Rizzo return 0; 2118f9790aebSLuigi Rizzo } 2119f9790aebSLuigi Rizzo 21204bf50f18SLuigi Rizzo /* nm_txsync callback for VALE ports */ 2121f9790aebSLuigi Rizzo static int 21224bf50f18SLuigi Rizzo netmap_vp_txsync(struct netmap_kring *kring, int flags) 2123f9790aebSLuigi Rizzo { 21244bf50f18SLuigi Rizzo struct netmap_vp_adapter *na = 21254bf50f18SLuigi Rizzo (struct netmap_vp_adapter *)kring->na; 212617885a7bSLuigi Rizzo u_int done; 212717885a7bSLuigi Rizzo u_int const lim = kring->nkr_num_slots - 1; 2128847bf383SLuigi Rizzo u_int const head = kring->rhead; 2129f9790aebSLuigi Rizzo 2130f9790aebSLuigi Rizzo if (bridge_batch <= 0) { /* testing only */ 2131847bf383SLuigi Rizzo done = head; // used all 2132f9790aebSLuigi Rizzo goto done; 2133f9790aebSLuigi Rizzo } 21344bf50f18SLuigi Rizzo if (!na->na_bdg) { 2135847bf383SLuigi Rizzo done = head; 21364bf50f18SLuigi Rizzo goto done; 21374bf50f18SLuigi Rizzo } 2138f9790aebSLuigi Rizzo if (bridge_batch > NM_BDG_BATCH) 2139f9790aebSLuigi Rizzo bridge_batch = NM_BDG_BATCH; 2140f9790aebSLuigi Rizzo 2141847bf383SLuigi Rizzo done = 
nm_bdg_preflush(kring, head); 2142f9790aebSLuigi Rizzo done: 2143847bf383SLuigi Rizzo if (done != head) 2144847bf383SLuigi Rizzo D("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail); 214517885a7bSLuigi Rizzo /* 214617885a7bSLuigi Rizzo * packets between 'done' and 'cur' are left unsent. 214717885a7bSLuigi Rizzo */ 214817885a7bSLuigi Rizzo kring->nr_hwcur = done; 214917885a7bSLuigi Rizzo kring->nr_hwtail = nm_prev(done, lim); 2150f9790aebSLuigi Rizzo if (netmap_verbose) 21514bf50f18SLuigi Rizzo D("%s ring %d flags %d", na->up.name, kring->ring_id, flags); 2152f9790aebSLuigi Rizzo return 0; 2153f9790aebSLuigi Rizzo } 2154f9790aebSLuigi Rizzo 2155f9790aebSLuigi Rizzo 21564bf50f18SLuigi Rizzo /* rxsync code used by VALE ports nm_rxsync callback and also 21574bf50f18SLuigi Rizzo * internally by the brwap 2158f9790aebSLuigi Rizzo */ 2159f9790aebSLuigi Rizzo static int 21604bf50f18SLuigi Rizzo netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags) 2161f9790aebSLuigi Rizzo { 21624bf50f18SLuigi Rizzo struct netmap_adapter *na = kring->na; 216317885a7bSLuigi Rizzo struct netmap_ring *ring = kring->ring; 216417885a7bSLuigi Rizzo u_int nm_i, lim = kring->nkr_num_slots - 1; 2165847bf383SLuigi Rizzo u_int head = kring->rhead; 216617885a7bSLuigi Rizzo int n; 216717885a7bSLuigi Rizzo 216817885a7bSLuigi Rizzo if (head > lim) { 216917885a7bSLuigi Rizzo D("ouch dangerous reset!!!"); 217017885a7bSLuigi Rizzo n = netmap_ring_reinit(kring); 217117885a7bSLuigi Rizzo goto done; 217217885a7bSLuigi Rizzo } 217317885a7bSLuigi Rizzo 217417885a7bSLuigi Rizzo /* First part, import newly received packets. */ 217517885a7bSLuigi Rizzo /* actually nothing to do here, they are already in the kring */ 217617885a7bSLuigi Rizzo 217717885a7bSLuigi Rizzo /* Second part, skip past packets that userspace has released. 
*/ 217817885a7bSLuigi Rizzo nm_i = kring->nr_hwcur; 217917885a7bSLuigi Rizzo if (nm_i != head) { 218017885a7bSLuigi Rizzo /* consistency check, but nothing really important here */ 218117885a7bSLuigi Rizzo for (n = 0; likely(nm_i != head); n++) { 218217885a7bSLuigi Rizzo struct netmap_slot *slot = &ring->slot[nm_i]; 21834bf50f18SLuigi Rizzo void *addr = NMB(na, slot); 218417885a7bSLuigi Rizzo 21854bf50f18SLuigi Rizzo if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */ 218617885a7bSLuigi Rizzo D("bad buffer index %d, ignore ?", 218717885a7bSLuigi Rizzo slot->buf_idx); 218817885a7bSLuigi Rizzo } 218917885a7bSLuigi Rizzo slot->flags &= ~NS_BUF_CHANGED; 219017885a7bSLuigi Rizzo nm_i = nm_next(nm_i, lim); 219117885a7bSLuigi Rizzo } 219217885a7bSLuigi Rizzo kring->nr_hwcur = head; 219317885a7bSLuigi Rizzo } 219417885a7bSLuigi Rizzo 219517885a7bSLuigi Rizzo n = 0; 219617885a7bSLuigi Rizzo done: 219717885a7bSLuigi Rizzo return n; 219817885a7bSLuigi Rizzo } 2199f9790aebSLuigi Rizzo 2200f9790aebSLuigi Rizzo /* 22014bf50f18SLuigi Rizzo * nm_rxsync callback for VALE ports 2202f9790aebSLuigi Rizzo * user process reading from a VALE switch. 2203f9790aebSLuigi Rizzo * Already protected against concurrent calls from userspace, 2204f9790aebSLuigi Rizzo * but we must acquire the queue's lock to protect against 2205f9790aebSLuigi Rizzo * writers on the same queue. 
2206f9790aebSLuigi Rizzo */ 2207f9790aebSLuigi Rizzo static int 22084bf50f18SLuigi Rizzo netmap_vp_rxsync(struct netmap_kring *kring, int flags) 2209f9790aebSLuigi Rizzo { 2210f9790aebSLuigi Rizzo int n; 2211f9790aebSLuigi Rizzo 2212f9790aebSLuigi Rizzo mtx_lock(&kring->q_lock); 22134bf50f18SLuigi Rizzo n = netmap_vp_rxsync_locked(kring, flags); 2214f9790aebSLuigi Rizzo mtx_unlock(&kring->q_lock); 2215f9790aebSLuigi Rizzo return n; 2216f9790aebSLuigi Rizzo } 2217f9790aebSLuigi Rizzo 221817885a7bSLuigi Rizzo 22194bf50f18SLuigi Rizzo /* nm_bdg_attach callback for VALE ports 22204bf50f18SLuigi Rizzo * The na_vp port is this same netmap_adapter. There is no host port. 22214bf50f18SLuigi Rizzo */ 2222f9790aebSLuigi Rizzo static int 22234bf50f18SLuigi Rizzo netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na) 22244bf50f18SLuigi Rizzo { 22254bf50f18SLuigi Rizzo struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; 22264bf50f18SLuigi Rizzo 22274bf50f18SLuigi Rizzo if (vpna->na_bdg) 2228*4f80b14cSVincenzo Maffione return netmap_bwrap_attach(name, na); 22294bf50f18SLuigi Rizzo na->na_vp = vpna; 22304bf50f18SLuigi Rizzo strncpy(na->name, name, sizeof(na->name)); 22314bf50f18SLuigi Rizzo na->na_hostvp = NULL; 22324bf50f18SLuigi Rizzo return 0; 22334bf50f18SLuigi Rizzo } 22344bf50f18SLuigi Rizzo 22354bf50f18SLuigi Rizzo /* create a netmap_vp_adapter that describes a VALE port. 22364bf50f18SLuigi Rizzo * Only persistent VALE ports have a non-null ifp. 
22374bf50f18SLuigi Rizzo */ 22384bf50f18SLuigi Rizzo static int 2239c3e9b4dbSLuiz Otavio O Souza netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, 2240c3e9b4dbSLuiz Otavio O Souza struct netmap_mem_d *nmd, 2241c3e9b4dbSLuiz Otavio O Souza struct netmap_vp_adapter **ret) 2242f9790aebSLuigi Rizzo { 2243f9790aebSLuigi Rizzo struct netmap_vp_adapter *vpna; 2244f9790aebSLuigi Rizzo struct netmap_adapter *na; 2245c3e9b4dbSLuiz Otavio O Souza int error = 0; 2246f0ea3689SLuigi Rizzo u_int npipes = 0; 2247f9790aebSLuigi Rizzo 2248c3e9b4dbSLuiz Otavio O Souza vpna = nm_os_malloc(sizeof(*vpna)); 2249f9790aebSLuigi Rizzo if (vpna == NULL) 2250f9790aebSLuigi Rizzo return ENOMEM; 2251f9790aebSLuigi Rizzo 2252f9790aebSLuigi Rizzo na = &vpna->up; 2253f9790aebSLuigi Rizzo 2254f9790aebSLuigi Rizzo na->ifp = ifp; 22554bf50f18SLuigi Rizzo strncpy(na->name, nmr->nr_name, sizeof(na->name)); 2256f9790aebSLuigi Rizzo 2257f9790aebSLuigi Rizzo /* bound checking */ 2258f9790aebSLuigi Rizzo na->num_tx_rings = nmr->nr_tx_rings; 2259f9790aebSLuigi Rizzo nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); 2260f9790aebSLuigi Rizzo nmr->nr_tx_rings = na->num_tx_rings; // write back 2261f9790aebSLuigi Rizzo na->num_rx_rings = nmr->nr_rx_rings; 2262f9790aebSLuigi Rizzo nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); 2263f9790aebSLuigi Rizzo nmr->nr_rx_rings = na->num_rx_rings; // write back 2264f9790aebSLuigi Rizzo nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE, 2265f9790aebSLuigi Rizzo 1, NM_BDG_MAXSLOTS, NULL); 2266f9790aebSLuigi Rizzo na->num_tx_desc = nmr->nr_tx_slots; 2267f9790aebSLuigi Rizzo nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE, 2268f9790aebSLuigi Rizzo 1, NM_BDG_MAXSLOTS, NULL); 2269f0ea3689SLuigi Rizzo /* validate number of pipes. We want at least 1, 2270f0ea3689SLuigi Rizzo * but probably can do with some more. 
2271f0ea3689SLuigi Rizzo * So let's use 2 as default (when 0 is supplied) 2272f0ea3689SLuigi Rizzo */ 2273f0ea3689SLuigi Rizzo npipes = nmr->nr_arg1; 2274f0ea3689SLuigi Rizzo nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL); 2275f0ea3689SLuigi Rizzo nmr->nr_arg1 = npipes; /* write back */ 2276f0ea3689SLuigi Rizzo /* validate extra bufs */ 2277f0ea3689SLuigi Rizzo nm_bound_var(&nmr->nr_arg3, 0, 0, 2278f0ea3689SLuigi Rizzo 128*NM_BDG_MAXSLOTS, NULL); 2279f9790aebSLuigi Rizzo na->num_rx_desc = nmr->nr_rx_slots; 2280*4f80b14cSVincenzo Maffione /* Set the mfs to a default value, as it is needed on the VALE 2281*4f80b14cSVincenzo Maffione * mismatch datapath. XXX We should set it according to the MTU 2282*4f80b14cSVincenzo Maffione * known to the kernel. */ 2283*4f80b14cSVincenzo Maffione vpna->mfs = NM_BDG_MFS_DEFAULT; 2284847bf383SLuigi Rizzo vpna->last_smac = ~0llu; 2285f0ea3689SLuigi Rizzo /*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero?? 2286f0ea3689SLuigi Rizzo vpna->mfs = netmap_buf_size; */ 2287f0ea3689SLuigi Rizzo if (netmap_verbose) 2288f0ea3689SLuigi Rizzo D("max frame size %u", vpna->mfs); 2289f9790aebSLuigi Rizzo 2290847bf383SLuigi Rizzo na->na_flags |= NAF_BDG_MAYSLEEP; 229110b8ef3dSLuigi Rizzo /* persistent VALE ports look like hw devices 229210b8ef3dSLuigi Rizzo * with a native netmap adapter 229310b8ef3dSLuigi Rizzo */ 229410b8ef3dSLuigi Rizzo if (ifp) 229510b8ef3dSLuigi Rizzo na->na_flags |= NAF_NATIVE; 22964bf50f18SLuigi Rizzo na->nm_txsync = netmap_vp_txsync; 22974bf50f18SLuigi Rizzo na->nm_rxsync = netmap_vp_rxsync; 22984bf50f18SLuigi Rizzo na->nm_register = netmap_vp_reg; 2299f9790aebSLuigi Rizzo na->nm_krings_create = netmap_vp_krings_create; 2300f9790aebSLuigi Rizzo na->nm_krings_delete = netmap_vp_krings_delete; 23014bf50f18SLuigi Rizzo na->nm_dtor = netmap_vp_dtor; 2302c3e9b4dbSLuiz Otavio O Souza D("nr_arg2 %d", nmr->nr_arg2); 2303c3e9b4dbSLuiz Otavio O Souza na->nm_mem = nmd ? 
2304c3e9b4dbSLuiz Otavio O Souza netmap_mem_get(nmd): 2305c3e9b4dbSLuiz Otavio O Souza netmap_mem_private_new( 2306f9790aebSLuigi Rizzo na->num_tx_rings, na->num_tx_desc, 2307f0ea3689SLuigi Rizzo na->num_rx_rings, na->num_rx_desc, 2308f0ea3689SLuigi Rizzo nmr->nr_arg3, npipes, &error); 2309f0ea3689SLuigi Rizzo if (na->nm_mem == NULL) 2310f0ea3689SLuigi Rizzo goto err; 23114bf50f18SLuigi Rizzo na->nm_bdg_attach = netmap_vp_bdg_attach; 2312f9790aebSLuigi Rizzo /* other nmd fields are set in the common routine */ 2313f9790aebSLuigi Rizzo error = netmap_attach_common(na); 2314f0ea3689SLuigi Rizzo if (error) 2315f0ea3689SLuigi Rizzo goto err; 23164bf50f18SLuigi Rizzo *ret = vpna; 2317f0ea3689SLuigi Rizzo return 0; 2318f0ea3689SLuigi Rizzo 2319f0ea3689SLuigi Rizzo err: 2320f0ea3689SLuigi Rizzo if (na->nm_mem != NULL) 2321c3e9b4dbSLuiz Otavio O Souza netmap_mem_put(na->nm_mem); 2322c3e9b4dbSLuiz Otavio O Souza nm_os_free(vpna); 2323f9790aebSLuigi Rizzo return error; 2324f9790aebSLuigi Rizzo } 2325f9790aebSLuigi Rizzo 23264bf50f18SLuigi Rizzo /* Bridge wrapper code (bwrap). 23274bf50f18SLuigi Rizzo * This is used to connect a non-VALE-port netmap_adapter (hwna) to a 23284bf50f18SLuigi Rizzo * VALE switch. 23294bf50f18SLuigi Rizzo * The main task is to swap the meaning of tx and rx rings to match the 23304bf50f18SLuigi Rizzo * expectations of the VALE switch code (see nm_bdg_flush). 23314bf50f18SLuigi Rizzo * 23324bf50f18SLuigi Rizzo * The bwrap works by interposing a netmap_bwrap_adapter between the 23334bf50f18SLuigi Rizzo * rest of the system and the hwna. The netmap_bwrap_adapter looks like 23344bf50f18SLuigi Rizzo * a netmap_vp_adapter to the rest the system, but, internally, it 23354bf50f18SLuigi Rizzo * translates all callbacks to what the hwna expects. 
23364bf50f18SLuigi Rizzo * 23374bf50f18SLuigi Rizzo * Note that we have to intercept callbacks coming from two sides: 23384bf50f18SLuigi Rizzo * 23394bf50f18SLuigi Rizzo * - callbacks coming from the netmap module are intercepted by 23404bf50f18SLuigi Rizzo * passing around the netmap_bwrap_adapter instead of the hwna 23414bf50f18SLuigi Rizzo * 23424bf50f18SLuigi Rizzo * - callbacks coming from outside of the netmap module only know 23434bf50f18SLuigi Rizzo * about the hwna. This, however, only happens in interrupt 23444bf50f18SLuigi Rizzo * handlers, where only the hwna->nm_notify callback is called. 23454bf50f18SLuigi Rizzo * What the bwrap does is to overwrite the hwna->nm_notify callback 23464bf50f18SLuigi Rizzo * with its own netmap_bwrap_intr_notify. 23474bf50f18SLuigi Rizzo * XXX This assumes that the hwna->nm_notify callback was the 23484bf50f18SLuigi Rizzo * standard netmap_notify(), as it is the case for nic adapters. 23494bf50f18SLuigi Rizzo * Any additional action performed by hwna->nm_notify will not be 23504bf50f18SLuigi Rizzo * performed by netmap_bwrap_intr_notify. 23514bf50f18SLuigi Rizzo * 23524bf50f18SLuigi Rizzo * Additionally, the bwrap can optionally attach the host rings pair 23534bf50f18SLuigi Rizzo * of the wrapped adapter to a different port of the switch. 
23544bf50f18SLuigi Rizzo */ 23554bf50f18SLuigi Rizzo 235617885a7bSLuigi Rizzo 2357f9790aebSLuigi Rizzo static void 2358f9790aebSLuigi Rizzo netmap_bwrap_dtor(struct netmap_adapter *na) 2359f9790aebSLuigi Rizzo { 2360f9790aebSLuigi Rizzo struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; 2361f9790aebSLuigi Rizzo struct netmap_adapter *hwna = bna->hwna; 236237e3a6d3SLuigi Rizzo struct nm_bridge *b = bna->up.na_bdg, 236337e3a6d3SLuigi Rizzo *bh = bna->host.na_bdg; 236437e3a6d3SLuigi Rizzo 2365*4f80b14cSVincenzo Maffione if (bna->host.up.nm_mem) 2366c3e9b4dbSLuiz Otavio O Souza netmap_mem_put(bna->host.up.nm_mem); 2367c3e9b4dbSLuiz Otavio O Souza 236837e3a6d3SLuigi Rizzo if (b) { 236937e3a6d3SLuigi Rizzo netmap_bdg_detach_common(b, bna->up.bdg_port, 237037e3a6d3SLuigi Rizzo (bh ? bna->host.bdg_port : -1)); 237137e3a6d3SLuigi Rizzo } 2372f9790aebSLuigi Rizzo 2373f9790aebSLuigi Rizzo ND("na %p", na); 2374f9790aebSLuigi Rizzo na->ifp = NULL; 23754bf50f18SLuigi Rizzo bna->host.up.ifp = NULL; 23764bf50f18SLuigi Rizzo hwna->na_private = NULL; 23774bf50f18SLuigi Rizzo hwna->na_vp = hwna->na_hostvp = NULL; 23784bf50f18SLuigi Rizzo hwna->na_flags &= ~NAF_BUSY; 23794bf50f18SLuigi Rizzo netmap_adapter_put(hwna); 2380f9790aebSLuigi Rizzo 2381f9790aebSLuigi Rizzo } 2382f9790aebSLuigi Rizzo 238317885a7bSLuigi Rizzo 2384f9790aebSLuigi Rizzo /* 238517885a7bSLuigi Rizzo * Intr callback for NICs connected to a bridge. 238617885a7bSLuigi Rizzo * Simply ignore tx interrupts (maybe we could try to recover space ?) 238717885a7bSLuigi Rizzo * and pass received packets from nic to the bridge. 238817885a7bSLuigi Rizzo * 2389f9790aebSLuigi Rizzo * XXX TODO check locking: this is called from the interrupt 2390f9790aebSLuigi Rizzo * handler so we should make sure that the interface is not 2391f9790aebSLuigi Rizzo * disconnected while passing down an interrupt. 2392f9790aebSLuigi Rizzo * 239317885a7bSLuigi Rizzo * Note, no user process can access this NIC or the host stack. 
239417885a7bSLuigi Rizzo * The only part of the ring that is significant are the slots, 239517885a7bSLuigi Rizzo * and head/cur/tail are set from the kring as needed 239617885a7bSLuigi Rizzo * (part as a receive ring, part as a transmit ring). 239717885a7bSLuigi Rizzo * 239817885a7bSLuigi Rizzo * callback that overwrites the hwna notify callback. 239937e3a6d3SLuigi Rizzo * Packets come from the outside or from the host stack and are put on an 240037e3a6d3SLuigi Rizzo * hwna rx ring. 2401f9790aebSLuigi Rizzo * The bridge wrapper then sends the packets through the bridge. 2402f9790aebSLuigi Rizzo */ 2403f9790aebSLuigi Rizzo static int 2404847bf383SLuigi Rizzo netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags) 2405f9790aebSLuigi Rizzo { 2406847bf383SLuigi Rizzo struct netmap_adapter *na = kring->na; 2407f9790aebSLuigi Rizzo struct netmap_bwrap_adapter *bna = na->na_private; 2408847bf383SLuigi Rizzo struct netmap_kring *bkring; 2409f9790aebSLuigi Rizzo struct netmap_vp_adapter *vpna = &bna->up; 2410847bf383SLuigi Rizzo u_int ring_nr = kring->ring_id; 241137e3a6d3SLuigi Rizzo int ret = NM_IRQ_COMPLETED; 241237e3a6d3SLuigi Rizzo int error; 2413f9790aebSLuigi Rizzo 241417885a7bSLuigi Rizzo if (netmap_verbose) 2415847bf383SLuigi Rizzo D("%s %s 0x%x", na->name, kring->name, flags); 2416f9790aebSLuigi Rizzo 2417847bf383SLuigi Rizzo bkring = &vpna->up.tx_rings[ring_nr]; 2418f9790aebSLuigi Rizzo 2419f9790aebSLuigi Rizzo /* make sure the ring is not disabled */ 242037e3a6d3SLuigi Rizzo if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) { 242137e3a6d3SLuigi Rizzo return EIO; 242237e3a6d3SLuigi Rizzo } 2423f9790aebSLuigi Rizzo 242417885a7bSLuigi Rizzo if (netmap_verbose) 2425847bf383SLuigi Rizzo D("%s head %d cur %d tail %d", na->name, 242617885a7bSLuigi Rizzo kring->rhead, kring->rcur, kring->rtail); 242717885a7bSLuigi Rizzo 2428847bf383SLuigi Rizzo /* simulate a user wakeup on the rx ring 2429847bf383SLuigi Rizzo * fetch packets that have arrived. 
2430f9790aebSLuigi Rizzo */ 2431f0ea3689SLuigi Rizzo error = kring->nm_sync(kring, 0); 2432f9790aebSLuigi Rizzo if (error) 2433f9790aebSLuigi Rizzo goto put_out; 243437e3a6d3SLuigi Rizzo if (kring->nr_hwcur == kring->nr_hwtail) { 243537e3a6d3SLuigi Rizzo if (netmap_verbose) 2436f9790aebSLuigi Rizzo D("how strange, interrupt with no packets on %s", 24374bf50f18SLuigi Rizzo na->name); 2438f9790aebSLuigi Rizzo goto put_out; 2439f9790aebSLuigi Rizzo } 244017885a7bSLuigi Rizzo 2441847bf383SLuigi Rizzo /* new packets are kring->rcur to kring->nr_hwtail, and the bkring 2442847bf383SLuigi Rizzo * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail 244317885a7bSLuigi Rizzo * to push all packets out. 244417885a7bSLuigi Rizzo */ 2445847bf383SLuigi Rizzo bkring->rhead = bkring->rcur = kring->nr_hwtail; 244617885a7bSLuigi Rizzo 24474bf50f18SLuigi Rizzo netmap_vp_txsync(bkring, flags); 2448f9790aebSLuigi Rizzo 244917885a7bSLuigi Rizzo /* mark all buffers as released on this ring */ 2450847bf383SLuigi Rizzo kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail; 245117885a7bSLuigi Rizzo /* another call to actually release the buffers */ 2452f0ea3689SLuigi Rizzo error = kring->nm_sync(kring, 0); 2453f9790aebSLuigi Rizzo 245437e3a6d3SLuigi Rizzo /* The second rxsync may have further advanced hwtail. If this happens, 245537e3a6d3SLuigi Rizzo * return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */ 245637e3a6d3SLuigi Rizzo if (kring->rcur != kring->nr_hwtail) { 245737e3a6d3SLuigi Rizzo ret = NM_IRQ_RESCHED; 245837e3a6d3SLuigi Rizzo } 2459f9790aebSLuigi Rizzo put_out: 2460f9790aebSLuigi Rizzo nm_kr_put(kring); 246137e3a6d3SLuigi Rizzo 246237e3a6d3SLuigi Rizzo return error ? 
error : ret; 2463f9790aebSLuigi Rizzo } 2464f9790aebSLuigi Rizzo 246517885a7bSLuigi Rizzo 24664bf50f18SLuigi Rizzo /* nm_register callback for bwrap */ 2467f9790aebSLuigi Rizzo static int 246837e3a6d3SLuigi Rizzo netmap_bwrap_reg(struct netmap_adapter *na, int onoff) 2469f9790aebSLuigi Rizzo { 2470f9790aebSLuigi Rizzo struct netmap_bwrap_adapter *bna = 2471f9790aebSLuigi Rizzo (struct netmap_bwrap_adapter *)na; 2472f9790aebSLuigi Rizzo struct netmap_adapter *hwna = bna->hwna; 2473f9790aebSLuigi Rizzo struct netmap_vp_adapter *hostna = &bna->host; 247437e3a6d3SLuigi Rizzo int error, i; 2475847bf383SLuigi Rizzo enum txrx t; 2476f9790aebSLuigi Rizzo 24774bf50f18SLuigi Rizzo ND("%s %s", na->name, onoff ? "on" : "off"); 2478f9790aebSLuigi Rizzo 2479f9790aebSLuigi Rizzo if (onoff) { 24804bf50f18SLuigi Rizzo /* netmap_do_regif has been called on the bwrap na. 24814bf50f18SLuigi Rizzo * We need to pass the information about the 24824bf50f18SLuigi Rizzo * memory allocator down to the hwna before 24834bf50f18SLuigi Rizzo * putting it in netmap mode 24844bf50f18SLuigi Rizzo */ 2485f9790aebSLuigi Rizzo hwna->na_lut = na->na_lut; 2486f9790aebSLuigi Rizzo 2487f9790aebSLuigi Rizzo if (hostna->na_bdg) { 24884bf50f18SLuigi Rizzo /* if the host rings have been attached to switch, 24894bf50f18SLuigi Rizzo * we need to copy the memory allocator information 24904bf50f18SLuigi Rizzo * in the hostna also 24914bf50f18SLuigi Rizzo */ 2492f9790aebSLuigi Rizzo hostna->up.na_lut = na->na_lut; 2493f9790aebSLuigi Rizzo } 2494f9790aebSLuigi Rizzo 249537e3a6d3SLuigi Rizzo } 249637e3a6d3SLuigi Rizzo 249737e3a6d3SLuigi Rizzo /* pass down the pending ring state information */ 249837e3a6d3SLuigi Rizzo for_rx_tx(t) { 249937e3a6d3SLuigi Rizzo for (i = 0; i < nma_get_nrings(na, t) + 1; i++) 250037e3a6d3SLuigi Rizzo NMR(hwna, t)[i].nr_pending_mode = 250137e3a6d3SLuigi Rizzo NMR(na, t)[i].nr_pending_mode; 2502f9790aebSLuigi Rizzo } 2503f9790aebSLuigi Rizzo 25044bf50f18SLuigi Rizzo /* forward the request 
to the hwna */ 2505f9790aebSLuigi Rizzo error = hwna->nm_register(hwna, onoff); 2506f9790aebSLuigi Rizzo if (error) 2507f9790aebSLuigi Rizzo return error; 2508f9790aebSLuigi Rizzo 250937e3a6d3SLuigi Rizzo /* copy up the current ring state information */ 251037e3a6d3SLuigi Rizzo for_rx_tx(t) { 2511*4f80b14cSVincenzo Maffione for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { 2512*4f80b14cSVincenzo Maffione struct netmap_kring *kring = &NMR(hwna, t)[i]; 2513*4f80b14cSVincenzo Maffione NMR(na, t)[i].nr_mode = kring->nr_mode; 2514*4f80b14cSVincenzo Maffione } 251537e3a6d3SLuigi Rizzo } 251637e3a6d3SLuigi Rizzo 25174bf50f18SLuigi Rizzo /* impersonate a netmap_vp_adapter */ 25184bf50f18SLuigi Rizzo netmap_vp_reg(na, onoff); 25194bf50f18SLuigi Rizzo if (hostna->na_bdg) 25204bf50f18SLuigi Rizzo netmap_vp_reg(&hostna->up, onoff); 2521f9790aebSLuigi Rizzo 2522f9790aebSLuigi Rizzo if (onoff) { 2523847bf383SLuigi Rizzo u_int i; 2524847bf383SLuigi Rizzo /* intercept the hwna nm_nofify callback on the hw rings */ 2525847bf383SLuigi Rizzo for (i = 0; i < hwna->num_rx_rings; i++) { 2526847bf383SLuigi Rizzo hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify; 2527847bf383SLuigi Rizzo hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify; 2528847bf383SLuigi Rizzo } 2529847bf383SLuigi Rizzo i = hwna->num_rx_rings; /* for safety */ 2530847bf383SLuigi Rizzo /* save the host ring notify unconditionally */ 2531847bf383SLuigi Rizzo hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify; 2532847bf383SLuigi Rizzo if (hostna->na_bdg) { 2533847bf383SLuigi Rizzo /* also intercept the host ring notify */ 2534847bf383SLuigi Rizzo hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify; 2535847bf383SLuigi Rizzo } 253637e3a6d3SLuigi Rizzo if (na->active_fds == 0) 253737e3a6d3SLuigi Rizzo na->na_flags |= NAF_NETMAP_ON; 2538f9790aebSLuigi Rizzo } else { 2539847bf383SLuigi Rizzo u_int i; 254037e3a6d3SLuigi Rizzo 254137e3a6d3SLuigi Rizzo if (na->active_fds == 0) 254237e3a6d3SLuigi Rizzo 
na->na_flags &= ~NAF_NETMAP_ON; 254337e3a6d3SLuigi Rizzo 2544847bf383SLuigi Rizzo /* reset all notify callbacks (including host ring) */ 2545847bf383SLuigi Rizzo for (i = 0; i <= hwna->num_rx_rings; i++) { 2546847bf383SLuigi Rizzo hwna->rx_rings[i].nm_notify = hwna->rx_rings[i].save_notify; 2547847bf383SLuigi Rizzo hwna->rx_rings[i].save_notify = NULL; 2548847bf383SLuigi Rizzo } 2549847bf383SLuigi Rizzo hwna->na_lut.lut = NULL; 2550847bf383SLuigi Rizzo hwna->na_lut.objtotal = 0; 2551847bf383SLuigi Rizzo hwna->na_lut.objsize = 0; 2552*4f80b14cSVincenzo Maffione 2553*4f80b14cSVincenzo Maffione /* pass ownership of the netmap rings to the hwna */ 2554*4f80b14cSVincenzo Maffione for_rx_tx(t) { 2555*4f80b14cSVincenzo Maffione for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { 2556*4f80b14cSVincenzo Maffione NMR(na, t)[i].ring = NULL; 2557*4f80b14cSVincenzo Maffione } 2558*4f80b14cSVincenzo Maffione } 2559*4f80b14cSVincenzo Maffione 2560f9790aebSLuigi Rizzo } 2561f9790aebSLuigi Rizzo 2562f9790aebSLuigi Rizzo return 0; 2563f9790aebSLuigi Rizzo } 2564f9790aebSLuigi Rizzo 25654bf50f18SLuigi Rizzo /* nm_config callback for bwrap */ 2566f9790aebSLuigi Rizzo static int 2567f9790aebSLuigi Rizzo netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd, 2568f9790aebSLuigi Rizzo u_int *rxr, u_int *rxd) 2569f9790aebSLuigi Rizzo { 2570f9790aebSLuigi Rizzo struct netmap_bwrap_adapter *bna = 2571f9790aebSLuigi Rizzo (struct netmap_bwrap_adapter *)na; 2572f9790aebSLuigi Rizzo struct netmap_adapter *hwna = bna->hwna; 2573f9790aebSLuigi Rizzo 2574f9790aebSLuigi Rizzo /* forward the request */ 2575f9790aebSLuigi Rizzo netmap_update_config(hwna); 2576f9790aebSLuigi Rizzo /* swap the results */ 2577f9790aebSLuigi Rizzo *txr = hwna->num_rx_rings; 2578f9790aebSLuigi Rizzo *txd = hwna->num_rx_desc; 2579f9790aebSLuigi Rizzo *rxr = hwna->num_tx_rings; 2580f9790aebSLuigi Rizzo *rxd = hwna->num_rx_desc; 2581f9790aebSLuigi Rizzo 2582f9790aebSLuigi Rizzo return 0; 2583f9790aebSLuigi 
Rizzo } 2584f9790aebSLuigi Rizzo 258517885a7bSLuigi Rizzo 25864bf50f18SLuigi Rizzo /* nm_krings_create callback for bwrap */ 2587f9790aebSLuigi Rizzo static int 2588f9790aebSLuigi Rizzo netmap_bwrap_krings_create(struct netmap_adapter *na) 2589f9790aebSLuigi Rizzo { 2590f9790aebSLuigi Rizzo struct netmap_bwrap_adapter *bna = 2591f9790aebSLuigi Rizzo (struct netmap_bwrap_adapter *)na; 2592f9790aebSLuigi Rizzo struct netmap_adapter *hwna = bna->hwna; 2593*4f80b14cSVincenzo Maffione struct netmap_adapter *hostna = &bna->host.up; 259437e3a6d3SLuigi Rizzo int i, error = 0; 259537e3a6d3SLuigi Rizzo enum txrx t; 2596f9790aebSLuigi Rizzo 25974bf50f18SLuigi Rizzo ND("%s", na->name); 2598f9790aebSLuigi Rizzo 25994bf50f18SLuigi Rizzo /* impersonate a netmap_vp_adapter */ 2600f9790aebSLuigi Rizzo error = netmap_vp_krings_create(na); 2601f9790aebSLuigi Rizzo if (error) 2602f9790aebSLuigi Rizzo return error; 2603f9790aebSLuigi Rizzo 26044bf50f18SLuigi Rizzo /* also create the hwna krings */ 2605f9790aebSLuigi Rizzo error = hwna->nm_krings_create(hwna); 2606f9790aebSLuigi Rizzo if (error) { 260737e3a6d3SLuigi Rizzo goto err_del_vp_rings; 2608f9790aebSLuigi Rizzo } 2609f9790aebSLuigi Rizzo 2610*4f80b14cSVincenzo Maffione /* increment the usage counter for all the hwna krings */ 2611*4f80b14cSVincenzo Maffione for_rx_tx(t) { 2612*4f80b14cSVincenzo Maffione for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) { 2613*4f80b14cSVincenzo Maffione NMR(hwna, t)[i].users++; 2614*4f80b14cSVincenzo Maffione } 2615*4f80b14cSVincenzo Maffione } 2616*4f80b14cSVincenzo Maffione 2617*4f80b14cSVincenzo Maffione /* now create the actual rings */ 2618*4f80b14cSVincenzo Maffione error = netmap_mem_rings_create(hwna); 2619*4f80b14cSVincenzo Maffione if (error) { 2620*4f80b14cSVincenzo Maffione goto err_dec_users; 2621*4f80b14cSVincenzo Maffione } 2622*4f80b14cSVincenzo Maffione 2623*4f80b14cSVincenzo Maffione /* cross-link the netmap rings 2624*4f80b14cSVincenzo Maffione * The original number of rings 
comes from hwna, 2625*4f80b14cSVincenzo Maffione * rx rings on one side equals tx rings on the other. 2626*4f80b14cSVincenzo Maffione */ 262737e3a6d3SLuigi Rizzo for_rx_tx(t) { 262837e3a6d3SLuigi Rizzo enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ 262937e3a6d3SLuigi Rizzo for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) { 263037e3a6d3SLuigi Rizzo NMR(na, t)[i].nkr_num_slots = NMR(hwna, r)[i].nkr_num_slots; 2631*4f80b14cSVincenzo Maffione NMR(na, t)[i].ring = NMR(hwna, r)[i].ring; 263237e3a6d3SLuigi Rizzo } 2633f0ea3689SLuigi Rizzo } 2634f9790aebSLuigi Rizzo 2635*4f80b14cSVincenzo Maffione if (na->na_flags & NAF_HOST_RINGS) { 2636*4f80b14cSVincenzo Maffione /* the hostna rings are the host rings of the bwrap. 2637*4f80b14cSVincenzo Maffione * The corresponding krings must point back to the 2638*4f80b14cSVincenzo Maffione * hostna 2639*4f80b14cSVincenzo Maffione */ 2640*4f80b14cSVincenzo Maffione hostna->tx_rings = &na->tx_rings[na->num_tx_rings]; 2641*4f80b14cSVincenzo Maffione hostna->tx_rings[0].na = hostna; 2642*4f80b14cSVincenzo Maffione hostna->rx_rings = &na->rx_rings[na->num_rx_rings]; 2643*4f80b14cSVincenzo Maffione hostna->rx_rings[0].na = hostna; 2644*4f80b14cSVincenzo Maffione } 2645*4f80b14cSVincenzo Maffione 2646f9790aebSLuigi Rizzo return 0; 264737e3a6d3SLuigi Rizzo 2648*4f80b14cSVincenzo Maffione err_dec_users: 2649*4f80b14cSVincenzo Maffione for_rx_tx(t) { 2650*4f80b14cSVincenzo Maffione NMR(hwna, t)[i].users--; 2651*4f80b14cSVincenzo Maffione } 2652*4f80b14cSVincenzo Maffione hwna->nm_krings_delete(hwna); 265337e3a6d3SLuigi Rizzo err_del_vp_rings: 265437e3a6d3SLuigi Rizzo netmap_vp_krings_delete(na); 265537e3a6d3SLuigi Rizzo 265637e3a6d3SLuigi Rizzo return error; 2657f9790aebSLuigi Rizzo } 2658f9790aebSLuigi Rizzo 265917885a7bSLuigi Rizzo 2660f9790aebSLuigi Rizzo static void 2661f9790aebSLuigi Rizzo netmap_bwrap_krings_delete(struct netmap_adapter *na) 2662f9790aebSLuigi Rizzo { 2663f9790aebSLuigi Rizzo struct netmap_bwrap_adapter *bna 
= 2664f9790aebSLuigi Rizzo (struct netmap_bwrap_adapter *)na; 2665f9790aebSLuigi Rizzo struct netmap_adapter *hwna = bna->hwna; 2666*4f80b14cSVincenzo Maffione enum txrx t; 2667*4f80b14cSVincenzo Maffione int i; 2668f9790aebSLuigi Rizzo 26694bf50f18SLuigi Rizzo ND("%s", na->name); 2670f9790aebSLuigi Rizzo 2671*4f80b14cSVincenzo Maffione /* decrement the usage counter for all the hwna krings */ 2672*4f80b14cSVincenzo Maffione for_rx_tx(t) { 2673*4f80b14cSVincenzo Maffione for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) { 2674*4f80b14cSVincenzo Maffione NMR(hwna, t)[i].users--; 2675*4f80b14cSVincenzo Maffione } 2676*4f80b14cSVincenzo Maffione } 2677*4f80b14cSVincenzo Maffione 2678*4f80b14cSVincenzo Maffione /* delete any netmap rings that are no longer needed */ 2679*4f80b14cSVincenzo Maffione netmap_mem_rings_delete(hwna); 2680f9790aebSLuigi Rizzo hwna->nm_krings_delete(hwna); 2681f9790aebSLuigi Rizzo netmap_vp_krings_delete(na); 2682f9790aebSLuigi Rizzo } 2683f9790aebSLuigi Rizzo 268417885a7bSLuigi Rizzo 2685f9790aebSLuigi Rizzo /* notify method for the bridge-->hwna direction */ 2686f9790aebSLuigi Rizzo static int 2687847bf383SLuigi Rizzo netmap_bwrap_notify(struct netmap_kring *kring, int flags) 2688f9790aebSLuigi Rizzo { 2689847bf383SLuigi Rizzo struct netmap_adapter *na = kring->na; 2690847bf383SLuigi Rizzo struct netmap_bwrap_adapter *bna = na->na_private; 2691f9790aebSLuigi Rizzo struct netmap_adapter *hwna = bna->hwna; 2692847bf383SLuigi Rizzo u_int ring_n = kring->ring_id; 2693847bf383SLuigi Rizzo u_int lim = kring->nkr_num_slots - 1; 2694847bf383SLuigi Rizzo struct netmap_kring *hw_kring; 269537e3a6d3SLuigi Rizzo int error; 2696f9790aebSLuigi Rizzo 2697847bf383SLuigi Rizzo ND("%s: na %s hwna %s", 2698847bf383SLuigi Rizzo (kring ? kring->name : "NULL!"), 2699847bf383SLuigi Rizzo (na ? na->name : "NULL!"), 2700847bf383SLuigi Rizzo (hwna ? 
hwna->name : "NULL!")); 2701f9790aebSLuigi Rizzo hw_kring = &hwna->tx_rings[ring_n]; 2702847bf383SLuigi Rizzo 270337e3a6d3SLuigi Rizzo if (nm_kr_tryget(hw_kring, 0, NULL)) { 270437e3a6d3SLuigi Rizzo return ENXIO; 270537e3a6d3SLuigi Rizzo } 2706f9790aebSLuigi Rizzo 270717885a7bSLuigi Rizzo /* first step: simulate a user wakeup on the rx ring */ 2708847bf383SLuigi Rizzo netmap_vp_rxsync(kring, flags); 270917885a7bSLuigi Rizzo ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", 27104bf50f18SLuigi Rizzo na->name, ring_n, 271117885a7bSLuigi Rizzo kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, 271217885a7bSLuigi Rizzo ring->head, ring->cur, ring->tail, 271317885a7bSLuigi Rizzo hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail); 2714847bf383SLuigi Rizzo /* second step: the new packets are sent on the tx ring 271517885a7bSLuigi Rizzo * (which is actually the same ring) 271617885a7bSLuigi Rizzo */ 2717847bf383SLuigi Rizzo hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail; 2718f0ea3689SLuigi Rizzo error = hw_kring->nm_sync(hw_kring, flags); 2719847bf383SLuigi Rizzo if (error) 272037e3a6d3SLuigi Rizzo goto put_out; 272117885a7bSLuigi Rizzo 2722847bf383SLuigi Rizzo /* third step: now we are back the rx ring */ 272317885a7bSLuigi Rizzo /* claim ownership on all hw owned bufs */ 2724847bf383SLuigi Rizzo kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */ 272517885a7bSLuigi Rizzo 2726847bf383SLuigi Rizzo /* fourth step: the user goes to sleep again, causing another rxsync */ 2727847bf383SLuigi Rizzo netmap_vp_rxsync(kring, flags); 272817885a7bSLuigi Rizzo ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", 27294bf50f18SLuigi Rizzo na->name, ring_n, 273017885a7bSLuigi Rizzo kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, 273117885a7bSLuigi Rizzo ring->head, ring->cur, ring->tail, 273217885a7bSLuigi Rizzo hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail); 
273337e3a6d3SLuigi Rizzo put_out: 2734847bf383SLuigi Rizzo nm_kr_put(hw_kring); 273537e3a6d3SLuigi Rizzo 273637e3a6d3SLuigi Rizzo return error ? error : NM_IRQ_COMPLETED; 2737f9790aebSLuigi Rizzo } 2738f9790aebSLuigi Rizzo 273917885a7bSLuigi Rizzo 27404bf50f18SLuigi Rizzo /* nm_bdg_ctl callback for the bwrap. 27414bf50f18SLuigi Rizzo * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd]. 27424bf50f18SLuigi Rizzo * On attach, it needs to provide a fake netmap_priv_d structure and 27434bf50f18SLuigi Rizzo * perform a netmap_do_regif() on the bwrap. This will put both the 27444bf50f18SLuigi Rizzo * bwrap and the hwna in netmap mode, with the netmap rings shared 27454bf50f18SLuigi Rizzo * and cross linked. Moroever, it will start intercepting interrupts 27464bf50f18SLuigi Rizzo * directed to hwna. 27474bf50f18SLuigi Rizzo */ 2748f9790aebSLuigi Rizzo static int 27494bf50f18SLuigi Rizzo netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) 27504bf50f18SLuigi Rizzo { 27514bf50f18SLuigi Rizzo struct netmap_priv_d *npriv; 27524bf50f18SLuigi Rizzo struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; 27534bf50f18SLuigi Rizzo int error = 0; 27544bf50f18SLuigi Rizzo 27554bf50f18SLuigi Rizzo if (attach) { 27564bf50f18SLuigi Rizzo if (NETMAP_OWNED_BY_ANY(na)) { 27574bf50f18SLuigi Rizzo return EBUSY; 27584bf50f18SLuigi Rizzo } 27594bf50f18SLuigi Rizzo if (bna->na_kpriv) { 27604bf50f18SLuigi Rizzo /* nothing to do */ 27614bf50f18SLuigi Rizzo return 0; 27624bf50f18SLuigi Rizzo } 276337e3a6d3SLuigi Rizzo npriv = netmap_priv_new(); 27644bf50f18SLuigi Rizzo if (npriv == NULL) 27654bf50f18SLuigi Rizzo return ENOMEM; 276637e3a6d3SLuigi Rizzo npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */ 2767*4f80b14cSVincenzo Maffione error = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags); 2768847bf383SLuigi Rizzo if (error) { 276937e3a6d3SLuigi Rizzo netmap_priv_delete(npriv); 27704bf50f18SLuigi Rizzo return 
error; 27714bf50f18SLuigi Rizzo } 27724bf50f18SLuigi Rizzo bna->na_kpriv = npriv; 27734bf50f18SLuigi Rizzo na->na_flags |= NAF_BUSY; 27744bf50f18SLuigi Rizzo } else { 27754bf50f18SLuigi Rizzo if (na->active_fds == 0) /* not registered */ 27764bf50f18SLuigi Rizzo return EINVAL; 277737e3a6d3SLuigi Rizzo netmap_priv_delete(bna->na_kpriv); 27784bf50f18SLuigi Rizzo bna->na_kpriv = NULL; 27794bf50f18SLuigi Rizzo na->na_flags &= ~NAF_BUSY; 27804bf50f18SLuigi Rizzo } 27814bf50f18SLuigi Rizzo return error; 27824bf50f18SLuigi Rizzo 27834bf50f18SLuigi Rizzo } 27844bf50f18SLuigi Rizzo 27854bf50f18SLuigi Rizzo /* attach a bridge wrapper to the 'real' device */ 27864bf50f18SLuigi Rizzo int 27874bf50f18SLuigi Rizzo netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) 2788f9790aebSLuigi Rizzo { 2789f9790aebSLuigi Rizzo struct netmap_bwrap_adapter *bna; 27904bf50f18SLuigi Rizzo struct netmap_adapter *na = NULL; 27914bf50f18SLuigi Rizzo struct netmap_adapter *hostna = NULL; 27924bf50f18SLuigi Rizzo int error = 0; 2793847bf383SLuigi Rizzo enum txrx t; 2794f9790aebSLuigi Rizzo 27954bf50f18SLuigi Rizzo /* make sure the NIC is not already in use */ 27964bf50f18SLuigi Rizzo if (NETMAP_OWNED_BY_ANY(hwna)) { 27974bf50f18SLuigi Rizzo D("NIC %s busy, cannot attach to bridge", hwna->name); 27984bf50f18SLuigi Rizzo return EBUSY; 27994bf50f18SLuigi Rizzo } 2800f9790aebSLuigi Rizzo 2801c3e9b4dbSLuiz Otavio O Souza bna = nm_os_malloc(sizeof(*bna)); 28024bf50f18SLuigi Rizzo if (bna == NULL) { 2803f9790aebSLuigi Rizzo return ENOMEM; 28044bf50f18SLuigi Rizzo } 2805f9790aebSLuigi Rizzo 2806f9790aebSLuigi Rizzo na = &bna->up.up; 280737e3a6d3SLuigi Rizzo /* make bwrap ifp point to the real ifp */ 280837e3a6d3SLuigi Rizzo na->ifp = hwna->ifp; 2809c3e9b4dbSLuiz Otavio O Souza if_ref(na->ifp); 2810847bf383SLuigi Rizzo na->na_private = bna; 28114bf50f18SLuigi Rizzo strncpy(na->name, nr_name, sizeof(na->name)); 2812f9790aebSLuigi Rizzo /* fill the ring data for the bwrap adapter with rx/tx 
meanings 2813f9790aebSLuigi Rizzo * swapped. The real cross-linking will be done during register, 2814f9790aebSLuigi Rizzo * when all the krings will have been created. 2815f9790aebSLuigi Rizzo */ 2816847bf383SLuigi Rizzo for_rx_tx(t) { 2817847bf383SLuigi Rizzo enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ 2818847bf383SLuigi Rizzo nma_set_nrings(na, t, nma_get_nrings(hwna, r)); 2819847bf383SLuigi Rizzo nma_set_ndesc(na, t, nma_get_ndesc(hwna, r)); 2820847bf383SLuigi Rizzo } 2821f9790aebSLuigi Rizzo na->nm_dtor = netmap_bwrap_dtor; 282237e3a6d3SLuigi Rizzo na->nm_register = netmap_bwrap_reg; 2823f9790aebSLuigi Rizzo // na->nm_txsync = netmap_bwrap_txsync; 2824f9790aebSLuigi Rizzo // na->nm_rxsync = netmap_bwrap_rxsync; 2825f9790aebSLuigi Rizzo na->nm_config = netmap_bwrap_config; 2826f9790aebSLuigi Rizzo na->nm_krings_create = netmap_bwrap_krings_create; 2827f9790aebSLuigi Rizzo na->nm_krings_delete = netmap_bwrap_krings_delete; 2828f9790aebSLuigi Rizzo na->nm_notify = netmap_bwrap_notify; 28294bf50f18SLuigi Rizzo na->nm_bdg_ctl = netmap_bwrap_bdg_ctl; 28304bf50f18SLuigi Rizzo na->pdev = hwna->pdev; 2831c3e9b4dbSLuiz Otavio O Souza na->nm_mem = netmap_mem_get(hwna->nm_mem); 283237e3a6d3SLuigi Rizzo na->virt_hdr_len = hwna->virt_hdr_len; 2833f9790aebSLuigi Rizzo bna->up.retry = 1; /* XXX maybe this should depend on the hwna */ 2834*4f80b14cSVincenzo Maffione /* Set the mfs, needed on the VALE mismatch datapath. 
*/ 2835*4f80b14cSVincenzo Maffione bna->up.mfs = NM_BDG_MFS_DEFAULT; 2836f9790aebSLuigi Rizzo 2837f9790aebSLuigi Rizzo bna->hwna = hwna; 2838f9790aebSLuigi Rizzo netmap_adapter_get(hwna); 2839f9790aebSLuigi Rizzo hwna->na_private = bna; /* weak reference */ 28404bf50f18SLuigi Rizzo hwna->na_vp = &bna->up; 2841f9790aebSLuigi Rizzo 2842f0ea3689SLuigi Rizzo if (hwna->na_flags & NAF_HOST_RINGS) { 28434bf50f18SLuigi Rizzo if (hwna->na_flags & NAF_SW_ONLY) 28444bf50f18SLuigi Rizzo na->na_flags |= NAF_SW_ONLY; 2845f0ea3689SLuigi Rizzo na->na_flags |= NAF_HOST_RINGS; 2846f9790aebSLuigi Rizzo hostna = &bna->host.up; 28474bf50f18SLuigi Rizzo snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name); 2848f9790aebSLuigi Rizzo hostna->ifp = hwna->ifp; 2849847bf383SLuigi Rizzo for_rx_tx(t) { 2850847bf383SLuigi Rizzo enum txrx r = nm_txrx_swap(t); 2851847bf383SLuigi Rizzo nma_set_nrings(hostna, t, 1); 2852847bf383SLuigi Rizzo nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r)); 2853847bf383SLuigi Rizzo } 2854f9790aebSLuigi Rizzo // hostna->nm_txsync = netmap_bwrap_host_txsync; 2855f9790aebSLuigi Rizzo // hostna->nm_rxsync = netmap_bwrap_host_rxsync; 2856847bf383SLuigi Rizzo hostna->nm_notify = netmap_bwrap_notify; 2857c3e9b4dbSLuiz Otavio O Souza hostna->nm_mem = netmap_mem_get(na->nm_mem); 2858f9790aebSLuigi Rizzo hostna->na_private = bna; 28594bf50f18SLuigi Rizzo hostna->na_vp = &bna->up; 28604bf50f18SLuigi Rizzo na->na_hostvp = hwna->na_hostvp = 28614bf50f18SLuigi Rizzo hostna->na_hostvp = &bna->host; 28624bf50f18SLuigi Rizzo hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */ 2863*4f80b14cSVincenzo Maffione bna->host.mfs = NM_BDG_MFS_DEFAULT; 2864f0ea3689SLuigi Rizzo } 2865f9790aebSLuigi Rizzo 286617885a7bSLuigi Rizzo ND("%s<->%s txr %d txd %d rxr %d rxd %d", 28674bf50f18SLuigi Rizzo na->name, ifp->if_xname, 2868f9790aebSLuigi Rizzo na->num_tx_rings, na->num_tx_desc, 2869f9790aebSLuigi Rizzo na->num_rx_rings, na->num_rx_desc); 2870f9790aebSLuigi Rizzo 2871f9790aebSLuigi 
Rizzo error = netmap_attach_common(na); 2872f9790aebSLuigi Rizzo if (error) { 28734bf50f18SLuigi Rizzo goto err_free; 28744bf50f18SLuigi Rizzo } 28754bf50f18SLuigi Rizzo hwna->na_flags |= NAF_BUSY; 28764bf50f18SLuigi Rizzo return 0; 28774bf50f18SLuigi Rizzo 28784bf50f18SLuigi Rizzo err_free: 28794bf50f18SLuigi Rizzo hwna->na_vp = hwna->na_hostvp = NULL; 2880f9790aebSLuigi Rizzo netmap_adapter_put(hwna); 2881c3e9b4dbSLuiz Otavio O Souza nm_os_free(bna); 2882f9790aebSLuigi Rizzo return error; 28834bf50f18SLuigi Rizzo 2884f9790aebSLuigi Rizzo } 2885f9790aebSLuigi Rizzo 2886847bf383SLuigi Rizzo struct nm_bridge * 2887847bf383SLuigi Rizzo netmap_init_bridges2(u_int n) 2888f9790aebSLuigi Rizzo { 2889f9790aebSLuigi Rizzo int i; 2890847bf383SLuigi Rizzo struct nm_bridge *b; 2891847bf383SLuigi Rizzo 2892c3e9b4dbSLuiz Otavio O Souza b = nm_os_malloc(sizeof(struct nm_bridge) * n); 2893847bf383SLuigi Rizzo if (b == NULL) 2894847bf383SLuigi Rizzo return NULL; 2895847bf383SLuigi Rizzo for (i = 0; i < n; i++) 2896847bf383SLuigi Rizzo BDG_RWINIT(&b[i]); 2897847bf383SLuigi Rizzo return b; 2898847bf383SLuigi Rizzo } 2899847bf383SLuigi Rizzo 2900847bf383SLuigi Rizzo void 2901847bf383SLuigi Rizzo netmap_uninit_bridges2(struct nm_bridge *b, u_int n) 2902847bf383SLuigi Rizzo { 2903847bf383SLuigi Rizzo int i; 2904847bf383SLuigi Rizzo 2905847bf383SLuigi Rizzo if (b == NULL) 2906847bf383SLuigi Rizzo return; 2907847bf383SLuigi Rizzo 2908847bf383SLuigi Rizzo for (i = 0; i < n; i++) 2909847bf383SLuigi Rizzo BDG_RWDESTROY(&b[i]); 2910c3e9b4dbSLuiz Otavio O Souza nm_os_free(b); 2911847bf383SLuigi Rizzo } 2912847bf383SLuigi Rizzo 2913847bf383SLuigi Rizzo int 2914847bf383SLuigi Rizzo netmap_init_bridges(void) 2915847bf383SLuigi Rizzo { 2916847bf383SLuigi Rizzo #ifdef CONFIG_NET_NS 2917847bf383SLuigi Rizzo return netmap_bns_register(); 2918847bf383SLuigi Rizzo #else 2919847bf383SLuigi Rizzo nm_bridges = netmap_init_bridges2(NM_BRIDGES); 2920847bf383SLuigi Rizzo if (nm_bridges == NULL) 
2921847bf383SLuigi Rizzo return ENOMEM; 2922847bf383SLuigi Rizzo return 0; 2923847bf383SLuigi Rizzo #endif 2924847bf383SLuigi Rizzo } 2925847bf383SLuigi Rizzo 2926847bf383SLuigi Rizzo void 2927847bf383SLuigi Rizzo netmap_uninit_bridges(void) 2928847bf383SLuigi Rizzo { 2929847bf383SLuigi Rizzo #ifdef CONFIG_NET_NS 2930847bf383SLuigi Rizzo netmap_bns_unregister(); 2931847bf383SLuigi Rizzo #else 2932847bf383SLuigi Rizzo netmap_uninit_bridges2(nm_bridges, NM_BRIDGES); 2933847bf383SLuigi Rizzo #endif 2934f9790aebSLuigi Rizzo } 2935f9790aebSLuigi Rizzo #endif /* WITH_VALE */ 2936