1718cf2ccSPedro F. Giffuni /*- 2718cf2ccSPedro F. Giffuni * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3718cf2ccSPedro F. Giffuni * 437e3a6d3SLuigi Rizzo * Copyright (C) 2013-2016 Universita` di Pisa 537e3a6d3SLuigi Rizzo * All rights reserved. 6f9790aebSLuigi Rizzo * 7f9790aebSLuigi Rizzo * Redistribution and use in source and binary forms, with or without 8f9790aebSLuigi Rizzo * modification, are permitted provided that the following conditions 9f9790aebSLuigi Rizzo * are met: 10f9790aebSLuigi Rizzo * 1. Redistributions of source code must retain the above copyright 11f9790aebSLuigi Rizzo * notice, this list of conditions and the following disclaimer. 12f9790aebSLuigi Rizzo * 2. Redistributions in binary form must reproduce the above copyright 13f9790aebSLuigi Rizzo * notice, this list of conditions and the following disclaimer in the 14f9790aebSLuigi Rizzo * documentation and/or other materials provided with the distribution. 15f9790aebSLuigi Rizzo * 16f9790aebSLuigi Rizzo * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17f9790aebSLuigi Rizzo * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18f9790aebSLuigi Rizzo * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19f9790aebSLuigi Rizzo * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20f9790aebSLuigi Rizzo * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21f9790aebSLuigi Rizzo * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22f9790aebSLuigi Rizzo * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23f9790aebSLuigi Rizzo * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24f9790aebSLuigi Rizzo * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25f9790aebSLuigi Rizzo * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26f9790aebSLuigi Rizzo * SUCH DAMAGE. 27f9790aebSLuigi Rizzo */ 28f9790aebSLuigi Rizzo 29f9790aebSLuigi Rizzo 30f9790aebSLuigi Rizzo #if defined(__FreeBSD__) 31f9790aebSLuigi Rizzo #include <sys/cdefs.h> /* prerequisite */ 32f9790aebSLuigi Rizzo __FBSDID("$FreeBSD$"); 33f9790aebSLuigi Rizzo 34f9790aebSLuigi Rizzo #include <sys/types.h> 35f9790aebSLuigi Rizzo #include <sys/errno.h> 36f9790aebSLuigi Rizzo #include <sys/param.h> /* defines used in kernel.h */ 37f9790aebSLuigi Rizzo #include <sys/kernel.h> /* types used in module initialization */ 38f9790aebSLuigi Rizzo #include <sys/conf.h> /* cdevsw struct, UID, GID */ 39f9790aebSLuigi Rizzo #include <sys/sockio.h> 40f9790aebSLuigi Rizzo #include <sys/socketvar.h> /* struct socket */ 41f9790aebSLuigi Rizzo #include <sys/malloc.h> 42f9790aebSLuigi Rizzo #include <sys/poll.h> 43f9790aebSLuigi Rizzo #include <sys/rwlock.h> 44f9790aebSLuigi Rizzo #include <sys/socket.h> /* sockaddrs */ 45f9790aebSLuigi Rizzo #include <sys/selinfo.h> 46f9790aebSLuigi Rizzo #include <sys/sysctl.h> 47f9790aebSLuigi Rizzo #include <net/if.h> 48f9790aebSLuigi Rizzo #include <net/if_var.h> 49f9790aebSLuigi Rizzo #include <net/bpf.h> /* BIOCIMMEDIATE */ 50f9790aebSLuigi Rizzo #include <machine/bus.h> /* bus_dmamap_* */ 51f9790aebSLuigi Rizzo #include <sys/endian.h> 52f9790aebSLuigi Rizzo #include <sys/refcount.h> 532a7db7a6SVincenzo Maffione #include <sys/smp.h> 54f9790aebSLuigi Rizzo 55f9790aebSLuigi Rizzo 56f9790aebSLuigi Rizzo #elif defined(linux) 57f9790aebSLuigi Rizzo 58f9790aebSLuigi Rizzo #include "bsd_glue.h" 59f9790aebSLuigi Rizzo 60f9790aebSLuigi Rizzo #elif defined(__APPLE__) 61f9790aebSLuigi Rizzo 62f9790aebSLuigi Rizzo #warning OSX support is only partial 63f9790aebSLuigi Rizzo #include "osx_glue.h" 64f9790aebSLuigi Rizzo 6537e3a6d3SLuigi Rizzo #elif defined(_WIN32) 6637e3a6d3SLuigi Rizzo #include "win_glue.h" 6737e3a6d3SLuigi Rizzo 68f9790aebSLuigi Rizzo #else 69f9790aebSLuigi Rizzo 70f9790aebSLuigi Rizzo #error Unsupported platform 71f9790aebSLuigi Rizzo 72f9790aebSLuigi Rizzo #endif /* unsupported */ 73f9790aebSLuigi Rizzo 74f9790aebSLuigi Rizzo /* 75f9790aebSLuigi Rizzo * common headers 76f9790aebSLuigi Rizzo */ 77f9790aebSLuigi Rizzo 78f9790aebSLuigi Rizzo #include <net/netmap.h> 79f9790aebSLuigi Rizzo #include <dev/netmap/netmap_kern.h> 80f9790aebSLuigi Rizzo #include <dev/netmap/netmap_mem2.h> 812a7db7a6SVincenzo Maffione #include <dev/netmap/netmap_bdg.h> 82f9790aebSLuigi Rizzo 83f9790aebSLuigi Rizzo #ifdef WITH_VALE 84f9790aebSLuigi Rizzo 85f9790aebSLuigi Rizzo /* 86f9790aebSLuigi Rizzo * system parameters (most of them in netmap_kern.h) 8737e3a6d3SLuigi Rizzo * NM_BDG_NAME prefix for switch port names, default "vale" 88f9790aebSLuigi Rizzo * NM_BDG_MAXPORTS number of ports 89f9790aebSLuigi Rizzo * NM_BRIDGES max number of switches in the system. 90f9790aebSLuigi Rizzo * XXX should become a sysctl or tunable 91f9790aebSLuigi Rizzo * 92f9790aebSLuigi Rizzo * Switch ports are named valeX:Y where X is the switch name and Y 93f9790aebSLuigi Rizzo * is the port. If Y matches a physical interface name, the port is 94f9790aebSLuigi Rizzo * connected to a physical device. 95f9790aebSLuigi Rizzo * 96f9790aebSLuigi Rizzo * Unlike physical interfaces, switch ports use their own memory region 97f9790aebSLuigi Rizzo * for rings and buffers. 98f9790aebSLuigi Rizzo * The virtual interfaces use per-queue lock instead of core lock. 99f9790aebSLuigi Rizzo * In the tx loop, we aggregate traffic in batches to make all operations 100f9790aebSLuigi Rizzo * faster. The batch size is bridge_batch. 101f9790aebSLuigi Rizzo */ 102f9790aebSLuigi Rizzo #define NM_BDG_MAXRINGS 16 /* XXX unclear how many. */ 103f9790aebSLuigi Rizzo #define NM_BDG_MAXSLOTS 4096 /* XXX same as above */ 104f9790aebSLuigi Rizzo #define NM_BRIDGE_RINGSIZE 1024 /* in the device */ 105f9790aebSLuigi Rizzo #define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */ 106f9790aebSLuigi Rizzo /* actual size of the tables */ 1072a7db7a6SVincenzo Maffione #define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NETMAP_MAX_FRAGS) 108f9790aebSLuigi Rizzo /* NM_FT_NULL terminates a list of slots in the ft */ 109f9790aebSLuigi Rizzo #define NM_FT_NULL NM_BDG_BATCH_MAX 110f9790aebSLuigi Rizzo 111f9790aebSLuigi Rizzo 112f9790aebSLuigi Rizzo /* 113f9790aebSLuigi Rizzo * bridge_batch is set via sysctl to the max batch size to be 114f9790aebSLuigi Rizzo * used in the bridge. The actual value may be larger as the 115f9790aebSLuigi Rizzo * last packet in the block may overflow the size. 116f9790aebSLuigi Rizzo */ 11737e3a6d3SLuigi Rizzo static int bridge_batch = NM_BDG_BATCH; /* bridge batch size */ 11837e3a6d3SLuigi Rizzo SYSBEGIN(vars_vale); 119f9790aebSLuigi Rizzo SYSCTL_DECL(_dev_netmap); 1204f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0, 1214f80b14cSVincenzo Maffione "Max batch size to be used in the bridge"); 12237e3a6d3SLuigi Rizzo SYSEND; 123f9790aebSLuigi Rizzo 124*b6e66be2SVincenzo Maffione static int netmap_vale_vp_create(struct nmreq_header *hdr, struct ifnet *, 125c3e9b4dbSLuiz Otavio O Souza struct netmap_mem_d *nmd, struct netmap_vp_adapter **); 126*b6e66be2SVincenzo Maffione static int netmap_vale_vp_bdg_attach(const char *, struct netmap_adapter *, 1272a7db7a6SVincenzo Maffione struct nm_bridge *); 1282a7db7a6SVincenzo Maffione static int netmap_vale_bwrap_attach(const char *, struct netmap_adapter *); 129f9790aebSLuigi Rizzo 130f9790aebSLuigi Rizzo /* 131*b6e66be2SVincenzo Maffione * For each output interface, nm_vale_q is used to construct a list. 132f9790aebSLuigi Rizzo * bq_len is the number of output buffers (we can have coalescing 133f9790aebSLuigi Rizzo * during the copy). 134f9790aebSLuigi Rizzo */ 135*b6e66be2SVincenzo Maffione struct nm_vale_q { 136f9790aebSLuigi Rizzo uint16_t bq_head; 137f9790aebSLuigi Rizzo uint16_t bq_tail; 138f9790aebSLuigi Rizzo uint32_t bq_len; /* number of buffers */ 139f9790aebSLuigi Rizzo }; 140f9790aebSLuigi Rizzo 1412ff91c17SVincenzo Maffione /* Holds the default callbacks */ 1422a7db7a6SVincenzo Maffione struct netmap_bdg_ops vale_bdg_ops = { 143*b6e66be2SVincenzo Maffione .lookup = netmap_vale_learning, 1442a7db7a6SVincenzo Maffione .config = NULL, 1452a7db7a6SVincenzo Maffione .dtor = NULL, 146*b6e66be2SVincenzo Maffione .vp_create = netmap_vale_vp_create, 1472a7db7a6SVincenzo Maffione .bwrap_attach = netmap_vale_bwrap_attach, 1482a7db7a6SVincenzo Maffione .name = NM_BDG_NAME, 149f9790aebSLuigi Rizzo }; 150f9790aebSLuigi Rizzo 151f9790aebSLuigi Rizzo /* 152f9790aebSLuigi Rizzo * this is a slightly optimized copy routine which rounds 153f9790aebSLuigi Rizzo * to multiple of 64 bytes and is often faster than dealing 154f9790aebSLuigi Rizzo * with other odd sizes. We assume there is enough room 155f9790aebSLuigi Rizzo * in the source and destination buffers. 156f9790aebSLuigi Rizzo * 157f9790aebSLuigi Rizzo * XXX only for multiples of 64 bytes, non overlapped. 158f9790aebSLuigi Rizzo */ 159f9790aebSLuigi Rizzo static inline void 160f9790aebSLuigi Rizzo pkt_copy(void *_src, void *_dst, int l) 161f9790aebSLuigi Rizzo { 162f9790aebSLuigi Rizzo uint64_t *src = _src; 163f9790aebSLuigi Rizzo uint64_t *dst = _dst; 164f9790aebSLuigi Rizzo if (unlikely(l >= 1024)) { 165f9790aebSLuigi Rizzo memcpy(dst, src, l); 166f9790aebSLuigi Rizzo return; 167f9790aebSLuigi Rizzo } 168f9790aebSLuigi Rizzo for (; likely(l > 0); l-=64) { 169f9790aebSLuigi Rizzo *dst++ = *src++; 170f9790aebSLuigi Rizzo *dst++ = *src++; 171f9790aebSLuigi Rizzo *dst++ = *src++; 172f9790aebSLuigi Rizzo *dst++ = *src++; 173f9790aebSLuigi Rizzo *dst++ = *src++; 174f9790aebSLuigi Rizzo *dst++ = *src++; 175f9790aebSLuigi Rizzo *dst++ = *src++; 176f9790aebSLuigi Rizzo *dst++ = *src++; 177f9790aebSLuigi Rizzo } 178f9790aebSLuigi Rizzo } 179f9790aebSLuigi Rizzo 180f9790aebSLuigi Rizzo 181f9790aebSLuigi Rizzo /* 182f9790aebSLuigi Rizzo * Free the forwarding tables for rings attached to switch ports. 183f9790aebSLuigi Rizzo */ 184f9790aebSLuigi Rizzo static void 185f9790aebSLuigi Rizzo nm_free_bdgfwd(struct netmap_adapter *na) 186f9790aebSLuigi Rizzo { 187f9790aebSLuigi Rizzo int nrings, i; 1882ff91c17SVincenzo Maffione struct netmap_kring **kring; 189f9790aebSLuigi Rizzo 190f9790aebSLuigi Rizzo NMG_LOCK_ASSERT(); 19117885a7bSLuigi Rizzo nrings = na->num_tx_rings; 19217885a7bSLuigi Rizzo kring = na->tx_rings; 193f9790aebSLuigi Rizzo for (i = 0; i < nrings; i++) { 1942ff91c17SVincenzo Maffione if (kring[i]->nkr_ft) { 1952ff91c17SVincenzo Maffione nm_os_free(kring[i]->nkr_ft); 1962ff91c17SVincenzo Maffione kring[i]->nkr_ft = NULL; /* protect from freeing twice */ 197f9790aebSLuigi Rizzo } 198f9790aebSLuigi Rizzo } 199f9790aebSLuigi Rizzo } 200f9790aebSLuigi Rizzo 201f9790aebSLuigi Rizzo 202f9790aebSLuigi Rizzo /* 203f9790aebSLuigi Rizzo * Allocate the forwarding tables for the rings attached to the bridge ports. 204f9790aebSLuigi Rizzo */ 205f9790aebSLuigi Rizzo static int 206f9790aebSLuigi Rizzo nm_alloc_bdgfwd(struct netmap_adapter *na) 207f9790aebSLuigi Rizzo { 208f9790aebSLuigi Rizzo int nrings, l, i, num_dstq; 2092ff91c17SVincenzo Maffione struct netmap_kring **kring; 210f9790aebSLuigi Rizzo 211f9790aebSLuigi Rizzo NMG_LOCK_ASSERT(); 212f9790aebSLuigi Rizzo /* all port:rings + broadcast */ 213f9790aebSLuigi Rizzo num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1; 214f9790aebSLuigi Rizzo l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX; 215*b6e66be2SVincenzo Maffione l += sizeof(struct nm_vale_q) * num_dstq; 216f9790aebSLuigi Rizzo l += sizeof(uint16_t) * NM_BDG_BATCH_MAX; 217f9790aebSLuigi Rizzo 218847bf383SLuigi Rizzo nrings = netmap_real_rings(na, NR_TX); 219f9790aebSLuigi Rizzo kring = na->tx_rings; 220f9790aebSLuigi Rizzo for (i = 0; i < nrings; i++) { 221f9790aebSLuigi Rizzo struct nm_bdg_fwd *ft; 222*b6e66be2SVincenzo Maffione struct nm_vale_q *dstq; 223f9790aebSLuigi Rizzo int j; 224f9790aebSLuigi Rizzo 225c3e9b4dbSLuiz Otavio O Souza ft = nm_os_malloc(l); 226f9790aebSLuigi Rizzo if (!ft) { 227f9790aebSLuigi Rizzo nm_free_bdgfwd(na); 228f9790aebSLuigi Rizzo return ENOMEM; 229f9790aebSLuigi Rizzo } 230*b6e66be2SVincenzo Maffione dstq = (struct nm_vale_q *)(ft + NM_BDG_BATCH_MAX); 231f9790aebSLuigi Rizzo for (j = 0; j < num_dstq; j++) { 232f9790aebSLuigi Rizzo dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL; 233f9790aebSLuigi Rizzo dstq[j].bq_len = 0; 234f9790aebSLuigi Rizzo } 2352ff91c17SVincenzo Maffione kring[i]->nkr_ft = ft; 236f9790aebSLuigi Rizzo } 237f9790aebSLuigi Rizzo return 0; 238f9790aebSLuigi Rizzo } 239f9790aebSLuigi Rizzo 2402ff91c17SVincenzo Maffione /* Allows external modules to create bridges in exclusive mode, 2412ff91c17SVincenzo Maffione * returns an authentication token that the external module will need 2422ff91c17SVincenzo Maffione * to provide during nm_bdg_ctl_{attach, detach}(), netmap_bdg_regops(), 2432ff91c17SVincenzo Maffione * and nm_bdg_update_private_data() operations. 2442ff91c17SVincenzo Maffione * Successfully executed if ret != NULL and *return_status == 0. 2452ff91c17SVincenzo Maffione */ 2462ff91c17SVincenzo Maffione void * 2472a7db7a6SVincenzo Maffione netmap_vale_create(const char *bdg_name, int *return_status) 2482ff91c17SVincenzo Maffione { 2492ff91c17SVincenzo Maffione struct nm_bridge *b = NULL; 2502ff91c17SVincenzo Maffione void *ret = NULL; 2512ff91c17SVincenzo Maffione 2522ff91c17SVincenzo Maffione NMG_LOCK(); 2532a7db7a6SVincenzo Maffione b = nm_find_bridge(bdg_name, 0 /* don't create */, NULL); 2542ff91c17SVincenzo Maffione if (b) { 2552ff91c17SVincenzo Maffione *return_status = EEXIST; 2562ff91c17SVincenzo Maffione goto unlock_bdg_create; 2572ff91c17SVincenzo Maffione } 2582ff91c17SVincenzo Maffione 2592a7db7a6SVincenzo Maffione b = nm_find_bridge(bdg_name, 1 /* create */, &vale_bdg_ops); 2602ff91c17SVincenzo Maffione if (!b) { 2612ff91c17SVincenzo Maffione *return_status = ENOMEM; 2622ff91c17SVincenzo Maffione goto unlock_bdg_create; 2632ff91c17SVincenzo Maffione } 2642ff91c17SVincenzo Maffione 2652ff91c17SVincenzo Maffione b->bdg_flags |= NM_BDG_ACTIVE | NM_BDG_EXCLUSIVE; 2662ff91c17SVincenzo Maffione ret = nm_bdg_get_auth_token(b); 2672ff91c17SVincenzo Maffione *return_status = 0; 2682ff91c17SVincenzo Maffione 2692ff91c17SVincenzo Maffione unlock_bdg_create: 2702ff91c17SVincenzo Maffione NMG_UNLOCK(); 2712ff91c17SVincenzo Maffione return ret; 2722ff91c17SVincenzo Maffione } 2732ff91c17SVincenzo Maffione 2742ff91c17SVincenzo Maffione /* Allows external modules to destroy a bridge created through 2752ff91c17SVincenzo Maffione * netmap_bdg_create(), the bridge must be empty. 2762ff91c17SVincenzo Maffione */ 2772ff91c17SVincenzo Maffione int 2782a7db7a6SVincenzo Maffione netmap_vale_destroy(const char *bdg_name, void *auth_token) 2792ff91c17SVincenzo Maffione { 2802ff91c17SVincenzo Maffione struct nm_bridge *b = NULL; 2812ff91c17SVincenzo Maffione int ret = 0; 2822ff91c17SVincenzo Maffione 2832ff91c17SVincenzo Maffione NMG_LOCK(); 2842a7db7a6SVincenzo Maffione b = nm_find_bridge(bdg_name, 0 /* don't create */, NULL); 2852ff91c17SVincenzo Maffione if (!b) { 2862ff91c17SVincenzo Maffione ret = ENXIO; 2872ff91c17SVincenzo Maffione goto unlock_bdg_free; 2882ff91c17SVincenzo Maffione } 2892ff91c17SVincenzo Maffione 2902ff91c17SVincenzo Maffione if (!nm_bdg_valid_auth_token(b, auth_token)) { 2912ff91c17SVincenzo Maffione ret = EACCES; 2922ff91c17SVincenzo Maffione goto unlock_bdg_free; 2932ff91c17SVincenzo Maffione } 2942ff91c17SVincenzo Maffione if (!(b->bdg_flags & NM_BDG_EXCLUSIVE)) { 2952ff91c17SVincenzo Maffione ret = EINVAL; 2962ff91c17SVincenzo Maffione goto unlock_bdg_free; 2972ff91c17SVincenzo Maffione } 2982ff91c17SVincenzo Maffione 2992ff91c17SVincenzo Maffione b->bdg_flags &= ~(NM_BDG_EXCLUSIVE | NM_BDG_ACTIVE); 3002ff91c17SVincenzo Maffione ret = netmap_bdg_free(b); 3012ff91c17SVincenzo Maffione if (ret) { 3022ff91c17SVincenzo Maffione b->bdg_flags |= NM_BDG_EXCLUSIVE | NM_BDG_ACTIVE; 3032ff91c17SVincenzo Maffione } 3042ff91c17SVincenzo Maffione 3052ff91c17SVincenzo Maffione unlock_bdg_free: 3062ff91c17SVincenzo Maffione NMG_UNLOCK(); 3072ff91c17SVincenzo Maffione return ret; 3082ff91c17SVincenzo Maffione } 3092ff91c17SVincenzo Maffione 310*b6e66be2SVincenzo Maffione /* Process NETMAP_REQ_VALE_LIST. */ 311*b6e66be2SVincenzo Maffione int 312*b6e66be2SVincenzo Maffione netmap_vale_list(struct nmreq_header *hdr) 313*b6e66be2SVincenzo Maffione { 314*b6e66be2SVincenzo Maffione struct nmreq_vale_list *req = 315*b6e66be2SVincenzo Maffione (struct nmreq_vale_list *)(uintptr_t)hdr->nr_body; 316*b6e66be2SVincenzo Maffione int namelen = strlen(hdr->nr_name); 317*b6e66be2SVincenzo Maffione struct nm_bridge *b, *bridges; 318*b6e66be2SVincenzo Maffione struct netmap_vp_adapter *vpna; 319*b6e66be2SVincenzo Maffione int error = 0, i, j; 320*b6e66be2SVincenzo Maffione u_int num_bridges; 321*b6e66be2SVincenzo Maffione 322*b6e66be2SVincenzo Maffione netmap_bns_getbridges(&bridges, &num_bridges); 323*b6e66be2SVincenzo Maffione 324*b6e66be2SVincenzo Maffione /* this is used to enumerate bridges and ports */ 325*b6e66be2SVincenzo Maffione if (namelen) { /* look up indexes of bridge and port */ 326*b6e66be2SVincenzo Maffione if (strncmp(hdr->nr_name, NM_BDG_NAME, 327*b6e66be2SVincenzo Maffione strlen(NM_BDG_NAME))) { 328*b6e66be2SVincenzo Maffione return EINVAL; 329*b6e66be2SVincenzo Maffione } 330*b6e66be2SVincenzo Maffione NMG_LOCK(); 331*b6e66be2SVincenzo Maffione b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL); 332*b6e66be2SVincenzo Maffione if (!b) { 333*b6e66be2SVincenzo Maffione NMG_UNLOCK(); 334*b6e66be2SVincenzo Maffione return ENOENT; 335*b6e66be2SVincenzo Maffione } 336*b6e66be2SVincenzo Maffione 337*b6e66be2SVincenzo Maffione req->nr_bridge_idx = b - bridges; /* bridge index */ 338*b6e66be2SVincenzo Maffione req->nr_port_idx = NM_BDG_NOPORT; 339*b6e66be2SVincenzo Maffione for (j = 0; j < b->bdg_active_ports; j++) { 340*b6e66be2SVincenzo Maffione i = b->bdg_port_index[j]; 341*b6e66be2SVincenzo Maffione vpna = b->bdg_ports[i]; 342*b6e66be2SVincenzo Maffione if (vpna == NULL) { 343*b6e66be2SVincenzo Maffione nm_prerr("This should not happen"); 344*b6e66be2SVincenzo Maffione continue; 345*b6e66be2SVincenzo Maffione } 346*b6e66be2SVincenzo Maffione /* the former and the latter identify a 347*b6e66be2SVincenzo Maffione * virtual port and a NIC, respectively 348*b6e66be2SVincenzo Maffione */ 349*b6e66be2SVincenzo Maffione if (!strcmp(vpna->up.name, hdr->nr_name)) { 350*b6e66be2SVincenzo Maffione req->nr_port_idx = i; /* port index */ 351*b6e66be2SVincenzo Maffione break; 352*b6e66be2SVincenzo Maffione } 353*b6e66be2SVincenzo Maffione } 354*b6e66be2SVincenzo Maffione NMG_UNLOCK(); 355*b6e66be2SVincenzo Maffione } else { 356*b6e66be2SVincenzo Maffione /* return the first non-empty entry starting from 357*b6e66be2SVincenzo Maffione * bridge nr_arg1 and port nr_arg2. 358*b6e66be2SVincenzo Maffione * 359*b6e66be2SVincenzo Maffione * Users can detect the end of the same bridge by 360*b6e66be2SVincenzo Maffione * seeing the new and old value of nr_arg1, and can 361*b6e66be2SVincenzo Maffione * detect the end of all the bridge by error != 0 362*b6e66be2SVincenzo Maffione */ 363*b6e66be2SVincenzo Maffione i = req->nr_bridge_idx; 364*b6e66be2SVincenzo Maffione j = req->nr_port_idx; 365*b6e66be2SVincenzo Maffione 366*b6e66be2SVincenzo Maffione NMG_LOCK(); 367*b6e66be2SVincenzo Maffione for (error = ENOENT; i < NM_BRIDGES; i++) { 368*b6e66be2SVincenzo Maffione b = bridges + i; 369*b6e66be2SVincenzo Maffione for ( ; j < NM_BDG_MAXPORTS; j++) { 370*b6e66be2SVincenzo Maffione if (b->bdg_ports[j] == NULL) 371*b6e66be2SVincenzo Maffione continue; 372*b6e66be2SVincenzo Maffione vpna = b->bdg_ports[j]; 373*b6e66be2SVincenzo Maffione /* write back the VALE switch name */ 374*b6e66be2SVincenzo Maffione strlcpy(hdr->nr_name, vpna->up.name, 375*b6e66be2SVincenzo Maffione sizeof(hdr->nr_name)); 376*b6e66be2SVincenzo Maffione error = 0; 377*b6e66be2SVincenzo Maffione goto out; 378*b6e66be2SVincenzo Maffione } 379*b6e66be2SVincenzo Maffione j = 0; /* following bridges scan from 0 */ 380*b6e66be2SVincenzo Maffione } 381*b6e66be2SVincenzo Maffione out: 382*b6e66be2SVincenzo Maffione req->nr_bridge_idx = i; 383*b6e66be2SVincenzo Maffione req->nr_port_idx = j; 384*b6e66be2SVincenzo Maffione NMG_UNLOCK(); 385*b6e66be2SVincenzo Maffione } 386*b6e66be2SVincenzo Maffione 387*b6e66be2SVincenzo Maffione return error; 388*b6e66be2SVincenzo Maffione } 389*b6e66be2SVincenzo Maffione 390*b6e66be2SVincenzo Maffione /* Process NETMAP_REQ_VALE_ATTACH. 391*b6e66be2SVincenzo Maffione */ 392*b6e66be2SVincenzo Maffione int 393*b6e66be2SVincenzo Maffione netmap_vale_attach(struct nmreq_header *hdr, void *auth_token) 394*b6e66be2SVincenzo Maffione { 395*b6e66be2SVincenzo Maffione struct nmreq_vale_attach *req = 396*b6e66be2SVincenzo Maffione (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body; 397*b6e66be2SVincenzo Maffione struct netmap_vp_adapter * vpna; 398*b6e66be2SVincenzo Maffione struct netmap_adapter *na = NULL; 399*b6e66be2SVincenzo Maffione struct netmap_mem_d *nmd = NULL; 400*b6e66be2SVincenzo Maffione struct nm_bridge *b = NULL; 401*b6e66be2SVincenzo Maffione int error; 402*b6e66be2SVincenzo Maffione 403*b6e66be2SVincenzo Maffione NMG_LOCK(); 404*b6e66be2SVincenzo Maffione /* permission check for modified bridges */ 405*b6e66be2SVincenzo Maffione b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL); 406*b6e66be2SVincenzo Maffione if (b && !nm_bdg_valid_auth_token(b, auth_token)) { 407*b6e66be2SVincenzo Maffione error = EACCES; 408*b6e66be2SVincenzo Maffione goto unlock_exit; 409*b6e66be2SVincenzo Maffione } 410*b6e66be2SVincenzo Maffione 411*b6e66be2SVincenzo Maffione if (req->reg.nr_mem_id) { 412*b6e66be2SVincenzo Maffione nmd = netmap_mem_find(req->reg.nr_mem_id); 413*b6e66be2SVincenzo Maffione if (nmd == NULL) { 414*b6e66be2SVincenzo Maffione error = EINVAL; 415*b6e66be2SVincenzo Maffione goto unlock_exit; 416*b6e66be2SVincenzo Maffione } 417*b6e66be2SVincenzo Maffione } 418*b6e66be2SVincenzo Maffione 419*b6e66be2SVincenzo Maffione /* check for existing one */ 420*b6e66be2SVincenzo Maffione error = netmap_get_vale_na(hdr, &na, nmd, 0); 421*b6e66be2SVincenzo Maffione if (na) { 422*b6e66be2SVincenzo Maffione error = EBUSY; 423*b6e66be2SVincenzo Maffione goto unref_exit; 424*b6e66be2SVincenzo Maffione } 425*b6e66be2SVincenzo Maffione error = netmap_get_vale_na(hdr, &na, 426*b6e66be2SVincenzo Maffione nmd, 1 /* create if not exists */); 427*b6e66be2SVincenzo Maffione if (error) { /* no device */ 428*b6e66be2SVincenzo Maffione goto unlock_exit; 429*b6e66be2SVincenzo Maffione } 430*b6e66be2SVincenzo Maffione 431*b6e66be2SVincenzo Maffione if (na == NULL) { /* VALE prefix missing */ 432*b6e66be2SVincenzo Maffione error = EINVAL; 433*b6e66be2SVincenzo Maffione goto unlock_exit; 434*b6e66be2SVincenzo Maffione } 435*b6e66be2SVincenzo Maffione 436*b6e66be2SVincenzo Maffione if (NETMAP_OWNED_BY_ANY(na)) { 437*b6e66be2SVincenzo Maffione error = EBUSY; 438*b6e66be2SVincenzo Maffione goto unref_exit; 439*b6e66be2SVincenzo Maffione } 440*b6e66be2SVincenzo Maffione 441*b6e66be2SVincenzo Maffione if (na->nm_bdg_ctl) { 442*b6e66be2SVincenzo Maffione /* nop for VALE ports. The bwrap needs to put the hwna 443*b6e66be2SVincenzo Maffione * in netmap mode (see netmap_bwrap_bdg_ctl) 444*b6e66be2SVincenzo Maffione */ 445*b6e66be2SVincenzo Maffione error = na->nm_bdg_ctl(hdr, na); 446*b6e66be2SVincenzo Maffione if (error) 447*b6e66be2SVincenzo Maffione goto unref_exit; 448*b6e66be2SVincenzo Maffione ND("registered %s to netmap-mode", na->name); 449*b6e66be2SVincenzo Maffione } 450*b6e66be2SVincenzo Maffione vpna = (struct netmap_vp_adapter *)na; 451*b6e66be2SVincenzo Maffione req->port_index = vpna->bdg_port; 452*b6e66be2SVincenzo Maffione 453*b6e66be2SVincenzo Maffione if (nmd) 454*b6e66be2SVincenzo Maffione netmap_mem_put(nmd); 455*b6e66be2SVincenzo Maffione 456*b6e66be2SVincenzo Maffione NMG_UNLOCK(); 457*b6e66be2SVincenzo Maffione return 0; 458*b6e66be2SVincenzo Maffione 459*b6e66be2SVincenzo Maffione unref_exit: 460*b6e66be2SVincenzo Maffione netmap_adapter_put(na); 461*b6e66be2SVincenzo Maffione unlock_exit: 462*b6e66be2SVincenzo Maffione if (nmd) 463*b6e66be2SVincenzo Maffione netmap_mem_put(nmd); 464*b6e66be2SVincenzo Maffione 465*b6e66be2SVincenzo Maffione NMG_UNLOCK(); 466*b6e66be2SVincenzo Maffione return error; 467*b6e66be2SVincenzo Maffione } 468*b6e66be2SVincenzo Maffione 469*b6e66be2SVincenzo Maffione /* Process NETMAP_REQ_VALE_DETACH. 470*b6e66be2SVincenzo Maffione */ 471*b6e66be2SVincenzo Maffione int 472*b6e66be2SVincenzo Maffione netmap_vale_detach(struct nmreq_header *hdr, void *auth_token) 473*b6e66be2SVincenzo Maffione { 474*b6e66be2SVincenzo Maffione struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body; 475*b6e66be2SVincenzo Maffione struct netmap_vp_adapter *vpna; 476*b6e66be2SVincenzo Maffione struct netmap_adapter *na; 477*b6e66be2SVincenzo Maffione struct nm_bridge *b = NULL; 478*b6e66be2SVincenzo Maffione int error; 479*b6e66be2SVincenzo Maffione 480*b6e66be2SVincenzo Maffione NMG_LOCK(); 481*b6e66be2SVincenzo Maffione /* permission check for modified bridges */ 482*b6e66be2SVincenzo Maffione b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL); 483*b6e66be2SVincenzo Maffione if (b && !nm_bdg_valid_auth_token(b, auth_token)) { 484*b6e66be2SVincenzo Maffione error = EACCES; 485*b6e66be2SVincenzo Maffione goto unlock_exit; 486*b6e66be2SVincenzo Maffione } 487*b6e66be2SVincenzo Maffione 488*b6e66be2SVincenzo Maffione error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */); 489*b6e66be2SVincenzo Maffione if (error) { /* no device, or another bridge or user owns the device */ 490*b6e66be2SVincenzo Maffione goto unlock_exit; 491*b6e66be2SVincenzo Maffione } 492*b6e66be2SVincenzo Maffione 493*b6e66be2SVincenzo Maffione if (na == NULL) { /* VALE prefix missing */ 494*b6e66be2SVincenzo Maffione error = EINVAL; 495*b6e66be2SVincenzo Maffione goto unlock_exit; 496*b6e66be2SVincenzo Maffione } else if (nm_is_bwrap(na) && 497*b6e66be2SVincenzo Maffione ((struct netmap_bwrap_adapter *)na)->na_polling_state) { 498*b6e66be2SVincenzo Maffione /* Don't detach a NIC with polling */ 499*b6e66be2SVincenzo Maffione error = EBUSY; 500*b6e66be2SVincenzo Maffione goto unref_exit; 501*b6e66be2SVincenzo Maffione } 502*b6e66be2SVincenzo Maffione 503*b6e66be2SVincenzo Maffione vpna = (struct netmap_vp_adapter *)na; 504*b6e66be2SVincenzo Maffione if (na->na_vp != vpna) { 505*b6e66be2SVincenzo Maffione /* trying to detach first attach of VALE persistent port attached 506*b6e66be2SVincenzo Maffione * to 2 bridges 507*b6e66be2SVincenzo Maffione */ 508*b6e66be2SVincenzo Maffione error = EBUSY; 509*b6e66be2SVincenzo Maffione goto unref_exit; 510*b6e66be2SVincenzo Maffione } 511*b6e66be2SVincenzo Maffione nmreq_det->port_index = vpna->bdg_port; 512*b6e66be2SVincenzo Maffione 513*b6e66be2SVincenzo Maffione if (na->nm_bdg_ctl) { 514*b6e66be2SVincenzo Maffione /* remove the port from bridge. The bwrap 515*b6e66be2SVincenzo Maffione * also needs to put the hwna in normal mode 516*b6e66be2SVincenzo Maffione */ 517*b6e66be2SVincenzo Maffione error = na->nm_bdg_ctl(hdr, na); 518*b6e66be2SVincenzo Maffione } 519*b6e66be2SVincenzo Maffione 520*b6e66be2SVincenzo Maffione unref_exit: 521*b6e66be2SVincenzo Maffione netmap_adapter_put(na); 522*b6e66be2SVincenzo Maffione unlock_exit: 523*b6e66be2SVincenzo Maffione NMG_UNLOCK(); 524*b6e66be2SVincenzo Maffione return error; 525*b6e66be2SVincenzo Maffione 526*b6e66be2SVincenzo Maffione } 5272ff91c17SVincenzo Maffione 5282ff91c17SVincenzo Maffione 5294bf50f18SLuigi Rizzo /* nm_dtor callback for ephemeral VALE ports */ 5304bf50f18SLuigi Rizzo static void 531*b6e66be2SVincenzo Maffione netmap_vale_vp_dtor(struct netmap_adapter *na) 5324bf50f18SLuigi Rizzo { 5334bf50f18SLuigi Rizzo struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na; 5344bf50f18SLuigi Rizzo struct nm_bridge *b = vpna->na_bdg; 5354bf50f18SLuigi Rizzo 5364bf50f18SLuigi Rizzo ND("%s has %d references", na->name, na->na_refcount); 537f9790aebSLuigi Rizzo 538f9790aebSLuigi Rizzo if (b) { 539f9790aebSLuigi Rizzo netmap_bdg_detach_common(b, vpna->bdg_port, -1); 540f9790aebSLuigi Rizzo } 541c3e9b4dbSLuiz Otavio O Souza 5424f80b14cSVincenzo Maffione if (na->ifp != NULL && !nm_iszombie(na)) { 5432a7db7a6SVincenzo Maffione NM_DETACH_NA(na->ifp); 5444f80b14cSVincenzo Maffione if (vpna->autodelete) { 545c3e9b4dbSLuiz Otavio O Souza ND("releasing %s", na->ifp->if_xname); 546c3e9b4dbSLuiz Otavio O Souza NMG_UNLOCK(); 547c3e9b4dbSLuiz Otavio O Souza nm_os_vi_detach(na->ifp); 548c3e9b4dbSLuiz Otavio O Souza NMG_LOCK(); 549c3e9b4dbSLuiz Otavio O Souza } 550f9790aebSLuigi Rizzo } 5514f80b14cSVincenzo Maffione } 552f9790aebSLuigi Rizzo 5532ff91c17SVincenzo Maffione 5544bf50f18SLuigi Rizzo 5554bf50f18SLuigi Rizzo /* nm_krings_create callback for VALE ports. 5564bf50f18SLuigi Rizzo * Calls the standard netmap_krings_create, then adds leases on rx 5574bf50f18SLuigi Rizzo * rings and bdgfwd on tx rings. 5584bf50f18SLuigi Rizzo */ 559f9790aebSLuigi Rizzo static int 560*b6e66be2SVincenzo Maffione netmap_vale_vp_krings_create(struct netmap_adapter *na) 561f9790aebSLuigi Rizzo { 562f0ea3689SLuigi Rizzo u_int tailroom; 563f9790aebSLuigi Rizzo int error, i; 564f9790aebSLuigi Rizzo uint32_t *leases; 565847bf383SLuigi Rizzo u_int nrx = netmap_real_rings(na, NR_RX); 566f9790aebSLuigi Rizzo 567f9790aebSLuigi Rizzo /* 568f9790aebSLuigi Rizzo * Leases are attached to RX rings on vale ports 569f9790aebSLuigi Rizzo */ 570f9790aebSLuigi Rizzo tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx; 571f9790aebSLuigi Rizzo 572f0ea3689SLuigi Rizzo error = netmap_krings_create(na, tailroom); 573f9790aebSLuigi Rizzo if (error) 574f9790aebSLuigi Rizzo return error; 575f9790aebSLuigi Rizzo 576f9790aebSLuigi Rizzo leases = na->tailroom; 577f9790aebSLuigi Rizzo 578f9790aebSLuigi Rizzo for (i = 0; i < nrx; i++) { /* Receive rings */ 5792ff91c17SVincenzo Maffione na->rx_rings[i]->nkr_leases = leases; 580f9790aebSLuigi Rizzo leases += na->num_rx_desc; 581f9790aebSLuigi Rizzo } 582f9790aebSLuigi Rizzo 583f9790aebSLuigi Rizzo error = nm_alloc_bdgfwd(na); 584f9790aebSLuigi Rizzo if (error) { 585f9790aebSLuigi Rizzo netmap_krings_delete(na); 586f9790aebSLuigi Rizzo return error; 587f9790aebSLuigi Rizzo } 588f9790aebSLuigi Rizzo 589f9790aebSLuigi Rizzo return 0; 590f9790aebSLuigi Rizzo } 591f9790aebSLuigi Rizzo 59217885a7bSLuigi Rizzo 5934bf50f18SLuigi Rizzo /* nm_krings_delete callback for VALE ports. */ 594f9790aebSLuigi Rizzo static void 595*b6e66be2SVincenzo Maffione netmap_vale_vp_krings_delete(struct netmap_adapter *na) 596f9790aebSLuigi Rizzo { 597f9790aebSLuigi Rizzo nm_free_bdgfwd(na); 598f9790aebSLuigi Rizzo netmap_krings_delete(na); 599f9790aebSLuigi Rizzo } 600f9790aebSLuigi Rizzo 601f9790aebSLuigi Rizzo 602f9790aebSLuigi Rizzo static int 603*b6e66be2SVincenzo Maffione nm_vale_flush(struct nm_bdg_fwd *ft, u_int n, 604f9790aebSLuigi Rizzo struct netmap_vp_adapter *na, u_int ring_nr); 605f9790aebSLuigi Rizzo 606f9790aebSLuigi Rizzo 607f9790aebSLuigi Rizzo /* 6084bf50f18SLuigi Rizzo * main dispatch routine for the bridge. 609f9790aebSLuigi Rizzo * Grab packets from a kring, move them into the ft structure 610f9790aebSLuigi Rizzo * associated to the tx (input) port. Max one instance per port, 611f9790aebSLuigi Rizzo * filtered on input (ioctl, poll or XXX). 612f9790aebSLuigi Rizzo * Returns the next position in the ring. 613f9790aebSLuigi Rizzo */ 614f9790aebSLuigi Rizzo static int 615*b6e66be2SVincenzo Maffione nm_vale_preflush(struct netmap_kring *kring, u_int end) 616f9790aebSLuigi Rizzo { 6174bf50f18SLuigi Rizzo struct netmap_vp_adapter *na = 6184bf50f18SLuigi Rizzo (struct netmap_vp_adapter*)kring->na; 619f9790aebSLuigi Rizzo struct netmap_ring *ring = kring->ring; 620f9790aebSLuigi Rizzo struct nm_bdg_fwd *ft; 6214bf50f18SLuigi Rizzo u_int ring_nr = kring->ring_id; 622f9790aebSLuigi Rizzo u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1; 623f9790aebSLuigi Rizzo u_int ft_i = 0; /* start from 0 */ 624f9790aebSLuigi Rizzo u_int frags = 1; /* how many frags ? */ 625f9790aebSLuigi Rizzo struct nm_bridge *b = na->na_bdg; 626f9790aebSLuigi Rizzo 627f9790aebSLuigi Rizzo /* To protect against modifications to the bridge we acquire a 628f9790aebSLuigi Rizzo * shared lock, waiting if we can sleep (if the source port is 629f9790aebSLuigi Rizzo * attached to a user process) or with a trylock otherwise (NICs). 630f9790aebSLuigi Rizzo */ 631f9790aebSLuigi Rizzo ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j); 632f9790aebSLuigi Rizzo if (na->up.na_flags & NAF_BDG_MAYSLEEP) 633f9790aebSLuigi Rizzo BDG_RLOCK(b); 634f9790aebSLuigi Rizzo else if (!BDG_RTRYLOCK(b)) 635c3e9b4dbSLuiz Otavio O Souza return j; 636f9790aebSLuigi Rizzo ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j); 637f9790aebSLuigi Rizzo ft = kring->nkr_ft; 638f9790aebSLuigi Rizzo 639f9790aebSLuigi Rizzo for (; likely(j != end); j = nm_next(j, lim)) { 640f9790aebSLuigi Rizzo struct netmap_slot *slot = &ring->slot[j]; 641f9790aebSLuigi Rizzo char *buf; 642f9790aebSLuigi Rizzo 643f9790aebSLuigi Rizzo ft[ft_i].ft_len = slot->len; 644f9790aebSLuigi Rizzo ft[ft_i].ft_flags = slot->flags; 6452ff91c17SVincenzo Maffione ft[ft_i].ft_offset = 0; 646f9790aebSLuigi Rizzo 647f9790aebSLuigi Rizzo ND("flags is 0x%x", slot->flags); 648847bf383SLuigi Rizzo /* we do not use the buf changed flag, but we still need to reset it */ 649847bf383SLuigi Rizzo slot->flags &= ~NS_BUF_CHANGED; 650847bf383SLuigi Rizzo 651f9790aebSLuigi Rizzo /* this slot goes into a list so initialize the link field */ 652f9790aebSLuigi Rizzo ft[ft_i].ft_next = NM_FT_NULL; 653f9790aebSLuigi Rizzo buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ? 6544bf50f18SLuigi Rizzo (void *)(uintptr_t)slot->ptr : NMB(&na->up, slot); 655e31c6ec7SLuigi Rizzo if (unlikely(buf == NULL)) { 656*b6e66be2SVincenzo Maffione nm_prlim(5, "NULL %s buffer pointer from %s slot %d len %d", 657e31c6ec7SLuigi Rizzo (slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT", 658e31c6ec7SLuigi Rizzo kring->name, j, ft[ft_i].ft_len); 6594bf50f18SLuigi Rizzo buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up); 660e31c6ec7SLuigi Rizzo ft[ft_i].ft_len = 0; 661e31c6ec7SLuigi Rizzo ft[ft_i].ft_flags = 0; 662e31c6ec7SLuigi Rizzo } 6632e159ef0SLuigi Rizzo __builtin_prefetch(buf); 664f9790aebSLuigi Rizzo ++ft_i; 665f9790aebSLuigi Rizzo if (slot->flags & NS_MOREFRAG) { 666f9790aebSLuigi Rizzo frags++; 667f9790aebSLuigi Rizzo continue; 668f9790aebSLuigi Rizzo } 669f9790aebSLuigi Rizzo if (unlikely(netmap_verbose && frags > 1)) 670f9790aebSLuigi Rizzo RD(5, "%d frags at %d", frags, ft_i - frags); 671f9790aebSLuigi Rizzo ft[ft_i - frags].ft_frags = frags; 672f9790aebSLuigi Rizzo frags = 1; 673f9790aebSLuigi Rizzo if (unlikely((int)ft_i >= bridge_batch)) 674*b6e66be2SVincenzo Maffione ft_i = nm_vale_flush(ft, ft_i, na, ring_nr); 675f9790aebSLuigi Rizzo } 676f9790aebSLuigi Rizzo if (frags > 1) { 67737e3a6d3SLuigi Rizzo /* Here ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG, and we 67837e3a6d3SLuigi Rizzo * have to fix frags count. */ 67937e3a6d3SLuigi Rizzo frags--; 68037e3a6d3SLuigi Rizzo ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG; 68137e3a6d3SLuigi Rizzo ft[ft_i - frags].ft_frags = frags; 682*b6e66be2SVincenzo Maffione nm_prlim(5, "Truncate incomplete fragment at %d (%d frags)", ft_i, frags); 683f9790aebSLuigi Rizzo } 684f9790aebSLuigi Rizzo if (ft_i) 685*b6e66be2SVincenzo Maffione ft_i = nm_vale_flush(ft, ft_i, na, ring_nr); 686f9790aebSLuigi Rizzo BDG_RUNLOCK(b); 687f9790aebSLuigi Rizzo return j; 688f9790aebSLuigi Rizzo } 689f9790aebSLuigi Rizzo 690f9790aebSLuigi Rizzo 691f9790aebSLuigi Rizzo /* ----- FreeBSD if_bridge hash function ------- */ 692f9790aebSLuigi Rizzo 693f9790aebSLuigi Rizzo /* 694f9790aebSLuigi Rizzo * The following hash function is adapted from "Hash Functions" by Bob Jenkins 695f9790aebSLuigi Rizzo * ("Algorithm Alley", Dr. Dobbs Journal, September 1997). 696f9790aebSLuigi Rizzo * 697f9790aebSLuigi Rizzo * http://www.burtleburtle.net/bob/hash/spooky.html 698f9790aebSLuigi Rizzo */ 699f9790aebSLuigi Rizzo #define mix(a, b, c) \ 700f9790aebSLuigi Rizzo do { \ 701f9790aebSLuigi Rizzo a -= b; a -= c; a ^= (c >> 13); \ 702f9790aebSLuigi Rizzo b -= c; b -= a; b ^= (a << 8); \ 703f9790aebSLuigi Rizzo c -= a; c -= b; c ^= (b >> 13); \ 704f9790aebSLuigi Rizzo a -= b; a -= c; a ^= (c >> 12); \ 705f9790aebSLuigi Rizzo b -= c; b -= a; b ^= (a << 16); \ 706f9790aebSLuigi Rizzo c -= a; c -= b; c ^= (b >> 5); \ 707f9790aebSLuigi Rizzo a -= b; a -= c; a ^= (c >> 3); \ 708f9790aebSLuigi Rizzo b -= c; b -= a; b ^= (a << 10); \ 709f9790aebSLuigi Rizzo c -= a; c -= b; c ^= (b >> 15); \ 710f9790aebSLuigi Rizzo } while (/*CONSTCOND*/0) 711f9790aebSLuigi Rizzo 71217885a7bSLuigi Rizzo 713f9790aebSLuigi Rizzo static __inline uint32_t 714*b6e66be2SVincenzo Maffione nm_vale_rthash(const uint8_t *addr) 715f9790aebSLuigi Rizzo { 716f9790aebSLuigi Rizzo uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key 717f9790aebSLuigi Rizzo 718f9790aebSLuigi Rizzo b += addr[5] << 8; 719f9790aebSLuigi Rizzo b += addr[4]; 720f9790aebSLuigi Rizzo a += addr[3] << 24; 721f9790aebSLuigi Rizzo a += addr[2] << 16; 722f9790aebSLuigi Rizzo a += addr[1] << 8; 723f9790aebSLuigi Rizzo a += addr[0]; 724f9790aebSLuigi Rizzo 725f9790aebSLuigi Rizzo mix(a, b, c); 726f9790aebSLuigi Rizzo #define BRIDGE_RTHASH_MASK (NM_BDG_HASH-1) 727f9790aebSLuigi Rizzo return (c & BRIDGE_RTHASH_MASK); 728f9790aebSLuigi Rizzo } 729f9790aebSLuigi Rizzo 730f9790aebSLuigi Rizzo #undef mix 731f9790aebSLuigi Rizzo 732f9790aebSLuigi Rizzo 733f9790aebSLuigi Rizzo /* 734f9790aebSLuigi Rizzo * Lookup function for a learning bridge. 735f9790aebSLuigi Rizzo * Update the hash table with the source address, 736f9790aebSLuigi Rizzo * and then returns the destination port index, and the 737f9790aebSLuigi Rizzo * ring in *dst_ring (at the moment, always use ring 0) 738f9790aebSLuigi Rizzo */ 7392ff91c17SVincenzo Maffione uint32_t 740*b6e66be2SVincenzo Maffione netmap_vale_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, 7412ff91c17SVincenzo Maffione struct netmap_vp_adapter *na, void *private_data) 742f9790aebSLuigi Rizzo { 7432ff91c17SVincenzo Maffione uint8_t *buf = ((uint8_t *)ft->ft_buf) + ft->ft_offset; 7442ff91c17SVincenzo Maffione u_int buf_len = ft->ft_len - ft->ft_offset; 7452ff91c17SVincenzo Maffione struct nm_hash_ent *ht = private_data; 746f9790aebSLuigi Rizzo uint32_t sh, dh; 747f9790aebSLuigi Rizzo u_int dst, mysrc = na->bdg_port; 748f9790aebSLuigi Rizzo uint64_t smac, dmac; 74937e3a6d3SLuigi Rizzo uint8_t indbuf[12]; 750f9790aebSLuigi Rizzo 7512ff91c17SVincenzo Maffione if (buf_len < 14) { 752f9790aebSLuigi Rizzo return NM_BDG_NOPORT; 753f9790aebSLuigi Rizzo } 75437e3a6d3SLuigi Rizzo 75537e3a6d3SLuigi Rizzo if (ft->ft_flags & NS_INDIRECT) { 75637e3a6d3SLuigi Rizzo if (copyin(buf, indbuf, sizeof(indbuf))) { 75737e3a6d3SLuigi Rizzo return NM_BDG_NOPORT; 75837e3a6d3SLuigi Rizzo } 75937e3a6d3SLuigi Rizzo buf = indbuf; 76037e3a6d3SLuigi Rizzo } 76137e3a6d3SLuigi Rizzo 762f9790aebSLuigi Rizzo dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff; 763f9790aebSLuigi Rizzo smac = le64toh(*(uint64_t *)(buf + 4)); 764f9790aebSLuigi Rizzo smac >>= 16; 765f9790aebSLuigi Rizzo 766f9790aebSLuigi Rizzo /* 767f9790aebSLuigi Rizzo * The hash is somewhat expensive, there might be some 768f9790aebSLuigi Rizzo * worthwhile optimizations here. 769f9790aebSLuigi Rizzo */ 770847bf383SLuigi Rizzo if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */ 771f9790aebSLuigi Rizzo uint8_t *s = buf+6; 772*b6e66be2SVincenzo Maffione sh = nm_vale_rthash(s); /* hash of source */ 773f9790aebSLuigi Rizzo /* update source port forwarding entry */ 774847bf383SLuigi Rizzo na->last_smac = ht[sh].mac = smac; /* XXX expire ? */ 775f9790aebSLuigi Rizzo ht[sh].ports = mysrc; 776*b6e66be2SVincenzo Maffione if (netmap_debug & NM_DEBUG_VALE) 777*b6e66be2SVincenzo Maffione nm_prinf("src %02x:%02x:%02x:%02x:%02x:%02x on port %d", 778f9790aebSLuigi Rizzo s[0], s[1], s[2], s[3], s[4], s[5], mysrc); 779f9790aebSLuigi Rizzo } 780f9790aebSLuigi Rizzo dst = NM_BDG_BROADCAST; 781f9790aebSLuigi Rizzo if ((buf[0] & 1) == 0) { /* unicast */ 782*b6e66be2SVincenzo Maffione dh = nm_vale_rthash(buf); /* hash of dst */ 783f9790aebSLuigi Rizzo if (ht[dh].mac == dmac) { /* found dst */ 784f9790aebSLuigi Rizzo dst = ht[dh].ports; 785f9790aebSLuigi Rizzo } 786f9790aebSLuigi Rizzo } 787f9790aebSLuigi Rizzo return dst; 788f9790aebSLuigi Rizzo } 789f9790aebSLuigi Rizzo 790f9790aebSLuigi Rizzo 791f9790aebSLuigi Rizzo /* 79217885a7bSLuigi Rizzo * Available space in the ring. Only used in VALE code 79317885a7bSLuigi Rizzo * and only with is_rx = 1 79417885a7bSLuigi Rizzo */ 79517885a7bSLuigi Rizzo static inline uint32_t 79617885a7bSLuigi Rizzo nm_kr_space(struct netmap_kring *k, int is_rx) 79717885a7bSLuigi Rizzo { 79817885a7bSLuigi Rizzo int space; 79917885a7bSLuigi Rizzo 80017885a7bSLuigi Rizzo if (is_rx) { 80117885a7bSLuigi Rizzo int busy = k->nkr_hwlease - k->nr_hwcur; 80217885a7bSLuigi Rizzo if (busy < 0) 80317885a7bSLuigi Rizzo busy += k->nkr_num_slots; 80417885a7bSLuigi Rizzo space = k->nkr_num_slots - 1 - busy; 80517885a7bSLuigi Rizzo } else { 80617885a7bSLuigi Rizzo /* XXX never used in this branch */ 80717885a7bSLuigi Rizzo space = k->nr_hwtail - k->nkr_hwlease; 80817885a7bSLuigi Rizzo if (space < 0) 80917885a7bSLuigi Rizzo space += k->nkr_num_slots; 81017885a7bSLuigi Rizzo } 81117885a7bSLuigi Rizzo #if 0 81217885a7bSLuigi Rizzo // sanity check 81317885a7bSLuigi Rizzo if (k->nkr_hwlease >= k->nkr_num_slots || 81417885a7bSLuigi Rizzo k->nr_hwcur >= k->nkr_num_slots || 81517885a7bSLuigi Rizzo k->nr_tail >= k->nkr_num_slots || 81617885a7bSLuigi Rizzo busy < 0 || 81717885a7bSLuigi Rizzo busy >= k->nkr_num_slots) { 81817885a7bSLuigi Rizzo D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease, 81917885a7bSLuigi Rizzo k->nkr_lease_idx, k->nkr_num_slots); 82017885a7bSLuigi Rizzo } 82117885a7bSLuigi Rizzo #endif 82217885a7bSLuigi Rizzo return space; 82317885a7bSLuigi Rizzo } 82417885a7bSLuigi Rizzo 82517885a7bSLuigi Rizzo 82617885a7bSLuigi Rizzo 82717885a7bSLuigi Rizzo 82817885a7bSLuigi Rizzo /* make a lease on the kring for N positions. return the 82917885a7bSLuigi Rizzo * lease index 83017885a7bSLuigi Rizzo * XXX only used in VALE code and with is_rx = 1 83117885a7bSLuigi Rizzo */ 83217885a7bSLuigi Rizzo static inline uint32_t 83317885a7bSLuigi Rizzo nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx) 83417885a7bSLuigi Rizzo { 83517885a7bSLuigi Rizzo uint32_t lim = k->nkr_num_slots - 1; 83617885a7bSLuigi Rizzo uint32_t lease_idx = k->nkr_lease_idx; 83717885a7bSLuigi Rizzo 83817885a7bSLuigi Rizzo k->nkr_leases[lease_idx] = NR_NOSLOT; 83917885a7bSLuigi Rizzo k->nkr_lease_idx = nm_next(lease_idx, lim); 84017885a7bSLuigi Rizzo 841*b6e66be2SVincenzo Maffione #ifdef CONFIG_NETMAP_DEBUG 84217885a7bSLuigi Rizzo if (n > nm_kr_space(k, is_rx)) { 843*b6e66be2SVincenzo Maffione nm_prerr("invalid request for %d slots", n); 84417885a7bSLuigi Rizzo panic("x"); 84517885a7bSLuigi Rizzo } 846*b6e66be2SVincenzo Maffione #endif /* CONFIG NETMAP_DEBUG */ 84717885a7bSLuigi Rizzo /* XXX verify that there are n slots */ 84817885a7bSLuigi Rizzo k->nkr_hwlease += n; 84917885a7bSLuigi Rizzo if (k->nkr_hwlease > lim) 85017885a7bSLuigi Rizzo k->nkr_hwlease -= lim + 1; 85117885a7bSLuigi Rizzo 852*b6e66be2SVincenzo Maffione #ifdef CONFIG_NETMAP_DEBUG 85317885a7bSLuigi Rizzo if (k->nkr_hwlease >= k->nkr_num_slots || 85417885a7bSLuigi Rizzo k->nr_hwcur >= k->nkr_num_slots || 85517885a7bSLuigi Rizzo k->nr_hwtail >= k->nkr_num_slots || 85617885a7bSLuigi Rizzo k->nkr_lease_idx >= k->nkr_num_slots) { 857*b6e66be2SVincenzo Maffione nm_prerr("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d", 8584bf50f18SLuigi Rizzo k->na->name, 85917885a7bSLuigi Rizzo k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease, 86017885a7bSLuigi Rizzo k->nkr_lease_idx, k->nkr_num_slots); 86117885a7bSLuigi Rizzo } 862*b6e66be2SVincenzo Maffione #endif /* CONFIG_NETMAP_DEBUG */ 86317885a7bSLuigi Rizzo return lease_idx; 86417885a7bSLuigi Rizzo } 86517885a7bSLuigi Rizzo 86617885a7bSLuigi Rizzo /* 8674bf50f18SLuigi Rizzo * 868f9790aebSLuigi Rizzo * This flush routine supports only unicast and broadcast but a large 869f9790aebSLuigi Rizzo * number of ports, and lets us replace the learn and dispatch functions. 870f9790aebSLuigi Rizzo */ 871f9790aebSLuigi Rizzo int 872*b6e66be2SVincenzo Maffione nm_vale_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, 873f9790aebSLuigi Rizzo u_int ring_nr) 874f9790aebSLuigi Rizzo { 875*b6e66be2SVincenzo Maffione struct nm_vale_q *dst_ents, *brddst; 876f9790aebSLuigi Rizzo uint16_t num_dsts = 0, *dsts; 877f9790aebSLuigi Rizzo struct nm_bridge *b = na->na_bdg; 87837e3a6d3SLuigi Rizzo u_int i, me = na->bdg_port; 879f9790aebSLuigi Rizzo 880f9790aebSLuigi Rizzo /* 881f9790aebSLuigi Rizzo * The work area (pointed by ft) is followed by an array of 882f9790aebSLuigi Rizzo * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS 883f9790aebSLuigi Rizzo * queues per port plus one for the broadcast traffic. 884f9790aebSLuigi Rizzo * Then we have an array of destination indexes. 885f9790aebSLuigi Rizzo */ 886*b6e66be2SVincenzo Maffione dst_ents = (struct nm_vale_q *)(ft + NM_BDG_BATCH_MAX); 887f9790aebSLuigi Rizzo dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1); 888f9790aebSLuigi Rizzo 889f9790aebSLuigi Rizzo /* first pass: find a destination for each packet in the batch */ 890f9790aebSLuigi Rizzo for (i = 0; likely(i < n); i += ft[i].ft_frags) { 891f9790aebSLuigi Rizzo uint8_t dst_ring = ring_nr; /* default, same ring as origin */ 892f9790aebSLuigi Rizzo uint16_t dst_port, d_i; 893*b6e66be2SVincenzo Maffione struct nm_vale_q *d; 8942ff91c17SVincenzo Maffione struct nm_bdg_fwd *start_ft = NULL; 895f9790aebSLuigi Rizzo 896f9790aebSLuigi Rizzo ND("slot %d frags %d", i, ft[i].ft_frags); 8972ff91c17SVincenzo Maffione 8982ff91c17SVincenzo Maffione if (na->up.virt_hdr_len < ft[i].ft_len) { 8992ff91c17SVincenzo Maffione ft[i].ft_offset = na->up.virt_hdr_len; 9002ff91c17SVincenzo Maffione start_ft = &ft[i]; 9012ff91c17SVincenzo Maffione } else if (na->up.virt_hdr_len == ft[i].ft_len && ft[i].ft_flags & NS_MOREFRAG) { 9022ff91c17SVincenzo Maffione ft[i].ft_offset = ft[i].ft_len; 9032ff91c17SVincenzo Maffione start_ft = &ft[i+1]; 9042ff91c17SVincenzo Maffione } else { 905f0ea3689SLuigi Rizzo /* Drop the packet if the virtio-net header is not into the first 9062ff91c17SVincenzo Maffione * fragment nor at the very beginning of the second. 9072ff91c17SVincenzo Maffione */ 908f9790aebSLuigi Rizzo continue; 9092ff91c17SVincenzo Maffione } 910*b6e66be2SVincenzo Maffione dst_port = b->bdg_ops.lookup(start_ft, &dst_ring, na, b->private_data); 911f9790aebSLuigi Rizzo if (netmap_verbose > 255) 912f9790aebSLuigi Rizzo RD(5, "slot %d port %d -> %d", i, me, dst_port); 9134f80b14cSVincenzo Maffione if (dst_port >= NM_BDG_NOPORT) 914f9790aebSLuigi Rizzo continue; /* this packet is identified to be dropped */ 915f9790aebSLuigi Rizzo else if (dst_port == NM_BDG_BROADCAST) 916f9790aebSLuigi Rizzo dst_ring = 0; /* broadcasts always go to ring 0 */ 917f9790aebSLuigi Rizzo else if (unlikely(dst_port == me || 918f9790aebSLuigi Rizzo !b->bdg_ports[dst_port])) 919f9790aebSLuigi Rizzo continue; 920f9790aebSLuigi Rizzo 921f9790aebSLuigi Rizzo /* get a position in the scratch pad */ 922f9790aebSLuigi Rizzo d_i = dst_port * NM_BDG_MAXRINGS + dst_ring; 923f9790aebSLuigi Rizzo d = dst_ents + d_i; 924f9790aebSLuigi Rizzo 925f9790aebSLuigi Rizzo /* append the first fragment to the list */ 926f9790aebSLuigi Rizzo if (d->bq_head == NM_FT_NULL) { /* new destination */ 927f9790aebSLuigi Rizzo d->bq_head = d->bq_tail = i; 928f9790aebSLuigi Rizzo /* remember this position to be scanned later */ 929f9790aebSLuigi Rizzo if (dst_port != NM_BDG_BROADCAST) 930f9790aebSLuigi Rizzo dsts[num_dsts++] = d_i; 931f9790aebSLuigi Rizzo } else { 932f9790aebSLuigi Rizzo ft[d->bq_tail].ft_next = i; 933f9790aebSLuigi Rizzo d->bq_tail = i; 934f9790aebSLuigi Rizzo } 935f9790aebSLuigi Rizzo d->bq_len += ft[i].ft_frags; 936f9790aebSLuigi Rizzo } 937f9790aebSLuigi Rizzo 938f9790aebSLuigi Rizzo /* 939f9790aebSLuigi Rizzo * Broadcast traffic goes to ring 0 on all destinations. 940f9790aebSLuigi Rizzo * So we need to add these rings to the list of ports to scan. 941f9790aebSLuigi Rizzo * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is 942f9790aebSLuigi Rizzo * expensive. We should keep a compact list of active destinations 943f9790aebSLuigi Rizzo * so we could shorten this loop. 944f9790aebSLuigi Rizzo */ 945f9790aebSLuigi Rizzo brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS; 946f9790aebSLuigi Rizzo if (brddst->bq_head != NM_FT_NULL) { 94737e3a6d3SLuigi Rizzo u_int j; 948f9790aebSLuigi Rizzo for (j = 0; likely(j < b->bdg_active_ports); j++) { 949f9790aebSLuigi Rizzo uint16_t d_i; 950f9790aebSLuigi Rizzo i = b->bdg_port_index[j]; 951f9790aebSLuigi Rizzo if (unlikely(i == me)) 952f9790aebSLuigi Rizzo continue; 953f9790aebSLuigi Rizzo d_i = i * NM_BDG_MAXRINGS; 954f9790aebSLuigi Rizzo if (dst_ents[d_i].bq_head == NM_FT_NULL) 955f9790aebSLuigi Rizzo dsts[num_dsts++] = d_i; 956f9790aebSLuigi Rizzo } 957f9790aebSLuigi Rizzo } 958f9790aebSLuigi Rizzo 959f9790aebSLuigi Rizzo ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts); 9604bf50f18SLuigi Rizzo /* second pass: scan destinations */ 961f9790aebSLuigi Rizzo for (i = 0; i < num_dsts; i++) { 962f9790aebSLuigi Rizzo struct netmap_vp_adapter *dst_na; 963f9790aebSLuigi Rizzo struct netmap_kring *kring; 964f9790aebSLuigi Rizzo struct netmap_ring *ring; 965f0ea3689SLuigi Rizzo u_int dst_nr, lim, j, d_i, next, brd_next; 966f9790aebSLuigi Rizzo u_int needed, howmany; 967f9790aebSLuigi Rizzo int retry = netmap_txsync_retry; 968*b6e66be2SVincenzo Maffione struct nm_vale_q *d; 969f9790aebSLuigi Rizzo uint32_t my_start = 0, lease_idx = 0; 970f9790aebSLuigi Rizzo int nrings; 971f0ea3689SLuigi Rizzo int virt_hdr_mismatch = 0; 972f9790aebSLuigi Rizzo 973f9790aebSLuigi Rizzo d_i = dsts[i]; 974f9790aebSLuigi Rizzo ND("second pass %d port %d", i, d_i); 975f9790aebSLuigi Rizzo d = dst_ents + d_i; 976f9790aebSLuigi Rizzo // XXX fix the division 977f9790aebSLuigi Rizzo dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS]; 978f9790aebSLuigi Rizzo /* protect from the lookup function returning an inactive 979f9790aebSLuigi Rizzo * destination port 980f9790aebSLuigi Rizzo */ 981f9790aebSLuigi Rizzo if (unlikely(dst_na == NULL)) 982f9790aebSLuigi Rizzo goto cleanup; 983f9790aebSLuigi Rizzo if (dst_na->up.na_flags & NAF_SW_ONLY) 984f9790aebSLuigi Rizzo goto cleanup; 985f9790aebSLuigi Rizzo /* 986f9790aebSLuigi Rizzo * The interface may be in !netmap mode in two cases: 987f9790aebSLuigi Rizzo * - when na is attached but not activated yet; 988f9790aebSLuigi Rizzo * - when na is being deactivated but is still attached. 989f9790aebSLuigi Rizzo */ 9904bf50f18SLuigi Rizzo if (unlikely(!nm_netmap_on(&dst_na->up))) { 991f9790aebSLuigi Rizzo ND("not in netmap mode!"); 992f9790aebSLuigi Rizzo goto cleanup; 993f9790aebSLuigi Rizzo } 994f9790aebSLuigi Rizzo 995f9790aebSLuigi Rizzo /* there is at least one either unicast or broadcast packet */ 996f9790aebSLuigi Rizzo brd_next = brddst->bq_head; 997f9790aebSLuigi Rizzo next = d->bq_head; 998f9790aebSLuigi Rizzo /* we need to reserve this many slots. If fewer are 999f9790aebSLuigi Rizzo * available, some packets will be dropped. 1000f9790aebSLuigi Rizzo * Packets may have multiple fragments, so we may not use 1001f9790aebSLuigi Rizzo * there is a chance that we may not use all of the slots 1002f9790aebSLuigi Rizzo * we have claimed, so we will need to handle the leftover 1003f9790aebSLuigi Rizzo * ones when we regain the lock. 1004f9790aebSLuigi Rizzo */ 1005f9790aebSLuigi Rizzo needed = d->bq_len + brddst->bq_len; 1006f9790aebSLuigi Rizzo 100737e3a6d3SLuigi Rizzo if (unlikely(dst_na->up.virt_hdr_len != na->up.virt_hdr_len)) { 1008c3e9b4dbSLuiz Otavio O Souza if (netmap_verbose) { 100937e3a6d3SLuigi Rizzo RD(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len, 101037e3a6d3SLuigi Rizzo dst_na->up.virt_hdr_len); 1011c3e9b4dbSLuiz Otavio O Souza } 1012f0ea3689SLuigi Rizzo /* There is a virtio-net header/offloadings mismatch between 1013f0ea3689SLuigi Rizzo * source and destination. The slower mismatch datapath will 1014f0ea3689SLuigi Rizzo * be used to cope with all the mismatches. 1015f0ea3689SLuigi Rizzo */ 1016f0ea3689SLuigi Rizzo virt_hdr_mismatch = 1; 1017f0ea3689SLuigi Rizzo if (dst_na->mfs < na->mfs) { 1018f0ea3689SLuigi Rizzo /* We may need to do segmentation offloadings, and so 1019f0ea3689SLuigi Rizzo * we may need a number of destination slots greater 1020f0ea3689SLuigi Rizzo * than the number of input slots ('needed'). 1021f0ea3689SLuigi Rizzo * We look for the smallest integer 'x' which satisfies: 1022f0ea3689SLuigi Rizzo * needed * na->mfs + x * H <= x * na->mfs 1023f0ea3689SLuigi Rizzo * where 'H' is the length of the longest header that may 1024f0ea3689SLuigi Rizzo * be replicated in the segmentation process (e.g. for 1025f0ea3689SLuigi Rizzo * TCPv4 we must account for ethernet header, IP header 1026f0ea3689SLuigi Rizzo * and TCPv4 header). 1027f0ea3689SLuigi Rizzo */ 10284f80b14cSVincenzo Maffione KASSERT(dst_na->mfs > 0, ("vpna->mfs is 0")); 1029f0ea3689SLuigi Rizzo needed = (needed * na->mfs) / 1030f0ea3689SLuigi Rizzo (dst_na->mfs - WORST_CASE_GSO_HEADER) + 1; 1031f0ea3689SLuigi Rizzo ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed); 1032f0ea3689SLuigi Rizzo } 1033f0ea3689SLuigi Rizzo } 1034f0ea3689SLuigi Rizzo 1035f9790aebSLuigi Rizzo ND(5, "pass 2 dst %d is %x %s", 1036f9790aebSLuigi Rizzo i, d_i, is_vp ? "virtual" : "nic/host"); 1037f9790aebSLuigi Rizzo dst_nr = d_i & (NM_BDG_MAXRINGS-1); 1038f9790aebSLuigi Rizzo nrings = dst_na->up.num_rx_rings; 1039f9790aebSLuigi Rizzo if (dst_nr >= nrings) 1040f9790aebSLuigi Rizzo dst_nr = dst_nr % nrings; 10412ff91c17SVincenzo Maffione kring = dst_na->up.rx_rings[dst_nr]; 1042f9790aebSLuigi Rizzo ring = kring->ring; 10434f80b14cSVincenzo Maffione /* the destination ring may have not been opened for RX */ 10444f80b14cSVincenzo Maffione if (unlikely(ring == NULL || kring->nr_mode != NKR_NETMAP_ON)) 10454f80b14cSVincenzo Maffione goto cleanup; 1046f9790aebSLuigi Rizzo lim = kring->nkr_num_slots - 1; 1047f9790aebSLuigi Rizzo 1048f9790aebSLuigi Rizzo retry: 1049f9790aebSLuigi Rizzo 1050f0ea3689SLuigi Rizzo if (dst_na->retry && retry) { 1051f0ea3689SLuigi Rizzo /* try to get some free slot from the previous run */ 1052*b6e66be2SVincenzo Maffione kring->nm_notify(kring, NAF_FORCE_RECLAIM); 10534bf50f18SLuigi Rizzo /* actually useful only for bwraps, since there 10544bf50f18SLuigi Rizzo * the notify will trigger a txsync on the hwna. VALE ports 10554bf50f18SLuigi Rizzo * have dst_na->retry == 0 10564bf50f18SLuigi Rizzo */ 1057f0ea3689SLuigi Rizzo } 1058f9790aebSLuigi Rizzo /* reserve the buffers in the queue and an entry 1059f9790aebSLuigi Rizzo * to report completion, and drop lock. 1060f9790aebSLuigi Rizzo * XXX this might become a helper function. 1061f9790aebSLuigi Rizzo */ 1062f9790aebSLuigi Rizzo mtx_lock(&kring->q_lock); 1063f9790aebSLuigi Rizzo if (kring->nkr_stopped) { 1064f9790aebSLuigi Rizzo mtx_unlock(&kring->q_lock); 1065f9790aebSLuigi Rizzo goto cleanup; 1066f9790aebSLuigi Rizzo } 1067f9790aebSLuigi Rizzo my_start = j = kring->nkr_hwlease; 1068f9790aebSLuigi Rizzo howmany = nm_kr_space(kring, 1); 1069f9790aebSLuigi Rizzo if (needed < howmany) 1070f9790aebSLuigi Rizzo howmany = needed; 1071f9790aebSLuigi Rizzo lease_idx = nm_kr_lease(kring, howmany, 1); 1072f9790aebSLuigi Rizzo mtx_unlock(&kring->q_lock); 1073f9790aebSLuigi Rizzo 1074f9790aebSLuigi Rizzo /* only retry if we need more than available slots */ 1075f9790aebSLuigi Rizzo if (retry && needed <= howmany) 1076f9790aebSLuigi Rizzo retry = 0; 1077f9790aebSLuigi Rizzo 1078f9790aebSLuigi Rizzo /* copy to the destination queue */ 1079f9790aebSLuigi Rizzo while (howmany > 0) { 1080f9790aebSLuigi Rizzo struct netmap_slot *slot; 1081f9790aebSLuigi Rizzo struct nm_bdg_fwd *ft_p, *ft_end; 1082f9790aebSLuigi Rizzo u_int cnt; 1083f9790aebSLuigi Rizzo 1084f9790aebSLuigi Rizzo /* find the queue from which we pick next packet. 1085f9790aebSLuigi Rizzo * NM_FT_NULL is always higher than valid indexes 1086f9790aebSLuigi Rizzo * so we never dereference it if the other list 1087f9790aebSLuigi Rizzo * has packets (and if both are empty we never 1088f9790aebSLuigi Rizzo * get here). 1089f9790aebSLuigi Rizzo */ 1090f9790aebSLuigi Rizzo if (next < brd_next) { 1091f9790aebSLuigi Rizzo ft_p = ft + next; 1092f9790aebSLuigi Rizzo next = ft_p->ft_next; 1093f9790aebSLuigi Rizzo } else { /* insert broadcast */ 1094f9790aebSLuigi Rizzo ft_p = ft + brd_next; 1095f9790aebSLuigi Rizzo brd_next = ft_p->ft_next; 1096f9790aebSLuigi Rizzo } 1097f9790aebSLuigi Rizzo cnt = ft_p->ft_frags; // cnt > 0 1098f9790aebSLuigi Rizzo if (unlikely(cnt > howmany)) 1099f9790aebSLuigi Rizzo break; /* no more space */ 1100f9790aebSLuigi Rizzo if (netmap_verbose && cnt > 1) 1101f9790aebSLuigi Rizzo RD(5, "rx %d frags to %d", cnt, j); 1102f9790aebSLuigi Rizzo ft_end = ft_p + cnt; 1103f0ea3689SLuigi Rizzo if (unlikely(virt_hdr_mismatch)) { 1104f0ea3689SLuigi Rizzo bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany); 1105f0ea3689SLuigi Rizzo } else { 1106f0ea3689SLuigi Rizzo howmany -= cnt; 1107f9790aebSLuigi Rizzo do { 1108f9790aebSLuigi Rizzo char *dst, *src = ft_p->ft_buf; 1109f9790aebSLuigi Rizzo size_t copy_len = ft_p->ft_len, dst_len = copy_len; 1110f9790aebSLuigi Rizzo 1111f9790aebSLuigi Rizzo slot = &ring->slot[j]; 11124bf50f18SLuigi Rizzo dst = NMB(&dst_na->up, slot); 1113f9790aebSLuigi Rizzo 111417885a7bSLuigi Rizzo ND("send [%d] %d(%d) bytes at %s:%d", 111517885a7bSLuigi Rizzo i, (int)copy_len, (int)dst_len, 111617885a7bSLuigi Rizzo NM_IFPNAME(dst_ifp), j); 1117f9790aebSLuigi Rizzo /* round to a multiple of 64 */ 1118f9790aebSLuigi Rizzo copy_len = (copy_len + 63) & ~63; 1119f9790aebSLuigi Rizzo 11204bf50f18SLuigi Rizzo if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) || 11214bf50f18SLuigi Rizzo copy_len > NETMAP_BUF_SIZE(&na->up))) { 1122e31c6ec7SLuigi Rizzo RD(5, "invalid len %d, down to 64", (int)copy_len); 1123e31c6ec7SLuigi Rizzo copy_len = dst_len = 64; // XXX 1124e31c6ec7SLuigi Rizzo } 1125f9790aebSLuigi Rizzo if (ft_p->ft_flags & NS_INDIRECT) { 1126f9790aebSLuigi Rizzo if (copyin(src, dst, copy_len)) { 1127f9790aebSLuigi Rizzo // invalid user pointer, pretend len is 0 1128f9790aebSLuigi Rizzo dst_len = 0; 1129f9790aebSLuigi Rizzo } 1130f9790aebSLuigi Rizzo } else { 1131f9790aebSLuigi Rizzo //memcpy(dst, src, copy_len); 1132f9790aebSLuigi Rizzo pkt_copy(src, dst, (int)copy_len); 1133f9790aebSLuigi Rizzo } 1134f9790aebSLuigi Rizzo slot->len = dst_len; 1135f9790aebSLuigi Rizzo slot->flags = (cnt << 8)| NS_MOREFRAG; 1136f9790aebSLuigi Rizzo j = nm_next(j, lim); 1137f0ea3689SLuigi Rizzo needed--; 1138f9790aebSLuigi Rizzo ft_p++; 1139f9790aebSLuigi Rizzo } while (ft_p != ft_end); 1140f9790aebSLuigi Rizzo slot->flags = (cnt << 8); /* clear flag on last entry */ 1141f0ea3689SLuigi Rizzo } 1142f9790aebSLuigi Rizzo /* are we done ? */ 1143f9790aebSLuigi Rizzo if (next == NM_FT_NULL && brd_next == NM_FT_NULL) 1144f9790aebSLuigi Rizzo break; 1145f9790aebSLuigi Rizzo } 1146f9790aebSLuigi Rizzo { 1147f9790aebSLuigi Rizzo /* current position */ 1148f9790aebSLuigi Rizzo uint32_t *p = kring->nkr_leases; /* shorthand */ 1149f9790aebSLuigi Rizzo uint32_t update_pos; 1150f9790aebSLuigi Rizzo int still_locked = 1; 1151f9790aebSLuigi Rizzo 1152f9790aebSLuigi Rizzo mtx_lock(&kring->q_lock); 1153f9790aebSLuigi Rizzo if (unlikely(howmany > 0)) { 1154f9790aebSLuigi Rizzo /* not used all bufs. If i am the last one 1155f9790aebSLuigi Rizzo * i can recover the slots, otherwise must 1156f9790aebSLuigi Rizzo * fill them with 0 to mark empty packets. 1157f9790aebSLuigi Rizzo */ 1158f9790aebSLuigi Rizzo ND("leftover %d bufs", howmany); 1159f9790aebSLuigi Rizzo if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) { 1160f9790aebSLuigi Rizzo /* yes i am the last one */ 1161f9790aebSLuigi Rizzo ND("roll back nkr_hwlease to %d", j); 1162f9790aebSLuigi Rizzo kring->nkr_hwlease = j; 1163f9790aebSLuigi Rizzo } else { 1164f9790aebSLuigi Rizzo while (howmany-- > 0) { 1165f9790aebSLuigi Rizzo ring->slot[j].len = 0; 1166f9790aebSLuigi Rizzo ring->slot[j].flags = 0; 1167f9790aebSLuigi Rizzo j = nm_next(j, lim); 1168f9790aebSLuigi Rizzo } 1169f9790aebSLuigi Rizzo } 1170f9790aebSLuigi Rizzo } 1171f9790aebSLuigi Rizzo p[lease_idx] = j; /* report I am done */ 1172f9790aebSLuigi Rizzo 117317885a7bSLuigi Rizzo update_pos = kring->nr_hwtail; 1174f9790aebSLuigi Rizzo 1175f9790aebSLuigi Rizzo if (my_start == update_pos) { 1176f9790aebSLuigi Rizzo /* all slots before my_start have been reported, 1177f9790aebSLuigi Rizzo * so scan subsequent leases to see if other ranges 1178f9790aebSLuigi Rizzo * have been completed, and to a selwakeup or txsync. 1179f9790aebSLuigi Rizzo */ 1180f9790aebSLuigi Rizzo while (lease_idx != kring->nkr_lease_idx && 1181f9790aebSLuigi Rizzo p[lease_idx] != NR_NOSLOT) { 1182f9790aebSLuigi Rizzo j = p[lease_idx]; 1183f9790aebSLuigi Rizzo p[lease_idx] = NR_NOSLOT; 1184f9790aebSLuigi Rizzo lease_idx = nm_next(lease_idx, lim); 1185f9790aebSLuigi Rizzo } 1186f9790aebSLuigi Rizzo /* j is the new 'write' position. j != my_start 1187f9790aebSLuigi Rizzo * means there are new buffers to report 1188f9790aebSLuigi Rizzo */ 1189f9790aebSLuigi Rizzo if (likely(j != my_start)) { 119017885a7bSLuigi Rizzo kring->nr_hwtail = j; 1191f9790aebSLuigi Rizzo still_locked = 0; 1192f9790aebSLuigi Rizzo mtx_unlock(&kring->q_lock); 1193847bf383SLuigi Rizzo kring->nm_notify(kring, 0); 11944bf50f18SLuigi Rizzo /* this is netmap_notify for VALE ports and 11954bf50f18SLuigi Rizzo * netmap_bwrap_notify for bwrap. The latter will 11964bf50f18SLuigi Rizzo * trigger a txsync on the underlying hwna 11974bf50f18SLuigi Rizzo */ 11984bf50f18SLuigi Rizzo if (dst_na->retry && retry--) { 11994bf50f18SLuigi Rizzo /* XXX this is going to call nm_notify again. 12004bf50f18SLuigi Rizzo * Only useful for bwrap in virtual machines 12014bf50f18SLuigi Rizzo */ 1202f9790aebSLuigi Rizzo goto retry; 1203f9790aebSLuigi Rizzo } 1204f9790aebSLuigi Rizzo } 12054bf50f18SLuigi Rizzo } 1206f9790aebSLuigi Rizzo if (still_locked) 1207f9790aebSLuigi Rizzo mtx_unlock(&kring->q_lock); 1208f9790aebSLuigi Rizzo } 1209f9790aebSLuigi Rizzo cleanup: 1210f9790aebSLuigi Rizzo d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */ 1211f9790aebSLuigi Rizzo d->bq_len = 0; 1212f9790aebSLuigi Rizzo } 1213f9790aebSLuigi Rizzo brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */ 1214f9790aebSLuigi Rizzo brddst->bq_len = 0; 1215f9790aebSLuigi Rizzo return 0; 1216f9790aebSLuigi Rizzo } 1217f9790aebSLuigi Rizzo 12184bf50f18SLuigi Rizzo /* nm_txsync callback for VALE ports */ 1219f9790aebSLuigi Rizzo static int 1220*b6e66be2SVincenzo Maffione netmap_vale_vp_txsync(struct netmap_kring *kring, int flags) 1221f9790aebSLuigi Rizzo { 12224bf50f18SLuigi Rizzo struct netmap_vp_adapter *na = 12234bf50f18SLuigi Rizzo (struct netmap_vp_adapter *)kring->na; 122417885a7bSLuigi Rizzo u_int done; 122517885a7bSLuigi Rizzo u_int const lim = kring->nkr_num_slots - 1; 1226847bf383SLuigi Rizzo u_int const head = kring->rhead; 1227f9790aebSLuigi Rizzo 1228f9790aebSLuigi Rizzo if (bridge_batch <= 0) { /* testing only */ 1229847bf383SLuigi Rizzo done = head; // used all 1230f9790aebSLuigi Rizzo goto done; 1231f9790aebSLuigi Rizzo } 12324bf50f18SLuigi Rizzo if (!na->na_bdg) { 1233847bf383SLuigi Rizzo done = head; 12344bf50f18SLuigi Rizzo goto done; 12354bf50f18SLuigi Rizzo } 1236f9790aebSLuigi Rizzo if (bridge_batch > NM_BDG_BATCH) 1237f9790aebSLuigi Rizzo bridge_batch = NM_BDG_BATCH; 1238f9790aebSLuigi Rizzo 1239*b6e66be2SVincenzo Maffione done = nm_vale_preflush(kring, head); 1240f9790aebSLuigi Rizzo done: 1241847bf383SLuigi Rizzo if (done != head) 1242*b6e66be2SVincenzo Maffione nm_prerr("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail); 124317885a7bSLuigi Rizzo /* 124417885a7bSLuigi Rizzo * packets between 'done' and 'cur' are left unsent. 124517885a7bSLuigi Rizzo */ 124617885a7bSLuigi Rizzo kring->nr_hwcur = done; 124717885a7bSLuigi Rizzo kring->nr_hwtail = nm_prev(done, lim); 1248*b6e66be2SVincenzo Maffione if (netmap_debug & NM_DEBUG_TXSYNC) 1249*b6e66be2SVincenzo Maffione nm_prinf("%s ring %d flags %d", na->up.name, kring->ring_id, flags); 1250f9790aebSLuigi Rizzo return 0; 1251f9790aebSLuigi Rizzo } 1252f9790aebSLuigi Rizzo 1253f9790aebSLuigi Rizzo 12544bf50f18SLuigi Rizzo /* create a netmap_vp_adapter that describes a VALE port. 12554bf50f18SLuigi Rizzo * Only persistent VALE ports have a non-null ifp. 12564bf50f18SLuigi Rizzo */ 12574bf50f18SLuigi Rizzo static int 1258*b6e66be2SVincenzo Maffione netmap_vale_vp_create(struct nmreq_header *hdr, struct ifnet *ifp, 12592ff91c17SVincenzo Maffione struct netmap_mem_d *nmd, struct netmap_vp_adapter **ret) 1260f9790aebSLuigi Rizzo { 1261cfa866f6SMatt Macy struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body; 1262f9790aebSLuigi Rizzo struct netmap_vp_adapter *vpna; 1263f9790aebSLuigi Rizzo struct netmap_adapter *na; 1264c3e9b4dbSLuiz Otavio O Souza int error = 0; 1265f0ea3689SLuigi Rizzo u_int npipes = 0; 12662ff91c17SVincenzo Maffione u_int extrabufs = 0; 12672ff91c17SVincenzo Maffione 12682ff91c17SVincenzo Maffione if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) { 12692ff91c17SVincenzo Maffione return EINVAL; 12702ff91c17SVincenzo Maffione } 1271f9790aebSLuigi Rizzo 1272c3e9b4dbSLuiz Otavio O Souza vpna = nm_os_malloc(sizeof(*vpna)); 1273f9790aebSLuigi Rizzo if (vpna == NULL) 1274f9790aebSLuigi Rizzo return ENOMEM; 1275f9790aebSLuigi Rizzo 1276f9790aebSLuigi Rizzo na = &vpna->up; 1277f9790aebSLuigi Rizzo 1278f9790aebSLuigi Rizzo na->ifp = ifp; 1279*b6e66be2SVincenzo Maffione strlcpy(na->name, hdr->nr_name, sizeof(na->name)); 1280f9790aebSLuigi Rizzo 1281f9790aebSLuigi Rizzo /* bound checking */ 12822ff91c17SVincenzo Maffione na->num_tx_rings = req->nr_tx_rings; 1283f9790aebSLuigi Rizzo nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); 12842ff91c17SVincenzo Maffione req->nr_tx_rings = na->num_tx_rings; /* write back */ 12852ff91c17SVincenzo Maffione na->num_rx_rings = req->nr_rx_rings; 1286f9790aebSLuigi Rizzo nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); 12872ff91c17SVincenzo Maffione req->nr_rx_rings = na->num_rx_rings; /* write back */ 12882ff91c17SVincenzo Maffione nm_bound_var(&req->nr_tx_slots, NM_BRIDGE_RINGSIZE, 1289f9790aebSLuigi Rizzo 1, NM_BDG_MAXSLOTS, NULL); 12902ff91c17SVincenzo Maffione na->num_tx_desc = req->nr_tx_slots; 12912ff91c17SVincenzo Maffione nm_bound_var(&req->nr_rx_slots, NM_BRIDGE_RINGSIZE, 1292f9790aebSLuigi Rizzo 1, NM_BDG_MAXSLOTS, NULL); 1293f0ea3689SLuigi Rizzo /* validate number of pipes. We want at least 1, 1294f0ea3689SLuigi Rizzo * but probably can do with some more. 1295f0ea3689SLuigi Rizzo * So let's use 2 as default (when 0 is supplied) 1296f0ea3689SLuigi Rizzo */ 1297f0ea3689SLuigi Rizzo nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL); 1298f0ea3689SLuigi Rizzo /* validate extra bufs */ 1299*b6e66be2SVincenzo Maffione extrabufs = req->nr_extra_bufs; 13002ff91c17SVincenzo Maffione nm_bound_var(&extrabufs, 0, 0, 1301f0ea3689SLuigi Rizzo 128*NM_BDG_MAXSLOTS, NULL); 13022ff91c17SVincenzo Maffione req->nr_extra_bufs = extrabufs; /* write back */ 13032ff91c17SVincenzo Maffione na->num_rx_desc = req->nr_rx_slots; 13044f80b14cSVincenzo Maffione /* Set the mfs to a default value, as it is needed on the VALE 13054f80b14cSVincenzo Maffione * mismatch datapath. XXX We should set it according to the MTU 13064f80b14cSVincenzo Maffione * known to the kernel. */ 13074f80b14cSVincenzo Maffione vpna->mfs = NM_BDG_MFS_DEFAULT; 1308847bf383SLuigi Rizzo vpna->last_smac = ~0llu; 1309f0ea3689SLuigi Rizzo /*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero?? 1310f0ea3689SLuigi Rizzo vpna->mfs = netmap_buf_size; */ 1311f0ea3689SLuigi Rizzo if (netmap_verbose) 1312*b6e66be2SVincenzo Maffione nm_prinf("max frame size %u", vpna->mfs); 1313f9790aebSLuigi Rizzo 1314847bf383SLuigi Rizzo na->na_flags |= NAF_BDG_MAYSLEEP; 131510b8ef3dSLuigi Rizzo /* persistent VALE ports look like hw devices 131610b8ef3dSLuigi Rizzo * with a native netmap adapter 131710b8ef3dSLuigi Rizzo */ 131810b8ef3dSLuigi Rizzo if (ifp) 131910b8ef3dSLuigi Rizzo na->na_flags |= NAF_NATIVE; 1320*b6e66be2SVincenzo Maffione na->nm_txsync = netmap_vale_vp_txsync; 1321*b6e66be2SVincenzo Maffione na->nm_rxsync = netmap_vp_rxsync; /* use the one provided by bdg */ 1322*b6e66be2SVincenzo Maffione na->nm_register = netmap_vp_reg; /* use the one provided by bdg */ 1323*b6e66be2SVincenzo Maffione na->nm_krings_create = netmap_vale_vp_krings_create; 1324*b6e66be2SVincenzo Maffione na->nm_krings_delete = netmap_vale_vp_krings_delete; 1325*b6e66be2SVincenzo Maffione na->nm_dtor = netmap_vale_vp_dtor; 13262ff91c17SVincenzo Maffione ND("nr_mem_id %d", req->nr_mem_id); 1327c3e9b4dbSLuiz Otavio O Souza na->nm_mem = nmd ? 1328c3e9b4dbSLuiz Otavio O Souza netmap_mem_get(nmd): 1329c3e9b4dbSLuiz Otavio O Souza netmap_mem_private_new( 1330f9790aebSLuigi Rizzo na->num_tx_rings, na->num_tx_desc, 1331f0ea3689SLuigi Rizzo na->num_rx_rings, na->num_rx_desc, 13322ff91c17SVincenzo Maffione req->nr_extra_bufs, npipes, &error); 1333f0ea3689SLuigi Rizzo if (na->nm_mem == NULL) 1334f0ea3689SLuigi Rizzo goto err; 1335*b6e66be2SVincenzo Maffione na->nm_bdg_attach = netmap_vale_vp_bdg_attach; 1336f9790aebSLuigi Rizzo /* other nmd fields are set in the common routine */ 1337f9790aebSLuigi Rizzo error = netmap_attach_common(na); 1338f0ea3689SLuigi Rizzo if (error) 1339f0ea3689SLuigi Rizzo goto err; 13404bf50f18SLuigi Rizzo *ret = vpna; 1341f0ea3689SLuigi Rizzo return 0; 1342f0ea3689SLuigi Rizzo 1343f0ea3689SLuigi Rizzo err: 1344f0ea3689SLuigi Rizzo if (na->nm_mem != NULL) 1345c3e9b4dbSLuiz Otavio O Souza netmap_mem_put(na->nm_mem); 1346c3e9b4dbSLuiz Otavio O Souza nm_os_free(vpna); 1347f9790aebSLuigi Rizzo return error; 1348f9790aebSLuigi Rizzo } 1349f9790aebSLuigi Rizzo 13502a7db7a6SVincenzo Maffione /* nm_bdg_attach callback for VALE ports 13512a7db7a6SVincenzo Maffione * The na_vp port is this same netmap_adapter. There is no host port. 1352f9790aebSLuigi Rizzo */ 1353f9790aebSLuigi Rizzo static int 1354*b6e66be2SVincenzo Maffione netmap_vale_vp_bdg_attach(const char *name, struct netmap_adapter *na, 13552a7db7a6SVincenzo Maffione struct nm_bridge *b) 1356f9790aebSLuigi Rizzo { 13572a7db7a6SVincenzo Maffione struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; 1358f9790aebSLuigi Rizzo 1359*b6e66be2SVincenzo Maffione if ((b->bdg_flags & NM_BDG_NEED_BWRAP) || vpna->na_bdg) { 13602a7db7a6SVincenzo Maffione return NM_NEED_BWRAP; 1361f9790aebSLuigi Rizzo } 13622a7db7a6SVincenzo Maffione na->na_vp = vpna; 1363*b6e66be2SVincenzo Maffione strlcpy(na->name, name, sizeof(na->name)); 13642a7db7a6SVincenzo Maffione na->na_hostvp = NULL; 1365f9790aebSLuigi Rizzo return 0; 1366f9790aebSLuigi Rizzo } 1367f9790aebSLuigi Rizzo 1368f9790aebSLuigi Rizzo static int 13692a7db7a6SVincenzo Maffione netmap_vale_bwrap_krings_create(struct netmap_adapter *na) 1370f9790aebSLuigi Rizzo { 1371cfa866f6SMatt Macy int error; 1372f9790aebSLuigi Rizzo 13734bf50f18SLuigi Rizzo /* impersonate a netmap_vp_adapter */ 1374*b6e66be2SVincenzo Maffione error = netmap_vale_vp_krings_create(na); 1375f9790aebSLuigi Rizzo if (error) 1376f9790aebSLuigi Rizzo return error; 13772a7db7a6SVincenzo Maffione error = netmap_bwrap_krings_create_common(na); 1378f9790aebSLuigi Rizzo if (error) { 1379*b6e66be2SVincenzo Maffione netmap_vale_vp_krings_delete(na); 13802a7db7a6SVincenzo Maffione } 138137e3a6d3SLuigi Rizzo return error; 1382f9790aebSLuigi Rizzo } 1383f9790aebSLuigi Rizzo 1384f9790aebSLuigi Rizzo static void 13852a7db7a6SVincenzo Maffione netmap_vale_bwrap_krings_delete(struct netmap_adapter *na) 1386f9790aebSLuigi Rizzo { 13872a7db7a6SVincenzo Maffione netmap_bwrap_krings_delete_common(na); 1388*b6e66be2SVincenzo Maffione netmap_vale_vp_krings_delete(na); 1389f9790aebSLuigi Rizzo } 1390f9790aebSLuigi Rizzo 1391f9790aebSLuigi Rizzo static int 13922a7db7a6SVincenzo Maffione netmap_vale_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) 1393f9790aebSLuigi Rizzo { 1394f9790aebSLuigi Rizzo struct netmap_bwrap_adapter *bna; 13954bf50f18SLuigi Rizzo struct netmap_adapter *na = NULL; 13964bf50f18SLuigi Rizzo struct netmap_adapter *hostna = NULL; 13972a7db7a6SVincenzo Maffione int error; 1398f9790aebSLuigi Rizzo 1399c3e9b4dbSLuiz Otavio O Souza bna = nm_os_malloc(sizeof(*bna)); 14004bf50f18SLuigi Rizzo if (bna == NULL) { 1401f9790aebSLuigi Rizzo return ENOMEM; 14024bf50f18SLuigi Rizzo } 1403f9790aebSLuigi Rizzo na = &bna->up.up; 1404*b6e66be2SVincenzo Maffione strlcpy(na->name, nr_name, sizeof(na->name)); 140537e3a6d3SLuigi Rizzo na->nm_register = netmap_bwrap_reg; 1406*b6e66be2SVincenzo Maffione na->nm_txsync = netmap_vale_vp_txsync; 1407f9790aebSLuigi Rizzo // na->nm_rxsync = netmap_bwrap_rxsync; 14082a7db7a6SVincenzo Maffione na->nm_krings_create = netmap_vale_bwrap_krings_create; 14092a7db7a6SVincenzo Maffione na->nm_krings_delete = netmap_vale_bwrap_krings_delete; 1410f9790aebSLuigi Rizzo na->nm_notify = netmap_bwrap_notify; 1411f9790aebSLuigi Rizzo bna->up.retry = 1; /* XXX maybe this should depend on the hwna */ 14124f80b14cSVincenzo Maffione /* Set the mfs, needed on the VALE mismatch datapath. */ 14134f80b14cSVincenzo Maffione bna->up.mfs = NM_BDG_MFS_DEFAULT; 1414f9790aebSLuigi Rizzo 1415f0ea3689SLuigi Rizzo if (hwna->na_flags & NAF_HOST_RINGS) { 1416f9790aebSLuigi Rizzo hostna = &bna->host.up; 1417847bf383SLuigi Rizzo hostna->nm_notify = netmap_bwrap_notify; 14184f80b14cSVincenzo Maffione bna->host.mfs = NM_BDG_MFS_DEFAULT; 1419f0ea3689SLuigi Rizzo } 1420f9790aebSLuigi Rizzo 14212a7db7a6SVincenzo Maffione error = netmap_bwrap_attach_common(na, hwna); 1422f9790aebSLuigi Rizzo if (error) { 1423c3e9b4dbSLuiz Otavio O Souza nm_os_free(bna); 14242a7db7a6SVincenzo Maffione } 1425f9790aebSLuigi Rizzo return error; 1426847bf383SLuigi Rizzo } 1427847bf383SLuigi Rizzo 1428847bf383SLuigi Rizzo int 14292a7db7a6SVincenzo Maffione netmap_get_vale_na(struct nmreq_header *hdr, struct netmap_adapter **na, 14302a7db7a6SVincenzo Maffione struct netmap_mem_d *nmd, int create) 1431847bf383SLuigi Rizzo { 14322a7db7a6SVincenzo Maffione return netmap_get_bdg_na(hdr, na, nmd, create, &vale_bdg_ops); 1433847bf383SLuigi Rizzo } 1434847bf383SLuigi Rizzo 14352a7db7a6SVincenzo Maffione 14362a7db7a6SVincenzo Maffione /* creates a persistent VALE port */ 14372a7db7a6SVincenzo Maffione int 14382a7db7a6SVincenzo Maffione nm_vi_create(struct nmreq_header *hdr) 1439847bf383SLuigi Rizzo { 14402a7db7a6SVincenzo Maffione struct nmreq_vale_newif *req = 14412a7db7a6SVincenzo Maffione (struct nmreq_vale_newif *)(uintptr_t)hdr->nr_body; 14422a7db7a6SVincenzo Maffione int error = 0; 14432a7db7a6SVincenzo Maffione /* Build a nmreq_register out of the nmreq_vale_newif, 14442a7db7a6SVincenzo Maffione * so that we can call netmap_get_bdg_na(). */ 14452a7db7a6SVincenzo Maffione struct nmreq_register regreq; 14462a7db7a6SVincenzo Maffione bzero(®req, sizeof(regreq)); 14472a7db7a6SVincenzo Maffione regreq.nr_tx_slots = req->nr_tx_slots; 14482a7db7a6SVincenzo Maffione regreq.nr_rx_slots = req->nr_rx_slots; 14492a7db7a6SVincenzo Maffione regreq.nr_tx_rings = req->nr_tx_rings; 14502a7db7a6SVincenzo Maffione regreq.nr_rx_rings = req->nr_rx_rings; 14512a7db7a6SVincenzo Maffione regreq.nr_mem_id = req->nr_mem_id; 14522a7db7a6SVincenzo Maffione hdr->nr_reqtype = NETMAP_REQ_REGISTER; 14532a7db7a6SVincenzo Maffione hdr->nr_body = (uintptr_t)®req; 14542a7db7a6SVincenzo Maffione error = netmap_vi_create(hdr, 0 /* no autodelete */); 14552a7db7a6SVincenzo Maffione hdr->nr_reqtype = NETMAP_REQ_VALE_NEWIF; 14562a7db7a6SVincenzo Maffione hdr->nr_body = (uintptr_t)req; 14572a7db7a6SVincenzo Maffione /* Write back to the original struct. */ 14582a7db7a6SVincenzo Maffione req->nr_tx_slots = regreq.nr_tx_slots; 14592a7db7a6SVincenzo Maffione req->nr_rx_slots = regreq.nr_rx_slots; 14602a7db7a6SVincenzo Maffione req->nr_tx_rings = regreq.nr_tx_rings; 14612a7db7a6SVincenzo Maffione req->nr_rx_rings = regreq.nr_rx_rings; 14622a7db7a6SVincenzo Maffione req->nr_mem_id = regreq.nr_mem_id; 14632a7db7a6SVincenzo Maffione return error; 1464f9790aebSLuigi Rizzo } 14652a7db7a6SVincenzo Maffione 14662a7db7a6SVincenzo Maffione /* remove a persistent VALE port from the system */ 14672a7db7a6SVincenzo Maffione int 14682a7db7a6SVincenzo Maffione nm_vi_destroy(const char *name) 14692a7db7a6SVincenzo Maffione { 14702a7db7a6SVincenzo Maffione struct ifnet *ifp; 14712a7db7a6SVincenzo Maffione struct netmap_vp_adapter *vpna; 14722a7db7a6SVincenzo Maffione int error; 14732a7db7a6SVincenzo Maffione 14742a7db7a6SVincenzo Maffione ifp = ifunit_ref(name); 14752a7db7a6SVincenzo Maffione if (!ifp) 14762a7db7a6SVincenzo Maffione return ENXIO; 14772a7db7a6SVincenzo Maffione NMG_LOCK(); 14782a7db7a6SVincenzo Maffione /* make sure this is actually a VALE port */ 14792a7db7a6SVincenzo Maffione if (!NM_NA_VALID(ifp) || NA(ifp)->nm_register != netmap_vp_reg) { 14802a7db7a6SVincenzo Maffione error = EINVAL; 14812a7db7a6SVincenzo Maffione goto err; 14822a7db7a6SVincenzo Maffione } 14832a7db7a6SVincenzo Maffione 14842a7db7a6SVincenzo Maffione vpna = (struct netmap_vp_adapter *)NA(ifp); 14852a7db7a6SVincenzo Maffione 14862a7db7a6SVincenzo Maffione /* we can only destroy ports that were created via NETMAP_BDG_NEWIF */ 14872a7db7a6SVincenzo Maffione if (vpna->autodelete) { 14882a7db7a6SVincenzo Maffione error = EINVAL; 14892a7db7a6SVincenzo Maffione goto err; 14902a7db7a6SVincenzo Maffione } 14912a7db7a6SVincenzo Maffione 14922a7db7a6SVincenzo Maffione /* also make sure that nobody is using the inferface */ 14932a7db7a6SVincenzo Maffione if (NETMAP_OWNED_BY_ANY(&vpna->up) || 14942a7db7a6SVincenzo Maffione vpna->up.na_refcount > 1 /* any ref besides the one in nm_vi_create()? */) { 14952a7db7a6SVincenzo Maffione error = EBUSY; 14962a7db7a6SVincenzo Maffione goto err; 14972a7db7a6SVincenzo Maffione } 14982a7db7a6SVincenzo Maffione 14992a7db7a6SVincenzo Maffione NMG_UNLOCK(); 15002a7db7a6SVincenzo Maffione 1501*b6e66be2SVincenzo Maffione if (netmap_verbose) 1502*b6e66be2SVincenzo Maffione nm_prinf("destroying a persistent vale interface %s", ifp->if_xname); 15032a7db7a6SVincenzo Maffione /* Linux requires all the references are released 15042a7db7a6SVincenzo Maffione * before unregister 15052a7db7a6SVincenzo Maffione */ 15062a7db7a6SVincenzo Maffione netmap_detach(ifp); 15072a7db7a6SVincenzo Maffione if_rele(ifp); 15082a7db7a6SVincenzo Maffione nm_os_vi_detach(ifp); 15092a7db7a6SVincenzo Maffione return 0; 15102a7db7a6SVincenzo Maffione 15112a7db7a6SVincenzo Maffione err: 15122a7db7a6SVincenzo Maffione NMG_UNLOCK(); 15132a7db7a6SVincenzo Maffione if_rele(ifp); 15142a7db7a6SVincenzo Maffione return error; 15152a7db7a6SVincenzo Maffione } 15162a7db7a6SVincenzo Maffione 15172a7db7a6SVincenzo Maffione static int 15182a7db7a6SVincenzo Maffione nm_update_info(struct nmreq_register *req, struct netmap_adapter *na) 15192a7db7a6SVincenzo Maffione { 15202a7db7a6SVincenzo Maffione req->nr_rx_rings = na->num_rx_rings; 15212a7db7a6SVincenzo Maffione req->nr_tx_rings = na->num_tx_rings; 15222a7db7a6SVincenzo Maffione req->nr_rx_slots = na->num_rx_desc; 15232a7db7a6SVincenzo Maffione req->nr_tx_slots = na->num_tx_desc; 15242a7db7a6SVincenzo Maffione return netmap_mem_get_info(na->nm_mem, &req->nr_memsize, NULL, 15252a7db7a6SVincenzo Maffione &req->nr_mem_id); 15262a7db7a6SVincenzo Maffione } 15272a7db7a6SVincenzo Maffione 15282a7db7a6SVincenzo Maffione 15292a7db7a6SVincenzo Maffione /* 15302a7db7a6SVincenzo Maffione * Create a virtual interface registered to the system. 15312a7db7a6SVincenzo Maffione * The interface will be attached to a bridge later. 15322a7db7a6SVincenzo Maffione */ 15332a7db7a6SVincenzo Maffione int 15342a7db7a6SVincenzo Maffione netmap_vi_create(struct nmreq_header *hdr, int autodelete) 15352a7db7a6SVincenzo Maffione { 15362a7db7a6SVincenzo Maffione struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body; 15372a7db7a6SVincenzo Maffione struct ifnet *ifp; 15382a7db7a6SVincenzo Maffione struct netmap_vp_adapter *vpna; 15392a7db7a6SVincenzo Maffione struct netmap_mem_d *nmd = NULL; 15402a7db7a6SVincenzo Maffione int error; 15412a7db7a6SVincenzo Maffione 15422a7db7a6SVincenzo Maffione if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) { 15432a7db7a6SVincenzo Maffione return EINVAL; 15442a7db7a6SVincenzo Maffione } 15452a7db7a6SVincenzo Maffione 15462a7db7a6SVincenzo Maffione /* don't include VALE prefix */ 15472a7db7a6SVincenzo Maffione if (!strncmp(hdr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME))) 15482a7db7a6SVincenzo Maffione return EINVAL; 15492a7db7a6SVincenzo Maffione if (strlen(hdr->nr_name) >= IFNAMSIZ) { 15502a7db7a6SVincenzo Maffione return EINVAL; 15512a7db7a6SVincenzo Maffione } 15522a7db7a6SVincenzo Maffione ifp = ifunit_ref(hdr->nr_name); 15532a7db7a6SVincenzo Maffione if (ifp) { /* already exist, cannot create new one */ 15542a7db7a6SVincenzo Maffione error = EEXIST; 15552a7db7a6SVincenzo Maffione NMG_LOCK(); 15562a7db7a6SVincenzo Maffione if (NM_NA_VALID(ifp)) { 15572a7db7a6SVincenzo Maffione int update_err = nm_update_info(req, NA(ifp)); 15582a7db7a6SVincenzo Maffione if (update_err) 15592a7db7a6SVincenzo Maffione error = update_err; 15602a7db7a6SVincenzo Maffione } 15612a7db7a6SVincenzo Maffione NMG_UNLOCK(); 15622a7db7a6SVincenzo Maffione if_rele(ifp); 15632a7db7a6SVincenzo Maffione return error; 15642a7db7a6SVincenzo Maffione } 15652a7db7a6SVincenzo Maffione error = nm_os_vi_persist(hdr->nr_name, &ifp); 15662a7db7a6SVincenzo Maffione if (error) 15672a7db7a6SVincenzo Maffione return error; 15682a7db7a6SVincenzo Maffione 15692a7db7a6SVincenzo Maffione NMG_LOCK(); 15702a7db7a6SVincenzo Maffione if (req->nr_mem_id) { 15712a7db7a6SVincenzo Maffione nmd = netmap_mem_find(req->nr_mem_id); 15722a7db7a6SVincenzo Maffione if (nmd == NULL) { 15732a7db7a6SVincenzo Maffione error = EINVAL; 15742a7db7a6SVincenzo Maffione goto err_1; 15752a7db7a6SVincenzo Maffione } 15762a7db7a6SVincenzo Maffione } 15772a7db7a6SVincenzo Maffione /* netmap_vp_create creates a struct netmap_vp_adapter */ 1578*b6e66be2SVincenzo Maffione error = netmap_vale_vp_create(hdr, ifp, nmd, &vpna); 15792a7db7a6SVincenzo Maffione if (error) { 1580*b6e66be2SVincenzo Maffione if (netmap_debug & NM_DEBUG_VALE) 1581*b6e66be2SVincenzo Maffione nm_prerr("error %d", error); 15822a7db7a6SVincenzo Maffione goto err_1; 15832a7db7a6SVincenzo Maffione } 15842a7db7a6SVincenzo Maffione /* persist-specific routines */ 15852a7db7a6SVincenzo Maffione vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl; 15862a7db7a6SVincenzo Maffione if (!autodelete) { 15872a7db7a6SVincenzo Maffione netmap_adapter_get(&vpna->up); 15882a7db7a6SVincenzo Maffione } else { 15892a7db7a6SVincenzo Maffione vpna->autodelete = 1; 15902a7db7a6SVincenzo Maffione } 15912a7db7a6SVincenzo Maffione NM_ATTACH_NA(ifp, &vpna->up); 15922a7db7a6SVincenzo Maffione /* return the updated info */ 15932a7db7a6SVincenzo Maffione error = nm_update_info(req, &vpna->up); 15942a7db7a6SVincenzo Maffione if (error) { 15952a7db7a6SVincenzo Maffione goto err_2; 15962a7db7a6SVincenzo Maffione } 15972a7db7a6SVincenzo Maffione ND("returning nr_mem_id %d", req->nr_mem_id); 15982a7db7a6SVincenzo Maffione if (nmd) 15992a7db7a6SVincenzo Maffione netmap_mem_put(nmd); 16002a7db7a6SVincenzo Maffione NMG_UNLOCK(); 16012a7db7a6SVincenzo Maffione ND("created %s", ifp->if_xname); 16022a7db7a6SVincenzo Maffione return 0; 16032a7db7a6SVincenzo Maffione 16042a7db7a6SVincenzo Maffione err_2: 16052a7db7a6SVincenzo Maffione netmap_detach(ifp); 16062a7db7a6SVincenzo Maffione err_1: 16072a7db7a6SVincenzo Maffione if (nmd) 16082a7db7a6SVincenzo Maffione netmap_mem_put(nmd); 16092a7db7a6SVincenzo Maffione NMG_UNLOCK(); 16102a7db7a6SVincenzo Maffione nm_os_vi_detach(ifp); 16112a7db7a6SVincenzo Maffione 16122a7db7a6SVincenzo Maffione return error; 16132a7db7a6SVincenzo Maffione } 16142a7db7a6SVincenzo Maffione 1615f9790aebSLuigi Rizzo #endif /* WITH_VALE */ 1616