/*
 * Copyright (C) 2013 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This module implements netmap support on top of standard,
 * unmodified device drivers.
 *
 * A NIOCREGIF request is handled here if the device does not
 * have native support. TX and RX rings are emulated as follows:
 *
 * NIOCREGIF
 *      We preallocate a block of TX mbufs (roughly as many as
 *      tx descriptors; the number is not critical) to speed up
 *      operation during transmissions. The refcount on most of
 *      these buffers is artificially bumped up so we can recycle
 *      them more easily. Also, the destructor is intercepted
 *      so we use it as an interrupt notification to wake up
 *      processes blocked on a poll().
 *
 *      For each receive ring we allocate one "struct mbq"
 *      (an mbuf tailq plus a spinlock). We intercept packets
 *      (through if_input)
 *      on the receive path and put them in the mbq from which
 *      netmap receive routines can grab them.
 *
 * TX:
 *      in the generic_netmap_txsync() routine, netmap buffers are copied
 *      (or linked, in the future) to the preallocated mbufs
 *      and pushed to the transmit queue.
 *      Some of these mbufs
 *      (those with NS_REPORT, or otherwise every half ring)
 *      have the refcount=1, others have refcount=2.
 *      When the destructor is invoked, we take that as
 *      a notification that all mbufs up to that one in
 *      the specific ring have been completed, and generate
 *      the equivalent of a transmit interrupt.
 *
 * RX:
 *      intercepted mbufs are queued into the per-ring mbq by
 *      generic_rx_handler(); generic_netmap_rxsync() then dequeues
 *      them and copies their payload into the netmap slots.
 *
 */

#ifdef __FreeBSD__

#include <sys/cdefs.h> /* prerequisite */
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/lock.h>   /* PROT_EXEC */
#include <sys/rwlock.h>
#include <sys/socket.h> /* sockaddrs */
#include <sys/selinfo.h>
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h>        /* bus_dmamap_* in netmap_kern.h */

// XXX temporary - D() defined here
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>

#define rtnl_lock() D("rtnl_lock called");
#define rtnl_unlock() D("rtnl_unlock called");
#define MBUF_TXQ(m) ((m)->m_pkthdr.flowid)
#define smp_mb()

/*
 * mbuf wrappers
 */

/*
 * we allocate an EXT_PACKET
 */
#define netmap_get_mbuf(len) m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR|M_NOFREE)

/* mbuf destructor, also need to change the type to EXT_EXTREF,
 * add an M_NOFREE flag, and then clear the flag and
 * chain into uma_zfree(zone_pack, mf)
 * (or reinstall the buffer ?)
 */
#define SET_MBUF_DESTRUCTOR(m, fn)      do {            \
        (m)->m_ext.ext_free = (void *)fn;               \
        (m)->m_ext.ext_type = EXT_EXTREF;               \
} while (0)


#define GET_MBUF_REFCNT(m) ((m)->m_ext.ref_cnt ? *(m)->m_ext.ref_cnt : -1)



#else /* linux */

#include "bsd_glue.h"

#include <linux/rtnetlink.h>    /* rtnl_[un]lock() */
#include <linux/ethtool.h>      /* struct ethtool_ops, get_ringparam */
#include <linux/hrtimer.h>

//#define RATE  /* Enables communication statistics. */

//#define REG_RESET

#endif /* linux */


/* Common headers. */
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>



/* ======================== usage stats =========================== */

#ifdef RATE
#define IFRATE(x) x
struct rate_stats {
        unsigned long txpkt;
        unsigned long txsync;
        unsigned long txirq;
        unsigned long rxpkt;
        unsigned long rxirq;
        unsigned long rxsync;
};

struct rate_context {
        unsigned refcount;
        struct timer_list timer;
        struct rate_stats new;
        struct rate_stats old;
};

#define RATE_PRINTK(_NAME_) \
        printk( #_NAME_ " = %lu Hz\n", (cur._NAME_ - ctx->old._NAME_)/RATE_PERIOD);
#define RATE_PERIOD  2
static void rate_callback(unsigned long arg)
{
        struct rate_context * ctx = (struct rate_context *)arg;
        struct rate_stats cur = ctx->new;
        int r;

        RATE_PRINTK(txpkt);
        RATE_PRINTK(txsync);
        RATE_PRINTK(txirq);
        RATE_PRINTK(rxpkt);
        RATE_PRINTK(rxsync);
        RATE_PRINTK(rxirq);
        printk("\n");

        ctx->old = cur;
        r = mod_timer(&ctx->timer, jiffies +
                        msecs_to_jiffies(RATE_PERIOD * 1000));
        if (unlikely(r))
                D("[v1000] Error: mod_timer()");
}

static struct rate_context rate_ctx;

#else /* !RATE */
#define IFRATE(x)
#endif /* !RATE */


/* =============== GENERIC NETMAP ADAPTER SUPPORT ================= */
#define GENERIC_BUF_SIZE        netmap_buf_size /* Size of the mbufs in the Tx pool. */

/*
 * Wrapper used by the generic adapter layer to notify
 * the poller threads. Unlike netmap_rx_irq(), we check
 * only IFCAP_NETMAP instead of NAF_NATIVE_ON to enable the irq.
 */
static void
netmap_generic_irq(struct ifnet *ifp, u_int q, u_int *work_done)
{
        if (unlikely(!(ifp->if_capenable & IFCAP_NETMAP)))
                return;

        netmap_common_irq(ifp, q, work_done);
}


/* Enable/disable netmap mode for a generic network interface. */
int generic_netmap_register(struct netmap_adapter *na, int enable)
{
        struct ifnet *ifp = na->ifp;
        struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
        struct mbuf *m;
        int error;
        int i, r;

        if (!na)
                return EINVAL;

#ifdef REG_RESET
        error = ifp->netdev_ops->ndo_stop(ifp);
        if (error) {
                return error;
        }
#endif /* REG_RESET */

        if (enable) { /* Enable netmap mode. */
                /* Initialize the rx queue, as generic_rx_handler() can
                 * be called as soon as netmap_catch_rx() returns.
                 */
                for (r=0; r<na->num_rx_rings; r++) {
                        mbq_safe_init(&na->rx_rings[r].rx_queue);
                        na->rx_rings[r].nr_ntc = 0;
                }

                /* Init the mitigation timer. */
                netmap_mitigation_init(gna);

                /*
                 * Preallocate packet buffers for the tx rings.
                 */
                for (r=0; r<na->num_tx_rings; r++) {
                        na->tx_rings[r].nr_ntc = 0;
                        na->tx_rings[r].tx_pool = malloc(na->num_tx_desc * sizeof(struct mbuf *),
                                        M_DEVBUF, M_NOWAIT | M_ZERO);
                        if (!na->tx_rings[r].tx_pool) {
                                D("tx_pool allocation failed");
                                error = ENOMEM;
                                goto free_tx_pool;
                        }
                        for (i=0; i<na->num_tx_desc; i++) {
                                m = netmap_get_mbuf(GENERIC_BUF_SIZE);
                                if (!m) {
                                        D("tx_pool[%d] allocation failed", i);
                                        error = ENOMEM;
                                        goto free_mbufs;
                                }
                                na->tx_rings[r].tx_pool[i] = m;
                        }
                }
                rtnl_lock();
                /* Prepare to intercept incoming traffic. */
                error = netmap_catch_rx(na, 1);
                if (error) {
                        D("netdev_rx_handler_register() failed");
                        goto register_handler;
                }
                ifp->if_capenable |= IFCAP_NETMAP;

                /* Make netmap control the packet steering. */
                netmap_catch_packet_steering(gna, 1);

                rtnl_unlock();

#ifdef RATE
                if (rate_ctx.refcount == 0) {
                        D("setup_timer()");
                        memset(&rate_ctx, 0, sizeof(rate_ctx));
                        setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx);
                        if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) {
                                D("Error: mod_timer()");
                        }
                }
                rate_ctx.refcount++;
#endif /* RATE */

        } else { /* Disable netmap mode. */
                rtnl_lock();

                ifp->if_capenable &= ~IFCAP_NETMAP;

                /* Release packet steering control. */
                netmap_catch_packet_steering(gna, 0);

                /* Do not intercept packets on the rx path. */
                netmap_catch_rx(na, 0);

                rtnl_unlock();

                /* Free the mbufs going to the netmap rings */
                for (r=0; r<na->num_rx_rings; r++) {
                        mbq_safe_purge(&na->rx_rings[r].rx_queue);
                        mbq_safe_destroy(&na->rx_rings[r].rx_queue);
                }

                netmap_mitigation_cleanup(gna);

                for (r=0; r<na->num_tx_rings; r++) {
                        for (i=0; i<na->num_tx_desc; i++) {
                                m_freem(na->tx_rings[r].tx_pool[i]);
                        }
                        free(na->tx_rings[r].tx_pool, M_DEVBUF);
                }

#ifdef RATE
                if (--rate_ctx.refcount == 0) {
                        D("del_timer()");
                        del_timer(&rate_ctx.timer);
                }
#endif
        }

#ifdef REG_RESET
        error = ifp->netdev_ops->ndo_open(ifp);
        if (error) {
                goto alloc_tx_pool;
        }
#endif

        return 0;

register_handler:
        rtnl_unlock();
free_tx_pool:
        r--;
        i = na->num_tx_desc; /* Useless, but just to stay safe. */
free_mbufs:
        i--;
        for (; r>=0; r--) {
                for (; i>=0; i--) {
                        m_freem(na->tx_rings[r].tx_pool[i]);
                }
                free(na->tx_rings[r].tx_pool, M_DEVBUF);
                i = na->num_tx_desc - 1;
        }

        return error;
}

/*
 * Callback invoked when the device driver frees an mbuf used
 * by netmap to transmit a packet. This usually happens when
 * the NIC notifies the driver that transmission is completed.
 */
static void
generic_mbuf_destructor(struct mbuf *m)
{
        if (netmap_verbose)
                D("Tx irq (%p) queue %d", m, MBUF_TXQ(m));
        netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL);
#ifdef __FreeBSD__
        m->m_ext.ext_type = EXT_PACKET;
        m->m_ext.ext_free = NULL;
        if (*(m->m_ext.ref_cnt) == 0)
                *(m->m_ext.ref_cnt) = 1;
        uma_zfree(zone_pack, m);
#endif /* __FreeBSD__ */
        IFRATE(rate_ctx.new.txirq++);
}

/* Record completed transmissions and update hwavail.
 *
 * nr_ntc is the oldest tx buffer not yet completed
 * (same as nr_hwavail + nr_hwcur + 1),
 * nr_hwcur is the first unsent buffer.
 * When cleaning, we try to recover buffers between nr_ntc and nr_hwcur.
 */
static int
generic_netmap_tx_clean(struct netmap_kring *kring)
{
        u_int num_slots = kring->nkr_num_slots;
        u_int ntc = kring->nr_ntc;
        u_int hwcur = kring->nr_hwcur;
        u_int n = 0;
        struct mbuf **tx_pool = kring->tx_pool;

        while (ntc != hwcur) { /* buffers not completed */
                struct mbuf *m = tx_pool[ntc];

                if (unlikely(m == NULL)) {
                        /* try to replenish the entry */
                        tx_pool[ntc] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
                        if (unlikely(m == NULL)) {
                                D("mbuf allocation failed, XXX error");
                                // XXX how do we proceed ? break ?
                                return -ENOMEM;
                        }
                } else if (GET_MBUF_REFCNT(m) != 1) {
                        break; /* This mbuf is still busy: its refcnt is 2. */
                }
                if (unlikely(++ntc == num_slots)) {
                        ntc = 0;
                }
                n++;
        }
        kring->nr_ntc = ntc;
        kring->nr_hwavail += n;
        ND("tx completed [%d] -> hwavail %d", n, kring->nr_hwavail);

        return n;
}


/*
 * We have pending packets in the driver between nr_ntc and j.
 * Compute a position in the middle, to be used to generate
 * a notification.
 */
static inline u_int
generic_tx_event_middle(struct netmap_kring *kring, u_int hwcur)
{
        u_int n = kring->nkr_num_slots;
        u_int ntc = kring->nr_ntc;
        u_int e;

        if (hwcur >= ntc) {
                e = (hwcur + ntc) / 2;
        } else { /* wrap around */
                e = (hwcur + n + ntc) / 2;
                if (e >= n) {
                        e -= n;
                }
        }

        if (unlikely(e >= n)) {
                D("This cannot happen");
                e = 0;
        }

        return e;
}
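
/*
 * Worked example (illustrative only, not part of the original code):
 * with nkr_num_slots = 512, nr_ntc = 500 and hwcur = 40 the pending
 * mbufs occupy slots 500..511 and 0..39 (wrap-around case), so
 * e = (40 + 512 + 500) / 2 = 526, which exceeds 512 and is reduced to
 * 526 - 512 = 14, i.e. roughly halfway through the pending region.
 * Without wrap-around (e.g. nr_ntc = 100, hwcur = 300) the result is
 * simply (300 + 100) / 2 = 200.
 */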

/*
 * We have pending packets in the driver between nr_ntc and hwcur.
 * Schedule a notification approximately in the middle of the two.
 * There is a race but this is only called within txsync which does
 * a double check.
 */
static void
generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
{
        struct mbuf *m;
        u_int e;

        if (kring->nr_ntc == hwcur) {
                return;
        }
        e = generic_tx_event_middle(kring, hwcur);

        m = kring->tx_pool[e];
        if (m == NULL) {
                /* This can happen if there is already an event on the netmap
                   slot 'e': there is nothing to do. */
                return;
        }
        ND("Event at %d mbuf %p refcnt %d", e, m, GET_MBUF_REFCNT(m));
        kring->tx_pool[e] = NULL;
        SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);

        // XXX wmb() ?
        /* Decrement the refcount and free it if we have the last one. */
        m_freem(m);
        smp_mb();
}
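
/*
 * Lifecycle sketch (a summary of the mechanism implemented above, added
 * here for reference):
 * 1. txsync hands tx_pool[j] to the driver, so the pooled mbuf holds two
 *    references (ours plus the one in flight).
 * 2. When a notification is needed, generic_set_tx_event() picks a slot
 *    'e' in the middle of the pending region, installs
 *    generic_mbuf_destructor() on that mbuf, drops our reference with
 *    m_freem() and clears tx_pool[e].
 * 3. When the driver eventually releases its own reference, the
 *    destructor fires and netmap_generic_irq() wakes up the pollers.
 * 4. generic_netmap_tx_clean() then advances nr_ntc over every slot whose
 *    mbuf is back to refcount 1 (or NULL, which gets replenished),
 *    crediting the reclaimed slots to nr_hwavail.
 */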

/*
 * generic_netmap_txsync() transforms netmap buffers into mbufs
 * and passes them to the standard device driver
 * (ndo_start_xmit() or ifp->if_transmit() ).
 * On linux this is not done directly, but using dev_queue_xmit(),
 * since it implements the TX flow control (and takes some locks).
 */
static int
generic_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
        struct ifnet *ifp = na->ifp;
        struct netmap_kring *kring = &na->tx_rings[ring_nr];
        struct netmap_ring *ring = kring->ring;
        u_int j, k, num_slots = kring->nkr_num_slots;
        int new_slots, ntx;

        IFRATE(rate_ctx.new.txsync++);

        // TODO: handle the case of mbuf allocation failure
        /* first, reclaim completed buffers */
        generic_netmap_tx_clean(kring);

        /* Take a copy of ring->cur now, and never read it again. */
        k = ring->cur;
        if (unlikely(k >= num_slots)) {
                return netmap_ring_reinit(kring);
        }

        rmb();
        j = kring->nr_hwcur;
        /*
         * 'new_slots' counts how many new slots have been added:
         * everything from hwcur to cur, excluding reserved ones, if any.
         * nr_hwreserved starts from hwcur and counts how many slots were
         * not sent to the NIC from the previous round.
         */
        new_slots = k - j - kring->nr_hwreserved;
        if (new_slots < 0) {
                new_slots += num_slots;
        }
        ntx = 0;
        if (j != k) {
                /* Process new packets to send:
                 * j is the current index in the netmap ring.
                 */
                while (j != k) {
                        struct netmap_slot *slot = &ring->slot[j]; /* Current slot in the netmap ring */
                        void *addr = NMB(slot);
                        u_int len = slot->len;
                        struct mbuf *m;
                        int tx_ret;

                        if (unlikely(addr == netmap_buffer_base || len > NETMAP_BUF_SIZE)) {
                                return netmap_ring_reinit(kring);
                        }
                        /* Take an mbuf from the tx pool and copy in the user packet. */
                        m = kring->tx_pool[j];
                        if (unlikely(!m)) {
                                RD(5, "This should never happen");
                                kring->tx_pool[j] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
                                if (unlikely(m == NULL)) {
                                        D("mbuf allocation failed");
                                        break;
                                }
                        }
                        /* XXX we should ask notifications when NS_REPORT is set,
                         * or roughly every half frame. We can optimize this
                         * by lazily requesting notifications only when a
                         * transmission fails. Probably the best way is to
                         * break on failures and set notifications when
                         * ring->avail == 0 || j != k
                         */
                        tx_ret = generic_xmit_frame(ifp, m, addr, len, ring_nr);
                        if (unlikely(tx_ret)) {
                                RD(5, "start_xmit failed: err %d [%u,%u,%u,%u]",
                                                tx_ret, kring->nr_ntc, j, k, kring->nr_hwavail);
                                /*
                                 * No room for this mbuf in the device driver.
                                 * Request a notification FOR A PREVIOUS MBUF,
                                 * then call generic_netmap_tx_clean(kring) to do the
                                 * double check and see if we can free more buffers.
                                 * If there is space continue, else break;
                                 * NOTE: the double check is necessary if the problem
                                 * occurs in the txsync call after selrecord().
                                 * Also, we need some way to tell the caller that not
                                 * all buffers were queued onto the device (this was
                                 * not a problem with native netmap driver where space
                                 * is preallocated). The bridge has a similar problem
                                 * and we solve it there by dropping the excess packets.
                                 */
                                generic_set_tx_event(kring, j);
                                if (generic_netmap_tx_clean(kring)) { /* space now available */
                                        continue;
                                } else {
                                        break;
                                }
                        }
                        slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
                        if (unlikely(++j == num_slots))
                                j = 0;
                        ntx++;
                }

                /* Update hwcur to the next slot to transmit. */
                kring->nr_hwcur = j;

                /*
                 * Report all new slots as unavailable, even those not sent.
                 * We account for them with hwreserved, so that
                 * nr_hwreserved =:= cur - nr_hwcur
                 */
                kring->nr_hwavail -= new_slots;
                kring->nr_hwreserved = k - j;
                if (kring->nr_hwreserved < 0) {
                        kring->nr_hwreserved += num_slots;
                }

                IFRATE(rate_ctx.new.txpkt += ntx);

                if (!kring->nr_hwavail) {
                        /* No more available slots? Set a notification event
                         * on a netmap slot that will be cleaned in the future.
                         * No doublecheck is performed, since txsync() will be
                         * called twice by netmap_poll().
                         */
                        generic_set_tx_event(kring, j);
                }
                ND("tx #%d, hwavail = %d", ntx, kring->nr_hwavail);
        }

        /* Synchronize the user's view to the kernel view. */
        ring->avail = kring->nr_hwavail;
        ring->reserved = kring->nr_hwreserved;

        return 0;
}
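
/*
 * Worked example (illustrative only): with num_slots = 256,
 * nr_hwcur = 10, nr_hwreserved = 0, nr_hwavail = 200 and ring->cur = 50,
 * new_slots is 40. If the driver accepts only the first 30 frames and the
 * loop breaks at j = 40, then after the call nr_hwcur = 40,
 * nr_hwreserved = 50 - 40 = 10 and nr_hwavail = 200 - 40 = 160.
 * A subsequent txsync with an unchanged cur computes new_slots = 0 and
 * simply retries the 10 reserved slots starting from nr_hwcur.
 */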

/*
 * This handler is registered (through netmap_catch_rx())
 * within the attached network interface
 * in the RX subsystem, so that every mbuf passed up by
 * the driver can be stolen before it reaches the network stack.
 * Stolen packets are put in a queue where the
 * generic_netmap_rxsync() callback can extract them.
 */
void generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
{
        struct netmap_adapter *na = NA(ifp);
        struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
        u_int work_done;
        u_int rr = 0; // receive ring number

        ND("called");
        /* limit the size of the queue */
        if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) {
                m_freem(m);
        } else {
                mbq_safe_enqueue(&na->rx_rings[rr].rx_queue, m);
        }

        if (netmap_generic_mit < 32768) {
                /* no rx mitigation, pass notification up */
                netmap_generic_irq(na->ifp, rr, &work_done);
                IFRATE(rate_ctx.new.rxirq++);
        } else {
                /* same as send combining, filter notification if there is a
                 * pending timer, otherwise pass it up and start a timer.
                 */
                if (likely(netmap_mitigation_active(gna))) {
                        /* Record that there is some pending work. */
                        gna->mit_pending = 1;
                } else {
                        netmap_generic_irq(na->ifp, rr, &work_done);
                        IFRATE(rate_ctx.new.rxirq++);
                        netmap_mitigation_start(gna);
                }
        }
}

/*
 * generic_netmap_rxsync() extracts mbufs from the queue filled by
 * generic_rx_handler() and puts their content in the netmap
 * receive ring.
 * Access must be protected because the rx handler is asynchronous.
 */
static int
generic_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
        struct netmap_kring *kring = &na->rx_rings[ring_nr];
        struct netmap_ring *ring = kring->ring;
        u_int j, n, lim = kring->nkr_num_slots - 1;
        int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
        u_int k, resvd = ring->reserved;

        if (ring->cur > lim)
                return netmap_ring_reinit(kring);

        /* Import newly received packets into the netmap ring. */
        if (netmap_no_pendintr || force_update) {
                uint16_t slot_flags = kring->nkr_slot_flags;
                struct mbuf *m;

                n = 0;
                j = kring->nr_ntc; /* first empty slot in the receive ring */
                /* extract buffers from the rx queue, stop at most one
                 * slot before nr_hwcur (index k)
                 */
                k = (kring->nr_hwcur) ? kring->nr_hwcur-1 : lim;
                while (j != k) {
                        int len;
                        void *addr = NMB(&ring->slot[j]);

                        if (addr == netmap_buffer_base) { /* Bad buffer */
                                return netmap_ring_reinit(kring);
                        }
                        /*
                         * Call the locked version of the function.
                         * XXX Ideally we could grab a batch of mbufs at once,
                         * by changing rx_queue into a ring.
                         */
                        m = mbq_safe_dequeue(&kring->rx_queue);
                        if (!m)
                                break;
                        len = MBUF_LEN(m);
                        m_copydata(m, 0, len, addr);
                        ring->slot[j].len = len;
                        ring->slot[j].flags = slot_flags;
                        m_freem(m);
                        if (unlikely(j++ == lim))
                                j = 0;
                        n++;
                }
                if (n) {
                        kring->nr_ntc = j;
                        kring->nr_hwavail += n;
                        IFRATE(rate_ctx.new.rxpkt += n);
                }
                kring->nr_kflags &= ~NKR_PENDINTR;
        }

        // XXX should we invert the order ?
        /* Skip past packets that userspace has released */
        j = kring->nr_hwcur;
        k = ring->cur;
        if (resvd > 0) {
                if (resvd + ring->avail >= lim + 1) {
                        D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
                        ring->reserved = resvd = 0; // XXX panic...
                }
                k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
        }
        if (j != k) {
                /* Userspace has released some packets. */
                for (n = 0; j != k; n++) {
                        struct netmap_slot *slot = &ring->slot[j];

                        slot->flags &= ~NS_BUF_CHANGED;
                        if (unlikely(j++ == lim))
                                j = 0;
                }
                kring->nr_hwavail -= n;
                kring->nr_hwcur = k;
        }
        /* Tell userspace that there are new packets. */
        ring->avail = kring->nr_hwavail - resvd;
        IFRATE(rate_ctx.new.rxsync++);

        return 0;
}

static void
generic_netmap_dtor(struct netmap_adapter *na)
{
        struct ifnet *ifp = na->ifp;
        struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na;
        struct netmap_adapter *prev_na = gna->prev;

        if (prev_na != NULL) {
                D("Released generic NA %p", gna);
                if_rele(na->ifp);
                netmap_adapter_put(prev_na);
        }
        if (ifp != NULL) {
                WNA(ifp) = prev_na;
                D("Restored native NA %p", prev_na);
                na->ifp = NULL;
        }
}

/*
 * generic_netmap_attach() makes it possible to use netmap on
 * a device without native netmap support.
 * This is less performant than native support but potentially
 * faster than raw sockets or similar schemes.
 *
 * In this "emulated" mode, netmap rings do not necessarily
 * have the same size as those in the NIC. We use a default
 * value and possibly override it if the OS has ways to fetch the
 * actual configuration.
 */
int
generic_netmap_attach(struct ifnet *ifp)
{
        struct netmap_adapter *na;
        struct netmap_generic_adapter *gna;
        int retval;
        u_int num_tx_desc, num_rx_desc;

        num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */

        generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc);
        ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc);

        gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO);
        if (gna == NULL) {
                D("no memory on attach, give up");
                return ENOMEM;
        }
        na = (struct netmap_adapter *)gna;
        na->ifp = ifp;
        na->num_tx_desc = num_tx_desc;
        na->num_rx_desc = num_rx_desc;
        na->nm_register = &generic_netmap_register;
        na->nm_txsync = &generic_netmap_txsync;
        na->nm_rxsync = &generic_netmap_rxsync;
        na->nm_dtor = &generic_netmap_dtor;
        /* when using generic, IFCAP_NETMAP is set so we force
         * NAF_SKIP_INTR to use the regular interrupt handler
         */
        na->na_flags = NAF_SKIP_INTR;

        ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
                        ifp->num_tx_queues, ifp->real_num_tx_queues,
                        ifp->tx_queue_len);
        ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)",
                        ifp->num_rx_queues, ifp->real_num_rx_queues);

        generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings);

        retval = netmap_attach_common(na);
        if (retval) {
                free(gna, M_DEVBUF);
        }

        return retval;
}
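
/*
 * Usage sketch (illustrative only; the exact call path lives in the
 * netmap core, not in this file): when a NIOCREGIF ioctl targets an
 * interface without native netmap support, the core is expected to call
 * generic_netmap_attach(ifp) to build the emulated adapter, and
 * generic_netmap_register(na, 1) is later invoked to switch the interface
 * into netmap mode (preallocating the tx_pool, hooking the rx path with
 * netmap_catch_rx() and taking over packet steering).
 */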