xref: /freebsd-14.2/sys/dev/netmap/netmap.c (revision 4f80b14c)
1718cf2ccSPedro F. Giffuni /*-
2718cf2ccSPedro F. Giffuni  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3718cf2ccSPedro F. Giffuni  *
437e3a6d3SLuigi Rizzo  * Copyright (C) 2011-2014 Matteo Landi
537e3a6d3SLuigi Rizzo  * Copyright (C) 2011-2016 Luigi Rizzo
637e3a6d3SLuigi Rizzo  * Copyright (C) 2011-2016 Giuseppe Lettieri
737e3a6d3SLuigi Rizzo  * Copyright (C) 2011-2016 Vincenzo Maffione
837e3a6d3SLuigi Rizzo  * All rights reserved.
968b8534bSLuigi Rizzo  *
1068b8534bSLuigi Rizzo  * Redistribution and use in source and binary forms, with or without
1168b8534bSLuigi Rizzo  * modification, are permitted provided that the following conditions
1268b8534bSLuigi Rizzo  * are met:
1368b8534bSLuigi Rizzo  *   1. Redistributions of source code must retain the above copyright
1468b8534bSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer.
1568b8534bSLuigi Rizzo  *   2. Redistributions in binary form must reproduce the above copyright
1668b8534bSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer in the
1768b8534bSLuigi Rizzo  *      documentation and/or other materials provided with the distribution.
1868b8534bSLuigi Rizzo  *
1968b8534bSLuigi Rizzo  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
2068b8534bSLuigi Rizzo  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2168b8534bSLuigi Rizzo  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2268b8534bSLuigi Rizzo  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2368b8534bSLuigi Rizzo  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2468b8534bSLuigi Rizzo  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2568b8534bSLuigi Rizzo  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2668b8534bSLuigi Rizzo  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2768b8534bSLuigi Rizzo  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2868b8534bSLuigi Rizzo  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2968b8534bSLuigi Rizzo  * SUCH DAMAGE.
3068b8534bSLuigi Rizzo  */
3168b8534bSLuigi Rizzo 
32ce3ee1e7SLuigi Rizzo 
3368b8534bSLuigi Rizzo /*
34f9790aebSLuigi Rizzo  * $FreeBSD$
35f9790aebSLuigi Rizzo  *
3668b8534bSLuigi Rizzo  * This module supports memory mapped access to network devices,
3768b8534bSLuigi Rizzo  * see netmap(4).
3868b8534bSLuigi Rizzo  *
3968b8534bSLuigi Rizzo  * The module uses a large, memory pool allocated by the kernel
4068b8534bSLuigi Rizzo  * and accessible as mmapped memory by multiple userspace threads/processes.
4168b8534bSLuigi Rizzo  * The memory pool contains packet buffers and "netmap rings",
4268b8534bSLuigi Rizzo  * i.e. user-accessible copies of the interface's queues.
4368b8534bSLuigi Rizzo  *
4468b8534bSLuigi Rizzo  * Access to the network card works like this:
4568b8534bSLuigi Rizzo  * 1. a process/thread issues one or more open() on /dev/netmap, to create
4668b8534bSLuigi Rizzo  *    select()able file descriptor on which events are reported.
4768b8534bSLuigi Rizzo  * 2. on each descriptor, the process issues an ioctl() to identify
4868b8534bSLuigi Rizzo  *    the interface that should report events to the file descriptor.
4968b8534bSLuigi Rizzo  * 3. on each descriptor, the process issues an mmap() request to
5068b8534bSLuigi Rizzo  *    map the shared memory region within the process' address space.
5168b8534bSLuigi Rizzo  *    The list of interesting queues is indicated by a location in
5268b8534bSLuigi Rizzo  *    the shared memory region.
5368b8534bSLuigi Rizzo  * 4. using the functions in the netmap(4) userspace API, a process
5468b8534bSLuigi Rizzo  *    can look up the occupation state of a queue, access memory buffers,
5568b8534bSLuigi Rizzo  *    and retrieve received packets or enqueue packets to transmit.
5668b8534bSLuigi Rizzo  * 5. using some ioctl()s the process can synchronize the userspace view
5768b8534bSLuigi Rizzo  *    of the queue with the actual status in the kernel. This includes both
5868b8534bSLuigi Rizzo  *    receiving the notification of new packets, and transmitting new
5968b8534bSLuigi Rizzo  *    packets on the output interface.
6068b8534bSLuigi Rizzo  * 6. select() or poll() can be used to wait for events on individual
6168b8534bSLuigi Rizzo  *    transmit or receive queues (or all queues for a given interface).
62ce3ee1e7SLuigi Rizzo  *
63ce3ee1e7SLuigi Rizzo 
64ce3ee1e7SLuigi Rizzo 		SYNCHRONIZATION (USER)
65ce3ee1e7SLuigi Rizzo 
66ce3ee1e7SLuigi Rizzo The netmap rings and data structures may be shared among multiple
67ce3ee1e7SLuigi Rizzo user threads or even independent processes.
68ce3ee1e7SLuigi Rizzo Any synchronization among those threads/processes is delegated
69ce3ee1e7SLuigi Rizzo to the threads themselves. Only one thread at a time can be in
70ce3ee1e7SLuigi Rizzo a system call on the same netmap ring. The OS does not enforce
71ce3ee1e7SLuigi Rizzo this and only guarantees against system crashes in case of
72ce3ee1e7SLuigi Rizzo invalid usage.
73ce3ee1e7SLuigi Rizzo 
74ce3ee1e7SLuigi Rizzo 		LOCKING (INTERNAL)
75ce3ee1e7SLuigi Rizzo 
76ce3ee1e7SLuigi Rizzo Within the kernel, access to the netmap rings is protected as follows:
77ce3ee1e7SLuigi Rizzo 
78ce3ee1e7SLuigi Rizzo - a spinlock on each ring, to handle producer/consumer races on
79ce3ee1e7SLuigi Rizzo   RX rings attached to the host stack (against multiple host
80ce3ee1e7SLuigi Rizzo   threads writing from the host stack to the same ring),
81ce3ee1e7SLuigi Rizzo   and on 'destination' rings attached to a VALE switch
82ce3ee1e7SLuigi Rizzo   (i.e. RX rings in VALE ports, and TX rings in NIC/host ports)
83ce3ee1e7SLuigi Rizzo   protecting multiple active senders for the same destination
84ce3ee1e7SLuigi Rizzo 
85ce3ee1e7SLuigi Rizzo - an atomic variable to guarantee that there is at most one
86ce3ee1e7SLuigi Rizzo   instance of *_*xsync() on the ring at any time.
87ce3ee1e7SLuigi Rizzo   For rings connected to user file
88ce3ee1e7SLuigi Rizzo   descriptors, an atomic_test_and_set() protects this, and the
89ce3ee1e7SLuigi Rizzo   lock on the ring is not actually used.
90ce3ee1e7SLuigi Rizzo   For NIC RX rings connected to a VALE switch, an atomic_test_and_set()
91ce3ee1e7SLuigi Rizzo   is also used to prevent multiple executions (the driver might indeed
92ce3ee1e7SLuigi Rizzo   already guarantee this).
93ce3ee1e7SLuigi Rizzo   For NIC TX rings connected to a VALE switch, the lock arbitrates
94ce3ee1e7SLuigi Rizzo   access to the queue (both when allocating buffers and when pushing
95ce3ee1e7SLuigi Rizzo   them out).
96ce3ee1e7SLuigi Rizzo 
97ce3ee1e7SLuigi Rizzo - *xsync() should be protected against initializations of the card.
98ce3ee1e7SLuigi Rizzo   On FreeBSD most devices have the reset routine protected by
99ce3ee1e7SLuigi Rizzo   a RING lock (ixgbe, igb, em) or core lock (re). lem is missing
100ce3ee1e7SLuigi Rizzo   the RING protection on rx_reset(), this should be added.
101ce3ee1e7SLuigi Rizzo 
102ce3ee1e7SLuigi Rizzo   On linux there is an external lock on the tx path, which probably
103ce3ee1e7SLuigi Rizzo   also arbitrates access to the reset routine. XXX to be revised
104ce3ee1e7SLuigi Rizzo 
105ce3ee1e7SLuigi Rizzo - a per-interface core_lock protecting access from the host stack
106ce3ee1e7SLuigi Rizzo   while interfaces may be detached from netmap mode.
107ce3ee1e7SLuigi Rizzo   XXX there should be no need for this lock if we detach the interfaces
108ce3ee1e7SLuigi Rizzo   only while they are down.
109ce3ee1e7SLuigi Rizzo 
110ce3ee1e7SLuigi Rizzo 
111ce3ee1e7SLuigi Rizzo --- VALE SWITCH ---
112ce3ee1e7SLuigi Rizzo 
113ce3ee1e7SLuigi Rizzo NMG_LOCK() serializes all modifications to switches and ports.
114ce3ee1e7SLuigi Rizzo A switch cannot be deleted until all ports are gone.
115ce3ee1e7SLuigi Rizzo 
116ce3ee1e7SLuigi Rizzo For each switch, an SX lock (RWlock on linux) protects
117ce3ee1e7SLuigi Rizzo deletion of ports. When configuring or deleting a new port, the
118ce3ee1e7SLuigi Rizzo lock is acquired in exclusive mode (after holding NMG_LOCK).
119ce3ee1e7SLuigi Rizzo When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
120ce3ee1e7SLuigi Rizzo The lock is held throughout the entire forwarding cycle,
121ce3ee1e7SLuigi Rizzo during which the thread may incur a page fault.
122ce3ee1e7SLuigi Rizzo Hence it is important that sleepable shared locks are used.
123ce3ee1e7SLuigi Rizzo 
124ce3ee1e7SLuigi Rizzo On the rx ring, the per-port lock is grabbed initially to reserve
125ce3ee1e7SLuigi Rizzo a number of slots in the ring, then the lock is released,
126ce3ee1e7SLuigi Rizzo packets are copied from source to destination, and then
127ce3ee1e7SLuigi Rizzo the lock is acquired again and the receive ring is updated.
128ce3ee1e7SLuigi Rizzo (A similar thing is done on the tx ring for NIC and host stack
129ce3ee1e7SLuigi Rizzo ports attached to the switch)
130ce3ee1e7SLuigi Rizzo 
13168b8534bSLuigi Rizzo  */
13268b8534bSLuigi Rizzo 
1334bf50f18SLuigi Rizzo 
1344bf50f18SLuigi Rizzo /* --- internals ----
1354bf50f18SLuigi Rizzo  *
1364bf50f18SLuigi Rizzo  * Roadmap to the code that implements the above.
1374bf50f18SLuigi Rizzo  *
1384bf50f18SLuigi Rizzo  * > 1. a process/thread issues one or more open() on /dev/netmap, to create
1394bf50f18SLuigi Rizzo  * >    select()able file descriptor on which events are reported.
1404bf50f18SLuigi Rizzo  *
1414bf50f18SLuigi Rizzo  *  	Internally, we allocate a netmap_priv_d structure, that will be
14237e3a6d3SLuigi Rizzo  *  	initialized on ioctl(NIOCREGIF). There is one netmap_priv_d
14337e3a6d3SLuigi Rizzo  *  	structure for each open().
1444bf50f18SLuigi Rizzo  *
1454bf50f18SLuigi Rizzo  *      os-specific:
14637e3a6d3SLuigi Rizzo  *  	    FreeBSD: see netmap_open() (netmap_freebsd.c)
14737e3a6d3SLuigi Rizzo  *  	    linux:   see linux_netmap_open() (netmap_linux.c)
1484bf50f18SLuigi Rizzo  *
1494bf50f18SLuigi Rizzo  * > 2. on each descriptor, the process issues an ioctl() to identify
1504bf50f18SLuigi Rizzo  * >    the interface that should report events to the file descriptor.
1514bf50f18SLuigi Rizzo  *
1524bf50f18SLuigi Rizzo  * 	Implemented by netmap_ioctl(), NIOCREGIF case, with nmr->nr_cmd==0.
1534bf50f18SLuigi Rizzo  * 	Most important things happen in netmap_get_na() and
1544bf50f18SLuigi Rizzo  * 	netmap_do_regif(), called from there. Additional details can be
1554bf50f18SLuigi Rizzo  * 	found in the comments above those functions.
1564bf50f18SLuigi Rizzo  *
1574bf50f18SLuigi Rizzo  * 	In all cases, this action creates/takes-a-reference-to a
1584bf50f18SLuigi Rizzo  * 	netmap_*_adapter describing the port, and allocates a netmap_if
1594bf50f18SLuigi Rizzo  * 	and all necessary netmap rings, filling them with netmap buffers.
1604bf50f18SLuigi Rizzo  *
1614bf50f18SLuigi Rizzo  *      In this phase, the sync callbacks for each ring are set (these are used
1624bf50f18SLuigi Rizzo  *      in steps 5 and 6 below).  The callbacks depend on the type of adapter.
1634bf50f18SLuigi Rizzo  *      The adapter creation/initialization code puts them in the
1644bf50f18SLuigi Rizzo  * 	netmap_adapter (fields na->nm_txsync and na->nm_rxsync).  Then, they
1654bf50f18SLuigi Rizzo  * 	are copied from there to the netmap_kring's during netmap_do_regif(), by
1664bf50f18SLuigi Rizzo  * 	the nm_krings_create() callback.  All the nm_krings_create callbacks
1674bf50f18SLuigi Rizzo  * 	actually call netmap_krings_create() to perform this and the other
1684bf50f18SLuigi Rizzo  * 	common stuff. netmap_krings_create() also takes care of the host rings,
1694bf50f18SLuigi Rizzo  * 	if needed, by setting their sync callbacks appropriately.
1704bf50f18SLuigi Rizzo  *
1714bf50f18SLuigi Rizzo  * 	Additional actions depend on the kind of netmap_adapter that has been
1724bf50f18SLuigi Rizzo  * 	registered:
1734bf50f18SLuigi Rizzo  *
1744bf50f18SLuigi Rizzo  * 	- netmap_hw_adapter:  	     [netmap.c]
1754bf50f18SLuigi Rizzo  * 	     This is a system netdev/ifp with native netmap support.
1764bf50f18SLuigi Rizzo  * 	     The ifp is detached from the host stack by redirecting:
1774bf50f18SLuigi Rizzo  * 	       - transmissions (from the network stack) to netmap_transmit()
1784bf50f18SLuigi Rizzo  * 	       - receive notifications to the nm_notify() callback for
1794bf50f18SLuigi Rizzo  * 	         this adapter. The callback is normally netmap_notify(), unless
1804bf50f18SLuigi Rizzo  * 	         the ifp is attached to a bridge using bwrap, in which case it
1814bf50f18SLuigi Rizzo  * 	         is netmap_bwrap_intr_notify().
1824bf50f18SLuigi Rizzo  *
1834bf50f18SLuigi Rizzo  * 	- netmap_generic_adapter:      [netmap_generic.c]
1844bf50f18SLuigi Rizzo  * 	      A system netdev/ifp without native netmap support.
1854bf50f18SLuigi Rizzo  *
1864bf50f18SLuigi Rizzo  * 	(the decision about native/non native support is taken in
1874bf50f18SLuigi Rizzo  * 	 netmap_get_hw_na(), called by netmap_get_na())
1884bf50f18SLuigi Rizzo  *
1894bf50f18SLuigi Rizzo  * 	- netmap_vp_adapter 		[netmap_vale.c]
1904bf50f18SLuigi Rizzo  * 	      Returned by netmap_get_bdg_na().
1914bf50f18SLuigi Rizzo  * 	      This is a persistent or ephemeral VALE port. Ephemeral ports
1924bf50f18SLuigi Rizzo  * 	      are created on the fly if they don't already exist, and are
1934bf50f18SLuigi Rizzo  * 	      always attached to a bridge.
194453130d9SPedro F. Giffuni  * 	      Persistent VALE ports must be created separately, and
1954bf50f18SLuigi Rizzo  * 	      then attached like normal NICs. The NIOCREGIF we are examining
1964bf50f18SLuigi Rizzo  * 	      will find them only if they had previously been created and
1974bf50f18SLuigi Rizzo  * 	      attached (see VALE_CTL below).
1984bf50f18SLuigi Rizzo  *
1994bf50f18SLuigi Rizzo  * 	- netmap_pipe_adapter 	      [netmap_pipe.c]
2004bf50f18SLuigi Rizzo  * 	      Returned by netmap_get_pipe_na().
2014bf50f18SLuigi Rizzo  * 	      Both pipe ends are created, if they didn't already exist.
2024bf50f18SLuigi Rizzo  *
2034bf50f18SLuigi Rizzo  * 	- netmap_monitor_adapter      [netmap_monitor.c]
2044bf50f18SLuigi Rizzo  * 	      Returned by netmap_get_monitor_na().
2054bf50f18SLuigi Rizzo  * 	      If successful, the nm_sync callbacks of the monitored adapter
2064bf50f18SLuigi Rizzo  * 	      will be intercepted by the returned monitor.
2074bf50f18SLuigi Rizzo  *
2084bf50f18SLuigi Rizzo  * 	- netmap_bwrap_adapter	      [netmap_vale.c]
2094bf50f18SLuigi Rizzo  * 	      Cannot be obtained in this way, see VALE_CTL below
2104bf50f18SLuigi Rizzo  *
2114bf50f18SLuigi Rizzo  *
2124bf50f18SLuigi Rizzo  * 	os-specific:
2134bf50f18SLuigi Rizzo  * 	    linux: we first go through linux_netmap_ioctl() to
2144bf50f18SLuigi Rizzo  * 	           adapt the FreeBSD interface to the linux one.
2154bf50f18SLuigi Rizzo  *
2164bf50f18SLuigi Rizzo  *
2174bf50f18SLuigi Rizzo  * > 3. on each descriptor, the process issues an mmap() request to
2184bf50f18SLuigi Rizzo  * >    map the shared memory region within the process' address space.
2194bf50f18SLuigi Rizzo  * >    The list of interesting queues is indicated by a location in
2204bf50f18SLuigi Rizzo  * >    the shared memory region.
2214bf50f18SLuigi Rizzo  *
2224bf50f18SLuigi Rizzo  *      os-specific:
2234bf50f18SLuigi Rizzo  *  	    FreeBSD: netmap_mmap_single (netmap_freebsd.c).
2244bf50f18SLuigi Rizzo  *  	    linux:   linux_netmap_mmap (netmap_linux.c).
2254bf50f18SLuigi Rizzo  *
2264bf50f18SLuigi Rizzo  * > 4. using the functions in the netmap(4) userspace API, a process
2274bf50f18SLuigi Rizzo  * >    can look up the occupation state of a queue, access memory buffers,
2284bf50f18SLuigi Rizzo  * >    and retrieve received packets or enqueue packets to transmit.
2294bf50f18SLuigi Rizzo  *
2304bf50f18SLuigi Rizzo  * 	these actions do not involve the kernel.
2314bf50f18SLuigi Rizzo  *
2324bf50f18SLuigi Rizzo  * > 5. using some ioctl()s the process can synchronize the userspace view
2334bf50f18SLuigi Rizzo  * >    of the queue with the actual status in the kernel. This includes both
2344bf50f18SLuigi Rizzo  * >    receiving the notification of new packets, and transmitting new
2354bf50f18SLuigi Rizzo  * >    packets on the output interface.
2364bf50f18SLuigi Rizzo  *
2374bf50f18SLuigi Rizzo  * 	These are implemented in netmap_ioctl(), NIOCTXSYNC and NIOCRXSYNC
2384bf50f18SLuigi Rizzo  * 	cases. They invoke the nm_sync callbacks on the netmap_kring
2394bf50f18SLuigi Rizzo  * 	structures, as initialized in step 2 and maybe later modified
2404bf50f18SLuigi Rizzo  * 	by a monitor. Monitors, however, will always call the original
2414bf50f18SLuigi Rizzo  * 	callback before doing anything else.
2424bf50f18SLuigi Rizzo  *
2434bf50f18SLuigi Rizzo  *
2444bf50f18SLuigi Rizzo  * > 6. select() or poll() can be used to wait for events on individual
2454bf50f18SLuigi Rizzo  * >    transmit or receive queues (or all queues for a given interface).
2464bf50f18SLuigi Rizzo  *
2474bf50f18SLuigi Rizzo  * 	Implemented in netmap_poll(). This will call the same nm_sync()
2484bf50f18SLuigi Rizzo  * 	callbacks as in step 5 above.
2494bf50f18SLuigi Rizzo  *
2504bf50f18SLuigi Rizzo  * 	os-specific:
2514bf50f18SLuigi Rizzo  * 		linux: we first go through linux_netmap_poll() to adapt
2524bf50f18SLuigi Rizzo  * 		       the FreeBSD interface to the linux one.
2534bf50f18SLuigi Rizzo  *
2544bf50f18SLuigi Rizzo  *
2554bf50f18SLuigi Rizzo  *  ----  VALE_CTL -----
2564bf50f18SLuigi Rizzo  *
2574bf50f18SLuigi Rizzo  *  VALE switches are controlled by issuing a NIOCREGIF with a non-null
2584bf50f18SLuigi Rizzo  *  nr_cmd in the nmreq structure. These subcommands are handled by
2594bf50f18SLuigi Rizzo  *  netmap_bdg_ctl() in netmap_vale.c. Persistent VALE ports are created
2604bf50f18SLuigi Rizzo  *  and destroyed by issuing the NETMAP_BDG_NEWIF and NETMAP_BDG_DELIF
2614bf50f18SLuigi Rizzo  *  subcommands, respectively.
2624bf50f18SLuigi Rizzo  *
2634bf50f18SLuigi Rizzo  *  Any network interface known to the system (including a persistent VALE
2644bf50f18SLuigi Rizzo  *  port) can be attached to a VALE switch by issuing the
2654bf50f18SLuigi Rizzo  *  NETMAP_BDG_ATTACH subcommand. After the attachment, persistent VALE ports
2664bf50f18SLuigi Rizzo  *  look exactly like ephemeral VALE ports (as created in step 2 above).  The
2674bf50f18SLuigi Rizzo  *  attachment of other interfaces, instead, requires the creation of a
2684bf50f18SLuigi Rizzo  *  netmap_bwrap_adapter.  Moreover, the attached interface must be put in
2694bf50f18SLuigi Rizzo  *  netmap mode. This may require the creation of a netmap_generic_adapter if
2704bf50f18SLuigi Rizzo  *  we have no native support for the interface, or if generic adapters have
2714bf50f18SLuigi Rizzo  *  been forced by sysctl.
2724bf50f18SLuigi Rizzo  *
2734bf50f18SLuigi Rizzo  *  Both persistent VALE ports and bwraps are handled by netmap_get_bdg_na(),
2744bf50f18SLuigi Rizzo  *  called by nm_bdg_ctl_attach(), and discriminated by the nm_bdg_attach()
2754bf50f18SLuigi Rizzo  *  callback.  In the case of the bwrap, the callback creates the
2764bf50f18SLuigi Rizzo  *  netmap_bwrap_adapter.  The initialization of the bwrap is then
2774bf50f18SLuigi Rizzo  *  completed by calling netmap_do_regif() on it, in the nm_bdg_ctl()
2784bf50f18SLuigi Rizzo  *  callback (netmap_bwrap_bdg_ctl in netmap_vale.c).
2794bf50f18SLuigi Rizzo  *  A generic adapter for the wrapped ifp will be created if needed, when
2804bf50f18SLuigi Rizzo  *  netmap_get_bdg_na() calls netmap_get_hw_na().
2814bf50f18SLuigi Rizzo  *
2824bf50f18SLuigi Rizzo  *
2834bf50f18SLuigi Rizzo  *  ---- DATAPATHS -----
2844bf50f18SLuigi Rizzo  *
2854bf50f18SLuigi Rizzo  *              -= SYSTEM DEVICE WITH NATIVE SUPPORT =-
2864bf50f18SLuigi Rizzo  *
2874bf50f18SLuigi Rizzo  *    na == NA(ifp) == netmap_hw_adapter created in DEVICE_netmap_attach()
2884bf50f18SLuigi Rizzo  *
2894bf50f18SLuigi Rizzo  *    - tx from netmap userspace:
2904bf50f18SLuigi Rizzo  *	 concurrently:
2914bf50f18SLuigi Rizzo  *           1) ioctl(NIOCTXSYNC)/netmap_poll() in process context
2924bf50f18SLuigi Rizzo  *                kring->nm_sync() == DEVICE_netmap_txsync()
2934bf50f18SLuigi Rizzo  *           2) device interrupt handler
2944bf50f18SLuigi Rizzo  *                na->nm_notify()  == netmap_notify()
2954bf50f18SLuigi Rizzo  *    - rx from netmap userspace:
2964bf50f18SLuigi Rizzo  *       concurrently:
2974bf50f18SLuigi Rizzo  *           1) ioctl(NIOCRXSYNC)/netmap_poll() in process context
2984bf50f18SLuigi Rizzo  *                kring->nm_sync() == DEVICE_netmap_rxsync()
2994bf50f18SLuigi Rizzo  *           2) device interrupt handler
3004bf50f18SLuigi Rizzo  *                na->nm_notify()  == netmap_notify()
301847bf383SLuigi Rizzo  *    - rx from host stack
3024bf50f18SLuigi Rizzo  *       concurrently:
3034bf50f18SLuigi Rizzo  *           1) host stack
3044bf50f18SLuigi Rizzo  *                netmap_transmit()
3054bf50f18SLuigi Rizzo  *                  na->nm_notify  == netmap_notify()
3064bf50f18SLuigi Rizzo  *           2) ioctl(NIOCRXSYNC)/netmap_poll() in process context
30737e3a6d3SLuigi Rizzo  *                kring->nm_sync() == netmap_rxsync_from_host
3084bf50f18SLuigi Rizzo  *                  netmap_rxsync_from_host(na, NULL, NULL)
3094bf50f18SLuigi Rizzo  *    - tx to host stack
3104bf50f18SLuigi Rizzo  *           ioctl(NIOCTXSYNC)/netmap_poll() in process context
31137e3a6d3SLuigi Rizzo  *             kring->nm_sync() == netmap_txsync_to_host
3124bf50f18SLuigi Rizzo  *               netmap_txsync_to_host(na)
31337e3a6d3SLuigi Rizzo  *                 nm_os_send_up()
31437e3a6d3SLuigi Rizzo  *                   FreeBSD: na->if_input() == ether_input()
3154bf50f18SLuigi Rizzo  *                   linux: netif_rx() with NM_MAGIC_PRIORITY_RX
3164bf50f18SLuigi Rizzo  *
3174bf50f18SLuigi Rizzo  *
3184bf50f18SLuigi Rizzo  *               -= SYSTEM DEVICE WITH GENERIC SUPPORT =-
3194bf50f18SLuigi Rizzo  *
320847bf383SLuigi Rizzo  *    na == NA(ifp) == generic_netmap_adapter created in generic_netmap_attach()
321847bf383SLuigi Rizzo  *
322847bf383SLuigi Rizzo  *    - tx from netmap userspace:
323847bf383SLuigi Rizzo  *       concurrently:
324847bf383SLuigi Rizzo  *           1) ioctl(NIOCTXSYNC)/netmap_poll() in process context
325847bf383SLuigi Rizzo  *               kring->nm_sync() == generic_netmap_txsync()
32637e3a6d3SLuigi Rizzo  *                   nm_os_generic_xmit_frame()
327847bf383SLuigi Rizzo  *                       linux:   dev_queue_xmit() with NM_MAGIC_PRIORITY_TX
32837e3a6d3SLuigi Rizzo  *                           ifp->ndo_start_xmit == generic_ndo_start_xmit()
32937e3a6d3SLuigi Rizzo  *                               gna->save_start_xmit == orig. dev. start_xmit
330847bf383SLuigi Rizzo  *                       FreeBSD: na->if_transmit() == orig. dev if_transmit
331847bf383SLuigi Rizzo  *           2) generic_mbuf_destructor()
332847bf383SLuigi Rizzo  *                   na->nm_notify() == netmap_notify()
333847bf383SLuigi Rizzo  *    - rx from netmap userspace:
334847bf383SLuigi Rizzo  *           1) ioctl(NIOCRXSYNC)/netmap_poll() in process context
335847bf383SLuigi Rizzo  *               kring->nm_sync() == generic_netmap_rxsync()
336847bf383SLuigi Rizzo  *                   mbq_safe_dequeue()
337847bf383SLuigi Rizzo  *           2) device driver
338847bf383SLuigi Rizzo  *               generic_rx_handler()
339847bf383SLuigi Rizzo  *                   mbq_safe_enqueue()
340847bf383SLuigi Rizzo  *                   na->nm_notify() == netmap_notify()
34137e3a6d3SLuigi Rizzo  *    - rx from host stack
34237e3a6d3SLuigi Rizzo  *        FreeBSD: same as native
34337e3a6d3SLuigi Rizzo  *        Linux: same as native except:
344847bf383SLuigi Rizzo  *           1) host stack
34537e3a6d3SLuigi Rizzo  *               dev_queue_xmit() without NM_MAGIC_PRIORITY_TX
34637e3a6d3SLuigi Rizzo  *                   ifp->ndo_start_xmit == generic_ndo_start_xmit()
347847bf383SLuigi Rizzo  *                       netmap_transmit()
348847bf383SLuigi Rizzo  *                           na->nm_notify() == netmap_notify()
34937e3a6d3SLuigi Rizzo  *    - tx to host stack (same as native):
3504bf50f18SLuigi Rizzo  *
3514bf50f18SLuigi Rizzo  *
352847bf383SLuigi Rizzo  *                           -= VALE =-
3534bf50f18SLuigi Rizzo  *
354847bf383SLuigi Rizzo  *   INCOMING:
3554bf50f18SLuigi Rizzo  *
356847bf383SLuigi Rizzo  *      - VALE ports:
357847bf383SLuigi Rizzo  *          ioctl(NIOCTXSYNC)/netmap_poll() in process context
358847bf383SLuigi Rizzo  *              kring->nm_sync() == netmap_vp_txsync()
3594bf50f18SLuigi Rizzo  *
360847bf383SLuigi Rizzo  *      - system device with native support:
361847bf383SLuigi Rizzo  *         from cable:
362847bf383SLuigi Rizzo  *             interrupt
363847bf383SLuigi Rizzo  *                na->nm_notify() == netmap_bwrap_intr_notify(ring_nr != host ring)
364847bf383SLuigi Rizzo  *                     kring->nm_sync() == DEVICE_netmap_rxsync()
365847bf383SLuigi Rizzo  *                     netmap_vp_txsync()
366847bf383SLuigi Rizzo  *                     kring->nm_sync() == DEVICE_netmap_rxsync()
367847bf383SLuigi Rizzo  *         from host stack:
368847bf383SLuigi Rizzo  *             netmap_transmit()
369847bf383SLuigi Rizzo  *                na->nm_notify() == netmap_bwrap_intr_notify(ring_nr == host ring)
37037e3a6d3SLuigi Rizzo  *                     kring->nm_sync() == netmap_rxsync_from_host()
371847bf383SLuigi Rizzo  *                     netmap_vp_txsync()
3724bf50f18SLuigi Rizzo  *
373847bf383SLuigi Rizzo  *      - system device with generic support:
374847bf383SLuigi Rizzo  *         from device driver:
375847bf383SLuigi Rizzo  *            generic_rx_handler()
376847bf383SLuigi Rizzo  *                na->nm_notify() == netmap_bwrap_intr_notify(ring_nr != host ring)
377847bf383SLuigi Rizzo  *                     kring->nm_sync() == generic_netmap_rxsync()
378847bf383SLuigi Rizzo  *                     netmap_vp_txsync()
379847bf383SLuigi Rizzo  *                     kring->nm_sync() == generic_netmap_rxsync()
380847bf383SLuigi Rizzo  *         from host stack:
381847bf383SLuigi Rizzo  *            netmap_transmit()
382847bf383SLuigi Rizzo  *                na->nm_notify() == netmap_bwrap_intr_notify(ring_nr == host ring)
38337e3a6d3SLuigi Rizzo  *                     kring->nm_sync() == netmap_rxsync_from_host()
384847bf383SLuigi Rizzo  *                     netmap_vp_txsync()
3854bf50f18SLuigi Rizzo  *
386847bf383SLuigi Rizzo  *   (all cases) --> nm_bdg_flush()
387847bf383SLuigi Rizzo  *                      dest_na->nm_notify() == (see below)
3884bf50f18SLuigi Rizzo  *
389847bf383SLuigi Rizzo  *   OUTGOING:
3904bf50f18SLuigi Rizzo  *
391847bf383SLuigi Rizzo  *      - VALE ports:
392847bf383SLuigi Rizzo  *         concurrently:
393c3e9b4dbSLuiz Otavio O Souza  *             1) ioctl(NIOCRXSYNC)/netmap_poll() in process context
394847bf383SLuigi Rizzo  *                    kring->nm_sync() == netmap_vp_rxsync()
395847bf383SLuigi Rizzo  *             2) from nm_bdg_flush()
396847bf383SLuigi Rizzo  *                    na->nm_notify() == netmap_notify()
3974bf50f18SLuigi Rizzo  *
398847bf383SLuigi Rizzo  *      - system device with native support:
399847bf383SLuigi Rizzo  *          to cable:
400847bf383SLuigi Rizzo  *             na->nm_notify() == netmap_bwrap_notify()
401847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
402847bf383SLuigi Rizzo  *                 kring->nm_sync() == DEVICE_netmap_txsync()
403847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
404847bf383SLuigi Rizzo  *          to host stack:
405847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
40637e3a6d3SLuigi Rizzo  *                 kring->nm_sync() == netmap_txsync_to_host
407847bf383SLuigi Rizzo  *                 netmap_vp_rxsync_locked()
4084bf50f18SLuigi Rizzo  *
409847bf383SLuigi Rizzo  *      - system device with generic adapter:
410847bf383SLuigi Rizzo  *          to device driver:
411847bf383SLuigi Rizzo  *             na->nm_notify() == netmap_bwrap_notify()
412847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
413847bf383SLuigi Rizzo  *                 kring->nm_sync() == generic_netmap_txsync()
414847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
415847bf383SLuigi Rizzo  *          to host stack:
416847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
41737e3a6d3SLuigi Rizzo  *                 kring->nm_sync() == netmap_txsync_to_host
418847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
4194bf50f18SLuigi Rizzo  *
4204bf50f18SLuigi Rizzo  */
4214bf50f18SLuigi Rizzo 
422ce3ee1e7SLuigi Rizzo /*
423ce3ee1e7SLuigi Rizzo  * OS-specific code that is used only within this file.
424ce3ee1e7SLuigi Rizzo  * Other OS-specific code that must be accessed by drivers
425ce3ee1e7SLuigi Rizzo  * is present in netmap_kern.h
426ce3ee1e7SLuigi Rizzo  */
42701c7d25fSLuigi Rizzo 
428ce3ee1e7SLuigi Rizzo #if defined(__FreeBSD__)
42968b8534bSLuigi Rizzo #include <sys/cdefs.h> /* prerequisite */
43068b8534bSLuigi Rizzo #include <sys/types.h>
43168b8534bSLuigi Rizzo #include <sys/errno.h>
43268b8534bSLuigi Rizzo #include <sys/param.h>	/* defines used in kernel.h */
43368b8534bSLuigi Rizzo #include <sys/kernel.h>	/* types used in module initialization */
434f9790aebSLuigi Rizzo #include <sys/conf.h>	/* cdevsw struct, UID, GID */
43589e3fd52SLuigi Rizzo #include <sys/filio.h>	/* FIONBIO */
43668b8534bSLuigi Rizzo #include <sys/sockio.h>
43768b8534bSLuigi Rizzo #include <sys/socketvar.h>	/* struct socket */
43868b8534bSLuigi Rizzo #include <sys/malloc.h>
43968b8534bSLuigi Rizzo #include <sys/poll.h>
44089f6b863SAttilio Rao #include <sys/rwlock.h>
44168b8534bSLuigi Rizzo #include <sys/socket.h> /* sockaddrs */
44268b8534bSLuigi Rizzo #include <sys/selinfo.h>
44368b8534bSLuigi Rizzo #include <sys/sysctl.h>
444339f59c0SGleb Smirnoff #include <sys/jail.h>
445339f59c0SGleb Smirnoff #include <net/vnet.h>
44668b8534bSLuigi Rizzo #include <net/if.h>
44776039bc8SGleb Smirnoff #include <net/if_var.h>
44868b8534bSLuigi Rizzo #include <net/bpf.h>		/* BIOCIMMEDIATE */
44968b8534bSLuigi Rizzo #include <machine/bus.h>	/* bus_dmamap_* */
450ce3ee1e7SLuigi Rizzo #include <sys/endian.h>
451ce3ee1e7SLuigi Rizzo #include <sys/refcount.h>
45268b8534bSLuigi Rizzo 
45368b8534bSLuigi Rizzo 
454ce3ee1e7SLuigi Rizzo #elif defined(linux)
455ce3ee1e7SLuigi Rizzo 
456ce3ee1e7SLuigi Rizzo #include "bsd_glue.h"
457ce3ee1e7SLuigi Rizzo 
458ce3ee1e7SLuigi Rizzo #elif defined(__APPLE__)
459ce3ee1e7SLuigi Rizzo 
460ce3ee1e7SLuigi Rizzo #warning OSX support is only partial
461ce3ee1e7SLuigi Rizzo #include "osx_glue.h"
462ce3ee1e7SLuigi Rizzo 
46337e3a6d3SLuigi Rizzo #elif defined (_WIN32)
46437e3a6d3SLuigi Rizzo 
46537e3a6d3SLuigi Rizzo #include "win_glue.h"
46637e3a6d3SLuigi Rizzo 
467ce3ee1e7SLuigi Rizzo #else
468ce3ee1e7SLuigi Rizzo 
469ce3ee1e7SLuigi Rizzo #error	Unsupported platform
470ce3ee1e7SLuigi Rizzo 
471ce3ee1e7SLuigi Rizzo #endif /* unsupported */
472ce3ee1e7SLuigi Rizzo 
473ce3ee1e7SLuigi Rizzo /*
474ce3ee1e7SLuigi Rizzo  * common headers
475ce3ee1e7SLuigi Rizzo  */
4760b8ed8e0SLuigi Rizzo #include <net/netmap.h>
4770b8ed8e0SLuigi Rizzo #include <dev/netmap/netmap_kern.h>
478ce3ee1e7SLuigi Rizzo #include <dev/netmap/netmap_mem2.h>
4790b8ed8e0SLuigi Rizzo 
480ce3ee1e7SLuigi Rizzo 
4815819da83SLuigi Rizzo /* user-controlled variables */
4825819da83SLuigi Rizzo int netmap_verbose; /* debug verbosity; 0 (default) disables verbose logging */
4835819da83SLuigi Rizzo 
4845819da83SLuigi Rizzo static int netmap_no_timestamp; /* don't timestamp on rxsync */
485c85cb1a0SLuigi Rizzo int netmap_no_pendintr = 1; /* NOTE(review): presumably skips per-packet interrupt processing in favor of batched *sync — confirm at use site */
486f18be576SLuigi Rizzo int netmap_txsync_retry = 2; /* number of txsync retries; consumers elsewhere — semantics defined at use site */
487c3e9b4dbSLuiz Otavio O Souza static int netmap_fwd = 0;	/* force transparent forwarding */
488f196ce38SLuigi Rizzo 
489f9790aebSLuigi Rizzo /*
490f9790aebSLuigi Rizzo  * netmap_admode selects the netmap mode to use.
491f9790aebSLuigi Rizzo  * Invalid values are reset to NETMAP_ADMODE_BEST
492f9790aebSLuigi Rizzo  */
493f9790aebSLuigi Rizzo enum {	NETMAP_ADMODE_BEST = 0,	/* use native, fallback to generic */
494f9790aebSLuigi Rizzo 	NETMAP_ADMODE_NATIVE,	/* either native or none */
495f9790aebSLuigi Rizzo 	NETMAP_ADMODE_GENERIC,	/* force generic */
496f9790aebSLuigi Rizzo 	NETMAP_ADMODE_LAST };
497f9790aebSLuigi Rizzo static int netmap_admode = NETMAP_ADMODE_BEST;
498f9790aebSLuigi Rizzo 
49937e3a6d3SLuigi Rizzo /* netmap_generic_mit controls mitigation of RX notifications for
50037e3a6d3SLuigi Rizzo  * the generic netmap adapter. The value is a time interval in
50137e3a6d3SLuigi Rizzo  * nanoseconds. */
50237e3a6d3SLuigi Rizzo int netmap_generic_mit = 100*1000; /* 100 microseconds */
50337e3a6d3SLuigi Rizzo 
50437e3a6d3SLuigi Rizzo /* We use by default netmap-aware qdiscs with generic netmap adapters,
50537e3a6d3SLuigi Rizzo  * even if there can be a little performance hit with hardware NICs.
50637e3a6d3SLuigi Rizzo  * However, using the qdisc is the safer approach, for two reasons:
50737e3a6d3SLuigi Rizzo  * 1) it prevents non-fifo qdiscs to break the TX notification
50837e3a6d3SLuigi Rizzo  *    scheme, which is based on mbuf destructors when txqdisc is
50937e3a6d3SLuigi Rizzo  *    not used.
51037e3a6d3SLuigi Rizzo  * 2) it makes it possible to transmit over software devices that
51137e3a6d3SLuigi Rizzo  *    change skb->dev, like bridge, veth, ...
51237e3a6d3SLuigi Rizzo  *
51337e3a6d3SLuigi Rizzo  * Anyway users looking for the best performance should
51437e3a6d3SLuigi Rizzo  * use native adapters.
51537e3a6d3SLuigi Rizzo  */
516*4f80b14cSVincenzo Maffione #ifdef linux
51737e3a6d3SLuigi Rizzo int netmap_generic_txqdisc = 1; /* Linux-only: qdisc-based TX path for generic adapters (see comment above) */
518*4f80b14cSVincenzo Maffione #endif
51937e3a6d3SLuigi Rizzo 
52037e3a6d3SLuigi Rizzo /* Default number of slots and queues for generic adapters. */
52137e3a6d3SLuigi Rizzo int netmap_generic_ringsize = 1024;
52237e3a6d3SLuigi Rizzo int netmap_generic_rings = 1;
52337e3a6d3SLuigi Rizzo 
52437e3a6d3SLuigi Rizzo /* Non-zero if ptnet devices are allowed to use virtio-net headers. */
52537e3a6d3SLuigi Rizzo int ptnet_vnet_hdr = 1;
52637e3a6d3SLuigi Rizzo 
527c3e9b4dbSLuiz Otavio O Souza /* 0 if ptnetmap should not use worker threads for TX processing */
528c3e9b4dbSLuiz Otavio O Souza int ptnetmap_tx_workers = 1;
529c3e9b4dbSLuiz Otavio O Souza 
53037e3a6d3SLuigi Rizzo /*
53137e3a6d3SLuigi Rizzo  * SYSCTL calls are grouped between SYSBEGIN and SYSEND to be emulated
53237e3a6d3SLuigi Rizzo  * in some other operating systems
53337e3a6d3SLuigi Rizzo  */
53437e3a6d3SLuigi Rizzo SYSBEGIN(main_init);
53537e3a6d3SLuigi Rizzo 
53637e3a6d3SLuigi Rizzo SYSCTL_DECL(_dev_netmap);
53737e3a6d3SLuigi Rizzo SYSCTL_NODE(_dev, OID_AUTO, netmap, CTLFLAG_RW, 0, "Netmap args");
53837e3a6d3SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,
53937e3a6d3SLuigi Rizzo     CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
54037e3a6d3SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
54137e3a6d3SLuigi Rizzo     CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
542*4f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr, CTLFLAG_RW, &netmap_no_pendintr,
543*4f80b14cSVincenzo Maffione     0, "Always look for new received packets.");
54437e3a6d3SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW,
54537e3a6d3SLuigi Rizzo     &netmap_txsync_retry, 0, "Number of txsync loops in bridge's flush.");
546f9790aebSLuigi Rizzo 
547*4f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0,
548*4f80b14cSVincenzo Maffione     "Force NR_FORWARD mode");
549*4f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0,
550*4f80b14cSVincenzo Maffione     "Adapter mode. 0 selects the best option available,"
551*4f80b14cSVincenzo Maffione     "1 forces native adapter, 2 forces emulated adapter");
552*4f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit,
553*4f80b14cSVincenzo Maffione     0, "RX notification interval in nanoseconds");
554*4f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW,
555*4f80b14cSVincenzo Maffione     &netmap_generic_ringsize, 0,
556*4f80b14cSVincenzo Maffione     "Number of per-ring slots for emulated netmap mode");
557*4f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW,
558*4f80b14cSVincenzo Maffione     &netmap_generic_rings, 0,
559*4f80b14cSVincenzo Maffione     "Number of TX/RX queues for emulated netmap adapters");
560*4f80b14cSVincenzo Maffione #ifdef linux
561*4f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, generic_txqdisc, CTLFLAG_RW,
562*4f80b14cSVincenzo Maffione     &netmap_generic_txqdisc, 0, "Use qdisc for generic adapters");
563*4f80b14cSVincenzo Maffione #endif
564*4f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, ptnet_vnet_hdr, CTLFLAG_RW, &ptnet_vnet_hdr,
565*4f80b14cSVincenzo Maffione     0, "Allow ptnet devices to use virtio-net headers");
566*4f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, ptnetmap_tx_workers, CTLFLAG_RW,
567*4f80b14cSVincenzo Maffione     &ptnetmap_tx_workers, 0, "Use worker threads for pnetmap TX processing");
56837e3a6d3SLuigi Rizzo 
56937e3a6d3SLuigi Rizzo SYSEND;
570f196ce38SLuigi Rizzo 
571ce3ee1e7SLuigi Rizzo NMG_LOCK_T	netmap_global_lock;
572ce3ee1e7SLuigi Rizzo 
57317885a7bSLuigi Rizzo /*
57417885a7bSLuigi Rizzo  * mark the ring as stopped, and run through the locks
57517885a7bSLuigi Rizzo  * to make sure other users get to see it.
 * stopped must be either NM_KR_STOPPED (for unbounded stop)
 * or NM_KR_LOCKED (brief stop for mutual exclusion purposes)
57817885a7bSLuigi Rizzo  */
static void
netmap_disable_ring(struct netmap_kring *kr, int stopped)
{
	/* Publish the stop request and wait until the ring is free of
	 * concurrent users (see nm_kr_stop()). */
	nm_kr_stop(kr, stopped);
	// XXX check if nm_kr_stop is sufficient
	/* Empty lock/unlock pair: acts as a barrier so that anyone who
	 * was holding q_lock when we stopped has fully drained before
	 * we return. */
	mtx_lock(&kr->q_lock);
	mtx_unlock(&kr->q_lock);
	/* drop the reference taken by nm_kr_stop() */
	nm_kr_put(kr);
}
588ce3ee1e7SLuigi Rizzo 
589847bf383SLuigi Rizzo /* stop or enable a single ring */
5904bf50f18SLuigi Rizzo void
591847bf383SLuigi Rizzo netmap_set_ring(struct netmap_adapter *na, u_int ring_id, enum txrx t, int stopped)
5924bf50f18SLuigi Rizzo {
5934bf50f18SLuigi Rizzo 	if (stopped)
59437e3a6d3SLuigi Rizzo 		netmap_disable_ring(NMR(na, t) + ring_id, stopped);
5954bf50f18SLuigi Rizzo 	else
596847bf383SLuigi Rizzo 		NMR(na, t)[ring_id].nkr_stopped = 0;
5974bf50f18SLuigi Rizzo }
5984bf50f18SLuigi Rizzo 
599f9790aebSLuigi Rizzo 
60089cc2556SLuigi Rizzo /* stop or enable all the rings of na */
6014bf50f18SLuigi Rizzo void
6024bf50f18SLuigi Rizzo netmap_set_all_rings(struct netmap_adapter *na, int stopped)
603ce3ee1e7SLuigi Rizzo {
604ce3ee1e7SLuigi Rizzo 	int i;
605847bf383SLuigi Rizzo 	enum txrx t;
606ce3ee1e7SLuigi Rizzo 
6074bf50f18SLuigi Rizzo 	if (!nm_netmap_on(na))
608ce3ee1e7SLuigi Rizzo 		return;
609ce3ee1e7SLuigi Rizzo 
610847bf383SLuigi Rizzo 	for_rx_tx(t) {
611847bf383SLuigi Rizzo 		for (i = 0; i < netmap_real_rings(na, t); i++) {
612847bf383SLuigi Rizzo 			netmap_set_ring(na, i, t, stopped);
613ce3ee1e7SLuigi Rizzo 		}
614ce3ee1e7SLuigi Rizzo 	}
615ce3ee1e7SLuigi Rizzo }
616ce3ee1e7SLuigi Rizzo 
61789cc2556SLuigi Rizzo /*
61889cc2556SLuigi Rizzo  * Convenience function used in drivers.  Waits for current txsync()s/rxsync()s
61989cc2556SLuigi Rizzo  * to finish and prevents any new one from starting.  Call this before turning
620ddb13598SKevin Lo  * netmap mode off, or before removing the hardware rings (e.g., on module
 * unload).
62289cc2556SLuigi Rizzo  */
623f9790aebSLuigi Rizzo void
624f9790aebSLuigi Rizzo netmap_disable_all_rings(struct ifnet *ifp)
625f9790aebSLuigi Rizzo {
62637e3a6d3SLuigi Rizzo 	if (NM_NA_VALID(ifp)) {
62737e3a6d3SLuigi Rizzo 		netmap_set_all_rings(NA(ifp), NM_KR_STOPPED);
62837e3a6d3SLuigi Rizzo 	}
629f9790aebSLuigi Rizzo }
630f9790aebSLuigi Rizzo 
63189cc2556SLuigi Rizzo /*
63289cc2556SLuigi Rizzo  * Convenience function used in drivers.  Re-enables rxsync and txsync on the
63389cc2556SLuigi Rizzo  * adapter's rings In linux drivers, this should be placed near each
63489cc2556SLuigi Rizzo  * napi_enable().
63589cc2556SLuigi Rizzo  */
/* Re-enable all rings of the interface's adapter, if it has one. */
void
netmap_enable_all_rings(struct ifnet *ifp)
{
	if (!NM_NA_VALID(ifp))
		return;
	netmap_set_all_rings(NA(ifp), 0 /* enabled */);
}
643f9790aebSLuigi Rizzo 
64437e3a6d3SLuigi Rizzo void
64537e3a6d3SLuigi Rizzo netmap_make_zombie(struct ifnet *ifp)
64637e3a6d3SLuigi Rizzo {
64737e3a6d3SLuigi Rizzo 	if (NM_NA_VALID(ifp)) {
64837e3a6d3SLuigi Rizzo 		struct netmap_adapter *na = NA(ifp);
64937e3a6d3SLuigi Rizzo 		netmap_set_all_rings(na, NM_KR_LOCKED);
65037e3a6d3SLuigi Rizzo 		na->na_flags |= NAF_ZOMBIE;
65137e3a6d3SLuigi Rizzo 		netmap_set_all_rings(na, 0);
65237e3a6d3SLuigi Rizzo 	}
65337e3a6d3SLuigi Rizzo }
65437e3a6d3SLuigi Rizzo 
65537e3a6d3SLuigi Rizzo void
65637e3a6d3SLuigi Rizzo netmap_undo_zombie(struct ifnet *ifp)
65737e3a6d3SLuigi Rizzo {
65837e3a6d3SLuigi Rizzo 	if (NM_NA_VALID(ifp)) {
65937e3a6d3SLuigi Rizzo 		struct netmap_adapter *na = NA(ifp);
66037e3a6d3SLuigi Rizzo 		if (na->na_flags & NAF_ZOMBIE) {
66137e3a6d3SLuigi Rizzo 			netmap_set_all_rings(na, NM_KR_LOCKED);
66237e3a6d3SLuigi Rizzo 			na->na_flags &= ~NAF_ZOMBIE;
66337e3a6d3SLuigi Rizzo 			netmap_set_all_rings(na, 0);
66437e3a6d3SLuigi Rizzo 		}
66537e3a6d3SLuigi Rizzo 	}
66637e3a6d3SLuigi Rizzo }
667f9790aebSLuigi Rizzo 
668ce3ee1e7SLuigi Rizzo /*
669ce3ee1e7SLuigi Rizzo  * generic bound_checking function
670ce3ee1e7SLuigi Rizzo  */
671ce3ee1e7SLuigi Rizzo u_int
672ce3ee1e7SLuigi Rizzo nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg)
673ce3ee1e7SLuigi Rizzo {
674ce3ee1e7SLuigi Rizzo 	u_int oldv = *v;
675ce3ee1e7SLuigi Rizzo 	const char *op = NULL;
676ce3ee1e7SLuigi Rizzo 
677ce3ee1e7SLuigi Rizzo 	if (dflt < lo)
678ce3ee1e7SLuigi Rizzo 		dflt = lo;
679ce3ee1e7SLuigi Rizzo 	if (dflt > hi)
680ce3ee1e7SLuigi Rizzo 		dflt = hi;
681ce3ee1e7SLuigi Rizzo 	if (oldv < lo) {
682ce3ee1e7SLuigi Rizzo 		*v = dflt;
683ce3ee1e7SLuigi Rizzo 		op = "Bump";
684ce3ee1e7SLuigi Rizzo 	} else if (oldv > hi) {
685ce3ee1e7SLuigi Rizzo 		*v = hi;
686ce3ee1e7SLuigi Rizzo 		op = "Clamp";
687ce3ee1e7SLuigi Rizzo 	}
688ce3ee1e7SLuigi Rizzo 	if (op && msg)
689c3e9b4dbSLuiz Otavio O Souza 		nm_prinf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
690ce3ee1e7SLuigi Rizzo 	return *v;
691ce3ee1e7SLuigi Rizzo }
692ce3ee1e7SLuigi Rizzo 
693f9790aebSLuigi Rizzo 
694ce3ee1e7SLuigi Rizzo /*
695ce3ee1e7SLuigi Rizzo  * packet-dump function, user-supplied or static buffer.
696ce3ee1e7SLuigi Rizzo  * The destination buffer must be at least 30+4*len
697ce3ee1e7SLuigi Rizzo  */
/*
 * packet-dump function, user-supplied or static buffer.
 * Formats up to 'lim' bytes of 'p' as a classic hexdump (offset,
 * 16 hex columns, 16 ASCII columns per row) into 'dst', or into a
 * static buffer when dst == NULL.
 * The destination buffer must be at least 30+4*len bytes.
 */
const char *
nm_dump_buf(char *p, int len, int lim, char *dst)
{
	static char _dst[8192];		/* fallback when dst == NULL */
	static char hexdig[] = "0123456789abcdef";
	char *out;			/* current write position */
	int pos, k, row;

	if (dst == NULL)
		dst = _dst;
	if (lim <= 0 || lim > len)
		lim = len;
	out = dst;
	sprintf(out, "buf 0x%p len %d lim %d\n", p, len, lim);
	out += strlen(out);
	/* one row of up to 16 bytes: "%5d: " offset, hex area, ASCII area */
	for (pos = 0; pos < lim; pos += row) {
		sprintf(out, "%5d: ", pos);
		out += strlen(out);
		memset(out, ' ', 48);	/* blank hex area (16 * 3 columns) */
		row = (lim - pos < 16) ? lim - pos : 16;
		for (k = 0; k < row; k++) {
			unsigned char c = p[pos + k];

			out[k * 3] = hexdig[(c & 0xf0) >> 4];
			out[k * 3 + 1] = hexdig[c & 0xf];
			/* printable ASCII as-is, everything else as '.' */
			out[k + 48] = (c >= 0x20 && c <= 0x7e) ? c : '.';
		}
		out[row + 48] = '\n';
		out += row + 49;
	}
	*out = '\0';
	return dst;
}
738f196ce38SLuigi Rizzo 
739f18be576SLuigi Rizzo 
740ae10d1afSLuigi Rizzo /*
741ae10d1afSLuigi Rizzo  * Fetch configuration from the device, to cope with dynamic
742ae10d1afSLuigi Rizzo  * reconfigurations after loading the module.
743ae10d1afSLuigi Rizzo  */
74489cc2556SLuigi Rizzo /* call with NMG_LOCK held */
int
netmap_update_config(struct netmap_adapter *na)
{
	u_int txr, txd, rxr, rxd;

	/* ask the driver for the current ring geometry; if the adapter
	 * has no nm_config callback, or the callback fails, keep the
	 * values recorded at attach time */
	txr = txd = rxr = rxd = 0;
	if (na->nm_config == NULL ||
	    na->nm_config(na, &txr, &txd, &rxr, &rxd))
	{
		/* take whatever we had at init time */
		txr = na->num_tx_rings;
		txd = na->num_tx_desc;
		rxr = na->num_rx_rings;
		rxd = na->num_rx_desc;
	}

	if (na->num_tx_rings == txr && na->num_tx_desc == txd &&
	    na->num_rx_rings == rxr && na->num_rx_desc == rxd)
		return 0; /* nothing changed */
	if (netmap_verbose || na->active_fds > 0) {
		D("stored config %s: txring %d x %d, rxring %d x %d",
			na->name,
			na->num_tx_rings, na->num_tx_desc,
			na->num_rx_rings, na->num_rx_desc);
		D("new config %s: txring %d x %d, rxring %d x %d",
			na->name, txr, txd, rxr, rxd);
	}
	if (na->active_fds == 0) {
		/* no file descriptors bound: safe to adopt the new geometry */
		D("configuration changed (but fine)");
		na->num_tx_rings = txr;
		na->num_tx_desc = txd;
		na->num_rx_rings = rxr;
		na->num_rx_desc = rxd;
		return 0;
	}
	/* cannot resize while users are active: report the mismatch */
	D("configuration changed while active, this is bad...");
	return 1;
}
783ae10d1afSLuigi Rizzo 
78437e3a6d3SLuigi Rizzo /* nm_sync callbacks for the host rings */
78537e3a6d3SLuigi Rizzo static int netmap_txsync_to_host(struct netmap_kring *kring, int flags);
78637e3a6d3SLuigi Rizzo static int netmap_rxsync_from_host(struct netmap_kring *kring, int flags);
787f0ea3689SLuigi Rizzo 
788f0ea3689SLuigi Rizzo /* create the krings array and initialize the fields common to all adapters.
789f0ea3689SLuigi Rizzo  * The array layout is this:
790f0ea3689SLuigi Rizzo  *
791f0ea3689SLuigi Rizzo  *                    +----------+
792f0ea3689SLuigi Rizzo  * na->tx_rings ----->|          | \
793f0ea3689SLuigi Rizzo  *                    |          |  } na->num_tx_ring
794f0ea3689SLuigi Rizzo  *                    |          | /
795f0ea3689SLuigi Rizzo  *                    +----------+
796f0ea3689SLuigi Rizzo  *                    |          |    host tx kring
797f0ea3689SLuigi Rizzo  * na->rx_rings ----> +----------+
798f0ea3689SLuigi Rizzo  *                    |          | \
799f0ea3689SLuigi Rizzo  *                    |          |  } na->num_rx_rings
800f0ea3689SLuigi Rizzo  *                    |          | /
801f0ea3689SLuigi Rizzo  *                    +----------+
802f0ea3689SLuigi Rizzo  *                    |          |    host rx kring
803f0ea3689SLuigi Rizzo  *                    +----------+
804f0ea3689SLuigi Rizzo  * na->tailroom ----->|          | \
805f0ea3689SLuigi Rizzo  *                    |          |  } tailroom bytes
806f0ea3689SLuigi Rizzo  *                    |          | /
807f0ea3689SLuigi Rizzo  *                    +----------+
808f0ea3689SLuigi Rizzo  *
809f0ea3689SLuigi Rizzo  * Note: for compatibility, host krings are created even when not needed.
810f0ea3689SLuigi Rizzo  * The tailroom space is currently used by vale ports for allocating leases.
811f0ea3689SLuigi Rizzo  */
81289cc2556SLuigi Rizzo /* call with NMG_LOCK held */
int
netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
{
	u_int i, len, ndesc;
	struct netmap_kring *kring;
	u_int n[NR_TXRX];	/* number of krings per direction, host ring included */
	enum txrx t;

	if (na->tx_rings != NULL) {
		D("warning: krings were already created");
		return 0;
	}

	/* account for the (possibly fake) host rings */
	n[NR_TX] = na->num_tx_rings + 1;
	n[NR_RX] = na->num_rx_rings + 1;

	/* single allocation: tx krings, rx krings, then tailroom bytes
	 * (layout described in the comment above this function) */
	len = (n[NR_TX] + n[NR_RX]) * sizeof(struct netmap_kring) + tailroom;

	na->tx_rings = nm_os_malloc((size_t)len);
	if (na->tx_rings == NULL) {
		D("Cannot allocate krings");
		return ENOMEM;
	}
	na->rx_rings = na->tx_rings + n[NR_TX];

	/*
	 * All fields in krings are 0 except the ones initialized below,
	 * but better be explicit on important kring fields.
	 */
	for_rx_tx(t) {
		ndesc = nma_get_ndesc(na, t);
		for (i = 0; i < n[t]; i++) {
			kring = &NMR(na, t)[i];
			bzero(kring, sizeof(*kring));
			kring->na = na;
			kring->ring_id = i;
			kring->tx = t;
			kring->nkr_num_slots = ndesc;
			kring->nr_mode = NKR_NETMAP_OFF;
			kring->nr_pending_mode = NKR_NETMAP_OFF;
			if (i < nma_get_nrings(na, t)) {
				/* hardware ring: use the adapter's sync callbacks */
				kring->nm_sync = (t == NR_TX ? na->nm_txsync : na->nm_rxsync);
			} else {
				/* host ring: sync against the host stack */
				kring->nm_sync = (t == NR_TX ?
						netmap_txsync_to_host:
						netmap_rxsync_from_host);
			}
			kring->nm_notify = na->nm_notify;
			kring->rhead = kring->rcur = kring->nr_hwcur = 0;
			/*
			 * IMPORTANT: Always keep one slot empty.
			 */
			kring->rtail = kring->nr_hwtail = (t == NR_TX ? ndesc - 1 : 0);
			snprintf(kring->name, sizeof(kring->name) - 1, "%s %s%d", na->name,
					nm_txrx2str(t), i);
			ND("ktx %s h %d c %d t %d",
				kring->name, kring->rhead, kring->rcur, kring->rtail);
			mtx_init(&kring->q_lock, (t == NR_TX ? "nm_txq_lock" : "nm_rxq_lock"), NULL, MTX_DEF);
			nm_os_selinfo_init(&kring->si);
		}
		/* global (per-direction) wait queue */
		nm_os_selinfo_init(&na->si[t]);
	}

	/* tailroom starts right after the last rx kring */
	na->tailroom = na->rx_rings + n[NR_RX];

	return 0;
}
881f9790aebSLuigi Rizzo 
882f9790aebSLuigi Rizzo 
883f0ea3689SLuigi Rizzo /* undo the actions performed by netmap_krings_create */
88489cc2556SLuigi Rizzo /* call with NMG_LOCK held */
void
netmap_krings_delete(struct netmap_adapter *na)
{
	struct netmap_kring *kring = na->tx_rings;
	enum txrx t;

	if (na->tx_rings == NULL) {
		D("warning: krings were already deleted");
		return;
	}

	/* release the per-direction wait queues first */
	for_rx_tx(t)
		nm_os_selinfo_uninit(&na->si[t]);

	/* we rely on the krings layout described above: tx krings,
	 * rx krings and tailroom are one contiguous allocation, so a
	 * single sweep up to na->tailroom covers every kring */
	for ( ; kring != na->tailroom; kring++) {
		mtx_destroy(&kring->q_lock);
		nm_os_selinfo_uninit(&kring->si);
	}
	/* one free() releases tx krings, rx krings and tailroom together */
	nm_os_free(na->tx_rings);
	na->tx_rings = na->rx_rings = na->tailroom = NULL;
}
907f9790aebSLuigi Rizzo 
908f9790aebSLuigi Rizzo 
90917885a7bSLuigi Rizzo /*
91017885a7bSLuigi Rizzo  * Destructor for NIC ports. They also have an mbuf queue
91117885a7bSLuigi Rizzo  * on the rings connected to the host so we need to purge
91217885a7bSLuigi Rizzo  * them first.
91317885a7bSLuigi Rizzo  */
91489cc2556SLuigi Rizzo /* call with NMG_LOCK held */
void
netmap_hw_krings_delete(struct netmap_adapter *na)
{
	/* the host rx kring (index num_rx_rings) carries the mbuf queue
	 * of packets coming from the host stack; drain it before the
	 * krings themselves are freed */
	struct mbq *q = &na->rx_rings[na->num_rx_rings].rx_queue;

	ND("destroy sw mbq with len %d", mbq_len(q));
	mbq_purge(q);
	mbq_safe_fini(q);
	netmap_krings_delete(na);
}
92517885a7bSLuigi Rizzo 
926*4f80b14cSVincenzo Maffione static void
927*4f80b14cSVincenzo Maffione netmap_mem_drop(struct netmap_adapter *na)
928*4f80b14cSVincenzo Maffione {
929*4f80b14cSVincenzo Maffione 	int last = netmap_mem_deref(na->nm_mem, na);
930*4f80b14cSVincenzo Maffione 	/* if the native allocator had been overrided on regif,
931*4f80b14cSVincenzo Maffione 	 * restore it now and drop the temporary one
932*4f80b14cSVincenzo Maffione 	 */
933*4f80b14cSVincenzo Maffione 	if (last && na->nm_mem_prev) {
934*4f80b14cSVincenzo Maffione 		netmap_mem_put(na->nm_mem);
935*4f80b14cSVincenzo Maffione 		na->nm_mem = na->nm_mem_prev;
936*4f80b14cSVincenzo Maffione 		na->nm_mem_prev = NULL;
937*4f80b14cSVincenzo Maffione 	}
938*4f80b14cSVincenzo Maffione }
939f9790aebSLuigi Rizzo 
94068b8534bSLuigi Rizzo /*
941847bf383SLuigi Rizzo  * Undo everything that was done in netmap_do_regif(). In particular,
942847bf383SLuigi Rizzo  * call nm_register(ifp,0) to stop netmap mode on the interface and
9434bf50f18SLuigi Rizzo  * revert to normal operation.
94468b8534bSLuigi Rizzo  */
945ce3ee1e7SLuigi Rizzo /* call with NMG_LOCK held */
946847bf383SLuigi Rizzo static void netmap_unset_ringid(struct netmap_priv_d *);
94737e3a6d3SLuigi Rizzo static void netmap_krings_put(struct netmap_priv_d *);
void
netmap_do_unregif(struct netmap_priv_d *priv)
{
	struct netmap_adapter *na = priv->np_na;

	NMG_LOCK_ASSERT();
	na->active_fds--;
	/* unset nr_pending_mode and possibly release exclusive mode */
	netmap_krings_put(priv);

#ifdef	WITH_MONITOR
	/* XXX check whether we have to do something with monitor
	 * when rings change nr_mode. */
	if (na->active_fds <= 0) {
		/* walk through all the rings and tell any monitor
		 * that the port is going to exit netmap mode
		 */
		netmap_monitor_stop(na);
	}
#endif

	/* turn the adapter off if no fd is left, or if this fd still has
	 * rings pending a mode change */
	if (na->active_fds <= 0 || nm_kring_pending(priv)) {
		na->nm_register(na, 0);
	}

	/* delete rings and buffers that are no longer needed */
	netmap_mem_rings_delete(na);

	if (na->active_fds <= 0) {	/* last instance */
		/*
		 * (TO CHECK) We enter here
		 * when the last reference to this file descriptor goes
		 * away. This means we cannot have any pending poll()
		 * or interrupt routine operating on the structure.
		 * XXX The file may be closed in a thread while
		 * another thread is using it.
		 * Linux keeps the file opened until the last reference
		 * by any outstanding ioctl/poll or mmap is gone.
		 * FreeBSD does not track mmap()s (but we do) and
		 * wakes up any sleeping poll(). Need to check what
		 * happens if the close() occurs while a concurrent
		 * syscall is running.
		 */
		if (netmap_verbose)
			D("deleting last instance for %s", na->name);

                if (nm_netmap_on(na)) {
                    D("BUG: netmap on while going to delete the krings");
                }

		na->nm_krings_delete(na);
	}

	/* possibly decrement counter of tx_si/rx_si users */
	netmap_unset_ringid(priv);
	/* delete the nifp */
	netmap_mem_if_delete(na, priv->np_nifp);
	/* drop the allocator */
	netmap_mem_drop(na);
	/* mark the priv as unregistered */
	priv->np_na = NULL;
	priv->np_nifp = NULL;
}
101168b8534bSLuigi Rizzo 
101289cc2556SLuigi Rizzo /* call with NMG_LOCK held */
1013f0ea3689SLuigi Rizzo static __inline int
1014847bf383SLuigi Rizzo nm_si_user(struct netmap_priv_d *priv, enum txrx t)
1015f0ea3689SLuigi Rizzo {
1016f0ea3689SLuigi Rizzo 	return (priv->np_na != NULL &&
1017847bf383SLuigi Rizzo 		(priv->np_qlast[t] - priv->np_qfirst[t] > 1));
1018f0ea3689SLuigi Rizzo }
1019f0ea3689SLuigi Rizzo 
102037e3a6d3SLuigi Rizzo struct netmap_priv_d*
102137e3a6d3SLuigi Rizzo netmap_priv_new(void)
102237e3a6d3SLuigi Rizzo {
102337e3a6d3SLuigi Rizzo 	struct netmap_priv_d *priv;
102437e3a6d3SLuigi Rizzo 
1025c3e9b4dbSLuiz Otavio O Souza 	priv = nm_os_malloc(sizeof(struct netmap_priv_d));
102637e3a6d3SLuigi Rizzo 	if (priv == NULL)
102737e3a6d3SLuigi Rizzo 		return NULL;
102837e3a6d3SLuigi Rizzo 	priv->np_refs = 1;
102937e3a6d3SLuigi Rizzo 	nm_os_get_module();
103037e3a6d3SLuigi Rizzo 	return priv;
103137e3a6d3SLuigi Rizzo }
103237e3a6d3SLuigi Rizzo 
1033ce3ee1e7SLuigi Rizzo /*
10348fd44c93SLuigi Rizzo  * Destructor of the netmap_priv_d, called when the fd is closed
10358fd44c93SLuigi Rizzo  * Action: undo all the things done by NIOCREGIF,
10368fd44c93SLuigi Rizzo  * On FreeBSD we need to track whether there are active mmap()s,
10378fd44c93SLuigi Rizzo  * and we use np_active_mmaps for that. On linux, the field is always 0.
10388fd44c93SLuigi Rizzo  * Return: 1 if we can free priv, 0 otherwise.
103989cc2556SLuigi Rizzo  *
1040ce3ee1e7SLuigi Rizzo  */
104189cc2556SLuigi Rizzo /* call with NMG_LOCK held */
void
netmap_priv_delete(struct netmap_priv_d *priv)
{
	struct netmap_adapter *na = priv->np_na;

	/* number of active references to this fd */
	if (--priv->np_refs > 0) {
		return;
	}
	/* last reference: release the module hold taken in netmap_priv_new() */
	nm_os_put_module();
	if (na) {
		/* still registered: undo everything done by NIOCREGIF */
		netmap_do_unregif(priv);
	}
	/* netmap_unget_na() handles na == NULL */
	netmap_unget_na(na, priv->np_ifp);
	bzero(priv, sizeof(*priv));	/* for safety */
	nm_os_free(priv);
}
10595819da83SLuigi Rizzo 
1060f9790aebSLuigi Rizzo 
106189cc2556SLuigi Rizzo /* call with NMG_LOCK *not* held */
/* fd destructor: drop one reference to the priv under the global lock */
void
netmap_dtor(void *data)
{
	NMG_LOCK();
	netmap_priv_delete((struct netmap_priv_d *)data);
	NMG_UNLOCK();
}
107168b8534bSLuigi Rizzo 
1072f18be576SLuigi Rizzo 
107368b8534bSLuigi Rizzo /*
1074c3e9b4dbSLuiz Otavio O Souza  * Handlers for synchronization of the rings from/to the host stack.
1075c3e9b4dbSLuiz Otavio O Souza  * These are associated to a network interface and are just another
1076c3e9b4dbSLuiz Otavio O Souza  * ring pair managed by userspace.
1077c3e9b4dbSLuiz Otavio O Souza  *
1078c3e9b4dbSLuiz Otavio O Souza  * Netmap also supports transparent forwarding (NS_FORWARD and NR_FORWARD
1079c3e9b4dbSLuiz Otavio O Souza  * flags):
1080c3e9b4dbSLuiz Otavio O Souza  *
1081c3e9b4dbSLuiz Otavio O Souza  * - Before releasing buffers on hw RX rings, the application can mark
1082c3e9b4dbSLuiz Otavio O Souza  *   them with the NS_FORWARD flag. During the next RXSYNC or poll(), they
1083c3e9b4dbSLuiz Otavio O Souza  *   will be forwarded to the host stack, similarly to what happened if
1084c3e9b4dbSLuiz Otavio O Souza  *   the application moved them to the host TX ring.
1085c3e9b4dbSLuiz Otavio O Souza  *
1086c3e9b4dbSLuiz Otavio O Souza  * - Before releasing buffers on the host RX ring, the application can
1087c3e9b4dbSLuiz Otavio O Souza  *   mark them with the NS_FORWARD flag. During the next RXSYNC or poll(),
1088c3e9b4dbSLuiz Otavio O Souza  *   they will be forwarded to the hw TX rings, saving the application
1089c3e9b4dbSLuiz Otavio O Souza  *   from doing the same task in user-space.
1090c3e9b4dbSLuiz Otavio O Souza  *
 * Transparent forwarding can be enabled per-ring, by setting the NR_FORWARD
1092c3e9b4dbSLuiz Otavio O Souza  * flag, or globally with the netmap_fwd sysctl.
1093c3e9b4dbSLuiz Otavio O Souza  *
1094091fd0abSLuigi Rizzo  * The transfer NIC --> host is relatively easy, just encapsulate
1095091fd0abSLuigi Rizzo  * into mbufs and we are done. The host --> NIC side is slightly
1096091fd0abSLuigi Rizzo  * harder because there might not be room in the tx ring so it
1097091fd0abSLuigi Rizzo  * might take a while before releasing the buffer.
1098091fd0abSLuigi Rizzo  */
1099091fd0abSLuigi Rizzo 
1100f18be576SLuigi Rizzo 
1101091fd0abSLuigi Rizzo /*
1102c3e9b4dbSLuiz Otavio O Souza  * Pass a whole queue of mbufs to the host stack as coming from 'dst'
110317885a7bSLuigi Rizzo  * We do not need to lock because the queue is private.
1104c3e9b4dbSLuiz Otavio O Souza  * After this call the queue is empty.
1105091fd0abSLuigi Rizzo  */
1106091fd0abSLuigi Rizzo static void
1107f9790aebSLuigi Rizzo netmap_send_up(struct ifnet *dst, struct mbq *q)
1108091fd0abSLuigi Rizzo {
1109091fd0abSLuigi Rizzo 	struct mbuf *m;
111037e3a6d3SLuigi Rizzo 	struct mbuf *head = NULL, *prev = NULL;
1111091fd0abSLuigi Rizzo 
1112c3e9b4dbSLuiz Otavio O Souza 	/* Send packets up, outside the lock; head/prev machinery
1113c3e9b4dbSLuiz Otavio O Souza 	 * is only useful for Windows. */
1114f9790aebSLuigi Rizzo 	while ((m = mbq_dequeue(q)) != NULL) {
1115091fd0abSLuigi Rizzo 		if (netmap_verbose & NM_VERB_HOST)
1116091fd0abSLuigi Rizzo 			D("sending up pkt %p size %d", m, MBUF_LEN(m));
111737e3a6d3SLuigi Rizzo 		prev = nm_os_send_up(dst, m, prev);
111837e3a6d3SLuigi Rizzo 		if (head == NULL)
111937e3a6d3SLuigi Rizzo 			head = prev;
1120091fd0abSLuigi Rizzo 	}
112137e3a6d3SLuigi Rizzo 	if (head)
112237e3a6d3SLuigi Rizzo 		nm_os_send_up(dst, NULL, head);
112337e3a6d3SLuigi Rizzo 	mbq_fini(q);
1124091fd0abSLuigi Rizzo }
1125091fd0abSLuigi Rizzo 
1126f18be576SLuigi Rizzo 
1127091fd0abSLuigi Rizzo /*
1128c3e9b4dbSLuiz Otavio O Souza  * Scan the buffers from hwcur to ring->head, and put a copy of those
1129c3e9b4dbSLuiz Otavio O Souza  * marked NS_FORWARD (or all of them if forced) into a queue of mbufs.
1130c3e9b4dbSLuiz Otavio O Souza  * Drop remaining packets in the unlikely event
113117885a7bSLuigi Rizzo  * of an mbuf shortage.
1132091fd0abSLuigi Rizzo  */
1133091fd0abSLuigi Rizzo static void
1134091fd0abSLuigi Rizzo netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
1135091fd0abSLuigi Rizzo {
113617885a7bSLuigi Rizzo 	u_int const lim = kring->nkr_num_slots - 1;
1137847bf383SLuigi Rizzo 	u_int const head = kring->rhead;
113817885a7bSLuigi Rizzo 	u_int n;
1139f9790aebSLuigi Rizzo 	struct netmap_adapter *na = kring->na;
1140091fd0abSLuigi Rizzo 
114117885a7bSLuigi Rizzo 	for (n = kring->nr_hwcur; n != head; n = nm_next(n, lim)) {
114217885a7bSLuigi Rizzo 		struct mbuf *m;
1143091fd0abSLuigi Rizzo 		struct netmap_slot *slot = &kring->ring->slot[n];
1144091fd0abSLuigi Rizzo 
1145091fd0abSLuigi Rizzo 		if ((slot->flags & NS_FORWARD) == 0 && !force)
1146091fd0abSLuigi Rizzo 			continue;
11474bf50f18SLuigi Rizzo 		if (slot->len < 14 || slot->len > NETMAP_BUF_SIZE(na)) {
114817885a7bSLuigi Rizzo 			RD(5, "bad pkt at %d len %d", n, slot->len);
1149091fd0abSLuigi Rizzo 			continue;
1150091fd0abSLuigi Rizzo 		}
1151091fd0abSLuigi Rizzo 		slot->flags &= ~NS_FORWARD; // XXX needed ?
115217885a7bSLuigi Rizzo 		/* XXX TODO: adapt to the case of a multisegment packet */
11534bf50f18SLuigi Rizzo 		m = m_devget(NMB(na, slot), slot->len, 0, na->ifp, NULL);
1154091fd0abSLuigi Rizzo 
1155091fd0abSLuigi Rizzo 		if (m == NULL)
1156091fd0abSLuigi Rizzo 			break;
1157f9790aebSLuigi Rizzo 		mbq_enqueue(q, m);
1158091fd0abSLuigi Rizzo 	}
1159091fd0abSLuigi Rizzo }
1160091fd0abSLuigi Rizzo 
116137e3a6d3SLuigi Rizzo static inline int
116237e3a6d3SLuigi Rizzo _nm_may_forward(struct netmap_kring *kring)
116337e3a6d3SLuigi Rizzo {
116437e3a6d3SLuigi Rizzo 	return	((netmap_fwd || kring->ring->flags & NR_FORWARD) &&
116537e3a6d3SLuigi Rizzo 		 kring->na->na_flags & NAF_HOST_RINGS &&
116637e3a6d3SLuigi Rizzo 		 kring->tx == NR_RX);
116737e3a6d3SLuigi Rizzo }
116837e3a6d3SLuigi Rizzo 
116937e3a6d3SLuigi Rizzo static inline int
117037e3a6d3SLuigi Rizzo nm_may_forward_up(struct netmap_kring *kring)
117137e3a6d3SLuigi Rizzo {
117237e3a6d3SLuigi Rizzo 	return	_nm_may_forward(kring) &&
117337e3a6d3SLuigi Rizzo 		 kring->ring_id != kring->na->num_rx_rings;
117437e3a6d3SLuigi Rizzo }
117537e3a6d3SLuigi Rizzo 
117637e3a6d3SLuigi Rizzo static inline int
1177c3e9b4dbSLuiz Otavio O Souza nm_may_forward_down(struct netmap_kring *kring, int sync_flags)
117837e3a6d3SLuigi Rizzo {
117937e3a6d3SLuigi Rizzo 	return	_nm_may_forward(kring) &&
1180c3e9b4dbSLuiz Otavio O Souza 		 (sync_flags & NAF_CAN_FORWARD_DOWN) &&
118137e3a6d3SLuigi Rizzo 		 kring->ring_id == kring->na->num_rx_rings;
118237e3a6d3SLuigi Rizzo }
1183f18be576SLuigi Rizzo 
1184091fd0abSLuigi Rizzo /*
118517885a7bSLuigi Rizzo  * Send to the NIC rings packets marked NS_FORWARD between
1186c3e9b4dbSLuiz Otavio O Souza  * kring->nr_hwcur and kring->rhead.
1187c3e9b4dbSLuiz Otavio O Souza  * Called under kring->rx_queue.lock on the sw rx ring.
1188c3e9b4dbSLuiz Otavio O Souza  *
1189c3e9b4dbSLuiz Otavio O Souza  * It can only be called if the user opened all the TX hw rings,
1190c3e9b4dbSLuiz Otavio O Souza  * see NAF_CAN_FORWARD_DOWN flag.
1191c3e9b4dbSLuiz Otavio O Souza  * We can touch the TX netmap rings (slots, head and cur) since
1192c3e9b4dbSLuiz Otavio O Souza  * we are in poll/ioctl system call context, and the application
1193c3e9b4dbSLuiz Otavio O Souza  * is not supposed to touch the ring (using a different thread)
1194c3e9b4dbSLuiz Otavio O Souza  * during the execution of the system call.
1195091fd0abSLuigi Rizzo  */
static u_int
netmap_sw_to_nic(struct netmap_adapter *na)
{
	/* the host RX ring is the last entry of the rx_rings array */
	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
	struct netmap_slot *rxslot = kring->ring->slot;
	u_int i, rxcur = kring->nr_hwcur;
	u_int const head = kring->rhead;
	u_int const src_lim = kring->nkr_num_slots - 1;
	u_int sent = 0;		/* number of slots forwarded */

	/* scan rings to find space, then fill as much as possible */
	for (i = 0; i < na->num_tx_rings; i++) {
		struct netmap_kring *kdst = &na->tx_rings[i];
		struct netmap_ring *rdst = kdst->ring;
		u_int const dst_lim = kdst->nkr_num_slots - 1;

		/* XXX do we trust ring or kring->rcur,rtail ? */
		for (; rxcur != head && !nm_ring_empty(rdst);
		     rxcur = nm_next(rxcur, src_lim) ) {
			struct netmap_slot *src, *dst, tmp;
			u_int dst_head = rdst->head;

			src = &rxslot[rxcur];
			/* skip slots not marked for forwarding, unless
			 * the netmap_fwd sysctl forwards everything */
			if ((src->flags & NS_FORWARD) == 0 && !netmap_fwd)
				continue;

			sent++;

			dst = &rdst->slot[dst_head];

			tmp = *src;

			/* zero-copy forward: swap the buffer indices of
			 * the source and destination slots, and flag
			 * both sides so the new mappings are noticed */
			src->buf_idx = dst->buf_idx;
			src->flags = NS_BUF_CHANGED;

			dst->buf_idx = tmp.buf_idx;
			dst->len = tmp.len;
			dst->flags = NS_BUF_CHANGED;

			/* advance the user-visible pointers of the TX
			 * ring; legal because of NAF_CAN_FORWARD_DOWN,
			 * see the function header comment */
			rdst->head = rdst->cur = nm_next(dst_head, dst_lim);
		}
		/* if (sent) XXX txsync ? it would be just an optimization */
	}
	return sent;
}
1241091fd0abSLuigi Rizzo 
1242f18be576SLuigi Rizzo 
1243091fd0abSLuigi Rizzo /*
1244ce3ee1e7SLuigi Rizzo  * netmap_txsync_to_host() passes packets up. We are called from a
124502ad4083SLuigi Rizzo  * system call in user process context, and the only contention
124602ad4083SLuigi Rizzo  * can be among multiple user threads erroneously calling
1247091fd0abSLuigi Rizzo  * this routine concurrently.
124868b8534bSLuigi Rizzo  */
124937e3a6d3SLuigi Rizzo static int
125037e3a6d3SLuigi Rizzo netmap_txsync_to_host(struct netmap_kring *kring, int flags)
125168b8534bSLuigi Rizzo {
125237e3a6d3SLuigi Rizzo 	struct netmap_adapter *na = kring->na;
125317885a7bSLuigi Rizzo 	u_int const lim = kring->nkr_num_slots - 1;
1254f0ea3689SLuigi Rizzo 	u_int const head = kring->rhead;
1255f9790aebSLuigi Rizzo 	struct mbq q;
125668b8534bSLuigi Rizzo 
125717885a7bSLuigi Rizzo 	/* Take packets from hwcur to head and pass them up.
1258c3e9b4dbSLuiz Otavio O Souza 	 * Force hwcur = head since netmap_grab_packets() stops at head
125968b8534bSLuigi Rizzo 	 */
1260f9790aebSLuigi Rizzo 	mbq_init(&q);
126117885a7bSLuigi Rizzo 	netmap_grab_packets(kring, &q, 1 /* force */);
126217885a7bSLuigi Rizzo 	ND("have %d pkts in queue", mbq_len(&q));
126317885a7bSLuigi Rizzo 	kring->nr_hwcur = head;
126417885a7bSLuigi Rizzo 	kring->nr_hwtail = head + lim;
126517885a7bSLuigi Rizzo 	if (kring->nr_hwtail > lim)
126617885a7bSLuigi Rizzo 		kring->nr_hwtail -= lim + 1;
126768b8534bSLuigi Rizzo 
1268f9790aebSLuigi Rizzo 	netmap_send_up(na->ifp, &q);
126937e3a6d3SLuigi Rizzo 	return 0;
1270f18be576SLuigi Rizzo }
1271f18be576SLuigi Rizzo 
1272f18be576SLuigi Rizzo 
127368b8534bSLuigi Rizzo /*
127402ad4083SLuigi Rizzo  * rxsync backend for packets coming from the host stack.
127517885a7bSLuigi Rizzo  * They have been put in kring->rx_queue by netmap_transmit().
127617885a7bSLuigi Rizzo  * We protect access to the kring using kring->rx_queue.lock
127702ad4083SLuigi Rizzo  *
1278c3e9b4dbSLuiz Otavio O Souza  * also moves to the nic hw rings any packet the user has marked
1279c3e9b4dbSLuiz Otavio O Souza  * for transparent-mode forwarding, then sets the NR_FORWARD
1280c3e9b4dbSLuiz Otavio O Souza  * flag in the kring to let the caller push them out
128168b8534bSLuigi Rizzo  */
static int
netmap_rxsync_from_host(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i, n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	int ret = 0;
	struct mbq *q = &kring->rx_queue, fq;

	mbq_init(&fq); /* fq holds packets to be freed */

	/* rx_queue.lock also protects this kring, see the header comment */
	mbq_lock(q);

	/* First part: import newly received packets */
	n = mbq_len(q);
	if (n) { /* grab packets from the queue */
		struct mbuf *m;
		uint32_t stop_i;

		nm_i = kring->nr_hwtail;
		/* stop one slot before hwcur, so the ring is never
		 * seen completely full */
		stop_i = nm_prev(kring->nr_hwcur, lim);
		while ( nm_i != stop_i && (m = mbq_dequeue(q)) != NULL ) {
			int len = MBUF_LEN(m);
			struct netmap_slot *slot = &ring->slot[nm_i];

			/* copy the mbuf payload into the netmap buffer */
			m_copydata(m, 0, len, NMB(na, slot));
			ND("nm %d len %d", nm_i, len);
			if (netmap_verbose)
				D("%s", nm_dump_buf(NMB(na, slot),len, 128, NULL));

			slot->len = len;
			slot->flags = 0;
			nm_i = nm_next(nm_i, lim);
			/* defer the free until after the lock is dropped */
			mbq_enqueue(&fq, m);
		}
		kring->nr_hwtail = nm_i;
	}

	/*
	 * Second part: skip past packets that userspace has released.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) { /* something was released */
		if (nm_may_forward_down(kring, flags)) {
			/* transparent mode: forward released slots
			 * marked NS_FORWARD to the NIC TX rings */
			ret = netmap_sw_to_nic(na);
			if (ret > 0) {
				/* let the caller push them out */
				kring->nr_kflags |= NR_FORWARD;
				ret = 0;
			}
		}
		kring->nr_hwcur = head;
	}

	mbq_unlock(q);

	/* now it is safe to free the deferred mbufs */
	mbq_purge(&fq);
	mbq_fini(&fq);

	return ret;
}
134468b8534bSLuigi Rizzo 
134568b8534bSLuigi Rizzo 
1346f9790aebSLuigi Rizzo /* Get a netmap adapter for the port.
1347f9790aebSLuigi Rizzo  *
1348f9790aebSLuigi Rizzo  * If it is possible to satisfy the request, return 0
1349f9790aebSLuigi Rizzo  * with *na containing the netmap adapter found.
1350f9790aebSLuigi Rizzo  * Otherwise return an error code, with *na containing NULL.
1351f9790aebSLuigi Rizzo  *
1352f9790aebSLuigi Rizzo  * When the port is attached to a bridge, we always return
1353f9790aebSLuigi Rizzo  * EBUSY.
1354f9790aebSLuigi Rizzo  * Otherwise, if the port is already bound to a file descriptor,
1355f9790aebSLuigi Rizzo  * then we unconditionally return the existing adapter into *na.
1356f9790aebSLuigi Rizzo  * In all the other cases, we return (into *na) either native,
1357f9790aebSLuigi Rizzo  * generic or NULL, according to the following table:
1358f9790aebSLuigi Rizzo  *
1359f9790aebSLuigi Rizzo  *					native_support
1360f9790aebSLuigi Rizzo  * active_fds   dev.netmap.admode         YES     NO
1361f9790aebSLuigi Rizzo  * -------------------------------------------------------
1362f9790aebSLuigi Rizzo  *    >0              *                 NA(ifp) NA(ifp)
1363f9790aebSLuigi Rizzo  *
1364f9790aebSLuigi Rizzo  *     0        NETMAP_ADMODE_BEST      NATIVE  GENERIC
1365f9790aebSLuigi Rizzo  *     0        NETMAP_ADMODE_NATIVE    NATIVE   NULL
1366f9790aebSLuigi Rizzo  *     0        NETMAP_ADMODE_GENERIC   GENERIC GENERIC
1367f9790aebSLuigi Rizzo  *
1368f9790aebSLuigi Rizzo  */
136937e3a6d3SLuigi Rizzo static void netmap_hw_dtor(struct netmap_adapter *); /* needed by NM_IS_NATIVE() */
int
netmap_get_hw_na(struct ifnet *ifp, struct netmap_mem_d *nmd, struct netmap_adapter **na)
{
	/* generic support */
	int i = netmap_admode;	/* Take a snapshot. */
	struct netmap_adapter *prev_na;
	int error = 0;

	*na = NULL; /* default */

	/* reset in case of invalid value */
	if (i < NETMAP_ADMODE_BEST || i >= NETMAP_ADMODE_LAST)
		i = netmap_admode = NETMAP_ADMODE_BEST;

	if (NM_NA_VALID(ifp)) {
		prev_na = NA(ifp);
		/* If an adapter already exists, return it if
		 * there are active file descriptors or if
		 * netmap is not forced to use generic
		 * adapters.
		 */
		if (NETMAP_OWNED_BY_ANY(prev_na)
			|| i != NETMAP_ADMODE_GENERIC
			|| prev_na->na_flags & NAF_FORCE_NATIVE
#ifdef WITH_PIPES
			/* ugly, but we cannot allow an adapter switch
			 * if some pipe is referring to this one
			 */
			|| prev_na->na_next_pipe > 0
#endif
		) {
			*na = prev_na;
			goto assign_mem;
		}
	}

	/* If there isn't native support and netmap is not allowed
	 * to use generic adapters, we cannot satisfy the request.
	 */
	if (!NM_IS_NATIVE(ifp) && i == NETMAP_ADMODE_NATIVE)
		return EOPNOTSUPP;

	/* Otherwise, create a generic adapter and return it,
	 * saving the previously used netmap adapter, if any.
	 *
	 * Note that here 'prev_na', if not NULL, MUST be a
	 * native adapter, and CANNOT be a generic one. This is
	 * true because generic adapters are created on demand, and
	 * destroyed when not used anymore. Therefore, if the adapter
	 * currently attached to an interface 'ifp' is generic, it
	 * must be that
	 * (NA(ifp)->active_fds > 0 || NETMAP_OWNED_BY_KERN(NA(ifp))).
	 * Consequently, if NA(ifp) is generic, we will enter one of
	 * the branches above. This ensures that we never override
	 * a generic adapter with another generic adapter.
	 */
	error = generic_netmap_attach(ifp);
	if (error)
		return error;

	*na = NA(ifp);

assign_mem:
	/* If the caller requested a specific memory allocator, honor it
	 * only when the adapter does not own its allocator and has no
	 * active users; remember the old one in nm_mem_prev. */
	if (nmd != NULL && !((*na)->na_flags & NAF_MEM_OWNER) &&
	    (*na)->active_fds == 0 && ((*na)->nm_mem != nmd)) {
		(*na)->nm_mem_prev = (*na)->nm_mem;
		(*na)->nm_mem = netmap_mem_get(nmd);
	}

	return 0;
}
1441f9790aebSLuigi Rizzo 
144268b8534bSLuigi Rizzo /*
1443ce3ee1e7SLuigi Rizzo  * MUST BE CALLED UNDER NMG_LOCK()
1444ce3ee1e7SLuigi Rizzo  *
1445f2637526SLuigi Rizzo  * Get a refcounted reference to a netmap adapter attached
1446f2637526SLuigi Rizzo  * to the interface specified by nmr.
1447ce3ee1e7SLuigi Rizzo  * This is always called in the execution of an ioctl().
1448ce3ee1e7SLuigi Rizzo  *
1449f2637526SLuigi Rizzo  * Return ENXIO if the interface specified by the request does
1450f2637526SLuigi Rizzo  * not exist, ENOTSUP if netmap is not supported by the interface,
1451f2637526SLuigi Rizzo  * EBUSY if the interface is already attached to a bridge,
1452f2637526SLuigi Rizzo  * EINVAL if parameters are invalid, ENOMEM if needed resources
1453f2637526SLuigi Rizzo  * could not be allocated.
1454f2637526SLuigi Rizzo  * If successful, hold a reference to the netmap adapter.
1455f18be576SLuigi Rizzo  *
145637e3a6d3SLuigi Rizzo  * If the interface specified by nmr is a system one, also keep
145737e3a6d3SLuigi Rizzo  * a reference to it and return a valid *ifp.
145868b8534bSLuigi Rizzo  */
int
netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na,
	      struct ifnet **ifp, struct netmap_mem_d *nmd, int create)
{
	int error = 0;
	struct netmap_adapter *ret = NULL;
	int nmd_ref = 0;	/* 1 if we hold a reference on nmd */

	*na = NULL;     /* default return value */
	*ifp = NULL;

	NMG_LOCK_ASSERT();

	/* if the request contains a memid, try to find the
	 * corresponding memory region
	 */
	if (nmd == NULL && nmr->nr_arg2) {
		nmd = netmap_mem_find(nmr->nr_arg2);
		if (nmd == NULL)
			return EINVAL;
		/* remember to drop the reference on exit */
		nmd_ref = 1;
	}

	/* We cascade through all possible types of netmap adapter.
	 * All netmap_get_*_na() functions return an error and an na,
	 * with the following combinations:
	 *
	 * error    na
	 *   0	   NULL		type doesn't match
	 *  !0	   NULL		type matches, but na creation/lookup failed
	 *   0	  !NULL		type matches and na created/found
	 *  !0    !NULL		impossible
	 */

	/* try to see if this is a ptnetmap port */
	error = netmap_get_pt_host_na(nmr, na, nmd, create);
	if (error || *na != NULL)
		goto out;

	/* try to see if this is a monitor port */
	error = netmap_get_monitor_na(nmr, na, nmd, create);
	if (error || *na != NULL)
		goto out;

	/* try to see if this is a pipe port */
	error = netmap_get_pipe_na(nmr, na, nmd, create);
	if (error || *na != NULL)
		goto out;

	/* try to see if this is a bridge port */
	error = netmap_get_bdg_na(nmr, na, nmd, create);
	if (error)
		goto out;

	if (*na != NULL) /* valid match in netmap_get_bdg_na() */
		goto out;

	/*
	 * This must be a hardware na, lookup the name in the system.
	 * Note that by hardware we actually mean "it shows up in ifconfig".
	 * This may still be a tap, a veth/epair, or even a
	 * persistent VALE port.
	 */
	*ifp = ifunit_ref(nmr->nr_name);
	if (*ifp == NULL) {
		error = ENXIO;
		goto out;
	}

	error = netmap_get_hw_na(*ifp, nmd, &ret);
	if (error)
		goto out;

	*na = ret;
	netmap_adapter_get(ret);

out:
	/* on failure, release whatever was acquired so far */
	if (error) {
		if (ret)
			netmap_adapter_put(ret);
		if (*ifp) {
			if_rele(*ifp);
			*ifp = NULL;
		}
	}
	if (nmd_ref)
		netmap_mem_put(nmd);

	return error;
}
1550ce3ee1e7SLuigi Rizzo 
155137e3a6d3SLuigi Rizzo /* undo netmap_get_na() */
155237e3a6d3SLuigi Rizzo void
155337e3a6d3SLuigi Rizzo netmap_unget_na(struct netmap_adapter *na, struct ifnet *ifp)
155437e3a6d3SLuigi Rizzo {
155537e3a6d3SLuigi Rizzo 	if (ifp)
155637e3a6d3SLuigi Rizzo 		if_rele(ifp);
155737e3a6d3SLuigi Rizzo 	if (na)
155837e3a6d3SLuigi Rizzo 		netmap_adapter_put(na);
155937e3a6d3SLuigi Rizzo }
156037e3a6d3SLuigi Rizzo 
156137e3a6d3SLuigi Rizzo 
/*
 * Used by the *_prologue() validators below: if the sanity check 't'
 * fails, log a rate-limited dump of the ring state and bail out of the
 * enclosing function returning nkr_num_slots (an out-of-range value
 * that callers treat as an error).  Relies on 'kring', 'ring', 'head'
 * and 'cur' being in scope at the expansion site.
 */
#define NM_FAIL_ON(t) do {						\
	if (unlikely(t)) {						\
		RD(5, "%s: fail '" #t "' "				\
			"h %d c %d t %d "				\
			"rh %d rc %d rt %d "				\
			"hc %d ht %d",					\
			kring->name,					\
			head, cur, ring->tail,				\
			kring->rhead, kring->rcur, kring->rtail,	\
			kring->nr_hwcur, kring->nr_hwtail);		\
		return kring->nkr_num_slots;				\
	}								\
} while (0)
1575ce3ee1e7SLuigi Rizzo 
1576f9790aebSLuigi Rizzo /*
1577f9790aebSLuigi Rizzo  * validate parameters on entry for *_txsync()
1578f9790aebSLuigi Rizzo  * Returns ring->cur if ok, or something >= kring->nkr_num_slots
157917885a7bSLuigi Rizzo  * in case of error.
1580f9790aebSLuigi Rizzo  *
158117885a7bSLuigi Rizzo  * rhead, rcur and rtail=hwtail are stored from previous round.
158217885a7bSLuigi Rizzo  * hwcur is the next packet to send to the ring.
1583f9790aebSLuigi Rizzo  *
158417885a7bSLuigi Rizzo  * We want
158517885a7bSLuigi Rizzo  *    hwcur <= *rhead <= head <= cur <= tail = *rtail <= hwtail
1586f9790aebSLuigi Rizzo  *
158717885a7bSLuigi Rizzo  * hwcur, rhead, rtail and hwtail are reliable
1588f9790aebSLuigi Rizzo  */
u_int
nm_txsync_prologue(struct netmap_kring *kring, struct netmap_ring *ring)
{
	u_int head = ring->head; /* read only once */
	u_int cur = ring->cur; /* read only once */
	u_int n = kring->nkr_num_slots;

	ND(5, "%s kcur %d ktail %d head %d cur %d tail %d",
		kring->name,
		kring->nr_hwcur, kring->nr_hwtail,
		ring->head, ring->cur, ring->tail);
#if 1 /* kernel sanity checks; but we can trust the kring. */
	NM_FAIL_ON(kring->nr_hwcur >= n || kring->rhead >= n ||
	    kring->rtail >= n ||  kring->nr_hwtail >= n);
#endif /* kernel sanity checks */
	/*
	 * user sanity checks. We only use head,
	 * A, B, ... are possible positions for head:
	 *
	 *  0    A  rhead   B  rtail   C  n-1
	 *  0    D  rtail   E  rhead   F  n-1
	 *
	 * B, F, D are valid. A, C, E are wrong
	 */
	if (kring->rtail >= kring->rhead) {
		/* want rhead <= head <= rtail */
		NM_FAIL_ON(head < kring->rhead || head > kring->rtail);
		/* and also head <= cur <= rtail */
		NM_FAIL_ON(cur < head || cur > kring->rtail);
	} else { /* here rtail < rhead */
		/* we need head outside rtail .. rhead */
		NM_FAIL_ON(head > kring->rtail && head < kring->rhead);

		/* two cases now: head <= rtail or head >= rhead  */
		if (head <= kring->rtail) {
			/* want head <= cur <= rtail */
			NM_FAIL_ON(cur < head || cur > kring->rtail);
		} else { /* head >= rhead */
			/* cur must be outside rtail..head */
			NM_FAIL_ON(cur > kring->rtail && cur < head);
		}
	}
	/* the ring is shared with userspace: if the application has
	 * overwritten tail, restore it from the trusted kring copy */
	if (ring->tail != kring->rtail) {
		RD(5, "%s tail overwritten was %d need %d", kring->name,
			ring->tail, kring->rtail);
		ring->tail = kring->rtail;
	}
	/* store the validated values for the next round */
	kring->rhead = head;
	kring->rcur = cur;
	return head;
}
164068b8534bSLuigi Rizzo 
164168b8534bSLuigi Rizzo 
164268b8534bSLuigi Rizzo /*
1643f9790aebSLuigi Rizzo  * validate parameters on entry for *_rxsync()
164417885a7bSLuigi Rizzo  * Returns ring->head if ok, kring->nkr_num_slots on error.
1645f9790aebSLuigi Rizzo  *
164617885a7bSLuigi Rizzo  * For a valid configuration,
164717885a7bSLuigi Rizzo  * hwcur <= head <= cur <= tail <= hwtail
1648f9790aebSLuigi Rizzo  *
164917885a7bSLuigi Rizzo  * We only consider head and cur.
165017885a7bSLuigi Rizzo  * hwcur and hwtail are reliable.
1651f9790aebSLuigi Rizzo  *
1652f9790aebSLuigi Rizzo  */
u_int
nm_rxsync_prologue(struct netmap_kring *kring, struct netmap_ring *ring)
{
	uint32_t const n = kring->nkr_num_slots;
	uint32_t head, cur;

	ND(5,"%s kc %d kt %d h %d c %d t %d",
		kring->name,
		kring->nr_hwcur, kring->nr_hwtail,
		ring->head, ring->cur, ring->tail);
	/*
	 * Before storing the new values, we should check they do not
	 * move backwards. However:
	 * - head is not an issue because the previous value is hwcur;
	 * - cur could in principle go back, however it does not matter
	 *   because we are processing a brand new rxsync()
	 */
	cur = kring->rcur = ring->cur;	/* read only once */
	head = kring->rhead = ring->head;	/* read only once */
#if 1 /* kernel sanity checks */
	NM_FAIL_ON(kring->nr_hwcur >= n || kring->nr_hwtail >= n);
#endif /* kernel sanity checks */
	/* user sanity checks */
	if (kring->nr_hwtail >= kring->nr_hwcur) {
		/* want hwcur <= rhead <= hwtail */
		NM_FAIL_ON(head < kring->nr_hwcur || head > kring->nr_hwtail);
		/* and also rhead <= rcur <= hwtail */
		NM_FAIL_ON(cur < head || cur > kring->nr_hwtail);
	} else {
		/* we need rhead outside hwtail..hwcur */
		NM_FAIL_ON(head < kring->nr_hwcur && head > kring->nr_hwtail);
		/* two cases now: head <= hwtail or head >= hwcur  */
		if (head <= kring->nr_hwtail) {
			/* want head <= cur <= hwtail */
			NM_FAIL_ON(cur < head || cur > kring->nr_hwtail);
		} else {
			/* cur must be outside hwtail..head */
			NM_FAIL_ON(cur < head && cur > kring->nr_hwtail);
		}
	}
	/* the ring is shared with userspace: if the application has
	 * overwritten tail, restore it from the trusted kring copy */
	if (ring->tail != kring->rtail) {
		RD(5, "%s tail overwritten was %d need %d",
			kring->name,
			ring->tail, kring->rtail);
		ring->tail = kring->rtail;
	}
	return head;
}
1701f9790aebSLuigi Rizzo 
170217885a7bSLuigi Rizzo 
1703f9790aebSLuigi Rizzo /*
170468b8534bSLuigi Rizzo  * Error routine called when txsync/rxsync detects an error.
170517885a7bSLuigi Rizzo  * Can't do much more than resetting head = cur = hwcur, tail = hwtail
170668b8534bSLuigi Rizzo  * Return 1 on reinit.
1707506cc70cSLuigi Rizzo  *
1708506cc70cSLuigi Rizzo  * This routine is only called by the upper half of the kernel.
1709506cc70cSLuigi Rizzo  * It only reads hwcur (which is changed only by the upper half, too)
171017885a7bSLuigi Rizzo  * and hwtail (which may be changed by the lower half, but only on
1711506cc70cSLuigi Rizzo  * a tx ring and only to increase it, so any error will be recovered
1712506cc70cSLuigi Rizzo  * on the next call). For the above, we don't strictly need to call
1713506cc70cSLuigi Rizzo  * it under lock.
171468b8534bSLuigi Rizzo  */
171568b8534bSLuigi Rizzo int
171668b8534bSLuigi Rizzo netmap_ring_reinit(struct netmap_kring *kring)
171768b8534bSLuigi Rizzo {
171868b8534bSLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
171968b8534bSLuigi Rizzo 	u_int i, lim = kring->nkr_num_slots - 1;
172068b8534bSLuigi Rizzo 	int errors = 0;
172168b8534bSLuigi Rizzo 
1722ce3ee1e7SLuigi Rizzo 	// XXX KASSERT nm_kr_tryget
17234bf50f18SLuigi Rizzo 	RD(10, "called for %s", kring->name);
172417885a7bSLuigi Rizzo 	// XXX probably wrong to trust userspace
172517885a7bSLuigi Rizzo 	kring->rhead = ring->head;
172617885a7bSLuigi Rizzo 	kring->rcur  = ring->cur;
172717885a7bSLuigi Rizzo 	kring->rtail = ring->tail;
172817885a7bSLuigi Rizzo 
172968b8534bSLuigi Rizzo 	if (ring->cur > lim)
173068b8534bSLuigi Rizzo 		errors++;
173117885a7bSLuigi Rizzo 	if (ring->head > lim)
173217885a7bSLuigi Rizzo 		errors++;
173317885a7bSLuigi Rizzo 	if (ring->tail > lim)
173417885a7bSLuigi Rizzo 		errors++;
173568b8534bSLuigi Rizzo 	for (i = 0; i <= lim; i++) {
173668b8534bSLuigi Rizzo 		u_int idx = ring->slot[i].buf_idx;
173768b8534bSLuigi Rizzo 		u_int len = ring->slot[i].len;
1738847bf383SLuigi Rizzo 		if (idx < 2 || idx >= kring->na->na_lut.objtotal) {
173917885a7bSLuigi Rizzo 			RD(5, "bad index at slot %d idx %d len %d ", i, idx, len);
174068b8534bSLuigi Rizzo 			ring->slot[i].buf_idx = 0;
174168b8534bSLuigi Rizzo 			ring->slot[i].len = 0;
17424bf50f18SLuigi Rizzo 		} else if (len > NETMAP_BUF_SIZE(kring->na)) {
174368b8534bSLuigi Rizzo 			ring->slot[i].len = 0;
174417885a7bSLuigi Rizzo 			RD(5, "bad len at slot %d idx %d len %d", i, idx, len);
174568b8534bSLuigi Rizzo 		}
174668b8534bSLuigi Rizzo 	}
174768b8534bSLuigi Rizzo 	if (errors) {
17488241616dSLuigi Rizzo 		RD(10, "total %d errors", errors);
174917885a7bSLuigi Rizzo 		RD(10, "%s reinit, cur %d -> %d tail %d -> %d",
175017885a7bSLuigi Rizzo 			kring->name,
175168b8534bSLuigi Rizzo 			ring->cur, kring->nr_hwcur,
175217885a7bSLuigi Rizzo 			ring->tail, kring->nr_hwtail);
175317885a7bSLuigi Rizzo 		ring->head = kring->rhead = kring->nr_hwcur;
175417885a7bSLuigi Rizzo 		ring->cur  = kring->rcur  = kring->nr_hwcur;
175517885a7bSLuigi Rizzo 		ring->tail = kring->rtail = kring->nr_hwtail;
175668b8534bSLuigi Rizzo 	}
175768b8534bSLuigi Rizzo 	return (errors ? 1 : 0);
175868b8534bSLuigi Rizzo }
175968b8534bSLuigi Rizzo 
17604bf50f18SLuigi Rizzo /* interpret the ringid and flags fields of an nmreq, by translating them
17614bf50f18SLuigi Rizzo  * into a pair of intervals of ring indices:
17624bf50f18SLuigi Rizzo  *
17634bf50f18SLuigi Rizzo  * [priv->np_txqfirst, priv->np_txqlast) and
17644bf50f18SLuigi Rizzo  * [priv->np_rxqfirst, priv->np_rxqlast)
17654bf50f18SLuigi Rizzo  *
176668b8534bSLuigi Rizzo  */
int
netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags)
{
	struct netmap_adapter *na = priv->np_na;
	u_int j, i = ringid & NETMAP_RING_MASK;	/* ring index, used by NR_REG_ONE_NIC */
	u_int reg = flags & NR_REG_MASK;
	/* indexed by enum txrx: the flag that excludes rings of that direction */
	int excluded_direction[] = { NR_TX_RINGS_ONLY, NR_RX_RINGS_ONLY };
	enum txrx t;

	if (reg == NR_REG_DEFAULT) {
		/* convert from old ringid to flags */
		if (ringid & NETMAP_SW_RING) {
			reg = NR_REG_SW;
		} else if (ringid & NETMAP_HW_RING) {
			reg = NR_REG_ONE_NIC;
		} else {
			reg = NR_REG_ALL_NIC;
		}
		D("deprecated API, old ringid 0x%x -> ringid %x reg %d", ringid, i, reg);
	}

	/* netmap passthrough only supports binding all NIC rings (or a
	 * pipe endpoint), with no single-direction restriction */
	if ((flags & NR_PTNETMAP_HOST) && ((reg != NR_REG_ALL_NIC &&
                    reg != NR_REG_PIPE_MASTER && reg != NR_REG_PIPE_SLAVE) ||
			flags & (NR_RX_RINGS_ONLY|NR_TX_RINGS_ONLY))) {
		D("Error: only NR_REG_ALL_NIC supported with netmap passthrough");
		return EINVAL;
	}

	for_rx_tx(t) {
		if (flags & excluded_direction[t]) {
			/* this direction is excluded: empty interval */
			priv->np_qfirst[t] = priv->np_qlast[t] = 0;
			continue;
		}
		switch (reg) {
		case NR_REG_ALL_NIC:
		case NR_REG_PIPE_MASTER:
		case NR_REG_PIPE_SLAVE:
			/* all hardware rings of this direction */
			priv->np_qfirst[t] = 0;
			priv->np_qlast[t] = nma_get_nrings(na, t);
			ND("ALL/PIPE: %s %d %d", nm_txrx2str(t),
				priv->np_qfirst[t], priv->np_qlast[t]);
			break;
		case NR_REG_SW:
		case NR_REG_NIC_SW:
			if (!(na->na_flags & NAF_HOST_RINGS)) {
				D("host rings not supported");
				return EINVAL;
			}
			/* the host ring sits right past the hw rings:
			 * NR_REG_SW binds it alone, NR_REG_NIC_SW binds
			 * the hw rings and the host ring together */
			priv->np_qfirst[t] = (reg == NR_REG_SW ?
				nma_get_nrings(na, t) : 0);
			priv->np_qlast[t] = nma_get_nrings(na, t) + 1;
			ND("%s: %s %d %d", reg == NR_REG_SW ? "SW" : "NIC+SW",
				nm_txrx2str(t),
				priv->np_qfirst[t], priv->np_qlast[t]);
			break;
		case NR_REG_ONE_NIC:
			if (i >= na->num_tx_rings && i >= na->num_rx_rings) {
				D("invalid ring id %d", i);
				return EINVAL;
			}
			/* if not enough rings, use the first one */
			j = i;
			if (j >= nma_get_nrings(na, t))
				j = 0;
			priv->np_qfirst[t] = j;
			priv->np_qlast[t] = j + 1;
			ND("ONE_NIC: %s %d %d", nm_txrx2str(t),
				priv->np_qfirst[t], priv->np_qlast[t]);
			break;
		default:
			D("invalid regif type %d", reg);
			return EINVAL;
		}
	}
	/* record the resolved binding mode along with the other flags */
	priv->np_flags = (flags & ~NR_REG_MASK) | reg;

	/* Allow transparent forwarding mode in the host --> nic
	 * direction only if all the TX hw rings have been opened. */
	if (priv->np_qfirst[NR_TX] == 0 &&
			priv->np_qlast[NR_TX] >= na->num_tx_rings) {
		priv->np_sync_flags |= NAF_CAN_FORWARD_DOWN;
	}

	if (netmap_verbose) {
		D("%s: tx [%d,%d) rx [%d,%d) id %d",
			na->name,
			priv->np_qfirst[NR_TX],
			priv->np_qlast[NR_TX],
			priv->np_qfirst[NR_RX],
			priv->np_qlast[NR_RX],
			i);
	}
	return 0;
}
186168b8534bSLuigi Rizzo 
18624bf50f18SLuigi Rizzo 
18634bf50f18SLuigi Rizzo /*
18644bf50f18SLuigi Rizzo  * Set the ring ID. For devices with a single queue, a request
18654bf50f18SLuigi Rizzo  * for all rings is the same as a single ring.
18664bf50f18SLuigi Rizzo  */
18674bf50f18SLuigi Rizzo static int
18684bf50f18SLuigi Rizzo netmap_set_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags)
18694bf50f18SLuigi Rizzo {
18704bf50f18SLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
18714bf50f18SLuigi Rizzo 	int error;
1872847bf383SLuigi Rizzo 	enum txrx t;
18734bf50f18SLuigi Rizzo 
18744bf50f18SLuigi Rizzo 	error = netmap_interp_ringid(priv, ringid, flags);
18754bf50f18SLuigi Rizzo 	if (error) {
18764bf50f18SLuigi Rizzo 		return error;
18774bf50f18SLuigi Rizzo 	}
18784bf50f18SLuigi Rizzo 
18794bf50f18SLuigi Rizzo 	priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1;
18804bf50f18SLuigi Rizzo 
18814bf50f18SLuigi Rizzo 	/* optimization: count the users registered for more than
18824bf50f18SLuigi Rizzo 	 * one ring, which are the ones sleeping on the global queue.
18834bf50f18SLuigi Rizzo 	 * The default netmap_notify() callback will then
18844bf50f18SLuigi Rizzo 	 * avoid signaling the global queue if nobody is using it
18854bf50f18SLuigi Rizzo 	 */
1886847bf383SLuigi Rizzo 	for_rx_tx(t) {
1887847bf383SLuigi Rizzo 		if (nm_si_user(priv, t))
1888847bf383SLuigi Rizzo 			na->si_users[t]++;
1889847bf383SLuigi Rizzo 	}
18904bf50f18SLuigi Rizzo 	return 0;
18914bf50f18SLuigi Rizzo }
18924bf50f18SLuigi Rizzo 
1893847bf383SLuigi Rizzo static void
1894847bf383SLuigi Rizzo netmap_unset_ringid(struct netmap_priv_d *priv)
1895847bf383SLuigi Rizzo {
1896847bf383SLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
1897847bf383SLuigi Rizzo 	enum txrx t;
1898847bf383SLuigi Rizzo 
1899847bf383SLuigi Rizzo 	for_rx_tx(t) {
1900847bf383SLuigi Rizzo 		if (nm_si_user(priv, t))
1901847bf383SLuigi Rizzo 			na->si_users[t]--;
1902847bf383SLuigi Rizzo 		priv->np_qfirst[t] = priv->np_qlast[t] = 0;
1903847bf383SLuigi Rizzo 	}
1904847bf383SLuigi Rizzo 	priv->np_flags = 0;
1905847bf383SLuigi Rizzo 	priv->np_txpoll = 0;
1906847bf383SLuigi Rizzo }
1907847bf383SLuigi Rizzo 
1908847bf383SLuigi Rizzo 
190937e3a6d3SLuigi Rizzo /* Set the nr_pending_mode for the requested rings.
191037e3a6d3SLuigi Rizzo  * If requested, also try to get exclusive access to the rings, provided
191137e3a6d3SLuigi Rizzo  * the rings we want to bind are not exclusively owned by a previous bind.
1912847bf383SLuigi Rizzo  */
1913847bf383SLuigi Rizzo static int
191437e3a6d3SLuigi Rizzo netmap_krings_get(struct netmap_priv_d *priv)
1915847bf383SLuigi Rizzo {
1916847bf383SLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
1917847bf383SLuigi Rizzo 	u_int i;
1918847bf383SLuigi Rizzo 	struct netmap_kring *kring;
1919847bf383SLuigi Rizzo 	int excl = (priv->np_flags & NR_EXCLUSIVE);
1920847bf383SLuigi Rizzo 	enum txrx t;
1921847bf383SLuigi Rizzo 
1922*4f80b14cSVincenzo Maffione 	if (netmap_verbose)
1923*4f80b14cSVincenzo Maffione 		D("%s: grabbing tx [%d, %d) rx [%d, %d)",
1924847bf383SLuigi Rizzo 			na->name,
1925847bf383SLuigi Rizzo 			priv->np_qfirst[NR_TX],
1926847bf383SLuigi Rizzo 			priv->np_qlast[NR_TX],
1927847bf383SLuigi Rizzo 			priv->np_qfirst[NR_RX],
1928847bf383SLuigi Rizzo 			priv->np_qlast[NR_RX]);
1929847bf383SLuigi Rizzo 
1930847bf383SLuigi Rizzo 	/* first round: check that all the requested rings
1931847bf383SLuigi Rizzo 	 * are neither alread exclusively owned, nor we
1932847bf383SLuigi Rizzo 	 * want exclusive ownership when they are already in use
1933847bf383SLuigi Rizzo 	 */
1934847bf383SLuigi Rizzo 	for_rx_tx(t) {
1935847bf383SLuigi Rizzo 		for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
1936847bf383SLuigi Rizzo 			kring = &NMR(na, t)[i];
1937847bf383SLuigi Rizzo 			if ((kring->nr_kflags & NKR_EXCLUSIVE) ||
1938847bf383SLuigi Rizzo 			    (kring->users && excl))
1939847bf383SLuigi Rizzo 			{
1940847bf383SLuigi Rizzo 				ND("ring %s busy", kring->name);
1941847bf383SLuigi Rizzo 				return EBUSY;
1942847bf383SLuigi Rizzo 			}
1943847bf383SLuigi Rizzo 		}
1944847bf383SLuigi Rizzo 	}
1945847bf383SLuigi Rizzo 
194637e3a6d3SLuigi Rizzo 	/* second round: increment usage count (possibly marking them
194737e3a6d3SLuigi Rizzo 	 * as exclusive) and set the nr_pending_mode
1948847bf383SLuigi Rizzo 	 */
1949847bf383SLuigi Rizzo 	for_rx_tx(t) {
1950847bf383SLuigi Rizzo 		for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
1951847bf383SLuigi Rizzo 			kring = &NMR(na, t)[i];
1952847bf383SLuigi Rizzo 			kring->users++;
1953847bf383SLuigi Rizzo 			if (excl)
1954847bf383SLuigi Rizzo 				kring->nr_kflags |= NKR_EXCLUSIVE;
195537e3a6d3SLuigi Rizzo 	                kring->nr_pending_mode = NKR_NETMAP_ON;
1956847bf383SLuigi Rizzo 		}
1957847bf383SLuigi Rizzo 	}
1958847bf383SLuigi Rizzo 
1959847bf383SLuigi Rizzo 	return 0;
1960847bf383SLuigi Rizzo 
1961847bf383SLuigi Rizzo }
1962847bf383SLuigi Rizzo 
196337e3a6d3SLuigi Rizzo /* Undo netmap_krings_get(). This is done by clearing the exclusive mode
196437e3a6d3SLuigi Rizzo  * if was asked on regif, and unset the nr_pending_mode if we are the
196537e3a6d3SLuigi Rizzo  * last users of the involved rings. */
1966847bf383SLuigi Rizzo static void
196737e3a6d3SLuigi Rizzo netmap_krings_put(struct netmap_priv_d *priv)
1968847bf383SLuigi Rizzo {
1969847bf383SLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
1970847bf383SLuigi Rizzo 	u_int i;
1971847bf383SLuigi Rizzo 	struct netmap_kring *kring;
1972847bf383SLuigi Rizzo 	int excl = (priv->np_flags & NR_EXCLUSIVE);
1973847bf383SLuigi Rizzo 	enum txrx t;
1974847bf383SLuigi Rizzo 
1975847bf383SLuigi Rizzo 	ND("%s: releasing tx [%d, %d) rx [%d, %d)",
1976847bf383SLuigi Rizzo 			na->name,
1977847bf383SLuigi Rizzo 			priv->np_qfirst[NR_TX],
1978847bf383SLuigi Rizzo 			priv->np_qlast[NR_TX],
1979847bf383SLuigi Rizzo 			priv->np_qfirst[NR_RX],
1980847bf383SLuigi Rizzo 			priv->np_qlast[MR_RX]);
1981847bf383SLuigi Rizzo 
1982847bf383SLuigi Rizzo 
1983847bf383SLuigi Rizzo 	for_rx_tx(t) {
1984847bf383SLuigi Rizzo 		for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
1985847bf383SLuigi Rizzo 			kring = &NMR(na, t)[i];
1986847bf383SLuigi Rizzo 			if (excl)
1987847bf383SLuigi Rizzo 				kring->nr_kflags &= ~NKR_EXCLUSIVE;
1988847bf383SLuigi Rizzo 			kring->users--;
198937e3a6d3SLuigi Rizzo 			if (kring->users == 0)
199037e3a6d3SLuigi Rizzo 				kring->nr_pending_mode = NKR_NETMAP_OFF;
1991847bf383SLuigi Rizzo 		}
1992847bf383SLuigi Rizzo 	}
1993847bf383SLuigi Rizzo }
1994847bf383SLuigi Rizzo 
1995f18be576SLuigi Rizzo /*
1996f18be576SLuigi Rizzo  * possibly move the interface to netmap-mode.
1997f18be576SLuigi Rizzo  * If success it returns a pointer to netmap_if, otherwise NULL.
1998ce3ee1e7SLuigi Rizzo  * This must be called with NMG_LOCK held.
19994bf50f18SLuigi Rizzo  *
20004bf50f18SLuigi Rizzo  * The following na callbacks are called in the process:
20014bf50f18SLuigi Rizzo  *
20024bf50f18SLuigi Rizzo  * na->nm_config()			[by netmap_update_config]
20034bf50f18SLuigi Rizzo  * (get current number and size of rings)
20044bf50f18SLuigi Rizzo  *
20054bf50f18SLuigi Rizzo  *  	We have a generic one for linux (netmap_linux_config).
20064bf50f18SLuigi Rizzo  *  	The bwrap has to override this, since it has to forward
20074bf50f18SLuigi Rizzo  *  	the request to the wrapped adapter (netmap_bwrap_config).
20084bf50f18SLuigi Rizzo  *
20094bf50f18SLuigi Rizzo  *
2010847bf383SLuigi Rizzo  * na->nm_krings_create()
20114bf50f18SLuigi Rizzo  * (create and init the krings array)
20124bf50f18SLuigi Rizzo  *
20134bf50f18SLuigi Rizzo  * 	One of the following:
20144bf50f18SLuigi Rizzo  *
20154bf50f18SLuigi Rizzo  *	* netmap_hw_krings_create, 			(hw ports)
20164bf50f18SLuigi Rizzo  *		creates the standard layout for the krings
20174bf50f18SLuigi Rizzo  * 		and adds the mbq (used for the host rings).
20184bf50f18SLuigi Rizzo  *
20194bf50f18SLuigi Rizzo  * 	* netmap_vp_krings_create			(VALE ports)
20204bf50f18SLuigi Rizzo  * 		add leases and scratchpads
20214bf50f18SLuigi Rizzo  *
20224bf50f18SLuigi Rizzo  * 	* netmap_pipe_krings_create			(pipes)
20234bf50f18SLuigi Rizzo  * 		create the krings and rings of both ends and
20244bf50f18SLuigi Rizzo  * 		cross-link them
20254bf50f18SLuigi Rizzo  *
20264bf50f18SLuigi Rizzo  *      * netmap_monitor_krings_create 			(monitors)
20274bf50f18SLuigi Rizzo  *      	avoid allocating the mbq
20284bf50f18SLuigi Rizzo  *
20294bf50f18SLuigi Rizzo  *      * netmap_bwrap_krings_create			(bwraps)
20304bf50f18SLuigi Rizzo  *      	create both the brap krings array,
20314bf50f18SLuigi Rizzo  *      	the krings array of the wrapped adapter, and
20324bf50f18SLuigi Rizzo  *      	(if needed) the fake array for the host adapter
20334bf50f18SLuigi Rizzo  *
20344bf50f18SLuigi Rizzo  * na->nm_register(, 1)
20354bf50f18SLuigi Rizzo  * (put the adapter in netmap mode)
20364bf50f18SLuigi Rizzo  *
20374bf50f18SLuigi Rizzo  * 	This may be one of the following:
20384bf50f18SLuigi Rizzo  *
203937e3a6d3SLuigi Rizzo  * 	* netmap_hw_reg				        (hw ports)
20404bf50f18SLuigi Rizzo  * 		checks that the ifp is still there, then calls
20414bf50f18SLuigi Rizzo  * 		the hardware specific callback;
20424bf50f18SLuigi Rizzo  *
20434bf50f18SLuigi Rizzo  * 	* netmap_vp_reg					(VALE ports)
20444bf50f18SLuigi Rizzo  *		If the port is connected to a bridge,
20454bf50f18SLuigi Rizzo  *		set the NAF_NETMAP_ON flag under the
20464bf50f18SLuigi Rizzo  *		bridge write lock.
20474bf50f18SLuigi Rizzo  *
20484bf50f18SLuigi Rizzo  *	* netmap_pipe_reg				(pipes)
20494bf50f18SLuigi Rizzo  *		inform the other pipe end that it is no
2050453130d9SPedro F. Giffuni  *		longer responsible for the lifetime of this
20514bf50f18SLuigi Rizzo  *		pipe end
20524bf50f18SLuigi Rizzo  *
20534bf50f18SLuigi Rizzo  *	* netmap_monitor_reg				(monitors)
20544bf50f18SLuigi Rizzo  *		intercept the sync callbacks of the monitored
20554bf50f18SLuigi Rizzo  *		rings
20564bf50f18SLuigi Rizzo  *
205737e3a6d3SLuigi Rizzo  *	* netmap_bwrap_reg				(bwraps)
20584bf50f18SLuigi Rizzo  *		cross-link the bwrap and hwna rings,
20594bf50f18SLuigi Rizzo  *		forward the request to the hwna, override
20604bf50f18SLuigi Rizzo  *		the hwna notify callback (to get the frames
20614bf50f18SLuigi Rizzo  *		coming from outside go through the bridge).
20624bf50f18SLuigi Rizzo  *
20634bf50f18SLuigi Rizzo  *
2064f18be576SLuigi Rizzo  */
int
netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
	uint16_t ringid, uint32_t flags)
{
	struct netmap_if *nifp = NULL;
	int error;

	NMG_LOCK_ASSERT();
	/* ring configuration may have changed, fetch from the card */
	netmap_update_config(na);
	priv->np_na = na;     /* store the reference */
	error = netmap_set_ringid(priv, ringid, flags);
	if (error)
		goto err;
	error = netmap_mem_finalize(na->nm_mem, na);
	if (error)
		goto err;

	if (na->active_fds == 0) {
		/*
		 * If this is the first registration of the adapter,
		 * perform sanity checks and create the in-kernel view
		 * of the netmap rings (the netmap krings).
		 */
		if (na->ifp) {
			/* This netmap adapter is attached to an ifnet. */
			unsigned nbs = netmap_mem_bufsize(na->nm_mem);
			unsigned mtu = nm_os_ifnet_mtu(na->ifp);
			/* The maximum amount of bytes that a single
			 * receive or transmit NIC descriptor can hold.
			 * NOTE(review): hard-coded; presumably a
			 * conservative bound across supported NICs --
			 * confirm against the drivers. */
			unsigned hw_max_slot_len = 4096;

			if (mtu <= hw_max_slot_len) {
				/* The MTU fits a single NIC slot. We only
				 * need to check that netmap buffers are
				 * large enough to hold an MTU. NS_MOREFRAG
				 * cannot be used in this case. */
				if (nbs < mtu) {
					nm_prerr("error: netmap buf size (%u) "
						"< device MTU (%u)", nbs, mtu);
					error = EINVAL;
					goto err_drop_mem;
				}
			} else {
				/* More NIC slots may be needed to receive
				 * or transmit a single packet. Check that
				 * the adapter supports NS_MOREFRAG and that
				 * netmap buffers are large enough to hold
				 * the maximum per-slot size. */
				if (!(na->na_flags & NAF_MOREFRAG)) {
					nm_prerr("error: large MTU (%d) needed "
						"but %s does not support "
						"NS_MOREFRAG", mtu,
						na->ifp->if_xname);
					error = EINVAL;
					goto err_drop_mem;
				} else if (nbs < hw_max_slot_len) {
					nm_prerr("error: using NS_MOREFRAG on "
						"%s requires netmap buf size "
						">= %u", na->ifp->if_xname,
						hw_max_slot_len);
					error = EINVAL;
					goto err_drop_mem;
				} else {
					nm_prinf("info: netmap application on "
						"%s needs to support "
						"NS_MOREFRAG "
						"(MTU=%u,netmap_buf_size=%u)",
						na->ifp->if_xname, mtu, nbs);
				}
			}
		}

		/*
		 * Depending on the adapter, this may also create
		 * the netmap rings themselves
		 */
		error = na->nm_krings_create(na);
		if (error)
			goto err_drop_mem;

	}

	/* now the krings must exist and we can check whether some
	 * previous bind has exclusive ownership on them, and set
	 * nr_pending_mode
	 */
	error = netmap_krings_get(priv);
	if (error)
		goto err_del_krings;

	/* create all needed missing netmap rings */
	error = netmap_mem_rings_create(na);
	if (error)
		goto err_rel_excl;

	/* in all cases, create a new netmap if */
	nifp = netmap_mem_if_new(na, priv);
	if (nifp == NULL) {
		error = ENOMEM;
		goto err_del_rings;
	}

	if (na->active_fds == 0) {
		/* cache the allocator info in the na */
		error = netmap_mem_get_lut(na->nm_mem, &na->na_lut);
		if (error)
			goto err_del_if;
		ND("lut %p bufs %u size %u", na->na_lut.lut, na->na_lut.objtotal,
					    na->na_lut.objsize);
	}

	if (nm_kring_pending(priv)) {
		/* Some kring is switching mode, tell the adapter to
		 * react on this. */
		error = na->nm_register(na, 1);
		if (error)
			goto err_put_lut;
	}

	/* Commit the reference. */
	na->active_fds++;

	/*
	 * advertise that the interface is ready by setting np_nifp.
	 * The barrier is needed because readers (poll, *SYNC and mmap)
	 * check for priv->np_nifp != NULL without locking
	 */
	mb(); /* make sure previous writes are visible to all CPUs */
	priv->np_nifp = nifp;

	return 0;

	/* error labels below unwind in reverse order of the
	 * acquisitions above; keep them in sync with the goto sites */
err_put_lut:
	if (na->active_fds == 0)
		memset(&na->na_lut, 0, sizeof(na->na_lut));
err_del_if:
	netmap_mem_if_delete(na, nifp);
err_del_rings:
	netmap_mem_rings_delete(na);
err_rel_excl:
	netmap_krings_put(priv);
err_del_krings:
	if (na->active_fds == 0)
		na->nm_krings_delete(na);
err_drop_mem:
	netmap_mem_drop(na);
err:
	priv->np_na = NULL;
	return error;
}
2216847bf383SLuigi Rizzo 
2217847bf383SLuigi Rizzo 
2218847bf383SLuigi Rizzo /*
221937e3a6d3SLuigi Rizzo  * update kring and ring at the end of rxsync/txsync.
2220847bf383SLuigi Rizzo  */
2221847bf383SLuigi Rizzo static inline void
222237e3a6d3SLuigi Rizzo nm_sync_finalize(struct netmap_kring *kring)
2223847bf383SLuigi Rizzo {
222437e3a6d3SLuigi Rizzo 	/*
222537e3a6d3SLuigi Rizzo 	 * Update ring tail to what the kernel knows
222637e3a6d3SLuigi Rizzo 	 * After txsync: head/rhead/hwcur might be behind cur/rcur
222737e3a6d3SLuigi Rizzo 	 * if no carrier.
222837e3a6d3SLuigi Rizzo 	 */
2229847bf383SLuigi Rizzo 	kring->ring->tail = kring->rtail = kring->nr_hwtail;
2230847bf383SLuigi Rizzo 
2231847bf383SLuigi Rizzo 	ND(5, "%s now hwcur %d hwtail %d head %d cur %d tail %d",
2232847bf383SLuigi Rizzo 		kring->name, kring->nr_hwcur, kring->nr_hwtail,
2233847bf383SLuigi Rizzo 		kring->rhead, kring->rcur, kring->rtail);
2234847bf383SLuigi Rizzo }
2235847bf383SLuigi Rizzo 
2236c3e9b4dbSLuiz Otavio O Souza /* set ring timestamp */
2237c3e9b4dbSLuiz Otavio O Souza static inline void
2238c3e9b4dbSLuiz Otavio O Souza ring_timestamp_set(struct netmap_ring *ring)
2239c3e9b4dbSLuiz Otavio O Souza {
2240c3e9b4dbSLuiz Otavio O Souza 	if (netmap_no_timestamp == 0 || ring->flags & NR_TIMESTAMP) {
2241c3e9b4dbSLuiz Otavio O Souza 		microtime(&ring->ts);
2242c3e9b4dbSLuiz Otavio O Souza 	}
2243c3e9b4dbSLuiz Otavio O Souza }
2244c3e9b4dbSLuiz Otavio O Souza 
2245c3e9b4dbSLuiz Otavio O Souza 
224668b8534bSLuigi Rizzo /*
224768b8534bSLuigi Rizzo  * ioctl(2) support for the "netmap" device.
224868b8534bSLuigi Rizzo  *
224968b8534bSLuigi Rizzo  * Following a list of accepted commands:
225068b8534bSLuigi Rizzo  * - NIOCGINFO
225168b8534bSLuigi Rizzo  * - SIOCGIFADDR	just for convenience
225268b8534bSLuigi Rizzo  * - NIOCREGIF
225368b8534bSLuigi Rizzo  * - NIOCTXSYNC
225468b8534bSLuigi Rizzo  * - NIOCRXSYNC
225568b8534bSLuigi Rizzo  *
225668b8534bSLuigi Rizzo  * Return 0 on success, errno otherwise.
225768b8534bSLuigi Rizzo  */
2258f9790aebSLuigi Rizzo int
225937e3a6d3SLuigi Rizzo netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread *td)
226068b8534bSLuigi Rizzo {
2261c3e9b4dbSLuiz Otavio O Souza 	struct mbq q;	/* packets from RX hw queues to host stack */
226268b8534bSLuigi Rizzo 	struct nmreq *nmr = (struct nmreq *) data;
2263ce3ee1e7SLuigi Rizzo 	struct netmap_adapter *na = NULL;
2264c3e9b4dbSLuiz Otavio O Souza 	struct netmap_mem_d *nmd = NULL;
226537e3a6d3SLuigi Rizzo 	struct ifnet *ifp = NULL;
226637e3a6d3SLuigi Rizzo 	int error = 0;
2267f0ea3689SLuigi Rizzo 	u_int i, qfirst, qlast;
226868b8534bSLuigi Rizzo 	struct netmap_if *nifp;
2269ce3ee1e7SLuigi Rizzo 	struct netmap_kring *krings;
2270c3e9b4dbSLuiz Otavio O Souza 	int sync_flags;
2271847bf383SLuigi Rizzo 	enum txrx t;
227268b8534bSLuigi Rizzo 
227317885a7bSLuigi Rizzo 	if (cmd == NIOCGINFO || cmd == NIOCREGIF) {
227417885a7bSLuigi Rizzo 		/* truncate name */
227517885a7bSLuigi Rizzo 		nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0';
227617885a7bSLuigi Rizzo 		if (nmr->nr_version != NETMAP_API) {
227717885a7bSLuigi Rizzo 			D("API mismatch for %s got %d need %d",
227817885a7bSLuigi Rizzo 				nmr->nr_name,
227917885a7bSLuigi Rizzo 				nmr->nr_version, NETMAP_API);
228017885a7bSLuigi Rizzo 			nmr->nr_version = NETMAP_API;
2281f0ea3689SLuigi Rizzo 		}
2282f0ea3689SLuigi Rizzo 		if (nmr->nr_version < NETMAP_MIN_API ||
2283f0ea3689SLuigi Rizzo 		    nmr->nr_version > NETMAP_MAX_API) {
228417885a7bSLuigi Rizzo 			return EINVAL;
228517885a7bSLuigi Rizzo 		}
228617885a7bSLuigi Rizzo 	}
228768b8534bSLuigi Rizzo 
228868b8534bSLuigi Rizzo 	switch (cmd) {
228968b8534bSLuigi Rizzo 	case NIOCGINFO:		/* return capabilities etc */
2290f18be576SLuigi Rizzo 		if (nmr->nr_cmd == NETMAP_BDG_LIST) {
2291f18be576SLuigi Rizzo 			error = netmap_bdg_ctl(nmr, NULL);
2292f18be576SLuigi Rizzo 			break;
2293f18be576SLuigi Rizzo 		}
2294ce3ee1e7SLuigi Rizzo 
2295ce3ee1e7SLuigi Rizzo 		NMG_LOCK();
2296ce3ee1e7SLuigi Rizzo 		do {
2297ce3ee1e7SLuigi Rizzo 			/* memsize is always valid */
2298ce3ee1e7SLuigi Rizzo 			u_int memflags;
2299*4f80b14cSVincenzo Maffione 			uint64_t memsize;
2300ce3ee1e7SLuigi Rizzo 
2301ce3ee1e7SLuigi Rizzo 			if (nmr->nr_name[0] != '\0') {
230237e3a6d3SLuigi Rizzo 
2303ce3ee1e7SLuigi Rizzo 				/* get a refcount */
2304c3e9b4dbSLuiz Otavio O Souza 				error = netmap_get_na(nmr, &na, &ifp, NULL, 1 /* create */);
230537e3a6d3SLuigi Rizzo 				if (error) {
230637e3a6d3SLuigi Rizzo 					na = NULL;
230737e3a6d3SLuigi Rizzo 					ifp = NULL;
23088241616dSLuigi Rizzo 					break;
230937e3a6d3SLuigi Rizzo 				}
2310f9790aebSLuigi Rizzo 				nmd = na->nm_mem; /* get memory allocator */
2311c3e9b4dbSLuiz Otavio O Souza 			} else {
2312c3e9b4dbSLuiz Otavio O Souza 				nmd = netmap_mem_find(nmr->nr_arg2 ? nmr->nr_arg2 : 1);
2313c3e9b4dbSLuiz Otavio O Souza 				if (nmd == NULL) {
2314c3e9b4dbSLuiz Otavio O Souza 					error = EINVAL;
2315c3e9b4dbSLuiz Otavio O Souza 					break;
2316c3e9b4dbSLuiz Otavio O Souza 				}
2317ce3ee1e7SLuigi Rizzo 			}
2318ce3ee1e7SLuigi Rizzo 
2319*4f80b14cSVincenzo Maffione 			error = netmap_mem_get_info(nmd, &memsize, &memflags,
2320f0ea3689SLuigi Rizzo 				&nmr->nr_arg2);
2321ce3ee1e7SLuigi Rizzo 			if (error)
2322ce3ee1e7SLuigi Rizzo 				break;
2323*4f80b14cSVincenzo Maffione 			nmr->nr_memsize = (uint32_t)memsize;
2324ce3ee1e7SLuigi Rizzo 			if (na == NULL) /* only memory info */
2325ce3ee1e7SLuigi Rizzo 				break;
23268241616dSLuigi Rizzo 			nmr->nr_offset = 0;
23278241616dSLuigi Rizzo 			nmr->nr_rx_slots = nmr->nr_tx_slots = 0;
2328ae10d1afSLuigi Rizzo 			netmap_update_config(na);
2329d76bf4ffSLuigi Rizzo 			nmr->nr_rx_rings = na->num_rx_rings;
2330d76bf4ffSLuigi Rizzo 			nmr->nr_tx_rings = na->num_tx_rings;
233164ae02c3SLuigi Rizzo 			nmr->nr_rx_slots = na->num_rx_desc;
233264ae02c3SLuigi Rizzo 			nmr->nr_tx_slots = na->num_tx_desc;
2333ce3ee1e7SLuigi Rizzo 		} while (0);
233437e3a6d3SLuigi Rizzo 		netmap_unget_na(na, ifp);
2335ce3ee1e7SLuigi Rizzo 		NMG_UNLOCK();
233668b8534bSLuigi Rizzo 		break;
233768b8534bSLuigi Rizzo 
233868b8534bSLuigi Rizzo 	case NIOCREGIF:
2339844a6f0cSLuigi Rizzo 		/*
2340844a6f0cSLuigi Rizzo 		 * If nmr->nr_cmd is not zero, this NIOCREGIF is not really
2341844a6f0cSLuigi Rizzo 		 * a regif operation, but a different one, specified by the
2342844a6f0cSLuigi Rizzo 		 * value of nmr->nr_cmd.
2343844a6f0cSLuigi Rizzo 		 */
2344f18be576SLuigi Rizzo 		i = nmr->nr_cmd;
2345f9790aebSLuigi Rizzo 		if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH
23464bf50f18SLuigi Rizzo 				|| i == NETMAP_BDG_VNET_HDR
23474bf50f18SLuigi Rizzo 				|| i == NETMAP_BDG_NEWIF
234837e3a6d3SLuigi Rizzo 				|| i == NETMAP_BDG_DELIF
234937e3a6d3SLuigi Rizzo 				|| i == NETMAP_BDG_POLLING_ON
235037e3a6d3SLuigi Rizzo 				|| i == NETMAP_BDG_POLLING_OFF) {
2351844a6f0cSLuigi Rizzo 			/* possibly attach/detach NIC and VALE switch */
2352f18be576SLuigi Rizzo 			error = netmap_bdg_ctl(nmr, NULL);
2353f18be576SLuigi Rizzo 			break;
235437e3a6d3SLuigi Rizzo 		} else if (i == NETMAP_PT_HOST_CREATE || i == NETMAP_PT_HOST_DELETE) {
2355844a6f0cSLuigi Rizzo 			/* forward the command to the ptnetmap subsystem */
235637e3a6d3SLuigi Rizzo 			error = ptnetmap_ctl(nmr, priv->np_na);
235737e3a6d3SLuigi Rizzo 			break;
235837e3a6d3SLuigi Rizzo 		} else if (i == NETMAP_VNET_HDR_GET) {
2359844a6f0cSLuigi Rizzo 			/* get vnet-header length for this netmap port */
236037e3a6d3SLuigi Rizzo 			struct ifnet *ifp;
236137e3a6d3SLuigi Rizzo 
236237e3a6d3SLuigi Rizzo 			NMG_LOCK();
2363c3e9b4dbSLuiz Otavio O Souza 			error = netmap_get_na(nmr, &na, &ifp, NULL, 0);
236437e3a6d3SLuigi Rizzo 			if (na && !error) {
236537e3a6d3SLuigi Rizzo 				nmr->nr_arg1 = na->virt_hdr_len;
236637e3a6d3SLuigi Rizzo 			}
236737e3a6d3SLuigi Rizzo 			netmap_unget_na(na, ifp);
236837e3a6d3SLuigi Rizzo 			NMG_UNLOCK();
236937e3a6d3SLuigi Rizzo 			break;
2370844a6f0cSLuigi Rizzo 		} else if (i == NETMAP_POOLS_INFO_GET) {
2371844a6f0cSLuigi Rizzo 			/* get information from the memory allocator */
2372c3e9b4dbSLuiz Otavio O Souza 			NMG_LOCK();
2373c3e9b4dbSLuiz Otavio O Souza 			if (priv->np_na && priv->np_na->nm_mem) {
2374c3e9b4dbSLuiz Otavio O Souza 				struct netmap_mem_d *nmd = priv->np_na->nm_mem;
2375c3e9b4dbSLuiz Otavio O Souza 				error = netmap_mem_pools_info_get(nmr, nmd);
2376c3e9b4dbSLuiz Otavio O Souza 			} else {
2377c3e9b4dbSLuiz Otavio O Souza 				error = EINVAL;
2378c3e9b4dbSLuiz Otavio O Souza 			}
2379c3e9b4dbSLuiz Otavio O Souza 			NMG_UNLOCK();
2380844a6f0cSLuigi Rizzo 			break;
2381*4f80b14cSVincenzo Maffione 		} else if (i == NETMAP_POOLS_CREATE) {
2382*4f80b14cSVincenzo Maffione 			nmd = netmap_mem_ext_create(nmr, &error);
2383*4f80b14cSVincenzo Maffione 			if (nmd == NULL)
2384*4f80b14cSVincenzo Maffione 				break;
2385*4f80b14cSVincenzo Maffione 			/* reset the fields used by POOLS_CREATE to
2386*4f80b14cSVincenzo Maffione 			 * avoid confusing the rest of the code
2387*4f80b14cSVincenzo Maffione 			 */
2388*4f80b14cSVincenzo Maffione 			nmr->nr_cmd = 0;
2389*4f80b14cSVincenzo Maffione 			nmr->nr_arg1 = 0;
2390*4f80b14cSVincenzo Maffione 			nmr->nr_arg2 = 0;
2391*4f80b14cSVincenzo Maffione 			nmr->nr_arg3 = 0;
2392f18be576SLuigi Rizzo 		} else if (i != 0) {
2393f18be576SLuigi Rizzo 			D("nr_cmd must be 0 not %d", i);
2394f18be576SLuigi Rizzo 			error = EINVAL;
2395f18be576SLuigi Rizzo 			break;
2396f18be576SLuigi Rizzo 		}
2397f18be576SLuigi Rizzo 
23988241616dSLuigi Rizzo 		/* protect access to priv from concurrent NIOCREGIF */
2399ce3ee1e7SLuigi Rizzo 		NMG_LOCK();
2400ce3ee1e7SLuigi Rizzo 		do {
2401ce3ee1e7SLuigi Rizzo 			u_int memflags;
2402*4f80b14cSVincenzo Maffione 			uint64_t memsize;
2403ce3ee1e7SLuigi Rizzo 
2404847bf383SLuigi Rizzo 			if (priv->np_nifp != NULL) {	/* thread already registered */
2405f0ea3689SLuigi Rizzo 				error = EBUSY;
2406506cc70cSLuigi Rizzo 				break;
2407506cc70cSLuigi Rizzo 			}
2408c3e9b4dbSLuiz Otavio O Souza 
2409c3e9b4dbSLuiz Otavio O Souza 			if (nmr->nr_arg2) {
2410c3e9b4dbSLuiz Otavio O Souza 				/* find the allocator and get a reference */
2411c3e9b4dbSLuiz Otavio O Souza 				nmd = netmap_mem_find(nmr->nr_arg2);
2412c3e9b4dbSLuiz Otavio O Souza 				if (nmd == NULL) {
2413c3e9b4dbSLuiz Otavio O Souza 					error = EINVAL;
2414c3e9b4dbSLuiz Otavio O Souza 					break;
2415c3e9b4dbSLuiz Otavio O Souza 				}
2416c3e9b4dbSLuiz Otavio O Souza 			}
241768b8534bSLuigi Rizzo 			/* find the interface and a reference */
2418c3e9b4dbSLuiz Otavio O Souza 			error = netmap_get_na(nmr, &na, &ifp, nmd,
241937e3a6d3SLuigi Rizzo 					      1 /* create */); /* keep reference */
242068b8534bSLuigi Rizzo 			if (error)
2421ce3ee1e7SLuigi Rizzo 				break;
2422f9790aebSLuigi Rizzo 			if (NETMAP_OWNED_BY_KERN(na)) {
2423ce3ee1e7SLuigi Rizzo 				error = EBUSY;
2424ce3ee1e7SLuigi Rizzo 				break;
2425f196ce38SLuigi Rizzo 			}
242637e3a6d3SLuigi Rizzo 
242737e3a6d3SLuigi Rizzo 			if (na->virt_hdr_len && !(nmr->nr_flags & NR_ACCEPT_VNET_HDR)) {
242837e3a6d3SLuigi Rizzo 				error = EIO;
242937e3a6d3SLuigi Rizzo 				break;
243037e3a6d3SLuigi Rizzo 			}
243137e3a6d3SLuigi Rizzo 
2432847bf383SLuigi Rizzo 			error = netmap_do_regif(priv, na, nmr->nr_ringid, nmr->nr_flags);
2433847bf383SLuigi Rizzo 			if (error) {    /* reg. failed, release priv and ref */
2434ce3ee1e7SLuigi Rizzo 				break;
243568b8534bSLuigi Rizzo 			}
2436847bf383SLuigi Rizzo 			nifp = priv->np_nifp;
2437f0ea3689SLuigi Rizzo 			priv->np_td = td; // XXX kqueue, debugging only
243868b8534bSLuigi Rizzo 
243968b8534bSLuigi Rizzo 			/* return the offset of the netmap_if object */
2440d76bf4ffSLuigi Rizzo 			nmr->nr_rx_rings = na->num_rx_rings;
2441d76bf4ffSLuigi Rizzo 			nmr->nr_tx_rings = na->num_tx_rings;
244264ae02c3SLuigi Rizzo 			nmr->nr_rx_slots = na->num_rx_desc;
244364ae02c3SLuigi Rizzo 			nmr->nr_tx_slots = na->num_tx_desc;
2444*4f80b14cSVincenzo Maffione 			error = netmap_mem_get_info(na->nm_mem, &memsize, &memflags,
2445f0ea3689SLuigi Rizzo 				&nmr->nr_arg2);
2446ce3ee1e7SLuigi Rizzo 			if (error) {
2447847bf383SLuigi Rizzo 				netmap_do_unregif(priv);
2448ce3ee1e7SLuigi Rizzo 				break;
2449ce3ee1e7SLuigi Rizzo 			}
2450*4f80b14cSVincenzo Maffione 			nmr->nr_memsize = (uint32_t)memsize;
2451ce3ee1e7SLuigi Rizzo 			if (memflags & NETMAP_MEM_PRIVATE) {
24523d819cb6SLuigi Rizzo 				*(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM;
2453ce3ee1e7SLuigi Rizzo 			}
2454847bf383SLuigi Rizzo 			for_rx_tx(t) {
2455847bf383SLuigi Rizzo 				priv->np_si[t] = nm_si_user(priv, t) ?
2456847bf383SLuigi Rizzo 					&na->si[t] : &NMR(na, t)[priv->np_qfirst[t]].si;
2457847bf383SLuigi Rizzo 			}
2458f0ea3689SLuigi Rizzo 
2459f0ea3689SLuigi Rizzo 			if (nmr->nr_arg3) {
246037e3a6d3SLuigi Rizzo 				if (netmap_verbose)
2461f0ea3689SLuigi Rizzo 					D("requested %d extra buffers", nmr->nr_arg3);
2462f0ea3689SLuigi Rizzo 				nmr->nr_arg3 = netmap_extra_alloc(na,
2463f0ea3689SLuigi Rizzo 					&nifp->ni_bufs_head, nmr->nr_arg3);
246437e3a6d3SLuigi Rizzo 				if (netmap_verbose)
2465f0ea3689SLuigi Rizzo 					D("got %d extra buffers", nmr->nr_arg3);
2466f0ea3689SLuigi Rizzo 			}
2467ce3ee1e7SLuigi Rizzo 			nmr->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp);
246837e3a6d3SLuigi Rizzo 
246937e3a6d3SLuigi Rizzo 			/* store ifp reference so that priv destructor may release it */
247037e3a6d3SLuigi Rizzo 			priv->np_ifp = ifp;
2471ce3ee1e7SLuigi Rizzo 		} while (0);
2472c3e9b4dbSLuiz Otavio O Souza 		if (error) {
2473c3e9b4dbSLuiz Otavio O Souza 			netmap_unget_na(na, ifp);
2474c3e9b4dbSLuiz Otavio O Souza 		}
2475c3e9b4dbSLuiz Otavio O Souza 		/* release the reference from netmap_mem_find() or
2476c3e9b4dbSLuiz Otavio O Souza 		 * netmap_mem_ext_create()
2477c3e9b4dbSLuiz Otavio O Souza 		 */
2478c3e9b4dbSLuiz Otavio O Souza 		if (nmd)
2479c3e9b4dbSLuiz Otavio O Souza 			netmap_mem_put(nmd);
2480ce3ee1e7SLuigi Rizzo 		NMG_UNLOCK();
248168b8534bSLuigi Rizzo 		break;
248268b8534bSLuigi Rizzo 
248368b8534bSLuigi Rizzo 	case NIOCTXSYNC:
248468b8534bSLuigi Rizzo 	case NIOCRXSYNC:
24858241616dSLuigi Rizzo 		nifp = priv->np_nifp;
24868241616dSLuigi Rizzo 
24878241616dSLuigi Rizzo 		if (nifp == NULL) {
2488506cc70cSLuigi Rizzo 			error = ENXIO;
2489506cc70cSLuigi Rizzo 			break;
2490506cc70cSLuigi Rizzo 		}
24916641c68bSLuigi Rizzo 		mb(); /* make sure following reads are not from cache */
24928241616dSLuigi Rizzo 
2493f9790aebSLuigi Rizzo 		na = priv->np_na;      /* we have a reference */
24948241616dSLuigi Rizzo 
2495f9790aebSLuigi Rizzo 		if (na == NULL) {
2496f9790aebSLuigi Rizzo 			D("Internal error: nifp != NULL && na == NULL");
24978241616dSLuigi Rizzo 			error = ENXIO;
24988241616dSLuigi Rizzo 			break;
24998241616dSLuigi Rizzo 		}
25008241616dSLuigi Rizzo 
2501c3e9b4dbSLuiz Otavio O Souza 		mbq_init(&q);
2502847bf383SLuigi Rizzo 		t = (cmd == NIOCTXSYNC ? NR_TX : NR_RX);
2503847bf383SLuigi Rizzo 		krings = NMR(na, t);
2504847bf383SLuigi Rizzo 		qfirst = priv->np_qfirst[t];
2505847bf383SLuigi Rizzo 		qlast = priv->np_qlast[t];
2506c3e9b4dbSLuiz Otavio O Souza 		sync_flags = priv->np_sync_flags;
250768b8534bSLuigi Rizzo 
2508f0ea3689SLuigi Rizzo 		for (i = qfirst; i < qlast; i++) {
2509ce3ee1e7SLuigi Rizzo 			struct netmap_kring *kring = krings + i;
251037e3a6d3SLuigi Rizzo 			struct netmap_ring *ring = kring->ring;
251137e3a6d3SLuigi Rizzo 
251237e3a6d3SLuigi Rizzo 			if (unlikely(nm_kr_tryget(kring, 1, &error))) {
251337e3a6d3SLuigi Rizzo 				error = (error ? EIO : 0);
251437e3a6d3SLuigi Rizzo 				continue;
2515ce3ee1e7SLuigi Rizzo 			}
251637e3a6d3SLuigi Rizzo 
251768b8534bSLuigi Rizzo 			if (cmd == NIOCTXSYNC) {
251868b8534bSLuigi Rizzo 				if (netmap_verbose & NM_VERB_TXSYNC)
25193c0caf6cSLuigi Rizzo 					D("pre txsync ring %d cur %d hwcur %d",
252037e3a6d3SLuigi Rizzo 					    i, ring->cur,
252168b8534bSLuigi Rizzo 					    kring->nr_hwcur);
252237e3a6d3SLuigi Rizzo 				if (nm_txsync_prologue(kring, ring) >= kring->nkr_num_slots) {
252317885a7bSLuigi Rizzo 					netmap_ring_reinit(kring);
2524c3e9b4dbSLuiz Otavio O Souza 				} else if (kring->nm_sync(kring, sync_flags | NAF_FORCE_RECLAIM) == 0) {
252537e3a6d3SLuigi Rizzo 					nm_sync_finalize(kring);
252617885a7bSLuigi Rizzo 				}
252768b8534bSLuigi Rizzo 				if (netmap_verbose & NM_VERB_TXSYNC)
25283c0caf6cSLuigi Rizzo 					D("post txsync ring %d cur %d hwcur %d",
252937e3a6d3SLuigi Rizzo 					    i, ring->cur,
253068b8534bSLuigi Rizzo 					    kring->nr_hwcur);
253168b8534bSLuigi Rizzo 			} else {
253237e3a6d3SLuigi Rizzo 				if (nm_rxsync_prologue(kring, ring) >= kring->nkr_num_slots) {
2533847bf383SLuigi Rizzo 					netmap_ring_reinit(kring);
2534c3e9b4dbSLuiz Otavio O Souza 				}
2535c3e9b4dbSLuiz Otavio O Souza 				if (nm_may_forward_up(kring)) {
2536c3e9b4dbSLuiz Otavio O Souza 					/* transparent forwarding, see netmap_poll() */
2537c3e9b4dbSLuiz Otavio O Souza 					netmap_grab_packets(kring, &q, netmap_fwd);
2538c3e9b4dbSLuiz Otavio O Souza 				}
2539c3e9b4dbSLuiz Otavio O Souza 				if (kring->nm_sync(kring, sync_flags | NAF_FORCE_READ) == 0) {
254037e3a6d3SLuigi Rizzo 					nm_sync_finalize(kring);
2541847bf383SLuigi Rizzo 				}
2542c3e9b4dbSLuiz Otavio O Souza 				ring_timestamp_set(ring);
254368b8534bSLuigi Rizzo 			}
2544ce3ee1e7SLuigi Rizzo 			nm_kr_put(kring);
254568b8534bSLuigi Rizzo 		}
254668b8534bSLuigi Rizzo 
2547c3e9b4dbSLuiz Otavio O Souza 		if (mbq_peek(&q)) {
2548c3e9b4dbSLuiz Otavio O Souza 			netmap_send_up(na->ifp, &q);
2549c3e9b4dbSLuiz Otavio O Souza 		}
2550c3e9b4dbSLuiz Otavio O Souza 
255168b8534bSLuigi Rizzo 		break;
255268b8534bSLuigi Rizzo 
2553847bf383SLuigi Rizzo #ifdef WITH_VALE
25544bf50f18SLuigi Rizzo 	case NIOCCONFIG:
25554bf50f18SLuigi Rizzo 		error = netmap_bdg_config(nmr);
25564bf50f18SLuigi Rizzo 		break;
2557847bf383SLuigi Rizzo #endif
2558f196ce38SLuigi Rizzo #ifdef __FreeBSD__
255989e3fd52SLuigi Rizzo 	case FIONBIO:
256089e3fd52SLuigi Rizzo 	case FIOASYNC:
256189e3fd52SLuigi Rizzo 		ND("FIONBIO/FIOASYNC are no-ops");
256289e3fd52SLuigi Rizzo 		break;
256389e3fd52SLuigi Rizzo 
256468b8534bSLuigi Rizzo 	case BIOCIMMEDIATE:
256568b8534bSLuigi Rizzo 	case BIOCGHDRCMPLT:
256668b8534bSLuigi Rizzo 	case BIOCSHDRCMPLT:
256768b8534bSLuigi Rizzo 	case BIOCSSEESENT:
256868b8534bSLuigi Rizzo 		D("ignore BIOCIMMEDIATE/BIOCSHDRCMPLT/BIOCSHDRCMPLT/BIOCSSEESENT");
256968b8534bSLuigi Rizzo 		break;
257068b8534bSLuigi Rizzo 
2571babc7c12SLuigi Rizzo 	default:	/* allow device-specific ioctls */
257268b8534bSLuigi Rizzo 	    {
2573b3d37588SLuigi Rizzo 		struct ifnet *ifp = ifunit_ref(nmr->nr_name);
2574b3d37588SLuigi Rizzo 		if (ifp == NULL) {
2575b3d37588SLuigi Rizzo 			error = ENXIO;
2576b3d37588SLuigi Rizzo 		} else {
257768b8534bSLuigi Rizzo 			struct socket so;
2578ce3ee1e7SLuigi Rizzo 
257968b8534bSLuigi Rizzo 			bzero(&so, sizeof(so));
258068b8534bSLuigi Rizzo 			so.so_vnet = ifp->if_vnet;
258168b8534bSLuigi Rizzo 			// so->so_proto not null.
258268b8534bSLuigi Rizzo 			error = ifioctl(&so, cmd, data, td);
2583b3d37588SLuigi Rizzo 			if_rele(ifp);
2584b3d37588SLuigi Rizzo 		}
2585babc7c12SLuigi Rizzo 		break;
258668b8534bSLuigi Rizzo 	    }
2587f196ce38SLuigi Rizzo 
2588f196ce38SLuigi Rizzo #else /* linux */
2589f196ce38SLuigi Rizzo 	default:
2590f196ce38SLuigi Rizzo 		error = EOPNOTSUPP;
2591f196ce38SLuigi Rizzo #endif /* linux */
259268b8534bSLuigi Rizzo 	}
259368b8534bSLuigi Rizzo 
259468b8534bSLuigi Rizzo 	return (error);
259568b8534bSLuigi Rizzo }
259668b8534bSLuigi Rizzo 
259768b8534bSLuigi Rizzo 
259868b8534bSLuigi Rizzo /*
259968b8534bSLuigi Rizzo  * select(2) and poll(2) handlers for the "netmap" device.
260068b8534bSLuigi Rizzo  *
260168b8534bSLuigi Rizzo  * Can be called for one or more queues.
260268b8534bSLuigi Rizzo  * Return true the event mask corresponding to ready events.
260368b8534bSLuigi Rizzo  * Return the event mask corresponding to ready events.
2604ce3ee1e7SLuigi Rizzo  * selinfo or on the global one.
260568b8534bSLuigi Rizzo  * Device-dependent parts (locking and sync of tx/rx rings)
260668b8534bSLuigi Rizzo  * are done through callbacks.
2607f196ce38SLuigi Rizzo  *
260801c7d25fSLuigi Rizzo  * On linux, arguments are really pwait, the poll table, and 'td' is struct file *
260901c7d25fSLuigi Rizzo  * The first one is remapped to pwait as selrecord() uses the name as an
261001c7d25fSLuigi Rizzo  * hidden argument.
261168b8534bSLuigi Rizzo  */
int
netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
{
	struct netmap_adapter *na;
	struct netmap_kring *kring;
	struct netmap_ring *ring;
	u_int i, check_all_tx, check_all_rx, want[NR_TXRX], revents = 0;
#define want_tx want[NR_TX]
#define want_rx want[NR_RX]
	struct mbq q;	/* packets from RX hw queues to host stack */

	/*
	 * In order to avoid nested locks, we need to "double check"
	 * txsync and rxsync if we decide to do a selrecord().
	 * retry_tx (and retry_rx, later) prevent looping forever.
	 */
	int retry_tx = 1, retry_rx = 1;

	/* Transparent mode: send_down is 1 if we have found some
	 * packets to forward (host RX ring --> NIC) during the rx
	 * scan and we have not sent them down to the NIC yet.
	 * Transparent mode requires to bind all rings to a single
	 * file descriptor.
	 */
	int send_down = 0;
	int sync_flags = priv->np_sync_flags;

	/* q collects mbufs grabbed from hw RX rings for forwarding up */
	mbq_init(&q);

	/* fail early if this fd has not completed a NIOCREGIF yet */
	if (priv->np_nifp == NULL) {
		D("No if registered");
		return POLLERR;
	}
	mb(); /* make sure following reads are not from cache */

	na = priv->np_na;

	if (!nm_netmap_on(na))
		return POLLERR;

	if (netmap_verbose & 0x8000)
		D("device %s events 0x%x", na->name, events);
	/* split the caller's event mask into per-direction "wanted" flags */
	want_tx = events & (POLLOUT | POLLWRNORM);
	want_rx = events & (POLLIN | POLLRDNORM);

	/*
	 * check_all_{tx|rx} are set if the card has more than one queue AND
	 * the file descriptor is bound to all of them. If so, we sleep on
	 * the "global" selinfo, otherwise we sleep on individual selinfo
	 * (FreeBSD only allows two selinfo's per file descriptor).
	 * The interrupt routine in the driver wake one or the other
	 * (or both) depending on which clients are active.
	 *
	 * rxsync() is only called if we run out of buffers on a POLLIN.
	 * txsync() is called if we run out of buffers on POLLOUT, or
	 * there are pending packets to send. The latter can be disabled
	 * passing NETMAP_NO_TX_POLL in the NIOCREG call.
	 */
	check_all_tx = nm_si_user(priv, NR_TX);
	check_all_rx = nm_si_user(priv, NR_RX);

#ifdef __FreeBSD__
	/*
	 * We start with a lock free round which is cheap if we have
	 * slots available. If this fails, then lock and call the sync
	 * routines. We can't do this on Linux, as the contract says
	 * that we must call nm_os_selrecord() unconditionally.
	 */
	if (want_tx) {
		enum txrx t = NR_TX;
		for (i = priv->np_qfirst[t]; want[t] && i < priv->np_qlast[t]; i++) {
			kring = &NMR(na, t)[i];
			/* XXX compare ring->cur and kring->tail */
			if (!nm_ring_empty(kring->ring)) {
				revents |= want[t];
				want[t] = 0;	/* also breaks the loop */
			}
		}
	}
	if (want_rx) {
		enum txrx t = NR_RX;
		want_rx = 0; /* look for a reason to run the handlers */
		for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
			kring = &NMR(na, t)[i];
			if (kring->ring->cur == kring->ring->tail /* try fetch new buffers */
			    || kring->rhead != kring->ring->head /* release buffers */) {
				want_rx = 1;
			}
		}
		if (!want_rx)
			revents |= events & (POLLIN | POLLRDNORM); /* we have data */
	}
#endif

#ifdef linux
	/* The selrecord must be unconditional on linux. */
	nm_os_selrecord(sr, check_all_tx ?
	    &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]].si);
	nm_os_selrecord(sr, check_all_rx ?
		&na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si);
#endif /* linux */

	/*
	 * If we want to push packets out (priv->np_txpoll) or
	 * want_tx is still set, we must issue txsync calls
	 * (on all rings, to avoid that the tx rings stall).
	 * Fortunately, normal tx mode has np_txpoll set.
	 */
	if (priv->np_txpoll || want_tx) {
		/*
		 * The first round checks if anyone is ready, if not
		 * do a selrecord and another round to handle races.
		 * want_tx goes to 0 if any space is found, and is
		 * used to skip rings with no pending transmissions.
		 */
flush_tx:
		for (i = priv->np_qfirst[NR_TX]; i < priv->np_qlast[NR_TX]; i++) {
			int found = 0;

			kring = &na->tx_rings[i];
			ring = kring->ring;

			/*
			 * Don't try to txsync this TX ring if we already found some
			 * space in some of the TX rings (want_tx == 0) and there are no
			 * TX slots in this ring that need to be flushed to the NIC
			 * (cur == hwcur).
			 */
			if (!send_down && !want_tx && ring->cur == kring->nr_hwcur)
				continue;

			/* skip a ring that is busy or stopped */
			if (nm_kr_tryget(kring, 1, &revents))
				continue;

			if (nm_txsync_prologue(kring, ring) >= kring->nkr_num_slots) {
				/* inconsistent user-visible ring: reset it */
				netmap_ring_reinit(kring);
				revents |= POLLERR;
			} else {
				if (kring->nm_sync(kring, sync_flags))
					revents |= POLLERR;
				else
					nm_sync_finalize(kring);
			}

			/*
			 * If we found new slots, notify potential
			 * listeners on the same ring.
			 * Since we just did a txsync, look at the copies
			 * of cur,tail in the kring.
			 */
			found = kring->rcur != kring->rtail;
			nm_kr_put(kring);
			if (found) { /* notify other listeners */
				revents |= want_tx;
				want_tx = 0;
#ifndef linux
				kring->nm_notify(kring, 0);
#endif /* !linux */
			}
		}
		/* if there were any packet to forward we must have handled them by now */
		send_down = 0;
		if (want_tx && retry_tx && sr) {
#ifndef linux
			nm_os_selrecord(sr, check_all_tx ?
			    &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]].si);
#endif /* !linux */
			retry_tx = 0;
			goto flush_tx;
		}
	}

	/*
	 * If want_rx is still set scan receive rings.
	 * Do it on all rings because otherwise we starve.
	 */
	if (want_rx) {
		/* two rounds here for race avoidance */
do_retry_rx:
		for (i = priv->np_qfirst[NR_RX]; i < priv->np_qlast[NR_RX]; i++) {
			int found = 0;

			kring = &na->rx_rings[i];
			ring = kring->ring;

			/* skip a ring that is busy or stopped */
			if (unlikely(nm_kr_tryget(kring, 1, &revents)))
				continue;

			if (nm_rxsync_prologue(kring, ring) >= kring->nkr_num_slots) {
				/* inconsistent user-visible ring: reset it */
				netmap_ring_reinit(kring);
				revents |= POLLERR;
			}
			/* now we can use kring->rcur, rtail */

			/*
			 * transparent mode support: collect packets from
			 * hw rxring(s) that have been released by the user
			 */
			if (nm_may_forward_up(kring)) {
				netmap_grab_packets(kring, &q, netmap_fwd);
			}

			/* Clear the NR_FORWARD flag anyway, it may be set by
			 * the nm_sync() below only for the host RX ring (see
			 * netmap_rxsync_from_host()). */
			kring->nr_kflags &= ~NR_FORWARD;
			if (kring->nm_sync(kring, sync_flags))
				revents |= POLLERR;
			else
				nm_sync_finalize(kring);
			/* remember if the host ring asked us to push down */
			send_down |= (kring->nr_kflags & NR_FORWARD);
			ring_timestamp_set(ring);
			found = kring->rcur != kring->rtail;
			nm_kr_put(kring);
			if (found) {
				revents |= want_rx;
				retry_rx = 0;
#ifndef linux
				kring->nm_notify(kring, 0);
#endif /* !linux */
			}
		}

#ifndef linux
		if (retry_rx && sr) {
			nm_os_selrecord(sr, check_all_rx ?
			    &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si);
		}
#endif /* !linux */
		if (send_down || retry_rx) {
			retry_rx = 0;
			if (send_down)
				goto flush_tx; /* and retry_rx */
			else
				goto do_retry_rx;
		}
	}

	/*
	 * Transparent mode: released bufs (i.e. between kring->nr_hwcur and
	 * ring->head) marked with NS_FORWARD on hw rx rings are passed up
	 * to the host stack.
	 */

	if (mbq_peek(&q)) {
		netmap_send_up(na->ifp, &q);
	}

	return (revents);
#undef want_tx
#undef want_rx
}
286468b8534bSLuigi Rizzo 
2865*4f80b14cSVincenzo Maffione int
2866*4f80b14cSVincenzo Maffione nma_intr_enable(struct netmap_adapter *na, int onoff)
2867*4f80b14cSVincenzo Maffione {
2868*4f80b14cSVincenzo Maffione 	bool changed = false;
2869*4f80b14cSVincenzo Maffione 	enum txrx t;
2870*4f80b14cSVincenzo Maffione 	int i;
2871*4f80b14cSVincenzo Maffione 
2872*4f80b14cSVincenzo Maffione 	for_rx_tx(t) {
2873*4f80b14cSVincenzo Maffione 		for (i = 0; i < nma_get_nrings(na, t); i++) {
2874*4f80b14cSVincenzo Maffione 			struct netmap_kring *kring = &NMR(na, t)[i];
2875*4f80b14cSVincenzo Maffione 			int on = !(kring->nr_kflags & NKR_NOINTR);
2876*4f80b14cSVincenzo Maffione 
2877*4f80b14cSVincenzo Maffione 			if (!!onoff != !!on) {
2878*4f80b14cSVincenzo Maffione 				changed = true;
2879*4f80b14cSVincenzo Maffione 			}
2880*4f80b14cSVincenzo Maffione 			if (onoff) {
2881*4f80b14cSVincenzo Maffione 				kring->nr_kflags &= ~NKR_NOINTR;
2882*4f80b14cSVincenzo Maffione 			} else {
2883*4f80b14cSVincenzo Maffione 				kring->nr_kflags |= NKR_NOINTR;
2884*4f80b14cSVincenzo Maffione 			}
2885*4f80b14cSVincenzo Maffione 		}
2886*4f80b14cSVincenzo Maffione 	}
2887*4f80b14cSVincenzo Maffione 
2888*4f80b14cSVincenzo Maffione 	if (!changed) {
2889*4f80b14cSVincenzo Maffione 		return 0; /* nothing to do */
2890*4f80b14cSVincenzo Maffione 	}
2891*4f80b14cSVincenzo Maffione 
2892*4f80b14cSVincenzo Maffione 	if (!na->nm_intr) {
2893*4f80b14cSVincenzo Maffione 		D("Cannot %s interrupts for %s", onoff ? "enable" : "disable",
2894*4f80b14cSVincenzo Maffione 		  na->name);
2895*4f80b14cSVincenzo Maffione 		return -1;
2896*4f80b14cSVincenzo Maffione 	}
2897*4f80b14cSVincenzo Maffione 
2898*4f80b14cSVincenzo Maffione 	na->nm_intr(na, onoff);
2899*4f80b14cSVincenzo Maffione 
2900*4f80b14cSVincenzo Maffione 	return 0;
2901*4f80b14cSVincenzo Maffione }
2902*4f80b14cSVincenzo Maffione 
290317885a7bSLuigi Rizzo 
290417885a7bSLuigi Rizzo /*-------------------- driver support routines -------------------*/
290568b8534bSLuigi Rizzo 
290689cc2556SLuigi Rizzo /* default notify callback */
2907f9790aebSLuigi Rizzo static int
2908847bf383SLuigi Rizzo netmap_notify(struct netmap_kring *kring, int flags)
2909f9790aebSLuigi Rizzo {
2910847bf383SLuigi Rizzo 	struct netmap_adapter *na = kring->na;
2911847bf383SLuigi Rizzo 	enum txrx t = kring->tx;
2912f9790aebSLuigi Rizzo 
291337e3a6d3SLuigi Rizzo 	nm_os_selwakeup(&kring->si);
291489cc2556SLuigi Rizzo 	/* optimization: avoid a wake up on the global
291589cc2556SLuigi Rizzo 	 * queue if nobody has registered for more
291689cc2556SLuigi Rizzo 	 * than one ring
291789cc2556SLuigi Rizzo 	 */
2918847bf383SLuigi Rizzo 	if (na->si_users[t] > 0)
291937e3a6d3SLuigi Rizzo 		nm_os_selwakeup(&na->si[t]);
2920847bf383SLuigi Rizzo 
292137e3a6d3SLuigi Rizzo 	return NM_IRQ_COMPLETED;
2922f9790aebSLuigi Rizzo }
2923f9790aebSLuigi Rizzo 
292489cc2556SLuigi Rizzo /* called by all routines that create netmap_adapters.
292537e3a6d3SLuigi Rizzo  * provide some defaults and get a reference to the
292637e3a6d3SLuigi Rizzo  * memory allocator
292789cc2556SLuigi Rizzo  */
int
netmap_attach_common(struct netmap_adapter *na)
{
	/* Reject adapters with no rings in either direction: every
	 * port must expose at least one tx and one rx ring. */
	if (na->num_tx_rings == 0 || na->num_rx_rings == 0) {
		D("%s: invalid rings tx %d rx %d",
			na->name, na->num_tx_rings, na->num_rx_rings);
		return EINVAL;
	}

#ifdef __FreeBSD__
	/* remember the stack's input routine so that host-ring packets
	 * can later be re-injected into the stack */
	if (na->na_flags & NAF_HOST_RINGS && na->ifp) {
		na->if_input = na->ifp->if_input; /* for netmap_send_up */
	}
	na->pdev = na; /* make sure netmap_mem_map() is called */
#endif /* __FreeBSD__ */
	if (na->nm_krings_create == NULL) {
		/* we assume that we have been called by a driver,
		 * since other port types all provide their own
		 * nm_krings_create
		 */
		na->nm_krings_create = netmap_hw_krings_create;
		na->nm_krings_delete = netmap_hw_krings_delete;
	}
	/* default notify callback wakes up sleeping threads (see
	 * netmap_notify) */
	if (na->nm_notify == NULL)
		na->nm_notify = netmap_notify;
	na->active_fds = 0;

	if (na->nm_mem == NULL) {
		/* use the global allocator */
		na->nm_mem = netmap_mem_get(&nm_mem);
	}
#ifdef WITH_VALE
	if (na->nm_bdg_attach == NULL)
		/* no special nm_bdg_attach callback. On VALE
		 * attach, we need to interpose a bwrap
		 */
		na->nm_bdg_attach = netmap_bwrap_attach;
#endif

	return 0;
}
2969f9790aebSLuigi Rizzo 
297037e3a6d3SLuigi Rizzo /* Wrapper for the register callback provided netmap-enabled
297137e3a6d3SLuigi Rizzo  * hardware drivers.
297237e3a6d3SLuigi Rizzo  * nm_iszombie(na) means that the driver module has been
29734bf50f18SLuigi Rizzo  * unloaded, so we cannot call into it.
297437e3a6d3SLuigi Rizzo  * nm_os_ifnet_lock() must guarantee mutual exclusion with
297537e3a6d3SLuigi Rizzo  * module unloading.
29764bf50f18SLuigi Rizzo  */
29774bf50f18SLuigi Rizzo static int
297837e3a6d3SLuigi Rizzo netmap_hw_reg(struct netmap_adapter *na, int onoff)
29794bf50f18SLuigi Rizzo {
29804bf50f18SLuigi Rizzo 	struct netmap_hw_adapter *hwna =
29814bf50f18SLuigi Rizzo 		(struct netmap_hw_adapter*)na;
298237e3a6d3SLuigi Rizzo 	int error = 0;
29834bf50f18SLuigi Rizzo 
298437e3a6d3SLuigi Rizzo 	nm_os_ifnet_lock();
29854bf50f18SLuigi Rizzo 
298637e3a6d3SLuigi Rizzo 	if (nm_iszombie(na)) {
298737e3a6d3SLuigi Rizzo 		if (onoff) {
298837e3a6d3SLuigi Rizzo 			error = ENXIO;
298937e3a6d3SLuigi Rizzo 		} else if (na != NULL) {
299037e3a6d3SLuigi Rizzo 			na->na_flags &= ~NAF_NETMAP_ON;
299137e3a6d3SLuigi Rizzo 		}
299237e3a6d3SLuigi Rizzo 		goto out;
299337e3a6d3SLuigi Rizzo 	}
299437e3a6d3SLuigi Rizzo 
299537e3a6d3SLuigi Rizzo 	error = hwna->nm_hw_register(na, onoff);
299637e3a6d3SLuigi Rizzo 
299737e3a6d3SLuigi Rizzo out:
299837e3a6d3SLuigi Rizzo 	nm_os_ifnet_unlock();
299937e3a6d3SLuigi Rizzo 
300037e3a6d3SLuigi Rizzo 	return error;
300137e3a6d3SLuigi Rizzo }
300237e3a6d3SLuigi Rizzo 
300337e3a6d3SLuigi Rizzo static void
300437e3a6d3SLuigi Rizzo netmap_hw_dtor(struct netmap_adapter *na)
300537e3a6d3SLuigi Rizzo {
300637e3a6d3SLuigi Rizzo 	if (nm_iszombie(na) || na->ifp == NULL)
300737e3a6d3SLuigi Rizzo 		return;
300837e3a6d3SLuigi Rizzo 
300937e3a6d3SLuigi Rizzo 	WNA(na->ifp) = NULL;
30104bf50f18SLuigi Rizzo }
30114bf50f18SLuigi Rizzo 
3012f18be576SLuigi Rizzo 
301368b8534bSLuigi Rizzo /*
3014c3e9b4dbSLuiz Otavio O Souza  * Allocate a netmap_adapter object, and initialize it from the
301537e3a6d3SLuigi Rizzo  * 'arg' passed by the driver on attach.
3016c3e9b4dbSLuiz Otavio O Souza  * We allocate a block of memory of 'size' bytes, which has room
3017c3e9b4dbSLuiz Otavio O Souza  * for struct netmap_adapter plus additional room private to
3018c3e9b4dbSLuiz Otavio O Souza  * the caller.
301968b8534bSLuigi Rizzo  * Return 0 on success, ENOMEM otherwise.
302068b8534bSLuigi Rizzo  */
3021c3e9b4dbSLuiz Otavio O Souza int
3022*4f80b14cSVincenzo Maffione netmap_attach_ext(struct netmap_adapter *arg, size_t size, int override_reg)
302368b8534bSLuigi Rizzo {
3024f9790aebSLuigi Rizzo 	struct netmap_hw_adapter *hwna = NULL;
302537e3a6d3SLuigi Rizzo 	struct ifnet *ifp = NULL;
302668b8534bSLuigi Rizzo 
3027c3e9b4dbSLuiz Otavio O Souza 	if (size < sizeof(struct netmap_hw_adapter)) {
3028c3e9b4dbSLuiz Otavio O Souza 		D("Invalid netmap adapter size %d", (int)size);
3029c3e9b4dbSLuiz Otavio O Souza 		return EINVAL;
3030c3e9b4dbSLuiz Otavio O Souza 	}
3031c3e9b4dbSLuiz Otavio O Souza 
303237e3a6d3SLuigi Rizzo 	if (arg == NULL || arg->ifp == NULL)
3033ae10d1afSLuigi Rizzo 		goto fail;
3034*4f80b14cSVincenzo Maffione 
303537e3a6d3SLuigi Rizzo 	ifp = arg->ifp;
3036*4f80b14cSVincenzo Maffione 	if (NA(ifp) && !NM_NA_VALID(ifp)) {
3037*4f80b14cSVincenzo Maffione 		/* If NA(ifp) is not null but there is no valid netmap
3038*4f80b14cSVincenzo Maffione 		 * adapter it means that someone else is using the same
3039*4f80b14cSVincenzo Maffione 		 * pointer (e.g. ax25_ptr on linux). This happens for
3040*4f80b14cSVincenzo Maffione 		 * instance when also PF_RING is in use. */
3041*4f80b14cSVincenzo Maffione 		D("Error: netmap adapter hook is busy");
3042*4f80b14cSVincenzo Maffione 		return EBUSY;
3043*4f80b14cSVincenzo Maffione 	}
3044*4f80b14cSVincenzo Maffione 
3045c3e9b4dbSLuiz Otavio O Souza 	hwna = nm_os_malloc(size);
3046f9790aebSLuigi Rizzo 	if (hwna == NULL)
3047ae10d1afSLuigi Rizzo 		goto fail;
3048f9790aebSLuigi Rizzo 	hwna->up = *arg;
3049847bf383SLuigi Rizzo 	hwna->up.na_flags |= NAF_HOST_RINGS | NAF_NATIVE;
30504bf50f18SLuigi Rizzo 	strncpy(hwna->up.name, ifp->if_xname, sizeof(hwna->up.name));
3051*4f80b14cSVincenzo Maffione 	if (override_reg) {
30524bf50f18SLuigi Rizzo 		hwna->nm_hw_register = hwna->up.nm_register;
305337e3a6d3SLuigi Rizzo 		hwna->up.nm_register = netmap_hw_reg;
3054*4f80b14cSVincenzo Maffione 	}
3055f9790aebSLuigi Rizzo 	if (netmap_attach_common(&hwna->up)) {
3056c3e9b4dbSLuiz Otavio O Souza 		nm_os_free(hwna);
3057f9790aebSLuigi Rizzo 		goto fail;
3058f9790aebSLuigi Rizzo 	}
3059f9790aebSLuigi Rizzo 	netmap_adapter_get(&hwna->up);
3060f9790aebSLuigi Rizzo 
306137e3a6d3SLuigi Rizzo 	NM_ATTACH_NA(ifp, &hwna->up);
306237e3a6d3SLuigi Rizzo 
306364ae02c3SLuigi Rizzo #ifdef linux
3064f18be576SLuigi Rizzo 	if (ifp->netdev_ops) {
3065f18be576SLuigi Rizzo 		/* prepare a clone of the netdev ops */
3066847bf383SLuigi Rizzo #ifndef NETMAP_LINUX_HAVE_NETDEV_OPS
3067f9790aebSLuigi Rizzo 		hwna->nm_ndo.ndo_start_xmit = ifp->netdev_ops;
3068f18be576SLuigi Rizzo #else
3069f9790aebSLuigi Rizzo 		hwna->nm_ndo = *ifp->netdev_ops;
307037e3a6d3SLuigi Rizzo #endif /* NETMAP_LINUX_HAVE_NETDEV_OPS */
3071f18be576SLuigi Rizzo 	}
3072f9790aebSLuigi Rizzo 	hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit;
3073*4f80b14cSVincenzo Maffione 	hwna->nm_ndo.ndo_change_mtu = linux_netmap_change_mtu;
30744bf50f18SLuigi Rizzo 	if (ifp->ethtool_ops) {
30754bf50f18SLuigi Rizzo 		hwna->nm_eto = *ifp->ethtool_ops;
30764bf50f18SLuigi Rizzo 	}
30774bf50f18SLuigi Rizzo 	hwna->nm_eto.set_ringparam = linux_netmap_set_ringparam;
3078847bf383SLuigi Rizzo #ifdef NETMAP_LINUX_HAVE_SET_CHANNELS
30794bf50f18SLuigi Rizzo 	hwna->nm_eto.set_channels = linux_netmap_set_channels;
308037e3a6d3SLuigi Rizzo #endif /* NETMAP_LINUX_HAVE_SET_CHANNELS */
30814bf50f18SLuigi Rizzo 	if (arg->nm_config == NULL) {
30824bf50f18SLuigi Rizzo 		hwna->up.nm_config = netmap_linux_config;
30834bf50f18SLuigi Rizzo 	}
3084ce3ee1e7SLuigi Rizzo #endif /* linux */
308537e3a6d3SLuigi Rizzo 	if (arg->nm_dtor == NULL) {
308637e3a6d3SLuigi Rizzo 		hwna->up.nm_dtor = netmap_hw_dtor;
308737e3a6d3SLuigi Rizzo 	}
3088f9790aebSLuigi Rizzo 
3089d82f9014SRui Paulo 	if_printf(ifp, "netmap queues/slots: TX %d/%d, RX %d/%d\n",
3090d82f9014SRui Paulo 	    hwna->up.num_tx_rings, hwna->up.num_tx_desc,
3091d82f9014SRui Paulo 	    hwna->up.num_rx_rings, hwna->up.num_rx_desc);
3092ae10d1afSLuigi Rizzo 	return 0;
309368b8534bSLuigi Rizzo 
3094ae10d1afSLuigi Rizzo fail:
3095f9790aebSLuigi Rizzo 	D("fail, arg %p ifp %p na %p", arg, ifp, hwna);
3096f9790aebSLuigi Rizzo 	return (hwna ? EINVAL : ENOMEM);
309768b8534bSLuigi Rizzo }
309868b8534bSLuigi Rizzo 
309968b8534bSLuigi Rizzo 
310037e3a6d3SLuigi Rizzo int
310137e3a6d3SLuigi Rizzo netmap_attach(struct netmap_adapter *arg)
310237e3a6d3SLuigi Rizzo {
3103*4f80b14cSVincenzo Maffione 	return netmap_attach_ext(arg, sizeof(struct netmap_hw_adapter),
3104*4f80b14cSVincenzo Maffione 			1 /* override nm_reg */);
310537e3a6d3SLuigi Rizzo }
310637e3a6d3SLuigi Rizzo 
310737e3a6d3SLuigi Rizzo 
3108f9790aebSLuigi Rizzo void
3109f9790aebSLuigi Rizzo NM_DBG(netmap_adapter_get)(struct netmap_adapter *na)
3110f9790aebSLuigi Rizzo {
3111f9790aebSLuigi Rizzo 	if (!na) {
3112f9790aebSLuigi Rizzo 		return;
3113f9790aebSLuigi Rizzo 	}
3114f9790aebSLuigi Rizzo 
3115f9790aebSLuigi Rizzo 	refcount_acquire(&na->na_refcount);
3116f9790aebSLuigi Rizzo }
3117f9790aebSLuigi Rizzo 
3118f9790aebSLuigi Rizzo 
3119f9790aebSLuigi Rizzo /* returns 1 iff the netmap_adapter is destroyed */
3120f9790aebSLuigi Rizzo int
3121f9790aebSLuigi Rizzo NM_DBG(netmap_adapter_put)(struct netmap_adapter *na)
3122f9790aebSLuigi Rizzo {
3123f9790aebSLuigi Rizzo 	if (!na)
3124f9790aebSLuigi Rizzo 		return 1;
3125f9790aebSLuigi Rizzo 
3126f9790aebSLuigi Rizzo 	if (!refcount_release(&na->na_refcount))
3127f9790aebSLuigi Rizzo 		return 0;
3128f9790aebSLuigi Rizzo 
3129f9790aebSLuigi Rizzo 	if (na->nm_dtor)
3130f9790aebSLuigi Rizzo 		na->nm_dtor(na);
3131f9790aebSLuigi Rizzo 
3132*4f80b14cSVincenzo Maffione 	if (na->tx_rings) { /* XXX should not happen */
3133*4f80b14cSVincenzo Maffione 		D("freeing leftover tx_rings");
3134*4f80b14cSVincenzo Maffione 		na->nm_krings_delete(na);
3135*4f80b14cSVincenzo Maffione 	}
3136*4f80b14cSVincenzo Maffione 	netmap_pipe_dealloc(na);
3137*4f80b14cSVincenzo Maffione 	if (na->nm_mem)
3138*4f80b14cSVincenzo Maffione 		netmap_mem_put(na->nm_mem);
3139*4f80b14cSVincenzo Maffione 	bzero(na, sizeof(*na));
3140*4f80b14cSVincenzo Maffione 	nm_os_free(na);
3141f9790aebSLuigi Rizzo 
3142f9790aebSLuigi Rizzo 	return 1;
3143f9790aebSLuigi Rizzo }
3144f9790aebSLuigi Rizzo 
314589cc2556SLuigi Rizzo /* nm_krings_create callback for all hardware native adapters */
3146f9790aebSLuigi Rizzo int
3147f9790aebSLuigi Rizzo netmap_hw_krings_create(struct netmap_adapter *na)
3148f9790aebSLuigi Rizzo {
3149f0ea3689SLuigi Rizzo 	int ret = netmap_krings_create(na, 0);
315017885a7bSLuigi Rizzo 	if (ret == 0) {
315117885a7bSLuigi Rizzo 		/* initialize the mbq for the sw rx ring */
315217885a7bSLuigi Rizzo 		mbq_safe_init(&na->rx_rings[na->num_rx_rings].rx_queue);
315317885a7bSLuigi Rizzo 		ND("initialized sw rx queue %d", na->num_rx_rings);
315417885a7bSLuigi Rizzo 	}
315517885a7bSLuigi Rizzo 	return ret;
3156f9790aebSLuigi Rizzo }
3157f9790aebSLuigi Rizzo 
3158f9790aebSLuigi Rizzo 
3159f9790aebSLuigi Rizzo 
316068b8534bSLuigi Rizzo /*
316189cc2556SLuigi Rizzo  * Called on module unload by the netmap-enabled drivers
316268b8534bSLuigi Rizzo  */
void
netmap_detach(struct ifnet *ifp)
{
	struct netmap_adapter *na = NA(ifp);

	/* nothing to do if the interface was never attached to netmap */
	if (!na)
		return;

	NMG_LOCK();
	/* stop all rings so no datapath activity races the teardown */
	netmap_set_all_rings(na, NM_KR_LOCKED);
	/*
	 * if the netmap adapter is not native, somebody
	 * changed it, so we can not release it here.
	 * The NAF_ZOMBIE flag will notify the new owner that
	 * the driver is gone.
	 * Note the short-circuit: netmap_adapter_put() is only called
	 * for native adapters; when it returns 0 (references remain)
	 * the adapter is also marked as a zombie.
	 */
	if (!(na->na_flags & NAF_NATIVE) || !netmap_adapter_put(na)) {
		na->na_flags |= NAF_ZOMBIE;
	}
	/* give active users a chance to notice that NAF_ZOMBIE has been
	 * turned on, so that they can stop and return an error to userspace.
	 * Note that this becomes a NOP if there are no active users and,
	 * therefore, the put() above has deleted the na, since now NA(ifp) is
	 * NULL.
	 */
	netmap_enable_all_rings(ifp);
	NMG_UNLOCK();
}
3191f18be576SLuigi Rizzo 
3192f18be576SLuigi Rizzo 
319368b8534bSLuigi Rizzo /*
319402ad4083SLuigi Rizzo  * Intercept packets from the network stack and pass them
319502ad4083SLuigi Rizzo  * to netmap as incoming packets on the 'software' ring.
319617885a7bSLuigi Rizzo  *
319717885a7bSLuigi Rizzo  * We only store packets in a bounded mbq and then copy them
319817885a7bSLuigi Rizzo  * in the relevant rxsync routine.
319917885a7bSLuigi Rizzo  *
3200ce3ee1e7SLuigi Rizzo  * We rely on the OS to make sure that the ifp and na do not go
3201ce3ee1e7SLuigi Rizzo  * away (typically the caller checks for IFF_DRV_RUNNING or the like).
3202ce3ee1e7SLuigi Rizzo  * In nm_register() or whenever there is a reinitialization,
3203f9790aebSLuigi Rizzo  * we make sure to make the mode change visible here.
320468b8534bSLuigi Rizzo  */
int
netmap_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct netmap_adapter *na = NA(ifp);
	struct netmap_kring *kring, *tx_kring;
	u_int len = MBUF_LEN(m);
	u_int error = ENOBUFS;	/* default: drop */
	unsigned int txr;
	struct mbq *q;
	int busy;

	/* host rx ring: the extra ring just past the hw rx rings */
	kring = &na->rx_rings[na->num_rx_rings];
	// XXX [Linux] we do not need this lock
	// if we follow the down/configure/up protocol -gl
	// mtx_lock(&na->core_lock);

	if (!nm_netmap_on(na)) {
		D("%s not in netmap mode anymore", na->name);
		error = ENXIO;
		goto done;
	}

	/* map the mbuf's tx queue onto a netmap tx ring (wrap if the
	 * stack uses more queues than netmap has rings) */
	txr = MBUF_TXQ(m);
	if (txr >= na->num_tx_rings) {
		txr %= na->num_tx_rings;
	}
	tx_kring = &NMR(na, NR_TX)[txr];

	if (tx_kring->nr_mode == NKR_NETMAP_OFF) {
		/* this tx ring is not in netmap mode: hand the mbuf
		 * straight back to the regular driver transmit path */
		return MBUF_TRANSMIT(na, ifp, m);
	}

	q = &kring->rx_queue;

	// XXX reconsider long packets if we handle fragments
	if (len > NETMAP_BUF_SIZE(na)) { /* too long for us */
		D("%s from_host, drop packet size %d > %d", na->name,
			len, NETMAP_BUF_SIZE(na));
		goto done;
	}

	/* drop mbufs requiring checksum/TSO offload: the host ring
	 * cannot perform them */
	if (nm_os_mbuf_has_offld(m)) {
		RD(1, "%s drop mbuf that needs offloadings", na->name);
		goto done;
	}

	/* protect against netmap_rxsync_from_host(), netmap_sw_to_nic()
	 * and maybe other instances of netmap_transmit (the latter
	 * not possible on Linux).
	 * We enqueue the mbuf only if we are sure there is going to be
	 * enough room in the host RX ring, otherwise we drop it.
	 */
	mbq_lock(q);

        busy = kring->nr_hwtail - kring->nr_hwcur;
        if (busy < 0)
                busy += kring->nkr_num_slots;
	/* slots already busy plus mbufs already queued must leave at
	 * least one free slot, otherwise drop */
	if (busy + mbq_len(q) >= kring->nkr_num_slots - 1) {
		RD(2, "%s full hwcur %d hwtail %d qlen %d", na->name,
			kring->nr_hwcur, kring->nr_hwtail, mbq_len(q));
	} else {
		mbq_enqueue(q, m);
		ND(2, "%s %d bufs in queue", na->name, mbq_len(q));
		/* notify outside the lock */
		m = NULL;	/* ownership transferred to the queue */
		error = 0;
	}
	mbq_unlock(q);

done:
	/* free the mbuf unless it was successfully enqueued above */
	if (m)
		m_freem(m);
	/* unconditionally wake up listeners */
	kring->nm_notify(kring, 0);
	/* this is normally netmap_notify(), but for nics
	 * connected to a bridge it is netmap_bwrap_intr_notify(),
	 * that possibly forwards the frames through the switch
	 */

	return (error);
}
328668b8534bSLuigi Rizzo 
328768b8534bSLuigi Rizzo 
328868b8534bSLuigi Rizzo /*
328968b8534bSLuigi Rizzo  * netmap_reset() is called by the driver routines when reinitializing
329068b8534bSLuigi Rizzo  * a ring. The driver is in charge of locking to protect the kring.
3291f9790aebSLuigi Rizzo  * If native netmap mode is not set just return NULL.
329237e3a6d3SLuigi Rizzo  * If native netmap mode is set, in particular, we have to set nr_mode to
329337e3a6d3SLuigi Rizzo  * NKR_NETMAP_ON.
329468b8534bSLuigi Rizzo  */
struct netmap_slot *
netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
	u_int new_cur)
{
	struct netmap_kring *kring;
	int new_hwofs, lim;

	if (!nm_native_on(na)) {
		ND("interface not in native netmap mode");
		return NULL;	/* nothing to reinitialize */
	}

	/* XXX note- in the new scheme, we are not guaranteed to be
	 * under lock (e.g. when called on a device reset).
	 * In this case, we should set a flag and do not trust too
	 * much the values. In practice: TODO
	 * - set a RESET flag somewhere in the kring
	 * - do the processing in a conservative way
	 * - let the *sync() fixup at the end.
	 */
	if (tx == NR_TX) {
		/* out-of-range ring index: nothing to do */
		if (n >= na->num_tx_rings)
			return NULL;

		kring = na->tx_rings + n;

		/* ring scheduled to leave netmap mode: honor that */
		if (kring->nr_pending_mode == NKR_NETMAP_OFF) {
			kring->nr_mode = NKR_NETMAP_OFF;
			return NULL;
		}

		// XXX check whether we should use hwcur or rcur
		new_hwofs = kring->nr_hwcur - new_cur;
	} else {
		if (n >= na->num_rx_rings)
			return NULL;
		kring = na->rx_rings + n;

		if (kring->nr_pending_mode == NKR_NETMAP_OFF) {
			kring->nr_mode = NKR_NETMAP_OFF;
			return NULL;
		}

		new_hwofs = kring->nr_hwtail - new_cur;
	}
	/* keep the offset within [-lim, lim] (it wraps modulo ring size) */
	lim = kring->nkr_num_slots - 1;
	if (new_hwofs > lim)
		new_hwofs -= lim + 1;

	/* Always set the new offset value and realign the ring. */
	if (netmap_verbose)
	    D("%s %s%d hwofs %d -> %d, hwtail %d -> %d",
		na->name,
		tx == NR_TX ? "TX" : "RX", n,
		kring->nkr_hwofs, new_hwofs,
		kring->nr_hwtail,
		tx == NR_TX ? lim : kring->nr_hwtail);
	kring->nkr_hwofs = new_hwofs;
	if (tx == NR_TX) {
		/* a freshly reset tx ring is entirely available:
		 * tail = cur + lim, modulo the ring size */
		kring->nr_hwtail = kring->nr_hwcur + lim;
		if (kring->nr_hwtail > lim)
			kring->nr_hwtail -= lim + 1;
	}

	/*
	 * Wakeup on the individual and global selwait
	 * We do the wakeup here, but the ring is not yet reconfigured.
	 * However, we are under lock so there are no races.
	 */
	kring->nr_mode = NKR_NETMAP_ON;
	kring->nm_notify(kring, 0);
	return kring->ring->slot;
}
336868b8534bSLuigi Rizzo 
336968b8534bSLuigi Rizzo 
3370ce3ee1e7SLuigi Rizzo /*
3371f9790aebSLuigi Rizzo  * Dispatch rx/tx interrupts to the netmap rings.
3372ce3ee1e7SLuigi Rizzo  *
3373ce3ee1e7SLuigi Rizzo  * "work_done" is non-null on the RX path, NULL for the TX path.
3374ce3ee1e7SLuigi Rizzo  * We rely on the OS to make sure that there is only one active
3375ce3ee1e7SLuigi Rizzo  * instance per queue, and that there is appropriate locking.
3376849bec0eSLuigi Rizzo  *
3377f9790aebSLuigi Rizzo  * The 'notify' routine depends on what the ring is attached to.
3378f9790aebSLuigi Rizzo  * - for a netmap file descriptor, do a selwakeup on the individual
3379f9790aebSLuigi Rizzo  *   waitqueue, plus one on the global one if needed
33804bf50f18SLuigi Rizzo  *   (see netmap_notify)
33814bf50f18SLuigi Rizzo  * - for a nic connected to a switch, call the proper forwarding routine
33824bf50f18SLuigi Rizzo  *   (see netmap_bwrap_intr_notify)
3383f9790aebSLuigi Rizzo  */
338437e3a6d3SLuigi Rizzo int
338537e3a6d3SLuigi Rizzo netmap_common_irq(struct netmap_adapter *na, u_int q, u_int *work_done)
3386f9790aebSLuigi Rizzo {
3387f9790aebSLuigi Rizzo 	struct netmap_kring *kring;
3388847bf383SLuigi Rizzo 	enum txrx t = (work_done ? NR_RX : NR_TX);
3389f9790aebSLuigi Rizzo 
3390f9790aebSLuigi Rizzo 	q &= NETMAP_RING_MASK;
3391f9790aebSLuigi Rizzo 
3392f9790aebSLuigi Rizzo 	if (netmap_verbose) {
3393f9790aebSLuigi Rizzo 	        RD(5, "received %s queue %d", work_done ? "RX" : "TX" , q);
3394f9790aebSLuigi Rizzo 	}
3395f9790aebSLuigi Rizzo 
3396847bf383SLuigi Rizzo 	if (q >= nma_get_nrings(na, t))
339737e3a6d3SLuigi Rizzo 		return NM_IRQ_PASS; // not a physical queue
3398847bf383SLuigi Rizzo 
3399847bf383SLuigi Rizzo 	kring = NMR(na, t) + q;
3400847bf383SLuigi Rizzo 
340137e3a6d3SLuigi Rizzo 	if (kring->nr_mode == NKR_NETMAP_OFF) {
340237e3a6d3SLuigi Rizzo 		return NM_IRQ_PASS;
340337e3a6d3SLuigi Rizzo 	}
340437e3a6d3SLuigi Rizzo 
3405847bf383SLuigi Rizzo 	if (t == NR_RX) {
3406f9790aebSLuigi Rizzo 		kring->nr_kflags |= NKR_PENDINTR;	// XXX atomic ?
3407f9790aebSLuigi Rizzo 		*work_done = 1; /* do not fire napi again */
3408f9790aebSLuigi Rizzo 	}
340937e3a6d3SLuigi Rizzo 
341037e3a6d3SLuigi Rizzo 	return kring->nm_notify(kring, 0);
3411f9790aebSLuigi Rizzo }
3412f9790aebSLuigi Rizzo 
341317885a7bSLuigi Rizzo 
3414f9790aebSLuigi Rizzo /*
3415f9790aebSLuigi Rizzo  * Default functions to handle rx/tx interrupts from a physical device.
3416f9790aebSLuigi Rizzo  * "work_done" is non-null on the RX path, NULL for the TX path.
3417f9790aebSLuigi Rizzo  *
341837e3a6d3SLuigi Rizzo  * If the card is not in netmap mode, simply return NM_IRQ_PASS,
3419ce3ee1e7SLuigi Rizzo  * so that the caller proceeds with regular processing.
342037e3a6d3SLuigi Rizzo  * Otherwise call netmap_common_irq().
3421ce3ee1e7SLuigi Rizzo  *
3422ce3ee1e7SLuigi Rizzo  * If the card is connected to a netmap file descriptor,
3423ce3ee1e7SLuigi Rizzo  * do a selwakeup on the individual queue, plus one on the global one
3424ce3ee1e7SLuigi Rizzo  * if needed (multiqueue card _and_ there are multiqueue listeners),
342537e3a6d3SLuigi Rizzo  * and return NR_IRQ_COMPLETED.
3426ce3ee1e7SLuigi Rizzo  *
3427ce3ee1e7SLuigi Rizzo  * Finally, if called on rx from an interface connected to a switch,
342837e3a6d3SLuigi Rizzo  * calls the proper forwarding routine.
34291a26580eSLuigi Rizzo  */
3430babc7c12SLuigi Rizzo int
3431ce3ee1e7SLuigi Rizzo netmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_done)
34321a26580eSLuigi Rizzo {
34334bf50f18SLuigi Rizzo 	struct netmap_adapter *na = NA(ifp);
34344bf50f18SLuigi Rizzo 
34354bf50f18SLuigi Rizzo 	/*
34364bf50f18SLuigi Rizzo 	 * XXX emulated netmap mode sets NAF_SKIP_INTR so
34374bf50f18SLuigi Rizzo 	 * we still use the regular driver even though the previous
34384bf50f18SLuigi Rizzo 	 * check fails. It is unclear whether we should use
34394bf50f18SLuigi Rizzo 	 * nm_native_on() here.
34404bf50f18SLuigi Rizzo 	 */
34414bf50f18SLuigi Rizzo 	if (!nm_netmap_on(na))
344237e3a6d3SLuigi Rizzo 		return NM_IRQ_PASS;
3443849bec0eSLuigi Rizzo 
34444bf50f18SLuigi Rizzo 	if (na->na_flags & NAF_SKIP_INTR) {
34458241616dSLuigi Rizzo 		ND("use regular interrupt");
344637e3a6d3SLuigi Rizzo 		return NM_IRQ_PASS;
34478241616dSLuigi Rizzo 	}
34488241616dSLuigi Rizzo 
344937e3a6d3SLuigi Rizzo 	return netmap_common_irq(na, q, work_done);
34501a26580eSLuigi Rizzo }
34511a26580eSLuigi Rizzo 
345264ae02c3SLuigi Rizzo 
345301c7d25fSLuigi Rizzo /*
3454f9790aebSLuigi Rizzo  * Module loader and unloader
3455f196ce38SLuigi Rizzo  *
3456f9790aebSLuigi Rizzo  * netmap_init() creates the /dev/netmap device and initializes
3457f9790aebSLuigi Rizzo  * all global variables. Returns 0 on success, errno on failure
3458f9790aebSLuigi Rizzo  * (but there is no chance)
3459f9790aebSLuigi Rizzo  *
3460f9790aebSLuigi Rizzo  * netmap_fini() destroys everything.
3461f196ce38SLuigi Rizzo  */
3462babc7c12SLuigi Rizzo 
3463babc7c12SLuigi Rizzo static struct cdev *netmap_dev; /* /dev/netmap character device. */
3464f9790aebSLuigi Rizzo extern struct cdevsw netmap_cdevsw;
3465babc7c12SLuigi Rizzo 
346617885a7bSLuigi Rizzo 
3467f9790aebSLuigi Rizzo void
346868b8534bSLuigi Rizzo netmap_fini(void)
346968b8534bSLuigi Rizzo {
3470f9790aebSLuigi Rizzo 	if (netmap_dev)
347168b8534bSLuigi Rizzo 		destroy_dev(netmap_dev);
347237e3a6d3SLuigi Rizzo 	/* we assume that there are no longer netmap users */
347337e3a6d3SLuigi Rizzo 	nm_os_ifnet_fini();
347437e3a6d3SLuigi Rizzo 	netmap_uninit_bridges();
3475ce3ee1e7SLuigi Rizzo 	netmap_mem_fini();
3476ce3ee1e7SLuigi Rizzo 	NMG_LOCK_DESTROY();
3477c3e9b4dbSLuiz Otavio O Souza 	nm_prinf("netmap: unloaded module.\n");
347868b8534bSLuigi Rizzo }
347968b8534bSLuigi Rizzo 
348017885a7bSLuigi Rizzo 
int
netmap_init(void)
{
	int error;

	NMG_LOCK_INIT();

	error = netmap_mem_init();
	if (error != 0)
		goto fail;
	/*
	 * MAKEDEV_ETERNAL_KLD avoids an expensive check on syscalls
	 * when the module is compiled in.
	 * XXX could use make_dev_credv() to get error number
	 */
	netmap_dev = make_dev_credf(MAKEDEV_ETERNAL_KLD,
		&netmap_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0600,
			      "netmap");
	if (!netmap_dev)
		goto fail;

	error = netmap_init_bridges();
	if (error)
		goto fail;

#ifdef __FreeBSD__
	nm_os_vi_init_index();
#endif

	error = nm_os_ifnet_init();
	if (error)
		goto fail;

	nm_prinf("netmap: loaded module\n");
	return (0);
fail:
	/* netmap_fini() copes with a partially completed init */
	netmap_fini();
	return (EINVAL); /* may be incorrect */
}
3520