xref: /freebsd-14.2/sys/dev/netmap/netmap.c (revision 2ff91c17)
1718cf2ccSPedro F. Giffuni /*-
2718cf2ccSPedro F. Giffuni  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3718cf2ccSPedro F. Giffuni  *
437e3a6d3SLuigi Rizzo  * Copyright (C) 2011-2014 Matteo Landi
537e3a6d3SLuigi Rizzo  * Copyright (C) 2011-2016 Luigi Rizzo
637e3a6d3SLuigi Rizzo  * Copyright (C) 2011-2016 Giuseppe Lettieri
737e3a6d3SLuigi Rizzo  * Copyright (C) 2011-2016 Vincenzo Maffione
837e3a6d3SLuigi Rizzo  * All rights reserved.
968b8534bSLuigi Rizzo  *
1068b8534bSLuigi Rizzo  * Redistribution and use in source and binary forms, with or without
1168b8534bSLuigi Rizzo  * modification, are permitted provided that the following conditions
1268b8534bSLuigi Rizzo  * are met:
1368b8534bSLuigi Rizzo  *   1. Redistributions of source code must retain the above copyright
1468b8534bSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer.
1568b8534bSLuigi Rizzo  *   2. Redistributions in binary form must reproduce the above copyright
1668b8534bSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer in the
1768b8534bSLuigi Rizzo  *      documentation and/or other materials provided with the distribution.
1868b8534bSLuigi Rizzo  *
1968b8534bSLuigi Rizzo  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
2068b8534bSLuigi Rizzo  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2168b8534bSLuigi Rizzo  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2268b8534bSLuigi Rizzo  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2368b8534bSLuigi Rizzo  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2468b8534bSLuigi Rizzo  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2568b8534bSLuigi Rizzo  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2668b8534bSLuigi Rizzo  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2768b8534bSLuigi Rizzo  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2868b8534bSLuigi Rizzo  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2968b8534bSLuigi Rizzo  * SUCH DAMAGE.
3068b8534bSLuigi Rizzo  */
3168b8534bSLuigi Rizzo 
32ce3ee1e7SLuigi Rizzo 
3368b8534bSLuigi Rizzo /*
34f9790aebSLuigi Rizzo  * $FreeBSD$
35f9790aebSLuigi Rizzo  *
3668b8534bSLuigi Rizzo  * This module supports memory mapped access to network devices,
3768b8534bSLuigi Rizzo  * see netmap(4).
3868b8534bSLuigi Rizzo  *
3968b8534bSLuigi Rizzo  * The module uses a large, memory pool allocated by the kernel
4068b8534bSLuigi Rizzo  * and accessible as mmapped memory by multiple userspace threads/processes.
4168b8534bSLuigi Rizzo  * The memory pool contains packet buffers and "netmap rings",
4268b8534bSLuigi Rizzo  * i.e. user-accessible copies of the interface's queues.
4368b8534bSLuigi Rizzo  *
4468b8534bSLuigi Rizzo  * Access to the network card works like this:
4568b8534bSLuigi Rizzo  * 1. a process/thread issues one or more open() on /dev/netmap, to create
4668b8534bSLuigi Rizzo  *    select()able file descriptor on which events are reported.
4768b8534bSLuigi Rizzo  * 2. on each descriptor, the process issues an ioctl() to identify
4868b8534bSLuigi Rizzo  *    the interface that should report events to the file descriptor.
4968b8534bSLuigi Rizzo  * 3. on each descriptor, the process issues an mmap() request to
5068b8534bSLuigi Rizzo  *    map the shared memory region within the process' address space.
5168b8534bSLuigi Rizzo  *    The list of interesting queues is indicated by a location in
5268b8534bSLuigi Rizzo  *    the shared memory region.
5368b8534bSLuigi Rizzo  * 4. using the functions in the netmap(4) userspace API, a process
5468b8534bSLuigi Rizzo  *    can look up the occupation state of a queue, access memory buffers,
5568b8534bSLuigi Rizzo  *    and retrieve received packets or enqueue packets to transmit.
5668b8534bSLuigi Rizzo  * 5. using some ioctl()s the process can synchronize the userspace view
5768b8534bSLuigi Rizzo  *    of the queue with the actual status in the kernel. This includes both
5868b8534bSLuigi Rizzo  *    receiving the notification of new packets, and transmitting new
5968b8534bSLuigi Rizzo  *    packets on the output interface.
6068b8534bSLuigi Rizzo  * 6. select() or poll() can be used to wait for events on individual
6168b8534bSLuigi Rizzo  *    transmit or receive queues (or all queues for a given interface).
62ce3ee1e7SLuigi Rizzo  *
63ce3ee1e7SLuigi Rizzo 
64ce3ee1e7SLuigi Rizzo 		SYNCHRONIZATION (USER)
65ce3ee1e7SLuigi Rizzo 
66ce3ee1e7SLuigi Rizzo The netmap rings and data structures may be shared among multiple
67ce3ee1e7SLuigi Rizzo user threads or even independent processes.
68ce3ee1e7SLuigi Rizzo Any synchronization among those threads/processes is delegated
69ce3ee1e7SLuigi Rizzo to the threads themselves. Only one thread at a time can be in
70ce3ee1e7SLuigi Rizzo a system call on the same netmap ring. The OS does not enforce
71ce3ee1e7SLuigi Rizzo this and only guarantees against system crashes in case of
72ce3ee1e7SLuigi Rizzo invalid usage.
73ce3ee1e7SLuigi Rizzo 
74ce3ee1e7SLuigi Rizzo 		LOCKING (INTERNAL)
75ce3ee1e7SLuigi Rizzo 
76ce3ee1e7SLuigi Rizzo Within the kernel, access to the netmap rings is protected as follows:
77ce3ee1e7SLuigi Rizzo 
78ce3ee1e7SLuigi Rizzo - a spinlock on each ring, to handle producer/consumer races on
79ce3ee1e7SLuigi Rizzo   RX rings attached to the host stack (against multiple host
80ce3ee1e7SLuigi Rizzo   threads writing from the host stack to the same ring),
81ce3ee1e7SLuigi Rizzo   and on 'destination' rings attached to a VALE switch
82ce3ee1e7SLuigi Rizzo   (i.e. RX rings in VALE ports, and TX rings in NIC/host ports)
83ce3ee1e7SLuigi Rizzo   protecting multiple active senders for the same destination)
84ce3ee1e7SLuigi Rizzo 
85ce3ee1e7SLuigi Rizzo - an atomic variable to guarantee that there is at most one
86ce3ee1e7SLuigi Rizzo   instance of *_*xsync() on the ring at any time.
87ce3ee1e7SLuigi Rizzo   For rings connected to user file
88ce3ee1e7SLuigi Rizzo   descriptors, an atomic_test_and_set() protects this, and the
89ce3ee1e7SLuigi Rizzo   lock on the ring is not actually used.
90ce3ee1e7SLuigi Rizzo   For NIC RX rings connected to a VALE switch, an atomic_test_and_set()
91ce3ee1e7SLuigi Rizzo   is also used to prevent multiple executions (the driver might indeed
92ce3ee1e7SLuigi Rizzo   already guarantee this).
93ce3ee1e7SLuigi Rizzo   For NIC TX rings connected to a VALE switch, the lock arbitrates
94ce3ee1e7SLuigi Rizzo   access to the queue (both when allocating buffers and when pushing
95ce3ee1e7SLuigi Rizzo   them out).
96ce3ee1e7SLuigi Rizzo 
97ce3ee1e7SLuigi Rizzo - *xsync() should be protected against initializations of the card.
98ce3ee1e7SLuigi Rizzo   On FreeBSD most devices have the reset routine protected by
99ce3ee1e7SLuigi Rizzo   a RING lock (ixgbe, igb, em) or core lock (re). lem is missing
100ce3ee1e7SLuigi Rizzo   the RING protection on rx_reset(), this should be added.
101ce3ee1e7SLuigi Rizzo 
102ce3ee1e7SLuigi Rizzo   On linux there is an external lock on the tx path, which probably
103ce3ee1e7SLuigi Rizzo   also arbitrates access to the reset routine. XXX to be revised
104ce3ee1e7SLuigi Rizzo 
105ce3ee1e7SLuigi Rizzo - a per-interface core_lock protecting access from the host stack
106ce3ee1e7SLuigi Rizzo   while interfaces may be detached from netmap mode.
107ce3ee1e7SLuigi Rizzo   XXX there should be no need for this lock if we detach the interfaces
108ce3ee1e7SLuigi Rizzo   only while they are down.
109ce3ee1e7SLuigi Rizzo 
110ce3ee1e7SLuigi Rizzo 
111ce3ee1e7SLuigi Rizzo --- VALE SWITCH ---
112ce3ee1e7SLuigi Rizzo 
113ce3ee1e7SLuigi Rizzo NMG_LOCK() serializes all modifications to switches and ports.
114ce3ee1e7SLuigi Rizzo A switch cannot be deleted until all ports are gone.
115ce3ee1e7SLuigi Rizzo 
116ce3ee1e7SLuigi Rizzo For each switch, an SX lock (RWlock on linux) protects
117ce3ee1e7SLuigi Rizzo deletion of ports. When configuring or deleting a new port, the
118ce3ee1e7SLuigi Rizzo lock is acquired in exclusive mode (after holding NMG_LOCK).
119ce3ee1e7SLuigi Rizzo When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
120ce3ee1e7SLuigi Rizzo The lock is held throughout the entire forwarding cycle,
121ce3ee1e7SLuigi Rizzo during which the thread may incur a page fault.
122ce3ee1e7SLuigi Rizzo Hence it is important that sleepable shared locks are used.
123ce3ee1e7SLuigi Rizzo 
124ce3ee1e7SLuigi Rizzo On the rx ring, the per-port lock is grabbed initially to reserve
125ce3ee1e7SLuigi Rizzo a number of slots in the ring, then the lock is released,
126ce3ee1e7SLuigi Rizzo packets are copied from source to destination, and then
127ce3ee1e7SLuigi Rizzo the lock is acquired again and the receive ring is updated.
128ce3ee1e7SLuigi Rizzo (A similar thing is done on the tx ring for NIC and host stack
129ce3ee1e7SLuigi Rizzo ports attached to the switch)
130ce3ee1e7SLuigi Rizzo 
13168b8534bSLuigi Rizzo  */
13268b8534bSLuigi Rizzo 
1334bf50f18SLuigi Rizzo 
1344bf50f18SLuigi Rizzo /* --- internals ----
1354bf50f18SLuigi Rizzo  *
1364bf50f18SLuigi Rizzo  * Roadmap to the code that implements the above.
1374bf50f18SLuigi Rizzo  *
1384bf50f18SLuigi Rizzo  * > 1. a process/thread issues one or more open() on /dev/netmap, to create
1394bf50f18SLuigi Rizzo  * >    select()able file descriptor on which events are reported.
1404bf50f18SLuigi Rizzo  *
1414bf50f18SLuigi Rizzo  *  	Internally, we allocate a netmap_priv_d structure, that will be
14237e3a6d3SLuigi Rizzo  *  	initialized on ioctl(NIOCREGIF). There is one netmap_priv_d
14337e3a6d3SLuigi Rizzo  *  	structure for each open().
1444bf50f18SLuigi Rizzo  *
1454bf50f18SLuigi Rizzo  *      os-specific:
14637e3a6d3SLuigi Rizzo  *  	    FreeBSD: see netmap_open() (netmap_freebsd.c)
14737e3a6d3SLuigi Rizzo  *  	    linux:   see linux_netmap_open() (netmap_linux.c)
1484bf50f18SLuigi Rizzo  *
1494bf50f18SLuigi Rizzo  * > 2. on each descriptor, the process issues an ioctl() to identify
1504bf50f18SLuigi Rizzo  * >    the interface that should report events to the file descriptor.
1514bf50f18SLuigi Rizzo  *
1524bf50f18SLuigi Rizzo  * 	Implemented by netmap_ioctl(), NIOCREGIF case, with nmr->nr_cmd==0.
1534bf50f18SLuigi Rizzo  * 	Most important things happen in netmap_get_na() and
1544bf50f18SLuigi Rizzo  * 	netmap_do_regif(), called from there. Additional details can be
1554bf50f18SLuigi Rizzo  * 	found in the comments above those functions.
1564bf50f18SLuigi Rizzo  *
1574bf50f18SLuigi Rizzo  * 	In all cases, this action creates/takes-a-reference-to a
1584bf50f18SLuigi Rizzo  * 	netmap_*_adapter describing the port, and allocates a netmap_if
1594bf50f18SLuigi Rizzo  * 	and all necessary netmap rings, filling them with netmap buffers.
1604bf50f18SLuigi Rizzo  *
1614bf50f18SLuigi Rizzo  *      In this phase, the sync callbacks for each ring are set (these are used
1624bf50f18SLuigi Rizzo  *      in steps 5 and 6 below).  The callbacks depend on the type of adapter.
1634bf50f18SLuigi Rizzo  *      The adapter creation/initialization code puts them in the
1644bf50f18SLuigi Rizzo  * 	netmap_adapter (fields na->nm_txsync and na->nm_rxsync).  Then, they
1654bf50f18SLuigi Rizzo  * 	are copied from there to the netmap_kring's during netmap_do_regif(), by
1664bf50f18SLuigi Rizzo  * 	the nm_krings_create() callback.  All the nm_krings_create callbacks
1674bf50f18SLuigi Rizzo  * 	actually call netmap_krings_create() to perform this and the other
1684bf50f18SLuigi Rizzo  * 	common stuff. netmap_krings_create() also takes care of the host rings,
1694bf50f18SLuigi Rizzo  * 	if needed, by setting their sync callbacks appropriately.
1704bf50f18SLuigi Rizzo  *
1714bf50f18SLuigi Rizzo  * 	Additional actions depend on the kind of netmap_adapter that has been
1724bf50f18SLuigi Rizzo  * 	registered:
1734bf50f18SLuigi Rizzo  *
1744bf50f18SLuigi Rizzo  * 	- netmap_hw_adapter:  	     [netmap.c]
1754bf50f18SLuigi Rizzo  * 	     This is a system netdev/ifp with native netmap support.
1764bf50f18SLuigi Rizzo  * 	     The ifp is detached from the host stack by redirecting:
1774bf50f18SLuigi Rizzo  * 	       - transmissions (from the network stack) to netmap_transmit()
1784bf50f18SLuigi Rizzo  * 	       - receive notifications to the nm_notify() callback for
1794bf50f18SLuigi Rizzo  * 	         this adapter. The callback is normally netmap_notify(), unless
1804bf50f18SLuigi Rizzo  * 	         the ifp is attached to a bridge using bwrap, in which case it
1814bf50f18SLuigi Rizzo  * 	         is netmap_bwrap_intr_notify().
1824bf50f18SLuigi Rizzo  *
1834bf50f18SLuigi Rizzo  * 	- netmap_generic_adapter:      [netmap_generic.c]
1844bf50f18SLuigi Rizzo  * 	      A system netdev/ifp without native netmap support.
1854bf50f18SLuigi Rizzo  *
1864bf50f18SLuigi Rizzo  * 	(the decision about native/non native support is taken in
1874bf50f18SLuigi Rizzo  * 	 netmap_get_hw_na(), called by netmap_get_na())
1884bf50f18SLuigi Rizzo  *
1894bf50f18SLuigi Rizzo  * 	- netmap_vp_adapter 		[netmap_vale.c]
1904bf50f18SLuigi Rizzo  * 	      Returned by netmap_get_bdg_na().
1914bf50f18SLuigi Rizzo  * 	      This is a persistent or ephemeral VALE port. Ephemeral ports
1924bf50f18SLuigi Rizzo  * 	      are created on the fly if they don't already exist, and are
1934bf50f18SLuigi Rizzo  * 	      always attached to a bridge.
194453130d9SPedro F. Giffuni  * 	      Persistent VALE ports must be created separately, and
1954bf50f18SLuigi Rizzo  * 	      then attached like normal NICs. The NIOCREGIF we are examining
1964bf50f18SLuigi Rizzo  * 	      will find them only if they had previously been created and
1974bf50f18SLuigi Rizzo  * 	      attached (see VALE_CTL below).
1984bf50f18SLuigi Rizzo  *
1994bf50f18SLuigi Rizzo  * 	- netmap_pipe_adapter 	      [netmap_pipe.c]
2004bf50f18SLuigi Rizzo  * 	      Returned by netmap_get_pipe_na().
2014bf50f18SLuigi Rizzo  * 	      Both pipe ends are created, if they didn't already exist.
2024bf50f18SLuigi Rizzo  *
2034bf50f18SLuigi Rizzo  * 	- netmap_monitor_adapter      [netmap_monitor.c]
2044bf50f18SLuigi Rizzo  * 	      Returned by netmap_get_monitor_na().
2054bf50f18SLuigi Rizzo  * 	      If successful, the nm_sync callbacks of the monitored adapter
2064bf50f18SLuigi Rizzo  * 	      will be intercepted by the returned monitor.
2074bf50f18SLuigi Rizzo  *
2084bf50f18SLuigi Rizzo  * 	- netmap_bwrap_adapter	      [netmap_vale.c]
2094bf50f18SLuigi Rizzo  * 	      Cannot be obtained in this way, see VALE_CTL below
2104bf50f18SLuigi Rizzo  *
2114bf50f18SLuigi Rizzo  *
2124bf50f18SLuigi Rizzo  * 	os-specific:
2134bf50f18SLuigi Rizzo  * 	    linux: we first go through linux_netmap_ioctl() to
2144bf50f18SLuigi Rizzo  * 	           adapt the FreeBSD interface to the linux one.
2154bf50f18SLuigi Rizzo  *
2164bf50f18SLuigi Rizzo  *
2174bf50f18SLuigi Rizzo  * > 3. on each descriptor, the process issues an mmap() request to
2184bf50f18SLuigi Rizzo  * >    map the shared memory region within the process' address space.
2194bf50f18SLuigi Rizzo  * >    The list of interesting queues is indicated by a location in
2204bf50f18SLuigi Rizzo  * >    the shared memory region.
2214bf50f18SLuigi Rizzo  *
2224bf50f18SLuigi Rizzo  *      os-specific:
2234bf50f18SLuigi Rizzo  *  	    FreeBSD: netmap_mmap_single (netmap_freebsd.c).
2244bf50f18SLuigi Rizzo  *  	    linux:   linux_netmap_mmap (netmap_linux.c).
2254bf50f18SLuigi Rizzo  *
2264bf50f18SLuigi Rizzo  * > 4. using the functions in the netmap(4) userspace API, a process
2274bf50f18SLuigi Rizzo  * >    can look up the occupation state of a queue, access memory buffers,
2284bf50f18SLuigi Rizzo  * >    and retrieve received packets or enqueue packets to transmit.
2294bf50f18SLuigi Rizzo  *
2304bf50f18SLuigi Rizzo  * 	these actions do not involve the kernel.
2314bf50f18SLuigi Rizzo  *
2324bf50f18SLuigi Rizzo  * > 5. using some ioctl()s the process can synchronize the userspace view
2334bf50f18SLuigi Rizzo  * >    of the queue with the actual status in the kernel. This includes both
2344bf50f18SLuigi Rizzo  * >    receiving the notification of new packets, and transmitting new
2354bf50f18SLuigi Rizzo  * >    packets on the output interface.
2364bf50f18SLuigi Rizzo  *
2374bf50f18SLuigi Rizzo  * 	These are implemented in netmap_ioctl(), NIOCTXSYNC and NIOCRXSYNC
2384bf50f18SLuigi Rizzo  * 	cases. They invoke the nm_sync callbacks on the netmap_kring
2394bf50f18SLuigi Rizzo  * 	structures, as initialized in step 2 and maybe later modified
2404bf50f18SLuigi Rizzo  * 	by a monitor. Monitors, however, will always call the original
2414bf50f18SLuigi Rizzo  * 	callback before doing anything else.
2424bf50f18SLuigi Rizzo  *
2434bf50f18SLuigi Rizzo  *
2444bf50f18SLuigi Rizzo  * > 6. select() or poll() can be used to wait for events on individual
2454bf50f18SLuigi Rizzo  * >    transmit or receive queues (or all queues for a given interface).
2464bf50f18SLuigi Rizzo  *
2474bf50f18SLuigi Rizzo  * 	Implemented in netmap_poll(). This will call the same nm_sync()
2484bf50f18SLuigi Rizzo  * 	callbacks as in step 5 above.
2494bf50f18SLuigi Rizzo  *
2504bf50f18SLuigi Rizzo  * 	os-specific:
2514bf50f18SLuigi Rizzo  * 		linux: we first go through linux_netmap_poll() to adapt
2524bf50f18SLuigi Rizzo  * 		       the FreeBSD interface to the linux one.
2534bf50f18SLuigi Rizzo  *
2544bf50f18SLuigi Rizzo  *
2554bf50f18SLuigi Rizzo  *  ----  VALE_CTL -----
2564bf50f18SLuigi Rizzo  *
2574bf50f18SLuigi Rizzo  *  VALE switches are controlled by issuing a NIOCREGIF with a non-null
2584bf50f18SLuigi Rizzo  *  nr_cmd in the nmreq structure. These subcommands are handled by
2594bf50f18SLuigi Rizzo  *  netmap_bdg_ctl() in netmap_vale.c. Persistent VALE ports are created
2604bf50f18SLuigi Rizzo  *  and destroyed by issuing the NETMAP_BDG_NEWIF and NETMAP_BDG_DELIF
2614bf50f18SLuigi Rizzo  *  subcommands, respectively.
2624bf50f18SLuigi Rizzo  *
2634bf50f18SLuigi Rizzo  *  Any network interface known to the system (including a persistent VALE
2644bf50f18SLuigi Rizzo  *  port) can be attached to a VALE switch by issuing the
265*2ff91c17SVincenzo Maffione  *  NETMAP_REQ_VALE_ATTACH command. After the attachment, persistent VALE ports
2664bf50f18SLuigi Rizzo  *  look exactly like ephemeral VALE ports (as created in step 2 above).  The
2674bf50f18SLuigi Rizzo  *  attachment of other interfaces, instead, requires the creation of a
2684bf50f18SLuigi Rizzo  *  netmap_bwrap_adapter.  Moreover, the attached interface must be put in
2694bf50f18SLuigi Rizzo  *  netmap mode. This may require the creation of a netmap_generic_adapter if
2704bf50f18SLuigi Rizzo  *  we have no native support for the interface, or if generic adapters have
2714bf50f18SLuigi Rizzo  *  been forced by sysctl.
2724bf50f18SLuigi Rizzo  *
2734bf50f18SLuigi Rizzo  *  Both persistent VALE ports and bwraps are handled by netmap_get_bdg_na(),
2744bf50f18SLuigi Rizzo  *  called by nm_bdg_ctl_attach(), and discriminated by the nm_bdg_attach()
2754bf50f18SLuigi Rizzo  *  callback.  In the case of the bwrap, the callback creates the
2764bf50f18SLuigi Rizzo  *  netmap_bwrap_adapter.  The initialization of the bwrap is then
2774bf50f18SLuigi Rizzo  *  completed by calling netmap_do_regif() on it, in the nm_bdg_ctl()
2784bf50f18SLuigi Rizzo  *  callback (netmap_bwrap_bdg_ctl in netmap_vale.c).
2794bf50f18SLuigi Rizzo  *  A generic adapter for the wrapped ifp will be created if needed, when
2804bf50f18SLuigi Rizzo  *  netmap_get_bdg_na() calls netmap_get_hw_na().
2814bf50f18SLuigi Rizzo  *
2824bf50f18SLuigi Rizzo  *
2834bf50f18SLuigi Rizzo  *  ---- DATAPATHS -----
2844bf50f18SLuigi Rizzo  *
2854bf50f18SLuigi Rizzo  *              -= SYSTEM DEVICE WITH NATIVE SUPPORT =-
2864bf50f18SLuigi Rizzo  *
2874bf50f18SLuigi Rizzo  *    na == NA(ifp) == netmap_hw_adapter created in DEVICE_netmap_attach()
2884bf50f18SLuigi Rizzo  *
2894bf50f18SLuigi Rizzo  *    - tx from netmap userspace:
2904bf50f18SLuigi Rizzo  *	 concurrently:
2914bf50f18SLuigi Rizzo  *           1) ioctl(NIOCTXSYNC)/netmap_poll() in process context
2924bf50f18SLuigi Rizzo  *                kring->nm_sync() == DEVICE_netmap_txsync()
2934bf50f18SLuigi Rizzo  *           2) device interrupt handler
2944bf50f18SLuigi Rizzo  *                na->nm_notify()  == netmap_notify()
2954bf50f18SLuigi Rizzo  *    - rx from netmap userspace:
2964bf50f18SLuigi Rizzo  *       concurrently:
2974bf50f18SLuigi Rizzo  *           1) ioctl(NIOCRXSYNC)/netmap_poll() in process context
2984bf50f18SLuigi Rizzo  *                kring->nm_sync() == DEVICE_netmap_rxsync()
2994bf50f18SLuigi Rizzo  *           2) device interrupt handler
3004bf50f18SLuigi Rizzo  *                na->nm_notify()  == netmap_notify()
301847bf383SLuigi Rizzo  *    - rx from host stack
3024bf50f18SLuigi Rizzo  *       concurrently:
3034bf50f18SLuigi Rizzo  *           1) host stack
3044bf50f18SLuigi Rizzo  *                netmap_transmit()
3054bf50f18SLuigi Rizzo  *                  na->nm_notify  == netmap_notify()
3064bf50f18SLuigi Rizzo  *           2) ioctl(NIOCRXSYNC)/netmap_poll() in process context
30737e3a6d3SLuigi Rizzo  *                kring->nm_sync() == netmap_rxsync_from_host
3084bf50f18SLuigi Rizzo  *                  netmap_rxsync_from_host(na, NULL, NULL)
3094bf50f18SLuigi Rizzo  *    - tx to host stack
3104bf50f18SLuigi Rizzo  *           ioctl(NIOCTXSYNC)/netmap_poll() in process context
31137e3a6d3SLuigi Rizzo  *             kring->nm_sync() == netmap_txsync_to_host
3124bf50f18SLuigi Rizzo  *               netmap_txsync_to_host(na)
31337e3a6d3SLuigi Rizzo  *                 nm_os_send_up()
31437e3a6d3SLuigi Rizzo  *                   FreeBSD: na->if_input() == ether_input()
3154bf50f18SLuigi Rizzo  *                   linux: netif_rx() with NM_MAGIC_PRIORITY_RX
3164bf50f18SLuigi Rizzo  *
3174bf50f18SLuigi Rizzo  *
3184bf50f18SLuigi Rizzo  *               -= SYSTEM DEVICE WITH GENERIC SUPPORT =-
3194bf50f18SLuigi Rizzo  *
320847bf383SLuigi Rizzo  *    na == NA(ifp) == generic_netmap_adapter created in generic_netmap_attach()
321847bf383SLuigi Rizzo  *
322847bf383SLuigi Rizzo  *    - tx from netmap userspace:
323847bf383SLuigi Rizzo  *       concurrently:
324847bf383SLuigi Rizzo  *           1) ioctl(NIOCTXSYNC)/netmap_poll() in process context
325847bf383SLuigi Rizzo  *               kring->nm_sync() == generic_netmap_txsync()
32637e3a6d3SLuigi Rizzo  *                   nm_os_generic_xmit_frame()
327847bf383SLuigi Rizzo  *                       linux:   dev_queue_xmit() with NM_MAGIC_PRIORITY_TX
32837e3a6d3SLuigi Rizzo  *                           ifp->ndo_start_xmit == generic_ndo_start_xmit()
32937e3a6d3SLuigi Rizzo  *                               gna->save_start_xmit == orig. dev. start_xmit
330847bf383SLuigi Rizzo  *                       FreeBSD: na->if_transmit() == orig. dev if_transmit
331847bf383SLuigi Rizzo  *           2) generic_mbuf_destructor()
332847bf383SLuigi Rizzo  *                   na->nm_notify() == netmap_notify()
333847bf383SLuigi Rizzo  *    - rx from netmap userspace:
334847bf383SLuigi Rizzo  *           1) ioctl(NIOCRXSYNC)/netmap_poll() in process context
335847bf383SLuigi Rizzo  *               kring->nm_sync() == generic_netmap_rxsync()
336847bf383SLuigi Rizzo  *                   mbq_safe_dequeue()
337847bf383SLuigi Rizzo  *           2) device driver
338847bf383SLuigi Rizzo  *               generic_rx_handler()
339847bf383SLuigi Rizzo  *                   mbq_safe_enqueue()
340847bf383SLuigi Rizzo  *                   na->nm_notify() == netmap_notify()
34137e3a6d3SLuigi Rizzo  *    - rx from host stack
34237e3a6d3SLuigi Rizzo  *        FreeBSD: same as native
34337e3a6d3SLuigi Rizzo  *        Linux: same as native except:
344847bf383SLuigi Rizzo  *           1) host stack
34537e3a6d3SLuigi Rizzo  *               dev_queue_xmit() without NM_MAGIC_PRIORITY_TX
34637e3a6d3SLuigi Rizzo  *                   ifp->ndo_start_xmit == generic_ndo_start_xmit()
347847bf383SLuigi Rizzo  *                       netmap_transmit()
348847bf383SLuigi Rizzo  *                           na->nm_notify() == netmap_notify()
34937e3a6d3SLuigi Rizzo  *    - tx to host stack (same as native):
3504bf50f18SLuigi Rizzo  *
3514bf50f18SLuigi Rizzo  *
352847bf383SLuigi Rizzo  *                           -= VALE =-
3534bf50f18SLuigi Rizzo  *
354847bf383SLuigi Rizzo  *   INCOMING:
3554bf50f18SLuigi Rizzo  *
356847bf383SLuigi Rizzo  *      - VALE ports:
357847bf383SLuigi Rizzo  *          ioctl(NIOCTXSYNC)/netmap_poll() in process context
358847bf383SLuigi Rizzo  *              kring->nm_sync() == netmap_vp_txsync()
3594bf50f18SLuigi Rizzo  *
360847bf383SLuigi Rizzo  *      - system device with native support:
361847bf383SLuigi Rizzo  *         from cable:
362847bf383SLuigi Rizzo  *             interrupt
363847bf383SLuigi Rizzo  *                na->nm_notify() == netmap_bwrap_intr_notify(ring_nr != host ring)
364847bf383SLuigi Rizzo  *                     kring->nm_sync() == DEVICE_netmap_rxsync()
365847bf383SLuigi Rizzo  *                     netmap_vp_txsync()
366847bf383SLuigi Rizzo  *                     kring->nm_sync() == DEVICE_netmap_rxsync()
367847bf383SLuigi Rizzo  *         from host stack:
368847bf383SLuigi Rizzo  *             netmap_transmit()
369847bf383SLuigi Rizzo  *                na->nm_notify() == netmap_bwrap_intr_notify(ring_nr == host ring)
37037e3a6d3SLuigi Rizzo  *                     kring->nm_sync() == netmap_rxsync_from_host()
371847bf383SLuigi Rizzo  *                     netmap_vp_txsync()
3724bf50f18SLuigi Rizzo  *
373847bf383SLuigi Rizzo  *      - system device with generic support:
374847bf383SLuigi Rizzo  *         from device driver:
375847bf383SLuigi Rizzo  *            generic_rx_handler()
376847bf383SLuigi Rizzo  *                na->nm_notify() == netmap_bwrap_intr_notify(ring_nr != host ring)
377847bf383SLuigi Rizzo  *                     kring->nm_sync() == generic_netmap_rxsync()
378847bf383SLuigi Rizzo  *                     netmap_vp_txsync()
379847bf383SLuigi Rizzo  *                     kring->nm_sync() == generic_netmap_rxsync()
380847bf383SLuigi Rizzo  *         from host stack:
381847bf383SLuigi Rizzo  *            netmap_transmit()
382847bf383SLuigi Rizzo  *                na->nm_notify() == netmap_bwrap_intr_notify(ring_nr == host ring)
38337e3a6d3SLuigi Rizzo  *                     kring->nm_sync() == netmap_rxsync_from_host()
384847bf383SLuigi Rizzo  *                     netmap_vp_txsync()
3854bf50f18SLuigi Rizzo  *
386847bf383SLuigi Rizzo  *   (all cases) --> nm_bdg_flush()
387847bf383SLuigi Rizzo  *                      dest_na->nm_notify() == (see below)
3884bf50f18SLuigi Rizzo  *
389847bf383SLuigi Rizzo  *   OUTGOING:
3904bf50f18SLuigi Rizzo  *
391847bf383SLuigi Rizzo  *      - VALE ports:
392847bf383SLuigi Rizzo  *         concurrently:
393c3e9b4dbSLuiz Otavio O Souza  *             1) ioctl(NIOCRXSYNC)/netmap_poll() in process context
394847bf383SLuigi Rizzo  *                    kring->nm_sync() == netmap_vp_rxsync()
395847bf383SLuigi Rizzo  *             2) from nm_bdg_flush()
396847bf383SLuigi Rizzo  *                    na->nm_notify() == netmap_notify()
3974bf50f18SLuigi Rizzo  *
398847bf383SLuigi Rizzo  *      - system device with native support:
399847bf383SLuigi Rizzo  *          to cable:
400847bf383SLuigi Rizzo  *             na->nm_notify() == netmap_bwrap_notify()
401847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
402847bf383SLuigi Rizzo  *                 kring->nm_sync() == DEVICE_netmap_txsync()
403847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
404847bf383SLuigi Rizzo  *          to host stack:
405847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
40637e3a6d3SLuigi Rizzo  *                 kring->nm_sync() == netmap_txsync_to_host
407847bf383SLuigi Rizzo  *                 netmap_vp_rxsync_locked()
4084bf50f18SLuigi Rizzo  *
409847bf383SLuigi Rizzo  *      - system device with generic adapter:
410847bf383SLuigi Rizzo  *          to device driver:
411847bf383SLuigi Rizzo  *             na->nm_notify() == netmap_bwrap_notify()
412847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
413847bf383SLuigi Rizzo  *                 kring->nm_sync() == generic_netmap_txsync()
414847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
415847bf383SLuigi Rizzo  *          to host stack:
416847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
41737e3a6d3SLuigi Rizzo  *                 kring->nm_sync() == netmap_txsync_to_host
418847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
4194bf50f18SLuigi Rizzo  *
4204bf50f18SLuigi Rizzo  */
4214bf50f18SLuigi Rizzo 
422ce3ee1e7SLuigi Rizzo /*
423ce3ee1e7SLuigi Rizzo  * OS-specific code that is used only within this file.
424ce3ee1e7SLuigi Rizzo  * Other OS-specific code that must be accessed by drivers
425ce3ee1e7SLuigi Rizzo  * is present in netmap_kern.h
426ce3ee1e7SLuigi Rizzo  */
42701c7d25fSLuigi Rizzo 
428ce3ee1e7SLuigi Rizzo #if defined(__FreeBSD__)
42968b8534bSLuigi Rizzo #include <sys/cdefs.h> /* prerequisite */
43068b8534bSLuigi Rizzo #include <sys/types.h>
43168b8534bSLuigi Rizzo #include <sys/errno.h>
43268b8534bSLuigi Rizzo #include <sys/param.h>	/* defines used in kernel.h */
43368b8534bSLuigi Rizzo #include <sys/kernel.h>	/* types used in module initialization */
434f9790aebSLuigi Rizzo #include <sys/conf.h>	/* cdevsw struct, UID, GID */
43589e3fd52SLuigi Rizzo #include <sys/filio.h>	/* FIONBIO */
43668b8534bSLuigi Rizzo #include <sys/sockio.h>
43768b8534bSLuigi Rizzo #include <sys/socketvar.h>	/* struct socket */
43868b8534bSLuigi Rizzo #include <sys/malloc.h>
43968b8534bSLuigi Rizzo #include <sys/poll.h>
44089f6b863SAttilio Rao #include <sys/rwlock.h>
44168b8534bSLuigi Rizzo #include <sys/socket.h> /* sockaddrs */
44268b8534bSLuigi Rizzo #include <sys/selinfo.h>
44368b8534bSLuigi Rizzo #include <sys/sysctl.h>
444339f59c0SGleb Smirnoff #include <sys/jail.h>
445339f59c0SGleb Smirnoff #include <net/vnet.h>
44668b8534bSLuigi Rizzo #include <net/if.h>
44776039bc8SGleb Smirnoff #include <net/if_var.h>
44868b8534bSLuigi Rizzo #include <net/bpf.h>		/* BIOCIMMEDIATE */
44968b8534bSLuigi Rizzo #include <machine/bus.h>	/* bus_dmamap_* */
450ce3ee1e7SLuigi Rizzo #include <sys/endian.h>
451ce3ee1e7SLuigi Rizzo #include <sys/refcount.h>
45268b8534bSLuigi Rizzo 
45368b8534bSLuigi Rizzo 
454ce3ee1e7SLuigi Rizzo #elif defined(linux)
455ce3ee1e7SLuigi Rizzo 
456ce3ee1e7SLuigi Rizzo #include "bsd_glue.h"
457ce3ee1e7SLuigi Rizzo 
458ce3ee1e7SLuigi Rizzo #elif defined(__APPLE__)
459ce3ee1e7SLuigi Rizzo 
460ce3ee1e7SLuigi Rizzo #warning OSX support is only partial
461ce3ee1e7SLuigi Rizzo #include "osx_glue.h"
462ce3ee1e7SLuigi Rizzo 
46337e3a6d3SLuigi Rizzo #elif defined (_WIN32)
46437e3a6d3SLuigi Rizzo 
46537e3a6d3SLuigi Rizzo #include "win_glue.h"
46637e3a6d3SLuigi Rizzo 
467ce3ee1e7SLuigi Rizzo #else
468ce3ee1e7SLuigi Rizzo 
469ce3ee1e7SLuigi Rizzo #error	Unsupported platform
470ce3ee1e7SLuigi Rizzo 
471ce3ee1e7SLuigi Rizzo #endif /* unsupported */
472ce3ee1e7SLuigi Rizzo 
473ce3ee1e7SLuigi Rizzo /*
474ce3ee1e7SLuigi Rizzo  * common headers
475ce3ee1e7SLuigi Rizzo  */
4760b8ed8e0SLuigi Rizzo #include <net/netmap.h>
4770b8ed8e0SLuigi Rizzo #include <dev/netmap/netmap_kern.h>
478ce3ee1e7SLuigi Rizzo #include <dev/netmap/netmap_mem2.h>
4790b8ed8e0SLuigi Rizzo 
480ce3ee1e7SLuigi Rizzo 
4815819da83SLuigi Rizzo /* user-controlled variables */
4825819da83SLuigi Rizzo int netmap_verbose;	/* non-zero enables verbose diagnostics; NOTE(review): presumably exposed as a sysctl -- confirm where it is registered */
4835819da83SLuigi Rizzo 
4845819da83SLuigi Rizzo static int netmap_no_timestamp; /* don't timestamp on rxsync */
485c85cb1a0SLuigi Rizzo int netmap_no_pendintr = 1;	/* NOTE(review): name suggests "skip pending-interrupt processing"; exact semantics not visible in this chunk -- confirm at use sites */
486f18be576SLuigi Rizzo int netmap_txsync_retry = 2;	/* retry budget for txsync operations; consumers not visible here -- TODO confirm */
487c3e9b4dbSLuiz Otavio O Souza static int netmap_fwd = 0;	/* force transparent forwarding */
488f196ce38SLuigi Rizzo 
489f9790aebSLuigi Rizzo /*
490f9790aebSLuigi Rizzo  * netmap_admode selects the netmap mode to use.
491f9790aebSLuigi Rizzo  * Invalid values are reset to NETMAP_ADMODE_BEST
492f9790aebSLuigi Rizzo  */
493f9790aebSLuigi Rizzo enum {	NETMAP_ADMODE_BEST = 0,	/* use native, fallback to generic */
494f9790aebSLuigi Rizzo 	NETMAP_ADMODE_NATIVE,	/* either native or none */
495f9790aebSLuigi Rizzo 	NETMAP_ADMODE_GENERIC,	/* force generic */
496f9790aebSLuigi Rizzo 	NETMAP_ADMODE_LAST };
497f9790aebSLuigi Rizzo static int netmap_admode = NETMAP_ADMODE_BEST;
498f9790aebSLuigi Rizzo 
49937e3a6d3SLuigi Rizzo /* netmap_generic_mit controls mitigation of RX notifications for
50037e3a6d3SLuigi Rizzo  * the generic netmap adapter. The value is a time interval in
50137e3a6d3SLuigi Rizzo  * nanoseconds. */
50237e3a6d3SLuigi Rizzo int netmap_generic_mit = 100*1000;	/* i.e. 100 microseconds */
50337e3a6d3SLuigi Rizzo 
50437e3a6d3SLuigi Rizzo /* We use by default netmap-aware qdiscs with generic netmap adapters,
50537e3a6d3SLuigi Rizzo  * even if there can be a little performance hit with hardware NICs.
50637e3a6d3SLuigi Rizzo  * However, using the qdisc is the safer approach, for two reasons:
50737e3a6d3SLuigi Rizzo  * 1) it prevents non-fifo qdiscs to break the TX notification
50837e3a6d3SLuigi Rizzo  *    scheme, which is based on mbuf destructors when txqdisc is
50937e3a6d3SLuigi Rizzo  *    not used.
51037e3a6d3SLuigi Rizzo  * 2) it makes it possible to transmit over software devices that
51137e3a6d3SLuigi Rizzo  *    change skb->dev, like bridge, veth, ...
51237e3a6d3SLuigi Rizzo  *
51337e3a6d3SLuigi Rizzo  * Anyway users looking for the best performance should
51437e3a6d3SLuigi Rizzo  * use native adapters.
51537e3a6d3SLuigi Rizzo  */
5164f80b14cSVincenzo Maffione #ifdef linux
51737e3a6d3SLuigi Rizzo int netmap_generic_txqdisc = 1;	/* Linux-only: qdisc concept has no FreeBSD counterpart */
5184f80b14cSVincenzo Maffione #endif
51937e3a6d3SLuigi Rizzo 
52037e3a6d3SLuigi Rizzo /* Default number of slots and queues for generic adapters. */
52137e3a6d3SLuigi Rizzo int netmap_generic_ringsize = 1024;
52237e3a6d3SLuigi Rizzo int netmap_generic_rings = 1;
52337e3a6d3SLuigi Rizzo 
52437e3a6d3SLuigi Rizzo /* Non-zero if ptnet devices are allowed to use virtio-net headers. */
52537e3a6d3SLuigi Rizzo int ptnet_vnet_hdr = 1;
52637e3a6d3SLuigi Rizzo 
527c3e9b4dbSLuiz Otavio O Souza /* 0 if ptnetmap should not use worker threads for TX processing */
528c3e9b4dbSLuiz Otavio O Souza int ptnetmap_tx_workers = 1;
529c3e9b4dbSLuiz Otavio O Souza 
53037e3a6d3SLuigi Rizzo /*
53137e3a6d3SLuigi Rizzo  * SYSCTL calls are grouped between SYSBEGIN and SYSEND to be emulated
53237e3a6d3SLuigi Rizzo  * in some other operating systems
53337e3a6d3SLuigi Rizzo  */
53437e3a6d3SLuigi Rizzo SYSBEGIN(main_init);
53537e3a6d3SLuigi Rizzo 
53637e3a6d3SLuigi Rizzo SYSCTL_DECL(_dev_netmap);
53737e3a6d3SLuigi Rizzo SYSCTL_NODE(_dev, OID_AUTO, netmap, CTLFLAG_RW, 0, "Netmap args");
53837e3a6d3SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,
53937e3a6d3SLuigi Rizzo     CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
54037e3a6d3SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
54137e3a6d3SLuigi Rizzo     CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
5424f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr, CTLFLAG_RW, &netmap_no_pendintr,
5434f80b14cSVincenzo Maffione     0, "Always look for new received packets.");
54437e3a6d3SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW,
54537e3a6d3SLuigi Rizzo     &netmap_txsync_retry, 0, "Number of txsync loops in bridge's flush.");
546f9790aebSLuigi Rizzo 
5474f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0,
5484f80b14cSVincenzo Maffione     "Force NR_FORWARD mode");
5494f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0,
5504f80b14cSVincenzo Maffione     "Adapter mode. 0 selects the best option available,"
5514f80b14cSVincenzo Maffione     "1 forces native adapter, 2 forces emulated adapter");
5524f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit,
5534f80b14cSVincenzo Maffione     0, "RX notification interval in nanoseconds");
5544f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW,
5554f80b14cSVincenzo Maffione     &netmap_generic_ringsize, 0,
5564f80b14cSVincenzo Maffione     "Number of per-ring slots for emulated netmap mode");
5574f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW,
5584f80b14cSVincenzo Maffione     &netmap_generic_rings, 0,
5594f80b14cSVincenzo Maffione     "Number of TX/RX queues for emulated netmap adapters");
5604f80b14cSVincenzo Maffione #ifdef linux
5614f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, generic_txqdisc, CTLFLAG_RW,
5624f80b14cSVincenzo Maffione     &netmap_generic_txqdisc, 0, "Use qdisc for generic adapters");
5634f80b14cSVincenzo Maffione #endif
5644f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, ptnet_vnet_hdr, CTLFLAG_RW, &ptnet_vnet_hdr,
5654f80b14cSVincenzo Maffione     0, "Allow ptnet devices to use virtio-net headers");
5664f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, ptnetmap_tx_workers, CTLFLAG_RW,
5674f80b14cSVincenzo Maffione     &ptnetmap_tx_workers, 0, "Use worker threads for pnetmap TX processing");
56837e3a6d3SLuigi Rizzo 
56937e3a6d3SLuigi Rizzo SYSEND;
570f196ce38SLuigi Rizzo 
571ce3ee1e7SLuigi Rizzo NMG_LOCK_T	netmap_global_lock;
572ce3ee1e7SLuigi Rizzo 
57317885a7bSLuigi Rizzo /*
57417885a7bSLuigi Rizzo  * mark the ring as stopped, and run through the locks
57517885a7bSLuigi Rizzo  * to make sure other users get to see it.
57637e3a6d3SLuigi Rizzo  * stopped must be either NM_KR_STOPPED (for unbounded stop)
57737e3a6d3SLuigi Rizzo  * or NM_KR_LOCKED (brief stop for mutual exclusion purposes)
57817885a7bSLuigi Rizzo  */
5794bf50f18SLuigi Rizzo static void
58037e3a6d3SLuigi Rizzo netmap_disable_ring(struct netmap_kring *kr, int stopped)
581ce3ee1e7SLuigi Rizzo {
58237e3a6d3SLuigi Rizzo 	nm_kr_stop(kr, stopped);
58337e3a6d3SLuigi Rizzo 	// XXX check if nm_kr_stop is sufficient
584ce3ee1e7SLuigi Rizzo 	mtx_lock(&kr->q_lock);
585ce3ee1e7SLuigi Rizzo 	mtx_unlock(&kr->q_lock);
586ce3ee1e7SLuigi Rizzo 	nm_kr_put(kr);
587ce3ee1e7SLuigi Rizzo }
588ce3ee1e7SLuigi Rizzo 
589847bf383SLuigi Rizzo /* stop or enable a single ring */
5904bf50f18SLuigi Rizzo void
591847bf383SLuigi Rizzo netmap_set_ring(struct netmap_adapter *na, u_int ring_id, enum txrx t, int stopped)
5924bf50f18SLuigi Rizzo {
5934bf50f18SLuigi Rizzo 	if (stopped)
594*2ff91c17SVincenzo Maffione 		netmap_disable_ring(NMR(na, t)[ring_id], stopped);
5954bf50f18SLuigi Rizzo 	else
596*2ff91c17SVincenzo Maffione 		NMR(na, t)[ring_id]->nkr_stopped = 0;
5974bf50f18SLuigi Rizzo }
5984bf50f18SLuigi Rizzo 
599f9790aebSLuigi Rizzo 
60089cc2556SLuigi Rizzo /* stop or enable all the rings of na */
6014bf50f18SLuigi Rizzo void
6024bf50f18SLuigi Rizzo netmap_set_all_rings(struct netmap_adapter *na, int stopped)
603ce3ee1e7SLuigi Rizzo {
604ce3ee1e7SLuigi Rizzo 	int i;
605847bf383SLuigi Rizzo 	enum txrx t;
606ce3ee1e7SLuigi Rizzo 
6074bf50f18SLuigi Rizzo 	if (!nm_netmap_on(na))
608ce3ee1e7SLuigi Rizzo 		return;
609ce3ee1e7SLuigi Rizzo 
610847bf383SLuigi Rizzo 	for_rx_tx(t) {
611847bf383SLuigi Rizzo 		for (i = 0; i < netmap_real_rings(na, t); i++) {
612847bf383SLuigi Rizzo 			netmap_set_ring(na, i, t, stopped);
613ce3ee1e7SLuigi Rizzo 		}
614ce3ee1e7SLuigi Rizzo 	}
615ce3ee1e7SLuigi Rizzo }
616ce3ee1e7SLuigi Rizzo 
61789cc2556SLuigi Rizzo /*
61889cc2556SLuigi Rizzo  * Convenience function used in drivers.  Waits for current txsync()s/rxsync()s
61989cc2556SLuigi Rizzo  * to finish and prevents any new one from starting.  Call this before turning
620ddb13598SKevin Lo  * netmap mode off, or before removing the hardware rings (e.g., on module
62137e3a6d3SLuigi Rizzo  * unload).
62289cc2556SLuigi Rizzo  */
623f9790aebSLuigi Rizzo void
624f9790aebSLuigi Rizzo netmap_disable_all_rings(struct ifnet *ifp)
625f9790aebSLuigi Rizzo {
62637e3a6d3SLuigi Rizzo 	if (NM_NA_VALID(ifp)) {
62737e3a6d3SLuigi Rizzo 		netmap_set_all_rings(NA(ifp), NM_KR_STOPPED);
62837e3a6d3SLuigi Rizzo 	}
629f9790aebSLuigi Rizzo }
630f9790aebSLuigi Rizzo 
63189cc2556SLuigi Rizzo /*
63289cc2556SLuigi Rizzo  * Convenience function used in drivers.  Re-enables rxsync and txsync on the
63389cc2556SLuigi Rizzo  * adapter's rings In linux drivers, this should be placed near each
63489cc2556SLuigi Rizzo  * napi_enable().
63589cc2556SLuigi Rizzo  */
void
netmap_enable_all_rings(struct ifnet *ifp)
{
	if (!NM_NA_VALID(ifp))
		return;
	netmap_set_all_rings(NA(ifp), 0 /* enabled */);
}
643f9790aebSLuigi Rizzo 
64437e3a6d3SLuigi Rizzo void
64537e3a6d3SLuigi Rizzo netmap_make_zombie(struct ifnet *ifp)
64637e3a6d3SLuigi Rizzo {
64737e3a6d3SLuigi Rizzo 	if (NM_NA_VALID(ifp)) {
64837e3a6d3SLuigi Rizzo 		struct netmap_adapter *na = NA(ifp);
64937e3a6d3SLuigi Rizzo 		netmap_set_all_rings(na, NM_KR_LOCKED);
65037e3a6d3SLuigi Rizzo 		na->na_flags |= NAF_ZOMBIE;
65137e3a6d3SLuigi Rizzo 		netmap_set_all_rings(na, 0);
65237e3a6d3SLuigi Rizzo 	}
65337e3a6d3SLuigi Rizzo }
65437e3a6d3SLuigi Rizzo 
65537e3a6d3SLuigi Rizzo void
65637e3a6d3SLuigi Rizzo netmap_undo_zombie(struct ifnet *ifp)
65737e3a6d3SLuigi Rizzo {
65837e3a6d3SLuigi Rizzo 	if (NM_NA_VALID(ifp)) {
65937e3a6d3SLuigi Rizzo 		struct netmap_adapter *na = NA(ifp);
66037e3a6d3SLuigi Rizzo 		if (na->na_flags & NAF_ZOMBIE) {
66137e3a6d3SLuigi Rizzo 			netmap_set_all_rings(na, NM_KR_LOCKED);
66237e3a6d3SLuigi Rizzo 			na->na_flags &= ~NAF_ZOMBIE;
66337e3a6d3SLuigi Rizzo 			netmap_set_all_rings(na, 0);
66437e3a6d3SLuigi Rizzo 		}
66537e3a6d3SLuigi Rizzo 	}
66637e3a6d3SLuigi Rizzo }
667f9790aebSLuigi Rizzo 
668ce3ee1e7SLuigi Rizzo /*
669ce3ee1e7SLuigi Rizzo  * generic bound_checking function
670ce3ee1e7SLuigi Rizzo  */
671ce3ee1e7SLuigi Rizzo u_int
672ce3ee1e7SLuigi Rizzo nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg)
673ce3ee1e7SLuigi Rizzo {
674ce3ee1e7SLuigi Rizzo 	u_int oldv = *v;
675ce3ee1e7SLuigi Rizzo 	const char *op = NULL;
676ce3ee1e7SLuigi Rizzo 
677ce3ee1e7SLuigi Rizzo 	if (dflt < lo)
678ce3ee1e7SLuigi Rizzo 		dflt = lo;
679ce3ee1e7SLuigi Rizzo 	if (dflt > hi)
680ce3ee1e7SLuigi Rizzo 		dflt = hi;
681ce3ee1e7SLuigi Rizzo 	if (oldv < lo) {
682ce3ee1e7SLuigi Rizzo 		*v = dflt;
683ce3ee1e7SLuigi Rizzo 		op = "Bump";
684ce3ee1e7SLuigi Rizzo 	} else if (oldv > hi) {
685ce3ee1e7SLuigi Rizzo 		*v = hi;
686ce3ee1e7SLuigi Rizzo 		op = "Clamp";
687ce3ee1e7SLuigi Rizzo 	}
688ce3ee1e7SLuigi Rizzo 	if (op && msg)
689c3e9b4dbSLuiz Otavio O Souza 		nm_prinf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
690ce3ee1e7SLuigi Rizzo 	return *v;
691ce3ee1e7SLuigi Rizzo }
692ce3ee1e7SLuigi Rizzo 
693f9790aebSLuigi Rizzo 
694ce3ee1e7SLuigi Rizzo /*
695ce3ee1e7SLuigi Rizzo  * packet-dump function, user-supplied or static buffer.
696ce3ee1e7SLuigi Rizzo  * The destination buffer must be at least 30+4*len
697ce3ee1e7SLuigi Rizzo  */
/*
 * Packet-dump helper: render up to 'lim' bytes of buffer 'p' (total
 * length 'len') in hex + printable-ASCII form, 16 bytes per row.
 * Output goes into 'dst', or into a static scratch buffer when
 * dst == NULL.  The destination buffer must be at least 30+4*len.
 * Returns the destination buffer.
 */
const char *
nm_dump_buf(char *p, int len, int lim, char *dst)
{
	static char _dst[8192];
	static const char hexdigits[] = "0123456789abcdef";
	char *out;	/* current output position */
	int i, j, row_start;

#define P_HI(x)	hexdigits[((x) & 0xf0)>>4]
#define P_LO(x)	hexdigits[((x) & 0xf)]
#define P_C(x)	((x) >= 0x20 && (x) <= 0x7e ? (x) : '.')
	if (dst == NULL)
		dst = _dst;
	if (lim <= 0 || lim > len)
		lim = len;
	out = dst;
	sprintf(out, "buf 0x%p len %d lim %d\n", p, len, lim);
	out += strlen(out);
	for (i = 0; i < lim; ) {
		/* row header: offset of the first byte in the row */
		sprintf(out, "%5d: ", i);
		out += strlen(out);
		memset(out, ' ', 48);	/* blank the hex columns */
		row_start = i;
		/* first pass over the row: hex digits */
		for (j = 0; j < 16 && i < lim; i++, j++) {
			out[j*3] = P_HI(p[i]);
			out[j*3+1] = P_LO(p[i]);
		}
		/* second pass over the same bytes: printable chars */
		i = row_start;
		for (j = 0; j < 16 && i < lim; i++, j++)
			out[j + 48] = P_C(p[i]);
		out[j + 48] = '\n';
		out += j + 49;
	}
	*out = '\0';
#undef P_HI
#undef P_LO
#undef P_C
	return dst;
}
738f196ce38SLuigi Rizzo 
739f18be576SLuigi Rizzo 
740ae10d1afSLuigi Rizzo /*
741ae10d1afSLuigi Rizzo  * Fetch configuration from the device, to cope with dynamic
742ae10d1afSLuigi Rizzo  * reconfigurations after loading the module.
743ae10d1afSLuigi Rizzo  */
74489cc2556SLuigi Rizzo /* call with NMG_LOCK held */
745f9790aebSLuigi Rizzo int
746ae10d1afSLuigi Rizzo netmap_update_config(struct netmap_adapter *na)
747ae10d1afSLuigi Rizzo {
748*2ff91c17SVincenzo Maffione 	struct nm_config_info info;
749ae10d1afSLuigi Rizzo 
750*2ff91c17SVincenzo Maffione 	bzero(&info, sizeof(info));
7516641c68bSLuigi Rizzo 	if (na->nm_config == NULL ||
752*2ff91c17SVincenzo Maffione 	    na->nm_config(na, &info)) {
753ae10d1afSLuigi Rizzo 		/* take whatever we had at init time */
754*2ff91c17SVincenzo Maffione 		info.num_tx_rings = na->num_tx_rings;
755*2ff91c17SVincenzo Maffione 		info.num_tx_descs = na->num_tx_desc;
756*2ff91c17SVincenzo Maffione 		info.num_rx_rings = na->num_rx_rings;
757*2ff91c17SVincenzo Maffione 		info.num_rx_descs = na->num_rx_desc;
758*2ff91c17SVincenzo Maffione 		info.rx_buf_maxsize = na->rx_buf_maxsize;
759ae10d1afSLuigi Rizzo 	}
760ae10d1afSLuigi Rizzo 
761*2ff91c17SVincenzo Maffione 	if (na->num_tx_rings == info.num_tx_rings &&
762*2ff91c17SVincenzo Maffione 	    na->num_tx_desc == info.num_tx_descs &&
763*2ff91c17SVincenzo Maffione 	    na->num_rx_rings == info.num_rx_rings &&
764*2ff91c17SVincenzo Maffione 	    na->num_rx_desc == info.num_rx_descs &&
765*2ff91c17SVincenzo Maffione 	    na->rx_buf_maxsize == info.rx_buf_maxsize)
766ae10d1afSLuigi Rizzo 		return 0; /* nothing changed */
767f9790aebSLuigi Rizzo 	if (na->active_fds == 0) {
768*2ff91c17SVincenzo Maffione 		D("configuration changed for %s: txring %d x %d, "
769*2ff91c17SVincenzo Maffione 			"rxring %d x %d, rxbufsz %d",
770*2ff91c17SVincenzo Maffione 			na->name, na->num_tx_rings, na->num_tx_desc,
771*2ff91c17SVincenzo Maffione 			na->num_rx_rings, na->num_rx_desc, na->rx_buf_maxsize);
772*2ff91c17SVincenzo Maffione 		na->num_tx_rings = info.num_tx_rings;
773*2ff91c17SVincenzo Maffione 		na->num_tx_desc = info.num_tx_descs;
774*2ff91c17SVincenzo Maffione 		na->num_rx_rings = info.num_rx_rings;
775*2ff91c17SVincenzo Maffione 		na->num_rx_desc = info.num_rx_descs;
776*2ff91c17SVincenzo Maffione 		na->rx_buf_maxsize = info.rx_buf_maxsize;
777ae10d1afSLuigi Rizzo 		return 0;
778ae10d1afSLuigi Rizzo 	}
779*2ff91c17SVincenzo Maffione 	D("WARNING: configuration changed for %s while active: "
780*2ff91c17SVincenzo Maffione 		"txring %d x %d, rxring %d x %d, rxbufsz %d",
781*2ff91c17SVincenzo Maffione 		na->name, info.num_tx_rings, info.num_tx_descs,
782*2ff91c17SVincenzo Maffione 		info.num_rx_rings, info.num_rx_descs,
783*2ff91c17SVincenzo Maffione 		info.rx_buf_maxsize);
784ae10d1afSLuigi Rizzo 	return 1;
785ae10d1afSLuigi Rizzo }
786ae10d1afSLuigi Rizzo 
78737e3a6d3SLuigi Rizzo /* nm_sync callbacks for the host rings */
78837e3a6d3SLuigi Rizzo static int netmap_txsync_to_host(struct netmap_kring *kring, int flags);
78937e3a6d3SLuigi Rizzo static int netmap_rxsync_from_host(struct netmap_kring *kring, int flags);
790f0ea3689SLuigi Rizzo 
791f0ea3689SLuigi Rizzo /* create the krings array and initialize the fields common to all adapters.
792f0ea3689SLuigi Rizzo  * The array layout is this:
793f0ea3689SLuigi Rizzo  *
794f0ea3689SLuigi Rizzo  *                    +----------+
795f0ea3689SLuigi Rizzo  * na->tx_rings ----->|          | \
796f0ea3689SLuigi Rizzo  *                    |          |  } na->num_tx_ring
797f0ea3689SLuigi Rizzo  *                    |          | /
798f0ea3689SLuigi Rizzo  *                    +----------+
799f0ea3689SLuigi Rizzo  *                    |          |    host tx kring
800f0ea3689SLuigi Rizzo  * na->rx_rings ----> +----------+
801f0ea3689SLuigi Rizzo  *                    |          | \
802f0ea3689SLuigi Rizzo  *                    |          |  } na->num_rx_rings
803f0ea3689SLuigi Rizzo  *                    |          | /
804f0ea3689SLuigi Rizzo  *                    +----------+
805f0ea3689SLuigi Rizzo  *                    |          |    host rx kring
806f0ea3689SLuigi Rizzo  *                    +----------+
807f0ea3689SLuigi Rizzo  * na->tailroom ----->|          | \
808f0ea3689SLuigi Rizzo  *                    |          |  } tailroom bytes
809f0ea3689SLuigi Rizzo  *                    |          | /
810f0ea3689SLuigi Rizzo  *                    +----------+
811f0ea3689SLuigi Rizzo  *
812f0ea3689SLuigi Rizzo  * Note: for compatibility, host krings are created even when not needed.
813f0ea3689SLuigi Rizzo  * The tailroom space is currently used by vale ports for allocating leases.
814f0ea3689SLuigi Rizzo  */
81589cc2556SLuigi Rizzo /* call with NMG_LOCK held */
/*
 * Allocate, in a single contiguous chunk, the array of kring pointers
 * (tx + rx, host rings included), 'tailroom' extra bytes, and the
 * kring structs themselves; then initialize the fields common to all
 * adapters.  Returns 0 on success, ENOMEM if allocation fails.
 */
int
netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
{
	u_int i, len, ndesc;
	struct netmap_kring *kring;
	u_int n[NR_TXRX];	/* ring counts per direction, host ring included */
	enum txrx t;

	if (na->tx_rings != NULL) {
		D("warning: krings were already created");
		return 0;
	}

	/* account for the (possibly fake) host rings */
	n[NR_TX] = na->num_tx_rings + 1;
	n[NR_RX] = na->num_rx_rings + 1;

	/* one pointer plus one kring struct per ring, plus the
	 * caller-requested tailroom bytes in between (see layout
	 * diagram above) */
	len = (n[NR_TX] + n[NR_RX]) *
		(sizeof(struct netmap_kring) + sizeof(struct netmap_kring *))
		+ tailroom;

	na->tx_rings = nm_os_malloc((size_t)len);
	if (na->tx_rings == NULL) {
		D("Cannot allocate krings");
		return ENOMEM;
	}
	/* carve the pointer array: [tx ptrs][rx ptrs], tailroom follows */
	na->rx_rings = na->tx_rings + n[NR_TX];
	na->tailroom = na->rx_rings + n[NR_RX];

	/* link the krings in the krings array: the structs themselves
	 * live 'tailroom' bytes past the end of the pointer array */
	kring = (struct netmap_kring *)((char *)na->tailroom + tailroom);
	for (i = 0; i < n[NR_TX] + n[NR_RX]; i++) {
		na->tx_rings[i] = kring;
		kring++;
	}

	/*
	 * All fields in krings are 0 except the one initialized below.
	 * but better be explicit on important kring fields.
	 */
	for_rx_tx(t) {
		ndesc = nma_get_ndesc(na, t);
		for (i = 0; i < n[t]; i++) {
			kring = NMR(na, t)[i];
			bzero(kring, sizeof(*kring));
			kring->na = na;
			kring->notify_na = na;
			kring->ring_id = i;
			kring->tx = t;
			kring->nkr_num_slots = ndesc;
			kring->nr_mode = NKR_NETMAP_OFF;
			kring->nr_pending_mode = NKR_NETMAP_OFF;
			if (i < nma_get_nrings(na, t)) {
				/* hardware ring: use the adapter's sync callbacks */
				kring->nm_sync = (t == NR_TX ? na->nm_txsync : na->nm_rxsync);
			} else {
				/* host ring (or fake placeholder when the
				 * adapter has no host rings) */
				if (!(na->na_flags & NAF_HOST_RINGS))
					kring->nr_kflags |= NKR_FAKERING;
				kring->nm_sync = (t == NR_TX ?
						netmap_txsync_to_host:
						netmap_rxsync_from_host);
			}
			kring->nm_notify = na->nm_notify;
			kring->rhead = kring->rcur = kring->nr_hwcur = 0;
			/*
			 * IMPORTANT: Always keep one slot empty.
			 */
			kring->rtail = kring->nr_hwtail = (t == NR_TX ? ndesc - 1 : 0);
			snprintf(kring->name, sizeof(kring->name) - 1, "%s %s%d", na->name,
					nm_txrx2str(t), i);
			ND("ktx %s h %d c %d t %d",
				kring->name, kring->rhead, kring->rcur, kring->rtail);
			mtx_init(&kring->q_lock, (t == NR_TX ? "nm_txq_lock" : "nm_rxq_lock"), NULL, MTX_DEF);
			nm_os_selinfo_init(&kring->si);
		}
		/* global (per-direction) selinfo, used when a file
		 * descriptor is bound to more than one ring */
		nm_os_selinfo_init(&na->si[t]);
	}


	return 0;
}
896f9790aebSLuigi Rizzo 
897f9790aebSLuigi Rizzo 
898f0ea3689SLuigi Rizzo /* undo the actions performed by netmap_krings_create */
89989cc2556SLuigi Rizzo /* call with NMG_LOCK held */
900f9790aebSLuigi Rizzo void
901f9790aebSLuigi Rizzo netmap_krings_delete(struct netmap_adapter *na)
902f9790aebSLuigi Rizzo {
903*2ff91c17SVincenzo Maffione 	struct netmap_kring **kring = na->tx_rings;
904847bf383SLuigi Rizzo 	enum txrx t;
905847bf383SLuigi Rizzo 
906c3e9b4dbSLuiz Otavio O Souza 	if (na->tx_rings == NULL) {
907c3e9b4dbSLuiz Otavio O Souza 		D("warning: krings were already deleted");
908c3e9b4dbSLuiz Otavio O Souza 		return;
909c3e9b4dbSLuiz Otavio O Souza 	}
910c3e9b4dbSLuiz Otavio O Souza 
911847bf383SLuigi Rizzo 	for_rx_tx(t)
91237e3a6d3SLuigi Rizzo 		nm_os_selinfo_uninit(&na->si[t]);
913f9790aebSLuigi Rizzo 
914f0ea3689SLuigi Rizzo 	/* we rely on the krings layout described above */
915f0ea3689SLuigi Rizzo 	for ( ; kring != na->tailroom; kring++) {
916*2ff91c17SVincenzo Maffione 		mtx_destroy(&(*kring)->q_lock);
917*2ff91c17SVincenzo Maffione 		nm_os_selinfo_uninit(&(*kring)->si);
918f9790aebSLuigi Rizzo 	}
919c3e9b4dbSLuiz Otavio O Souza 	nm_os_free(na->tx_rings);
920f9790aebSLuigi Rizzo 	na->tx_rings = na->rx_rings = na->tailroom = NULL;
921f9790aebSLuigi Rizzo }
922f9790aebSLuigi Rizzo 
923f9790aebSLuigi Rizzo 
92417885a7bSLuigi Rizzo /*
92517885a7bSLuigi Rizzo  * Destructor for NIC ports. They also have an mbuf queue
92617885a7bSLuigi Rizzo  * on the rings connected to the host so we need to purge
92717885a7bSLuigi Rizzo  * them first.
92817885a7bSLuigi Rizzo  */
92989cc2556SLuigi Rizzo /* call with NMG_LOCK held */
93037e3a6d3SLuigi Rizzo void
93117885a7bSLuigi Rizzo netmap_hw_krings_delete(struct netmap_adapter *na)
93217885a7bSLuigi Rizzo {
933*2ff91c17SVincenzo Maffione 	struct mbq *q = &na->rx_rings[na->num_rx_rings]->rx_queue;
93417885a7bSLuigi Rizzo 
93517885a7bSLuigi Rizzo 	ND("destroy sw mbq with len %d", mbq_len(q));
93617885a7bSLuigi Rizzo 	mbq_purge(q);
93737e3a6d3SLuigi Rizzo 	mbq_safe_fini(q);
93817885a7bSLuigi Rizzo 	netmap_krings_delete(na);
93917885a7bSLuigi Rizzo }
94017885a7bSLuigi Rizzo 
9414f80b14cSVincenzo Maffione static void
9424f80b14cSVincenzo Maffione netmap_mem_drop(struct netmap_adapter *na)
9434f80b14cSVincenzo Maffione {
9444f80b14cSVincenzo Maffione 	int last = netmap_mem_deref(na->nm_mem, na);
9454f80b14cSVincenzo Maffione 	/* if the native allocator had been overrided on regif,
9464f80b14cSVincenzo Maffione 	 * restore it now and drop the temporary one
9474f80b14cSVincenzo Maffione 	 */
9484f80b14cSVincenzo Maffione 	if (last && na->nm_mem_prev) {
9494f80b14cSVincenzo Maffione 		netmap_mem_put(na->nm_mem);
9504f80b14cSVincenzo Maffione 		na->nm_mem = na->nm_mem_prev;
9514f80b14cSVincenzo Maffione 		na->nm_mem_prev = NULL;
9524f80b14cSVincenzo Maffione 	}
9534f80b14cSVincenzo Maffione }
954f9790aebSLuigi Rizzo 
95568b8534bSLuigi Rizzo /*
956847bf383SLuigi Rizzo  * Undo everything that was done in netmap_do_regif(). In particular,
957847bf383SLuigi Rizzo  * call nm_register(ifp,0) to stop netmap mode on the interface and
9584bf50f18SLuigi Rizzo  * revert to normal operation.
95968b8534bSLuigi Rizzo  */
960ce3ee1e7SLuigi Rizzo /* call with NMG_LOCK held */
961847bf383SLuigi Rizzo static void netmap_unset_ringid(struct netmap_priv_d *);
96237e3a6d3SLuigi Rizzo static void netmap_krings_put(struct netmap_priv_d *);
void
netmap_do_unregif(struct netmap_priv_d *priv)
{
	struct netmap_adapter *na = priv->np_na;

	NMG_LOCK_ASSERT();
	na->active_fds--;
	/* unset nr_pending_mode and possibly release exclusive mode */
	netmap_krings_put(priv);

#ifdef	WITH_MONITOR
	/* XXX check whether we have to do something with monitor
	 * when rings change nr_mode. */
	if (na->active_fds <= 0) {
		/* walk through all the rings and tell any monitor
		 * that the port is going to exit netmap mode
		 */
		netmap_monitor_stop(na);
	}
#endif

	/* turn off the adapter (or apply pending per-ring mode changes)
	 * via the nm_register callback */
	if (na->active_fds <= 0 || nm_kring_pending(priv)) {
		na->nm_register(na, 0);
	}

	/* delete rings and buffers that are no longer needed */
	netmap_mem_rings_delete(na);

	if (na->active_fds <= 0) {	/* last instance */
		/*
		 * (TO CHECK) We enter here
		 * when the last reference to this file descriptor goes
		 * away. This means we cannot have any pending poll()
		 * or interrupt routine operating on the structure.
		 * XXX The file may be closed in a thread while
		 * another thread is using it.
		 * Linux keeps the file opened until the last reference
		 * by any outstanding ioctl/poll or mmap is gone.
		 * FreeBSD does not track mmap()s (but we do) and
		 * wakes up any sleeping poll(). Need to check what
		 * happens if the close() occurs while a concurrent
		 * syscall is running.
		 */
		if (netmap_verbose)
			D("deleting last instance for %s", na->name);

                if (nm_netmap_on(na)) {
                    D("BUG: netmap on while going to delete the krings");
                }

		na->nm_krings_delete(na);
	}

	/* possibly decrement counter of tx_si/rx_si users */
	netmap_unset_ringid(priv);
	/* delete the nifp */
	netmap_mem_if_delete(na, priv->np_nifp);
	/* drop the allocator */
	netmap_mem_drop(na);
	/* mark the priv as unregistered */
	priv->np_na = NULL;
	priv->np_nifp = NULL;
}
102668b8534bSLuigi Rizzo 
102789cc2556SLuigi Rizzo /* call with NMG_LOCK held */
1028f0ea3689SLuigi Rizzo static __inline int
1029847bf383SLuigi Rizzo nm_si_user(struct netmap_priv_d *priv, enum txrx t)
1030f0ea3689SLuigi Rizzo {
1031f0ea3689SLuigi Rizzo 	return (priv->np_na != NULL &&
1032847bf383SLuigi Rizzo 		(priv->np_qlast[t] - priv->np_qfirst[t] > 1));
1033f0ea3689SLuigi Rizzo }
1034f0ea3689SLuigi Rizzo 
103537e3a6d3SLuigi Rizzo struct netmap_priv_d*
103637e3a6d3SLuigi Rizzo netmap_priv_new(void)
103737e3a6d3SLuigi Rizzo {
103837e3a6d3SLuigi Rizzo 	struct netmap_priv_d *priv;
103937e3a6d3SLuigi Rizzo 
1040c3e9b4dbSLuiz Otavio O Souza 	priv = nm_os_malloc(sizeof(struct netmap_priv_d));
104137e3a6d3SLuigi Rizzo 	if (priv == NULL)
104237e3a6d3SLuigi Rizzo 		return NULL;
104337e3a6d3SLuigi Rizzo 	priv->np_refs = 1;
104437e3a6d3SLuigi Rizzo 	nm_os_get_module();
104537e3a6d3SLuigi Rizzo 	return priv;
104637e3a6d3SLuigi Rizzo }
104737e3a6d3SLuigi Rizzo 
1048ce3ee1e7SLuigi Rizzo /*
10498fd44c93SLuigi Rizzo  * Destructor of the netmap_priv_d, called when the fd is closed
10508fd44c93SLuigi Rizzo  * Action: undo all the things done by NIOCREGIF,
10518fd44c93SLuigi Rizzo  * On FreeBSD we need to track whether there are active mmap()s,
10528fd44c93SLuigi Rizzo  * and we use np_active_mmaps for that. On linux, the field is always 0.
10538fd44c93SLuigi Rizzo  * Return: 1 if we can free priv, 0 otherwise.
105489cc2556SLuigi Rizzo  *
1055ce3ee1e7SLuigi Rizzo  */
105689cc2556SLuigi Rizzo /* call with NMG_LOCK held */
105737e3a6d3SLuigi Rizzo void
105837e3a6d3SLuigi Rizzo netmap_priv_delete(struct netmap_priv_d *priv)
1059ce3ee1e7SLuigi Rizzo {
1060f9790aebSLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
1061ce3ee1e7SLuigi Rizzo 
1062847adfb7SLuigi Rizzo 	/* number of active references to this fd */
10638fd44c93SLuigi Rizzo 	if (--priv->np_refs > 0) {
106437e3a6d3SLuigi Rizzo 		return;
1065ce3ee1e7SLuigi Rizzo 	}
106637e3a6d3SLuigi Rizzo 	nm_os_put_module();
106737e3a6d3SLuigi Rizzo 	if (na) {
1068847bf383SLuigi Rizzo 		netmap_do_unregif(priv);
106937e3a6d3SLuigi Rizzo 	}
107037e3a6d3SLuigi Rizzo 	netmap_unget_na(na, priv->np_ifp);
107137e3a6d3SLuigi Rizzo 	bzero(priv, sizeof(*priv));	/* for safety */
1072c3e9b4dbSLuiz Otavio O Souza 	nm_os_free(priv);
1073f196ce38SLuigi Rizzo }
10745819da83SLuigi Rizzo 
1075f9790aebSLuigi Rizzo 
107689cc2556SLuigi Rizzo /* call with NMG_LOCK *not* held */
/* fd destructor: drop the priv reference under the global lock */
void
netmap_dtor(void *data)
{
	NMG_LOCK();
	netmap_priv_delete((struct netmap_priv_d *)data);
	NMG_UNLOCK();
}
108668b8534bSLuigi Rizzo 
1087f18be576SLuigi Rizzo 
108868b8534bSLuigi Rizzo /*
1089c3e9b4dbSLuiz Otavio O Souza  * Handlers for synchronization of the rings from/to the host stack.
1090c3e9b4dbSLuiz Otavio O Souza  * These are associated to a network interface and are just another
1091c3e9b4dbSLuiz Otavio O Souza  * ring pair managed by userspace.
1092c3e9b4dbSLuiz Otavio O Souza  *
1093c3e9b4dbSLuiz Otavio O Souza  * Netmap also supports transparent forwarding (NS_FORWARD and NR_FORWARD
1094c3e9b4dbSLuiz Otavio O Souza  * flags):
1095c3e9b4dbSLuiz Otavio O Souza  *
1096c3e9b4dbSLuiz Otavio O Souza  * - Before releasing buffers on hw RX rings, the application can mark
1097c3e9b4dbSLuiz Otavio O Souza  *   them with the NS_FORWARD flag. During the next RXSYNC or poll(), they
 *   will be forwarded to the host stack, similarly to what would
 *   happen if the application moved them to the host TX ring.
1100c3e9b4dbSLuiz Otavio O Souza  *
1101c3e9b4dbSLuiz Otavio O Souza  * - Before releasing buffers on the host RX ring, the application can
1102c3e9b4dbSLuiz Otavio O Souza  *   mark them with the NS_FORWARD flag. During the next RXSYNC or poll(),
1103c3e9b4dbSLuiz Otavio O Souza  *   they will be forwarded to the hw TX rings, saving the application
1104c3e9b4dbSLuiz Otavio O Souza  *   from doing the same task in user-space.
1105c3e9b4dbSLuiz Otavio O Souza  *
 * Transparent forwarding can be enabled per-ring, by setting the NR_FORWARD
 * flag, or globally with the netmap_fwd sysctl.
1108c3e9b4dbSLuiz Otavio O Souza  *
1109091fd0abSLuigi Rizzo  * The transfer NIC --> host is relatively easy, just encapsulate
1110091fd0abSLuigi Rizzo  * into mbufs and we are done. The host --> NIC side is slightly
1111091fd0abSLuigi Rizzo  * harder because there might not be room in the tx ring so it
1112091fd0abSLuigi Rizzo  * might take a while before releasing the buffer.
1113091fd0abSLuigi Rizzo  */
1114091fd0abSLuigi Rizzo 
1115f18be576SLuigi Rizzo 
1116091fd0abSLuigi Rizzo /*
1117c3e9b4dbSLuiz Otavio O Souza  * Pass a whole queue of mbufs to the host stack as coming from 'dst'
111817885a7bSLuigi Rizzo  * We do not need to lock because the queue is private.
1119c3e9b4dbSLuiz Otavio O Souza  * After this call the queue is empty.
1120091fd0abSLuigi Rizzo  */
static void
netmap_send_up(struct ifnet *dst, struct mbq *q)
{
	struct mbuf *m;
	struct mbuf *head = NULL, *prev = NULL;

	/* Send packets up, outside the lock; head/prev machinery
	 * is only useful for Windows. */
	while ((m = mbq_dequeue(q)) != NULL) {
		if (netmap_verbose & NM_VERB_HOST)
			D("sending up pkt %p size %d", m, MBUF_LEN(m));
		/* NOTE(review): presumably nm_os_send_up() either delivers
		 * m directly or chains it, returning the new chain tail;
		 * confirm against the per-OS implementation. */
		prev = nm_os_send_up(dst, m, prev);
		if (head == NULL)
			head = prev;
	}
	/* flush any chain accumulated above (no-op on platforms that
	 * deliver each mbuf immediately) */
	if (head)
		nm_os_send_up(dst, NULL, head);
	mbq_fini(q);
}
1140091fd0abSLuigi Rizzo 
1141f18be576SLuigi Rizzo 
1142091fd0abSLuigi Rizzo /*
1143c3e9b4dbSLuiz Otavio O Souza  * Scan the buffers from hwcur to ring->head, and put a copy of those
1144c3e9b4dbSLuiz Otavio O Souza  * marked NS_FORWARD (or all of them if forced) into a queue of mbufs.
1145c3e9b4dbSLuiz Otavio O Souza  * Drop remaining packets in the unlikely event
114617885a7bSLuigi Rizzo  * of an mbuf shortage.
1147091fd0abSLuigi Rizzo  */
static void
netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
{
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	u_int n;
	struct netmap_adapter *na = kring->na;

	/* walk the slots the application has released (hwcur..head) */
	for (n = kring->nr_hwcur; n != head; n = nm_next(n, lim)) {
		struct mbuf *m;
		struct netmap_slot *slot = &kring->ring->slot[n];

		if ((slot->flags & NS_FORWARD) == 0 && !force)
			continue;
		/* sanity-check the length; 14 is presumably the minimum
		 * Ethernet header size — TODO confirm */
		if (slot->len < 14 || slot->len > NETMAP_BUF_SIZE(na)) {
			RD(5, "bad pkt at %d len %d", n, slot->len);
			continue;
		}
		slot->flags &= ~NS_FORWARD; // XXX needed ?
		/* XXX TODO: adapt to the case of a multisegment packet */
		m = m_devget(NMB(na, slot), slot->len, 0, na->ifp, NULL);

		if (m == NULL)
			break;	/* mbuf shortage: drop the remaining packets */
		mbq_enqueue(q, m);
	}
}
1175091fd0abSLuigi Rizzo 
117637e3a6d3SLuigi Rizzo static inline int
117737e3a6d3SLuigi Rizzo _nm_may_forward(struct netmap_kring *kring)
117837e3a6d3SLuigi Rizzo {
117937e3a6d3SLuigi Rizzo 	return	((netmap_fwd || kring->ring->flags & NR_FORWARD) &&
118037e3a6d3SLuigi Rizzo 		 kring->na->na_flags & NAF_HOST_RINGS &&
118137e3a6d3SLuigi Rizzo 		 kring->tx == NR_RX);
118237e3a6d3SLuigi Rizzo }
118337e3a6d3SLuigi Rizzo 
118437e3a6d3SLuigi Rizzo static inline int
118537e3a6d3SLuigi Rizzo nm_may_forward_up(struct netmap_kring *kring)
118637e3a6d3SLuigi Rizzo {
118737e3a6d3SLuigi Rizzo 	return	_nm_may_forward(kring) &&
118837e3a6d3SLuigi Rizzo 		 kring->ring_id != kring->na->num_rx_rings;
118937e3a6d3SLuigi Rizzo }
119037e3a6d3SLuigi Rizzo 
119137e3a6d3SLuigi Rizzo static inline int
1192c3e9b4dbSLuiz Otavio O Souza nm_may_forward_down(struct netmap_kring *kring, int sync_flags)
119337e3a6d3SLuigi Rizzo {
119437e3a6d3SLuigi Rizzo 	return	_nm_may_forward(kring) &&
1195c3e9b4dbSLuiz Otavio O Souza 		 (sync_flags & NAF_CAN_FORWARD_DOWN) &&
119637e3a6d3SLuigi Rizzo 		 kring->ring_id == kring->na->num_rx_rings;
119737e3a6d3SLuigi Rizzo }
1198f18be576SLuigi Rizzo 
1199091fd0abSLuigi Rizzo /*
120017885a7bSLuigi Rizzo  * Send to the NIC rings packets marked NS_FORWARD between
1201c3e9b4dbSLuiz Otavio O Souza  * kring->nr_hwcur and kring->rhead.
1202c3e9b4dbSLuiz Otavio O Souza  * Called under kring->rx_queue.lock on the sw rx ring.
1203c3e9b4dbSLuiz Otavio O Souza  *
1204c3e9b4dbSLuiz Otavio O Souza  * It can only be called if the user opened all the TX hw rings,
1205c3e9b4dbSLuiz Otavio O Souza  * see NAF_CAN_FORWARD_DOWN flag.
1206c3e9b4dbSLuiz Otavio O Souza  * We can touch the TX netmap rings (slots, head and cur) since
1207c3e9b4dbSLuiz Otavio O Souza  * we are in poll/ioctl system call context, and the application
1208c3e9b4dbSLuiz Otavio O Souza  * is not supposed to touch the ring (using a different thread)
1209c3e9b4dbSLuiz Otavio O Souza  * during the execution of the system call.
1210091fd0abSLuigi Rizzo  */
static u_int
netmap_sw_to_nic(struct netmap_adapter *na)
{
	/* the source is always the host RX kring (index num_rx_rings) */
	struct netmap_kring *kring = na->rx_rings[na->num_rx_rings];
	struct netmap_slot *rxslot = kring->ring->slot;
	u_int i, rxcur = kring->nr_hwcur;
	u_int const head = kring->rhead;
	u_int const src_lim = kring->nkr_num_slots - 1;
	u_int sent = 0;		/* number of slots forwarded */

	/* scan rings to find space, then fill as much as possible */
	for (i = 0; i < na->num_tx_rings; i++) {
		struct netmap_kring *kdst = na->tx_rings[i];
		struct netmap_ring *rdst = kdst->ring;
		u_int const dst_lim = kdst->nkr_num_slots - 1;

		/* XXX do we trust ring or kring->rcur,rtail ? */
		for (; rxcur != head && !nm_ring_empty(rdst);
		     rxcur = nm_next(rxcur, src_lim) ) {
			struct netmap_slot *src, *dst, tmp;
			u_int dst_head = rdst->head;

			src = &rxslot[rxcur];
			if ((src->flags & NS_FORWARD) == 0 && !netmap_fwd)
				continue;

			sent++;

			dst = &rdst->slot[dst_head];

			tmp = *src;

			/* zero-copy: swap the buffer indices between the
			 * host RX slot and the hw TX slot; both sides are
			 * flagged NS_BUF_CHANGED so the new mapping is
			 * picked up on the next sync */
			src->buf_idx = dst->buf_idx;
			src->flags = NS_BUF_CHANGED;

			dst->buf_idx = tmp.buf_idx;
			dst->len = tmp.len;
			dst->flags = NS_BUF_CHANGED;

			/* advance the TX ring; safe because we are in
			 * syscall context (see the comment above) */
			rdst->head = rdst->cur = nm_next(dst_head, dst_lim);
		}
		/* if (sent) XXX txsync ? it would be just an optimization */
	}
	return sent;
}
1256091fd0abSLuigi Rizzo 
1257f18be576SLuigi Rizzo 
1258091fd0abSLuigi Rizzo /*
1259ce3ee1e7SLuigi Rizzo  * netmap_txsync_to_host() passes packets up. We are called from a
126002ad4083SLuigi Rizzo  * system call in user process context, and the only contention
126102ad4083SLuigi Rizzo  * can be among multiple user threads erroneously calling
1262091fd0abSLuigi Rizzo  * this routine concurrently.
126368b8534bSLuigi Rizzo  */
static int
netmap_txsync_to_host(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	struct mbq q;

	/* Take packets from hwcur to head and pass them up.
	 * Force hwcur = head since netmap_grab_packets() stops at head
	 */
	mbq_init(&q);
	netmap_grab_packets(kring, &q, 1 /* force */);
	ND("have %d pkts in queue", mbq_len(&q));
	kring->nr_hwcur = head;
	/* mark the whole ring as available to the application:
	 * hwtail = head + lim, wrapped modulo the ring size */
	kring->nr_hwtail = head + lim;
	if (kring->nr_hwtail > lim)
		kring->nr_hwtail -= lim + 1;

	/* deliver to the host stack outside any ring lock */
	netmap_send_up(na->ifp, &q);
	return 0;
}
1286f18be576SLuigi Rizzo 
1287f18be576SLuigi Rizzo 
128868b8534bSLuigi Rizzo /*
128902ad4083SLuigi Rizzo  * rxsync backend for packets coming from the host stack.
129017885a7bSLuigi Rizzo  * They have been put in kring->rx_queue by netmap_transmit().
129117885a7bSLuigi Rizzo  * We protect access to the kring using kring->rx_queue.lock
129202ad4083SLuigi Rizzo  *
1293c3e9b4dbSLuiz Otavio O Souza  * also moves to the nic hw rings any packet the user has marked
1294c3e9b4dbSLuiz Otavio O Souza  * for transparent-mode forwarding, then sets the NR_FORWARD
1295c3e9b4dbSLuiz Otavio O Souza  * flag in the kring to let the caller push them out
129668b8534bSLuigi Rizzo  */
static int
netmap_rxsync_from_host(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i, n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	int ret = 0;
	struct mbq *q = &kring->rx_queue, fq;

	mbq_init(&fq); /* fq holds packets to be freed */

	mbq_lock(q);

	/* First part: import newly received packets */
	n = mbq_len(q);
	if (n) { /* grab packets from the queue */
		struct mbuf *m;
		uint32_t stop_i;

		nm_i = kring->nr_hwtail;
		/* stop one slot before hwcur: the ring-full condition */
		stop_i = nm_prev(kring->nr_hwcur, lim);
		while ( nm_i != stop_i && (m = mbq_dequeue(q)) != NULL ) {
			int len = MBUF_LEN(m);
			struct netmap_slot *slot = &ring->slot[nm_i];

			/* copy the mbuf payload into the netmap buffer */
			m_copydata(m, 0, len, NMB(na, slot));
			ND("nm %d len %d", nm_i, len);
			if (netmap_verbose)
                                D("%s", nm_dump_buf(NMB(na, slot),len, 128, NULL));

			slot->len = len;
			slot->flags = 0;
			nm_i = nm_next(nm_i, lim);
			/* consumed mbufs are freed after the lock is dropped */
			mbq_enqueue(&fq, m);
		}
		kring->nr_hwtail = nm_i;
	}

	/*
	 * Second part: skip past packets that userspace has released.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) { /* something was released */
		if (nm_may_forward_down(kring, flags)) {
			/* transparent mode: move NS_FORWARD slots to the
			 * hw TX rings, then ask the caller to flush them */
			ret = netmap_sw_to_nic(na);
			if (ret > 0) {
				kring->nr_kflags |= NR_FORWARD;
				ret = 0;
			}
		}
		kring->nr_hwcur = head;
	}

	mbq_unlock(q);

	/* free the consumed mbufs outside the lock */
	mbq_purge(&fq);
	mbq_fini(&fq);

	return ret;
}
135968b8534bSLuigi Rizzo 
136068b8534bSLuigi Rizzo 
1361f9790aebSLuigi Rizzo /* Get a netmap adapter for the port.
1362f9790aebSLuigi Rizzo  *
1363f9790aebSLuigi Rizzo  * If it is possible to satisfy the request, return 0
1364f9790aebSLuigi Rizzo  * with *na containing the netmap adapter found.
1365f9790aebSLuigi Rizzo  * Otherwise return an error code, with *na containing NULL.
1366f9790aebSLuigi Rizzo  *
1367f9790aebSLuigi Rizzo  * When the port is attached to a bridge, we always return
1368f9790aebSLuigi Rizzo  * EBUSY.
1369f9790aebSLuigi Rizzo  * Otherwise, if the port is already bound to a file descriptor,
1370f9790aebSLuigi Rizzo  * then we unconditionally return the existing adapter into *na.
1371f9790aebSLuigi Rizzo  * In all the other cases, we return (into *na) either native,
1372f9790aebSLuigi Rizzo  * generic or NULL, according to the following table:
1373f9790aebSLuigi Rizzo  *
1374f9790aebSLuigi Rizzo  *					native_support
1375f9790aebSLuigi Rizzo  * active_fds   dev.netmap.admode         YES     NO
1376f9790aebSLuigi Rizzo  * -------------------------------------------------------
1377f9790aebSLuigi Rizzo  *    >0              *                 NA(ifp) NA(ifp)
1378f9790aebSLuigi Rizzo  *
1379f9790aebSLuigi Rizzo  *     0        NETMAP_ADMODE_BEST      NATIVE  GENERIC
1380f9790aebSLuigi Rizzo  *     0        NETMAP_ADMODE_NATIVE    NATIVE   NULL
1381f9790aebSLuigi Rizzo  *     0        NETMAP_ADMODE_GENERIC   GENERIC GENERIC
1382f9790aebSLuigi Rizzo  *
1383f9790aebSLuigi Rizzo  */
138437e3a6d3SLuigi Rizzo static void netmap_hw_dtor(struct netmap_adapter *); /* needed by NM_IS_NATIVE() */
int
netmap_get_hw_na(struct ifnet *ifp, struct netmap_mem_d *nmd, struct netmap_adapter **na)
{
	/* generic support */
	int i = netmap_admode;	/* Take a snapshot. */
	struct netmap_adapter *prev_na;
	int error = 0;

	*na = NULL; /* default */

	/* reset in case of invalid value */
	if (i < NETMAP_ADMODE_BEST || i >= NETMAP_ADMODE_LAST)
		i = netmap_admode = NETMAP_ADMODE_BEST;

	if (NM_NA_VALID(ifp)) {
		prev_na = NA(ifp);
		/* If an adapter already exists, return it if
		 * there are active file descriptors or if
		 * netmap is not forced to use generic
		 * adapters.
		 */
		if (NETMAP_OWNED_BY_ANY(prev_na)
			|| i != NETMAP_ADMODE_GENERIC
			|| prev_na->na_flags & NAF_FORCE_NATIVE
#ifdef WITH_PIPES
			/* ugly, but we cannot allow an adapter switch
			 * if some pipe is referring to this one
			 */
			|| prev_na->na_next_pipe > 0
#endif
		) {
			*na = prev_na;
			goto assign_mem;
		}
	}

	/* If there isn't native support and netmap is not allowed
	 * to use generic adapters, we cannot satisfy the request.
	 */
	if (!NM_IS_NATIVE(ifp) && i == NETMAP_ADMODE_NATIVE)
		return EOPNOTSUPP;

	/* Otherwise, create a generic adapter and return it,
	 * saving the previously used netmap adapter, if any.
	 *
	 * Note that here 'prev_na', if not NULL, MUST be a
	 * native adapter, and CANNOT be a generic one. This is
	 * true because generic adapters are created on demand, and
	 * destroyed when not used anymore. Therefore, if the adapter
	 * currently attached to an interface 'ifp' is generic, it
	 * must be that
	 * (NA(ifp)->active_fds > 0 || NETMAP_OWNED_BY_KERN(NA(ifp))).
	 * Consequently, if NA(ifp) is generic, we will enter one of
	 * the branches above. This ensures that we never override
	 * a generic adapter with another generic adapter.
	 */
	error = generic_netmap_attach(ifp);
	if (error)
		return error;

	*na = NA(ifp);

assign_mem:
	/* Install the requested memory allocator, but only if the
	 * adapter does not own its allocator, no file descriptors
	 * are bound to it, and the allocator actually differs. */
	if (nmd != NULL && !((*na)->na_flags & NAF_MEM_OWNER) &&
	    (*na)->active_fds == 0 && ((*na)->nm_mem != nmd)) {
		(*na)->nm_mem_prev = (*na)->nm_mem;
		(*na)->nm_mem = netmap_mem_get(nmd);
	}

	return 0;
}
1456f9790aebSLuigi Rizzo 
145768b8534bSLuigi Rizzo /*
1458ce3ee1e7SLuigi Rizzo  * MUST BE CALLED UNDER NMG_LOCK()
1459ce3ee1e7SLuigi Rizzo  *
1460f2637526SLuigi Rizzo  * Get a refcounted reference to a netmap adapter attached
1461*2ff91c17SVincenzo Maffione  * to the interface specified by req.
1462ce3ee1e7SLuigi Rizzo  * This is always called in the execution of an ioctl().
1463ce3ee1e7SLuigi Rizzo  *
1464f2637526SLuigi Rizzo  * Return ENXIO if the interface specified by the request does
1465f2637526SLuigi Rizzo  * not exist, ENOTSUP if netmap is not supported by the interface,
1466f2637526SLuigi Rizzo  * EBUSY if the interface is already attached to a bridge,
1467f2637526SLuigi Rizzo  * EINVAL if parameters are invalid, ENOMEM if needed resources
1468f2637526SLuigi Rizzo  * could not be allocated.
1469f2637526SLuigi Rizzo  * If successful, hold a reference to the netmap adapter.
1470f18be576SLuigi Rizzo  *
1471*2ff91c17SVincenzo Maffione  * If the interface specified by req is a system one, also keep
147237e3a6d3SLuigi Rizzo  * a reference to it and return a valid *ifp.
147368b8534bSLuigi Rizzo  */
int
netmap_get_na(struct nmreq_header *hdr,
	      struct netmap_adapter **na, struct ifnet **ifp,
	      struct netmap_mem_d *nmd, int create)
{
	struct nmreq_register *req = (struct nmreq_register *)hdr->nr_body;
	int error = 0;
	struct netmap_adapter *ret = NULL;
	int nmd_ref = 0;	/* 1 if we got a reference on nmd here */

	*na = NULL;     /* default return value */
	*ifp = NULL;

	/* only register requests carry a body we can interpret here */
	if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
		return EINVAL;
	}

	if (req->nr_mode == NR_REG_PIPE_MASTER ||
			req->nr_mode == NR_REG_PIPE_SLAVE) {
		/* Do not accept deprecated pipe modes. */
		D("Deprecated pipe nr_mode, use xx{yy or xx}yy syntax");
		return EINVAL;
	}

	NMG_LOCK_ASSERT();

	/* if the request contain a memid, try to find the
	 * corresponding memory region
	 */
	if (nmd == NULL && req->nr_mem_id) {
		nmd = netmap_mem_find(req->nr_mem_id);
		if (nmd == NULL)
			return EINVAL;
		/* keep the reference */
		nmd_ref = 1;
	}

	/* We cascade through all possible types of netmap adapter.
	 * All netmap_get_*_na() functions return an error and an na,
	 * with the following combinations:
	 *
	 * error    na
	 *   0	   NULL		type doesn't match
	 *  !0	   NULL		type matches, but na creation/lookup failed
	 *   0	  !NULL		type matches and na created/found
	 *  !0    !NULL		impossible
	 */

	/* try to see if this is a ptnetmap port */
	error = netmap_get_pt_host_na(hdr, na, nmd, create);
	if (error || *na != NULL)
		goto out;

	/* try to see if this is a monitor port */
	error = netmap_get_monitor_na(hdr, na, nmd, create);
	if (error || *na != NULL)
		goto out;

	/* try to see if this is a pipe port */
	error = netmap_get_pipe_na(hdr, na, nmd, create);
	if (error || *na != NULL)
		goto out;

	/* try to see if this is a bridge port */
	error = netmap_get_bdg_na(hdr, na, nmd, create);
	if (error)
		goto out;

	if (*na != NULL) /* valid match in netmap_get_bdg_na() */
		goto out;

	/*
	 * This must be a hardware na, lookup the name in the system.
	 * Note that by hardware we actually mean "it shows up in ifconfig".
	 * This may still be a tap, a veth/epair, or even a
	 * persistent VALE port.
	 */
	*ifp = ifunit_ref(hdr->nr_name);
	if (*ifp == NULL) {
		error = ENXIO;
		goto out;
	}

	error = netmap_get_hw_na(*ifp, nmd, &ret);
	if (error)
		goto out;

	*na = ret;
	netmap_adapter_get(ret);

out:
	/* on error, undo any reference taken above */
	if (error) {
		if (ret)
			netmap_adapter_put(ret);
		if (*ifp) {
			if_rele(*ifp);
			*ifp = NULL;
		}
	}
	if (nmd_ref)
		netmap_mem_put(nmd);

	return error;
}
1578ce3ee1e7SLuigi Rizzo 
157937e3a6d3SLuigi Rizzo /* undo netmap_get_na() */
158037e3a6d3SLuigi Rizzo void
158137e3a6d3SLuigi Rizzo netmap_unget_na(struct netmap_adapter *na, struct ifnet *ifp)
158237e3a6d3SLuigi Rizzo {
158337e3a6d3SLuigi Rizzo 	if (ifp)
158437e3a6d3SLuigi Rizzo 		if_rele(ifp);
158537e3a6d3SLuigi Rizzo 	if (na)
158637e3a6d3SLuigi Rizzo 		netmap_adapter_put(na);
158737e3a6d3SLuigi Rizzo }
158837e3a6d3SLuigi Rizzo 
158937e3a6d3SLuigi Rizzo 
/*
 * Abort a *sync prologue on an inconsistent ring state: log the
 * offending condition and all ring/kring pointers (rate-limited),
 * and make the enclosing function return nkr_num_slots, which the
 * callers treat as an error (it is >= the number of slots).
 * Expects 'kring', 'ring', 'head' and 'cur' in the calling scope.
 */
#define NM_FAIL_ON(t) do {						\
	if (unlikely(t)) {						\
		RD(5, "%s: fail '" #t "' "				\
			"h %d c %d t %d "				\
			"rh %d rc %d rt %d "				\
			"hc %d ht %d",					\
			kring->name,					\
			head, cur, ring->tail,				\
			kring->rhead, kring->rcur, kring->rtail,	\
			kring->nr_hwcur, kring->nr_hwtail);		\
		return kring->nkr_num_slots;				\
	}								\
} while (0)
1603ce3ee1e7SLuigi Rizzo 
1604f9790aebSLuigi Rizzo /*
1605f9790aebSLuigi Rizzo  * validate parameters on entry for *_txsync()
1606f9790aebSLuigi Rizzo  * Returns ring->cur if ok, or something >= kring->nkr_num_slots
160717885a7bSLuigi Rizzo  * in case of error.
1608f9790aebSLuigi Rizzo  *
160917885a7bSLuigi Rizzo  * rhead, rcur and rtail=hwtail are stored from previous round.
161017885a7bSLuigi Rizzo  * hwcur is the next packet to send to the ring.
1611f9790aebSLuigi Rizzo  *
161217885a7bSLuigi Rizzo  * We want
161317885a7bSLuigi Rizzo  *    hwcur <= *rhead <= head <= cur <= tail = *rtail <= hwtail
1614f9790aebSLuigi Rizzo  *
161517885a7bSLuigi Rizzo  * hwcur, rhead, rtail and hwtail are reliable
1616f9790aebSLuigi Rizzo  */
u_int
nm_txsync_prologue(struct netmap_kring *kring, struct netmap_ring *ring)
{
	u_int head = ring->head; /* read only once */
	u_int cur = ring->cur; /* read only once */
	u_int n = kring->nkr_num_slots;

	ND(5, "%s kcur %d ktail %d head %d cur %d tail %d",
		kring->name,
		kring->nr_hwcur, kring->nr_hwtail,
		ring->head, ring->cur, ring->tail);
#if 1 /* kernel sanity checks; but we can trust the kring. */
	NM_FAIL_ON(kring->nr_hwcur >= n || kring->rhead >= n ||
	    kring->rtail >= n ||  kring->nr_hwtail >= n);
#endif /* kernel sanity checks */
	/*
	 * user sanity checks. We only use head,
	 * A, B, ... are possible positions for head:
	 *
	 *  0    A  rhead   B  rtail   C  n-1
	 *  0    D  rtail   E  rhead   F  n-1
	 *
	 * B, F, D are valid. A, C, E are wrong
	 */
	if (kring->rtail >= kring->rhead) {
		/* want rhead <= head <= rtail */
		NM_FAIL_ON(head < kring->rhead || head > kring->rtail);
		/* and also head <= cur <= rtail */
		NM_FAIL_ON(cur < head || cur > kring->rtail);
	} else { /* here rtail < rhead */
		/* we need head outside rtail .. rhead */
		NM_FAIL_ON(head > kring->rtail && head < kring->rhead);

		/* two cases now: head <= rtail or head >= rhead  */
		if (head <= kring->rtail) {
			/* want head <= cur <= rtail */
			NM_FAIL_ON(cur < head || cur > kring->rtail);
		} else { /* head >= rhead */
			/* cur must be outside rtail..head */
			NM_FAIL_ON(cur > kring->rtail && cur < head);
		}
	}
	/* repair a user-corrupted tail before storing the new state */
	if (ring->tail != kring->rtail) {
		RD(5, "%s tail overwritten was %d need %d", kring->name,
			ring->tail, kring->rtail);
		ring->tail = kring->rtail;
	}
	/* values validated: remember them for the next round */
	kring->rhead = head;
	kring->rcur = cur;
	return head;
}
166868b8534bSLuigi Rizzo 
166968b8534bSLuigi Rizzo 
167068b8534bSLuigi Rizzo /*
1671f9790aebSLuigi Rizzo  * validate parameters on entry for *_rxsync()
167217885a7bSLuigi Rizzo  * Returns ring->head if ok, kring->nkr_num_slots on error.
1673f9790aebSLuigi Rizzo  *
167417885a7bSLuigi Rizzo  * For a valid configuration,
167517885a7bSLuigi Rizzo  * hwcur <= head <= cur <= tail <= hwtail
1676f9790aebSLuigi Rizzo  *
167717885a7bSLuigi Rizzo  * We only consider head and cur.
167817885a7bSLuigi Rizzo  * hwcur and hwtail are reliable.
1679f9790aebSLuigi Rizzo  *
1680f9790aebSLuigi Rizzo  */
u_int
nm_rxsync_prologue(struct netmap_kring *kring, struct netmap_ring *ring)
{
	uint32_t const n = kring->nkr_num_slots;
	uint32_t head, cur;

	ND(5,"%s kc %d kt %d h %d c %d t %d",
		kring->name,
		kring->nr_hwcur, kring->nr_hwtail,
		ring->head, ring->cur, ring->tail);
	/*
	 * Before storing the new values, we should check they do not
	 * move backwards. However:
	 * - head is not an issue because the previous value is hwcur;
	 * - cur could in principle go back, however it does not matter
	 *   because we are processing a brand new rxsync()
	 */
	cur = kring->rcur = ring->cur;	/* read only once */
	head = kring->rhead = ring->head;	/* read only once */
#if 1 /* kernel sanity checks */
	/* hwcur/hwtail are kernel-owned and must always be in range;
	 * on failure NM_FAIL_ON() presumably makes us return
	 * kring->nkr_num_slots, per the contract stated above — the macro
	 * body is not visible here */
	NM_FAIL_ON(kring->nr_hwcur >= n || kring->nr_hwtail >= n);
#endif /* kernel sanity checks */
	/* user sanity checks */
	if (kring->nr_hwtail >= kring->nr_hwcur) {
		/* non-wrapped interval: want hwcur <= rhead <= hwtail */
		NM_FAIL_ON(head < kring->nr_hwcur || head > kring->nr_hwtail);
		/* and also rhead <= rcur <= hwtail */
		NM_FAIL_ON(cur < head || cur > kring->nr_hwtail);
	} else {
		/* wrapped interval: we need rhead outside hwtail..hwcur */
		NM_FAIL_ON(head < kring->nr_hwcur && head > kring->nr_hwtail);
		/* two cases now: head <= hwtail or head >= hwcur  */
		if (head <= kring->nr_hwtail) {
			/* want head <= cur <= hwtail */
			NM_FAIL_ON(cur < head || cur > kring->nr_hwtail);
		} else {
			/* cur must be outside hwtail..head */
			NM_FAIL_ON(cur < head && cur > kring->nr_hwtail);
		}
	}
	if (ring->tail != kring->rtail) {
		/* the application must not write tail: restore the value
		 * the kernel last exported */
		RD(5, "%s tail overwritten was %d need %d",
			kring->name,
			ring->tail, kring->rtail);
		ring->tail = kring->rtail;
	}
	return head;
}
1729f9790aebSLuigi Rizzo 
173017885a7bSLuigi Rizzo 
1731f9790aebSLuigi Rizzo /*
173268b8534bSLuigi Rizzo  * Error routine called when txsync/rxsync detects an error.
 * Can't do much more than resetting head = cur = hwcur, tail = hwtail
173468b8534bSLuigi Rizzo  * Return 1 on reinit.
1735506cc70cSLuigi Rizzo  *
1736506cc70cSLuigi Rizzo  * This routine is only called by the upper half of the kernel.
1737506cc70cSLuigi Rizzo  * It only reads hwcur (which is changed only by the upper half, too)
173817885a7bSLuigi Rizzo  * and hwtail (which may be changed by the lower half, but only on
1739506cc70cSLuigi Rizzo  * a tx ring and only to increase it, so any error will be recovered
1740506cc70cSLuigi Rizzo  * on the next call). For the above, we don't strictly need to call
1741506cc70cSLuigi Rizzo  * it under lock.
174268b8534bSLuigi Rizzo  */
int
netmap_ring_reinit(struct netmap_kring *kring)
{
	struct netmap_ring *ring = kring->ring;
	u_int i, lim = kring->nkr_num_slots - 1;
	int errors = 0;

	// XXX KASSERT nm_kr_tryget
	RD(10, "called for %s", kring->name);
	/* take a fresh snapshot of the (untrusted) user pointers */
	// XXX probably wrong to trust userspace
	kring->rhead = ring->head;
	kring->rcur  = ring->cur;
	kring->rtail = ring->tail;

	/* count out-of-range ring pointers; any hit triggers the reset below */
	if (ring->cur > lim)
		errors++;
	if (ring->head > lim)
		errors++;
	if (ring->tail > lim)
		errors++;
	/* scrub every slot: clear invalid buffer indices (values below 2
	 * are rejected together with out-of-lut ones) and oversized lengths,
	 * so the ring is safe to use again even if errors were found */
	for (i = 0; i <= lim; i++) {
		u_int idx = ring->slot[i].buf_idx;
		u_int len = ring->slot[i].len;
		if (idx < 2 || idx >= kring->na->na_lut.objtotal) {
			RD(5, "bad index at slot %d idx %d len %d ", i, idx, len);
			ring->slot[i].buf_idx = 0;
			ring->slot[i].len = 0;
		} else if (len > NETMAP_BUF_SIZE(kring->na)) {
			ring->slot[i].len = 0;
			RD(5, "bad len at slot %d idx %d len %d", i, idx, len);
		}
	}
	if (errors) {
		/* resynchronize the user-visible pointers with the
		 * kernel's authoritative hwcur/hwtail */
		RD(10, "total %d errors", errors);
		RD(10, "%s reinit, cur %d -> %d tail %d -> %d",
			kring->name,
			ring->cur, kring->nr_hwcur,
			ring->tail, kring->nr_hwtail);
		ring->head = kring->rhead = kring->nr_hwcur;
		ring->cur  = kring->rcur  = kring->nr_hwcur;
		ring->tail = kring->rtail = kring->nr_hwtail;
	}
	return (errors ? 1 : 0);
}
178768b8534bSLuigi Rizzo 
17884bf50f18SLuigi Rizzo /* interpret the ringid and flags fields of an nmreq, by translating them
17894bf50f18SLuigi Rizzo  * into a pair of intervals of ring indices:
17904bf50f18SLuigi Rizzo  *
17914bf50f18SLuigi Rizzo  * [priv->np_txqfirst, priv->np_txqlast) and
17924bf50f18SLuigi Rizzo  * [priv->np_rxqfirst, priv->np_rxqlast)
17934bf50f18SLuigi Rizzo  *
179468b8534bSLuigi Rizzo  */
int
netmap_interp_ringid(struct netmap_priv_d *priv, uint32_t nr_mode,
			uint16_t nr_ringid, uint64_t nr_flags)
{
	struct netmap_adapter *na = priv->np_na;
	/* indexed by enum txrx; order must match the txrx enum values
	 * so that excluded_direction[t] is the flag that disables t */
	int excluded_direction[] = { NR_TX_RINGS_ONLY, NR_RX_RINGS_ONLY };
	enum txrx t;
	u_int j;

	/* passthrough mode is restricted to a plain ALL_NIC binding */
	if ((nr_flags & NR_PTNETMAP_HOST) && ((nr_mode != NR_REG_ALL_NIC) ||
			nr_flags & (NR_RX_RINGS_ONLY|NR_TX_RINGS_ONLY))) {
		D("Error: only NR_REG_ALL_NIC supported with netmap passthrough");
		return EINVAL;
	}

	for_rx_tx(t) {
		/* a direction excluded by the flags gets an empty interval */
		if (nr_flags & excluded_direction[t]) {
			priv->np_qfirst[t] = priv->np_qlast[t] = 0;
			continue;
		}
		switch (nr_mode) {
		case NR_REG_ALL_NIC:
			/* all hardware rings, host ring excluded */
			priv->np_qfirst[t] = 0;
			priv->np_qlast[t] = nma_get_nrings(na, t);
			ND("ALL/PIPE: %s %d %d", nm_txrx2str(t),
				priv->np_qfirst[t], priv->np_qlast[t]);
			break;
		case NR_REG_SW:
		case NR_REG_NIC_SW:
			if (!(na->na_flags & NAF_HOST_RINGS)) {
				D("host rings not supported");
				return EINVAL;
			}
			/* the host ring sits at index nma_get_nrings(na, t);
			 * SW selects only that ring, NIC_SW selects hw rings
			 * plus the host ring */
			priv->np_qfirst[t] = (nr_mode == NR_REG_SW ?
				nma_get_nrings(na, t) : 0);
			priv->np_qlast[t] = nma_get_nrings(na, t) + 1;
			ND("%s: %s %d %d", nr_mode == NR_REG_SW ? "SW" : "NIC+SW",
				nm_txrx2str(t),
				priv->np_qfirst[t], priv->np_qlast[t]);
			break;
		case NR_REG_ONE_NIC:
			/* the id must be valid for at least one direction */
			if (nr_ringid >= na->num_tx_rings &&
					nr_ringid >= na->num_rx_rings) {
				D("invalid ring id %d", nr_ringid);
				return EINVAL;
			}
			/* if not enough rings, use the first one */
			j = nr_ringid;
			if (j >= nma_get_nrings(na, t))
				j = 0;
			priv->np_qfirst[t] = j;
			priv->np_qlast[t] = j + 1;
			ND("ONE_NIC: %s %d %d", nm_txrx2str(t),
				priv->np_qfirst[t], priv->np_qlast[t]);
			break;
		default:
			D("invalid regif type %d", nr_mode);
			return EINVAL;
		}
	}
	priv->np_flags = nr_flags | nr_mode; // TODO

	/* Allow transparent forwarding mode in the host --> nic
	 * direction only if all the TX hw rings have been opened. */
	if (priv->np_qfirst[NR_TX] == 0 &&
			priv->np_qlast[NR_TX] >= na->num_tx_rings) {
		priv->np_sync_flags |= NAF_CAN_FORWARD_DOWN;
	}

	if (netmap_verbose) {
		D("%s: tx [%d,%d) rx [%d,%d) id %d",
			na->name,
			priv->np_qfirst[NR_TX],
			priv->np_qlast[NR_TX],
			priv->np_qfirst[NR_RX],
			priv->np_qlast[NR_RX],
			nr_ringid);
	}
	return 0;
}
187568b8534bSLuigi Rizzo 
18764bf50f18SLuigi Rizzo 
18774bf50f18SLuigi Rizzo /*
18784bf50f18SLuigi Rizzo  * Set the ring ID. For devices with a single queue, a request
18794bf50f18SLuigi Rizzo  * for all rings is the same as a single ring.
18804bf50f18SLuigi Rizzo  */
18814bf50f18SLuigi Rizzo static int
1882*2ff91c17SVincenzo Maffione netmap_set_ringid(struct netmap_priv_d *priv, uint32_t nr_mode,
1883*2ff91c17SVincenzo Maffione 		uint16_t nr_ringid, uint64_t nr_flags)
18844bf50f18SLuigi Rizzo {
18854bf50f18SLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
18864bf50f18SLuigi Rizzo 	int error;
1887847bf383SLuigi Rizzo 	enum txrx t;
18884bf50f18SLuigi Rizzo 
1889*2ff91c17SVincenzo Maffione 	error = netmap_interp_ringid(priv, nr_mode, nr_ringid, nr_flags);
18904bf50f18SLuigi Rizzo 	if (error) {
18914bf50f18SLuigi Rizzo 		return error;
18924bf50f18SLuigi Rizzo 	}
18934bf50f18SLuigi Rizzo 
1894*2ff91c17SVincenzo Maffione 	priv->np_txpoll = (nr_flags & NR_NO_TX_POLL) ? 0 : 1;
18954bf50f18SLuigi Rizzo 
18964bf50f18SLuigi Rizzo 	/* optimization: count the users registered for more than
18974bf50f18SLuigi Rizzo 	 * one ring, which are the ones sleeping on the global queue.
18984bf50f18SLuigi Rizzo 	 * The default netmap_notify() callback will then
18994bf50f18SLuigi Rizzo 	 * avoid signaling the global queue if nobody is using it
19004bf50f18SLuigi Rizzo 	 */
1901847bf383SLuigi Rizzo 	for_rx_tx(t) {
1902847bf383SLuigi Rizzo 		if (nm_si_user(priv, t))
1903847bf383SLuigi Rizzo 			na->si_users[t]++;
1904847bf383SLuigi Rizzo 	}
19054bf50f18SLuigi Rizzo 	return 0;
19064bf50f18SLuigi Rizzo }
19074bf50f18SLuigi Rizzo 
1908847bf383SLuigi Rizzo static void
1909847bf383SLuigi Rizzo netmap_unset_ringid(struct netmap_priv_d *priv)
1910847bf383SLuigi Rizzo {
1911847bf383SLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
1912847bf383SLuigi Rizzo 	enum txrx t;
1913847bf383SLuigi Rizzo 
1914847bf383SLuigi Rizzo 	for_rx_tx(t) {
1915847bf383SLuigi Rizzo 		if (nm_si_user(priv, t))
1916847bf383SLuigi Rizzo 			na->si_users[t]--;
1917847bf383SLuigi Rizzo 		priv->np_qfirst[t] = priv->np_qlast[t] = 0;
1918847bf383SLuigi Rizzo 	}
1919847bf383SLuigi Rizzo 	priv->np_flags = 0;
1920847bf383SLuigi Rizzo 	priv->np_txpoll = 0;
1921847bf383SLuigi Rizzo }
1922847bf383SLuigi Rizzo 
1923847bf383SLuigi Rizzo 
/* Set the nr_pending_mode for the requested rings.
 * If requested, also try to get exclusive access to the rings, provided
 * the rings we want to bind are not exclusively owned by a previous bind.
 * Returns 0 on success, EBUSY if any requested ring is unavailable.
 * Presumably called under NMG_LOCK (netmap_do_regif asserts it), which
 * makes the check/commit passes below atomic — TODO confirm for all callers.
 */
static int
netmap_krings_get(struct netmap_priv_d *priv)
{
	struct netmap_adapter *na = priv->np_na;
	u_int i;
	struct netmap_kring *kring;
	int excl = (priv->np_flags & NR_EXCLUSIVE);
	enum txrx t;

	if (netmap_verbose)
		D("%s: grabbing tx [%d, %d) rx [%d, %d)",
			na->name,
			priv->np_qfirst[NR_TX],
			priv->np_qlast[NR_TX],
			priv->np_qfirst[NR_RX],
			priv->np_qlast[NR_RX]);

	/* first round: check that all the requested rings
	 * are neither already exclusively owned, nor we
	 * want exclusive ownership when they are already in use
	 */
	for_rx_tx(t) {
		for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
			kring = NMR(na, t)[i];
			if ((kring->nr_kflags & NKR_EXCLUSIVE) ||
			    (kring->users && excl))
			{
				ND("ring %s busy", kring->name);
				return EBUSY;
			}
		}
	}

	/* second round: increment usage count (possibly marking them
	 * as exclusive) and set the nr_pending_mode
	 */
	for_rx_tx(t) {
		for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
			kring = NMR(na, t)[i];
			kring->users++;
			if (excl)
				kring->nr_kflags |= NKR_EXCLUSIVE;
	                kring->nr_pending_mode = NKR_NETMAP_ON;
		}
	}

	return 0;

}
1977847bf383SLuigi Rizzo 
197837e3a6d3SLuigi Rizzo /* Undo netmap_krings_get(). This is done by clearing the exclusive mode
197937e3a6d3SLuigi Rizzo  * if was asked on regif, and unset the nr_pending_mode if we are the
198037e3a6d3SLuigi Rizzo  * last users of the involved rings. */
1981847bf383SLuigi Rizzo static void
198237e3a6d3SLuigi Rizzo netmap_krings_put(struct netmap_priv_d *priv)
1983847bf383SLuigi Rizzo {
1984847bf383SLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
1985847bf383SLuigi Rizzo 	u_int i;
1986847bf383SLuigi Rizzo 	struct netmap_kring *kring;
1987847bf383SLuigi Rizzo 	int excl = (priv->np_flags & NR_EXCLUSIVE);
1988847bf383SLuigi Rizzo 	enum txrx t;
1989847bf383SLuigi Rizzo 
1990847bf383SLuigi Rizzo 	ND("%s: releasing tx [%d, %d) rx [%d, %d)",
1991847bf383SLuigi Rizzo 			na->name,
1992847bf383SLuigi Rizzo 			priv->np_qfirst[NR_TX],
1993847bf383SLuigi Rizzo 			priv->np_qlast[NR_TX],
1994847bf383SLuigi Rizzo 			priv->np_qfirst[NR_RX],
1995847bf383SLuigi Rizzo 			priv->np_qlast[MR_RX]);
1996847bf383SLuigi Rizzo 
1997847bf383SLuigi Rizzo 	for_rx_tx(t) {
1998847bf383SLuigi Rizzo 		for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
1999*2ff91c17SVincenzo Maffione 			kring = NMR(na, t)[i];
2000847bf383SLuigi Rizzo 			if (excl)
2001847bf383SLuigi Rizzo 				kring->nr_kflags &= ~NKR_EXCLUSIVE;
2002847bf383SLuigi Rizzo 			kring->users--;
200337e3a6d3SLuigi Rizzo 			if (kring->users == 0)
200437e3a6d3SLuigi Rizzo 				kring->nr_pending_mode = NKR_NETMAP_OFF;
2005847bf383SLuigi Rizzo 		}
2006847bf383SLuigi Rizzo 	}
2007847bf383SLuigi Rizzo }
2008847bf383SLuigi Rizzo 
2009*2ff91c17SVincenzo Maffione static int
2010*2ff91c17SVincenzo Maffione nm_priv_rx_enabled(struct netmap_priv_d *priv)
2011*2ff91c17SVincenzo Maffione {
2012*2ff91c17SVincenzo Maffione 	return (priv->np_qfirst[NR_RX] != priv->np_qlast[NR_RX]);
2013*2ff91c17SVincenzo Maffione }
2014*2ff91c17SVincenzo Maffione 
2015f18be576SLuigi Rizzo /*
2016f18be576SLuigi Rizzo  * possibly move the interface to netmap-mode.
2017f18be576SLuigi Rizzo  * If success it returns a pointer to netmap_if, otherwise NULL.
2018ce3ee1e7SLuigi Rizzo  * This must be called with NMG_LOCK held.
20194bf50f18SLuigi Rizzo  *
20204bf50f18SLuigi Rizzo  * The following na callbacks are called in the process:
20214bf50f18SLuigi Rizzo  *
20224bf50f18SLuigi Rizzo  * na->nm_config()			[by netmap_update_config]
20234bf50f18SLuigi Rizzo  * (get current number and size of rings)
20244bf50f18SLuigi Rizzo  *
20254bf50f18SLuigi Rizzo  *  	We have a generic one for linux (netmap_linux_config).
20264bf50f18SLuigi Rizzo  *  	The bwrap has to override this, since it has to forward
20274bf50f18SLuigi Rizzo  *  	the request to the wrapped adapter (netmap_bwrap_config).
20284bf50f18SLuigi Rizzo  *
20294bf50f18SLuigi Rizzo  *
2030847bf383SLuigi Rizzo  * na->nm_krings_create()
20314bf50f18SLuigi Rizzo  * (create and init the krings array)
20324bf50f18SLuigi Rizzo  *
20334bf50f18SLuigi Rizzo  * 	One of the following:
20344bf50f18SLuigi Rizzo  *
20354bf50f18SLuigi Rizzo  *	* netmap_hw_krings_create, 			(hw ports)
20364bf50f18SLuigi Rizzo  *		creates the standard layout for the krings
20374bf50f18SLuigi Rizzo  * 		and adds the mbq (used for the host rings).
20384bf50f18SLuigi Rizzo  *
20394bf50f18SLuigi Rizzo  * 	* netmap_vp_krings_create			(VALE ports)
20404bf50f18SLuigi Rizzo  * 		add leases and scratchpads
20414bf50f18SLuigi Rizzo  *
20424bf50f18SLuigi Rizzo  * 	* netmap_pipe_krings_create			(pipes)
20434bf50f18SLuigi Rizzo  * 		create the krings and rings of both ends and
20444bf50f18SLuigi Rizzo  * 		cross-link them
20454bf50f18SLuigi Rizzo  *
20464bf50f18SLuigi Rizzo  *      * netmap_monitor_krings_create 			(monitors)
20474bf50f18SLuigi Rizzo  *      	avoid allocating the mbq
20484bf50f18SLuigi Rizzo  *
20494bf50f18SLuigi Rizzo  *      * netmap_bwrap_krings_create			(bwraps)
 *      	create both the bwrap krings array,
20514bf50f18SLuigi Rizzo  *      	the krings array of the wrapped adapter, and
20524bf50f18SLuigi Rizzo  *      	(if needed) the fake array for the host adapter
20534bf50f18SLuigi Rizzo  *
20544bf50f18SLuigi Rizzo  * na->nm_register(, 1)
20554bf50f18SLuigi Rizzo  * (put the adapter in netmap mode)
20564bf50f18SLuigi Rizzo  *
20574bf50f18SLuigi Rizzo  * 	This may be one of the following:
20584bf50f18SLuigi Rizzo  *
205937e3a6d3SLuigi Rizzo  * 	* netmap_hw_reg				        (hw ports)
20604bf50f18SLuigi Rizzo  * 		checks that the ifp is still there, then calls
20614bf50f18SLuigi Rizzo  * 		the hardware specific callback;
20624bf50f18SLuigi Rizzo  *
20634bf50f18SLuigi Rizzo  * 	* netmap_vp_reg					(VALE ports)
20644bf50f18SLuigi Rizzo  *		If the port is connected to a bridge,
20654bf50f18SLuigi Rizzo  *		set the NAF_NETMAP_ON flag under the
20664bf50f18SLuigi Rizzo  *		bridge write lock.
20674bf50f18SLuigi Rizzo  *
20684bf50f18SLuigi Rizzo  *	* netmap_pipe_reg				(pipes)
20694bf50f18SLuigi Rizzo  *		inform the other pipe end that it is no
2070453130d9SPedro F. Giffuni  *		longer responsible for the lifetime of this
20714bf50f18SLuigi Rizzo  *		pipe end
20724bf50f18SLuigi Rizzo  *
20734bf50f18SLuigi Rizzo  *	* netmap_monitor_reg				(monitors)
20744bf50f18SLuigi Rizzo  *		intercept the sync callbacks of the monitored
20754bf50f18SLuigi Rizzo  *		rings
20764bf50f18SLuigi Rizzo  *
207737e3a6d3SLuigi Rizzo  *	* netmap_bwrap_reg				(bwraps)
20784bf50f18SLuigi Rizzo  *		cross-link the bwrap and hwna rings,
20794bf50f18SLuigi Rizzo  *		forward the request to the hwna, override
20804bf50f18SLuigi Rizzo  *		the hwna notify callback (to get the frames
20814bf50f18SLuigi Rizzo  *		coming from outside go through the bridge).
20824bf50f18SLuigi Rizzo  *
20834bf50f18SLuigi Rizzo  *
2084f18be576SLuigi Rizzo  */
int
netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
	uint32_t nr_mode, uint16_t nr_ringid, uint64_t nr_flags)
{
	struct netmap_if *nifp = NULL;
	int error;

	NMG_LOCK_ASSERT();
	priv->np_na = na;     /* store the reference */
	/* compute the ring intervals requested by this binding */
	error = netmap_set_ringid(priv, nr_mode, nr_ringid, nr_flags);
	if (error)
		goto err;
	error = netmap_mem_finalize(na->nm_mem, na);
	if (error)
		goto err;

	/* first binding of this adapter: one-time setup */
	if (na->active_fds == 0) {

		/* cache the allocator info in the na */
		error = netmap_mem_get_lut(na->nm_mem, &na->na_lut);
		if (error)
			goto err_drop_mem;
		ND("lut %p bufs %u size %u", na->na_lut.lut, na->na_lut.objtotal,
					    na->na_lut.objsize);

		/* ring configuration may have changed, fetch from the card */
		netmap_update_config(na);

		/*
		 * If this is the first registration of the adapter,
		 * perform sanity checks and create the in-kernel view
		 * of the netmap rings (the netmap krings).
		 */
		if (na->ifp && nm_priv_rx_enabled(priv)) {
			/* This netmap adapter is attached to an ifnet.
			 * Check that the netmap buffer size is compatible
			 * with the device MTU and per-slot receive limit. */
			unsigned nbs = netmap_mem_bufsize(na->nm_mem);
			unsigned mtu = nm_os_ifnet_mtu(na->ifp);

			ND("mtu %d rx_buf_maxsize %d netmap_buf_size %d",
					mtu, na->rx_buf_maxsize, nbs);

			if (mtu <= na->rx_buf_maxsize) {
				/* The MTU fits a single NIC slot. We only
				 * need to check that netmap buffers are
				 * large enough to hold an MTU. NS_MOREFRAG
				 * cannot be used in this case. */
				if (nbs < mtu) {
					nm_prerr("error: netmap buf size (%u) "
						"< device MTU (%u)\n", nbs, mtu);
					error = EINVAL;
					goto err_drop_mem;
				}
			} else {
				/* More NIC slots may be needed to receive
				 * or transmit a single packet. Check that
				 * the adapter supports NS_MOREFRAG and that
				 * netmap buffers are large enough to hold
				 * the maximum per-slot size. */
				if (!(na->na_flags & NAF_MOREFRAG)) {
					nm_prerr("error: large MTU (%d) needed "
						"but %s does not support "
						"NS_MOREFRAG\n", mtu,
						na->ifp->if_xname);
					error = EINVAL;
					goto err_drop_mem;
				} else if (nbs < na->rx_buf_maxsize) {
					nm_prerr("error: using NS_MOREFRAG on "
						"%s requires netmap buf size "
						">= %u\n", na->ifp->if_xname,
						na->rx_buf_maxsize);
					error = EINVAL;
					goto err_drop_mem;
				} else {
					nm_prinf("info: netmap application on "
						"%s needs to support "
						"NS_MOREFRAG "
						"(MTU=%u,netmap_buf_size=%u)\n",
						na->ifp->if_xname, mtu, nbs);
				}
			}
		}

		/*
		 * Depending on the adapter, this may also create
		 * the netmap rings themselves
		 */
		error = na->nm_krings_create(na);
		if (error)
			goto err_put_lut;

	}

	/* now the krings must exist and we can check whether some
	 * previous bind has exclusive ownership on them, and set
	 * nr_pending_mode
	 */
	error = netmap_krings_get(priv);
	if (error)
		goto err_del_krings;

	/* create all needed missing netmap rings */
	error = netmap_mem_rings_create(na);
	if (error)
		goto err_rel_excl;

	/* in all cases, create a new netmap if */
	nifp = netmap_mem_if_new(na, priv);
	if (nifp == NULL) {
		error = ENOMEM;
		goto err_del_rings;
	}

	if (nm_kring_pending(priv)) {
		/* Some kring is switching mode, tell the adapter to
		 * react on this. */
		error = na->nm_register(na, 1);
		if (error)
			goto err_del_if;
	}

	/* Commit the reference. */
	na->active_fds++;

	/*
	 * advertise that the interface is ready by setting np_nifp.
	 * The barrier is needed because readers (poll, *SYNC and mmap)
	 * check for priv->np_nifp != NULL without locking
	 */
	mb(); /* make sure previous writes are visible to all CPUs */
	priv->np_nifp = nifp;

	return 0;

	/* error unwind: each label undoes exactly the steps completed
	 * before the failure, in reverse order of acquisition */
err_del_if:
	netmap_mem_if_delete(na, nifp);
err_del_rings:
	netmap_mem_rings_delete(na);
err_rel_excl:
	netmap_krings_put(priv);
err_del_krings:
	/* the krings/lut were created only on the first binding, so
	 * tear them down only if nobody else is attached */
	if (na->active_fds == 0)
		na->nm_krings_delete(na);
err_put_lut:
	if (na->active_fds == 0)
		memset(&na->na_lut, 0, sizeof(na->na_lut));
err_drop_mem:
	netmap_mem_drop(na);
err:
	priv->np_na = NULL;
	return error;
}
2236847bf383SLuigi Rizzo 
2237847bf383SLuigi Rizzo 
2238847bf383SLuigi Rizzo /*
223937e3a6d3SLuigi Rizzo  * update kring and ring at the end of rxsync/txsync.
2240847bf383SLuigi Rizzo  */
2241847bf383SLuigi Rizzo static inline void
224237e3a6d3SLuigi Rizzo nm_sync_finalize(struct netmap_kring *kring)
2243847bf383SLuigi Rizzo {
224437e3a6d3SLuigi Rizzo 	/*
224537e3a6d3SLuigi Rizzo 	 * Update ring tail to what the kernel knows
224637e3a6d3SLuigi Rizzo 	 * After txsync: head/rhead/hwcur might be behind cur/rcur
224737e3a6d3SLuigi Rizzo 	 * if no carrier.
224837e3a6d3SLuigi Rizzo 	 */
2249847bf383SLuigi Rizzo 	kring->ring->tail = kring->rtail = kring->nr_hwtail;
2250847bf383SLuigi Rizzo 
2251847bf383SLuigi Rizzo 	ND(5, "%s now hwcur %d hwtail %d head %d cur %d tail %d",
2252847bf383SLuigi Rizzo 		kring->name, kring->nr_hwcur, kring->nr_hwtail,
2253847bf383SLuigi Rizzo 		kring->rhead, kring->rcur, kring->rtail);
2254847bf383SLuigi Rizzo }
2255847bf383SLuigi Rizzo 
2256c3e9b4dbSLuiz Otavio O Souza /* set ring timestamp */
2257c3e9b4dbSLuiz Otavio O Souza static inline void
2258c3e9b4dbSLuiz Otavio O Souza ring_timestamp_set(struct netmap_ring *ring)
2259c3e9b4dbSLuiz Otavio O Souza {
2260c3e9b4dbSLuiz Otavio O Souza 	if (netmap_no_timestamp == 0 || ring->flags & NR_TIMESTAMP) {
2261c3e9b4dbSLuiz Otavio O Souza 		microtime(&ring->ts);
2262c3e9b4dbSLuiz Otavio O Souza 	}
2263c3e9b4dbSLuiz Otavio O Souza }
2264c3e9b4dbSLuiz Otavio O Souza 
2265*2ff91c17SVincenzo Maffione static int nmreq_copyin(struct nmreq_header *, int);
2266*2ff91c17SVincenzo Maffione static int nmreq_copyout(struct nmreq_header *, int);
2267*2ff91c17SVincenzo Maffione static int nmreq_checkoptions(struct nmreq_header *);
2268c3e9b4dbSLuiz Otavio O Souza 
226968b8534bSLuigi Rizzo /*
227068b8534bSLuigi Rizzo  * ioctl(2) support for the "netmap" device.
227168b8534bSLuigi Rizzo  *
227268b8534bSLuigi Rizzo  * Following a list of accepted commands:
2273*2ff91c17SVincenzo Maffione  * - NIOCCTRL		device control API
2274*2ff91c17SVincenzo Maffione  * - NIOCTXSYNC		sync TX rings
2275*2ff91c17SVincenzo Maffione  * - NIOCRXSYNC		sync RX rings
227668b8534bSLuigi Rizzo  * - SIOCGIFADDR	just for convenience
2277*2ff91c17SVincenzo Maffione  * - NIOCGINFO		deprecated (legacy API)
2278*2ff91c17SVincenzo Maffione  * - NIOCREGIF		deprecated (legacy API)
227968b8534bSLuigi Rizzo  *
228068b8534bSLuigi Rizzo  * Return 0 on success, errno otherwise.
228168b8534bSLuigi Rizzo  */
2282f9790aebSLuigi Rizzo int
2283*2ff91c17SVincenzo Maffione netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data,
2284*2ff91c17SVincenzo Maffione 		struct thread *td, int nr_body_is_user)
228568b8534bSLuigi Rizzo {
2286c3e9b4dbSLuiz Otavio O Souza 	struct mbq q;	/* packets from RX hw queues to host stack */
2287ce3ee1e7SLuigi Rizzo 	struct netmap_adapter *na = NULL;
2288c3e9b4dbSLuiz Otavio O Souza 	struct netmap_mem_d *nmd = NULL;
228937e3a6d3SLuigi Rizzo 	struct ifnet *ifp = NULL;
229037e3a6d3SLuigi Rizzo 	int error = 0;
2291f0ea3689SLuigi Rizzo 	u_int i, qfirst, qlast;
229268b8534bSLuigi Rizzo 	struct netmap_if *nifp;
2293*2ff91c17SVincenzo Maffione 	struct netmap_kring **krings;
2294c3e9b4dbSLuiz Otavio O Souza 	int sync_flags;
2295847bf383SLuigi Rizzo 	enum txrx t;
229668b8534bSLuigi Rizzo 
2297*2ff91c17SVincenzo Maffione 	switch (cmd) {
2298*2ff91c17SVincenzo Maffione 	case NIOCCTRL: {
2299*2ff91c17SVincenzo Maffione 		struct nmreq_header *hdr = (struct nmreq_header *)data;
2300*2ff91c17SVincenzo Maffione 
2301*2ff91c17SVincenzo Maffione 		if (hdr->nr_version != NETMAP_API) {
2302*2ff91c17SVincenzo Maffione 			D("API mismatch for reqtype %d: got %d need %d",
2303*2ff91c17SVincenzo Maffione 				hdr->nr_version,
2304*2ff91c17SVincenzo Maffione 				hdr->nr_version, NETMAP_API);
2305*2ff91c17SVincenzo Maffione 			hdr->nr_version = NETMAP_API;
2306f0ea3689SLuigi Rizzo 		}
2307*2ff91c17SVincenzo Maffione 		if (hdr->nr_version < NETMAP_MIN_API ||
2308*2ff91c17SVincenzo Maffione 		    hdr->nr_version > NETMAP_MAX_API) {
230917885a7bSLuigi Rizzo 			return EINVAL;
231017885a7bSLuigi Rizzo 		}
231168b8534bSLuigi Rizzo 
2312*2ff91c17SVincenzo Maffione 		/* Make a kernel-space copy of the user-space nr_body.
2313*2ff91c17SVincenzo Maffione 		 * For convenince, the nr_body pointer and the pointers
2314*2ff91c17SVincenzo Maffione 		 * in the options list will be replaced with their
2315*2ff91c17SVincenzo Maffione 		 * kernel-space counterparts. The original pointers are
2316*2ff91c17SVincenzo Maffione                 * saved internally and later restored by nmreq_copyout
2317*2ff91c17SVincenzo Maffione                 */
2318*2ff91c17SVincenzo Maffione 		error = nmreq_copyin(hdr, nr_body_is_user);
231937e3a6d3SLuigi Rizzo 		if (error) {
2320*2ff91c17SVincenzo Maffione 			return error;
2321ce3ee1e7SLuigi Rizzo 		}
2322ce3ee1e7SLuigi Rizzo 
2323*2ff91c17SVincenzo Maffione 		/* Sanitize hdr->nr_name. */
2324*2ff91c17SVincenzo Maffione 		hdr->nr_name[sizeof(hdr->nr_name) - 1] = '\0';
232568b8534bSLuigi Rizzo 
2326*2ff91c17SVincenzo Maffione 		switch (hdr->nr_reqtype) {
2327*2ff91c17SVincenzo Maffione 		case NETMAP_REQ_REGISTER: {
2328*2ff91c17SVincenzo Maffione 			struct nmreq_register *req =
2329*2ff91c17SVincenzo Maffione 				(struct nmreq_register *)hdr->nr_body;
2330*2ff91c17SVincenzo Maffione 			/* Protect access to priv from concurrent requests. */
2331ce3ee1e7SLuigi Rizzo 			NMG_LOCK();
2332ce3ee1e7SLuigi Rizzo 			do {
2333ce3ee1e7SLuigi Rizzo 				u_int memflags;
2334*2ff91c17SVincenzo Maffione #ifdef WITH_EXTMEM
2335*2ff91c17SVincenzo Maffione 				struct nmreq_option *opt;
2336*2ff91c17SVincenzo Maffione #endif /* WITH_EXTMEM */
2337ce3ee1e7SLuigi Rizzo 
2338847bf383SLuigi Rizzo 				if (priv->np_nifp != NULL) {	/* thread already registered */
2339f0ea3689SLuigi Rizzo 					error = EBUSY;
2340506cc70cSLuigi Rizzo 					break;
2341506cc70cSLuigi Rizzo 				}
2342c3e9b4dbSLuiz Otavio O Souza 
2343*2ff91c17SVincenzo Maffione #ifdef WITH_EXTMEM
2344*2ff91c17SVincenzo Maffione 				opt = nmreq_findoption((struct nmreq_option *)hdr->nr_options,
2345*2ff91c17SVincenzo Maffione 						NETMAP_REQ_OPT_EXTMEM);
2346*2ff91c17SVincenzo Maffione 				if (opt != NULL) {
2347*2ff91c17SVincenzo Maffione 					struct nmreq_opt_extmem *e =
2348*2ff91c17SVincenzo Maffione 						(struct nmreq_opt_extmem *)opt;
2349*2ff91c17SVincenzo Maffione 
2350*2ff91c17SVincenzo Maffione 					error = nmreq_checkduplicate(opt);
2351*2ff91c17SVincenzo Maffione 					if (error) {
2352*2ff91c17SVincenzo Maffione 						opt->nro_status = error;
2353*2ff91c17SVincenzo Maffione 						break;
2354*2ff91c17SVincenzo Maffione 					}
2355*2ff91c17SVincenzo Maffione 					nmd = netmap_mem_ext_create(e->nro_usrptr,
2356*2ff91c17SVincenzo Maffione 							&e->nro_info, &error);
2357*2ff91c17SVincenzo Maffione 					opt->nro_status = error;
2358*2ff91c17SVincenzo Maffione 					if (nmd == NULL)
2359*2ff91c17SVincenzo Maffione 						break;
2360*2ff91c17SVincenzo Maffione 				}
2361*2ff91c17SVincenzo Maffione #endif /* WITH_EXTMEM */
2362*2ff91c17SVincenzo Maffione 
2363*2ff91c17SVincenzo Maffione 				if (nmd == NULL && req->nr_mem_id) {
2364c3e9b4dbSLuiz Otavio O Souza 					/* find the allocator and get a reference */
2365*2ff91c17SVincenzo Maffione 					nmd = netmap_mem_find(req->nr_mem_id);
2366c3e9b4dbSLuiz Otavio O Souza 					if (nmd == NULL) {
2367c3e9b4dbSLuiz Otavio O Souza 						error = EINVAL;
2368c3e9b4dbSLuiz Otavio O Souza 						break;
2369c3e9b4dbSLuiz Otavio O Souza 					}
2370c3e9b4dbSLuiz Otavio O Souza 				}
237168b8534bSLuigi Rizzo 				/* find the interface and a reference */
2372*2ff91c17SVincenzo Maffione 				error = netmap_get_na(hdr, &na, &ifp, nmd,
237337e3a6d3SLuigi Rizzo 						      1 /* create */); /* keep reference */
237468b8534bSLuigi Rizzo 				if (error)
2375ce3ee1e7SLuigi Rizzo 					break;
2376f9790aebSLuigi Rizzo 				if (NETMAP_OWNED_BY_KERN(na)) {
2377ce3ee1e7SLuigi Rizzo 					error = EBUSY;
2378ce3ee1e7SLuigi Rizzo 					break;
2379f196ce38SLuigi Rizzo 				}
238037e3a6d3SLuigi Rizzo 
2381*2ff91c17SVincenzo Maffione 				if (na->virt_hdr_len && !(req->nr_flags & NR_ACCEPT_VNET_HDR)) {
238237e3a6d3SLuigi Rizzo 					error = EIO;
238337e3a6d3SLuigi Rizzo 					break;
238437e3a6d3SLuigi Rizzo 				}
238537e3a6d3SLuigi Rizzo 
2386*2ff91c17SVincenzo Maffione 				error = netmap_do_regif(priv, na, req->nr_mode,
2387*2ff91c17SVincenzo Maffione 							req->nr_ringid, req->nr_flags);
2388847bf383SLuigi Rizzo 				if (error) {    /* reg. failed, release priv and ref */
2389ce3ee1e7SLuigi Rizzo 					break;
239068b8534bSLuigi Rizzo 				}
2391847bf383SLuigi Rizzo 				nifp = priv->np_nifp;
2392*2ff91c17SVincenzo Maffione 				priv->np_td = td; /* for debugging purposes */
239368b8534bSLuigi Rizzo 
239468b8534bSLuigi Rizzo 				/* return the offset of the netmap_if object */
2395*2ff91c17SVincenzo Maffione 				req->nr_rx_rings = na->num_rx_rings;
2396*2ff91c17SVincenzo Maffione 				req->nr_tx_rings = na->num_tx_rings;
2397*2ff91c17SVincenzo Maffione 				req->nr_rx_slots = na->num_rx_desc;
2398*2ff91c17SVincenzo Maffione 				req->nr_tx_slots = na->num_tx_desc;
2399*2ff91c17SVincenzo Maffione 				error = netmap_mem_get_info(na->nm_mem, &req->nr_memsize, &memflags,
2400*2ff91c17SVincenzo Maffione 					&req->nr_mem_id);
2401ce3ee1e7SLuigi Rizzo 				if (error) {
2402847bf383SLuigi Rizzo 					netmap_do_unregif(priv);
2403ce3ee1e7SLuigi Rizzo 					break;
2404ce3ee1e7SLuigi Rizzo 				}
2405ce3ee1e7SLuigi Rizzo 				if (memflags & NETMAP_MEM_PRIVATE) {
24063d819cb6SLuigi Rizzo 					*(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM;
2407ce3ee1e7SLuigi Rizzo 				}
2408847bf383SLuigi Rizzo 				for_rx_tx(t) {
2409847bf383SLuigi Rizzo 					priv->np_si[t] = nm_si_user(priv, t) ?
2410*2ff91c17SVincenzo Maffione 						&na->si[t] : &NMR(na, t)[priv->np_qfirst[t]]->si;
2411847bf383SLuigi Rizzo 				}
2412f0ea3689SLuigi Rizzo 
2413*2ff91c17SVincenzo Maffione 				if (req->nr_extra_bufs) {
241437e3a6d3SLuigi Rizzo 					if (netmap_verbose)
2415*2ff91c17SVincenzo Maffione 						D("requested %d extra buffers",
2416*2ff91c17SVincenzo Maffione 							req->nr_extra_bufs);
2417*2ff91c17SVincenzo Maffione 					req->nr_extra_bufs = netmap_extra_alloc(na,
2418*2ff91c17SVincenzo Maffione 						&nifp->ni_bufs_head, req->nr_extra_bufs);
241937e3a6d3SLuigi Rizzo 					if (netmap_verbose)
2420*2ff91c17SVincenzo Maffione 						D("got %d extra buffers", req->nr_extra_bufs);
2421f0ea3689SLuigi Rizzo 				}
2422*2ff91c17SVincenzo Maffione 				req->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp);
2423*2ff91c17SVincenzo Maffione 
2424*2ff91c17SVincenzo Maffione 				error = nmreq_checkoptions(hdr);
2425*2ff91c17SVincenzo Maffione 				if (error) {
2426*2ff91c17SVincenzo Maffione 					netmap_do_unregif(priv);
2427*2ff91c17SVincenzo Maffione 					break;
2428*2ff91c17SVincenzo Maffione 				}
242937e3a6d3SLuigi Rizzo 
243037e3a6d3SLuigi Rizzo 				/* store ifp reference so that priv destructor may release it */
243137e3a6d3SLuigi Rizzo 				priv->np_ifp = ifp;
2432ce3ee1e7SLuigi Rizzo 			} while (0);
2433c3e9b4dbSLuiz Otavio O Souza 			if (error) {
2434c3e9b4dbSLuiz Otavio O Souza 				netmap_unget_na(na, ifp);
2435c3e9b4dbSLuiz Otavio O Souza 			}
2436c3e9b4dbSLuiz Otavio O Souza 			/* release the reference from netmap_mem_find() or
2437c3e9b4dbSLuiz Otavio O Souza 			 * netmap_mem_ext_create()
2438c3e9b4dbSLuiz Otavio O Souza 			 */
2439c3e9b4dbSLuiz Otavio O Souza 			if (nmd)
2440c3e9b4dbSLuiz Otavio O Souza 				netmap_mem_put(nmd);
2441ce3ee1e7SLuigi Rizzo 			NMG_UNLOCK();
244268b8534bSLuigi Rizzo 			break;
2443*2ff91c17SVincenzo Maffione 		}
2444*2ff91c17SVincenzo Maffione 
2445*2ff91c17SVincenzo Maffione 		case NETMAP_REQ_PORT_INFO_GET: {
2446*2ff91c17SVincenzo Maffione 			struct nmreq_port_info_get *req =
2447*2ff91c17SVincenzo Maffione 				(struct nmreq_port_info_get *)hdr->nr_body;
2448*2ff91c17SVincenzo Maffione 
2449*2ff91c17SVincenzo Maffione 			NMG_LOCK();
2450*2ff91c17SVincenzo Maffione 			do {
2451*2ff91c17SVincenzo Maffione 				u_int memflags;
2452*2ff91c17SVincenzo Maffione 
2453*2ff91c17SVincenzo Maffione 				if (hdr->nr_name[0] != '\0') {
2454*2ff91c17SVincenzo Maffione 					/* Build a nmreq_register out of the nmreq_port_info_get,
2455*2ff91c17SVincenzo Maffione 					 * so that we can call netmap_get_na(). */
2456*2ff91c17SVincenzo Maffione 					struct nmreq_register regreq;
2457*2ff91c17SVincenzo Maffione 					bzero(&regreq, sizeof(regreq));
2458*2ff91c17SVincenzo Maffione 					regreq.nr_tx_slots = req->nr_tx_slots;
2459*2ff91c17SVincenzo Maffione 					regreq.nr_rx_slots = req->nr_rx_slots;
2460*2ff91c17SVincenzo Maffione 					regreq.nr_tx_rings = req->nr_tx_rings;
2461*2ff91c17SVincenzo Maffione 					regreq.nr_rx_rings = req->nr_rx_rings;
2462*2ff91c17SVincenzo Maffione 					regreq.nr_mem_id = req->nr_mem_id;
2463*2ff91c17SVincenzo Maffione 
2464*2ff91c17SVincenzo Maffione 					/* get a refcount */
2465*2ff91c17SVincenzo Maffione 					hdr->nr_reqtype = NETMAP_REQ_REGISTER;
2466*2ff91c17SVincenzo Maffione 					hdr->nr_body = (uint64_t)&regreq;
2467*2ff91c17SVincenzo Maffione 					error = netmap_get_na(hdr, &na, &ifp, NULL, 1 /* create */);
2468*2ff91c17SVincenzo Maffione 					hdr->nr_reqtype = NETMAP_REQ_PORT_INFO_GET; /* reset type */
2469*2ff91c17SVincenzo Maffione 					hdr->nr_body = (uint64_t)req; /* reset nr_body */
2470*2ff91c17SVincenzo Maffione 					if (error) {
2471*2ff91c17SVincenzo Maffione 						na = NULL;
2472*2ff91c17SVincenzo Maffione 						ifp = NULL;
2473*2ff91c17SVincenzo Maffione 						break;
2474*2ff91c17SVincenzo Maffione 					}
2475*2ff91c17SVincenzo Maffione 					nmd = na->nm_mem; /* get memory allocator */
2476*2ff91c17SVincenzo Maffione 				} else {
2477*2ff91c17SVincenzo Maffione 					nmd = netmap_mem_find(req->nr_mem_id ? req->nr_mem_id : 1);
2478*2ff91c17SVincenzo Maffione 					if (nmd == NULL) {
2479*2ff91c17SVincenzo Maffione 						error = EINVAL;
2480*2ff91c17SVincenzo Maffione 						break;
2481*2ff91c17SVincenzo Maffione 					}
2482*2ff91c17SVincenzo Maffione 				}
2483*2ff91c17SVincenzo Maffione 
2484*2ff91c17SVincenzo Maffione 				error = netmap_mem_get_info(nmd, &req->nr_memsize, &memflags,
2485*2ff91c17SVincenzo Maffione 					&req->nr_mem_id);
2486*2ff91c17SVincenzo Maffione 				if (error)
2487*2ff91c17SVincenzo Maffione 					break;
2488*2ff91c17SVincenzo Maffione 				if (na == NULL) /* only memory info */
2489*2ff91c17SVincenzo Maffione 					break;
2490*2ff91c17SVincenzo Maffione 				req->nr_offset = 0;
2491*2ff91c17SVincenzo Maffione 				req->nr_rx_slots = req->nr_tx_slots = 0;
2492*2ff91c17SVincenzo Maffione 				netmap_update_config(na);
2493*2ff91c17SVincenzo Maffione 				req->nr_rx_rings = na->num_rx_rings;
2494*2ff91c17SVincenzo Maffione 				req->nr_tx_rings = na->num_tx_rings;
2495*2ff91c17SVincenzo Maffione 				req->nr_rx_slots = na->num_rx_desc;
2496*2ff91c17SVincenzo Maffione 				req->nr_tx_slots = na->num_tx_desc;
2497*2ff91c17SVincenzo Maffione 			} while (0);
2498*2ff91c17SVincenzo Maffione 			netmap_unget_na(na, ifp);
2499*2ff91c17SVincenzo Maffione 			NMG_UNLOCK();
2500*2ff91c17SVincenzo Maffione 			break;
2501*2ff91c17SVincenzo Maffione 		}
2502*2ff91c17SVincenzo Maffione #ifdef WITH_VALE
2503*2ff91c17SVincenzo Maffione 		case NETMAP_REQ_VALE_ATTACH: {
2504*2ff91c17SVincenzo Maffione 			error = nm_bdg_ctl_attach(hdr, NULL /* userspace request */);
2505*2ff91c17SVincenzo Maffione 			break;
2506*2ff91c17SVincenzo Maffione 		}
2507*2ff91c17SVincenzo Maffione 
2508*2ff91c17SVincenzo Maffione 		case NETMAP_REQ_VALE_DETACH: {
2509*2ff91c17SVincenzo Maffione 			error = nm_bdg_ctl_detach(hdr, NULL /* userspace request */);
2510*2ff91c17SVincenzo Maffione 			break;
2511*2ff91c17SVincenzo Maffione 		}
2512*2ff91c17SVincenzo Maffione 
2513*2ff91c17SVincenzo Maffione 		case NETMAP_REQ_VALE_LIST: {
2514*2ff91c17SVincenzo Maffione 			error = netmap_bdg_list(hdr);
2515*2ff91c17SVincenzo Maffione 			break;
2516*2ff91c17SVincenzo Maffione 		}
2517*2ff91c17SVincenzo Maffione 
2518*2ff91c17SVincenzo Maffione 		case NETMAP_REQ_PORT_HDR_SET: {
2519*2ff91c17SVincenzo Maffione 			struct nmreq_port_hdr *req =
2520*2ff91c17SVincenzo Maffione 				(struct nmreq_port_hdr *)hdr->nr_body;
2521*2ff91c17SVincenzo Maffione 			/* Build a nmreq_register out of the nmreq_port_hdr,
2522*2ff91c17SVincenzo Maffione 			 * so that we can call netmap_get_bdg_na(). */
2523*2ff91c17SVincenzo Maffione 			struct nmreq_register regreq;
2524*2ff91c17SVincenzo Maffione 			bzero(&regreq, sizeof(regreq));
2525*2ff91c17SVincenzo Maffione 			/* For now we only support virtio-net headers, and only for
2526*2ff91c17SVincenzo Maffione 			 * VALE ports, but this may change in future. Valid lengths
2527*2ff91c17SVincenzo Maffione 			 * for the virtio-net header are 0 (no header), 10 and 12. */
2528*2ff91c17SVincenzo Maffione 			if (req->nr_hdr_len != 0 &&
2529*2ff91c17SVincenzo Maffione 				req->nr_hdr_len != sizeof(struct nm_vnet_hdr) &&
2530*2ff91c17SVincenzo Maffione 					req->nr_hdr_len != 12) {
2531*2ff91c17SVincenzo Maffione 				error = EINVAL;
2532*2ff91c17SVincenzo Maffione 				break;
2533*2ff91c17SVincenzo Maffione 			}
2534*2ff91c17SVincenzo Maffione 			NMG_LOCK();
2535*2ff91c17SVincenzo Maffione 			hdr->nr_reqtype = NETMAP_REQ_REGISTER;
2536*2ff91c17SVincenzo Maffione 			hdr->nr_body = (uint64_t)&regreq;
2537*2ff91c17SVincenzo Maffione 			error = netmap_get_bdg_na(hdr, &na, NULL, 0);
2538*2ff91c17SVincenzo Maffione 			hdr->nr_reqtype = NETMAP_REQ_PORT_HDR_SET;
2539*2ff91c17SVincenzo Maffione 			hdr->nr_body = (uint64_t)req;
2540*2ff91c17SVincenzo Maffione 			if (na && !error) {
2541*2ff91c17SVincenzo Maffione 				struct netmap_vp_adapter *vpna =
2542*2ff91c17SVincenzo Maffione 					(struct netmap_vp_adapter *)na;
2543*2ff91c17SVincenzo Maffione 				na->virt_hdr_len = req->nr_hdr_len;
2544*2ff91c17SVincenzo Maffione 				if (na->virt_hdr_len) {
2545*2ff91c17SVincenzo Maffione 					vpna->mfs = NETMAP_BUF_SIZE(na);
2546*2ff91c17SVincenzo Maffione 				}
2547*2ff91c17SVincenzo Maffione 				D("Using vnet_hdr_len %d for %p", na->virt_hdr_len, na);
2548*2ff91c17SVincenzo Maffione 				netmap_adapter_put(na);
2549*2ff91c17SVincenzo Maffione 			} else if (!na) {
2550*2ff91c17SVincenzo Maffione 				error = ENXIO;
2551*2ff91c17SVincenzo Maffione 			}
2552*2ff91c17SVincenzo Maffione 			NMG_UNLOCK();
2553*2ff91c17SVincenzo Maffione 			break;
2554*2ff91c17SVincenzo Maffione 		}
2555*2ff91c17SVincenzo Maffione 
2556*2ff91c17SVincenzo Maffione 		case NETMAP_REQ_PORT_HDR_GET: {
2557*2ff91c17SVincenzo Maffione 			/* Get vnet-header length for this netmap port */
2558*2ff91c17SVincenzo Maffione 			struct nmreq_port_hdr *req =
2559*2ff91c17SVincenzo Maffione 				(struct nmreq_port_hdr *)hdr->nr_body;
2560*2ff91c17SVincenzo Maffione 			/* Build a nmreq_register out of the nmreq_port_hdr,
2561*2ff91c17SVincenzo Maffione 			 * so that we can call netmap_get_bdg_na(). */
2562*2ff91c17SVincenzo Maffione 			struct nmreq_register regreq;
2563*2ff91c17SVincenzo Maffione 			struct ifnet *ifp;
2564*2ff91c17SVincenzo Maffione 
2565*2ff91c17SVincenzo Maffione 			bzero(&regreq, sizeof(regreq));
2566*2ff91c17SVincenzo Maffione 			NMG_LOCK();
2567*2ff91c17SVincenzo Maffione 			hdr->nr_reqtype = NETMAP_REQ_REGISTER;
2568*2ff91c17SVincenzo Maffione 			hdr->nr_body = (uint64_t)&regreq;
2569*2ff91c17SVincenzo Maffione 			error = netmap_get_na(hdr, &na, &ifp, NULL, 0);
2570*2ff91c17SVincenzo Maffione 			hdr->nr_reqtype = NETMAP_REQ_PORT_HDR_GET;
2571*2ff91c17SVincenzo Maffione 			hdr->nr_body = (uint64_t)req;
2572*2ff91c17SVincenzo Maffione 			if (na && !error) {
2573*2ff91c17SVincenzo Maffione 				req->nr_hdr_len = na->virt_hdr_len;
2574*2ff91c17SVincenzo Maffione 			}
2575*2ff91c17SVincenzo Maffione 			netmap_unget_na(na, ifp);
2576*2ff91c17SVincenzo Maffione 			NMG_UNLOCK();
2577*2ff91c17SVincenzo Maffione 			break;
2578*2ff91c17SVincenzo Maffione 		}
2579*2ff91c17SVincenzo Maffione 
2580*2ff91c17SVincenzo Maffione 		case NETMAP_REQ_VALE_NEWIF: {
2581*2ff91c17SVincenzo Maffione 			error = nm_vi_create(hdr);
2582*2ff91c17SVincenzo Maffione 			break;
2583*2ff91c17SVincenzo Maffione 		}
2584*2ff91c17SVincenzo Maffione 
2585*2ff91c17SVincenzo Maffione 		case NETMAP_REQ_VALE_DELIF: {
2586*2ff91c17SVincenzo Maffione 			error = nm_vi_destroy(hdr->nr_name);
2587*2ff91c17SVincenzo Maffione 			break;
2588*2ff91c17SVincenzo Maffione 		}
2589*2ff91c17SVincenzo Maffione 
2590*2ff91c17SVincenzo Maffione 		case NETMAP_REQ_VALE_POLLING_ENABLE:
2591*2ff91c17SVincenzo Maffione 		case NETMAP_REQ_VALE_POLLING_DISABLE: {
2592*2ff91c17SVincenzo Maffione 			error = nm_bdg_polling(hdr);
2593*2ff91c17SVincenzo Maffione 			break;
2594*2ff91c17SVincenzo Maffione 		}
2595*2ff91c17SVincenzo Maffione #endif  /* WITH_VALE */
2596*2ff91c17SVincenzo Maffione 		case NETMAP_REQ_POOLS_INFO_GET: {
2597*2ff91c17SVincenzo Maffione 			struct nmreq_pools_info *req =
2598*2ff91c17SVincenzo Maffione 				(struct nmreq_pools_info *)hdr->nr_body;
2599*2ff91c17SVincenzo Maffione 			/* Get information from the memory allocator. This
2600*2ff91c17SVincenzo Maffione 			 * netmap device must already be bound to a port.
2601*2ff91c17SVincenzo Maffione 			 * Note that hdr->nr_name is ignored. */
2602*2ff91c17SVincenzo Maffione 			NMG_LOCK();
2603*2ff91c17SVincenzo Maffione 			if (priv->np_na && priv->np_na->nm_mem) {
2604*2ff91c17SVincenzo Maffione 				struct netmap_mem_d *nmd = priv->np_na->nm_mem;
2605*2ff91c17SVincenzo Maffione 				error = netmap_mem_pools_info_get(req, nmd);
2606*2ff91c17SVincenzo Maffione 			} else {
2607*2ff91c17SVincenzo Maffione 				error = EINVAL;
2608*2ff91c17SVincenzo Maffione 			}
2609*2ff91c17SVincenzo Maffione 			NMG_UNLOCK();
2610*2ff91c17SVincenzo Maffione 			break;
2611*2ff91c17SVincenzo Maffione 		}
2612*2ff91c17SVincenzo Maffione 
2613*2ff91c17SVincenzo Maffione 		default: {
2614*2ff91c17SVincenzo Maffione 			error = EINVAL;
2615*2ff91c17SVincenzo Maffione 			break;
2616*2ff91c17SVincenzo Maffione 		}
2617*2ff91c17SVincenzo Maffione 		}
2618*2ff91c17SVincenzo Maffione 		/* Write back request body to userspace and reset the
2619*2ff91c17SVincenzo Maffione 		 * user-space pointer. */
2620*2ff91c17SVincenzo Maffione 		error = nmreq_copyout(hdr, error);
2621*2ff91c17SVincenzo Maffione 		break;
2622*2ff91c17SVincenzo Maffione 	}
262368b8534bSLuigi Rizzo 
262468b8534bSLuigi Rizzo 	case NIOCTXSYNC:
2625*2ff91c17SVincenzo Maffione 	case NIOCRXSYNC: {
26268241616dSLuigi Rizzo 		nifp = priv->np_nifp;
26278241616dSLuigi Rizzo 
26288241616dSLuigi Rizzo 		if (nifp == NULL) {
2629506cc70cSLuigi Rizzo 			error = ENXIO;
2630506cc70cSLuigi Rizzo 			break;
2631506cc70cSLuigi Rizzo 		}
26326641c68bSLuigi Rizzo 		mb(); /* make sure following reads are not from cache */
26338241616dSLuigi Rizzo 
2634f9790aebSLuigi Rizzo 		na = priv->np_na;      /* we have a reference */
26358241616dSLuigi Rizzo 
2636f9790aebSLuigi Rizzo 		if (na == NULL) {
2637f9790aebSLuigi Rizzo 			D("Internal error: nifp != NULL && na == NULL");
26388241616dSLuigi Rizzo 			error = ENXIO;
26398241616dSLuigi Rizzo 			break;
26408241616dSLuigi Rizzo 		}
26418241616dSLuigi Rizzo 
2642c3e9b4dbSLuiz Otavio O Souza 		mbq_init(&q);
2643847bf383SLuigi Rizzo 		t = (cmd == NIOCTXSYNC ? NR_TX : NR_RX);
2644847bf383SLuigi Rizzo 		krings = NMR(na, t);
2645847bf383SLuigi Rizzo 		qfirst = priv->np_qfirst[t];
2646847bf383SLuigi Rizzo 		qlast = priv->np_qlast[t];
2647c3e9b4dbSLuiz Otavio O Souza 		sync_flags = priv->np_sync_flags;
264868b8534bSLuigi Rizzo 
2649f0ea3689SLuigi Rizzo 		for (i = qfirst; i < qlast; i++) {
2650*2ff91c17SVincenzo Maffione 			struct netmap_kring *kring = krings[i];
265137e3a6d3SLuigi Rizzo 			struct netmap_ring *ring = kring->ring;
265237e3a6d3SLuigi Rizzo 
265337e3a6d3SLuigi Rizzo 			if (unlikely(nm_kr_tryget(kring, 1, &error))) {
265437e3a6d3SLuigi Rizzo 				error = (error ? EIO : 0);
265537e3a6d3SLuigi Rizzo 				continue;
2656ce3ee1e7SLuigi Rizzo 			}
265737e3a6d3SLuigi Rizzo 
265868b8534bSLuigi Rizzo 			if (cmd == NIOCTXSYNC) {
265968b8534bSLuigi Rizzo 				if (netmap_verbose & NM_VERB_TXSYNC)
26603c0caf6cSLuigi Rizzo 					D("pre txsync ring %d cur %d hwcur %d",
266137e3a6d3SLuigi Rizzo 					    i, ring->cur,
266268b8534bSLuigi Rizzo 					    kring->nr_hwcur);
266337e3a6d3SLuigi Rizzo 				if (nm_txsync_prologue(kring, ring) >= kring->nkr_num_slots) {
266417885a7bSLuigi Rizzo 					netmap_ring_reinit(kring);
2665c3e9b4dbSLuiz Otavio O Souza 				} else if (kring->nm_sync(kring, sync_flags | NAF_FORCE_RECLAIM) == 0) {
266637e3a6d3SLuigi Rizzo 					nm_sync_finalize(kring);
266717885a7bSLuigi Rizzo 				}
266868b8534bSLuigi Rizzo 				if (netmap_verbose & NM_VERB_TXSYNC)
26693c0caf6cSLuigi Rizzo 					D("post txsync ring %d cur %d hwcur %d",
267037e3a6d3SLuigi Rizzo 					    i, ring->cur,
267168b8534bSLuigi Rizzo 					    kring->nr_hwcur);
267268b8534bSLuigi Rizzo 			} else {
267337e3a6d3SLuigi Rizzo 				if (nm_rxsync_prologue(kring, ring) >= kring->nkr_num_slots) {
2674847bf383SLuigi Rizzo 					netmap_ring_reinit(kring);
2675c3e9b4dbSLuiz Otavio O Souza 				}
2676c3e9b4dbSLuiz Otavio O Souza 				if (nm_may_forward_up(kring)) {
2677c3e9b4dbSLuiz Otavio O Souza 					/* transparent forwarding, see netmap_poll() */
2678c3e9b4dbSLuiz Otavio O Souza 					netmap_grab_packets(kring, &q, netmap_fwd);
2679c3e9b4dbSLuiz Otavio O Souza 				}
2680c3e9b4dbSLuiz Otavio O Souza 				if (kring->nm_sync(kring, sync_flags | NAF_FORCE_READ) == 0) {
268137e3a6d3SLuigi Rizzo 					nm_sync_finalize(kring);
2682847bf383SLuigi Rizzo 				}
2683c3e9b4dbSLuiz Otavio O Souza 				ring_timestamp_set(ring);
268468b8534bSLuigi Rizzo 			}
2685ce3ee1e7SLuigi Rizzo 			nm_kr_put(kring);
268668b8534bSLuigi Rizzo 		}
268768b8534bSLuigi Rizzo 
2688c3e9b4dbSLuiz Otavio O Souza 		if (mbq_peek(&q)) {
2689c3e9b4dbSLuiz Otavio O Souza 			netmap_send_up(na->ifp, &q);
2690c3e9b4dbSLuiz Otavio O Souza 		}
2691c3e9b4dbSLuiz Otavio O Souza 
269268b8534bSLuigi Rizzo 		break;
269368b8534bSLuigi Rizzo 	}
2694f196ce38SLuigi Rizzo 
2695*2ff91c17SVincenzo Maffione 	default: {
2696*2ff91c17SVincenzo Maffione 		return netmap_ioctl_legacy(priv, cmd, data, td);
2697*2ff91c17SVincenzo Maffione 		break;
2698*2ff91c17SVincenzo Maffione 	}
269968b8534bSLuigi Rizzo 	}
270068b8534bSLuigi Rizzo 
270168b8534bSLuigi Rizzo 	return (error);
270268b8534bSLuigi Rizzo }
270368b8534bSLuigi Rizzo 
2704*2ff91c17SVincenzo Maffione size_t
2705*2ff91c17SVincenzo Maffione nmreq_size_by_type(uint16_t nr_reqtype)
2706*2ff91c17SVincenzo Maffione {
2707*2ff91c17SVincenzo Maffione 	switch (nr_reqtype) {
2708*2ff91c17SVincenzo Maffione 	case NETMAP_REQ_REGISTER:
2709*2ff91c17SVincenzo Maffione 		return sizeof(struct nmreq_register);
2710*2ff91c17SVincenzo Maffione 	case NETMAP_REQ_PORT_INFO_GET:
2711*2ff91c17SVincenzo Maffione 		return sizeof(struct nmreq_port_info_get);
2712*2ff91c17SVincenzo Maffione 	case NETMAP_REQ_VALE_ATTACH:
2713*2ff91c17SVincenzo Maffione 		return sizeof(struct nmreq_vale_attach);
2714*2ff91c17SVincenzo Maffione 	case NETMAP_REQ_VALE_DETACH:
2715*2ff91c17SVincenzo Maffione 		return sizeof(struct nmreq_vale_detach);
2716*2ff91c17SVincenzo Maffione 	case NETMAP_REQ_VALE_LIST:
2717*2ff91c17SVincenzo Maffione 		return sizeof(struct nmreq_vale_list);
2718*2ff91c17SVincenzo Maffione 	case NETMAP_REQ_PORT_HDR_SET:
2719*2ff91c17SVincenzo Maffione 	case NETMAP_REQ_PORT_HDR_GET:
2720*2ff91c17SVincenzo Maffione 		return sizeof(struct nmreq_port_hdr);
2721*2ff91c17SVincenzo Maffione 	case NETMAP_REQ_VALE_NEWIF:
2722*2ff91c17SVincenzo Maffione 		return sizeof(struct nmreq_vale_newif);
2723*2ff91c17SVincenzo Maffione 	case NETMAP_REQ_VALE_DELIF:
2724*2ff91c17SVincenzo Maffione 		return 0;
2725*2ff91c17SVincenzo Maffione 	case NETMAP_REQ_VALE_POLLING_ENABLE:
2726*2ff91c17SVincenzo Maffione 	case NETMAP_REQ_VALE_POLLING_DISABLE:
2727*2ff91c17SVincenzo Maffione 		return sizeof(struct nmreq_vale_polling);
2728*2ff91c17SVincenzo Maffione 	case NETMAP_REQ_POOLS_INFO_GET:
2729*2ff91c17SVincenzo Maffione 		return sizeof(struct nmreq_pools_info);
2730*2ff91c17SVincenzo Maffione 	}
2731*2ff91c17SVincenzo Maffione 	return 0;
2732*2ff91c17SVincenzo Maffione }
2733*2ff91c17SVincenzo Maffione 
2734*2ff91c17SVincenzo Maffione static size_t
2735*2ff91c17SVincenzo Maffione nmreq_opt_size_by_type(uint16_t nro_reqtype)
2736*2ff91c17SVincenzo Maffione {
2737*2ff91c17SVincenzo Maffione 	size_t rv = sizeof(struct nmreq_option);
2738*2ff91c17SVincenzo Maffione #ifdef NETMAP_REQ_OPT_DEBUG
2739*2ff91c17SVincenzo Maffione 	if (nro_reqtype & NETMAP_REQ_OPT_DEBUG)
2740*2ff91c17SVincenzo Maffione 		return (nro_reqtype & ~NETMAP_REQ_OPT_DEBUG);
2741*2ff91c17SVincenzo Maffione #endif /* NETMAP_REQ_OPT_DEBUG */
2742*2ff91c17SVincenzo Maffione 	switch (nro_reqtype) {
2743*2ff91c17SVincenzo Maffione #ifdef WITH_EXTMEM
2744*2ff91c17SVincenzo Maffione 	case NETMAP_REQ_OPT_EXTMEM:
2745*2ff91c17SVincenzo Maffione 		rv = sizeof(struct nmreq_opt_extmem);
2746*2ff91c17SVincenzo Maffione 		break;
2747*2ff91c17SVincenzo Maffione #endif /* WITH_EXTMEM */
2748*2ff91c17SVincenzo Maffione 	}
2749*2ff91c17SVincenzo Maffione 	/* subtract the common header */
2750*2ff91c17SVincenzo Maffione 	return rv - sizeof(struct nmreq_option);
2751*2ff91c17SVincenzo Maffione }
2752*2ff91c17SVincenzo Maffione 
2753*2ff91c17SVincenzo Maffione int
2754*2ff91c17SVincenzo Maffione nmreq_copyin(struct nmreq_header *hdr, int nr_body_is_user)
2755*2ff91c17SVincenzo Maffione {
2756*2ff91c17SVincenzo Maffione 	size_t rqsz, optsz, bufsz;
2757*2ff91c17SVincenzo Maffione 	int error;
2758*2ff91c17SVincenzo Maffione 	char *ker = NULL, *p;
2759*2ff91c17SVincenzo Maffione 	struct nmreq_option **next, *src;
2760*2ff91c17SVincenzo Maffione 	struct nmreq_option buf;
2761*2ff91c17SVincenzo Maffione 	uint64_t *ptrs;
2762*2ff91c17SVincenzo Maffione 
2763*2ff91c17SVincenzo Maffione 	if (hdr->nr_reserved)
2764*2ff91c17SVincenzo Maffione 		return EINVAL;
2765*2ff91c17SVincenzo Maffione 
2766*2ff91c17SVincenzo Maffione 	if (!nr_body_is_user)
2767*2ff91c17SVincenzo Maffione 		return 0;
2768*2ff91c17SVincenzo Maffione 
2769*2ff91c17SVincenzo Maffione 	hdr->nr_reserved = nr_body_is_user;
2770*2ff91c17SVincenzo Maffione 
2771*2ff91c17SVincenzo Maffione 	/* compute the total size of the buffer */
2772*2ff91c17SVincenzo Maffione 	rqsz = nmreq_size_by_type(hdr->nr_reqtype);
2773*2ff91c17SVincenzo Maffione 	if (rqsz > NETMAP_REQ_MAXSIZE) {
2774*2ff91c17SVincenzo Maffione 		error = EMSGSIZE;
2775*2ff91c17SVincenzo Maffione 		goto out_err;
2776*2ff91c17SVincenzo Maffione 	}
2777*2ff91c17SVincenzo Maffione 	if ((rqsz && hdr->nr_body == (uint64_t)NULL) ||
2778*2ff91c17SVincenzo Maffione 		(!rqsz && hdr->nr_body != (uint64_t)NULL)) {
2779*2ff91c17SVincenzo Maffione 		/* Request body expected, but not found; or
2780*2ff91c17SVincenzo Maffione 		 * request body found but unexpected. */
2781*2ff91c17SVincenzo Maffione 		error = EINVAL;
2782*2ff91c17SVincenzo Maffione 		goto out_err;
2783*2ff91c17SVincenzo Maffione 	}
2784*2ff91c17SVincenzo Maffione 
2785*2ff91c17SVincenzo Maffione 	bufsz = 2 * sizeof(void *) + rqsz;
2786*2ff91c17SVincenzo Maffione 	optsz = 0;
2787*2ff91c17SVincenzo Maffione 	for (src = (struct nmreq_option *)hdr->nr_options; src;
2788*2ff91c17SVincenzo Maffione 	     src = (struct nmreq_option *)buf.nro_next)
2789*2ff91c17SVincenzo Maffione 	{
2790*2ff91c17SVincenzo Maffione 		error = copyin(src, &buf, sizeof(*src));
2791*2ff91c17SVincenzo Maffione 		if (error)
2792*2ff91c17SVincenzo Maffione 			goto out_err;
2793*2ff91c17SVincenzo Maffione 		optsz += sizeof(*src);
2794*2ff91c17SVincenzo Maffione 		optsz += nmreq_opt_size_by_type(buf.nro_reqtype);
2795*2ff91c17SVincenzo Maffione 		if (rqsz + optsz > NETMAP_REQ_MAXSIZE) {
2796*2ff91c17SVincenzo Maffione 			error = EMSGSIZE;
2797*2ff91c17SVincenzo Maffione 			goto out_err;
2798*2ff91c17SVincenzo Maffione 		}
2799*2ff91c17SVincenzo Maffione 		bufsz += optsz + sizeof(void *);
2800*2ff91c17SVincenzo Maffione 	}
2801*2ff91c17SVincenzo Maffione 
2802*2ff91c17SVincenzo Maffione 	ker = nm_os_malloc(bufsz);
2803*2ff91c17SVincenzo Maffione 	if (ker == NULL) {
2804*2ff91c17SVincenzo Maffione 		error = ENOMEM;
2805*2ff91c17SVincenzo Maffione 		goto out_err;
2806*2ff91c17SVincenzo Maffione 	}
2807*2ff91c17SVincenzo Maffione 	p = ker;
2808*2ff91c17SVincenzo Maffione 
2809*2ff91c17SVincenzo Maffione 	/* make a copy of the user pointers */
2810*2ff91c17SVincenzo Maffione 	ptrs = (uint64_t*)p;
2811*2ff91c17SVincenzo Maffione 	*ptrs++ = hdr->nr_body;
2812*2ff91c17SVincenzo Maffione 	*ptrs++ = hdr->nr_options;
2813*2ff91c17SVincenzo Maffione 	p = (char *)ptrs;
2814*2ff91c17SVincenzo Maffione 
2815*2ff91c17SVincenzo Maffione 	/* copy the body */
2816*2ff91c17SVincenzo Maffione 	error = copyin((void *)hdr->nr_body, p, rqsz);
2817*2ff91c17SVincenzo Maffione 	if (error)
2818*2ff91c17SVincenzo Maffione 		goto out_restore;
2819*2ff91c17SVincenzo Maffione 	/* overwrite the user pointer with the in-kernel one */
2820*2ff91c17SVincenzo Maffione 	hdr->nr_body = (uint64_t)p;
2821*2ff91c17SVincenzo Maffione 	p += rqsz;
2822*2ff91c17SVincenzo Maffione 
2823*2ff91c17SVincenzo Maffione 	/* copy the options */
2824*2ff91c17SVincenzo Maffione 	next = (struct nmreq_option **)&hdr->nr_options;
2825*2ff91c17SVincenzo Maffione 	src = *next;
2826*2ff91c17SVincenzo Maffione 	while (src) {
2827*2ff91c17SVincenzo Maffione 		struct nmreq_option *opt;
2828*2ff91c17SVincenzo Maffione 
2829*2ff91c17SVincenzo Maffione 		/* copy the option header */
2830*2ff91c17SVincenzo Maffione 		ptrs = (uint64_t *)p;
2831*2ff91c17SVincenzo Maffione 		opt = (struct nmreq_option *)(ptrs + 1);
2832*2ff91c17SVincenzo Maffione 		error = copyin(src, opt, sizeof(*src));
2833*2ff91c17SVincenzo Maffione 		if (error)
2834*2ff91c17SVincenzo Maffione 			goto out_restore;
2835*2ff91c17SVincenzo Maffione 		/* make a copy of the user next pointer */
2836*2ff91c17SVincenzo Maffione 		*ptrs = opt->nro_next;
2837*2ff91c17SVincenzo Maffione 		/* overwrite the user pointer with the in-kernel one */
2838*2ff91c17SVincenzo Maffione 		*next = opt;
2839*2ff91c17SVincenzo Maffione 
2840*2ff91c17SVincenzo Maffione 		/* initialize the option as not supported.
2841*2ff91c17SVincenzo Maffione 		 * Recognized options will update this field.
2842*2ff91c17SVincenzo Maffione 		 */
2843*2ff91c17SVincenzo Maffione 		opt->nro_status = EOPNOTSUPP;
2844*2ff91c17SVincenzo Maffione 
2845*2ff91c17SVincenzo Maffione 		p = (char *)(opt + 1);
2846*2ff91c17SVincenzo Maffione 
2847*2ff91c17SVincenzo Maffione 		/* copy the option body */
2848*2ff91c17SVincenzo Maffione 		optsz = nmreq_opt_size_by_type(opt->nro_reqtype);
2849*2ff91c17SVincenzo Maffione 		if (optsz) {
2850*2ff91c17SVincenzo Maffione 			/* the option body follows the option header */
2851*2ff91c17SVincenzo Maffione 			error = copyin(src + 1, p, optsz);
2852*2ff91c17SVincenzo Maffione 			if (error)
2853*2ff91c17SVincenzo Maffione 				goto out_restore;
2854*2ff91c17SVincenzo Maffione 			p += optsz;
2855*2ff91c17SVincenzo Maffione 		}
2856*2ff91c17SVincenzo Maffione 
2857*2ff91c17SVincenzo Maffione 		/* move to next option */
2858*2ff91c17SVincenzo Maffione 		next = (struct nmreq_option **)&opt->nro_next;
2859*2ff91c17SVincenzo Maffione 		src = *next;
2860*2ff91c17SVincenzo Maffione 	}
2861*2ff91c17SVincenzo Maffione 	return 0;
2862*2ff91c17SVincenzo Maffione 
2863*2ff91c17SVincenzo Maffione out_restore:
2864*2ff91c17SVincenzo Maffione 	ptrs = (uint64_t *)ker;
2865*2ff91c17SVincenzo Maffione 	hdr->nr_body = *ptrs++;
2866*2ff91c17SVincenzo Maffione 	hdr->nr_options = *ptrs++;
2867*2ff91c17SVincenzo Maffione 	hdr->nr_reserved = 0;
2868*2ff91c17SVincenzo Maffione 	nm_os_free(ker);
2869*2ff91c17SVincenzo Maffione out_err:
2870*2ff91c17SVincenzo Maffione 	return error;
2871*2ff91c17SVincenzo Maffione }
2872*2ff91c17SVincenzo Maffione 
static int
nmreq_copyout(struct nmreq_header *hdr, int rerror)
{
	/* Undo the work of nmreq_copyin(): restore the user-space pointers
	 * that were stashed in the kernel buffer, copy the request body and
	 * the option headers/bodies back to user space, and release the
	 * kernel buffer. 'rerror' is the result of the request processing;
	 * it is returned unchanged unless a copyout itself fails, in which
	 * case the copyout error takes over.
	 */
	struct nmreq_option *src, *dst;
	void *ker = (void *)hdr->nr_body, *bufstart;
	uint64_t *ptrs;
	size_t bodysz;
	int error;

	/* nr_reserved is non-zero only for requests that went through
	 * nmreq_copyin(); otherwise there is nothing to copy back. */
	if (!hdr->nr_reserved)
		return rerror;

	/* restore the user pointers in the header */
	/* nmreq_copyin() stored the two original user pointers (nr_body,
	 * nr_options) in the two uint64_t slots right before the in-kernel
	 * body copy; bufstart is therefore the start of the allocation. */
	ptrs = (uint64_t *)ker - 2;
	bufstart = ptrs;
	hdr->nr_body = *ptrs++;
	src = (struct nmreq_option *)hdr->nr_options;
	hdr->nr_options = *ptrs;

	if (!rerror) {
		/* copy the body */
		bodysz = nmreq_size_by_type(hdr->nr_reqtype);
		error = copyout(ker, (void *)hdr->nr_body, bodysz);
		if (error) {
			rerror = error;
			goto out;
		}
	}

	/* copy the options */
	dst = (struct nmreq_option *)hdr->nr_options;
	while (src) {
		size_t optsz;
		uint64_t next;

		/* restore the user pointer */
		/* the original user-space nro_next was stashed in the
		 * uint64_t slot right before each in-kernel option copy */
		next = src->nro_next;
		ptrs = (uint64_t *)src - 1;
		src->nro_next = *ptrs;

		/* always copy the option header */
		error = copyout(src, dst, sizeof(*src));
		if (error) {
			rerror = error;
			goto out;
		}

		/* copy the option body only if there was no error */
		if (!rerror && !src->nro_status) {
			optsz = nmreq_opt_size_by_type(src->nro_reqtype);
			if (optsz) {
				error = copyout(src + 1, dst + 1, optsz);
				if (error) {
					rerror = error;
					goto out;
				}
			}
		}
		/* advance along the in-kernel list (next) and the
		 * user-space list (*ptrs) in lockstep */
		src = (struct nmreq_option *)next;
		dst = (struct nmreq_option *)*ptrs;
	}


out:
	hdr->nr_reserved = 0;
	nm_os_free(bufstart);
	return rerror;
}
2941*2ff91c17SVincenzo Maffione 
2942*2ff91c17SVincenzo Maffione struct nmreq_option *
2943*2ff91c17SVincenzo Maffione nmreq_findoption(struct nmreq_option *opt, uint16_t reqtype)
2944*2ff91c17SVincenzo Maffione {
2945*2ff91c17SVincenzo Maffione 	for ( ; opt; opt = (struct nmreq_option *)opt->nro_next)
2946*2ff91c17SVincenzo Maffione 		if (opt->nro_reqtype == reqtype)
2947*2ff91c17SVincenzo Maffione 			return opt;
2948*2ff91c17SVincenzo Maffione 	return NULL;
2949*2ff91c17SVincenzo Maffione }
2950*2ff91c17SVincenzo Maffione 
2951*2ff91c17SVincenzo Maffione int
2952*2ff91c17SVincenzo Maffione nmreq_checkduplicate(struct nmreq_option *opt) {
2953*2ff91c17SVincenzo Maffione 	uint16_t type = opt->nro_reqtype;
2954*2ff91c17SVincenzo Maffione 	int dup = 0;
2955*2ff91c17SVincenzo Maffione 
2956*2ff91c17SVincenzo Maffione 	while ((opt = nmreq_findoption((struct nmreq_option *)opt->nro_next,
2957*2ff91c17SVincenzo Maffione 			type))) {
2958*2ff91c17SVincenzo Maffione 		dup++;
2959*2ff91c17SVincenzo Maffione 		opt->nro_status = EINVAL;
2960*2ff91c17SVincenzo Maffione 	}
2961*2ff91c17SVincenzo Maffione 	return (dup ? EINVAL : 0);
2962*2ff91c17SVincenzo Maffione }
2963*2ff91c17SVincenzo Maffione 
2964*2ff91c17SVincenzo Maffione static int
2965*2ff91c17SVincenzo Maffione nmreq_checkoptions(struct nmreq_header *hdr)
2966*2ff91c17SVincenzo Maffione {
2967*2ff91c17SVincenzo Maffione 	struct nmreq_option *opt;
2968*2ff91c17SVincenzo Maffione 	/* return error if there is still any option
2969*2ff91c17SVincenzo Maffione 	 * marked as not supported
2970*2ff91c17SVincenzo Maffione 	 */
2971*2ff91c17SVincenzo Maffione 
2972*2ff91c17SVincenzo Maffione 	for (opt = (struct nmreq_option *)hdr->nr_options; opt;
2973*2ff91c17SVincenzo Maffione 	     opt = (struct nmreq_option *)opt->nro_next)
2974*2ff91c17SVincenzo Maffione 		if (opt->nro_status == EOPNOTSUPP)
2975*2ff91c17SVincenzo Maffione 			return EOPNOTSUPP;
2976*2ff91c17SVincenzo Maffione 
2977*2ff91c17SVincenzo Maffione 	return 0;
2978*2ff91c17SVincenzo Maffione }
297968b8534bSLuigi Rizzo 
298068b8534bSLuigi Rizzo /*
298168b8534bSLuigi Rizzo  * select(2) and poll(2) handlers for the "netmap" device.
298268b8534bSLuigi Rizzo  *
298368b8534bSLuigi Rizzo  * Can be called for one or more queues.
298468b8534bSLuigi Rizzo  * Return true the event mask corresponding to ready events.
298568b8534bSLuigi Rizzo  * If there are no ready events, do a selrecord on either individual
2986ce3ee1e7SLuigi Rizzo  * selinfo or on the global one.
298768b8534bSLuigi Rizzo  * Device-dependent parts (locking and sync of tx/rx rings)
298868b8534bSLuigi Rizzo  * are done through callbacks.
2989f196ce38SLuigi Rizzo  *
299001c7d25fSLuigi Rizzo  * On linux, arguments are really pwait, the poll table, and 'td' is struct file *
299101c7d25fSLuigi Rizzo  * The first one is remapped to pwait as selrecord() uses the name as an
299201c7d25fSLuigi Rizzo  * hidden argument.
299368b8534bSLuigi Rizzo  */
int
netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
{
	struct netmap_adapter *na;
	struct netmap_kring *kring;
	struct netmap_ring *ring;
	u_int i, check_all_tx, check_all_rx, want[NR_TXRX], revents = 0;
#define want_tx want[NR_TX]
#define want_rx want[NR_RX]
	struct mbq q;	/* packets from RX hw queues to host stack */

	/*
	 * In order to avoid nested locks, we need to "double check"
	 * txsync and rxsync if we decide to do a selrecord().
	 * retry_tx (and retry_rx, later) prevent looping forever.
	 */
	int retry_tx = 1, retry_rx = 1;

	/* Transparent mode: send_down is 1 if we have found some
	 * packets to forward (host RX ring --> NIC) during the rx
	 * scan and we have not sent them down to the NIC yet.
	 * Transparent mode requires to bind all rings to a single
	 * file descriptor.
	 */
	int send_down = 0;
	int sync_flags = priv->np_sync_flags;

	mbq_init(&q);

	/* no interface bound to this file descriptor yet */
	if (priv->np_nifp == NULL) {
		D("No if registered");
		return POLLERR;
	}
	mb(); /* make sure following reads are not from cache */

	na = priv->np_na;

	/* the adapter is not (or no longer) in netmap mode */
	if (!nm_netmap_on(na))
		return POLLERR;

	/* 0x8000 in netmap_verbose enables poll tracing */
	if (netmap_verbose & 0x8000)
		D("device %s events 0x%x", na->name, events);
	want_tx = events & (POLLOUT | POLLWRNORM);
	want_rx = events & (POLLIN | POLLRDNORM);

	/*
	 * check_all_{tx|rx} are set if the card has more than one queue AND
	 * the file descriptor is bound to all of them. If so, we sleep on
	 * the "global" selinfo, otherwise we sleep on individual selinfo
	 * (FreeBSD only allows two selinfo's per file descriptor).
	 * The interrupt routine in the driver wake one or the other
	 * (or both) depending on which clients are active.
	 *
	 * rxsync() is only called if we run out of buffers on a POLLIN.
	 * txsync() is called if we run out of buffers on POLLOUT, or
	 * there are pending packets to send. The latter can be disabled
	 * passing NETMAP_NO_TX_POLL in the NIOCREG call.
	 */
	check_all_tx = nm_si_user(priv, NR_TX);
	check_all_rx = nm_si_user(priv, NR_RX);

#ifdef __FreeBSD__
	/*
	 * We start with a lock free round which is cheap if we have
	 * slots available. If this fails, then lock and call the sync
	 * routines. We can't do this on Linux, as the contract says
	 * that we must call nm_os_selrecord() unconditionally.
	 */
	if (want_tx) {
		enum txrx t = NR_TX;
		for (i = priv->np_qfirst[t]; want[t] && i < priv->np_qlast[t]; i++) {
			kring = NMR(na, t)[i];
			/* XXX compare ring->cur and kring->tail */
			if (!nm_ring_empty(kring->ring)) {
				revents |= want[t];
				want[t] = 0;	/* also breaks the loop */
			}
		}
	}
	if (want_rx) {
		enum txrx t = NR_RX;
		want_rx = 0; /* look for a reason to run the handlers */
		for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
			kring = NMR(na, t)[i];
			if (kring->ring->cur == kring->ring->tail /* try fetch new buffers */
			    || kring->rhead != kring->ring->head /* release buffers */) {
				want_rx = 1;
			}
		}
		if (!want_rx)
			revents |= events & (POLLIN | POLLRDNORM); /* we have data */
	}
#endif

#ifdef linux
	/* The selrecord must be unconditional on linux. */
	nm_os_selrecord(sr, check_all_tx ?
	    &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]]->si);
	nm_os_selrecord(sr, check_all_rx ?
		&na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]]->si);
#endif /* linux */

	/*
	 * If we want to push packets out (priv->np_txpoll) or
	 * want_tx is still set, we must issue txsync calls
	 * (on all rings, to avoid that the tx rings stall).
	 * Fortunately, normal tx mode has np_txpoll set.
	 */
	if (priv->np_txpoll || want_tx) {
		/*
		 * The first round checks if anyone is ready, if not
		 * do a selrecord and another round to handle races.
		 * want_tx goes to 0 if any space is found, and is
		 * used to skip rings with no pending transmissions.
		 */
flush_tx:
		for (i = priv->np_qfirst[NR_TX]; i < priv->np_qlast[NR_TX]; i++) {
			int found = 0;

			kring = na->tx_rings[i];
			ring = kring->ring;

			/*
			 * Don't try to txsync this TX ring if we already found some
			 * space in some of the TX rings (want_tx == 0) and there are no
			 * TX slots in this ring that need to be flushed to the NIC
			 * (head == hwcur).
			 */
			if (!send_down && !want_tx && ring->head == kring->nr_hwcur)
				continue;

			/* skip rings that are busy or stopped */
			if (nm_kr_tryget(kring, 1, &revents))
				continue;

			if (nm_txsync_prologue(kring, ring) >= kring->nkr_num_slots) {
				/* userspace handed us inconsistent indexes */
				netmap_ring_reinit(kring);
				revents |= POLLERR;
			} else {
				if (kring->nm_sync(kring, sync_flags))
					revents |= POLLERR;
				else
					nm_sync_finalize(kring);
			}

			/*
			 * If we found new slots, notify potential
			 * listeners on the same ring.
			 * Since we just did a txsync, look at the copies
			 * of cur,tail in the kring.
			 */
			found = kring->rcur != kring->rtail;
			nm_kr_put(kring);
			if (found) { /* notify other listeners */
				revents |= want_tx;
				want_tx = 0;
#ifndef linux
				kring->nm_notify(kring, 0);
#endif /* linux */
			}
		}
		/* if there were any packet to forward we must have handled them by now */
		send_down = 0;
		if (want_tx && retry_tx && sr) {
			/* register for a wakeup, then scan once more to
			 * close the race with a concurrent txsync */
#ifndef linux
			nm_os_selrecord(sr, check_all_tx ?
			    &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]]->si);
#endif /* !linux */
			retry_tx = 0;
			goto flush_tx;
		}
	}

	/*
	 * If want_rx is still set scan receive rings.
	 * Do it on all rings because otherwise we starve.
	 */
	if (want_rx) {
		/* two rounds here for race avoidance */
do_retry_rx:
		for (i = priv->np_qfirst[NR_RX]; i < priv->np_qlast[NR_RX]; i++) {
			int found = 0;

			kring = na->rx_rings[i];
			ring = kring->ring;

			if (unlikely(nm_kr_tryget(kring, 1, &revents)))
				continue;

			if (nm_rxsync_prologue(kring, ring) >= kring->nkr_num_slots) {
				netmap_ring_reinit(kring);
				revents |= POLLERR;
			}
			/* now we can use kring->rcur, rtail */

			/*
			 * transparent mode support: collect packets from
			 * hw rxring(s) that have been released by the user
			 */
			if (nm_may_forward_up(kring)) {
				netmap_grab_packets(kring, &q, netmap_fwd);
			}

			/* Clear the NR_FORWARD flag anyway, it may be set by
			 * the nm_sync() below only on for the host RX ring (see
			 * netmap_rxsync_from_host()). */
			kring->nr_kflags &= ~NR_FORWARD;
			if (kring->nm_sync(kring, sync_flags))
				revents |= POLLERR;
			else
				nm_sync_finalize(kring);
			/* remember if the host RX ring asked us to push
			 * packets down to the NIC (transparent mode) */
			send_down |= (kring->nr_kflags & NR_FORWARD);
			ring_timestamp_set(ring);
			found = kring->rcur != kring->rtail;
			nm_kr_put(kring);
			if (found) {
				revents |= want_rx;
				retry_rx = 0;
#ifndef linux
				kring->nm_notify(kring, 0);
#endif /* linux */
			}
		}

#ifndef linux
		if (retry_rx && sr) {
			nm_os_selrecord(sr, check_all_rx ?
			    &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]]->si);
		}
#endif /* !linux */
		if (send_down || retry_rx) {
			retry_rx = 0;
			if (send_down)
				goto flush_tx; /* and retry_rx */
			else
				goto do_retry_rx;
		}
	}

	/*
	 * Transparent mode: released bufs (i.e. between kring->nr_hwcur and
	 * ring->head) marked with NS_FORWARD on hw rx rings are passed up
	 * to the host stack.
	 */

	if (mbq_peek(&q)) {
		netmap_send_up(na->ifp, &q);
	}

	return (revents);
#undef want_tx
#undef want_rx
}
324668b8534bSLuigi Rizzo 
32474f80b14cSVincenzo Maffione int
32484f80b14cSVincenzo Maffione nma_intr_enable(struct netmap_adapter *na, int onoff)
32494f80b14cSVincenzo Maffione {
32504f80b14cSVincenzo Maffione 	bool changed = false;
32514f80b14cSVincenzo Maffione 	enum txrx t;
32524f80b14cSVincenzo Maffione 	int i;
32534f80b14cSVincenzo Maffione 
32544f80b14cSVincenzo Maffione 	for_rx_tx(t) {
32554f80b14cSVincenzo Maffione 		for (i = 0; i < nma_get_nrings(na, t); i++) {
3256*2ff91c17SVincenzo Maffione 			struct netmap_kring *kring = NMR(na, t)[i];
32574f80b14cSVincenzo Maffione 			int on = !(kring->nr_kflags & NKR_NOINTR);
32584f80b14cSVincenzo Maffione 
32594f80b14cSVincenzo Maffione 			if (!!onoff != !!on) {
32604f80b14cSVincenzo Maffione 				changed = true;
32614f80b14cSVincenzo Maffione 			}
32624f80b14cSVincenzo Maffione 			if (onoff) {
32634f80b14cSVincenzo Maffione 				kring->nr_kflags &= ~NKR_NOINTR;
32644f80b14cSVincenzo Maffione 			} else {
32654f80b14cSVincenzo Maffione 				kring->nr_kflags |= NKR_NOINTR;
32664f80b14cSVincenzo Maffione 			}
32674f80b14cSVincenzo Maffione 		}
32684f80b14cSVincenzo Maffione 	}
32694f80b14cSVincenzo Maffione 
32704f80b14cSVincenzo Maffione 	if (!changed) {
32714f80b14cSVincenzo Maffione 		return 0; /* nothing to do */
32724f80b14cSVincenzo Maffione 	}
32734f80b14cSVincenzo Maffione 
32744f80b14cSVincenzo Maffione 	if (!na->nm_intr) {
32754f80b14cSVincenzo Maffione 		D("Cannot %s interrupts for %s", onoff ? "enable" : "disable",
32764f80b14cSVincenzo Maffione 		  na->name);
32774f80b14cSVincenzo Maffione 		return -1;
32784f80b14cSVincenzo Maffione 	}
32794f80b14cSVincenzo Maffione 
32804f80b14cSVincenzo Maffione 	na->nm_intr(na, onoff);
32814f80b14cSVincenzo Maffione 
32824f80b14cSVincenzo Maffione 	return 0;
32834f80b14cSVincenzo Maffione }
32844f80b14cSVincenzo Maffione 
328517885a7bSLuigi Rizzo 
328617885a7bSLuigi Rizzo /*-------------------- driver support routines -------------------*/
328768b8534bSLuigi Rizzo 
328889cc2556SLuigi Rizzo /* default notify callback */
3289f9790aebSLuigi Rizzo static int
3290847bf383SLuigi Rizzo netmap_notify(struct netmap_kring *kring, int flags)
3291f9790aebSLuigi Rizzo {
3292*2ff91c17SVincenzo Maffione 	struct netmap_adapter *na = kring->notify_na;
3293847bf383SLuigi Rizzo 	enum txrx t = kring->tx;
3294f9790aebSLuigi Rizzo 
329537e3a6d3SLuigi Rizzo 	nm_os_selwakeup(&kring->si);
329689cc2556SLuigi Rizzo 	/* optimization: avoid a wake up on the global
329789cc2556SLuigi Rizzo 	 * queue if nobody has registered for more
329889cc2556SLuigi Rizzo 	 * than one ring
329989cc2556SLuigi Rizzo 	 */
3300847bf383SLuigi Rizzo 	if (na->si_users[t] > 0)
330137e3a6d3SLuigi Rizzo 		nm_os_selwakeup(&na->si[t]);
3302847bf383SLuigi Rizzo 
330337e3a6d3SLuigi Rizzo 	return NM_IRQ_COMPLETED;
3304f9790aebSLuigi Rizzo }
3305f9790aebSLuigi Rizzo 
330689cc2556SLuigi Rizzo /* called by all routines that create netmap_adapters.
330737e3a6d3SLuigi Rizzo  * provide some defaults and get a reference to the
330837e3a6d3SLuigi Rizzo  * memory allocator
330989cc2556SLuigi Rizzo  */
int
netmap_attach_common(struct netmap_adapter *na)
{
	/* Fill in default callbacks and the memory-allocator reference
	 * for a newly created adapter. Returns 0 on success, EINVAL if
	 * the caller did not configure any TX or RX ring.
	 */
	if (na->num_tx_rings == 0 || na->num_rx_rings == 0) {
		D("%s: invalid rings tx %d rx %d",
			na->name, na->num_tx_rings, na->num_rx_rings);
		return EINVAL;
	}

	if (!na->rx_buf_maxsize) {
		/* Set a conservative default (larger is safer). */
		na->rx_buf_maxsize = PAGE_SIZE;
	}

#ifdef __FreeBSD__
	if (na->na_flags & NAF_HOST_RINGS && na->ifp) {
		na->if_input = na->ifp->if_input; /* for netmap_send_up */
	}
	na->pdev = na; /* make sure netmap_mem_map() is called */
#endif /* __FreeBSD__ */
	if (na->nm_krings_create == NULL) {
		/* we assume that we have been called by a driver,
		 * since other port types all provide their own
		 * nm_krings_create
		 */
		na->nm_krings_create = netmap_hw_krings_create;
		na->nm_krings_delete = netmap_hw_krings_delete;
	}
	/* default wakeup routine (see netmap_notify above) */
	if (na->nm_notify == NULL)
		na->nm_notify = netmap_notify;
	na->active_fds = 0;	/* no file descriptors bound yet */

	if (na->nm_mem == NULL) {
		/* use the global allocator */
		na->nm_mem = netmap_mem_get(&nm_mem);
	}
#ifdef WITH_VALE
	if (na->nm_bdg_attach == NULL)
		/* no special nm_bdg_attach callback. On VALE
		 * attach, we need to interpose a bwrap
		 */
		na->nm_bdg_attach = netmap_bwrap_attach;
#endif

	return 0;
}
3356f9790aebSLuigi Rizzo 
335737e3a6d3SLuigi Rizzo /* Wrapper for the register callback provided netmap-enabled
335837e3a6d3SLuigi Rizzo  * hardware drivers.
335937e3a6d3SLuigi Rizzo  * nm_iszombie(na) means that the driver module has been
33604bf50f18SLuigi Rizzo  * unloaded, so we cannot call into it.
336137e3a6d3SLuigi Rizzo  * nm_os_ifnet_lock() must guarantee mutual exclusion with
336237e3a6d3SLuigi Rizzo  * module unloading.
33634bf50f18SLuigi Rizzo  */
33644bf50f18SLuigi Rizzo static int
336537e3a6d3SLuigi Rizzo netmap_hw_reg(struct netmap_adapter *na, int onoff)
33664bf50f18SLuigi Rizzo {
33674bf50f18SLuigi Rizzo 	struct netmap_hw_adapter *hwna =
33684bf50f18SLuigi Rizzo 		(struct netmap_hw_adapter*)na;
336937e3a6d3SLuigi Rizzo 	int error = 0;
33704bf50f18SLuigi Rizzo 
337137e3a6d3SLuigi Rizzo 	nm_os_ifnet_lock();
33724bf50f18SLuigi Rizzo 
337337e3a6d3SLuigi Rizzo 	if (nm_iszombie(na)) {
337437e3a6d3SLuigi Rizzo 		if (onoff) {
337537e3a6d3SLuigi Rizzo 			error = ENXIO;
337637e3a6d3SLuigi Rizzo 		} else if (na != NULL) {
337737e3a6d3SLuigi Rizzo 			na->na_flags &= ~NAF_NETMAP_ON;
337837e3a6d3SLuigi Rizzo 		}
337937e3a6d3SLuigi Rizzo 		goto out;
338037e3a6d3SLuigi Rizzo 	}
338137e3a6d3SLuigi Rizzo 
338237e3a6d3SLuigi Rizzo 	error = hwna->nm_hw_register(na, onoff);
338337e3a6d3SLuigi Rizzo 
338437e3a6d3SLuigi Rizzo out:
338537e3a6d3SLuigi Rizzo 	nm_os_ifnet_unlock();
338637e3a6d3SLuigi Rizzo 
338737e3a6d3SLuigi Rizzo 	return error;
338837e3a6d3SLuigi Rizzo }
338937e3a6d3SLuigi Rizzo 
339037e3a6d3SLuigi Rizzo static void
339137e3a6d3SLuigi Rizzo netmap_hw_dtor(struct netmap_adapter *na)
339237e3a6d3SLuigi Rizzo {
339337e3a6d3SLuigi Rizzo 	if (nm_iszombie(na) || na->ifp == NULL)
339437e3a6d3SLuigi Rizzo 		return;
339537e3a6d3SLuigi Rizzo 
339637e3a6d3SLuigi Rizzo 	WNA(na->ifp) = NULL;
33974bf50f18SLuigi Rizzo }
33984bf50f18SLuigi Rizzo 
3399f18be576SLuigi Rizzo 
340068b8534bSLuigi Rizzo /*
3401c3e9b4dbSLuiz Otavio O Souza  * Allocate a netmap_adapter object, and initialize it from the
340237e3a6d3SLuigi Rizzo  * 'arg' passed by the driver on attach.
3403c3e9b4dbSLuiz Otavio O Souza  * We allocate a block of memory of 'size' bytes, which has room
3404c3e9b4dbSLuiz Otavio O Souza  * for struct netmap_adapter plus additional room private to
3405c3e9b4dbSLuiz Otavio O Souza  * the caller.
340668b8534bSLuigi Rizzo  * Return 0 on success, ENOMEM otherwise.
340768b8534bSLuigi Rizzo  */
3408c3e9b4dbSLuiz Otavio O Souza int
34094f80b14cSVincenzo Maffione netmap_attach_ext(struct netmap_adapter *arg, size_t size, int override_reg)
341068b8534bSLuigi Rizzo {
3411f9790aebSLuigi Rizzo 	struct netmap_hw_adapter *hwna = NULL;
341237e3a6d3SLuigi Rizzo 	struct ifnet *ifp = NULL;
341368b8534bSLuigi Rizzo 
3414c3e9b4dbSLuiz Otavio O Souza 	if (size < sizeof(struct netmap_hw_adapter)) {
3415c3e9b4dbSLuiz Otavio O Souza 		D("Invalid netmap adapter size %d", (int)size);
3416c3e9b4dbSLuiz Otavio O Souza 		return EINVAL;
3417c3e9b4dbSLuiz Otavio O Souza 	}
3418c3e9b4dbSLuiz Otavio O Souza 
341937e3a6d3SLuigi Rizzo 	if (arg == NULL || arg->ifp == NULL)
3420ae10d1afSLuigi Rizzo 		goto fail;
34214f80b14cSVincenzo Maffione 
342237e3a6d3SLuigi Rizzo 	ifp = arg->ifp;
34234f80b14cSVincenzo Maffione 	if (NA(ifp) && !NM_NA_VALID(ifp)) {
34244f80b14cSVincenzo Maffione 		/* If NA(ifp) is not null but there is no valid netmap
34254f80b14cSVincenzo Maffione 		 * adapter it means that someone else is using the same
34264f80b14cSVincenzo Maffione 		 * pointer (e.g. ax25_ptr on linux). This happens for
34274f80b14cSVincenzo Maffione 		 * instance when also PF_RING is in use. */
34284f80b14cSVincenzo Maffione 		D("Error: netmap adapter hook is busy");
34294f80b14cSVincenzo Maffione 		return EBUSY;
34304f80b14cSVincenzo Maffione 	}
34314f80b14cSVincenzo Maffione 
3432c3e9b4dbSLuiz Otavio O Souza 	hwna = nm_os_malloc(size);
3433f9790aebSLuigi Rizzo 	if (hwna == NULL)
3434ae10d1afSLuigi Rizzo 		goto fail;
3435f9790aebSLuigi Rizzo 	hwna->up = *arg;
3436847bf383SLuigi Rizzo 	hwna->up.na_flags |= NAF_HOST_RINGS | NAF_NATIVE;
34374bf50f18SLuigi Rizzo 	strncpy(hwna->up.name, ifp->if_xname, sizeof(hwna->up.name));
34384f80b14cSVincenzo Maffione 	if (override_reg) {
34394bf50f18SLuigi Rizzo 		hwna->nm_hw_register = hwna->up.nm_register;
344037e3a6d3SLuigi Rizzo 		hwna->up.nm_register = netmap_hw_reg;
34414f80b14cSVincenzo Maffione 	}
3442f9790aebSLuigi Rizzo 	if (netmap_attach_common(&hwna->up)) {
3443c3e9b4dbSLuiz Otavio O Souza 		nm_os_free(hwna);
3444f9790aebSLuigi Rizzo 		goto fail;
3445f9790aebSLuigi Rizzo 	}
3446f9790aebSLuigi Rizzo 	netmap_adapter_get(&hwna->up);
3447f9790aebSLuigi Rizzo 
344837e3a6d3SLuigi Rizzo 	NM_ATTACH_NA(ifp, &hwna->up);
344937e3a6d3SLuigi Rizzo 
345064ae02c3SLuigi Rizzo #ifdef linux
3451f18be576SLuigi Rizzo 	if (ifp->netdev_ops) {
3452f18be576SLuigi Rizzo 		/* prepare a clone of the netdev ops */
3453847bf383SLuigi Rizzo #ifndef NETMAP_LINUX_HAVE_NETDEV_OPS
3454f9790aebSLuigi Rizzo 		hwna->nm_ndo.ndo_start_xmit = ifp->netdev_ops;
3455f18be576SLuigi Rizzo #else
3456f9790aebSLuigi Rizzo 		hwna->nm_ndo = *ifp->netdev_ops;
345737e3a6d3SLuigi Rizzo #endif /* NETMAP_LINUX_HAVE_NETDEV_OPS */
3458f18be576SLuigi Rizzo 	}
3459f9790aebSLuigi Rizzo 	hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit;
34604f80b14cSVincenzo Maffione 	hwna->nm_ndo.ndo_change_mtu = linux_netmap_change_mtu;
34614bf50f18SLuigi Rizzo 	if (ifp->ethtool_ops) {
34624bf50f18SLuigi Rizzo 		hwna->nm_eto = *ifp->ethtool_ops;
34634bf50f18SLuigi Rizzo 	}
34644bf50f18SLuigi Rizzo 	hwna->nm_eto.set_ringparam = linux_netmap_set_ringparam;
3465847bf383SLuigi Rizzo #ifdef NETMAP_LINUX_HAVE_SET_CHANNELS
34664bf50f18SLuigi Rizzo 	hwna->nm_eto.set_channels = linux_netmap_set_channels;
346737e3a6d3SLuigi Rizzo #endif /* NETMAP_LINUX_HAVE_SET_CHANNELS */
34684bf50f18SLuigi Rizzo 	if (arg->nm_config == NULL) {
34694bf50f18SLuigi Rizzo 		hwna->up.nm_config = netmap_linux_config;
34704bf50f18SLuigi Rizzo 	}
3471ce3ee1e7SLuigi Rizzo #endif /* linux */
347237e3a6d3SLuigi Rizzo 	if (arg->nm_dtor == NULL) {
347337e3a6d3SLuigi Rizzo 		hwna->up.nm_dtor = netmap_hw_dtor;
347437e3a6d3SLuigi Rizzo 	}
3475f9790aebSLuigi Rizzo 
3476d82f9014SRui Paulo 	if_printf(ifp, "netmap queues/slots: TX %d/%d, RX %d/%d\n",
3477d82f9014SRui Paulo 	    hwna->up.num_tx_rings, hwna->up.num_tx_desc,
3478d82f9014SRui Paulo 	    hwna->up.num_rx_rings, hwna->up.num_rx_desc);
3479ae10d1afSLuigi Rizzo 	return 0;
348068b8534bSLuigi Rizzo 
3481ae10d1afSLuigi Rizzo fail:
3482f9790aebSLuigi Rizzo 	D("fail, arg %p ifp %p na %p", arg, ifp, hwna);
3483f9790aebSLuigi Rizzo 	return (hwna ? EINVAL : ENOMEM);
348468b8534bSLuigi Rizzo }
348568b8534bSLuigi Rizzo 
348668b8534bSLuigi Rizzo 
348737e3a6d3SLuigi Rizzo int
348837e3a6d3SLuigi Rizzo netmap_attach(struct netmap_adapter *arg)
348937e3a6d3SLuigi Rizzo {
34904f80b14cSVincenzo Maffione 	return netmap_attach_ext(arg, sizeof(struct netmap_hw_adapter),
34914f80b14cSVincenzo Maffione 			1 /* override nm_reg */);
349237e3a6d3SLuigi Rizzo }
349337e3a6d3SLuigi Rizzo 
349437e3a6d3SLuigi Rizzo 
3495f9790aebSLuigi Rizzo void
3496f9790aebSLuigi Rizzo NM_DBG(netmap_adapter_get)(struct netmap_adapter *na)
3497f9790aebSLuigi Rizzo {
3498f9790aebSLuigi Rizzo 	if (!na) {
3499f9790aebSLuigi Rizzo 		return;
3500f9790aebSLuigi Rizzo 	}
3501f9790aebSLuigi Rizzo 
3502f9790aebSLuigi Rizzo 	refcount_acquire(&na->na_refcount);
3503f9790aebSLuigi Rizzo }
3504f9790aebSLuigi Rizzo 
3505f9790aebSLuigi Rizzo 
/* Drop a reference on the adapter; on the last release run the
 * destructor and free every adapter resource.
 * Returns 1 iff the netmap_adapter is destroyed (a NULL na counts
 * as already destroyed), 0 if references remain.
 */
int
NM_DBG(netmap_adapter_put)(struct netmap_adapter *na)
{
	if (!na)
		return 1;

	if (!refcount_release(&na->na_refcount))
		return 0;

	/* last reference gone: run the adapter-specific destructor first */
	if (na->nm_dtor)
		na->nm_dtor(na);

	if (na->tx_rings) { /* XXX should not happen */
		D("freeing leftover tx_rings");
		na->nm_krings_delete(na);
	}
	netmap_pipe_dealloc(na);
	/* release the hold on the memory allocator, if any */
	if (na->nm_mem)
		netmap_mem_put(na->nm_mem);
	/* poison the struct before freeing, to help catch use-after-free */
	bzero(na, sizeof(*na));
	nm_os_free(na);

	return 1;
}
3531f9790aebSLuigi Rizzo 
353289cc2556SLuigi Rizzo /* nm_krings_create callback for all hardware native adapters */
3533f9790aebSLuigi Rizzo int
3534f9790aebSLuigi Rizzo netmap_hw_krings_create(struct netmap_adapter *na)
3535f9790aebSLuigi Rizzo {
3536f0ea3689SLuigi Rizzo 	int ret = netmap_krings_create(na, 0);
353717885a7bSLuigi Rizzo 	if (ret == 0) {
353817885a7bSLuigi Rizzo 		/* initialize the mbq for the sw rx ring */
3539*2ff91c17SVincenzo Maffione 		mbq_safe_init(&na->rx_rings[na->num_rx_rings]->rx_queue);
354017885a7bSLuigi Rizzo 		ND("initialized sw rx queue %d", na->num_rx_rings);
354117885a7bSLuigi Rizzo 	}
354217885a7bSLuigi Rizzo 	return ret;
3543f9790aebSLuigi Rizzo }
3544f9790aebSLuigi Rizzo 
3545f9790aebSLuigi Rizzo 
3546f9790aebSLuigi Rizzo 
/*
 * Called on module unload by the netmap-enabled drivers,
 * to release the adapter installed by netmap_attach().
 */
void
netmap_detach(struct ifnet *ifp)
{
	struct netmap_adapter *na = NA(ifp);

	if (!na)
		return;

	NMG_LOCK();
	/* stop all rings so no datapath runs while we detach */
	netmap_set_all_rings(na, NM_KR_LOCKED);
	/*
	 * if the netmap adapter is not native, somebody
	 * changed it, so we can not release it here.
	 * The NAF_ZOMBIE flag will notify the new owner that
	 * the driver is gone.
	 */
	if (!(na->na_flags & NAF_NATIVE) || !netmap_adapter_put(na)) {
		na->na_flags |= NAF_ZOMBIE;
	}
	/* give active users a chance to notice that NAF_ZOMBIE has been
	 * turned on, so that they can stop and return an error to userspace.
	 * Note that this becomes a NOP if there are no active users and,
	 * therefore, the put() above has deleted the na, since now NA(ifp) is
	 * NULL.
	 */
	netmap_enable_all_rings(ifp);
	NMG_UNLOCK();
}
3578f18be576SLuigi Rizzo 
3579f18be576SLuigi Rizzo 
/*
 * Intercept packets from the network stack and pass them
 * to netmap as incoming packets on the 'software' ring.
 *
 * We only store packets in a bounded mbq and then copy them
 * in the relevant rxsync routine.
 *
 * We rely on the OS to make sure that the ifp and na do not go
 * away (typically the caller checks for IFF_DRV_RUNNING or the like).
 * In nm_register() or whenever there is a reinitialization,
 * we make sure to make the mode change visible here.
 *
 * Returns 0 if the mbuf was queued, an errno (ENXIO, ENOBUFS) if it
 * was dropped; in either of those cases the mbuf is consumed here.
 * If the selected tx ring is not in netmap mode the mbuf is handed
 * back to the stack via MBUF_TRANSMIT and its result is returned.
 */
int
netmap_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct netmap_adapter *na = NA(ifp);
	struct netmap_kring *kring, *tx_kring;
	u_int len = MBUF_LEN(m);
	u_int error = ENOBUFS;
	unsigned int txr;
	struct mbq *q;
	int busy;

	/* the host rx ring is the last entry of the rx_rings array */
	kring = na->rx_rings[na->num_rx_rings];
	// XXX [Linux] we do not need this lock
	// if we follow the down/configure/up protocol -gl
	// mtx_lock(&na->core_lock);

	if (!nm_netmap_on(na)) {
		D("%s not in netmap mode anymore", na->name);
		error = ENXIO;
		goto done;
	}

	/* clamp the mbuf's tx queue id to a valid tx ring index */
	txr = MBUF_TXQ(m);
	if (txr >= na->num_tx_rings) {
		txr %= na->num_tx_rings;
	}
	tx_kring = NMR(na, NR_TX)[txr];

	if (tx_kring->nr_mode == NKR_NETMAP_OFF) {
		/* ring not in netmap mode: let the stack transmit;
		 * MBUF_TRANSMIT takes ownership of m */
		return MBUF_TRANSMIT(na, ifp, m);
	}

	q = &kring->rx_queue;

	// XXX reconsider long packets if we handle fragments
	if (len > NETMAP_BUF_SIZE(na)) { /* too long for us */
		D("%s from_host, drop packet size %d > %d", na->name,
			len, NETMAP_BUF_SIZE(na));
		goto done;
	}

	/* checksum/segmentation offloads cannot be honored here */
	if (nm_os_mbuf_has_offld(m)) {
		RD(1, "%s drop mbuf that needs offloadings", na->name);
		goto done;
	}

	/* protect against netmap_rxsync_from_host(), netmap_sw_to_nic()
	 * and maybe other instances of netmap_transmit (the latter
	 * not possible on Linux).
	 * We enqueue the mbuf only if we are sure there is going to be
	 * enough room in the host RX ring, otherwise we drop it.
	 */
	mbq_lock(q);

        busy = kring->nr_hwtail - kring->nr_hwcur;
        if (busy < 0)
                busy += kring->nkr_num_slots;
	if (busy + mbq_len(q) >= kring->nkr_num_slots - 1) {
		RD(2, "%s full hwcur %d hwtail %d qlen %d", na->name,
			kring->nr_hwcur, kring->nr_hwtail, mbq_len(q));
	} else {
		mbq_enqueue(q, m);
		ND(2, "%s %d bufs in queue", na->name, mbq_len(q));
		/* notify outside the lock; clearing m marks it consumed */
		m = NULL;
		error = 0;
	}
	mbq_unlock(q);

done:
	if (m)
		m_freem(m);
	/* unconditionally wake up listeners */
	kring->nm_notify(kring, 0);
	/* this is normally netmap_notify(), but for nics
	 * connected to a bridge it is netmap_bwrap_intr_notify(),
	 * that possibly forwards the frames through the switch
	 */

	return (error);
}
367368b8534bSLuigi Rizzo 
367468b8534bSLuigi Rizzo 
/*
 * netmap_reset() is called by the driver routines when reinitializing
 * a ring. The driver is in charge of locking to protect the kring.
 * If native netmap mode is not set just return NULL.
 * If native netmap mode is set, in particular, we have to set nr_mode to
 * NKR_NETMAP_ON.
 *
 * @na       the adapter owning the ring
 * @tx       NR_TX or NR_RX, selecting which ring array to use
 * @n        ring index within that array
 * @new_cur  the hardware's new current slot, used to recompute nkr_hwofs
 *
 * Returns the kring's slot array, or NULL if the ring index is out of
 * range or the ring is not (or no longer) in netmap mode.
 */
struct netmap_slot *
netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
	u_int new_cur)
{
	struct netmap_kring *kring;
	int new_hwofs, lim;

	if (!nm_native_on(na)) {
		ND("interface not in native netmap mode");
		return NULL;	/* nothing to reinitialize */
	}

	/* XXX note- in the new scheme, we are not guaranteed to be
	 * under lock (e.g. when called on a device reset).
	 * In this case, we should set a flag and do not trust too
	 * much the values. In practice: TODO
	 * - set a RESET flag somewhere in the kring
	 * - do the processing in a conservative way
	 * - let the *sync() fixup at the end.
	 */
	if (tx == NR_TX) {
		if (n >= na->num_tx_rings)
			return NULL;

		kring = na->tx_rings[n];

		/* a pending switch-off wins over the reset */
		if (kring->nr_pending_mode == NKR_NETMAP_OFF) {
			kring->nr_mode = NKR_NETMAP_OFF;
			return NULL;
		}

		// XXX check whether we should use hwcur or rcur
		new_hwofs = kring->nr_hwcur - new_cur;
	} else {
		if (n >= na->num_rx_rings)
			return NULL;
		kring = na->rx_rings[n];

		if (kring->nr_pending_mode == NKR_NETMAP_OFF) {
			kring->nr_mode = NKR_NETMAP_OFF;
			return NULL;
		}

		new_hwofs = kring->nr_hwtail - new_cur;
	}
	lim = kring->nkr_num_slots - 1;
	/* wrap the offset back into the ring */
	if (new_hwofs > lim)
		new_hwofs -= lim + 1;

	/* Always set the new offset value and realign the ring. */
	if (netmap_verbose)
	    D("%s %s%d hwofs %d -> %d, hwtail %d -> %d",
		na->name,
		tx == NR_TX ? "TX" : "RX", n,
		kring->nkr_hwofs, new_hwofs,
		kring->nr_hwtail,
		tx == NR_TX ? lim : kring->nr_hwtail);
	kring->nkr_hwofs = new_hwofs;
	if (tx == NR_TX) {
		/* after a reset the whole tx ring is available:
		 * tail is placed one slot behind hwcur (mod ring size) */
		kring->nr_hwtail = kring->nr_hwcur + lim;
		if (kring->nr_hwtail > lim)
			kring->nr_hwtail -= lim + 1;
	}

	/*
	 * Wakeup on the individual and global selwait
	 * We do the wakeup here, but the ring is not yet reconfigured.
	 * However, we are under lock so there are no races.
	 */
	kring->nr_mode = NKR_NETMAP_ON;
	kring->nm_notify(kring, 0);
	return kring->ring->slot;
}
375568b8534bSLuigi Rizzo 
375668b8534bSLuigi Rizzo 
3757ce3ee1e7SLuigi Rizzo /*
3758f9790aebSLuigi Rizzo  * Dispatch rx/tx interrupts to the netmap rings.
3759ce3ee1e7SLuigi Rizzo  *
3760ce3ee1e7SLuigi Rizzo  * "work_done" is non-null on the RX path, NULL for the TX path.
3761ce3ee1e7SLuigi Rizzo  * We rely on the OS to make sure that there is only one active
3762ce3ee1e7SLuigi Rizzo  * instance per queue, and that there is appropriate locking.
3763849bec0eSLuigi Rizzo  *
3764f9790aebSLuigi Rizzo  * The 'notify' routine depends on what the ring is attached to.
3765f9790aebSLuigi Rizzo  * - for a netmap file descriptor, do a selwakeup on the individual
3766f9790aebSLuigi Rizzo  *   waitqueue, plus one on the global one if needed
37674bf50f18SLuigi Rizzo  *   (see netmap_notify)
37684bf50f18SLuigi Rizzo  * - for a nic connected to a switch, call the proper forwarding routine
37694bf50f18SLuigi Rizzo  *   (see netmap_bwrap_intr_notify)
3770f9790aebSLuigi Rizzo  */
377137e3a6d3SLuigi Rizzo int
377237e3a6d3SLuigi Rizzo netmap_common_irq(struct netmap_adapter *na, u_int q, u_int *work_done)
3773f9790aebSLuigi Rizzo {
3774f9790aebSLuigi Rizzo 	struct netmap_kring *kring;
3775847bf383SLuigi Rizzo 	enum txrx t = (work_done ? NR_RX : NR_TX);
3776f9790aebSLuigi Rizzo 
3777f9790aebSLuigi Rizzo 	q &= NETMAP_RING_MASK;
3778f9790aebSLuigi Rizzo 
3779f9790aebSLuigi Rizzo 	if (netmap_verbose) {
3780f9790aebSLuigi Rizzo 	        RD(5, "received %s queue %d", work_done ? "RX" : "TX" , q);
3781f9790aebSLuigi Rizzo 	}
3782f9790aebSLuigi Rizzo 
3783847bf383SLuigi Rizzo 	if (q >= nma_get_nrings(na, t))
378437e3a6d3SLuigi Rizzo 		return NM_IRQ_PASS; // not a physical queue
3785847bf383SLuigi Rizzo 
3786*2ff91c17SVincenzo Maffione 	kring = NMR(na, t)[q];
3787847bf383SLuigi Rizzo 
378837e3a6d3SLuigi Rizzo 	if (kring->nr_mode == NKR_NETMAP_OFF) {
378937e3a6d3SLuigi Rizzo 		return NM_IRQ_PASS;
379037e3a6d3SLuigi Rizzo 	}
379137e3a6d3SLuigi Rizzo 
3792847bf383SLuigi Rizzo 	if (t == NR_RX) {
3793f9790aebSLuigi Rizzo 		kring->nr_kflags |= NKR_PENDINTR;	// XXX atomic ?
3794f9790aebSLuigi Rizzo 		*work_done = 1; /* do not fire napi again */
3795f9790aebSLuigi Rizzo 	}
379637e3a6d3SLuigi Rizzo 
379737e3a6d3SLuigi Rizzo 	return kring->nm_notify(kring, 0);
3798f9790aebSLuigi Rizzo }
3799f9790aebSLuigi Rizzo 
380017885a7bSLuigi Rizzo 
3801f9790aebSLuigi Rizzo /*
3802f9790aebSLuigi Rizzo  * Default functions to handle rx/tx interrupts from a physical device.
3803f9790aebSLuigi Rizzo  * "work_done" is non-null on the RX path, NULL for the TX path.
3804f9790aebSLuigi Rizzo  *
380537e3a6d3SLuigi Rizzo  * If the card is not in netmap mode, simply return NM_IRQ_PASS,
3806ce3ee1e7SLuigi Rizzo  * so that the caller proceeds with regular processing.
380737e3a6d3SLuigi Rizzo  * Otherwise call netmap_common_irq().
3808ce3ee1e7SLuigi Rizzo  *
3809ce3ee1e7SLuigi Rizzo  * If the card is connected to a netmap file descriptor,
3810ce3ee1e7SLuigi Rizzo  * do a selwakeup on the individual queue, plus one on the global one
3811ce3ee1e7SLuigi Rizzo  * if needed (multiqueue card _and_ there are multiqueue listeners),
381237e3a6d3SLuigi Rizzo  * and return NR_IRQ_COMPLETED.
3813ce3ee1e7SLuigi Rizzo  *
3814ce3ee1e7SLuigi Rizzo  * Finally, if called on rx from an interface connected to a switch,
381537e3a6d3SLuigi Rizzo  * calls the proper forwarding routine.
38161a26580eSLuigi Rizzo  */
3817babc7c12SLuigi Rizzo int
3818ce3ee1e7SLuigi Rizzo netmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_done)
38191a26580eSLuigi Rizzo {
38204bf50f18SLuigi Rizzo 	struct netmap_adapter *na = NA(ifp);
38214bf50f18SLuigi Rizzo 
38224bf50f18SLuigi Rizzo 	/*
38234bf50f18SLuigi Rizzo 	 * XXX emulated netmap mode sets NAF_SKIP_INTR so
38244bf50f18SLuigi Rizzo 	 * we still use the regular driver even though the previous
38254bf50f18SLuigi Rizzo 	 * check fails. It is unclear whether we should use
38264bf50f18SLuigi Rizzo 	 * nm_native_on() here.
38274bf50f18SLuigi Rizzo 	 */
38284bf50f18SLuigi Rizzo 	if (!nm_netmap_on(na))
382937e3a6d3SLuigi Rizzo 		return NM_IRQ_PASS;
3830849bec0eSLuigi Rizzo 
38314bf50f18SLuigi Rizzo 	if (na->na_flags & NAF_SKIP_INTR) {
38328241616dSLuigi Rizzo 		ND("use regular interrupt");
383337e3a6d3SLuigi Rizzo 		return NM_IRQ_PASS;
38348241616dSLuigi Rizzo 	}
38358241616dSLuigi Rizzo 
383637e3a6d3SLuigi Rizzo 	return netmap_common_irq(na, q, work_done);
38371a26580eSLuigi Rizzo }
38381a26580eSLuigi Rizzo 
383964ae02c3SLuigi Rizzo 
384001c7d25fSLuigi Rizzo /*
3841f9790aebSLuigi Rizzo  * Module loader and unloader
3842f196ce38SLuigi Rizzo  *
3843f9790aebSLuigi Rizzo  * netmap_init() creates the /dev/netmap device and initializes
3844f9790aebSLuigi Rizzo  * all global variables. Returns 0 on success, errno on failure
3845f9790aebSLuigi Rizzo  * (but there is no chance)
3846f9790aebSLuigi Rizzo  *
3847f9790aebSLuigi Rizzo  * netmap_fini() destroys everything.
3848f196ce38SLuigi Rizzo  */
3849babc7c12SLuigi Rizzo 
static struct cdev *netmap_dev; /* /dev/netmap character device. */
extern struct cdevsw netmap_cdevsw; /* device methods; defined elsewhere */
3852babc7c12SLuigi Rizzo 
385317885a7bSLuigi Rizzo 
/*
 * Module unload: destroy /dev/netmap (if it was created) and tear
 * down the subsystems in reverse order of initialization.
 * Callers must guarantee that no netmap users are left.
 */
void
netmap_fini(void)
{
	if (netmap_dev)
		destroy_dev(netmap_dev);
	/* we assume that there are no longer netmap users */
	nm_os_ifnet_fini();
	netmap_uninit_bridges();
	netmap_mem_fini();
	NMG_LOCK_DESTROY();
	nm_prinf("netmap: unloaded module.\n");
}
386668b8534bSLuigi Rizzo 
386717885a7bSLuigi Rizzo 
/*
 * Module load: initialize the global lock and the memory allocator,
 * create the /dev/netmap character device, then bring up the bridge
 * and per-OS subsystems.
 * Returns 0 on success, EINVAL on any failure (after unwinding via
 * netmap_fini(), which tolerates partially-initialized state since
 * it checks netmap_dev before destroying it).
 */
int
netmap_init(void)
{
	int error;

	NMG_LOCK_INIT();

	error = netmap_mem_init();
	if (error != 0)
		goto fail;
	/*
	 * MAKEDEV_ETERNAL_KLD avoids an expensive check on syscalls
	 * when the module is compiled in.
	 * XXX could use make_dev_credv() to get error number
	 */
	netmap_dev = make_dev_credf(MAKEDEV_ETERNAL_KLD,
		&netmap_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0600,
			      "netmap");
	if (!netmap_dev)
		goto fail;

	error = netmap_init_bridges();
	if (error)
		goto fail;

#ifdef __FreeBSD__
	nm_os_vi_init_index();
#endif

	error = nm_os_ifnet_init();
	if (error)
		goto fail;

	nm_prinf("netmap: loaded module\n");
	return (0);
fail:
	netmap_fini();
	return (EINVAL); /* may be incorrect */
}
3907