xref: /freebsd-14.2/sys/dev/netmap/netmap.c (revision 7029da5c)
1718cf2ccSPedro F. Giffuni /*-
2718cf2ccSPedro F. Giffuni  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3718cf2ccSPedro F. Giffuni  *
437e3a6d3SLuigi Rizzo  * Copyright (C) 2011-2014 Matteo Landi
537e3a6d3SLuigi Rizzo  * Copyright (C) 2011-2016 Luigi Rizzo
637e3a6d3SLuigi Rizzo  * Copyright (C) 2011-2016 Giuseppe Lettieri
737e3a6d3SLuigi Rizzo  * Copyright (C) 2011-2016 Vincenzo Maffione
837e3a6d3SLuigi Rizzo  * All rights reserved.
968b8534bSLuigi Rizzo  *
1068b8534bSLuigi Rizzo  * Redistribution and use in source and binary forms, with or without
1168b8534bSLuigi Rizzo  * modification, are permitted provided that the following conditions
1268b8534bSLuigi Rizzo  * are met:
1368b8534bSLuigi Rizzo  *   1. Redistributions of source code must retain the above copyright
1468b8534bSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer.
1568b8534bSLuigi Rizzo  *   2. Redistributions in binary form must reproduce the above copyright
1668b8534bSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer in the
1768b8534bSLuigi Rizzo  *      documentation and/or other materials provided with the distribution.
1868b8534bSLuigi Rizzo  *
1968b8534bSLuigi Rizzo  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
2068b8534bSLuigi Rizzo  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2168b8534bSLuigi Rizzo  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2268b8534bSLuigi Rizzo  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2368b8534bSLuigi Rizzo  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2468b8534bSLuigi Rizzo  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2568b8534bSLuigi Rizzo  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2668b8534bSLuigi Rizzo  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2768b8534bSLuigi Rizzo  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2868b8534bSLuigi Rizzo  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2968b8534bSLuigi Rizzo  * SUCH DAMAGE.
3068b8534bSLuigi Rizzo  */
3168b8534bSLuigi Rizzo 
32ce3ee1e7SLuigi Rizzo 
3368b8534bSLuigi Rizzo /*
34f9790aebSLuigi Rizzo  * $FreeBSD$
35f9790aebSLuigi Rizzo  *
3668b8534bSLuigi Rizzo  * This module supports memory mapped access to network devices,
3768b8534bSLuigi Rizzo  * see netmap(4).
3868b8534bSLuigi Rizzo  *
3968b8534bSLuigi Rizzo  * The module uses a large, memory pool allocated by the kernel
4068b8534bSLuigi Rizzo  * and accessible as mmapped memory by multiple userspace threads/processes.
4168b8534bSLuigi Rizzo  * The memory pool contains packet buffers and "netmap rings",
4268b8534bSLuigi Rizzo  * i.e. user-accessible copies of the interface's queues.
4368b8534bSLuigi Rizzo  *
4468b8534bSLuigi Rizzo  * Access to the network card works like this:
4568b8534bSLuigi Rizzo  * 1. a process/thread issues one or more open() on /dev/netmap, to create
4668b8534bSLuigi Rizzo  *    select()able file descriptor on which events are reported.
4768b8534bSLuigi Rizzo  * 2. on each descriptor, the process issues an ioctl() to identify
4868b8534bSLuigi Rizzo  *    the interface that should report events to the file descriptor.
4968b8534bSLuigi Rizzo  * 3. on each descriptor, the process issues an mmap() request to
5068b8534bSLuigi Rizzo  *    map the shared memory region within the process' address space.
5168b8534bSLuigi Rizzo  *    The list of interesting queues is indicated by a location in
5268b8534bSLuigi Rizzo  *    the shared memory region.
5368b8534bSLuigi Rizzo  * 4. using the functions in the netmap(4) userspace API, a process
5468b8534bSLuigi Rizzo  *    can look up the occupation state of a queue, access memory buffers,
5568b8534bSLuigi Rizzo  *    and retrieve received packets or enqueue packets to transmit.
5668b8534bSLuigi Rizzo  * 5. using some ioctl()s the process can synchronize the userspace view
5768b8534bSLuigi Rizzo  *    of the queue with the actual status in the kernel. This includes both
5868b8534bSLuigi Rizzo  *    receiving the notification of new packets, and transmitting new
5968b8534bSLuigi Rizzo  *    packets on the output interface.
6068b8534bSLuigi Rizzo  * 6. select() or poll() can be used to wait for events on individual
6168b8534bSLuigi Rizzo  *    transmit or receive queues (or all queues for a given interface).
62ce3ee1e7SLuigi Rizzo  *
63ce3ee1e7SLuigi Rizzo 
64ce3ee1e7SLuigi Rizzo 		SYNCHRONIZATION (USER)
65ce3ee1e7SLuigi Rizzo 
66ce3ee1e7SLuigi Rizzo The netmap rings and data structures may be shared among multiple
67ce3ee1e7SLuigi Rizzo user threads or even independent processes.
68ce3ee1e7SLuigi Rizzo Any synchronization among those threads/processes is delegated
69ce3ee1e7SLuigi Rizzo to the threads themselves. Only one thread at a time can be in
70ce3ee1e7SLuigi Rizzo a system call on the same netmap ring. The OS does not enforce
71ce3ee1e7SLuigi Rizzo this and only guarantees against system crashes in case of
72ce3ee1e7SLuigi Rizzo invalid usage.
73ce3ee1e7SLuigi Rizzo 
74ce3ee1e7SLuigi Rizzo 		LOCKING (INTERNAL)
75ce3ee1e7SLuigi Rizzo 
76ce3ee1e7SLuigi Rizzo Within the kernel, access to the netmap rings is protected as follows:
77ce3ee1e7SLuigi Rizzo 
78ce3ee1e7SLuigi Rizzo - a spinlock on each ring, to handle producer/consumer races on
79ce3ee1e7SLuigi Rizzo   RX rings attached to the host stack (against multiple host
80ce3ee1e7SLuigi Rizzo   threads writing from the host stack to the same ring),
81ce3ee1e7SLuigi Rizzo   and on 'destination' rings attached to a VALE switch
82ce3ee1e7SLuigi Rizzo   (i.e. RX rings in VALE ports, and TX rings in NIC/host ports)
83ce3ee1e7SLuigi Rizzo   protecting multiple active senders for the same destination.
84ce3ee1e7SLuigi Rizzo 
85ce3ee1e7SLuigi Rizzo - an atomic variable to guarantee that there is at most one
86ce3ee1e7SLuigi Rizzo   instance of *_*xsync() on the ring at any time.
87ce3ee1e7SLuigi Rizzo   For rings connected to user file
88ce3ee1e7SLuigi Rizzo   descriptors, an atomic_test_and_set() protects this, and the
89ce3ee1e7SLuigi Rizzo   lock on the ring is not actually used.
90ce3ee1e7SLuigi Rizzo   For NIC RX rings connected to a VALE switch, an atomic_test_and_set()
91ce3ee1e7SLuigi Rizzo   is also used to prevent multiple executions (the driver might indeed
92ce3ee1e7SLuigi Rizzo   already guarantee this).
93ce3ee1e7SLuigi Rizzo   For NIC TX rings connected to a VALE switch, the lock arbitrates
94ce3ee1e7SLuigi Rizzo   access to the queue (both when allocating buffers and when pushing
95ce3ee1e7SLuigi Rizzo   them out).
96ce3ee1e7SLuigi Rizzo 
97ce3ee1e7SLuigi Rizzo - *xsync() should be protected against initializations of the card.
98ce3ee1e7SLuigi Rizzo   On FreeBSD most devices have the reset routine protected by
99ce3ee1e7SLuigi Rizzo   a RING lock (ixgbe, igb, em) or core lock (re). lem is missing
100ce3ee1e7SLuigi Rizzo   the RING protection on rx_reset(), this should be added.
101ce3ee1e7SLuigi Rizzo 
102ce3ee1e7SLuigi Rizzo   On linux there is an external lock on the tx path, which probably
103ce3ee1e7SLuigi Rizzo   also arbitrates access to the reset routine. XXX to be revised
104ce3ee1e7SLuigi Rizzo 
105ce3ee1e7SLuigi Rizzo - a per-interface core_lock protecting access from the host stack
106ce3ee1e7SLuigi Rizzo   while interfaces may be detached from netmap mode.
107ce3ee1e7SLuigi Rizzo   XXX there should be no need for this lock if we detach the interfaces
108ce3ee1e7SLuigi Rizzo   only while they are down.
109ce3ee1e7SLuigi Rizzo 
110ce3ee1e7SLuigi Rizzo 
111ce3ee1e7SLuigi Rizzo --- VALE SWITCH ---
112ce3ee1e7SLuigi Rizzo 
113ce3ee1e7SLuigi Rizzo NMG_LOCK() serializes all modifications to switches and ports.
114ce3ee1e7SLuigi Rizzo A switch cannot be deleted until all ports are gone.
115ce3ee1e7SLuigi Rizzo 
116ce3ee1e7SLuigi Rizzo For each switch, an SX lock (RWlock on linux) protects
117ce3ee1e7SLuigi Rizzo deletion of ports. When configuring or deleting a new port, the
118ce3ee1e7SLuigi Rizzo lock is acquired in exclusive mode (after holding NMG_LOCK).
119ce3ee1e7SLuigi Rizzo When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
120ce3ee1e7SLuigi Rizzo The lock is held throughout the entire forwarding cycle,
121ce3ee1e7SLuigi Rizzo during which the thread may incur a page fault.
122ce3ee1e7SLuigi Rizzo Hence it is important that sleepable shared locks are used.
123ce3ee1e7SLuigi Rizzo 
124ce3ee1e7SLuigi Rizzo On the rx ring, the per-port lock is grabbed initially to reserve
125ce3ee1e7SLuigi Rizzo a number of slots in the ring, then the lock is released,
126ce3ee1e7SLuigi Rizzo packets are copied from source to destination, and then
127ce3ee1e7SLuigi Rizzo the lock is acquired again and the receive ring is updated.
128ce3ee1e7SLuigi Rizzo (A similar thing is done on the tx ring for NIC and host stack
129ce3ee1e7SLuigi Rizzo ports attached to the switch)
130ce3ee1e7SLuigi Rizzo 
13168b8534bSLuigi Rizzo  */
13268b8534bSLuigi Rizzo 
1334bf50f18SLuigi Rizzo 
1344bf50f18SLuigi Rizzo /* --- internals ----
1354bf50f18SLuigi Rizzo  *
1364bf50f18SLuigi Rizzo  * Roadmap to the code that implements the above.
1374bf50f18SLuigi Rizzo  *
1384bf50f18SLuigi Rizzo  * > 1. a process/thread issues one or more open() on /dev/netmap, to create
1394bf50f18SLuigi Rizzo  * >    select()able file descriptor on which events are reported.
1404bf50f18SLuigi Rizzo  *
1414bf50f18SLuigi Rizzo  *  	Internally, we allocate a netmap_priv_d structure, that will be
14237e3a6d3SLuigi Rizzo  *  	initialized on ioctl(NIOCREGIF). There is one netmap_priv_d
14337e3a6d3SLuigi Rizzo  *  	structure for each open().
1444bf50f18SLuigi Rizzo  *
1454bf50f18SLuigi Rizzo  *      os-specific:
14637e3a6d3SLuigi Rizzo  *  	    FreeBSD: see netmap_open() (netmap_freebsd.c)
14737e3a6d3SLuigi Rizzo  *  	    linux:   see linux_netmap_open() (netmap_linux.c)
1484bf50f18SLuigi Rizzo  *
1494bf50f18SLuigi Rizzo  * > 2. on each descriptor, the process issues an ioctl() to identify
1504bf50f18SLuigi Rizzo  * >    the interface that should report events to the file descriptor.
1514bf50f18SLuigi Rizzo  *
1524bf50f18SLuigi Rizzo  * 	Implemented by netmap_ioctl(), NIOCREGIF case, with nmr->nr_cmd==0.
1534bf50f18SLuigi Rizzo  * 	Most important things happen in netmap_get_na() and
1544bf50f18SLuigi Rizzo  * 	netmap_do_regif(), called from there. Additional details can be
1554bf50f18SLuigi Rizzo  * 	found in the comments above those functions.
1564bf50f18SLuigi Rizzo  *
1574bf50f18SLuigi Rizzo  * 	In all cases, this action creates/takes-a-reference-to a
1584bf50f18SLuigi Rizzo  * 	netmap_*_adapter describing the port, and allocates a netmap_if
1594bf50f18SLuigi Rizzo  * 	and all necessary netmap rings, filling them with netmap buffers.
1604bf50f18SLuigi Rizzo  *
1614bf50f18SLuigi Rizzo  *      In this phase, the sync callbacks for each ring are set (these are used
1624bf50f18SLuigi Rizzo  *      in steps 5 and 6 below).  The callbacks depend on the type of adapter.
1634bf50f18SLuigi Rizzo  *      The adapter creation/initialization code puts them in the
1644bf50f18SLuigi Rizzo  * 	netmap_adapter (fields na->nm_txsync and na->nm_rxsync).  Then, they
1654bf50f18SLuigi Rizzo  * 	are copied from there to the netmap_kring's during netmap_do_regif(), by
1664bf50f18SLuigi Rizzo  * 	the nm_krings_create() callback.  All the nm_krings_create callbacks
1674bf50f18SLuigi Rizzo  * 	actually call netmap_krings_create() to perform this and the other
1684bf50f18SLuigi Rizzo  * 	common stuff. netmap_krings_create() also takes care of the host rings,
1694bf50f18SLuigi Rizzo  * 	if needed, by setting their sync callbacks appropriately.
1704bf50f18SLuigi Rizzo  *
1714bf50f18SLuigi Rizzo  * 	Additional actions depend on the kind of netmap_adapter that has been
1724bf50f18SLuigi Rizzo  * 	registered:
1734bf50f18SLuigi Rizzo  *
1744bf50f18SLuigi Rizzo  * 	- netmap_hw_adapter:  	     [netmap.c]
1754bf50f18SLuigi Rizzo  * 	     This is a system netdev/ifp with native netmap support.
1764bf50f18SLuigi Rizzo  * 	     The ifp is detached from the host stack by redirecting:
1774bf50f18SLuigi Rizzo  * 	       - transmissions (from the network stack) to netmap_transmit()
1784bf50f18SLuigi Rizzo  * 	       - receive notifications to the nm_notify() callback for
1794bf50f18SLuigi Rizzo  * 	         this adapter. The callback is normally netmap_notify(), unless
1804bf50f18SLuigi Rizzo  * 	         the ifp is attached to a bridge using bwrap, in which case it
1814bf50f18SLuigi Rizzo  * 	         is netmap_bwrap_intr_notify().
1824bf50f18SLuigi Rizzo  *
1834bf50f18SLuigi Rizzo  * 	- netmap_generic_adapter:      [netmap_generic.c]
1844bf50f18SLuigi Rizzo  * 	      A system netdev/ifp without native netmap support.
1854bf50f18SLuigi Rizzo  *
1864bf50f18SLuigi Rizzo  * 	(the decision about native/non native support is taken in
1874bf50f18SLuigi Rizzo  * 	 netmap_get_hw_na(), called by netmap_get_na())
1884bf50f18SLuigi Rizzo  *
1894bf50f18SLuigi Rizzo  * 	- netmap_vp_adapter 		[netmap_vale.c]
1904bf50f18SLuigi Rizzo  * 	      Returned by netmap_get_bdg_na().
1914bf50f18SLuigi Rizzo  * 	      This is a persistent or ephemeral VALE port. Ephemeral ports
1924bf50f18SLuigi Rizzo  * 	      are created on the fly if they don't already exist, and are
1934bf50f18SLuigi Rizzo  * 	      always attached to a bridge.
194453130d9SPedro F. Giffuni  * 	      Persistent VALE ports must be created separately, and
1954bf50f18SLuigi Rizzo  * 	      then attached like normal NICs. The NIOCREGIF we are examining
1964bf50f18SLuigi Rizzo  * 	      will find them only if they had previously been created and
1974bf50f18SLuigi Rizzo  * 	      attached (see VALE_CTL below).
1984bf50f18SLuigi Rizzo  *
1994bf50f18SLuigi Rizzo  * 	- netmap_pipe_adapter 	      [netmap_pipe.c]
2004bf50f18SLuigi Rizzo  * 	      Returned by netmap_get_pipe_na().
2014bf50f18SLuigi Rizzo  * 	      Both pipe ends are created, if they didn't already exist.
2024bf50f18SLuigi Rizzo  *
2034bf50f18SLuigi Rizzo  * 	- netmap_monitor_adapter      [netmap_monitor.c]
2044bf50f18SLuigi Rizzo  * 	      Returned by netmap_get_monitor_na().
2054bf50f18SLuigi Rizzo  * 	      If successful, the nm_sync callbacks of the monitored adapter
2064bf50f18SLuigi Rizzo  * 	      will be intercepted by the returned monitor.
2074bf50f18SLuigi Rizzo  *
2084bf50f18SLuigi Rizzo  * 	- netmap_bwrap_adapter	      [netmap_vale.c]
2094bf50f18SLuigi Rizzo  * 	      Cannot be obtained in this way, see VALE_CTL below
2104bf50f18SLuigi Rizzo  *
2114bf50f18SLuigi Rizzo  *
2124bf50f18SLuigi Rizzo  * 	os-specific:
2134bf50f18SLuigi Rizzo  * 	    linux: we first go through linux_netmap_ioctl() to
2144bf50f18SLuigi Rizzo  * 	           adapt the FreeBSD interface to the linux one.
2154bf50f18SLuigi Rizzo  *
2164bf50f18SLuigi Rizzo  *
2174bf50f18SLuigi Rizzo  * > 3. on each descriptor, the process issues an mmap() request to
2184bf50f18SLuigi Rizzo  * >    map the shared memory region within the process' address space.
2194bf50f18SLuigi Rizzo  * >    The list of interesting queues is indicated by a location in
2204bf50f18SLuigi Rizzo  * >    the shared memory region.
2214bf50f18SLuigi Rizzo  *
2224bf50f18SLuigi Rizzo  *      os-specific:
2234bf50f18SLuigi Rizzo  *  	    FreeBSD: netmap_mmap_single (netmap_freebsd.c).
2244bf50f18SLuigi Rizzo  *  	    linux:   linux_netmap_mmap (netmap_linux.c).
2254bf50f18SLuigi Rizzo  *
2264bf50f18SLuigi Rizzo  * > 4. using the functions in the netmap(4) userspace API, a process
2274bf50f18SLuigi Rizzo  * >    can look up the occupation state of a queue, access memory buffers,
2284bf50f18SLuigi Rizzo  * >    and retrieve received packets or enqueue packets to transmit.
2294bf50f18SLuigi Rizzo  *
2304bf50f18SLuigi Rizzo  * 	these actions do not involve the kernel.
2314bf50f18SLuigi Rizzo  *
2324bf50f18SLuigi Rizzo  * > 5. using some ioctl()s the process can synchronize the userspace view
2334bf50f18SLuigi Rizzo  * >    of the queue with the actual status in the kernel. This includes both
2344bf50f18SLuigi Rizzo  * >    receiving the notification of new packets, and transmitting new
2354bf50f18SLuigi Rizzo  * >    packets on the output interface.
2364bf50f18SLuigi Rizzo  *
2374bf50f18SLuigi Rizzo  * 	These are implemented in netmap_ioctl(), NIOCTXSYNC and NIOCRXSYNC
2384bf50f18SLuigi Rizzo  * 	cases. They invoke the nm_sync callbacks on the netmap_kring
2394bf50f18SLuigi Rizzo  * 	structures, as initialized in step 2 and maybe later modified
2404bf50f18SLuigi Rizzo  * 	by a monitor. Monitors, however, will always call the original
2414bf50f18SLuigi Rizzo  * 	callback before doing anything else.
2424bf50f18SLuigi Rizzo  *
2434bf50f18SLuigi Rizzo  *
2444bf50f18SLuigi Rizzo  * > 6. select() or poll() can be used to wait for events on individual
2454bf50f18SLuigi Rizzo  * >    transmit or receive queues (or all queues for a given interface).
2464bf50f18SLuigi Rizzo  *
2474bf50f18SLuigi Rizzo  * 	Implemented in netmap_poll(). This will call the same nm_sync()
2484bf50f18SLuigi Rizzo  * 	callbacks as in step 5 above.
2494bf50f18SLuigi Rizzo  *
2504bf50f18SLuigi Rizzo  * 	os-specific:
2514bf50f18SLuigi Rizzo  * 		linux: we first go through linux_netmap_poll() to adapt
2524bf50f18SLuigi Rizzo  * 		       the FreeBSD interface to the linux one.
2534bf50f18SLuigi Rizzo  *
2544bf50f18SLuigi Rizzo  *
2554bf50f18SLuigi Rizzo  *  ----  VALE_CTL -----
2564bf50f18SLuigi Rizzo  *
2574bf50f18SLuigi Rizzo  *  VALE switches are controlled by issuing a NIOCREGIF with a non-null
2584bf50f18SLuigi Rizzo  *  nr_cmd in the nmreq structure. These subcommands are handled by
2594bf50f18SLuigi Rizzo  *  netmap_bdg_ctl() in netmap_vale.c. Persistent VALE ports are created
2604bf50f18SLuigi Rizzo  *  and destroyed by issuing the NETMAP_BDG_NEWIF and NETMAP_BDG_DELIF
2614bf50f18SLuigi Rizzo  *  subcommands, respectively.
2624bf50f18SLuigi Rizzo  *
2634bf50f18SLuigi Rizzo  *  Any network interface known to the system (including a persistent VALE
2644bf50f18SLuigi Rizzo  *  port) can be attached to a VALE switch by issuing the
2652ff91c17SVincenzo Maffione  *  NETMAP_REQ_VALE_ATTACH command. After the attachment, persistent VALE ports
2664bf50f18SLuigi Rizzo  *  look exactly like ephemeral VALE ports (as created in step 2 above).  The
2674bf50f18SLuigi Rizzo  *  attachment of other interfaces, instead, requires the creation of a
2684bf50f18SLuigi Rizzo  *  netmap_bwrap_adapter.  Moreover, the attached interface must be put in
2694bf50f18SLuigi Rizzo  *  netmap mode. This may require the creation of a netmap_generic_adapter if
2704bf50f18SLuigi Rizzo  *  we have no native support for the interface, or if generic adapters have
2714bf50f18SLuigi Rizzo  *  been forced by sysctl.
2724bf50f18SLuigi Rizzo  *
2734bf50f18SLuigi Rizzo  *  Both persistent VALE ports and bwraps are handled by netmap_get_bdg_na(),
2744bf50f18SLuigi Rizzo  *  called by nm_bdg_ctl_attach(), and discriminated by the nm_bdg_attach()
2754bf50f18SLuigi Rizzo  *  callback.  In the case of the bwrap, the callback creates the
2764bf50f18SLuigi Rizzo  *  netmap_bwrap_adapter.  The initialization of the bwrap is then
2774bf50f18SLuigi Rizzo  *  completed by calling netmap_do_regif() on it, in the nm_bdg_ctl()
2784bf50f18SLuigi Rizzo  *  callback (netmap_bwrap_bdg_ctl in netmap_vale.c).
2794bf50f18SLuigi Rizzo  *  A generic adapter for the wrapped ifp will be created if needed, when
2804bf50f18SLuigi Rizzo  *  netmap_get_bdg_na() calls netmap_get_hw_na().
2814bf50f18SLuigi Rizzo  *
2824bf50f18SLuigi Rizzo  *
2834bf50f18SLuigi Rizzo  *  ---- DATAPATHS -----
2844bf50f18SLuigi Rizzo  *
2854bf50f18SLuigi Rizzo  *              -= SYSTEM DEVICE WITH NATIVE SUPPORT =-
2864bf50f18SLuigi Rizzo  *
2874bf50f18SLuigi Rizzo  *    na == NA(ifp) == netmap_hw_adapter created in DEVICE_netmap_attach()
2884bf50f18SLuigi Rizzo  *
2894bf50f18SLuigi Rizzo  *    - tx from netmap userspace:
2904bf50f18SLuigi Rizzo  *	 concurrently:
2914bf50f18SLuigi Rizzo  *           1) ioctl(NIOCTXSYNC)/netmap_poll() in process context
2924bf50f18SLuigi Rizzo  *                kring->nm_sync() == DEVICE_netmap_txsync()
2934bf50f18SLuigi Rizzo  *           2) device interrupt handler
2944bf50f18SLuigi Rizzo  *                na->nm_notify()  == netmap_notify()
2954bf50f18SLuigi Rizzo  *    - rx from netmap userspace:
2964bf50f18SLuigi Rizzo  *       concurrently:
2974bf50f18SLuigi Rizzo  *           1) ioctl(NIOCRXSYNC)/netmap_poll() in process context
2984bf50f18SLuigi Rizzo  *                kring->nm_sync() == DEVICE_netmap_rxsync()
2994bf50f18SLuigi Rizzo  *           2) device interrupt handler
3004bf50f18SLuigi Rizzo  *                na->nm_notify()  == netmap_notify()
301847bf383SLuigi Rizzo  *    - rx from host stack
3024bf50f18SLuigi Rizzo  *       concurrently:
3034bf50f18SLuigi Rizzo  *           1) host stack
3044bf50f18SLuigi Rizzo  *                netmap_transmit()
3054bf50f18SLuigi Rizzo  *                  na->nm_notify  == netmap_notify()
3064bf50f18SLuigi Rizzo  *           2) ioctl(NIOCRXSYNC)/netmap_poll() in process context
30737e3a6d3SLuigi Rizzo  *                kring->nm_sync() == netmap_rxsync_from_host
3084bf50f18SLuigi Rizzo  *                  netmap_rxsync_from_host(na, NULL, NULL)
3094bf50f18SLuigi Rizzo  *    - tx to host stack
3104bf50f18SLuigi Rizzo  *           ioctl(NIOCTXSYNC)/netmap_poll() in process context
31137e3a6d3SLuigi Rizzo  *             kring->nm_sync() == netmap_txsync_to_host
3124bf50f18SLuigi Rizzo  *               netmap_txsync_to_host(na)
31337e3a6d3SLuigi Rizzo  *                 nm_os_send_up()
31437e3a6d3SLuigi Rizzo  *                   FreeBSD: na->if_input() == ether_input()
3154bf50f18SLuigi Rizzo  *                   linux: netif_rx() with NM_MAGIC_PRIORITY_RX
3164bf50f18SLuigi Rizzo  *
3174bf50f18SLuigi Rizzo  *
3184bf50f18SLuigi Rizzo  *               -= SYSTEM DEVICE WITH GENERIC SUPPORT =-
3194bf50f18SLuigi Rizzo  *
320847bf383SLuigi Rizzo  *    na == NA(ifp) == generic_netmap_adapter created in generic_netmap_attach()
321847bf383SLuigi Rizzo  *
322847bf383SLuigi Rizzo  *    - tx from netmap userspace:
323847bf383SLuigi Rizzo  *       concurrently:
324847bf383SLuigi Rizzo  *           1) ioctl(NIOCTXSYNC)/netmap_poll() in process context
325847bf383SLuigi Rizzo  *               kring->nm_sync() == generic_netmap_txsync()
32637e3a6d3SLuigi Rizzo  *                   nm_os_generic_xmit_frame()
327847bf383SLuigi Rizzo  *                       linux:   dev_queue_xmit() with NM_MAGIC_PRIORITY_TX
32837e3a6d3SLuigi Rizzo  *                           ifp->ndo_start_xmit == generic_ndo_start_xmit()
32937e3a6d3SLuigi Rizzo  *                               gna->save_start_xmit == orig. dev. start_xmit
330847bf383SLuigi Rizzo  *                       FreeBSD: na->if_transmit() == orig. dev if_transmit
331847bf383SLuigi Rizzo  *           2) generic_mbuf_destructor()
332847bf383SLuigi Rizzo  *                   na->nm_notify() == netmap_notify()
333847bf383SLuigi Rizzo  *    - rx from netmap userspace:
334847bf383SLuigi Rizzo  *           1) ioctl(NIOCRXSYNC)/netmap_poll() in process context
335847bf383SLuigi Rizzo  *               kring->nm_sync() == generic_netmap_rxsync()
336847bf383SLuigi Rizzo  *                   mbq_safe_dequeue()
337847bf383SLuigi Rizzo  *           2) device driver
338847bf383SLuigi Rizzo  *               generic_rx_handler()
339847bf383SLuigi Rizzo  *                   mbq_safe_enqueue()
340847bf383SLuigi Rizzo  *                   na->nm_notify() == netmap_notify()
34137e3a6d3SLuigi Rizzo  *    - rx from host stack
34237e3a6d3SLuigi Rizzo  *        FreeBSD: same as native
34337e3a6d3SLuigi Rizzo  *        Linux: same as native except:
344847bf383SLuigi Rizzo  *           1) host stack
34537e3a6d3SLuigi Rizzo  *               dev_queue_xmit() without NM_MAGIC_PRIORITY_TX
34637e3a6d3SLuigi Rizzo  *                   ifp->ndo_start_xmit == generic_ndo_start_xmit()
347847bf383SLuigi Rizzo  *                       netmap_transmit()
348847bf383SLuigi Rizzo  *                           na->nm_notify() == netmap_notify()
34937e3a6d3SLuigi Rizzo  *    - tx to host stack (same as native):
3504bf50f18SLuigi Rizzo  *
3514bf50f18SLuigi Rizzo  *
352847bf383SLuigi Rizzo  *                           -= VALE =-
3534bf50f18SLuigi Rizzo  *
354847bf383SLuigi Rizzo  *   INCOMING:
3554bf50f18SLuigi Rizzo  *
356847bf383SLuigi Rizzo  *      - VALE ports:
357847bf383SLuigi Rizzo  *          ioctl(NIOCTXSYNC)/netmap_poll() in process context
358847bf383SLuigi Rizzo  *              kring->nm_sync() == netmap_vp_txsync()
3594bf50f18SLuigi Rizzo  *
360847bf383SLuigi Rizzo  *      - system device with native support:
361847bf383SLuigi Rizzo  *         from cable:
362847bf383SLuigi Rizzo  *             interrupt
363847bf383SLuigi Rizzo  *                na->nm_notify() == netmap_bwrap_intr_notify(ring_nr != host ring)
364847bf383SLuigi Rizzo  *                     kring->nm_sync() == DEVICE_netmap_rxsync()
365847bf383SLuigi Rizzo  *                     netmap_vp_txsync()
366847bf383SLuigi Rizzo  *                     kring->nm_sync() == DEVICE_netmap_rxsync()
367847bf383SLuigi Rizzo  *         from host stack:
368847bf383SLuigi Rizzo  *             netmap_transmit()
369847bf383SLuigi Rizzo  *                na->nm_notify() == netmap_bwrap_intr_notify(ring_nr == host ring)
37037e3a6d3SLuigi Rizzo  *                     kring->nm_sync() == netmap_rxsync_from_host()
371847bf383SLuigi Rizzo  *                     netmap_vp_txsync()
3724bf50f18SLuigi Rizzo  *
373847bf383SLuigi Rizzo  *      - system device with generic support:
374847bf383SLuigi Rizzo  *         from device driver:
375847bf383SLuigi Rizzo  *            generic_rx_handler()
376847bf383SLuigi Rizzo  *                na->nm_notify() == netmap_bwrap_intr_notify(ring_nr != host ring)
377847bf383SLuigi Rizzo  *                     kring->nm_sync() == generic_netmap_rxsync()
378847bf383SLuigi Rizzo  *                     netmap_vp_txsync()
379847bf383SLuigi Rizzo  *                     kring->nm_sync() == generic_netmap_rxsync()
380847bf383SLuigi Rizzo  *         from host stack:
381847bf383SLuigi Rizzo  *            netmap_transmit()
382847bf383SLuigi Rizzo  *                na->nm_notify() == netmap_bwrap_intr_notify(ring_nr == host ring)
38337e3a6d3SLuigi Rizzo  *                     kring->nm_sync() == netmap_rxsync_from_host()
384847bf383SLuigi Rizzo  *                     netmap_vp_txsync()
3854bf50f18SLuigi Rizzo  *
386847bf383SLuigi Rizzo  *   (all cases) --> nm_bdg_flush()
387847bf383SLuigi Rizzo  *                      dest_na->nm_notify() == (see below)
3884bf50f18SLuigi Rizzo  *
389847bf383SLuigi Rizzo  *   OUTGOING:
3904bf50f18SLuigi Rizzo  *
391847bf383SLuigi Rizzo  *      - VALE ports:
392847bf383SLuigi Rizzo  *         concurrently:
393c3e9b4dbSLuiz Otavio O Souza  *             1) ioctl(NIOCRXSYNC)/netmap_poll() in process context
394847bf383SLuigi Rizzo  *                    kring->nm_sync() == netmap_vp_rxsync()
395847bf383SLuigi Rizzo  *             2) from nm_bdg_flush()
396847bf383SLuigi Rizzo  *                    na->nm_notify() == netmap_notify()
3974bf50f18SLuigi Rizzo  *
398847bf383SLuigi Rizzo  *      - system device with native support:
399847bf383SLuigi Rizzo  *          to cable:
400847bf383SLuigi Rizzo  *             na->nm_notify() == netmap_bwrap_notify()
401847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
402847bf383SLuigi Rizzo  *                 kring->nm_sync() == DEVICE_netmap_txsync()
403847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
404847bf383SLuigi Rizzo  *          to host stack:
405847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
40637e3a6d3SLuigi Rizzo  *                 kring->nm_sync() == netmap_txsync_to_host
407847bf383SLuigi Rizzo  *                 netmap_vp_rxsync_locked()
4084bf50f18SLuigi Rizzo  *
409847bf383SLuigi Rizzo  *      - system device with generic adapter:
410847bf383SLuigi Rizzo  *          to device driver:
411847bf383SLuigi Rizzo  *             na->nm_notify() == netmap_bwrap_notify()
412847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
413847bf383SLuigi Rizzo  *                 kring->nm_sync() == generic_netmap_txsync()
414847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
415847bf383SLuigi Rizzo  *          to host stack:
416847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
41737e3a6d3SLuigi Rizzo  *                 kring->nm_sync() == netmap_txsync_to_host
418847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
4194bf50f18SLuigi Rizzo  *
4204bf50f18SLuigi Rizzo  */
4214bf50f18SLuigi Rizzo 
422ce3ee1e7SLuigi Rizzo /*
423ce3ee1e7SLuigi Rizzo  * OS-specific code that is used only within this file.
424ce3ee1e7SLuigi Rizzo  * Other OS-specific code that must be accessed by drivers
425ce3ee1e7SLuigi Rizzo  * is present in netmap_kern.h
426ce3ee1e7SLuigi Rizzo  */
42701c7d25fSLuigi Rizzo 
428ce3ee1e7SLuigi Rizzo #if defined(__FreeBSD__)
42968b8534bSLuigi Rizzo #include <sys/cdefs.h> /* prerequisite */
43068b8534bSLuigi Rizzo #include <sys/types.h>
43168b8534bSLuigi Rizzo #include <sys/errno.h>
43268b8534bSLuigi Rizzo #include <sys/param.h>	/* defines used in kernel.h */
43368b8534bSLuigi Rizzo #include <sys/kernel.h>	/* types used in module initialization */
434f9790aebSLuigi Rizzo #include <sys/conf.h>	/* cdevsw struct, UID, GID */
43589e3fd52SLuigi Rizzo #include <sys/filio.h>	/* FIONBIO */
43668b8534bSLuigi Rizzo #include <sys/sockio.h>
43768b8534bSLuigi Rizzo #include <sys/socketvar.h>	/* struct socket */
43868b8534bSLuigi Rizzo #include <sys/malloc.h>
43968b8534bSLuigi Rizzo #include <sys/poll.h>
440a4470078SGleb Smirnoff #include <sys/proc.h>
44189f6b863SAttilio Rao #include <sys/rwlock.h>
44268b8534bSLuigi Rizzo #include <sys/socket.h> /* sockaddrs */
44368b8534bSLuigi Rizzo #include <sys/selinfo.h>
44468b8534bSLuigi Rizzo #include <sys/sysctl.h>
445339f59c0SGleb Smirnoff #include <sys/jail.h>
446a4470078SGleb Smirnoff #include <sys/epoch.h>
447339f59c0SGleb Smirnoff #include <net/vnet.h>
44868b8534bSLuigi Rizzo #include <net/if.h>
44976039bc8SGleb Smirnoff #include <net/if_var.h>
45068b8534bSLuigi Rizzo #include <net/bpf.h>		/* BIOCIMMEDIATE */
45168b8534bSLuigi Rizzo #include <machine/bus.h>	/* bus_dmamap_* */
452ce3ee1e7SLuigi Rizzo #include <sys/endian.h>
453ce3ee1e7SLuigi Rizzo #include <sys/refcount.h>
45489a9a5b5SVincenzo Maffione #include <net/ethernet.h>	/* ETHER_BPF_MTAP */
45568b8534bSLuigi Rizzo 
45668b8534bSLuigi Rizzo 
457ce3ee1e7SLuigi Rizzo #elif defined(linux)
458ce3ee1e7SLuigi Rizzo 
459ce3ee1e7SLuigi Rizzo #include "bsd_glue.h"
460ce3ee1e7SLuigi Rizzo 
461ce3ee1e7SLuigi Rizzo #elif defined(__APPLE__)
462ce3ee1e7SLuigi Rizzo 
463ce3ee1e7SLuigi Rizzo #warning OSX support is only partial
464ce3ee1e7SLuigi Rizzo #include "osx_glue.h"
465ce3ee1e7SLuigi Rizzo 
46637e3a6d3SLuigi Rizzo #elif defined (_WIN32)
46737e3a6d3SLuigi Rizzo 
46837e3a6d3SLuigi Rizzo #include "win_glue.h"
46937e3a6d3SLuigi Rizzo 
470ce3ee1e7SLuigi Rizzo #else
471ce3ee1e7SLuigi Rizzo 
472ce3ee1e7SLuigi Rizzo #error	Unsupported platform
473ce3ee1e7SLuigi Rizzo 
474ce3ee1e7SLuigi Rizzo #endif /* unsupported */
475ce3ee1e7SLuigi Rizzo 
476ce3ee1e7SLuigi Rizzo /*
477ce3ee1e7SLuigi Rizzo  * common headers
478ce3ee1e7SLuigi Rizzo  */
4790b8ed8e0SLuigi Rizzo #include <net/netmap.h>
4800b8ed8e0SLuigi Rizzo #include <dev/netmap/netmap_kern.h>
481ce3ee1e7SLuigi Rizzo #include <dev/netmap/netmap_mem2.h>
4820b8ed8e0SLuigi Rizzo 
483ce3ee1e7SLuigi Rizzo 
4845819da83SLuigi Rizzo /* user-controlled variables */
4855819da83SLuigi Rizzo int netmap_verbose;	/* verbosity level; 0 (the static-storage default) disables extra logging */
486b6e66be2SVincenzo Maffione #ifdef CONFIG_NETMAP_DEBUG
487b6e66be2SVincenzo Maffione int netmap_debug;	/* debug-message selector, compiled in only with CONFIG_NETMAP_DEBUG */
488b6e66be2SVincenzo Maffione #endif /* CONFIG_NETMAP_DEBUG */
4895819da83SLuigi Rizzo 
4905819da83SLuigi Rizzo static int netmap_no_timestamp; /* don't timestamp on rxsync */
491c85cb1a0SLuigi Rizzo int netmap_no_pendintr = 1;	/* nonzero: sync calls always look for new packets rather than relying on a pending-interrupt flag — NOTE(review): confirm against the sysctl description */
492f18be576SLuigi Rizzo int netmap_txsync_retry = 2;	/* txsync retry budget; presumably used when a destination ring is busy — verify at the use site */
493c3e9b4dbSLuiz Otavio O Souza static int netmap_fwd = 0;	/* force transparent forwarding */
494f196ce38SLuigi Rizzo 
495f9790aebSLuigi Rizzo /*
496f9790aebSLuigi Rizzo  * netmap_admode selects the netmap mode to use.
497f9790aebSLuigi Rizzo  * Invalid values are reset to NETMAP_ADMODE_BEST
498f9790aebSLuigi Rizzo  */
499f9790aebSLuigi Rizzo enum {	NETMAP_ADMODE_BEST = 0,	/* use native, fallback to generic */
500f9790aebSLuigi Rizzo 	NETMAP_ADMODE_NATIVE,	/* either native or none */
501f9790aebSLuigi Rizzo 	NETMAP_ADMODE_GENERIC,	/* force generic */
502f9790aebSLuigi Rizzo 	NETMAP_ADMODE_LAST };
503f9790aebSLuigi Rizzo static int netmap_admode = NETMAP_ADMODE_BEST;	/* current adapter-mode policy, one of the enum values above */
504f9790aebSLuigi Rizzo 
50537e3a6d3SLuigi Rizzo /* netmap_generic_mit controls mitigation of RX notifications for
50637e3a6d3SLuigi Rizzo  * the generic netmap adapter. The value is a time interval in
50737e3a6d3SLuigi Rizzo  * nanoseconds. */
50837e3a6d3SLuigi Rizzo int netmap_generic_mit = 100*1000;	/* default 100 us mitigation interval */
50937e3a6d3SLuigi Rizzo 
51037e3a6d3SLuigi Rizzo /* We use by default netmap-aware qdiscs with generic netmap adapters,
51137e3a6d3SLuigi Rizzo  * even if there can be a little performance hit with hardware NICs.
51237e3a6d3SLuigi Rizzo  * However, using the qdisc is the safer approach, for two reasons:
51337e3a6d3SLuigi Rizzo  * 1) it prevents non-fifo qdiscs to break the TX notification
51437e3a6d3SLuigi Rizzo  *    scheme, which is based on mbuf destructors when txqdisc is
51537e3a6d3SLuigi Rizzo  *    not used.
51637e3a6d3SLuigi Rizzo  * 2) it makes it possible to transmit over software devices that
51737e3a6d3SLuigi Rizzo  *    change skb->dev, like bridge, veth, ...
51837e3a6d3SLuigi Rizzo  *
51937e3a6d3SLuigi Rizzo  * Anyway users looking for the best performance should
52037e3a6d3SLuigi Rizzo  * use native adapters.
52137e3a6d3SLuigi Rizzo  */
5224f80b14cSVincenzo Maffione #ifdef linux
52337e3a6d3SLuigi Rizzo int netmap_generic_txqdisc = 1;
5244f80b14cSVincenzo Maffione #endif
52537e3a6d3SLuigi Rizzo 
52637e3a6d3SLuigi Rizzo /* Default number of slots and queues for generic adapters. */
52737e3a6d3SLuigi Rizzo int netmap_generic_ringsize = 1024;
52837e3a6d3SLuigi Rizzo int netmap_generic_rings = 1;
52937e3a6d3SLuigi Rizzo 
5302a7db7a6SVincenzo Maffione /* Non-zero to enable checksum offloading in NIC drivers */
5312a7db7a6SVincenzo Maffione int netmap_generic_hwcsum = 0;
5322a7db7a6SVincenzo Maffione 
53337e3a6d3SLuigi Rizzo /* Non-zero if ptnet devices are allowed to use virtio-net headers. */
53437e3a6d3SLuigi Rizzo int ptnet_vnet_hdr = 1;
53537e3a6d3SLuigi Rizzo 
53637e3a6d3SLuigi Rizzo /*
53737e3a6d3SLuigi Rizzo  * SYSCTL calls are grouped between SYSBEGIN and SYSEND to be emulated
53837e3a6d3SLuigi Rizzo  * in some other operating systems
53937e3a6d3SLuigi Rizzo  */
54037e3a6d3SLuigi Rizzo SYSBEGIN(main_init);
54137e3a6d3SLuigi Rizzo 
54237e3a6d3SLuigi Rizzo SYSCTL_DECL(_dev_netmap);
543*7029da5cSPawel Biernacki SYSCTL_NODE(_dev, OID_AUTO, netmap, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
544*7029da5cSPawel Biernacki     "Netmap args");
54537e3a6d3SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,
54637e3a6d3SLuigi Rizzo 		CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
547b6e66be2SVincenzo Maffione #ifdef CONFIG_NETMAP_DEBUG
548b6e66be2SVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, debug,
549b6e66be2SVincenzo Maffione 		CTLFLAG_RW, &netmap_debug, 0, "Debug messages");
550b6e66be2SVincenzo Maffione #endif /* CONFIG_NETMAP_DEBUG */
55137e3a6d3SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
55237e3a6d3SLuigi Rizzo 		CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
5534f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr, CTLFLAG_RW, &netmap_no_pendintr,
5544f80b14cSVincenzo Maffione 		0, "Always look for new received packets.");
55537e3a6d3SLuigi Rizzo SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW,
55637e3a6d3SLuigi Rizzo 		&netmap_txsync_retry, 0, "Number of txsync loops in bridge's flush.");
557f9790aebSLuigi Rizzo 
5584f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0,
5594f80b14cSVincenzo Maffione 		"Force NR_FORWARD mode");
5604f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0,
5614f80b14cSVincenzo Maffione 		"Adapter mode. 0 selects the best option available,"
5624f80b14cSVincenzo Maffione 		"1 forces native adapter, 2 forces emulated adapter");
5632a7db7a6SVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, generic_hwcsum, CTLFLAG_RW, &netmap_generic_hwcsum,
5642a7db7a6SVincenzo Maffione 		0, "Hardware checksums. 0 to disable checksum generation by the NIC (default),"
5652a7db7a6SVincenzo Maffione 		"1 to enable checksum generation by the NIC");
5664f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit,
5674f80b14cSVincenzo Maffione 		0, "RX notification interval in nanoseconds");
5684f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW,
5694f80b14cSVincenzo Maffione 		&netmap_generic_ringsize, 0,
5704f80b14cSVincenzo Maffione 		"Number of per-ring slots for emulated netmap mode");
5714f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW,
5724f80b14cSVincenzo Maffione 		&netmap_generic_rings, 0,
5734f80b14cSVincenzo Maffione 		"Number of TX/RX queues for emulated netmap adapters");
5744f80b14cSVincenzo Maffione #ifdef linux
5754f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, generic_txqdisc, CTLFLAG_RW,
5764f80b14cSVincenzo Maffione 		&netmap_generic_txqdisc, 0, "Use qdisc for generic adapters");
5774f80b14cSVincenzo Maffione #endif
5784f80b14cSVincenzo Maffione SYSCTL_INT(_dev_netmap, OID_AUTO, ptnet_vnet_hdr, CTLFLAG_RW, &ptnet_vnet_hdr,
5794f80b14cSVincenzo Maffione 		0, "Allow ptnet devices to use virtio-net headers");
58037e3a6d3SLuigi Rizzo 
58137e3a6d3SLuigi Rizzo SYSEND;
582f196ce38SLuigi Rizzo 
583ce3ee1e7SLuigi Rizzo NMG_LOCK_T	netmap_global_lock;
584ce3ee1e7SLuigi Rizzo 
58517885a7bSLuigi Rizzo /*
58617885a7bSLuigi Rizzo  * mark the ring as stopped, and run through the locks
58717885a7bSLuigi Rizzo  * to make sure other users get to see it.
58837e3a6d3SLuigi Rizzo  * stopped must be either NM_KR_STOPPED (for unbounded stop)
58937e3a6d3SLuigi Rizzo  * or NM_KR_LOCKED (brief stop for mutual exclusion purposes)
59017885a7bSLuigi Rizzo  */
5914bf50f18SLuigi Rizzo static void
59237e3a6d3SLuigi Rizzo netmap_disable_ring(struct netmap_kring *kr, int stopped)
593ce3ee1e7SLuigi Rizzo {
59437e3a6d3SLuigi Rizzo 	nm_kr_stop(kr, stopped);
59537e3a6d3SLuigi Rizzo 	// XXX check if nm_kr_stop is sufficient
596ce3ee1e7SLuigi Rizzo 	mtx_lock(&kr->q_lock);
597ce3ee1e7SLuigi Rizzo 	mtx_unlock(&kr->q_lock);
598ce3ee1e7SLuigi Rizzo 	nm_kr_put(kr);
599ce3ee1e7SLuigi Rizzo }
600ce3ee1e7SLuigi Rizzo 
601847bf383SLuigi Rizzo /* stop or enable a single ring */
6024bf50f18SLuigi Rizzo void
603847bf383SLuigi Rizzo netmap_set_ring(struct netmap_adapter *na, u_int ring_id, enum txrx t, int stopped)
6044bf50f18SLuigi Rizzo {
6054bf50f18SLuigi Rizzo 	if (stopped)
6062ff91c17SVincenzo Maffione 		netmap_disable_ring(NMR(na, t)[ring_id], stopped);
6074bf50f18SLuigi Rizzo 	else
6082ff91c17SVincenzo Maffione 		NMR(na, t)[ring_id]->nkr_stopped = 0;
6094bf50f18SLuigi Rizzo }
6104bf50f18SLuigi Rizzo 
611f9790aebSLuigi Rizzo 
61289cc2556SLuigi Rizzo /* stop or enable all the rings of na */
6134bf50f18SLuigi Rizzo void
6144bf50f18SLuigi Rizzo netmap_set_all_rings(struct netmap_adapter *na, int stopped)
615ce3ee1e7SLuigi Rizzo {
616ce3ee1e7SLuigi Rizzo 	int i;
617847bf383SLuigi Rizzo 	enum txrx t;
618ce3ee1e7SLuigi Rizzo 
6194bf50f18SLuigi Rizzo 	if (!nm_netmap_on(na))
620ce3ee1e7SLuigi Rizzo 		return;
621ce3ee1e7SLuigi Rizzo 
622847bf383SLuigi Rizzo 	for_rx_tx(t) {
623847bf383SLuigi Rizzo 		for (i = 0; i < netmap_real_rings(na, t); i++) {
624847bf383SLuigi Rizzo 			netmap_set_ring(na, i, t, stopped);
625ce3ee1e7SLuigi Rizzo 		}
626ce3ee1e7SLuigi Rizzo 	}
627ce3ee1e7SLuigi Rizzo }
628ce3ee1e7SLuigi Rizzo 
62989cc2556SLuigi Rizzo /*
63089cc2556SLuigi Rizzo  * Convenience function used in drivers.  Waits for current txsync()s/rxsync()s
63189cc2556SLuigi Rizzo  * to finish and prevents any new one from starting.  Call this before turning
632ddb13598SKevin Lo  * netmap mode off, or before removing the hardware rings (e.g., on module
63337e3a6d3SLuigi Rizzo  * unload).
63489cc2556SLuigi Rizzo  */
635f9790aebSLuigi Rizzo void
636f9790aebSLuigi Rizzo netmap_disable_all_rings(struct ifnet *ifp)
637f9790aebSLuigi Rizzo {
63837e3a6d3SLuigi Rizzo 	if (NM_NA_VALID(ifp)) {
63937e3a6d3SLuigi Rizzo 		netmap_set_all_rings(NA(ifp), NM_KR_STOPPED);
64037e3a6d3SLuigi Rizzo 	}
641f9790aebSLuigi Rizzo }
642f9790aebSLuigi Rizzo 
64389cc2556SLuigi Rizzo /*
64489cc2556SLuigi Rizzo  * Convenience function used in drivers.  Re-enables rxsync and txsync on the
64589cc2556SLuigi Rizzo  * adapter's rings In linux drivers, this should be placed near each
64689cc2556SLuigi Rizzo  * napi_enable().
64789cc2556SLuigi Rizzo  */
void
netmap_enable_all_rings(struct ifnet *ifp)
{
	/* ignore interfaces with no valid netmap adapter attached */
	if (!NM_NA_VALID(ifp))
		return;
	netmap_set_all_rings(NA(ifp), 0 /* enabled */);
}
655f9790aebSLuigi Rizzo 
65637e3a6d3SLuigi Rizzo void
65737e3a6d3SLuigi Rizzo netmap_make_zombie(struct ifnet *ifp)
65837e3a6d3SLuigi Rizzo {
65937e3a6d3SLuigi Rizzo 	if (NM_NA_VALID(ifp)) {
66037e3a6d3SLuigi Rizzo 		struct netmap_adapter *na = NA(ifp);
66137e3a6d3SLuigi Rizzo 		netmap_set_all_rings(na, NM_KR_LOCKED);
66237e3a6d3SLuigi Rizzo 		na->na_flags |= NAF_ZOMBIE;
66337e3a6d3SLuigi Rizzo 		netmap_set_all_rings(na, 0);
66437e3a6d3SLuigi Rizzo 	}
66537e3a6d3SLuigi Rizzo }
66637e3a6d3SLuigi Rizzo 
66737e3a6d3SLuigi Rizzo void
66837e3a6d3SLuigi Rizzo netmap_undo_zombie(struct ifnet *ifp)
66937e3a6d3SLuigi Rizzo {
67037e3a6d3SLuigi Rizzo 	if (NM_NA_VALID(ifp)) {
67137e3a6d3SLuigi Rizzo 		struct netmap_adapter *na = NA(ifp);
67237e3a6d3SLuigi Rizzo 		if (na->na_flags & NAF_ZOMBIE) {
67337e3a6d3SLuigi Rizzo 			netmap_set_all_rings(na, NM_KR_LOCKED);
67437e3a6d3SLuigi Rizzo 			na->na_flags &= ~NAF_ZOMBIE;
67537e3a6d3SLuigi Rizzo 			netmap_set_all_rings(na, 0);
67637e3a6d3SLuigi Rizzo 		}
67737e3a6d3SLuigi Rizzo 	}
67837e3a6d3SLuigi Rizzo }
679f9790aebSLuigi Rizzo 
680ce3ee1e7SLuigi Rizzo /*
681ce3ee1e7SLuigi Rizzo  * generic bound_checking function
682ce3ee1e7SLuigi Rizzo  */
683ce3ee1e7SLuigi Rizzo u_int
684ce3ee1e7SLuigi Rizzo nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg)
685ce3ee1e7SLuigi Rizzo {
686ce3ee1e7SLuigi Rizzo 	u_int oldv = *v;
687ce3ee1e7SLuigi Rizzo 	const char *op = NULL;
688ce3ee1e7SLuigi Rizzo 
689ce3ee1e7SLuigi Rizzo 	if (dflt < lo)
690ce3ee1e7SLuigi Rizzo 		dflt = lo;
691ce3ee1e7SLuigi Rizzo 	if (dflt > hi)
692ce3ee1e7SLuigi Rizzo 		dflt = hi;
693ce3ee1e7SLuigi Rizzo 	if (oldv < lo) {
694ce3ee1e7SLuigi Rizzo 		*v = dflt;
695ce3ee1e7SLuigi Rizzo 		op = "Bump";
696ce3ee1e7SLuigi Rizzo 	} else if (oldv > hi) {
697ce3ee1e7SLuigi Rizzo 		*v = hi;
698ce3ee1e7SLuigi Rizzo 		op = "Clamp";
699ce3ee1e7SLuigi Rizzo 	}
700ce3ee1e7SLuigi Rizzo 	if (op && msg)
701b6e66be2SVincenzo Maffione 		nm_prinf("%s %s to %d (was %d)", op, msg, *v, oldv);
702ce3ee1e7SLuigi Rizzo 	return *v;
703ce3ee1e7SLuigi Rizzo }
704ce3ee1e7SLuigi Rizzo 
705f9790aebSLuigi Rizzo 
706ce3ee1e7SLuigi Rizzo /*
707ce3ee1e7SLuigi Rizzo  * packet-dump function, user-supplied or static buffer.
708ce3ee1e7SLuigi Rizzo  * The destination buffer must be at least 30+4*len
709ce3ee1e7SLuigi Rizzo  */
/*
 * packet-dump function, user-supplied or static buffer.
 * The destination buffer must be at least 30+4*len bytes; when
 * dst is NULL a static 8 KB scratch buffer is used instead.
 */
const char *
nm_dump_buf(char *p, int len, int lim, char *dst)
{
	static char _dst[8192];
	static char hex[] ="0123456789abcdef";
	int i, j, row_start;
	char *o;	/* current output position */

#define P_HI(x)	hex[((x) & 0xf0)>>4]
#define P_LO(x)	hex[((x) & 0xf)]
#define P_C(x)	((x) >= 0x20 && (x) <= 0x7e ? (x) : '.')
	if (dst == NULL)
		dst = _dst;
	/* an out-of-range limit means "dump everything" */
	if (lim <= 0 || lim > len)
		lim = len;
	o = dst;
	sprintf(o, "buf 0x%p len %d lim %d\n", p, len, lim);
	o += strlen(o);
	/* hexdump: 16 bytes per row, hex columns then printable ASCII */
	for (i = 0; i < lim; ) {
		sprintf(o, "%5d: ", i);
		o += strlen(o);
		memset(o, ' ', 48);
		row_start = i;
		for (j = 0; j < 16 && i < lim; i++, j++) {
			o[j*3] = P_HI(p[i]);
			o[j*3 + 1] = P_LO(p[i]);
		}
		/* second pass over the same bytes for the ASCII column */
		i = row_start;
		for (j = 0; j < 16 && i < lim; i++, j++)
			o[j + 48] = P_C(p[i]);
		o[j + 48] = '\n';
		o += j + 49;
	}
	*o = '\0';
#undef P_HI
#undef P_LO
#undef P_C
	return dst;
}
750f196ce38SLuigi Rizzo 
751f18be576SLuigi Rizzo 
752ae10d1afSLuigi Rizzo /*
753ae10d1afSLuigi Rizzo  * Fetch configuration from the device, to cope with dynamic
754ae10d1afSLuigi Rizzo  * reconfigurations after loading the module.
755ae10d1afSLuigi Rizzo  */
75689cc2556SLuigi Rizzo /* call with NMG_LOCK held */
757f9790aebSLuigi Rizzo int
758ae10d1afSLuigi Rizzo netmap_update_config(struct netmap_adapter *na)
759ae10d1afSLuigi Rizzo {
7602ff91c17SVincenzo Maffione 	struct nm_config_info info;
761ae10d1afSLuigi Rizzo 
7622ff91c17SVincenzo Maffione 	bzero(&info, sizeof(info));
7636641c68bSLuigi Rizzo 	if (na->nm_config == NULL ||
7642ff91c17SVincenzo Maffione 	    na->nm_config(na, &info)) {
765ae10d1afSLuigi Rizzo 		/* take whatever we had at init time */
7662ff91c17SVincenzo Maffione 		info.num_tx_rings = na->num_tx_rings;
7672ff91c17SVincenzo Maffione 		info.num_tx_descs = na->num_tx_desc;
7682ff91c17SVincenzo Maffione 		info.num_rx_rings = na->num_rx_rings;
7692ff91c17SVincenzo Maffione 		info.num_rx_descs = na->num_rx_desc;
7702ff91c17SVincenzo Maffione 		info.rx_buf_maxsize = na->rx_buf_maxsize;
771ae10d1afSLuigi Rizzo 	}
772ae10d1afSLuigi Rizzo 
7732ff91c17SVincenzo Maffione 	if (na->num_tx_rings == info.num_tx_rings &&
7742ff91c17SVincenzo Maffione 	    na->num_tx_desc == info.num_tx_descs &&
7752ff91c17SVincenzo Maffione 	    na->num_rx_rings == info.num_rx_rings &&
7762ff91c17SVincenzo Maffione 	    na->num_rx_desc == info.num_rx_descs &&
7772ff91c17SVincenzo Maffione 	    na->rx_buf_maxsize == info.rx_buf_maxsize)
778ae10d1afSLuigi Rizzo 		return 0; /* nothing changed */
779f9790aebSLuigi Rizzo 	if (na->active_fds == 0) {
7802ff91c17SVincenzo Maffione 		na->num_tx_rings = info.num_tx_rings;
7812ff91c17SVincenzo Maffione 		na->num_tx_desc = info.num_tx_descs;
7822ff91c17SVincenzo Maffione 		na->num_rx_rings = info.num_rx_rings;
7832ff91c17SVincenzo Maffione 		na->num_rx_desc = info.num_rx_descs;
7842ff91c17SVincenzo Maffione 		na->rx_buf_maxsize = info.rx_buf_maxsize;
785b6e66be2SVincenzo Maffione 		if (netmap_verbose)
786b6e66be2SVincenzo Maffione 			nm_prinf("configuration changed for %s: txring %d x %d, "
787cfa866f6SMatt Macy 				"rxring %d x %d, rxbufsz %d",
788cfa866f6SMatt Macy 				na->name, na->num_tx_rings, na->num_tx_desc,
789cfa866f6SMatt Macy 				na->num_rx_rings, na->num_rx_desc, na->rx_buf_maxsize);
790ae10d1afSLuigi Rizzo 		return 0;
791ae10d1afSLuigi Rizzo 	}
792b6e66be2SVincenzo Maffione 	nm_prerr("WARNING: configuration changed for %s while active: "
7932ff91c17SVincenzo Maffione 		"txring %d x %d, rxring %d x %d, rxbufsz %d",
7942ff91c17SVincenzo Maffione 		na->name, info.num_tx_rings, info.num_tx_descs,
7952ff91c17SVincenzo Maffione 		info.num_rx_rings, info.num_rx_descs,
7962ff91c17SVincenzo Maffione 		info.rx_buf_maxsize);
797ae10d1afSLuigi Rizzo 	return 1;
798ae10d1afSLuigi Rizzo }
799ae10d1afSLuigi Rizzo 
80037e3a6d3SLuigi Rizzo /* nm_sync callbacks for the host rings */
80137e3a6d3SLuigi Rizzo static int netmap_txsync_to_host(struct netmap_kring *kring, int flags);
80237e3a6d3SLuigi Rizzo static int netmap_rxsync_from_host(struct netmap_kring *kring, int flags);
803f0ea3689SLuigi Rizzo 
804f0ea3689SLuigi Rizzo /* create the krings array and initialize the fields common to all adapters.
805f0ea3689SLuigi Rizzo  * The array layout is this:
806f0ea3689SLuigi Rizzo  *
807f0ea3689SLuigi Rizzo  *                    +----------+
808f0ea3689SLuigi Rizzo  * na->tx_rings ----->|          | \
809f0ea3689SLuigi Rizzo  *                    |          |  } na->num_tx_ring
810f0ea3689SLuigi Rizzo  *                    |          | /
811f0ea3689SLuigi Rizzo  *                    +----------+
812f0ea3689SLuigi Rizzo  *                    |          |    host tx kring
813f0ea3689SLuigi Rizzo  * na->rx_rings ----> +----------+
814f0ea3689SLuigi Rizzo  *                    |          | \
815f0ea3689SLuigi Rizzo  *                    |          |  } na->num_rx_rings
816f0ea3689SLuigi Rizzo  *                    |          | /
817f0ea3689SLuigi Rizzo  *                    +----------+
818f0ea3689SLuigi Rizzo  *                    |          |    host rx kring
819f0ea3689SLuigi Rizzo  *                    +----------+
820f0ea3689SLuigi Rizzo  * na->tailroom ----->|          | \
821f0ea3689SLuigi Rizzo  *                    |          |  } tailroom bytes
822f0ea3689SLuigi Rizzo  *                    |          | /
823f0ea3689SLuigi Rizzo  *                    +----------+
824f0ea3689SLuigi Rizzo  *
825f0ea3689SLuigi Rizzo  * Note: for compatibility, host krings are created even when not needed.
826f0ea3689SLuigi Rizzo  * The tailroom space is currently used by vale ports for allocating leases.
827f0ea3689SLuigi Rizzo  */
82889cc2556SLuigi Rizzo /* call with NMG_LOCK held */
829f9790aebSLuigi Rizzo int
830f0ea3689SLuigi Rizzo netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
831f9790aebSLuigi Rizzo {
832f9790aebSLuigi Rizzo 	u_int i, len, ndesc;
833f9790aebSLuigi Rizzo 	struct netmap_kring *kring;
834847bf383SLuigi Rizzo 	u_int n[NR_TXRX];
835847bf383SLuigi Rizzo 	enum txrx t;
83619c4ec08SVincenzo Maffione 	int err = 0;
837f9790aebSLuigi Rizzo 
838c3e9b4dbSLuiz Otavio O Souza 	if (na->tx_rings != NULL) {
839b6e66be2SVincenzo Maffione 		if (netmap_debug & NM_DEBUG_ON)
840b6e66be2SVincenzo Maffione 			nm_prerr("warning: krings were already created");
841c3e9b4dbSLuiz Otavio O Souza 		return 0;
842c3e9b4dbSLuiz Otavio O Souza 	}
843c3e9b4dbSLuiz Otavio O Souza 
844f0ea3689SLuigi Rizzo 	/* account for the (possibly fake) host rings */
8452a7db7a6SVincenzo Maffione 	n[NR_TX] = netmap_all_rings(na, NR_TX);
8462a7db7a6SVincenzo Maffione 	n[NR_RX] = netmap_all_rings(na, NR_RX);
847f0ea3689SLuigi Rizzo 
8482ff91c17SVincenzo Maffione 	len = (n[NR_TX] + n[NR_RX]) *
8492ff91c17SVincenzo Maffione 		(sizeof(struct netmap_kring) + sizeof(struct netmap_kring *))
8502ff91c17SVincenzo Maffione 		+ tailroom;
851f9790aebSLuigi Rizzo 
852c3e9b4dbSLuiz Otavio O Souza 	na->tx_rings = nm_os_malloc((size_t)len);
853f9790aebSLuigi Rizzo 	if (na->tx_rings == NULL) {
854b6e66be2SVincenzo Maffione 		nm_prerr("Cannot allocate krings");
855f9790aebSLuigi Rizzo 		return ENOMEM;
856f9790aebSLuigi Rizzo 	}
857847bf383SLuigi Rizzo 	na->rx_rings = na->tx_rings + n[NR_TX];
8582ff91c17SVincenzo Maffione 	na->tailroom = na->rx_rings + n[NR_RX];
8592ff91c17SVincenzo Maffione 
8602ff91c17SVincenzo Maffione 	/* link the krings in the krings array */
8612ff91c17SVincenzo Maffione 	kring = (struct netmap_kring *)((char *)na->tailroom + tailroom);
8622ff91c17SVincenzo Maffione 	for (i = 0; i < n[NR_TX] + n[NR_RX]; i++) {
8632ff91c17SVincenzo Maffione 		na->tx_rings[i] = kring;
8642ff91c17SVincenzo Maffione 		kring++;
8652ff91c17SVincenzo Maffione 	}
866f9790aebSLuigi Rizzo 
86717885a7bSLuigi Rizzo 	/*
86817885a7bSLuigi Rizzo 	 * All fields in krings are 0 except the one initialized below.
86917885a7bSLuigi Rizzo 	 * but better be explicit on important kring fields.
87017885a7bSLuigi Rizzo 	 */
871847bf383SLuigi Rizzo 	for_rx_tx(t) {
872847bf383SLuigi Rizzo 		ndesc = nma_get_ndesc(na, t);
873847bf383SLuigi Rizzo 		for (i = 0; i < n[t]; i++) {
8742ff91c17SVincenzo Maffione 			kring = NMR(na, t)[i];
875f9790aebSLuigi Rizzo 			bzero(kring, sizeof(*kring));
8762ff91c17SVincenzo Maffione 			kring->notify_na = na;
87717885a7bSLuigi Rizzo 			kring->ring_id = i;
878847bf383SLuigi Rizzo 			kring->tx = t;
879f9790aebSLuigi Rizzo 			kring->nkr_num_slots = ndesc;
88037e3a6d3SLuigi Rizzo 			kring->nr_mode = NKR_NETMAP_OFF;
88137e3a6d3SLuigi Rizzo 			kring->nr_pending_mode = NKR_NETMAP_OFF;
882847bf383SLuigi Rizzo 			if (i < nma_get_nrings(na, t)) {
883847bf383SLuigi Rizzo 				kring->nm_sync = (t == NR_TX ? na->nm_txsync : na->nm_rxsync);
88437e3a6d3SLuigi Rizzo 			} else {
8852ff91c17SVincenzo Maffione 				if (!(na->na_flags & NAF_HOST_RINGS))
8862ff91c17SVincenzo Maffione 					kring->nr_kflags |= NKR_FAKERING;
887847bf383SLuigi Rizzo 				kring->nm_sync = (t == NR_TX ?
88837e3a6d3SLuigi Rizzo 						netmap_txsync_to_host:
88937e3a6d3SLuigi Rizzo 						netmap_rxsync_from_host);
890f0ea3689SLuigi Rizzo 			}
891847bf383SLuigi Rizzo 			kring->nm_notify = na->nm_notify;
892847bf383SLuigi Rizzo 			kring->rhead = kring->rcur = kring->nr_hwcur = 0;
893f9790aebSLuigi Rizzo 			/*
89417885a7bSLuigi Rizzo 			 * IMPORTANT: Always keep one slot empty.
895f9790aebSLuigi Rizzo 			 */
896847bf383SLuigi Rizzo 			kring->rtail = kring->nr_hwtail = (t == NR_TX ? ndesc - 1 : 0);
897847bf383SLuigi Rizzo 			snprintf(kring->name, sizeof(kring->name) - 1, "%s %s%d", na->name,
898847bf383SLuigi Rizzo 					nm_txrx2str(t), i);
89975f4f3edSVincenzo Maffione 			nm_prdis("ktx %s h %d c %d t %d",
900f0ea3689SLuigi Rizzo 				kring->name, kring->rhead, kring->rcur, kring->rtail);
90119c4ec08SVincenzo Maffione 			err = nm_os_selinfo_init(&kring->si, kring->name);
90219c4ec08SVincenzo Maffione 			if (err) {
90319c4ec08SVincenzo Maffione 				netmap_krings_delete(na);
90419c4ec08SVincenzo Maffione 				return err;
90519c4ec08SVincenzo Maffione 			}
906847bf383SLuigi Rizzo 			mtx_init(&kring->q_lock, (t == NR_TX ? "nm_txq_lock" : "nm_rxq_lock"), NULL, MTX_DEF);
90719c4ec08SVincenzo Maffione 			kring->na = na;	/* setting this field marks the mutex as initialized */
908f9790aebSLuigi Rizzo 		}
90919c4ec08SVincenzo Maffione 		err = nm_os_selinfo_init(&na->si[t], na->name);
91019c4ec08SVincenzo Maffione 		if (err) {
91119c4ec08SVincenzo Maffione 			netmap_krings_delete(na);
91219c4ec08SVincenzo Maffione 			return err;
913f0ea3689SLuigi Rizzo 		}
91419c4ec08SVincenzo Maffione 	}
915f9790aebSLuigi Rizzo 
916f9790aebSLuigi Rizzo 	return 0;
917f9790aebSLuigi Rizzo }
918f9790aebSLuigi Rizzo 
919f9790aebSLuigi Rizzo 
920f0ea3689SLuigi Rizzo /* undo the actions performed by netmap_krings_create */
92189cc2556SLuigi Rizzo /* call with NMG_LOCK held */
922f9790aebSLuigi Rizzo void
923f9790aebSLuigi Rizzo netmap_krings_delete(struct netmap_adapter *na)
924f9790aebSLuigi Rizzo {
9252ff91c17SVincenzo Maffione 	struct netmap_kring **kring = na->tx_rings;
926847bf383SLuigi Rizzo 	enum txrx t;
927847bf383SLuigi Rizzo 
928c3e9b4dbSLuiz Otavio O Souza 	if (na->tx_rings == NULL) {
929b6e66be2SVincenzo Maffione 		if (netmap_debug & NM_DEBUG_ON)
930b6e66be2SVincenzo Maffione 			nm_prerr("warning: krings were already deleted");
931c3e9b4dbSLuiz Otavio O Souza 		return;
932c3e9b4dbSLuiz Otavio O Souza 	}
933c3e9b4dbSLuiz Otavio O Souza 
934847bf383SLuigi Rizzo 	for_rx_tx(t)
93537e3a6d3SLuigi Rizzo 		nm_os_selinfo_uninit(&na->si[t]);
936f9790aebSLuigi Rizzo 
937f0ea3689SLuigi Rizzo 	/* we rely on the krings layout described above */
938f0ea3689SLuigi Rizzo 	for ( ; kring != na->tailroom; kring++) {
93919c4ec08SVincenzo Maffione 		if ((*kring)->na != NULL)
9402ff91c17SVincenzo Maffione 			mtx_destroy(&(*kring)->q_lock);
9412ff91c17SVincenzo Maffione 		nm_os_selinfo_uninit(&(*kring)->si);
942f9790aebSLuigi Rizzo 	}
943c3e9b4dbSLuiz Otavio O Souza 	nm_os_free(na->tx_rings);
944f9790aebSLuigi Rizzo 	na->tx_rings = na->rx_rings = na->tailroom = NULL;
945f9790aebSLuigi Rizzo }
946f9790aebSLuigi Rizzo 
947f9790aebSLuigi Rizzo 
94817885a7bSLuigi Rizzo /*
94917885a7bSLuigi Rizzo  * Destructor for NIC ports. They also have an mbuf queue
95017885a7bSLuigi Rizzo  * on the rings connected to the host so we need to purge
95117885a7bSLuigi Rizzo  * them first.
95217885a7bSLuigi Rizzo  */
95389cc2556SLuigi Rizzo /* call with NMG_LOCK held */
95437e3a6d3SLuigi Rizzo void
95517885a7bSLuigi Rizzo netmap_hw_krings_delete(struct netmap_adapter *na)
95617885a7bSLuigi Rizzo {
9572a7db7a6SVincenzo Maffione 	u_int lim = netmap_real_rings(na, NR_RX), i;
95817885a7bSLuigi Rizzo 
9592a7db7a6SVincenzo Maffione 	for (i = nma_get_nrings(na, NR_RX); i < lim; i++) {
9602a7db7a6SVincenzo Maffione 		struct mbq *q = &NMR(na, NR_RX)[i]->rx_queue;
96175f4f3edSVincenzo Maffione 		nm_prdis("destroy sw mbq with len %d", mbq_len(q));
96217885a7bSLuigi Rizzo 		mbq_purge(q);
96337e3a6d3SLuigi Rizzo 		mbq_safe_fini(q);
9642a7db7a6SVincenzo Maffione 	}
96517885a7bSLuigi Rizzo 	netmap_krings_delete(na);
96617885a7bSLuigi Rizzo }
96717885a7bSLuigi Rizzo 
9684f80b14cSVincenzo Maffione static void
9694f80b14cSVincenzo Maffione netmap_mem_drop(struct netmap_adapter *na)
9704f80b14cSVincenzo Maffione {
9714f80b14cSVincenzo Maffione 	int last = netmap_mem_deref(na->nm_mem, na);
9724f80b14cSVincenzo Maffione 	/* if the native allocator had been overrided on regif,
9734f80b14cSVincenzo Maffione 	 * restore it now and drop the temporary one
9744f80b14cSVincenzo Maffione 	 */
9754f80b14cSVincenzo Maffione 	if (last && na->nm_mem_prev) {
9764f80b14cSVincenzo Maffione 		netmap_mem_put(na->nm_mem);
9774f80b14cSVincenzo Maffione 		na->nm_mem = na->nm_mem_prev;
9784f80b14cSVincenzo Maffione 		na->nm_mem_prev = NULL;
9794f80b14cSVincenzo Maffione 	}
9804f80b14cSVincenzo Maffione }
981f9790aebSLuigi Rizzo 
98268b8534bSLuigi Rizzo /*
983847bf383SLuigi Rizzo  * Undo everything that was done in netmap_do_regif(). In particular,
984847bf383SLuigi Rizzo  * call nm_register(ifp,0) to stop netmap mode on the interface and
9854bf50f18SLuigi Rizzo  * revert to normal operation.
98668b8534bSLuigi Rizzo  */
987ce3ee1e7SLuigi Rizzo /* call with NMG_LOCK held */
988847bf383SLuigi Rizzo static void netmap_unset_ringid(struct netmap_priv_d *);
98937e3a6d3SLuigi Rizzo static void netmap_krings_put(struct netmap_priv_d *);
99037e3a6d3SLuigi Rizzo void
991847bf383SLuigi Rizzo netmap_do_unregif(struct netmap_priv_d *priv)
99268b8534bSLuigi Rizzo {
993f9790aebSLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
99468b8534bSLuigi Rizzo 
995ce3ee1e7SLuigi Rizzo 	NMG_LOCK_ASSERT();
996f9790aebSLuigi Rizzo 	na->active_fds--;
99737e3a6d3SLuigi Rizzo 	/* unset nr_pending_mode and possibly release exclusive mode */
99837e3a6d3SLuigi Rizzo 	netmap_krings_put(priv);
999847bf383SLuigi Rizzo 
1000847bf383SLuigi Rizzo #ifdef	WITH_MONITOR
100137e3a6d3SLuigi Rizzo 	/* XXX check whether we have to do something with monitor
100237e3a6d3SLuigi Rizzo 	 * when rings change nr_mode. */
100337e3a6d3SLuigi Rizzo 	if (na->active_fds <= 0) {
1004847bf383SLuigi Rizzo 		/* walk through all the rings and tell any monitor
1005847bf383SLuigi Rizzo 		 * that the port is going to exit netmap mode
1006847bf383SLuigi Rizzo 		 */
1007847bf383SLuigi Rizzo 		netmap_monitor_stop(na);
100837e3a6d3SLuigi Rizzo 	}
1009847bf383SLuigi Rizzo #endif
101037e3a6d3SLuigi Rizzo 
101137e3a6d3SLuigi Rizzo 	if (na->active_fds <= 0 || nm_kring_pending(priv)) {
101237e3a6d3SLuigi Rizzo 		na->nm_register(na, 0);
101337e3a6d3SLuigi Rizzo 	}
101437e3a6d3SLuigi Rizzo 
101537e3a6d3SLuigi Rizzo 	/* delete rings and buffers that are no longer needed */
101637e3a6d3SLuigi Rizzo 	netmap_mem_rings_delete(na);
101737e3a6d3SLuigi Rizzo 
101837e3a6d3SLuigi Rizzo 	if (na->active_fds <= 0) {	/* last instance */
101968b8534bSLuigi Rizzo 		/*
102037e3a6d3SLuigi Rizzo 		 * (TO CHECK) We enter here
1021f18be576SLuigi Rizzo 		 * when the last reference to this file descriptor goes
1022f18be576SLuigi Rizzo 		 * away. This means we cannot have any pending poll()
1023f18be576SLuigi Rizzo 		 * or interrupt routine operating on the structure.
1024ce3ee1e7SLuigi Rizzo 		 * XXX The file may be closed in a thread while
1025ce3ee1e7SLuigi Rizzo 		 * another thread is using it.
1026ce3ee1e7SLuigi Rizzo 		 * Linux keeps the file opened until the last reference
1027ce3ee1e7SLuigi Rizzo 		 * by any outstanding ioctl/poll or mmap is gone.
1028ce3ee1e7SLuigi Rizzo 		 * FreeBSD does not track mmap()s (but we do) and
1029ce3ee1e7SLuigi Rizzo 		 * wakes up any sleeping poll(). Need to check what
1030ce3ee1e7SLuigi Rizzo 		 * happens if the close() occurs while a concurrent
1031ce3ee1e7SLuigi Rizzo 		 * syscall is running.
103268b8534bSLuigi Rizzo 		 */
1033b6e66be2SVincenzo Maffione 		if (netmap_debug & NM_DEBUG_ON)
1034b6e66be2SVincenzo Maffione 			nm_prinf("deleting last instance for %s", na->name);
103537e3a6d3SLuigi Rizzo 
103637e3a6d3SLuigi Rizzo 		if (nm_netmap_on(na)) {
1037b6e66be2SVincenzo Maffione 			nm_prerr("BUG: netmap on while going to delete the krings");
103837e3a6d3SLuigi Rizzo 		}
103937e3a6d3SLuigi Rizzo 
1040f9790aebSLuigi Rizzo 		na->nm_krings_delete(na);
1041d12354a5SVincenzo Maffione 
1042d12354a5SVincenzo Maffione 		/* restore the default number of host tx and rx rings */
1043253b2ec1SVincenzo Maffione 		if (na->na_flags & NAF_HOST_RINGS) {
1044d12354a5SVincenzo Maffione 			na->num_host_tx_rings = 1;
1045d12354a5SVincenzo Maffione 			na->num_host_rx_rings = 1;
1046253b2ec1SVincenzo Maffione 		} else {
1047253b2ec1SVincenzo Maffione 			na->num_host_tx_rings = 0;
1048253b2ec1SVincenzo Maffione 			na->num_host_rx_rings = 0;
1049253b2ec1SVincenzo Maffione 		}
105068b8534bSLuigi Rizzo 	}
105137e3a6d3SLuigi Rizzo 
1052847bf383SLuigi Rizzo 	/* possibily decrement counter of tx_si/rx_si users */
1053847bf383SLuigi Rizzo 	netmap_unset_ringid(priv);
1054f9790aebSLuigi Rizzo 	/* delete the nifp */
1055847bf383SLuigi Rizzo 	netmap_mem_if_delete(na, priv->np_nifp);
1056847bf383SLuigi Rizzo 	/* drop the allocator */
10574f80b14cSVincenzo Maffione 	netmap_mem_drop(na);
1058847bf383SLuigi Rizzo 	/* mark the priv as unregistered */
1059847bf383SLuigi Rizzo 	priv->np_na = NULL;
1060847bf383SLuigi Rizzo 	priv->np_nifp = NULL;
10615819da83SLuigi Rizzo }
106268b8534bSLuigi Rizzo 
106337e3a6d3SLuigi Rizzo struct netmap_priv_d*
106437e3a6d3SLuigi Rizzo netmap_priv_new(void)
106537e3a6d3SLuigi Rizzo {
106637e3a6d3SLuigi Rizzo 	struct netmap_priv_d *priv;
106737e3a6d3SLuigi Rizzo 
1068c3e9b4dbSLuiz Otavio O Souza 	priv = nm_os_malloc(sizeof(struct netmap_priv_d));
106937e3a6d3SLuigi Rizzo 	if (priv == NULL)
107037e3a6d3SLuigi Rizzo 		return NULL;
107137e3a6d3SLuigi Rizzo 	priv->np_refs = 1;
107237e3a6d3SLuigi Rizzo 	nm_os_get_module();
107337e3a6d3SLuigi Rizzo 	return priv;
107437e3a6d3SLuigi Rizzo }
107537e3a6d3SLuigi Rizzo 
1076ce3ee1e7SLuigi Rizzo /*
10778fd44c93SLuigi Rizzo  * Destructor of the netmap_priv_d, called when the fd is closed
10788fd44c93SLuigi Rizzo  * Action: undo all the things done by NIOCREGIF,
10798fd44c93SLuigi Rizzo  * On FreeBSD we need to track whether there are active mmap()s,
10808fd44c93SLuigi Rizzo  * and we use np_active_mmaps for that. On linux, the field is always 0.
10818fd44c93SLuigi Rizzo  * Return: 1 if we can free priv, 0 otherwise.
108289cc2556SLuigi Rizzo  *
1083ce3ee1e7SLuigi Rizzo  */
108489cc2556SLuigi Rizzo /* call with NMG_LOCK held */
108537e3a6d3SLuigi Rizzo void
108637e3a6d3SLuigi Rizzo netmap_priv_delete(struct netmap_priv_d *priv)
1087ce3ee1e7SLuigi Rizzo {
1088f9790aebSLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
1089ce3ee1e7SLuigi Rizzo 
1090847adfb7SLuigi Rizzo 	/* number of active references to this fd */
10918fd44c93SLuigi Rizzo 	if (--priv->np_refs > 0) {
109237e3a6d3SLuigi Rizzo 		return;
1093ce3ee1e7SLuigi Rizzo 	}
109437e3a6d3SLuigi Rizzo 	nm_os_put_module();
109537e3a6d3SLuigi Rizzo 	if (na) {
1096847bf383SLuigi Rizzo 		netmap_do_unregif(priv);
109737e3a6d3SLuigi Rizzo 	}
109837e3a6d3SLuigi Rizzo 	netmap_unget_na(na, priv->np_ifp);
109937e3a6d3SLuigi Rizzo 	bzero(priv, sizeof(*priv));	/* for safety */
1100c3e9b4dbSLuiz Otavio O Souza 	nm_os_free(priv);
1101f196ce38SLuigi Rizzo }
11025819da83SLuigi Rizzo 
1103f9790aebSLuigi Rizzo 
110489cc2556SLuigi Rizzo /* call with NMG_LOCK *not* held */
void
netmap_dtor(void *data)
{
	/* Serialize the teardown against other netmap control operations. */
	NMG_LOCK();
	netmap_priv_delete((struct netmap_priv_d *)data);
	NMG_UNLOCK();
}
111468b8534bSLuigi Rizzo 
1115f18be576SLuigi Rizzo 
111668b8534bSLuigi Rizzo /*
1117c3e9b4dbSLuiz Otavio O Souza  * Handlers for synchronization of the rings from/to the host stack.
1118c3e9b4dbSLuiz Otavio O Souza  * These are associated to a network interface and are just another
1119c3e9b4dbSLuiz Otavio O Souza  * ring pair managed by userspace.
1120c3e9b4dbSLuiz Otavio O Souza  *
1121c3e9b4dbSLuiz Otavio O Souza  * Netmap also supports transparent forwarding (NS_FORWARD and NR_FORWARD
1122c3e9b4dbSLuiz Otavio O Souza  * flags):
1123c3e9b4dbSLuiz Otavio O Souza  *
1124c3e9b4dbSLuiz Otavio O Souza  * - Before releasing buffers on hw RX rings, the application can mark
1125c3e9b4dbSLuiz Otavio O Souza  *   them with the NS_FORWARD flag. During the next RXSYNC or poll(), they
1126c3e9b4dbSLuiz Otavio O Souza  *   will be forwarded to the host stack, similarly to what happened if
1127c3e9b4dbSLuiz Otavio O Souza  *   the application moved them to the host TX ring.
1128c3e9b4dbSLuiz Otavio O Souza  *
1129c3e9b4dbSLuiz Otavio O Souza  * - Before releasing buffers on the host RX ring, the application can
1130c3e9b4dbSLuiz Otavio O Souza  *   mark them with the NS_FORWARD flag. During the next RXSYNC or poll(),
1131c3e9b4dbSLuiz Otavio O Souza  *   they will be forwarded to the hw TX rings, saving the application
1132c3e9b4dbSLuiz Otavio O Souza  *   from doing the same task in user-space.
1133c3e9b4dbSLuiz Otavio O Souza  *
 * Transparent forwarding can be enabled per-ring, by setting the NR_FORWARD
1135c3e9b4dbSLuiz Otavio O Souza  * flag, or globally with the netmap_fwd sysctl.
1136c3e9b4dbSLuiz Otavio O Souza  *
1137091fd0abSLuigi Rizzo  * The transfer NIC --> host is relatively easy, just encapsulate
1138091fd0abSLuigi Rizzo  * into mbufs and we are done. The host --> NIC side is slightly
1139091fd0abSLuigi Rizzo  * harder because there might not be room in the tx ring so it
1140091fd0abSLuigi Rizzo  * might take a while before releasing the buffer.
1141091fd0abSLuigi Rizzo  */
1142091fd0abSLuigi Rizzo 
1143f18be576SLuigi Rizzo 
1144091fd0abSLuigi Rizzo /*
1145c3e9b4dbSLuiz Otavio O Souza  * Pass a whole queue of mbufs to the host stack as coming from 'dst'
114617885a7bSLuigi Rizzo  * We do not need to lock because the queue is private.
1147c3e9b4dbSLuiz Otavio O Souza  * After this call the queue is empty.
1148091fd0abSLuigi Rizzo  */
1149091fd0abSLuigi Rizzo static void
1150f9790aebSLuigi Rizzo netmap_send_up(struct ifnet *dst, struct mbq *q)
1151091fd0abSLuigi Rizzo {
1152a4470078SGleb Smirnoff 	struct epoch_tracker et;
1153091fd0abSLuigi Rizzo 	struct mbuf *m;
115437e3a6d3SLuigi Rizzo 	struct mbuf *head = NULL, *prev = NULL;
1155091fd0abSLuigi Rizzo 
1156a4470078SGleb Smirnoff 	NET_EPOCH_ENTER(et);
1157c3e9b4dbSLuiz Otavio O Souza 	/* Send packets up, outside the lock; head/prev machinery
1158c3e9b4dbSLuiz Otavio O Souza 	 * is only useful for Windows. */
1159f9790aebSLuigi Rizzo 	while ((m = mbq_dequeue(q)) != NULL) {
1160b6e66be2SVincenzo Maffione 		if (netmap_debug & NM_DEBUG_HOST)
1161b6e66be2SVincenzo Maffione 			nm_prinf("sending up pkt %p size %d", m, MBUF_LEN(m));
116237e3a6d3SLuigi Rizzo 		prev = nm_os_send_up(dst, m, prev);
116337e3a6d3SLuigi Rizzo 		if (head == NULL)
116437e3a6d3SLuigi Rizzo 			head = prev;
1165091fd0abSLuigi Rizzo 	}
116637e3a6d3SLuigi Rizzo 	if (head)
116737e3a6d3SLuigi Rizzo 		nm_os_send_up(dst, NULL, head);
1168a4470078SGleb Smirnoff 	NET_EPOCH_EXIT(et);
116937e3a6d3SLuigi Rizzo 	mbq_fini(q);
1170091fd0abSLuigi Rizzo }
1171091fd0abSLuigi Rizzo 
1172f18be576SLuigi Rizzo 
1173091fd0abSLuigi Rizzo /*
1174c3e9b4dbSLuiz Otavio O Souza  * Scan the buffers from hwcur to ring->head, and put a copy of those
1175c3e9b4dbSLuiz Otavio O Souza  * marked NS_FORWARD (or all of them if forced) into a queue of mbufs.
1176c3e9b4dbSLuiz Otavio O Souza  * Drop remaining packets in the unlikely event
117717885a7bSLuigi Rizzo  * of an mbuf shortage.
1178091fd0abSLuigi Rizzo  */
static void
netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
{
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	u_int n;
	struct netmap_adapter *na = kring->na;

	/* Walk the slots from hwcur up to (but excluding) rhead. */
	for (n = kring->nr_hwcur; n != head; n = nm_next(n, lim)) {
		struct mbuf *m;
		struct netmap_slot *slot = &kring->ring->slot[n];

		if ((slot->flags & NS_FORWARD) == 0 && !force)
			continue;
		/* Sanity-check the slot length: 14 is the minimum Ethernet
		 * header size; anything larger than the netmap buffer is a
		 * corrupt slot. Skip (and rate-limited log) both cases. */
		if (slot->len < 14 || slot->len > NETMAP_BUF_SIZE(na)) {
			nm_prlim(5, "bad pkt at %d len %d", n, slot->len);
			continue;
		}
		slot->flags &= ~NS_FORWARD; // XXX needed ?
		/* XXX TODO: adapt to the case of a multisegment packet */
		m = m_devget(NMB(na, slot), slot->len, 0, na->ifp, NULL);

		/* Out of mbufs: drop the remaining packets. */
		if (m == NULL)
			break;
		mbq_enqueue(q, m);
	}
}
1206091fd0abSLuigi Rizzo 
120737e3a6d3SLuigi Rizzo static inline int
120837e3a6d3SLuigi Rizzo _nm_may_forward(struct netmap_kring *kring)
120937e3a6d3SLuigi Rizzo {
121037e3a6d3SLuigi Rizzo 	return	((netmap_fwd || kring->ring->flags & NR_FORWARD) &&
121137e3a6d3SLuigi Rizzo 		 kring->na->na_flags & NAF_HOST_RINGS &&
121237e3a6d3SLuigi Rizzo 		 kring->tx == NR_RX);
121337e3a6d3SLuigi Rizzo }
121437e3a6d3SLuigi Rizzo 
121537e3a6d3SLuigi Rizzo static inline int
121637e3a6d3SLuigi Rizzo nm_may_forward_up(struct netmap_kring *kring)
121737e3a6d3SLuigi Rizzo {
121837e3a6d3SLuigi Rizzo 	return	_nm_may_forward(kring) &&
121937e3a6d3SLuigi Rizzo 		 kring->ring_id != kring->na->num_rx_rings;
122037e3a6d3SLuigi Rizzo }
122137e3a6d3SLuigi Rizzo 
122237e3a6d3SLuigi Rizzo static inline int
1223c3e9b4dbSLuiz Otavio O Souza nm_may_forward_down(struct netmap_kring *kring, int sync_flags)
122437e3a6d3SLuigi Rizzo {
122537e3a6d3SLuigi Rizzo 	return	_nm_may_forward(kring) &&
1226c3e9b4dbSLuiz Otavio O Souza 		 (sync_flags & NAF_CAN_FORWARD_DOWN) &&
122737e3a6d3SLuigi Rizzo 		 kring->ring_id == kring->na->num_rx_rings;
122837e3a6d3SLuigi Rizzo }
1229f18be576SLuigi Rizzo 
1230091fd0abSLuigi Rizzo /*
123117885a7bSLuigi Rizzo  * Send to the NIC rings packets marked NS_FORWARD between
1232c3e9b4dbSLuiz Otavio O Souza  * kring->nr_hwcur and kring->rhead.
1233c3e9b4dbSLuiz Otavio O Souza  * Called under kring->rx_queue.lock on the sw rx ring.
1234c3e9b4dbSLuiz Otavio O Souza  *
1235c3e9b4dbSLuiz Otavio O Souza  * It can only be called if the user opened all the TX hw rings,
1236c3e9b4dbSLuiz Otavio O Souza  * see NAF_CAN_FORWARD_DOWN flag.
1237c3e9b4dbSLuiz Otavio O Souza  * We can touch the TX netmap rings (slots, head and cur) since
1238c3e9b4dbSLuiz Otavio O Souza  * we are in poll/ioctl system call context, and the application
1239c3e9b4dbSLuiz Otavio O Souza  * is not supposed to touch the ring (using a different thread)
1240c3e9b4dbSLuiz Otavio O Souza  * during the execution of the system call.
1241091fd0abSLuigi Rizzo  */
static u_int
netmap_sw_to_nic(struct netmap_adapter *na)
{
	/* The host RX ring sits right after the last hw RX ring. */
	struct netmap_kring *kring = na->rx_rings[na->num_rx_rings];
	struct netmap_slot *rxslot = kring->ring->slot;
	u_int i, rxcur = kring->nr_hwcur;
	u_int const head = kring->rhead;
	u_int const src_lim = kring->nkr_num_slots - 1;
	u_int sent = 0;

	/* scan rings to find space, then fill as much as possible */
	for (i = 0; i < na->num_tx_rings; i++) {
		struct netmap_kring *kdst = na->tx_rings[i];
		struct netmap_ring *rdst = kdst->ring;
		u_int const dst_lim = kdst->nkr_num_slots - 1;

		/* XXX do we trust ring or kring->rcur,rtail ? */
		for (; rxcur != head && !nm_ring_empty(rdst);
		     rxcur = nm_next(rxcur, src_lim) ) {
			struct netmap_slot *src, *dst, tmp;
			u_int dst_head = rdst->head;

			src = &rxslot[rxcur];
			if ((src->flags & NS_FORWARD) == 0 && !netmap_fwd)
				continue;

			sent++;

			dst = &rdst->slot[dst_head];

			/* Zero-copy transfer: swap the buffer indices between
			 * source and destination slots, and mark both sides
			 * NS_BUF_CHANGED so the respective sync routines
			 * reload the buffer mappings. */
			tmp = *src;

			src->buf_idx = dst->buf_idx;
			src->flags = NS_BUF_CHANGED;

			dst->buf_idx = tmp.buf_idx;
			dst->len = tmp.len;
			dst->flags = NS_BUF_CHANGED;

			/* Advance head/cur on the destination TX ring. */
			rdst->head = rdst->cur = nm_next(dst_head, dst_lim);
		}
		/* if (sent) XXX txsync ? it would be just an optimization */
	}
	return sent;
}
1287091fd0abSLuigi Rizzo 
1288f18be576SLuigi Rizzo 
1289091fd0abSLuigi Rizzo /*
1290ce3ee1e7SLuigi Rizzo  * netmap_txsync_to_host() passes packets up. We are called from a
129102ad4083SLuigi Rizzo  * system call in user process context, and the only contention
129202ad4083SLuigi Rizzo  * can be among multiple user threads erroneously calling
1293091fd0abSLuigi Rizzo  * this routine concurrently.
129468b8534bSLuigi Rizzo  */
static int
netmap_txsync_to_host(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	struct mbq q;

	/* Take packets from hwcur to head and pass them up.
	 * Force hwcur = head since netmap_grab_packets() stops at head
	 */
	mbq_init(&q);
	netmap_grab_packets(kring, &q, 1 /* force */);
	nm_prdis("have %d pkts in queue", mbq_len(&q));
	kring->nr_hwcur = head;
	/* Report the ring as completely available again: hwtail is set
	 * one slot behind hwcur, modulo the ring size. */
	kring->nr_hwtail = head + lim;
	if (kring->nr_hwtail > lim)
		kring->nr_hwtail -= lim + 1;

	/* Hand the collected mbufs to the host stack; 'flags' is unused. */
	netmap_send_up(na->ifp, &q);
	return 0;
}
1317f18be576SLuigi Rizzo 
1318f18be576SLuigi Rizzo 
131968b8534bSLuigi Rizzo /*
132002ad4083SLuigi Rizzo  * rxsync backend for packets coming from the host stack.
132117885a7bSLuigi Rizzo  * They have been put in kring->rx_queue by netmap_transmit().
132217885a7bSLuigi Rizzo  * We protect access to the kring using kring->rx_queue.lock
132302ad4083SLuigi Rizzo  *
1324c3e9b4dbSLuiz Otavio O Souza  * also moves to the nic hw rings any packet the user has marked
1325c3e9b4dbSLuiz Otavio O Souza  * for transparent-mode forwarding, then sets the NR_FORWARD
1326c3e9b4dbSLuiz Otavio O Souza  * flag in the kring to let the caller push them out
132768b8534bSLuigi Rizzo  */
static int
netmap_rxsync_from_host(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i, n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	int ret = 0;
	struct mbq *q = &kring->rx_queue, fq;

	mbq_init(&fq); /* fq holds packets to be freed */

	mbq_lock(q);

	/* First part: import newly received packets */
	n = mbq_len(q);
	if (n) { /* grab packets from the queue */
		struct mbuf *m;
		uint32_t stop_i;

		nm_i = kring->nr_hwtail;
		/* Stop one slot before hwcur: hwtail == hwcur would make
		 * the ring look empty instead of full. */
		stop_i = nm_prev(kring->nr_hwcur, lim);
		while ( nm_i != stop_i && (m = mbq_dequeue(q)) != NULL ) {
			int len = MBUF_LEN(m);
			struct netmap_slot *slot = &ring->slot[nm_i];

			/* Copy the mbuf payload into the netmap buffer. */
			m_copydata(m, 0, len, NMB(na, slot));
			nm_prdis("nm %d len %d", nm_i, len);
			if (netmap_debug & NM_DEBUG_HOST)
				nm_prinf("%s", nm_dump_buf(NMB(na, slot),len, 128, NULL));

			slot->len = len;
			slot->flags = 0;
			nm_i = nm_next(nm_i, lim);
			/* Defer freeing the mbuf until the lock is dropped. */
			mbq_enqueue(&fq, m);
		}
		kring->nr_hwtail = nm_i;
	}

	/*
	 * Second part: skip past packets that userspace has released.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) { /* something was released */
		if (nm_may_forward_down(kring, flags)) {
			/* Transparent mode: push NS_FORWARD-marked slots to
			 * the NIC TX rings, and tell the caller (via
			 * NR_FORWARD) that there is something to flush. */
			ret = netmap_sw_to_nic(na);
			if (ret > 0) {
				kring->nr_kflags |= NR_FORWARD;
				ret = 0;
			}
		}
		kring->nr_hwcur = head;
	}

	mbq_unlock(q);

	/* Now it is safe to free the imported mbufs. */
	mbq_purge(&fq);
	mbq_fini(&fq);

	return ret;
}
139068b8534bSLuigi Rizzo 
139168b8534bSLuigi Rizzo 
1392f9790aebSLuigi Rizzo /* Get a netmap adapter for the port.
1393f9790aebSLuigi Rizzo  *
1394f9790aebSLuigi Rizzo  * If it is possible to satisfy the request, return 0
1395f9790aebSLuigi Rizzo  * with *na containing the netmap adapter found.
1396f9790aebSLuigi Rizzo  * Otherwise return an error code, with *na containing NULL.
1397f9790aebSLuigi Rizzo  *
1398f9790aebSLuigi Rizzo  * When the port is attached to a bridge, we always return
1399f9790aebSLuigi Rizzo  * EBUSY.
1400f9790aebSLuigi Rizzo  * Otherwise, if the port is already bound to a file descriptor,
1401f9790aebSLuigi Rizzo  * then we unconditionally return the existing adapter into *na.
1402f9790aebSLuigi Rizzo  * In all the other cases, we return (into *na) either native,
1403f9790aebSLuigi Rizzo  * generic or NULL, according to the following table:
1404f9790aebSLuigi Rizzo  *
1405f9790aebSLuigi Rizzo  *					native_support
1406f9790aebSLuigi Rizzo  * active_fds   dev.netmap.admode         YES     NO
1407f9790aebSLuigi Rizzo  * -------------------------------------------------------
1408f9790aebSLuigi Rizzo  *    >0              *                 NA(ifp) NA(ifp)
1409f9790aebSLuigi Rizzo  *
1410f9790aebSLuigi Rizzo  *     0        NETMAP_ADMODE_BEST      NATIVE  GENERIC
1411f9790aebSLuigi Rizzo  *     0        NETMAP_ADMODE_NATIVE    NATIVE   NULL
1412f9790aebSLuigi Rizzo  *     0        NETMAP_ADMODE_GENERIC   GENERIC GENERIC
1413f9790aebSLuigi Rizzo  *
1414f9790aebSLuigi Rizzo  */
141537e3a6d3SLuigi Rizzo static void netmap_hw_dtor(struct netmap_adapter *); /* needed by NM_IS_NATIVE() */
int
netmap_get_hw_na(struct ifnet *ifp, struct netmap_mem_d *nmd, struct netmap_adapter **na)
{
	/* generic support */
	int i = netmap_admode;	/* Take a snapshot. */
	struct netmap_adapter *prev_na;
	int error = 0;

	*na = NULL; /* default */

	/* reset in case of invalid value */
	if (i < NETMAP_ADMODE_BEST || i >= NETMAP_ADMODE_LAST)
		i = netmap_admode = NETMAP_ADMODE_BEST;

	if (NM_NA_VALID(ifp)) {
		prev_na = NA(ifp);
		/* If an adapter already exists, return it if
		 * there are active file descriptors or if
		 * netmap is not forced to use generic
		 * adapters.
		 */
		if (NETMAP_OWNED_BY_ANY(prev_na)
			|| i != NETMAP_ADMODE_GENERIC
			|| prev_na->na_flags & NAF_FORCE_NATIVE
#ifdef WITH_PIPES
			/* ugly, but we cannot allow an adapter switch
			 * if some pipe is referring to this one
			 */
			|| prev_na->na_next_pipe > 0
#endif
		) {
			*na = prev_na;
			goto assign_mem;
		}
	}

	/* If there isn't native support and netmap is not allowed
	 * to use generic adapters, we cannot satisfy the request.
	 */
	if (!NM_IS_NATIVE(ifp) && i == NETMAP_ADMODE_NATIVE)
		return EOPNOTSUPP;

	/* Otherwise, create a generic adapter and return it,
	 * saving the previously used netmap adapter, if any.
	 *
	 * Note that here 'prev_na', if not NULL, MUST be a
	 * native adapter, and CANNOT be a generic one. This is
	 * true because generic adapters are created on demand, and
	 * destroyed when not used anymore. Therefore, if the adapter
	 * currently attached to an interface 'ifp' is generic, it
	 * must be that
	 * (NA(ifp)->active_fds > 0 || NETMAP_OWNED_BY_KERN(NA(ifp))).
	 * Consequently, if NA(ifp) is generic, we will enter one of
	 * the branches above. This ensures that we never override
	 * a generic adapter with another generic adapter.
	 */
	error = generic_netmap_attach(ifp);
	if (error)
		return error;

	*na = NA(ifp);

assign_mem:
	/* Switch the adapter to the requested memory allocator, but only
	 * if the adapter does not own its allocator and no file descriptor
	 * is currently bound to it. The previous allocator is saved in
	 * nm_mem_prev. */
	if (nmd != NULL && !((*na)->na_flags & NAF_MEM_OWNER) &&
	    (*na)->active_fds == 0 && ((*na)->nm_mem != nmd)) {
		(*na)->nm_mem_prev = (*na)->nm_mem;
		(*na)->nm_mem = netmap_mem_get(nmd);
	}

	return 0;
}
1487f9790aebSLuigi Rizzo 
148868b8534bSLuigi Rizzo /*
1489ce3ee1e7SLuigi Rizzo  * MUST BE CALLED UNDER NMG_LOCK()
1490ce3ee1e7SLuigi Rizzo  *
1491f2637526SLuigi Rizzo  * Get a refcounted reference to a netmap adapter attached
14922ff91c17SVincenzo Maffione  * to the interface specified by req.
1493ce3ee1e7SLuigi Rizzo  * This is always called in the execution of an ioctl().
1494ce3ee1e7SLuigi Rizzo  *
1495f2637526SLuigi Rizzo  * Return ENXIO if the interface specified by the request does
1496f2637526SLuigi Rizzo  * not exist, ENOTSUP if netmap is not supported by the interface,
1497f2637526SLuigi Rizzo  * EBUSY if the interface is already attached to a bridge,
1498f2637526SLuigi Rizzo  * EINVAL if parameters are invalid, ENOMEM if needed resources
1499f2637526SLuigi Rizzo  * could not be allocated.
1500f2637526SLuigi Rizzo  * If successful, hold a reference to the netmap adapter.
1501f18be576SLuigi Rizzo  *
15022ff91c17SVincenzo Maffione  * If the interface specified by req is a system one, also keep
150337e3a6d3SLuigi Rizzo  * a reference to it and return a valid *ifp.
150468b8534bSLuigi Rizzo  */
int
netmap_get_na(struct nmreq_header *hdr,
	      struct netmap_adapter **na, struct ifnet **ifp,
	      struct netmap_mem_d *nmd, int create)
{
	struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
	int error = 0;
	struct netmap_adapter *ret = NULL;
	int nmd_ref = 0;	/* set if we took a reference on 'nmd' here */

	*na = NULL;     /* default return value */
	*ifp = NULL;

	if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
		return EINVAL;
	}

	if (req->nr_mode == NR_REG_PIPE_MASTER ||
			req->nr_mode == NR_REG_PIPE_SLAVE) {
		/* Do not accept deprecated pipe modes. */
		nm_prerr("Deprecated pipe nr_mode, use xx{yy or xx}yy syntax");
		return EINVAL;
	}

	NMG_LOCK_ASSERT();

	/* if the request contains a memid, try to find the
	 * corresponding memory region
	 */
	if (nmd == NULL && req->nr_mem_id) {
		nmd = netmap_mem_find(req->nr_mem_id);
		if (nmd == NULL)
			return EINVAL;
		/* keep the reference, released in the out: path */
		nmd_ref = 1;
	}

	/* We cascade through all possible types of netmap adapter.
	 * All netmap_get_*_na() functions return an error and an na,
	 * with the following combinations:
	 *
	 * error    na
	 *   0	   NULL		type doesn't match
	 *  !0	   NULL		type matches, but na creation/lookup failed
	 *   0	  !NULL		type matches and na created/found
	 *  !0    !NULL		impossible
	 */
	error = netmap_get_null_na(hdr, na, nmd, create);
	if (error || *na != NULL)
		goto out;

	/* try to see if this is a monitor port */
	error = netmap_get_monitor_na(hdr, na, nmd, create);
	if (error || *na != NULL)
		goto out;

	/* try to see if this is a pipe port */
	error = netmap_get_pipe_na(hdr, na, nmd, create);
	if (error || *na != NULL)
		goto out;

	/* try to see if this is a bridge port */
	error = netmap_get_vale_na(hdr, na, nmd, create);
	if (error)
		goto out;

	if (*na != NULL) /* valid match in netmap_get_bdg_na() */
		goto out;

	/*
	 * This must be a hardware na, lookup the name in the system.
	 * Note that by hardware we actually mean "it shows up in ifconfig".
	 * This may still be a tap, a veth/epair, or even a
	 * persistent VALE port.
	 */
	*ifp = ifunit_ref(hdr->nr_name);
	if (*ifp == NULL) {
		error = ENXIO;
		goto out;
	}

	error = netmap_get_hw_na(*ifp, nmd, &ret);
	if (error)
		goto out;

	*na = ret;
	netmap_adapter_get(ret);

	/*
	 * if the adapter supports the host rings and it is not already open,
	 * try to set the number of host rings as requested by the user
	 */
	if (((*na)->na_flags & NAF_HOST_RINGS) && (*na)->active_fds == 0) {
		if (req->nr_host_tx_rings)
			(*na)->num_host_tx_rings = req->nr_host_tx_rings;
		if (req->nr_host_rx_rings)
			(*na)->num_host_rx_rings = req->nr_host_rx_rings;
	}
	nm_prdis("%s: host tx %d rx %u", (*na)->name, (*na)->num_host_tx_rings,
			(*na)->num_host_rx_rings);

out:
	/* On error, undo any reference taken above. */
	if (error) {
		if (ret)
			netmap_adapter_put(ret);
		if (*ifp) {
			if_rele(*ifp);
			*ifp = NULL;
		}
	}
	if (nmd_ref)
		netmap_mem_put(nmd);

	return error;
}
1620ce3ee1e7SLuigi Rizzo 
162137e3a6d3SLuigi Rizzo /* undo netmap_get_na() */
162237e3a6d3SLuigi Rizzo void
162337e3a6d3SLuigi Rizzo netmap_unget_na(struct netmap_adapter *na, struct ifnet *ifp)
162437e3a6d3SLuigi Rizzo {
162537e3a6d3SLuigi Rizzo 	if (ifp)
162637e3a6d3SLuigi Rizzo 		if_rele(ifp);
162737e3a6d3SLuigi Rizzo 	if (na)
162837e3a6d3SLuigi Rizzo 		netmap_adapter_put(na);
162937e3a6d3SLuigi Rizzo }
163037e3a6d3SLuigi Rizzo 
163137e3a6d3SLuigi Rizzo 
/*
 * Helper for the ring-validation prologue routines: if condition 't'
 * holds, log a rate-limited dump of the current ring/kring indices and
 * make the enclosing function return nkr_num_slots (an out-of-range
 * value flagging the error). Expects 'kring', 'ring', 'head' and 'cur'
 * to be in scope at the expansion site.
 */
#define NM_FAIL_ON(t) do {						\
	if (unlikely(t)) {						\
		nm_prlim(5, "%s: fail '" #t "' "				\
			"h %d c %d t %d "				\
			"rh %d rc %d rt %d "				\
			"hc %d ht %d",					\
			kring->name,					\
			head, cur, ring->tail,				\
			kring->rhead, kring->rcur, kring->rtail,	\
			kring->nr_hwcur, kring->nr_hwtail);		\
		return kring->nkr_num_slots;				\
	}								\
} while (0)
1645ce3ee1e7SLuigi Rizzo 
/*
 * Validate parameters on entry for *_txsync().
 * Returns ring->head if ok, or something >= kring->nkr_num_slots
 * in case of error (each NM_FAIL_ON below returns nkr_num_slots,
 * which callers treat as "reinit the ring").
 *
 * rhead, rcur and rtail=hwtail are stored from previous round.
 * hwcur is the next packet to send to the ring.
 *
 * We want
 *    hwcur <= *rhead <= head <= cur <= tail = *rtail <= hwtail
 *
 * hwcur, rhead, rtail and hwtail are reliable
 */
u_int
nm_txsync_prologue(struct netmap_kring *kring, struct netmap_ring *ring)
{
	u_int head = ring->head; /* read only once */
	u_int cur = ring->cur; /* read only once */
	u_int n = kring->nkr_num_slots;

	nm_prdis(5, "%s kcur %d ktail %d head %d cur %d tail %d",
		kring->name,
		kring->nr_hwcur, kring->nr_hwtail,
		ring->head, ring->cur, ring->tail);
#if 1 /* kernel sanity checks; but we can trust the kring. */
	/* All kernel-owned indices must be within the ring. */
	NM_FAIL_ON(kring->nr_hwcur >= n || kring->rhead >= n ||
	    kring->rtail >= n ||  kring->nr_hwtail >= n);
#endif /* kernel sanity checks */
	/*
	 * user sanity checks. We only use head,
	 * A, B, ... are possible positions for head:
	 *
	 *  0    A  rhead   B  rtail   C  n-1
	 *  0    D  rtail   E  rhead   F  n-1
	 *
	 * B, F, D are valid. A, C, E are wrong
	 */
	if (kring->rtail >= kring->rhead) {
		/* non-wrapped case: want rhead <= head <= rtail */
		NM_FAIL_ON(head < kring->rhead || head > kring->rtail);
		/* and also head <= cur <= rtail */
		NM_FAIL_ON(cur < head || cur > kring->rtail);
	} else { /* here rtail < rhead (the valid region wraps at n) */
		/* we need head outside rtail .. rhead */
		NM_FAIL_ON(head > kring->rtail && head < kring->rhead);

		/* two cases now: head <= rtail or head >= rhead  */
		if (head <= kring->rtail) {
			/* want head <= cur <= rtail */
			NM_FAIL_ON(cur < head || cur > kring->rtail);
		} else { /* head >= rhead */
			/* cur must be outside rtail..head */
			NM_FAIL_ON(cur > kring->rtail && cur < head);
		}
	}
	/* tail is kernel-owned: undo any userspace overwrite */
	if (ring->tail != kring->rtail) {
		nm_prlim(5, "%s tail overwritten was %d need %d", kring->name,
			ring->tail, kring->rtail);
		ring->tail = kring->rtail;
	}
	/* commit the validated user view for the next round */
	kring->rhead = head;
	kring->rcur = cur;
	return head;
}
171068b8534bSLuigi Rizzo 
171168b8534bSLuigi Rizzo 
/*
 * Validate parameters on entry for *_rxsync().
 * Returns ring->head if ok, kring->nkr_num_slots on error
 * (each NM_FAIL_ON below returns nkr_num_slots, which callers
 * treat as "reinit the ring").
 *
 * For a valid configuration,
 * hwcur <= head <= cur <= tail <= hwtail
 *
 * We only consider head and cur.
 * hwcur and hwtail are reliable.
 *
 */
u_int
nm_rxsync_prologue(struct netmap_kring *kring, struct netmap_ring *ring)
{
	uint32_t const n = kring->nkr_num_slots;
	uint32_t head, cur;

	nm_prdis(5,"%s kc %d kt %d h %d c %d t %d",
		kring->name,
		kring->nr_hwcur, kring->nr_hwtail,
		ring->head, ring->cur, ring->tail);
	/*
	 * Before storing the new values, we should check they do not
	 * move backwards. However:
	 * - head is not an issue because the previous value is hwcur;
	 * - cur could in principle go back, however it does not matter
	 *   because we are processing a brand new rxsync()
	 */
	cur = kring->rcur = ring->cur;	/* read only once */
	head = kring->rhead = ring->head;	/* read only once */
#if 1 /* kernel sanity checks */
	/* kernel-owned indices must be within the ring */
	NM_FAIL_ON(kring->nr_hwcur >= n || kring->nr_hwtail >= n);
#endif /* kernel sanity checks */
	/* user sanity checks */
	if (kring->nr_hwtail >= kring->nr_hwcur) {
		/* non-wrapped case: want hwcur <= rhead <= hwtail */
		NM_FAIL_ON(head < kring->nr_hwcur || head > kring->nr_hwtail);
		/* and also rhead <= rcur <= hwtail */
		NM_FAIL_ON(cur < head || cur > kring->nr_hwtail);
	} else {
		/* wrapped case: we need rhead outside hwtail..hwcur */
		NM_FAIL_ON(head < kring->nr_hwcur && head > kring->nr_hwtail);
		/* two cases now: head <= hwtail or head >= hwcur  */
		if (head <= kring->nr_hwtail) {
			/* want head <= cur <= hwtail */
			NM_FAIL_ON(cur < head || cur > kring->nr_hwtail);
		} else {
			/* cur must be outside hwtail..head */
			NM_FAIL_ON(cur < head && cur > kring->nr_hwtail);
		}
	}
	/* tail is kernel-owned: undo any userspace overwrite */
	if (ring->tail != kring->rtail) {
		nm_prlim(5, "%s tail overwritten was %d need %d",
			kring->name,
			ring->tail, kring->rtail);
		ring->tail = kring->rtail;
	}
	return head;
}
1771f9790aebSLuigi Rizzo 
177217885a7bSLuigi Rizzo 
1773f9790aebSLuigi Rizzo /*
177468b8534bSLuigi Rizzo  * Error routine called when txsync/rxsync detects an error.
177517885a7bSLuigi Rizzo  * Can't do much more than resetting head = cur = hwcur, tail = hwtail
177668b8534bSLuigi Rizzo  * Return 1 on reinit.
1777506cc70cSLuigi Rizzo  *
1778506cc70cSLuigi Rizzo  * This routine is only called by the upper half of the kernel.
1779506cc70cSLuigi Rizzo  * It only reads hwcur (which is changed only by the upper half, too)
178017885a7bSLuigi Rizzo  * and hwtail (which may be changed by the lower half, but only on
1781506cc70cSLuigi Rizzo  * a tx ring and only to increase it, so any error will be recovered
1782506cc70cSLuigi Rizzo  * on the next call). For the above, we don't strictly need to call
1783506cc70cSLuigi Rizzo  * it under lock.
178468b8534bSLuigi Rizzo  */
178568b8534bSLuigi Rizzo int
178668b8534bSLuigi Rizzo netmap_ring_reinit(struct netmap_kring *kring)
178768b8534bSLuigi Rizzo {
178868b8534bSLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
178968b8534bSLuigi Rizzo 	u_int i, lim = kring->nkr_num_slots - 1;
179068b8534bSLuigi Rizzo 	int errors = 0;
179168b8534bSLuigi Rizzo 
1792ce3ee1e7SLuigi Rizzo 	// XXX KASSERT nm_kr_tryget
179375f4f3edSVincenzo Maffione 	nm_prlim(10, "called for %s", kring->name);
179417885a7bSLuigi Rizzo 	// XXX probably wrong to trust userspace
179517885a7bSLuigi Rizzo 	kring->rhead = ring->head;
179617885a7bSLuigi Rizzo 	kring->rcur  = ring->cur;
179717885a7bSLuigi Rizzo 	kring->rtail = ring->tail;
179817885a7bSLuigi Rizzo 
179968b8534bSLuigi Rizzo 	if (ring->cur > lim)
180068b8534bSLuigi Rizzo 		errors++;
180117885a7bSLuigi Rizzo 	if (ring->head > lim)
180217885a7bSLuigi Rizzo 		errors++;
180317885a7bSLuigi Rizzo 	if (ring->tail > lim)
180417885a7bSLuigi Rizzo 		errors++;
180568b8534bSLuigi Rizzo 	for (i = 0; i <= lim; i++) {
180668b8534bSLuigi Rizzo 		u_int idx = ring->slot[i].buf_idx;
180768b8534bSLuigi Rizzo 		u_int len = ring->slot[i].len;
1808847bf383SLuigi Rizzo 		if (idx < 2 || idx >= kring->na->na_lut.objtotal) {
180975f4f3edSVincenzo Maffione 			nm_prlim(5, "bad index at slot %d idx %d len %d ", i, idx, len);
181068b8534bSLuigi Rizzo 			ring->slot[i].buf_idx = 0;
181168b8534bSLuigi Rizzo 			ring->slot[i].len = 0;
18124bf50f18SLuigi Rizzo 		} else if (len > NETMAP_BUF_SIZE(kring->na)) {
181368b8534bSLuigi Rizzo 			ring->slot[i].len = 0;
181475f4f3edSVincenzo Maffione 			nm_prlim(5, "bad len at slot %d idx %d len %d", i, idx, len);
181568b8534bSLuigi Rizzo 		}
181668b8534bSLuigi Rizzo 	}
181768b8534bSLuigi Rizzo 	if (errors) {
181875f4f3edSVincenzo Maffione 		nm_prlim(10, "total %d errors", errors);
181975f4f3edSVincenzo Maffione 		nm_prlim(10, "%s reinit, cur %d -> %d tail %d -> %d",
182017885a7bSLuigi Rizzo 			kring->name,
182168b8534bSLuigi Rizzo 			ring->cur, kring->nr_hwcur,
182217885a7bSLuigi Rizzo 			ring->tail, kring->nr_hwtail);
182317885a7bSLuigi Rizzo 		ring->head = kring->rhead = kring->nr_hwcur;
182417885a7bSLuigi Rizzo 		ring->cur  = kring->rcur  = kring->nr_hwcur;
182517885a7bSLuigi Rizzo 		ring->tail = kring->rtail = kring->nr_hwtail;
182668b8534bSLuigi Rizzo 	}
182768b8534bSLuigi Rizzo 	return (errors ? 1 : 0);
182868b8534bSLuigi Rizzo }
182968b8534bSLuigi Rizzo 
18304bf50f18SLuigi Rizzo /* interpret the ringid and flags fields of an nmreq, by translating them
18314bf50f18SLuigi Rizzo  * into a pair of intervals of ring indices:
18324bf50f18SLuigi Rizzo  *
18334bf50f18SLuigi Rizzo  * [priv->np_txqfirst, priv->np_txqlast) and
18344bf50f18SLuigi Rizzo  * [priv->np_rxqfirst, priv->np_rxqlast)
18354bf50f18SLuigi Rizzo  *
183668b8534bSLuigi Rizzo  */
18374bf50f18SLuigi Rizzo int
18382ff91c17SVincenzo Maffione netmap_interp_ringid(struct netmap_priv_d *priv, uint32_t nr_mode,
18392ff91c17SVincenzo Maffione 			uint16_t nr_ringid, uint64_t nr_flags)
184068b8534bSLuigi Rizzo {
1841f9790aebSLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
184237e3a6d3SLuigi Rizzo 	int excluded_direction[] = { NR_TX_RINGS_ONLY, NR_RX_RINGS_ONLY };
1843847bf383SLuigi Rizzo 	enum txrx t;
18442ff91c17SVincenzo Maffione 	u_int j;
184568b8534bSLuigi Rizzo 
184637e3a6d3SLuigi Rizzo 	for_rx_tx(t) {
18472ff91c17SVincenzo Maffione 		if (nr_flags & excluded_direction[t]) {
184837e3a6d3SLuigi Rizzo 			priv->np_qfirst[t] = priv->np_qlast[t] = 0;
184937e3a6d3SLuigi Rizzo 			continue;
185037e3a6d3SLuigi Rizzo 		}
18512ff91c17SVincenzo Maffione 		switch (nr_mode) {
1852f0ea3689SLuigi Rizzo 		case NR_REG_ALL_NIC:
1853b6e66be2SVincenzo Maffione 		case NR_REG_NULL:
1854847bf383SLuigi Rizzo 			priv->np_qfirst[t] = 0;
1855847bf383SLuigi Rizzo 			priv->np_qlast[t] = nma_get_nrings(na, t);
185675f4f3edSVincenzo Maffione 			nm_prdis("ALL/PIPE: %s %d %d", nm_txrx2str(t),
185737e3a6d3SLuigi Rizzo 				priv->np_qfirst[t], priv->np_qlast[t]);
1858f0ea3689SLuigi Rizzo 			break;
1859f0ea3689SLuigi Rizzo 		case NR_REG_SW:
1860f0ea3689SLuigi Rizzo 		case NR_REG_NIC_SW:
1861f0ea3689SLuigi Rizzo 			if (!(na->na_flags & NAF_HOST_RINGS)) {
1862b6e66be2SVincenzo Maffione 				nm_prerr("host rings not supported");
1863f0ea3689SLuigi Rizzo 				return EINVAL;
1864f0ea3689SLuigi Rizzo 			}
18652ff91c17SVincenzo Maffione 			priv->np_qfirst[t] = (nr_mode == NR_REG_SW ?
1866847bf383SLuigi Rizzo 				nma_get_nrings(na, t) : 0);
18672a7db7a6SVincenzo Maffione 			priv->np_qlast[t] = netmap_all_rings(na, t);
186875f4f3edSVincenzo Maffione 			nm_prdis("%s: %s %d %d", nr_mode == NR_REG_SW ? "SW" : "NIC+SW",
186937e3a6d3SLuigi Rizzo 				nm_txrx2str(t),
187037e3a6d3SLuigi Rizzo 				priv->np_qfirst[t], priv->np_qlast[t]);
1871f0ea3689SLuigi Rizzo 			break;
1872f0ea3689SLuigi Rizzo 		case NR_REG_ONE_NIC:
18732ff91c17SVincenzo Maffione 			if (nr_ringid >= na->num_tx_rings &&
18742ff91c17SVincenzo Maffione 					nr_ringid >= na->num_rx_rings) {
1875b6e66be2SVincenzo Maffione 				nm_prerr("invalid ring id %d", nr_ringid);
1876f0ea3689SLuigi Rizzo 				return EINVAL;
1877f0ea3689SLuigi Rizzo 			}
1878f0ea3689SLuigi Rizzo 			/* if not enough rings, use the first one */
18792ff91c17SVincenzo Maffione 			j = nr_ringid;
1880847bf383SLuigi Rizzo 			if (j >= nma_get_nrings(na, t))
1881f0ea3689SLuigi Rizzo 				j = 0;
1882847bf383SLuigi Rizzo 			priv->np_qfirst[t] = j;
1883847bf383SLuigi Rizzo 			priv->np_qlast[t] = j + 1;
188475f4f3edSVincenzo Maffione 			nm_prdis("ONE_NIC: %s %d %d", nm_txrx2str(t),
188537e3a6d3SLuigi Rizzo 				priv->np_qfirst[t], priv->np_qlast[t]);
1886f0ea3689SLuigi Rizzo 			break;
1887d12354a5SVincenzo Maffione 		case NR_REG_ONE_SW:
1888d12354a5SVincenzo Maffione 			if (!(na->na_flags & NAF_HOST_RINGS)) {
1889d12354a5SVincenzo Maffione 				nm_prerr("host rings not supported");
1890d12354a5SVincenzo Maffione 				return EINVAL;
1891d12354a5SVincenzo Maffione 			}
1892d12354a5SVincenzo Maffione 			if (nr_ringid >= na->num_host_tx_rings &&
1893d12354a5SVincenzo Maffione 					nr_ringid >= na->num_host_rx_rings) {
1894d12354a5SVincenzo Maffione 				nm_prerr("invalid ring id %d", nr_ringid);
1895d12354a5SVincenzo Maffione 				return EINVAL;
1896d12354a5SVincenzo Maffione 			}
1897d12354a5SVincenzo Maffione 			/* if not enough rings, use the first one */
1898d12354a5SVincenzo Maffione 			j = nr_ringid;
1899d12354a5SVincenzo Maffione 			if (j >= nma_get_host_nrings(na, t))
1900d12354a5SVincenzo Maffione 				j = 0;
1901d12354a5SVincenzo Maffione 			priv->np_qfirst[t] = nma_get_nrings(na, t) + j;
1902d12354a5SVincenzo Maffione 			priv->np_qlast[t] = nma_get_nrings(na, t) + j + 1;
1903d12354a5SVincenzo Maffione 			nm_prdis("ONE_SW: %s %d %d", nm_txrx2str(t),
1904d12354a5SVincenzo Maffione 				priv->np_qfirst[t], priv->np_qlast[t]);
1905d12354a5SVincenzo Maffione 			break;
1906f0ea3689SLuigi Rizzo 		default:
1907b6e66be2SVincenzo Maffione 			nm_prerr("invalid regif type %d", nr_mode);
1908f0ea3689SLuigi Rizzo 			return EINVAL;
190968b8534bSLuigi Rizzo 		}
191037e3a6d3SLuigi Rizzo 	}
1911b6e66be2SVincenzo Maffione 	priv->np_flags = nr_flags;
19124bf50f18SLuigi Rizzo 
1913c3e9b4dbSLuiz Otavio O Souza 	/* Allow transparent forwarding mode in the host --> nic
1914c3e9b4dbSLuiz Otavio O Souza 	 * direction only if all the TX hw rings have been opened. */
1915c3e9b4dbSLuiz Otavio O Souza 	if (priv->np_qfirst[NR_TX] == 0 &&
1916c3e9b4dbSLuiz Otavio O Souza 			priv->np_qlast[NR_TX] >= na->num_tx_rings) {
1917c3e9b4dbSLuiz Otavio O Souza 		priv->np_sync_flags |= NAF_CAN_FORWARD_DOWN;
1918c3e9b4dbSLuiz Otavio O Souza 	}
1919c3e9b4dbSLuiz Otavio O Souza 
1920ae10d1afSLuigi Rizzo 	if (netmap_verbose) {
1921b6e66be2SVincenzo Maffione 		nm_prinf("%s: tx [%d,%d) rx [%d,%d) id %d",
19224bf50f18SLuigi Rizzo 			na->name,
1923847bf383SLuigi Rizzo 			priv->np_qfirst[NR_TX],
1924847bf383SLuigi Rizzo 			priv->np_qlast[NR_TX],
1925847bf383SLuigi Rizzo 			priv->np_qfirst[NR_RX],
1926847bf383SLuigi Rizzo 			priv->np_qlast[NR_RX],
19272ff91c17SVincenzo Maffione 			nr_ringid);
1928ae10d1afSLuigi Rizzo 	}
192968b8534bSLuigi Rizzo 	return 0;
193068b8534bSLuigi Rizzo }
193168b8534bSLuigi Rizzo 
19324bf50f18SLuigi Rizzo 
19334bf50f18SLuigi Rizzo /*
19344bf50f18SLuigi Rizzo  * Set the ring ID. For devices with a single queue, a request
19354bf50f18SLuigi Rizzo  * for all rings is the same as a single ring.
19364bf50f18SLuigi Rizzo  */
19374bf50f18SLuigi Rizzo static int
19382ff91c17SVincenzo Maffione netmap_set_ringid(struct netmap_priv_d *priv, uint32_t nr_mode,
19392ff91c17SVincenzo Maffione 		uint16_t nr_ringid, uint64_t nr_flags)
19404bf50f18SLuigi Rizzo {
19414bf50f18SLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
19424bf50f18SLuigi Rizzo 	int error;
1943847bf383SLuigi Rizzo 	enum txrx t;
19444bf50f18SLuigi Rizzo 
19452ff91c17SVincenzo Maffione 	error = netmap_interp_ringid(priv, nr_mode, nr_ringid, nr_flags);
19464bf50f18SLuigi Rizzo 	if (error) {
19474bf50f18SLuigi Rizzo 		return error;
19484bf50f18SLuigi Rizzo 	}
19494bf50f18SLuigi Rizzo 
19502ff91c17SVincenzo Maffione 	priv->np_txpoll = (nr_flags & NR_NO_TX_POLL) ? 0 : 1;
19514bf50f18SLuigi Rizzo 
19524bf50f18SLuigi Rizzo 	/* optimization: count the users registered for more than
19534bf50f18SLuigi Rizzo 	 * one ring, which are the ones sleeping on the global queue.
19544bf50f18SLuigi Rizzo 	 * The default netmap_notify() callback will then
19554bf50f18SLuigi Rizzo 	 * avoid signaling the global queue if nobody is using it
19564bf50f18SLuigi Rizzo 	 */
1957847bf383SLuigi Rizzo 	for_rx_tx(t) {
1958847bf383SLuigi Rizzo 		if (nm_si_user(priv, t))
1959847bf383SLuigi Rizzo 			na->si_users[t]++;
1960847bf383SLuigi Rizzo 	}
19614bf50f18SLuigi Rizzo 	return 0;
19624bf50f18SLuigi Rizzo }
19634bf50f18SLuigi Rizzo 
1964847bf383SLuigi Rizzo static void
1965847bf383SLuigi Rizzo netmap_unset_ringid(struct netmap_priv_d *priv)
1966847bf383SLuigi Rizzo {
1967847bf383SLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
1968847bf383SLuigi Rizzo 	enum txrx t;
1969847bf383SLuigi Rizzo 
1970847bf383SLuigi Rizzo 	for_rx_tx(t) {
1971847bf383SLuigi Rizzo 		if (nm_si_user(priv, t))
1972847bf383SLuigi Rizzo 			na->si_users[t]--;
1973847bf383SLuigi Rizzo 		priv->np_qfirst[t] = priv->np_qlast[t] = 0;
1974847bf383SLuigi Rizzo 	}
1975847bf383SLuigi Rizzo 	priv->np_flags = 0;
1976847bf383SLuigi Rizzo 	priv->np_txpoll = 0;
1977b6e66be2SVincenzo Maffione 	priv->np_kloop_state = 0;
1978847bf383SLuigi Rizzo }
1979847bf383SLuigi Rizzo 
1980847bf383SLuigi Rizzo 
198137e3a6d3SLuigi Rizzo /* Set the nr_pending_mode for the requested rings.
198237e3a6d3SLuigi Rizzo  * If requested, also try to get exclusive access to the rings, provided
198337e3a6d3SLuigi Rizzo  * the rings we want to bind are not exclusively owned by a previous bind.
1984847bf383SLuigi Rizzo  */
1985847bf383SLuigi Rizzo static int
198637e3a6d3SLuigi Rizzo netmap_krings_get(struct netmap_priv_d *priv)
1987847bf383SLuigi Rizzo {
1988847bf383SLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
1989847bf383SLuigi Rizzo 	u_int i;
1990847bf383SLuigi Rizzo 	struct netmap_kring *kring;
1991847bf383SLuigi Rizzo 	int excl = (priv->np_flags & NR_EXCLUSIVE);
1992847bf383SLuigi Rizzo 	enum txrx t;
1993847bf383SLuigi Rizzo 
1994b6e66be2SVincenzo Maffione 	if (netmap_debug & NM_DEBUG_ON)
1995b6e66be2SVincenzo Maffione 		nm_prinf("%s: grabbing tx [%d, %d) rx [%d, %d)",
1996847bf383SLuigi Rizzo 			na->name,
1997847bf383SLuigi Rizzo 			priv->np_qfirst[NR_TX],
1998847bf383SLuigi Rizzo 			priv->np_qlast[NR_TX],
1999847bf383SLuigi Rizzo 			priv->np_qfirst[NR_RX],
2000847bf383SLuigi Rizzo 			priv->np_qlast[NR_RX]);
2001847bf383SLuigi Rizzo 
2002847bf383SLuigi Rizzo 	/* first round: check that all the requested rings
2003847bf383SLuigi Rizzo 	 * are neither alread exclusively owned, nor we
2004847bf383SLuigi Rizzo 	 * want exclusive ownership when they are already in use
2005847bf383SLuigi Rizzo 	 */
2006847bf383SLuigi Rizzo 	for_rx_tx(t) {
2007847bf383SLuigi Rizzo 		for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
20082ff91c17SVincenzo Maffione 			kring = NMR(na, t)[i];
2009847bf383SLuigi Rizzo 			if ((kring->nr_kflags & NKR_EXCLUSIVE) ||
2010847bf383SLuigi Rizzo 			    (kring->users && excl))
2011847bf383SLuigi Rizzo 			{
201275f4f3edSVincenzo Maffione 				nm_prdis("ring %s busy", kring->name);
2013847bf383SLuigi Rizzo 				return EBUSY;
2014847bf383SLuigi Rizzo 			}
2015847bf383SLuigi Rizzo 		}
2016847bf383SLuigi Rizzo 	}
2017847bf383SLuigi Rizzo 
201837e3a6d3SLuigi Rizzo 	/* second round: increment usage count (possibly marking them
201937e3a6d3SLuigi Rizzo 	 * as exclusive) and set the nr_pending_mode
2020847bf383SLuigi Rizzo 	 */
2021847bf383SLuigi Rizzo 	for_rx_tx(t) {
2022847bf383SLuigi Rizzo 		for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
20232ff91c17SVincenzo Maffione 			kring = NMR(na, t)[i];
2024847bf383SLuigi Rizzo 			kring->users++;
2025847bf383SLuigi Rizzo 			if (excl)
2026847bf383SLuigi Rizzo 				kring->nr_kflags |= NKR_EXCLUSIVE;
202737e3a6d3SLuigi Rizzo 	                kring->nr_pending_mode = NKR_NETMAP_ON;
2028847bf383SLuigi Rizzo 		}
2029847bf383SLuigi Rizzo 	}
2030847bf383SLuigi Rizzo 
2031847bf383SLuigi Rizzo 	return 0;
2032847bf383SLuigi Rizzo 
2033847bf383SLuigi Rizzo }
2034847bf383SLuigi Rizzo 
203537e3a6d3SLuigi Rizzo /* Undo netmap_krings_get(). This is done by clearing the exclusive mode
203637e3a6d3SLuigi Rizzo  * if was asked on regif, and unset the nr_pending_mode if we are the
203737e3a6d3SLuigi Rizzo  * last users of the involved rings. */
2038847bf383SLuigi Rizzo static void
203937e3a6d3SLuigi Rizzo netmap_krings_put(struct netmap_priv_d *priv)
2040847bf383SLuigi Rizzo {
2041847bf383SLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
2042847bf383SLuigi Rizzo 	u_int i;
2043847bf383SLuigi Rizzo 	struct netmap_kring *kring;
2044847bf383SLuigi Rizzo 	int excl = (priv->np_flags & NR_EXCLUSIVE);
2045847bf383SLuigi Rizzo 	enum txrx t;
2046847bf383SLuigi Rizzo 
204775f4f3edSVincenzo Maffione 	nm_prdis("%s: releasing tx [%d, %d) rx [%d, %d)",
2048847bf383SLuigi Rizzo 			na->name,
2049847bf383SLuigi Rizzo 			priv->np_qfirst[NR_TX],
2050847bf383SLuigi Rizzo 			priv->np_qlast[NR_TX],
2051847bf383SLuigi Rizzo 			priv->np_qfirst[NR_RX],
2052847bf383SLuigi Rizzo 			priv->np_qlast[MR_RX]);
2053847bf383SLuigi Rizzo 
2054847bf383SLuigi Rizzo 	for_rx_tx(t) {
2055847bf383SLuigi Rizzo 		for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
20562ff91c17SVincenzo Maffione 			kring = NMR(na, t)[i];
2057847bf383SLuigi Rizzo 			if (excl)
2058847bf383SLuigi Rizzo 				kring->nr_kflags &= ~NKR_EXCLUSIVE;
2059847bf383SLuigi Rizzo 			kring->users--;
206037e3a6d3SLuigi Rizzo 			if (kring->users == 0)
206137e3a6d3SLuigi Rizzo 				kring->nr_pending_mode = NKR_NETMAP_OFF;
2062847bf383SLuigi Rizzo 		}
2063847bf383SLuigi Rizzo 	}
2064847bf383SLuigi Rizzo }
2065847bf383SLuigi Rizzo 
20662ff91c17SVincenzo Maffione static int
20672ff91c17SVincenzo Maffione nm_priv_rx_enabled(struct netmap_priv_d *priv)
20682ff91c17SVincenzo Maffione {
20692ff91c17SVincenzo Maffione 	return (priv->np_qfirst[NR_RX] != priv->np_qlast[NR_RX]);
20702ff91c17SVincenzo Maffione }
20712ff91c17SVincenzo Maffione 
/* Validate the CSB entries for both directions (atok and ktoa).
 * To be called under NMG_LOCK().
 *
 * Checks alignment of the user-supplied CSB arrays, probes them with
 * copyin/copyout to verify the mappings are accessible, records the
 * base pointers in priv, and initializes every per-ring CSB entry
 * from the current kring state.
 * Returns 0 on success, EBUSY/EINVAL/ENOMEM or a copy error otherwise. */
static int
netmap_csb_validate(struct netmap_priv_d *priv, struct nmreq_opt_csb *csbo)
{
	struct nm_csb_atok *csb_atok_base =
		(struct nm_csb_atok *)(uintptr_t)csbo->csb_atok;
	struct nm_csb_ktoa *csb_ktoa_base =
		(struct nm_csb_ktoa *)(uintptr_t)csbo->csb_ktoa;
	enum txrx t;
	int num_rings[NR_TXRX], tot_rings;
	size_t entry_size[2];
	void *csb_start[2];
	int i;

	/* The sync kloop reads the CSB concurrently; refuse to move it. */
	if (priv->np_kloop_state & NM_SYNC_KLOOP_RUNNING) {
		nm_prerr("Cannot update CSB while kloop is running");
		return EBUSY;
	}

	/* Count the rings bound by this priv; one CSB entry per ring. */
	tot_rings = 0;
	for_rx_tx(t) {
		num_rings[t] = priv->np_qlast[t] - priv->np_qfirst[t];
		tot_rings += num_rings[t];
	}
	if (tot_rings <= 0)
		return 0;

	if (!(priv->np_flags & NR_EXCLUSIVE)) {
		nm_prerr("CSB mode requires NR_EXCLUSIVE");
		return EINVAL;
	}

	/* Treat the two arrays uniformly: [0] = atok, [1] = ktoa. */
	entry_size[0] = sizeof(*csb_atok_base);
	entry_size[1] = sizeof(*csb_ktoa_base);
	csb_start[0] = (void *)csb_atok_base;
	csb_start[1] = (void *)csb_ktoa_base;

	for (i = 0; i < 2; i++) {
		/* On Linux we could use access_ok() to simplify
		 * the validation. However, the advantage of
		 * this approach is that it works also on
		 * FreeBSD. */
		size_t csb_size = tot_rings * entry_size[i];
		void *tmp;
		int err;

		/* Entries must be naturally aligned (entry sizes are
		 * powers of two, so entry_size-1 is a valid mask). */
		if ((uintptr_t)csb_start[i] & (entry_size[i]-1)) {
			nm_prerr("Unaligned CSB address");
			return EINVAL;
		}

		tmp = nm_os_malloc(csb_size);
		if (!tmp)
			return ENOMEM;
		if (i == 0) {
			/* Application --> kernel direction. */
			err = copyin(csb_start[i], tmp, csb_size);
		} else {
			/* Kernel --> application direction. */
			memset(tmp, 0, csb_size);
			err = copyout(tmp, csb_start[i], csb_size);
		}
		nm_os_free(tmp);
		if (err) {
			nm_prerr("Invalid CSB address");
			return err;
		}
	}

	priv->np_csb_atok_base = csb_atok_base;
	priv->np_csb_ktoa_base = csb_ktoa_base;

	/* Initialize the CSB. */
	for_rx_tx(t) {
		for (i = 0; i < num_rings[t]; i++) {
			struct netmap_kring *kring =
				NMR(priv->np_na, t)[i + priv->np_qfirst[t]];
			struct nm_csb_atok *csb_atok = csb_atok_base + i;
			struct nm_csb_ktoa *csb_ktoa = csb_ktoa_base + i;

			/* RX entries follow all the TX entries in the arrays. */
			if (t == NR_RX) {
				csb_atok += num_rings[NR_TX];
				csb_ktoa += num_rings[NR_TX];
			}

			/* Seed both halves of the CSB from the kring and
			 * enable notifications in both directions. */
			CSB_WRITE(csb_atok, head, kring->rhead);
			CSB_WRITE(csb_atok, cur, kring->rcur);
			CSB_WRITE(csb_atok, appl_need_kick, 1);
			CSB_WRITE(csb_atok, sync_flags, 1);
			CSB_WRITE(csb_ktoa, hwcur, kring->nr_hwcur);
			CSB_WRITE(csb_ktoa, hwtail, kring->nr_hwtail);
			CSB_WRITE(csb_ktoa, kern_need_kick, 1);

			/* NOTE(review): this log is unconditional, not gated
			 * on netmap_verbose — presumably intentional, confirm. */
			nm_prinf("csb_init for kring %s: head %u, cur %u, "
				"hwcur %u, hwtail %u", kring->name,
				kring->rhead, kring->rcur, kring->nr_hwcur,
				kring->nr_hwtail);
		}
	}

	return 0;
}
2175b6e66be2SVincenzo Maffione 
217677a2baf5SVincenzo Maffione /* Ensure that the netmap adapter can support the given MTU.
217777a2baf5SVincenzo Maffione  * @return EINVAL if the na cannot be set to mtu, 0 otherwise.
217877a2baf5SVincenzo Maffione  */
217977a2baf5SVincenzo Maffione int
218077a2baf5SVincenzo Maffione netmap_buf_size_validate(const struct netmap_adapter *na, unsigned mtu) {
218177a2baf5SVincenzo Maffione 	unsigned nbs = NETMAP_BUF_SIZE(na);
218277a2baf5SVincenzo Maffione 
218377a2baf5SVincenzo Maffione 	if (mtu <= na->rx_buf_maxsize) {
218477a2baf5SVincenzo Maffione 		/* The MTU fits a single NIC slot. We only
218577a2baf5SVincenzo Maffione 		 * Need to check that netmap buffers are
218677a2baf5SVincenzo Maffione 		 * large enough to hold an MTU. NS_MOREFRAG
218777a2baf5SVincenzo Maffione 		 * cannot be used in this case. */
218877a2baf5SVincenzo Maffione 		if (nbs < mtu) {
218977a2baf5SVincenzo Maffione 			nm_prerr("error: netmap buf size (%u) "
219077a2baf5SVincenzo Maffione 				 "< device MTU (%u)", nbs, mtu);
219177a2baf5SVincenzo Maffione 			return EINVAL;
219277a2baf5SVincenzo Maffione 		}
219377a2baf5SVincenzo Maffione 	} else {
219477a2baf5SVincenzo Maffione 		/* More NIC slots may be needed to receive
219577a2baf5SVincenzo Maffione 		 * or transmit a single packet. Check that
219677a2baf5SVincenzo Maffione 		 * the adapter supports NS_MOREFRAG and that
219777a2baf5SVincenzo Maffione 		 * netmap buffers are large enough to hold
219877a2baf5SVincenzo Maffione 		 * the maximum per-slot size. */
219977a2baf5SVincenzo Maffione 		if (!(na->na_flags & NAF_MOREFRAG)) {
220077a2baf5SVincenzo Maffione 			nm_prerr("error: large MTU (%d) needed "
220177a2baf5SVincenzo Maffione 				 "but %s does not support "
220277a2baf5SVincenzo Maffione 				 "NS_MOREFRAG", mtu,
220377a2baf5SVincenzo Maffione 				 na->ifp->if_xname);
220477a2baf5SVincenzo Maffione 			return EINVAL;
220577a2baf5SVincenzo Maffione 		} else if (nbs < na->rx_buf_maxsize) {
220677a2baf5SVincenzo Maffione 			nm_prerr("error: using NS_MOREFRAG on "
220777a2baf5SVincenzo Maffione 				 "%s requires netmap buf size "
220877a2baf5SVincenzo Maffione 				 ">= %u", na->ifp->if_xname,
220977a2baf5SVincenzo Maffione 				 na->rx_buf_maxsize);
221077a2baf5SVincenzo Maffione 			return EINVAL;
221177a2baf5SVincenzo Maffione 		} else {
221277a2baf5SVincenzo Maffione 			nm_prinf("info: netmap application on "
221377a2baf5SVincenzo Maffione 				 "%s needs to support "
221477a2baf5SVincenzo Maffione 				 "NS_MOREFRAG "
221577a2baf5SVincenzo Maffione 				 "(MTU=%u,netmap_buf_size=%u)",
221677a2baf5SVincenzo Maffione 				 na->ifp->if_xname, mtu, nbs);
221777a2baf5SVincenzo Maffione 		}
221877a2baf5SVincenzo Maffione 	}
221977a2baf5SVincenzo Maffione 	return 0;
222077a2baf5SVincenzo Maffione }
222177a2baf5SVincenzo Maffione 
222277a2baf5SVincenzo Maffione 
2223f18be576SLuigi Rizzo /*
2224f18be576SLuigi Rizzo  * possibly move the interface to netmap-mode.
2225f18be576SLuigi Rizzo  * If success it returns a pointer to netmap_if, otherwise NULL.
2226ce3ee1e7SLuigi Rizzo  * This must be called with NMG_LOCK held.
22274bf50f18SLuigi Rizzo  *
22284bf50f18SLuigi Rizzo  * The following na callbacks are called in the process:
22294bf50f18SLuigi Rizzo  *
22304bf50f18SLuigi Rizzo  * na->nm_config()			[by netmap_update_config]
22314bf50f18SLuigi Rizzo  * (get current number and size of rings)
22324bf50f18SLuigi Rizzo  *
22334bf50f18SLuigi Rizzo  *  	We have a generic one for linux (netmap_linux_config).
22344bf50f18SLuigi Rizzo  *  	The bwrap has to override this, since it has to forward
22354bf50f18SLuigi Rizzo  *  	the request to the wrapped adapter (netmap_bwrap_config).
22364bf50f18SLuigi Rizzo  *
22374bf50f18SLuigi Rizzo  *
2238847bf383SLuigi Rizzo  * na->nm_krings_create()
22394bf50f18SLuigi Rizzo  * (create and init the krings array)
22404bf50f18SLuigi Rizzo  *
22414bf50f18SLuigi Rizzo  * 	One of the following:
22424bf50f18SLuigi Rizzo  *
22434bf50f18SLuigi Rizzo  *	* netmap_hw_krings_create, 			(hw ports)
22444bf50f18SLuigi Rizzo  *		creates the standard layout for the krings
22454bf50f18SLuigi Rizzo  * 		and adds the mbq (used for the host rings).
22464bf50f18SLuigi Rizzo  *
22474bf50f18SLuigi Rizzo  * 	* netmap_vp_krings_create			(VALE ports)
22484bf50f18SLuigi Rizzo  * 		add leases and scratchpads
22494bf50f18SLuigi Rizzo  *
22504bf50f18SLuigi Rizzo  * 	* netmap_pipe_krings_create			(pipes)
22514bf50f18SLuigi Rizzo  * 		create the krings and rings of both ends and
22524bf50f18SLuigi Rizzo  * 		cross-link them
22534bf50f18SLuigi Rizzo  *
22544bf50f18SLuigi Rizzo  *      * netmap_monitor_krings_create 			(monitors)
22554bf50f18SLuigi Rizzo  *      	avoid allocating the mbq
22564bf50f18SLuigi Rizzo  *
22574bf50f18SLuigi Rizzo  *      * netmap_bwrap_krings_create			(bwraps)
22584bf50f18SLuigi Rizzo  *      	create both the bwrap krings array,
22594bf50f18SLuigi Rizzo  *      	the krings array of the wrapped adapter, and
22604bf50f18SLuigi Rizzo  *      	(if needed) the fake array for the host adapter
22614bf50f18SLuigi Rizzo  *
22624bf50f18SLuigi Rizzo  * na->nm_register(, 1)
22634bf50f18SLuigi Rizzo  * (put the adapter in netmap mode)
22644bf50f18SLuigi Rizzo  *
22654bf50f18SLuigi Rizzo  * 	This may be one of the following:
22664bf50f18SLuigi Rizzo  *
226737e3a6d3SLuigi Rizzo  * 	* netmap_hw_reg				        (hw ports)
22684bf50f18SLuigi Rizzo  * 		checks that the ifp is still there, then calls
22694bf50f18SLuigi Rizzo  * 		the hardware specific callback;
22704bf50f18SLuigi Rizzo  *
22714bf50f18SLuigi Rizzo  * 	* netmap_vp_reg					(VALE ports)
22724bf50f18SLuigi Rizzo  *		If the port is connected to a bridge,
22734bf50f18SLuigi Rizzo  *		set the NAF_NETMAP_ON flag under the
22744bf50f18SLuigi Rizzo  *		bridge write lock.
22754bf50f18SLuigi Rizzo  *
22764bf50f18SLuigi Rizzo  *	* netmap_pipe_reg				(pipes)
22774bf50f18SLuigi Rizzo  *		inform the other pipe end that it is no
2278453130d9SPedro F. Giffuni  *		longer responsible for the lifetime of this
22794bf50f18SLuigi Rizzo  *		pipe end
22804bf50f18SLuigi Rizzo  *
22814bf50f18SLuigi Rizzo  *	* netmap_monitor_reg				(monitors)
22824bf50f18SLuigi Rizzo  *		intercept the sync callbacks of the monitored
22834bf50f18SLuigi Rizzo  *		rings
22844bf50f18SLuigi Rizzo  *
228537e3a6d3SLuigi Rizzo  *	* netmap_bwrap_reg				(bwraps)
22864bf50f18SLuigi Rizzo  *		cross-link the bwrap and hwna rings,
22874bf50f18SLuigi Rizzo  *		forward the request to the hwna, override
22884bf50f18SLuigi Rizzo  *		the hwna notify callback (to get the frames
22894bf50f18SLuigi Rizzo  *		coming from outside go through the bridge).
22904bf50f18SLuigi Rizzo  *
22914bf50f18SLuigi Rizzo  *
2292f18be576SLuigi Rizzo  */
2293847bf383SLuigi Rizzo int
2294f9790aebSLuigi Rizzo netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
22952ff91c17SVincenzo Maffione 	uint32_t nr_mode, uint16_t nr_ringid, uint64_t nr_flags)
2296f18be576SLuigi Rizzo {
2297f18be576SLuigi Rizzo 	struct netmap_if *nifp = NULL;
2298847bf383SLuigi Rizzo 	int error;
2299f18be576SLuigi Rizzo 
2300ce3ee1e7SLuigi Rizzo 	NMG_LOCK_ASSERT();
2301f9790aebSLuigi Rizzo 	priv->np_na = na;     /* store the reference */
	/* Bring the adapter's memory allocator to a usable state.
	 * Undone on all error paths by netmap_mem_drop() below. */
2302847bf383SLuigi Rizzo 	error = netmap_mem_finalize(na->nm_mem, na);
2303ce3ee1e7SLuigi Rizzo 	if (error)
2304847bf383SLuigi Rizzo 		goto err;
2305847bf383SLuigi Rizzo 
	/* active_fds == 0: no other file descriptor currently has this
	 * adapter registered, so this is a first-time setup. */
2306847bf383SLuigi Rizzo 	if (na->active_fds == 0) {
23072ff91c17SVincenzo Maffione 
23082ff91c17SVincenzo Maffione 		/* cache the allocator info in the na */
23092ff91c17SVincenzo Maffione 		error = netmap_mem_get_lut(na->nm_mem, &na->na_lut);
23102ff91c17SVincenzo Maffione 		if (error)
23112ff91c17SVincenzo Maffione 			goto err_drop_mem;
231275f4f3edSVincenzo Maffione 		nm_prdis("lut %p bufs %u size %u", na->na_lut.lut, na->na_lut.objtotal,
23132ff91c17SVincenzo Maffione 					    na->na_lut.objsize);
23142ff91c17SVincenzo Maffione 
23152ff91c17SVincenzo Maffione 		/* ring configuration may have changed, fetch from the card */
23162ff91c17SVincenzo Maffione 		netmap_update_config(na);
2317cfa866f6SMatt Macy 	}
23182ff91c17SVincenzo Maffione 
2319cfa866f6SMatt Macy 	/* compute the range of tx and rx rings to monitor */
2320cfa866f6SMatt Macy 	error = netmap_set_ringid(priv, nr_mode, nr_ringid, nr_flags);
2321cfa866f6SMatt Macy 	if (error)
2322cfa866f6SMatt Macy 		goto err_put_lut;
2323cfa866f6SMatt Macy 
2324cfa866f6SMatt Macy 	if (na->active_fds == 0) {
2325847bf383SLuigi Rizzo 		/*
2326847bf383SLuigi Rizzo 		 * If this is the first registration of the adapter,
23274f80b14cSVincenzo Maffione 		 * perform sanity checks and create the in-kernel view
23284f80b14cSVincenzo Maffione 		 * of the netmap rings (the netmap krings).
2329847bf383SLuigi Rizzo 		 */
23302ff91c17SVincenzo Maffione 		if (na->ifp && nm_priv_rx_enabled(priv)) {
23314f80b14cSVincenzo Maffione 			/* This netmap adapter is attached to an ifnet. */
23324f80b14cSVincenzo Maffione 			unsigned mtu = nm_os_ifnet_mtu(na->ifp);
23334f80b14cSVincenzo Maffione 
233475f4f3edSVincenzo Maffione 			nm_prdis("%s: mtu %d rx_buf_maxsize %d netmap_buf_size %d",
233577a2baf5SVincenzo Maffione 				na->name, mtu, na->rx_buf_maxsize, NETMAP_BUF_SIZE(na));
2336cfa866f6SMatt Macy 
			/* A zero rx_buf_maxsize cannot be validated against
			 * the MTU; refuse the registration. */
2337cfa866f6SMatt Macy 			if (na->rx_buf_maxsize == 0) {
2338b6e66be2SVincenzo Maffione 				nm_prerr("%s: error: rx_buf_maxsize == 0", na->name);
2339cfa866f6SMatt Macy 				error = EIO;
2340cfa866f6SMatt Macy 				goto err_drop_mem;
2341cfa866f6SMatt Macy 			}
23422ff91c17SVincenzo Maffione 
			/* Check that netmap buffers are large enough for
			 * the current MTU (see helper defined above). */
234377a2baf5SVincenzo Maffione 			error = netmap_buf_size_validate(na, mtu);
234477a2baf5SVincenzo Maffione 			if (error)
23454f80b14cSVincenzo Maffione 				goto err_drop_mem;
23464f80b14cSVincenzo Maffione 		}
2347847bf383SLuigi Rizzo 
2348847bf383SLuigi Rizzo 		/*
2349847bf383SLuigi Rizzo 		 * Depending on the adapter, this may also create
2350847bf383SLuigi Rizzo 		 * the netmap rings themselves
2351847bf383SLuigi Rizzo 		 */
2352847bf383SLuigi Rizzo 		error = na->nm_krings_create(na);
2353847bf383SLuigi Rizzo 		if (error)
23542ff91c17SVincenzo Maffione 			goto err_put_lut;
2355847bf383SLuigi Rizzo 
2356ce3ee1e7SLuigi Rizzo 	}
2357847bf383SLuigi Rizzo 
235837e3a6d3SLuigi Rizzo 	/* now the krings must exist and we can check whether some
235937e3a6d3SLuigi Rizzo 	 * previous bind has exclusive ownership on them, and set
236037e3a6d3SLuigi Rizzo 	 * nr_pending_mode
2361847bf383SLuigi Rizzo 	 */
236237e3a6d3SLuigi Rizzo 	error = netmap_krings_get(priv);
2363847bf383SLuigi Rizzo 	if (error)
236437e3a6d3SLuigi Rizzo 		goto err_del_krings;
236537e3a6d3SLuigi Rizzo 
236637e3a6d3SLuigi Rizzo 	/* create all needed missing netmap rings */
236737e3a6d3SLuigi Rizzo 	error = netmap_mem_rings_create(na);
236837e3a6d3SLuigi Rizzo 	if (error)
236937e3a6d3SLuigi Rizzo 		goto err_rel_excl;
2370847bf383SLuigi Rizzo 
2371847bf383SLuigi Rizzo 	/* in all cases, create a new netmap if */
2372c3e9b4dbSLuiz Otavio O Souza 	nifp = netmap_mem_if_new(na, priv);
2373847bf383SLuigi Rizzo 	if (nifp == NULL) {
2374f18be576SLuigi Rizzo 		error = ENOMEM;
2375cfa866f6SMatt Macy 		goto err_rel_excl;
2376ce3ee1e7SLuigi Rizzo 	}
2377847bf383SLuigi Rizzo 
237837e3a6d3SLuigi Rizzo 	if (nm_kring_pending(priv)) {
237937e3a6d3SLuigi Rizzo 		/* Some kring is switching mode, tell the adapter to
238037e3a6d3SLuigi Rizzo 		 * react on this. */
238137e3a6d3SLuigi Rizzo 		error = na->nm_register(na, 1);
238237e3a6d3SLuigi Rizzo 		if (error)
23832ff91c17SVincenzo Maffione 			goto err_del_if;
238437e3a6d3SLuigi Rizzo 	}
238537e3a6d3SLuigi Rizzo 
238637e3a6d3SLuigi Rizzo 	/* Commit the reference. */
238737e3a6d3SLuigi Rizzo 	na->active_fds++;
238837e3a6d3SLuigi Rizzo 
2389ce3ee1e7SLuigi Rizzo 	/*
2390847bf383SLuigi Rizzo 	 * advertise that the interface is ready by setting np_nifp.
2391847bf383SLuigi Rizzo 	 * The barrier is needed because readers (poll, *SYNC and mmap)
2392ce3ee1e7SLuigi Rizzo 	 * check for priv->np_nifp != NULL without locking
2393ce3ee1e7SLuigi Rizzo 	 */
2394847bf383SLuigi Rizzo 	mb(); /* make sure previous writes are visible to all CPUs */
2395ce3ee1e7SLuigi Rizzo 	priv->np_nifp = nifp;
2396847bf383SLuigi Rizzo 
2397847bf383SLuigi Rizzo 	return 0;
2398847bf383SLuigi Rizzo 
	/* Error unwinding: each label undoes the corresponding setup step,
	 * in reverse order of acquisition.  Kring and LUT teardown only
	 * happen when no other fds still hold the adapter (active_fds == 0). */
239937e3a6d3SLuigi Rizzo err_del_if:
2400847bf383SLuigi Rizzo 	netmap_mem_if_delete(na, nifp);
24014f80b14cSVincenzo Maffione err_rel_excl:
24024f80b14cSVincenzo Maffione 	netmap_krings_put(priv);
2403cfa866f6SMatt Macy 	netmap_mem_rings_delete(na);
2404847bf383SLuigi Rizzo err_del_krings:
2405847bf383SLuigi Rizzo 	if (na->active_fds == 0)
2406847bf383SLuigi Rizzo 		na->nm_krings_delete(na);
24072ff91c17SVincenzo Maffione err_put_lut:
24082ff91c17SVincenzo Maffione 	if (na->active_fds == 0)
24092ff91c17SVincenzo Maffione 		memset(&na->na_lut, 0, sizeof(na->na_lut));
2410847bf383SLuigi Rizzo err_drop_mem:
24114f80b14cSVincenzo Maffione 	netmap_mem_drop(na);
2412847bf383SLuigi Rizzo err:
2413847bf383SLuigi Rizzo 	priv->np_na = NULL;
2414847bf383SLuigi Rizzo 	return error;
2415ce3ee1e7SLuigi Rizzo }
2416847bf383SLuigi Rizzo 
2417847bf383SLuigi Rizzo 
2418847bf383SLuigi Rizzo /*
241937e3a6d3SLuigi Rizzo  * update kring and ring at the end of rxsync/txsync.
2420847bf383SLuigi Rizzo  */
2421847bf383SLuigi Rizzo static inline void
242237e3a6d3SLuigi Rizzo nm_sync_finalize(struct netmap_kring *kring)
2423847bf383SLuigi Rizzo {
242437e3a6d3SLuigi Rizzo 	/*
242537e3a6d3SLuigi Rizzo 	 * Update ring tail to what the kernel knows
242637e3a6d3SLuigi Rizzo 	 * After txsync: head/rhead/hwcur might be behind cur/rcur
242737e3a6d3SLuigi Rizzo 	 * if no carrier.
242837e3a6d3SLuigi Rizzo 	 */
2429847bf383SLuigi Rizzo 	kring->ring->tail = kring->rtail = kring->nr_hwtail;
2430847bf383SLuigi Rizzo 
243175f4f3edSVincenzo Maffione 	nm_prdis(5, "%s now hwcur %d hwtail %d head %d cur %d tail %d",
2432847bf383SLuigi Rizzo 		kring->name, kring->nr_hwcur, kring->nr_hwtail,
2433847bf383SLuigi Rizzo 		kring->rhead, kring->rcur, kring->rtail);
2434847bf383SLuigi Rizzo }
2435847bf383SLuigi Rizzo 
2436c3e9b4dbSLuiz Otavio O Souza /* set ring timestamp */
2437c3e9b4dbSLuiz Otavio O Souza static inline void
2438c3e9b4dbSLuiz Otavio O Souza ring_timestamp_set(struct netmap_ring *ring)
2439c3e9b4dbSLuiz Otavio O Souza {
2440c3e9b4dbSLuiz Otavio O Souza 	if (netmap_no_timestamp == 0 || ring->flags & NR_TIMESTAMP) {
2441c3e9b4dbSLuiz Otavio O Souza 		microtime(&ring->ts);
2442c3e9b4dbSLuiz Otavio O Souza 	}
2443c3e9b4dbSLuiz Otavio O Souza }
2444c3e9b4dbSLuiz Otavio O Souza 
24452ff91c17SVincenzo Maffione static int nmreq_copyin(struct nmreq_header *, int);
24462ff91c17SVincenzo Maffione static int nmreq_copyout(struct nmreq_header *, int);
24472ff91c17SVincenzo Maffione static int nmreq_checkoptions(struct nmreq_header *);
2448c3e9b4dbSLuiz Otavio O Souza 
244968b8534bSLuigi Rizzo /*
245068b8534bSLuigi Rizzo  * ioctl(2) support for the "netmap" device.
245168b8534bSLuigi Rizzo  *
245268b8534bSLuigi Rizzo  * The following is a list of accepted commands:
24532ff91c17SVincenzo Maffione  * - NIOCCTRL		device control API
24542ff91c17SVincenzo Maffione  * - NIOCTXSYNC		sync TX rings
24552ff91c17SVincenzo Maffione  * - NIOCRXSYNC		sync RX rings
245668b8534bSLuigi Rizzo  * - SIOCGIFADDR	just for convenience
24572ff91c17SVincenzo Maffione  * - NIOCGINFO		deprecated (legacy API)
24582ff91c17SVincenzo Maffione  * - NIOCREGIF		deprecated (legacy API)
245968b8534bSLuigi Rizzo  *
246068b8534bSLuigi Rizzo  * Return 0 on success, errno otherwise.
246168b8534bSLuigi Rizzo  */
2462f9790aebSLuigi Rizzo int
24632ff91c17SVincenzo Maffione netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data,
24642ff91c17SVincenzo Maffione 		struct thread *td, int nr_body_is_user)
246568b8534bSLuigi Rizzo {
2466c3e9b4dbSLuiz Otavio O Souza 	struct mbq q;	/* packets from RX hw queues to host stack */
2467ce3ee1e7SLuigi Rizzo 	struct netmap_adapter *na = NULL;
2468c3e9b4dbSLuiz Otavio O Souza 	struct netmap_mem_d *nmd = NULL;
246937e3a6d3SLuigi Rizzo 	struct ifnet *ifp = NULL;
247037e3a6d3SLuigi Rizzo 	int error = 0;
2471f0ea3689SLuigi Rizzo 	u_int i, qfirst, qlast;
24722ff91c17SVincenzo Maffione 	struct netmap_kring **krings;
2473c3e9b4dbSLuiz Otavio O Souza 	int sync_flags;
2474847bf383SLuigi Rizzo 	enum txrx t;
247568b8534bSLuigi Rizzo 
24762ff91c17SVincenzo Maffione 	switch (cmd) {
24772ff91c17SVincenzo Maffione 	case NIOCCTRL: {
24782ff91c17SVincenzo Maffione 		struct nmreq_header *hdr = (struct nmreq_header *)data;
24792ff91c17SVincenzo Maffione 
24802ff91c17SVincenzo Maffione 		if (hdr->nr_version < NETMAP_MIN_API ||
24812ff91c17SVincenzo Maffione 		    hdr->nr_version > NETMAP_MAX_API) {
2482b6e66be2SVincenzo Maffione 			nm_prerr("API mismatch: got %d need %d",
2483b6e66be2SVincenzo Maffione 				hdr->nr_version, NETMAP_API);
248417885a7bSLuigi Rizzo 			return EINVAL;
248517885a7bSLuigi Rizzo 		}
248668b8534bSLuigi Rizzo 
24872ff91c17SVincenzo Maffione 		/* Make a kernel-space copy of the user-space nr_body.
24882ff91c17SVincenzo Maffione 		 * For convenience, the nr_body pointer and the pointers
24892ff91c17SVincenzo Maffione 		 * in the options list will be replaced with their
24902ff91c17SVincenzo Maffione 		 * kernel-space counterparts. The original pointers are
24912ff91c17SVincenzo Maffione 		 * saved internally and later restored by nmreq_copyout
24922ff91c17SVincenzo Maffione 		 */
24932ff91c17SVincenzo Maffione 		error = nmreq_copyin(hdr, nr_body_is_user);
249437e3a6d3SLuigi Rizzo 		if (error) {
24952ff91c17SVincenzo Maffione 			return error;
2496ce3ee1e7SLuigi Rizzo 		}
2497ce3ee1e7SLuigi Rizzo 
24982ff91c17SVincenzo Maffione 		/* Sanitize hdr->nr_name. */
24992ff91c17SVincenzo Maffione 		hdr->nr_name[sizeof(hdr->nr_name) - 1] = '\0';
250068b8534bSLuigi Rizzo 
25012ff91c17SVincenzo Maffione 		switch (hdr->nr_reqtype) {
25022ff91c17SVincenzo Maffione 		case NETMAP_REQ_REGISTER: {
25032ff91c17SVincenzo Maffione 			struct nmreq_register *req =
2504cfa866f6SMatt Macy 				(struct nmreq_register *)(uintptr_t)hdr->nr_body;
2505b6e66be2SVincenzo Maffione 			struct netmap_if *nifp;
2506b6e66be2SVincenzo Maffione 
25072ff91c17SVincenzo Maffione 			/* Protect access to priv from concurrent requests. */
2508ce3ee1e7SLuigi Rizzo 			NMG_LOCK();
2509ce3ee1e7SLuigi Rizzo 			do {
25102ff91c17SVincenzo Maffione 				struct nmreq_option *opt;
2511b6e66be2SVincenzo Maffione 				u_int memflags;
2512ce3ee1e7SLuigi Rizzo 
2513847bf383SLuigi Rizzo 				if (priv->np_nifp != NULL) {	/* thread already registered */
2514f0ea3689SLuigi Rizzo 					error = EBUSY;
2515506cc70cSLuigi Rizzo 					break;
2516506cc70cSLuigi Rizzo 				}
2517c3e9b4dbSLuiz Otavio O Souza 
25182ff91c17SVincenzo Maffione #ifdef WITH_EXTMEM
2519253b2ec1SVincenzo Maffione 				opt = nmreq_getoption(hdr, NETMAP_REQ_OPT_EXTMEM);
25202ff91c17SVincenzo Maffione 				if (opt != NULL) {
25212ff91c17SVincenzo Maffione 					struct nmreq_opt_extmem *e =
25222ff91c17SVincenzo Maffione 						(struct nmreq_opt_extmem *)opt;
25232ff91c17SVincenzo Maffione 
25242ff91c17SVincenzo Maffione 					nmd = netmap_mem_ext_create(e->nro_usrptr,
25252ff91c17SVincenzo Maffione 							&e->nro_info, &error);
25262ff91c17SVincenzo Maffione 					opt->nro_status = error;
25272ff91c17SVincenzo Maffione 					if (nmd == NULL)
25282ff91c17SVincenzo Maffione 						break;
25292ff91c17SVincenzo Maffione 				}
25302ff91c17SVincenzo Maffione #endif /* WITH_EXTMEM */
25312ff91c17SVincenzo Maffione 
25322ff91c17SVincenzo Maffione 				if (nmd == NULL && req->nr_mem_id) {
2533c3e9b4dbSLuiz Otavio O Souza 					/* find the allocator and get a reference */
25342ff91c17SVincenzo Maffione 					nmd = netmap_mem_find(req->nr_mem_id);
2535c3e9b4dbSLuiz Otavio O Souza 					if (nmd == NULL) {
2536b6e66be2SVincenzo Maffione 						if (netmap_verbose) {
2537b6e66be2SVincenzo Maffione 							nm_prerr("%s: failed to find mem_id %u",
2538b6e66be2SVincenzo Maffione 									hdr->nr_name, req->nr_mem_id);
2539b6e66be2SVincenzo Maffione 						}
2540c3e9b4dbSLuiz Otavio O Souza 						error = EINVAL;
2541c3e9b4dbSLuiz Otavio O Souza 						break;
2542c3e9b4dbSLuiz Otavio O Souza 					}
2543c3e9b4dbSLuiz Otavio O Souza 				}
254468b8534bSLuigi Rizzo 				/* find the interface and a reference */
25452ff91c17SVincenzo Maffione 				error = netmap_get_na(hdr, &na, &ifp, nmd,
254637e3a6d3SLuigi Rizzo 						      1 /* create */); /* keep reference */
254768b8534bSLuigi Rizzo 				if (error)
2548ce3ee1e7SLuigi Rizzo 					break;
2549f9790aebSLuigi Rizzo 				if (NETMAP_OWNED_BY_KERN(na)) {
2550ce3ee1e7SLuigi Rizzo 					error = EBUSY;
2551ce3ee1e7SLuigi Rizzo 					break;
2552f196ce38SLuigi Rizzo 				}
255337e3a6d3SLuigi Rizzo 
25542ff91c17SVincenzo Maffione 				if (na->virt_hdr_len && !(req->nr_flags & NR_ACCEPT_VNET_HDR)) {
2555b6e66be2SVincenzo Maffione 					nm_prerr("virt_hdr_len=%d, but application does "
2556b6e66be2SVincenzo Maffione 						"not accept it", na->virt_hdr_len);
255737e3a6d3SLuigi Rizzo 					error = EIO;
255837e3a6d3SLuigi Rizzo 					break;
255937e3a6d3SLuigi Rizzo 				}
256037e3a6d3SLuigi Rizzo 
25612ff91c17SVincenzo Maffione 				error = netmap_do_regif(priv, na, req->nr_mode,
25622ff91c17SVincenzo Maffione 							req->nr_ringid, req->nr_flags);
2563847bf383SLuigi Rizzo 				if (error) {    /* reg. failed, release priv and ref */
2564ce3ee1e7SLuigi Rizzo 					break;
256568b8534bSLuigi Rizzo 				}
2566b6e66be2SVincenzo Maffione 
2567253b2ec1SVincenzo Maffione 				opt = nmreq_getoption(hdr, NETMAP_REQ_OPT_CSB);
2568b6e66be2SVincenzo Maffione 				if (opt != NULL) {
2569b6e66be2SVincenzo Maffione 					struct nmreq_opt_csb *csbo =
2570b6e66be2SVincenzo Maffione 						(struct nmreq_opt_csb *)opt;
2571b6e66be2SVincenzo Maffione 					error = netmap_csb_validate(priv, csbo);
2572b6e66be2SVincenzo Maffione 					opt->nro_status = error;
2573b6e66be2SVincenzo Maffione 					if (error) {
2574b6e66be2SVincenzo Maffione 						netmap_do_unregif(priv);
2575b6e66be2SVincenzo Maffione 						break;
2576b6e66be2SVincenzo Maffione 					}
2577b6e66be2SVincenzo Maffione 				}
2578b6e66be2SVincenzo Maffione 
2579847bf383SLuigi Rizzo 				nifp = priv->np_nifp;
258068b8534bSLuigi Rizzo 
258168b8534bSLuigi Rizzo 				/* return the offset of the netmap_if object */
25822ff91c17SVincenzo Maffione 				req->nr_rx_rings = na->num_rx_rings;
25832ff91c17SVincenzo Maffione 				req->nr_tx_rings = na->num_tx_rings;
25842ff91c17SVincenzo Maffione 				req->nr_rx_slots = na->num_rx_desc;
25852ff91c17SVincenzo Maffione 				req->nr_tx_slots = na->num_tx_desc;
2586d12354a5SVincenzo Maffione 				req->nr_host_tx_rings = na->num_host_tx_rings;
2587d12354a5SVincenzo Maffione 				req->nr_host_rx_rings = na->num_host_rx_rings;
25882ff91c17SVincenzo Maffione 				error = netmap_mem_get_info(na->nm_mem, &req->nr_memsize, &memflags,
25892ff91c17SVincenzo Maffione 					&req->nr_mem_id);
2590ce3ee1e7SLuigi Rizzo 				if (error) {
2591847bf383SLuigi Rizzo 					netmap_do_unregif(priv);
2592ce3ee1e7SLuigi Rizzo 					break;
2593ce3ee1e7SLuigi Rizzo 				}
2594ce3ee1e7SLuigi Rizzo 				if (memflags & NETMAP_MEM_PRIVATE) {
25953d819cb6SLuigi Rizzo 					*(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM;
2596ce3ee1e7SLuigi Rizzo 				}
2597847bf383SLuigi Rizzo 				for_rx_tx(t) {
2598847bf383SLuigi Rizzo 					priv->np_si[t] = nm_si_user(priv, t) ?
25992ff91c17SVincenzo Maffione 						&na->si[t] : &NMR(na, t)[priv->np_qfirst[t]]->si;
2600847bf383SLuigi Rizzo 				}
2601f0ea3689SLuigi Rizzo 
26022ff91c17SVincenzo Maffione 				if (req->nr_extra_bufs) {
260337e3a6d3SLuigi Rizzo 					if (netmap_verbose)
2604b6e66be2SVincenzo Maffione 						nm_prinf("requested %d extra buffers",
26052ff91c17SVincenzo Maffione 							req->nr_extra_bufs);
26062ff91c17SVincenzo Maffione 					req->nr_extra_bufs = netmap_extra_alloc(na,
26072ff91c17SVincenzo Maffione 						&nifp->ni_bufs_head, req->nr_extra_bufs);
260837e3a6d3SLuigi Rizzo 					if (netmap_verbose)
2609b6e66be2SVincenzo Maffione 						nm_prinf("got %d extra buffers", req->nr_extra_bufs);
2610f0ea3689SLuigi Rizzo 				}
26112ff91c17SVincenzo Maffione 				req->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp);
26122ff91c17SVincenzo Maffione 
26132ff91c17SVincenzo Maffione 				error = nmreq_checkoptions(hdr);
26142ff91c17SVincenzo Maffione 				if (error) {
26152ff91c17SVincenzo Maffione 					netmap_do_unregif(priv);
26162ff91c17SVincenzo Maffione 					break;
26172ff91c17SVincenzo Maffione 				}
261837e3a6d3SLuigi Rizzo 
261937e3a6d3SLuigi Rizzo 				/* store ifp reference so that priv destructor may release it */
262037e3a6d3SLuigi Rizzo 				priv->np_ifp = ifp;
2621ce3ee1e7SLuigi Rizzo 			} while (0);
2622c3e9b4dbSLuiz Otavio O Souza 			if (error) {
2623c3e9b4dbSLuiz Otavio O Souza 				netmap_unget_na(na, ifp);
2624c3e9b4dbSLuiz Otavio O Souza 			}
2625c3e9b4dbSLuiz Otavio O Souza 			/* release the reference from netmap_mem_find() or
2626c3e9b4dbSLuiz Otavio O Souza 			 * netmap_mem_ext_create()
2627c3e9b4dbSLuiz Otavio O Souza 			 */
2628c3e9b4dbSLuiz Otavio O Souza 			if (nmd)
2629c3e9b4dbSLuiz Otavio O Souza 				netmap_mem_put(nmd);
2630ce3ee1e7SLuigi Rizzo 			NMG_UNLOCK();
263168b8534bSLuigi Rizzo 			break;
26322ff91c17SVincenzo Maffione 		}
26332ff91c17SVincenzo Maffione 
26342ff91c17SVincenzo Maffione 		case NETMAP_REQ_PORT_INFO_GET: {
26352ff91c17SVincenzo Maffione 			struct nmreq_port_info_get *req =
2636cfa866f6SMatt Macy 				(struct nmreq_port_info_get *)(uintptr_t)hdr->nr_body;
26372ff91c17SVincenzo Maffione 
26382ff91c17SVincenzo Maffione 			NMG_LOCK();
26392ff91c17SVincenzo Maffione 			do {
26402ff91c17SVincenzo Maffione 				u_int memflags;
26412ff91c17SVincenzo Maffione 
26422ff91c17SVincenzo Maffione 				if (hdr->nr_name[0] != '\0') {
26432ff91c17SVincenzo Maffione 					/* Build a nmreq_register out of the nmreq_port_info_get,
26442ff91c17SVincenzo Maffione 					 * so that we can call netmap_get_na(). */
26452ff91c17SVincenzo Maffione 					struct nmreq_register regreq;
26462ff91c17SVincenzo Maffione 					bzero(&regreq, sizeof(regreq));
2647b6e66be2SVincenzo Maffione 					regreq.nr_mode = NR_REG_ALL_NIC;
26482ff91c17SVincenzo Maffione 					regreq.nr_tx_slots = req->nr_tx_slots;
26492ff91c17SVincenzo Maffione 					regreq.nr_rx_slots = req->nr_rx_slots;
26502ff91c17SVincenzo Maffione 					regreq.nr_tx_rings = req->nr_tx_rings;
26512ff91c17SVincenzo Maffione 					regreq.nr_rx_rings = req->nr_rx_rings;
2652d12354a5SVincenzo Maffione 					regreq.nr_host_tx_rings = req->nr_host_tx_rings;
2653d12354a5SVincenzo Maffione 					regreq.nr_host_rx_rings = req->nr_host_rx_rings;
26542ff91c17SVincenzo Maffione 					regreq.nr_mem_id = req->nr_mem_id;
26552ff91c17SVincenzo Maffione 
26562ff91c17SVincenzo Maffione 					/* get a refcount */
26572ff91c17SVincenzo Maffione 					hdr->nr_reqtype = NETMAP_REQ_REGISTER;
2658cfa866f6SMatt Macy 					hdr->nr_body = (uintptr_t)&regreq;
26592ff91c17SVincenzo Maffione 					error = netmap_get_na(hdr, &na, &ifp, NULL, 1 /* create */);
26602ff91c17SVincenzo Maffione 					hdr->nr_reqtype = NETMAP_REQ_PORT_INFO_GET; /* reset type */
2661cfa866f6SMatt Macy 					hdr->nr_body = (uintptr_t)req; /* reset nr_body */
26622ff91c17SVincenzo Maffione 					if (error) {
26632ff91c17SVincenzo Maffione 						na = NULL;
26642ff91c17SVincenzo Maffione 						ifp = NULL;
26652ff91c17SVincenzo Maffione 						break;
26662ff91c17SVincenzo Maffione 					}
26672ff91c17SVincenzo Maffione 					nmd = na->nm_mem; /* get memory allocator */
26682ff91c17SVincenzo Maffione 				} else {
26692ff91c17SVincenzo Maffione 					nmd = netmap_mem_find(req->nr_mem_id ? req->nr_mem_id : 1);
26702ff91c17SVincenzo Maffione 					if (nmd == NULL) {
2671b6e66be2SVincenzo Maffione 						if (netmap_verbose)
2672b6e66be2SVincenzo Maffione 							nm_prerr("%s: failed to find mem_id %u",
2673b6e66be2SVincenzo Maffione 									hdr->nr_name,
2674b6e66be2SVincenzo Maffione 									req->nr_mem_id ? req->nr_mem_id : 1);
26752ff91c17SVincenzo Maffione 						error = EINVAL;
26762ff91c17SVincenzo Maffione 						break;
26772ff91c17SVincenzo Maffione 					}
26782ff91c17SVincenzo Maffione 				}
26792ff91c17SVincenzo Maffione 
26802ff91c17SVincenzo Maffione 				error = netmap_mem_get_info(nmd, &req->nr_memsize, &memflags,
26812ff91c17SVincenzo Maffione 					&req->nr_mem_id);
26822ff91c17SVincenzo Maffione 				if (error)
26832ff91c17SVincenzo Maffione 					break;
26842ff91c17SVincenzo Maffione 				if (na == NULL) /* only memory info */
26852ff91c17SVincenzo Maffione 					break;
26862ff91c17SVincenzo Maffione 				netmap_update_config(na);
26872ff91c17SVincenzo Maffione 				req->nr_rx_rings = na->num_rx_rings;
26882ff91c17SVincenzo Maffione 				req->nr_tx_rings = na->num_tx_rings;
26892ff91c17SVincenzo Maffione 				req->nr_rx_slots = na->num_rx_desc;
26902ff91c17SVincenzo Maffione 				req->nr_tx_slots = na->num_tx_desc;
2691d12354a5SVincenzo Maffione 				req->nr_host_tx_rings = na->num_host_tx_rings;
2692d12354a5SVincenzo Maffione 				req->nr_host_rx_rings = na->num_host_rx_rings;
26932ff91c17SVincenzo Maffione 			} while (0);
26942ff91c17SVincenzo Maffione 			netmap_unget_na(na, ifp);
26952ff91c17SVincenzo Maffione 			NMG_UNLOCK();
26962ff91c17SVincenzo Maffione 			break;
26972ff91c17SVincenzo Maffione 		}
26982ff91c17SVincenzo Maffione #ifdef WITH_VALE
26992ff91c17SVincenzo Maffione 		case NETMAP_REQ_VALE_ATTACH: {
2700b6e66be2SVincenzo Maffione 			error = netmap_vale_attach(hdr, NULL /* userspace request */);
27012ff91c17SVincenzo Maffione 			break;
27022ff91c17SVincenzo Maffione 		}
27032ff91c17SVincenzo Maffione 
27042ff91c17SVincenzo Maffione 		case NETMAP_REQ_VALE_DETACH: {
2705b6e66be2SVincenzo Maffione 			error = netmap_vale_detach(hdr, NULL /* userspace request */);
27062ff91c17SVincenzo Maffione 			break;
27072ff91c17SVincenzo Maffione 		}
27082ff91c17SVincenzo Maffione 
27092ff91c17SVincenzo Maffione 		case NETMAP_REQ_VALE_LIST: {
2710b6e66be2SVincenzo Maffione 			error = netmap_vale_list(hdr);
27112ff91c17SVincenzo Maffione 			break;
27122ff91c17SVincenzo Maffione 		}
27132ff91c17SVincenzo Maffione 
27142ff91c17SVincenzo Maffione 		case NETMAP_REQ_PORT_HDR_SET: {
27152ff91c17SVincenzo Maffione 			struct nmreq_port_hdr *req =
2716cfa866f6SMatt Macy 				(struct nmreq_port_hdr *)(uintptr_t)hdr->nr_body;
27172ff91c17SVincenzo Maffione 			/* Build a nmreq_register out of the nmreq_port_hdr,
27182ff91c17SVincenzo Maffione 			 * so that we can call netmap_get_bdg_na(). */
27192ff91c17SVincenzo Maffione 			struct nmreq_register regreq;
27202ff91c17SVincenzo Maffione 			bzero(&regreq, sizeof(regreq));
2721b6e66be2SVincenzo Maffione 			regreq.nr_mode = NR_REG_ALL_NIC;
2722b6e66be2SVincenzo Maffione 
27232ff91c17SVincenzo Maffione 			/* For now we only support virtio-net headers, and only for
27242ff91c17SVincenzo Maffione 			 * VALE ports, but this may change in future. Valid lengths
27252ff91c17SVincenzo Maffione 			 * for the virtio-net header are 0 (no header), 10 and 12. */
27262ff91c17SVincenzo Maffione 			if (req->nr_hdr_len != 0 &&
27272ff91c17SVincenzo Maffione 				req->nr_hdr_len != sizeof(struct nm_vnet_hdr) &&
27282ff91c17SVincenzo Maffione 					req->nr_hdr_len != 12) {
2729b6e66be2SVincenzo Maffione 				if (netmap_verbose)
2730b6e66be2SVincenzo Maffione 					nm_prerr("invalid hdr_len %u", req->nr_hdr_len);
27312ff91c17SVincenzo Maffione 				error = EINVAL;
27322ff91c17SVincenzo Maffione 				break;
27332ff91c17SVincenzo Maffione 			}
27342ff91c17SVincenzo Maffione 			NMG_LOCK();
27352ff91c17SVincenzo Maffione 			hdr->nr_reqtype = NETMAP_REQ_REGISTER;
2736cfa866f6SMatt Macy 			hdr->nr_body = (uintptr_t)&regreq;
27372a7db7a6SVincenzo Maffione 			error = netmap_get_vale_na(hdr, &na, NULL, 0);
27382ff91c17SVincenzo Maffione 			hdr->nr_reqtype = NETMAP_REQ_PORT_HDR_SET;
2739cfa866f6SMatt Macy 			hdr->nr_body = (uintptr_t)req;
27402ff91c17SVincenzo Maffione 			if (na && !error) {
27412ff91c17SVincenzo Maffione 				struct netmap_vp_adapter *vpna =
27422ff91c17SVincenzo Maffione 					(struct netmap_vp_adapter *)na;
27432ff91c17SVincenzo Maffione 				na->virt_hdr_len = req->nr_hdr_len;
27442ff91c17SVincenzo Maffione 				if (na->virt_hdr_len) {
27452ff91c17SVincenzo Maffione 					vpna->mfs = NETMAP_BUF_SIZE(na);
27462ff91c17SVincenzo Maffione 				}
2747b6e66be2SVincenzo Maffione 				if (netmap_verbose)
2748b6e66be2SVincenzo Maffione 					nm_prinf("Using vnet_hdr_len %d for %p", na->virt_hdr_len, na);
27492ff91c17SVincenzo Maffione 				netmap_adapter_put(na);
27502ff91c17SVincenzo Maffione 			} else if (!na) {
27512ff91c17SVincenzo Maffione 				error = ENXIO;
27522ff91c17SVincenzo Maffione 			}
27532ff91c17SVincenzo Maffione 			NMG_UNLOCK();
27542ff91c17SVincenzo Maffione 			break;
27552ff91c17SVincenzo Maffione 		}
27562ff91c17SVincenzo Maffione 
27572ff91c17SVincenzo Maffione 		case NETMAP_REQ_PORT_HDR_GET: {
27582ff91c17SVincenzo Maffione 			/* Get vnet-header length for this netmap port */
27592ff91c17SVincenzo Maffione 			struct nmreq_port_hdr *req =
2760cfa866f6SMatt Macy 				(struct nmreq_port_hdr *)(uintptr_t)hdr->nr_body;
27612ff91c17SVincenzo Maffione 			/* Build a nmreq_register out of the nmreq_port_hdr,
27622ff91c17SVincenzo Maffione 			 * so that we can call netmap_get_bdg_na(). */
27632ff91c17SVincenzo Maffione 			struct nmreq_register regreq;
27642ff91c17SVincenzo Maffione 			struct ifnet *ifp;
27652ff91c17SVincenzo Maffione 
27662ff91c17SVincenzo Maffione 			bzero(&regreq, sizeof(regreq));
2767b6e66be2SVincenzo Maffione 			regreq.nr_mode = NR_REG_ALL_NIC;
27682ff91c17SVincenzo Maffione 			NMG_LOCK();
27692ff91c17SVincenzo Maffione 			hdr->nr_reqtype = NETMAP_REQ_REGISTER;
2770cfa866f6SMatt Macy 			hdr->nr_body = (uintptr_t)&regreq;
27712ff91c17SVincenzo Maffione 			error = netmap_get_na(hdr, &na, &ifp, NULL, 0);
27722ff91c17SVincenzo Maffione 			hdr->nr_reqtype = NETMAP_REQ_PORT_HDR_GET;
2773cfa866f6SMatt Macy 			hdr->nr_body = (uintptr_t)req;
27742ff91c17SVincenzo Maffione 			if (na && !error) {
27752ff91c17SVincenzo Maffione 				req->nr_hdr_len = na->virt_hdr_len;
27762ff91c17SVincenzo Maffione 			}
27772ff91c17SVincenzo Maffione 			netmap_unget_na(na, ifp);
27782ff91c17SVincenzo Maffione 			NMG_UNLOCK();
27792ff91c17SVincenzo Maffione 			break;
27802ff91c17SVincenzo Maffione 		}
27812ff91c17SVincenzo Maffione 
27822ff91c17SVincenzo Maffione 		case NETMAP_REQ_VALE_NEWIF: {
27832ff91c17SVincenzo Maffione 			error = nm_vi_create(hdr);
27842ff91c17SVincenzo Maffione 			break;
27852ff91c17SVincenzo Maffione 		}
27862ff91c17SVincenzo Maffione 
27872ff91c17SVincenzo Maffione 		case NETMAP_REQ_VALE_DELIF: {
27882ff91c17SVincenzo Maffione 			error = nm_vi_destroy(hdr->nr_name);
27892ff91c17SVincenzo Maffione 			break;
27902ff91c17SVincenzo Maffione 		}
27912ff91c17SVincenzo Maffione 
27922ff91c17SVincenzo Maffione 		case NETMAP_REQ_VALE_POLLING_ENABLE:
27932ff91c17SVincenzo Maffione 		case NETMAP_REQ_VALE_POLLING_DISABLE: {
27942ff91c17SVincenzo Maffione 			error = nm_bdg_polling(hdr);
27952ff91c17SVincenzo Maffione 			break;
27962ff91c17SVincenzo Maffione 		}
27972ff91c17SVincenzo Maffione #endif  /* WITH_VALE */
27982ff91c17SVincenzo Maffione 		case NETMAP_REQ_POOLS_INFO_GET: {
2799b6e66be2SVincenzo Maffione 			/* Get information from the memory allocator used for
2800b6e66be2SVincenzo Maffione 			 * hdr->nr_name. */
28012ff91c17SVincenzo Maffione 			struct nmreq_pools_info *req =
2802cfa866f6SMatt Macy 				(struct nmreq_pools_info *)(uintptr_t)hdr->nr_body;
28032ff91c17SVincenzo Maffione 			NMG_LOCK();
2804b6e66be2SVincenzo Maffione 			do {
2805b6e66be2SVincenzo Maffione 				/* Build a nmreq_register out of the nmreq_pools_info,
2806b6e66be2SVincenzo Maffione 				 * so that we can call netmap_get_na(). */
2807b6e66be2SVincenzo Maffione 				struct nmreq_register regreq;
2808b6e66be2SVincenzo Maffione 				bzero(&regreq, sizeof(regreq));
2809b6e66be2SVincenzo Maffione 				regreq.nr_mem_id = req->nr_mem_id;
2810b6e66be2SVincenzo Maffione 				regreq.nr_mode = NR_REG_ALL_NIC;
2811b6e66be2SVincenzo Maffione 
2812b6e66be2SVincenzo Maffione 				hdr->nr_reqtype = NETMAP_REQ_REGISTER;
2813b6e66be2SVincenzo Maffione 				hdr->nr_body = (uintptr_t)&regreq;
2814b6e66be2SVincenzo Maffione 				error = netmap_get_na(hdr, &na, &ifp, NULL, 1 /* create */);
2815b6e66be2SVincenzo Maffione 				hdr->nr_reqtype = NETMAP_REQ_POOLS_INFO_GET; /* reset type */
2816b6e66be2SVincenzo Maffione 				hdr->nr_body = (uintptr_t)req; /* reset nr_body */
2817b6e66be2SVincenzo Maffione 				if (error) {
2818b6e66be2SVincenzo Maffione 					na = NULL;
2819b6e66be2SVincenzo Maffione 					ifp = NULL;
2820b6e66be2SVincenzo Maffione 					break;
28212ff91c17SVincenzo Maffione 				}
2822b6e66be2SVincenzo Maffione 				nmd = na->nm_mem; /* grab the memory allocator */
2823b6e66be2SVincenzo Maffione 				if (nmd == NULL) {
2824b6e66be2SVincenzo Maffione 					error = EINVAL;
2825b6e66be2SVincenzo Maffione 					break;
2826b6e66be2SVincenzo Maffione 				}
2827b6e66be2SVincenzo Maffione 
2828b6e66be2SVincenzo Maffione 				/* Finalize the memory allocator, get the pools
2829b6e66be2SVincenzo Maffione 				 * information and release the allocator. */
2830b6e66be2SVincenzo Maffione 				error = netmap_mem_finalize(nmd, na);
2831b6e66be2SVincenzo Maffione 				if (error) {
2832b6e66be2SVincenzo Maffione 					break;
2833b6e66be2SVincenzo Maffione 				}
2834b6e66be2SVincenzo Maffione 				error = netmap_mem_pools_info_get(req, nmd);
2835b6e66be2SVincenzo Maffione 				netmap_mem_drop(na);
2836b6e66be2SVincenzo Maffione 			} while (0);
2837b6e66be2SVincenzo Maffione 			netmap_unget_na(na, ifp);
28382ff91c17SVincenzo Maffione 			NMG_UNLOCK();
28392ff91c17SVincenzo Maffione 			break;
28402ff91c17SVincenzo Maffione 		}
28412ff91c17SVincenzo Maffione 
2842b6e66be2SVincenzo Maffione 		case NETMAP_REQ_CSB_ENABLE: {
2843b6e66be2SVincenzo Maffione 			struct nmreq_option *opt;
2844b6e66be2SVincenzo Maffione 
2845253b2ec1SVincenzo Maffione 			opt = nmreq_getoption(hdr, NETMAP_REQ_OPT_CSB);
2846b6e66be2SVincenzo Maffione 			if (opt == NULL) {
2847b6e66be2SVincenzo Maffione 				error = EINVAL;
2848b6e66be2SVincenzo Maffione 			} else {
2849b6e66be2SVincenzo Maffione 				struct nmreq_opt_csb *csbo =
2850b6e66be2SVincenzo Maffione 					(struct nmreq_opt_csb *)opt;
2851b6e66be2SVincenzo Maffione 				NMG_LOCK();
2852b6e66be2SVincenzo Maffione 				error = netmap_csb_validate(priv, csbo);
2853b6e66be2SVincenzo Maffione 				NMG_UNLOCK();
2854b6e66be2SVincenzo Maffione 				opt->nro_status = error;
2855b6e66be2SVincenzo Maffione 			}
2856b6e66be2SVincenzo Maffione 			break;
2857b6e66be2SVincenzo Maffione 		}
2858b6e66be2SVincenzo Maffione 
2859b6e66be2SVincenzo Maffione 		case NETMAP_REQ_SYNC_KLOOP_START: {
2860b6e66be2SVincenzo Maffione 			error = netmap_sync_kloop(priv, hdr);
2861b6e66be2SVincenzo Maffione 			break;
2862b6e66be2SVincenzo Maffione 		}
2863b6e66be2SVincenzo Maffione 
2864b6e66be2SVincenzo Maffione 		case NETMAP_REQ_SYNC_KLOOP_STOP: {
2865b6e66be2SVincenzo Maffione 			error = netmap_sync_kloop_stop(priv);
2866b6e66be2SVincenzo Maffione 			break;
2867b6e66be2SVincenzo Maffione 		}
2868b6e66be2SVincenzo Maffione 
28692ff91c17SVincenzo Maffione 		default: {
28702ff91c17SVincenzo Maffione 			error = EINVAL;
28712ff91c17SVincenzo Maffione 			break;
28722ff91c17SVincenzo Maffione 		}
28732ff91c17SVincenzo Maffione 		}
28742ff91c17SVincenzo Maffione 		/* Write back request body to userspace and reset the
28752ff91c17SVincenzo Maffione 		 * user-space pointer. */
28762ff91c17SVincenzo Maffione 		error = nmreq_copyout(hdr, error);
28772ff91c17SVincenzo Maffione 		break;
28782ff91c17SVincenzo Maffione 	}
287968b8534bSLuigi Rizzo 
288068b8534bSLuigi Rizzo 	case NIOCTXSYNC:
28812ff91c17SVincenzo Maffione 	case NIOCRXSYNC: {
2882b6e66be2SVincenzo Maffione 		if (unlikely(priv->np_nifp == NULL)) {
2883506cc70cSLuigi Rizzo 			error = ENXIO;
2884506cc70cSLuigi Rizzo 			break;
2885506cc70cSLuigi Rizzo 		}
28866641c68bSLuigi Rizzo 		mb(); /* make sure following reads are not from cache */
28878241616dSLuigi Rizzo 
2888b6e66be2SVincenzo Maffione 		if (unlikely(priv->np_csb_atok_base)) {
2889b6e66be2SVincenzo Maffione 			nm_prerr("Invalid sync in CSB mode");
2890b6e66be2SVincenzo Maffione 			error = EBUSY;
28918241616dSLuigi Rizzo 			break;
28928241616dSLuigi Rizzo 		}
28938241616dSLuigi Rizzo 
2894b6e66be2SVincenzo Maffione 		na = priv->np_na;      /* we have a reference */
2895b6e66be2SVincenzo Maffione 
2896c3e9b4dbSLuiz Otavio O Souza 		mbq_init(&q);
2897847bf383SLuigi Rizzo 		t = (cmd == NIOCTXSYNC ? NR_TX : NR_RX);
2898847bf383SLuigi Rizzo 		krings = NMR(na, t);
2899847bf383SLuigi Rizzo 		qfirst = priv->np_qfirst[t];
2900847bf383SLuigi Rizzo 		qlast = priv->np_qlast[t];
2901c3e9b4dbSLuiz Otavio O Souza 		sync_flags = priv->np_sync_flags;
290268b8534bSLuigi Rizzo 
2903f0ea3689SLuigi Rizzo 		for (i = qfirst; i < qlast; i++) {
29042ff91c17SVincenzo Maffione 			struct netmap_kring *kring = krings[i];
290537e3a6d3SLuigi Rizzo 			struct netmap_ring *ring = kring->ring;
290637e3a6d3SLuigi Rizzo 
290737e3a6d3SLuigi Rizzo 			if (unlikely(nm_kr_tryget(kring, 1, &error))) {
290837e3a6d3SLuigi Rizzo 				error = (error ? EIO : 0);
290937e3a6d3SLuigi Rizzo 				continue;
2910ce3ee1e7SLuigi Rizzo 			}
291137e3a6d3SLuigi Rizzo 
291268b8534bSLuigi Rizzo 			if (cmd == NIOCTXSYNC) {
2913b6e66be2SVincenzo Maffione 				if (netmap_debug & NM_DEBUG_TXSYNC)
2914b6e66be2SVincenzo Maffione 					nm_prinf("pre txsync ring %d cur %d hwcur %d",
291537e3a6d3SLuigi Rizzo 					    i, ring->cur,
291668b8534bSLuigi Rizzo 					    kring->nr_hwcur);
291737e3a6d3SLuigi Rizzo 				if (nm_txsync_prologue(kring, ring) >= kring->nkr_num_slots) {
291817885a7bSLuigi Rizzo 					netmap_ring_reinit(kring);
2919c3e9b4dbSLuiz Otavio O Souza 				} else if (kring->nm_sync(kring, sync_flags | NAF_FORCE_RECLAIM) == 0) {
292037e3a6d3SLuigi Rizzo 					nm_sync_finalize(kring);
292117885a7bSLuigi Rizzo 				}
2922b6e66be2SVincenzo Maffione 				if (netmap_debug & NM_DEBUG_TXSYNC)
2923b6e66be2SVincenzo Maffione 					nm_prinf("post txsync ring %d cur %d hwcur %d",
292437e3a6d3SLuigi Rizzo 					    i, ring->cur,
292568b8534bSLuigi Rizzo 					    kring->nr_hwcur);
292668b8534bSLuigi Rizzo 			} else {
292737e3a6d3SLuigi Rizzo 				if (nm_rxsync_prologue(kring, ring) >= kring->nkr_num_slots) {
2928847bf383SLuigi Rizzo 					netmap_ring_reinit(kring);
2929c3e9b4dbSLuiz Otavio O Souza 				}
2930c3e9b4dbSLuiz Otavio O Souza 				if (nm_may_forward_up(kring)) {
2931c3e9b4dbSLuiz Otavio O Souza 					/* transparent forwarding, see netmap_poll() */
2932c3e9b4dbSLuiz Otavio O Souza 					netmap_grab_packets(kring, &q, netmap_fwd);
2933c3e9b4dbSLuiz Otavio O Souza 				}
2934c3e9b4dbSLuiz Otavio O Souza 				if (kring->nm_sync(kring, sync_flags | NAF_FORCE_READ) == 0) {
293537e3a6d3SLuigi Rizzo 					nm_sync_finalize(kring);
2936847bf383SLuigi Rizzo 				}
2937c3e9b4dbSLuiz Otavio O Souza 				ring_timestamp_set(ring);
293868b8534bSLuigi Rizzo 			}
2939ce3ee1e7SLuigi Rizzo 			nm_kr_put(kring);
294068b8534bSLuigi Rizzo 		}
294168b8534bSLuigi Rizzo 
2942c3e9b4dbSLuiz Otavio O Souza 		if (mbq_peek(&q)) {
2943c3e9b4dbSLuiz Otavio O Souza 			netmap_send_up(na->ifp, &q);
2944c3e9b4dbSLuiz Otavio O Souza 		}
2945c3e9b4dbSLuiz Otavio O Souza 
294668b8534bSLuigi Rizzo 		break;
294768b8534bSLuigi Rizzo 	}
2948f196ce38SLuigi Rizzo 
29492ff91c17SVincenzo Maffione 	default: {
29502ff91c17SVincenzo Maffione 		return netmap_ioctl_legacy(priv, cmd, data, td);
29512ff91c17SVincenzo Maffione 		break;
29522ff91c17SVincenzo Maffione 	}
295368b8534bSLuigi Rizzo 	}
295468b8534bSLuigi Rizzo 
295568b8534bSLuigi Rizzo 	return (error);
295668b8534bSLuigi Rizzo }
295768b8534bSLuigi Rizzo 
29582ff91c17SVincenzo Maffione size_t
29592ff91c17SVincenzo Maffione nmreq_size_by_type(uint16_t nr_reqtype)
29602ff91c17SVincenzo Maffione {
29612ff91c17SVincenzo Maffione 	switch (nr_reqtype) {
29622ff91c17SVincenzo Maffione 	case NETMAP_REQ_REGISTER:
29632ff91c17SVincenzo Maffione 		return sizeof(struct nmreq_register);
29642ff91c17SVincenzo Maffione 	case NETMAP_REQ_PORT_INFO_GET:
29652ff91c17SVincenzo Maffione 		return sizeof(struct nmreq_port_info_get);
29662ff91c17SVincenzo Maffione 	case NETMAP_REQ_VALE_ATTACH:
29672ff91c17SVincenzo Maffione 		return sizeof(struct nmreq_vale_attach);
29682ff91c17SVincenzo Maffione 	case NETMAP_REQ_VALE_DETACH:
29692ff91c17SVincenzo Maffione 		return sizeof(struct nmreq_vale_detach);
29702ff91c17SVincenzo Maffione 	case NETMAP_REQ_VALE_LIST:
29712ff91c17SVincenzo Maffione 		return sizeof(struct nmreq_vale_list);
29722ff91c17SVincenzo Maffione 	case NETMAP_REQ_PORT_HDR_SET:
29732ff91c17SVincenzo Maffione 	case NETMAP_REQ_PORT_HDR_GET:
29742ff91c17SVincenzo Maffione 		return sizeof(struct nmreq_port_hdr);
29752ff91c17SVincenzo Maffione 	case NETMAP_REQ_VALE_NEWIF:
29762ff91c17SVincenzo Maffione 		return sizeof(struct nmreq_vale_newif);
29772ff91c17SVincenzo Maffione 	case NETMAP_REQ_VALE_DELIF:
2978b6e66be2SVincenzo Maffione 	case NETMAP_REQ_SYNC_KLOOP_STOP:
2979b6e66be2SVincenzo Maffione 	case NETMAP_REQ_CSB_ENABLE:
29802ff91c17SVincenzo Maffione 		return 0;
29812ff91c17SVincenzo Maffione 	case NETMAP_REQ_VALE_POLLING_ENABLE:
29822ff91c17SVincenzo Maffione 	case NETMAP_REQ_VALE_POLLING_DISABLE:
29832ff91c17SVincenzo Maffione 		return sizeof(struct nmreq_vale_polling);
29842ff91c17SVincenzo Maffione 	case NETMAP_REQ_POOLS_INFO_GET:
29852ff91c17SVincenzo Maffione 		return sizeof(struct nmreq_pools_info);
2986b6e66be2SVincenzo Maffione 	case NETMAP_REQ_SYNC_KLOOP_START:
2987b6e66be2SVincenzo Maffione 		return sizeof(struct nmreq_sync_kloop_start);
29882ff91c17SVincenzo Maffione 	}
29892ff91c17SVincenzo Maffione 	return 0;
29902ff91c17SVincenzo Maffione }
29912ff91c17SVincenzo Maffione 
29922ff91c17SVincenzo Maffione static size_t
2993b6e66be2SVincenzo Maffione nmreq_opt_size_by_type(uint32_t nro_reqtype, uint64_t nro_size)
29942ff91c17SVincenzo Maffione {
29952ff91c17SVincenzo Maffione 	size_t rv = sizeof(struct nmreq_option);
29962ff91c17SVincenzo Maffione #ifdef NETMAP_REQ_OPT_DEBUG
29972ff91c17SVincenzo Maffione 	if (nro_reqtype & NETMAP_REQ_OPT_DEBUG)
29982ff91c17SVincenzo Maffione 		return (nro_reqtype & ~NETMAP_REQ_OPT_DEBUG);
29992ff91c17SVincenzo Maffione #endif /* NETMAP_REQ_OPT_DEBUG */
30002ff91c17SVincenzo Maffione 	switch (nro_reqtype) {
30012ff91c17SVincenzo Maffione #ifdef WITH_EXTMEM
30022ff91c17SVincenzo Maffione 	case NETMAP_REQ_OPT_EXTMEM:
30032ff91c17SVincenzo Maffione 		rv = sizeof(struct nmreq_opt_extmem);
30042ff91c17SVincenzo Maffione 		break;
30052ff91c17SVincenzo Maffione #endif /* WITH_EXTMEM */
3006b6e66be2SVincenzo Maffione 	case NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS:
3007b6e66be2SVincenzo Maffione 		if (nro_size >= rv)
3008b6e66be2SVincenzo Maffione 			rv = nro_size;
3009b6e66be2SVincenzo Maffione 		break;
3010b6e66be2SVincenzo Maffione 	case NETMAP_REQ_OPT_CSB:
3011b6e66be2SVincenzo Maffione 		rv = sizeof(struct nmreq_opt_csb);
3012b6e66be2SVincenzo Maffione 		break;
30135faab778SVincenzo Maffione 	case NETMAP_REQ_OPT_SYNC_KLOOP_MODE:
30145faab778SVincenzo Maffione 		rv = sizeof(struct nmreq_opt_sync_kloop_mode);
30155faab778SVincenzo Maffione 		break;
30162ff91c17SVincenzo Maffione 	}
30172ff91c17SVincenzo Maffione 	/* subtract the common header */
30182ff91c17SVincenzo Maffione 	return rv - sizeof(struct nmreq_option);
30192ff91c17SVincenzo Maffione }
30202ff91c17SVincenzo Maffione 
3021253b2ec1SVincenzo Maffione /*
3022253b2ec1SVincenzo Maffione  * nmreq_copyin: create an in-kernel version of the request.
3023253b2ec1SVincenzo Maffione  *
3024253b2ec1SVincenzo Maffione  * We build the following data structure:
3025253b2ec1SVincenzo Maffione  *
3026253b2ec1SVincenzo Maffione  * hdr -> +-------+                buf
3027253b2ec1SVincenzo Maffione  *        |       |          +---------------+
3028253b2ec1SVincenzo Maffione  *        +-------+          |usr body ptr   |
3029253b2ec1SVincenzo Maffione  *        |options|-.        +---------------+
3030253b2ec1SVincenzo Maffione  *        +-------+ |        |usr options ptr|
3031253b2ec1SVincenzo Maffione  *        |body   |--------->+---------------+
3032253b2ec1SVincenzo Maffione  *        +-------+ |        |               |
3033253b2ec1SVincenzo Maffione  *                  |        |  copy of body |
3034253b2ec1SVincenzo Maffione  *                  |        |               |
3035253b2ec1SVincenzo Maffione  *                  |        +---------------+
3036253b2ec1SVincenzo Maffione  *                  |        |    NULL       |
3037253b2ec1SVincenzo Maffione  *                  |        +---------------+
3038253b2ec1SVincenzo Maffione  *                  |    .---|               |\
3039253b2ec1SVincenzo Maffione  *                  |    |   +---------------+ |
3040253b2ec1SVincenzo Maffione  *                  | .------|               | |
3041253b2ec1SVincenzo Maffione  *                  | |  |   +---------------+  \ option table
3042253b2ec1SVincenzo Maffione  *                  | |  |   |      ...      |  / indexed by option
3043253b2ec1SVincenzo Maffione  *                  | |  |   +---------------+ |  type
3044253b2ec1SVincenzo Maffione  *                  | |  |   |               | |
3045253b2ec1SVincenzo Maffione  *                  | |  |   +---------------+/
3046253b2ec1SVincenzo Maffione  *                  | |  |   |usr next ptr 1 |
3047253b2ec1SVincenzo Maffione  *                  `-|----->+---------------+
3048253b2ec1SVincenzo Maffione  *                    |  |   | copy of opt 1 |
3049253b2ec1SVincenzo Maffione  *                    |  |   |               |
3050253b2ec1SVincenzo Maffione  *                    |  | .-| nro_next      |
3051253b2ec1SVincenzo Maffione  *                    |  | | +---------------+
3052253b2ec1SVincenzo Maffione  *                    |  | | |usr next ptr 2 |
3053253b2ec1SVincenzo Maffione  *                    |  `-`>+---------------+
3054253b2ec1SVincenzo Maffione  *                    |      | copy of opt 2 |
3055253b2ec1SVincenzo Maffione  *                    |      |               |
3056253b2ec1SVincenzo Maffione  *                    |    .-| nro_next      |
3057253b2ec1SVincenzo Maffione  *                    |    | +---------------+
3058253b2ec1SVincenzo Maffione  *                    |    | |               |
3059253b2ec1SVincenzo Maffione  *                    ~    ~ ~      ...      ~
3060253b2ec1SVincenzo Maffione  *                    |    .-|               |
3061253b2ec1SVincenzo Maffione  *                    `----->+---------------+
3062253b2ec1SVincenzo Maffione  *                         | |usr next ptr n |
3063253b2ec1SVincenzo Maffione  *                         `>+---------------+
3064253b2ec1SVincenzo Maffione  *                           | copy of opt n |
3065253b2ec1SVincenzo Maffione  *                           |               |
3066253b2ec1SVincenzo Maffione  *                           | nro_next(NULL)|
3067253b2ec1SVincenzo Maffione  *                           +---------------+
3068253b2ec1SVincenzo Maffione  *
3069253b2ec1SVincenzo Maffione  * The options and body fields of the hdr structure are overwritten
3070253b2ec1SVincenzo Maffione  * with in-kernel valid pointers inside the buf. The original user
3071253b2ec1SVincenzo Maffione  * pointers are saved in the buf and restored on copyout.
3072253b2ec1SVincenzo Maffione  * The list of options is copied and the pointers adjusted. The
3073253b2ec1SVincenzo Maffione  * original pointers are saved before the option they belonged.
3074253b2ec1SVincenzo Maffione  *
3075253b2ec1SVincenzo Maffione  * The option table has an entry for every available option.  Entries
3076253b2ec1SVincenzo Maffione  * for options that have not been passed contain NULL.
3077253b2ec1SVincenzo Maffione  *
3078253b2ec1SVincenzo Maffione  */
3079253b2ec1SVincenzo Maffione 
30802ff91c17SVincenzo Maffione int
30812ff91c17SVincenzo Maffione nmreq_copyin(struct nmreq_header *hdr, int nr_body_is_user)
30822ff91c17SVincenzo Maffione {
30832ff91c17SVincenzo Maffione 	size_t rqsz, optsz, bufsz;
3084253b2ec1SVincenzo Maffione 	int error = 0;
30852ff91c17SVincenzo Maffione 	char *ker = NULL, *p;
3086253b2ec1SVincenzo Maffione 	struct nmreq_option **next, *src, **opt_tab;
30872ff91c17SVincenzo Maffione 	struct nmreq_option buf;
30882ff91c17SVincenzo Maffione 	uint64_t *ptrs;
30892ff91c17SVincenzo Maffione 
3090b6e66be2SVincenzo Maffione 	if (hdr->nr_reserved) {
3091b6e66be2SVincenzo Maffione 		if (netmap_verbose)
3092b6e66be2SVincenzo Maffione 			nm_prerr("nr_reserved must be zero");
30932ff91c17SVincenzo Maffione 		return EINVAL;
3094b6e66be2SVincenzo Maffione 	}
30952ff91c17SVincenzo Maffione 
30962ff91c17SVincenzo Maffione 	if (!nr_body_is_user)
30972ff91c17SVincenzo Maffione 		return 0;
30982ff91c17SVincenzo Maffione 
30992ff91c17SVincenzo Maffione 	hdr->nr_reserved = nr_body_is_user;
31002ff91c17SVincenzo Maffione 
31012ff91c17SVincenzo Maffione 	/* compute the total size of the buffer */
31022ff91c17SVincenzo Maffione 	rqsz = nmreq_size_by_type(hdr->nr_reqtype);
31032ff91c17SVincenzo Maffione 	if (rqsz > NETMAP_REQ_MAXSIZE) {
31042ff91c17SVincenzo Maffione 		error = EMSGSIZE;
31052ff91c17SVincenzo Maffione 		goto out_err;
31062ff91c17SVincenzo Maffione 	}
3107cfa866f6SMatt Macy 	if ((rqsz && hdr->nr_body == (uintptr_t)NULL) ||
3108cfa866f6SMatt Macy 		(!rqsz && hdr->nr_body != (uintptr_t)NULL)) {
31092ff91c17SVincenzo Maffione 		/* Request body expected, but not found; or
31102ff91c17SVincenzo Maffione 		 * request body found but unexpected. */
3111b6e66be2SVincenzo Maffione 		if (netmap_verbose)
3112b6e66be2SVincenzo Maffione 			nm_prerr("nr_body expected but not found, or vice versa");
31132ff91c17SVincenzo Maffione 		error = EINVAL;
31142ff91c17SVincenzo Maffione 		goto out_err;
31152ff91c17SVincenzo Maffione 	}
31162ff91c17SVincenzo Maffione 
3117253b2ec1SVincenzo Maffione 	bufsz = 2 * sizeof(void *) + rqsz +
3118253b2ec1SVincenzo Maffione 		NETMAP_REQ_OPT_MAX * sizeof(opt_tab);
3119253b2ec1SVincenzo Maffione 	/* compute the size of the buf below the option table.
3120253b2ec1SVincenzo Maffione 	 * It must contain a copy of every received option structure.
3121253b2ec1SVincenzo Maffione 	 * For every option we also need to store a copy of the user
3122253b2ec1SVincenzo Maffione 	 * list pointer.
3123253b2ec1SVincenzo Maffione 	 */
31242ff91c17SVincenzo Maffione 	optsz = 0;
3125cfa866f6SMatt Macy 	for (src = (struct nmreq_option *)(uintptr_t)hdr->nr_options; src;
3126cfa866f6SMatt Macy 	     src = (struct nmreq_option *)(uintptr_t)buf.nro_next)
31272ff91c17SVincenzo Maffione 	{
31282ff91c17SVincenzo Maffione 		error = copyin(src, &buf, sizeof(*src));
31292ff91c17SVincenzo Maffione 		if (error)
31302ff91c17SVincenzo Maffione 			goto out_err;
31312ff91c17SVincenzo Maffione 		optsz += sizeof(*src);
3132b6e66be2SVincenzo Maffione 		optsz += nmreq_opt_size_by_type(buf.nro_reqtype, buf.nro_size);
31332ff91c17SVincenzo Maffione 		if (rqsz + optsz > NETMAP_REQ_MAXSIZE) {
31342ff91c17SVincenzo Maffione 			error = EMSGSIZE;
31352ff91c17SVincenzo Maffione 			goto out_err;
31362ff91c17SVincenzo Maffione 		}
3137253b2ec1SVincenzo Maffione 		bufsz += sizeof(void *);
31382ff91c17SVincenzo Maffione 	}
3139253b2ec1SVincenzo Maffione 	bufsz += optsz;
31402ff91c17SVincenzo Maffione 
31412ff91c17SVincenzo Maffione 	ker = nm_os_malloc(bufsz);
31422ff91c17SVincenzo Maffione 	if (ker == NULL) {
31432ff91c17SVincenzo Maffione 		error = ENOMEM;
31442ff91c17SVincenzo Maffione 		goto out_err;
31452ff91c17SVincenzo Maffione 	}
3146253b2ec1SVincenzo Maffione 	p = ker;	/* write pointer into the buffer */
31472ff91c17SVincenzo Maffione 
31482ff91c17SVincenzo Maffione 	/* make a copy of the user pointers */
31492ff91c17SVincenzo Maffione 	ptrs = (uint64_t*)p;
31502ff91c17SVincenzo Maffione 	*ptrs++ = hdr->nr_body;
31512ff91c17SVincenzo Maffione 	*ptrs++ = hdr->nr_options;
31522ff91c17SVincenzo Maffione 	p = (char *)ptrs;
31532ff91c17SVincenzo Maffione 
31542ff91c17SVincenzo Maffione 	/* copy the body */
3155cfa866f6SMatt Macy 	error = copyin((void *)(uintptr_t)hdr->nr_body, p, rqsz);
31562ff91c17SVincenzo Maffione 	if (error)
31572ff91c17SVincenzo Maffione 		goto out_restore;
31582ff91c17SVincenzo Maffione 	/* overwrite the user pointer with the in-kernel one */
3159cfa866f6SMatt Macy 	hdr->nr_body = (uintptr_t)p;
31602ff91c17SVincenzo Maffione 	p += rqsz;
3161253b2ec1SVincenzo Maffione 	/* start of the options table */
3162253b2ec1SVincenzo Maffione 	opt_tab = (struct nmreq_option **)p;
3163253b2ec1SVincenzo Maffione 	p += sizeof(opt_tab) * NETMAP_REQ_OPT_MAX;
31642ff91c17SVincenzo Maffione 
31652ff91c17SVincenzo Maffione 	/* copy the options */
31662ff91c17SVincenzo Maffione 	next = (struct nmreq_option **)&hdr->nr_options;
31672ff91c17SVincenzo Maffione 	src = *next;
31682ff91c17SVincenzo Maffione 	while (src) {
31692ff91c17SVincenzo Maffione 		struct nmreq_option *opt;
31702ff91c17SVincenzo Maffione 
31712ff91c17SVincenzo Maffione 		/* copy the option header */
31722ff91c17SVincenzo Maffione 		ptrs = (uint64_t *)p;
31732ff91c17SVincenzo Maffione 		opt = (struct nmreq_option *)(ptrs + 1);
31742ff91c17SVincenzo Maffione 		error = copyin(src, opt, sizeof(*src));
31752ff91c17SVincenzo Maffione 		if (error)
31762ff91c17SVincenzo Maffione 			goto out_restore;
31772ff91c17SVincenzo Maffione 		/* make a copy of the user next pointer */
31782ff91c17SVincenzo Maffione 		*ptrs = opt->nro_next;
31792ff91c17SVincenzo Maffione 		/* overwrite the user pointer with the in-kernel one */
31802ff91c17SVincenzo Maffione 		*next = opt;
31812ff91c17SVincenzo Maffione 
31822ff91c17SVincenzo Maffione 		/* initialize the option as not supported.
31832ff91c17SVincenzo Maffione 		 * Recognized options will update this field.
31842ff91c17SVincenzo Maffione 		 */
31852ff91c17SVincenzo Maffione 		opt->nro_status = EOPNOTSUPP;
31862ff91c17SVincenzo Maffione 
3187253b2ec1SVincenzo Maffione 		/* check for invalid types */
3188253b2ec1SVincenzo Maffione 		if (opt->nro_reqtype < 1) {
3189253b2ec1SVincenzo Maffione 			if (netmap_verbose)
3190253b2ec1SVincenzo Maffione 				nm_prinf("invalid option type: %u", opt->nro_reqtype);
3191253b2ec1SVincenzo Maffione 			opt->nro_status = EINVAL;
3192253b2ec1SVincenzo Maffione 			error = EINVAL;
3193253b2ec1SVincenzo Maffione 			goto next;
3194253b2ec1SVincenzo Maffione 		}
3195253b2ec1SVincenzo Maffione 
3196253b2ec1SVincenzo Maffione 		if (opt->nro_reqtype >= NETMAP_REQ_OPT_MAX) {
3197253b2ec1SVincenzo Maffione 			/* opt->nro_status is already EOPNOTSUPP */
3198253b2ec1SVincenzo Maffione 			error = EOPNOTSUPP;
3199253b2ec1SVincenzo Maffione 			goto next;
3200253b2ec1SVincenzo Maffione 		}
3201253b2ec1SVincenzo Maffione 
3202253b2ec1SVincenzo Maffione 		/* if the type is valid, index the option in the table
3203253b2ec1SVincenzo Maffione 		 * unless it is a duplicate.
3204253b2ec1SVincenzo Maffione 		 */
3205253b2ec1SVincenzo Maffione 		if (opt_tab[opt->nro_reqtype] != NULL) {
3206253b2ec1SVincenzo Maffione 			if (netmap_verbose)
3207253b2ec1SVincenzo Maffione 				nm_prinf("duplicate option: %u", opt->nro_reqtype);
3208253b2ec1SVincenzo Maffione 			opt->nro_status = EINVAL;
3209253b2ec1SVincenzo Maffione 			opt_tab[opt->nro_reqtype]->nro_status = EINVAL;
3210253b2ec1SVincenzo Maffione 			error = EINVAL;
3211253b2ec1SVincenzo Maffione 			goto next;
3212253b2ec1SVincenzo Maffione 		}
3213253b2ec1SVincenzo Maffione 		opt_tab[opt->nro_reqtype] = opt;
3214253b2ec1SVincenzo Maffione 
32152ff91c17SVincenzo Maffione 		p = (char *)(opt + 1);
32162ff91c17SVincenzo Maffione 
32172ff91c17SVincenzo Maffione 		/* copy the option body */
3218b6e66be2SVincenzo Maffione 		optsz = nmreq_opt_size_by_type(opt->nro_reqtype,
3219b6e66be2SVincenzo Maffione 						opt->nro_size);
32202ff91c17SVincenzo Maffione 		if (optsz) {
32212ff91c17SVincenzo Maffione 			/* the option body follows the option header */
32222ff91c17SVincenzo Maffione 			error = copyin(src + 1, p, optsz);
32232ff91c17SVincenzo Maffione 			if (error)
32242ff91c17SVincenzo Maffione 				goto out_restore;
32252ff91c17SVincenzo Maffione 			p += optsz;
32262ff91c17SVincenzo Maffione 		}
32272ff91c17SVincenzo Maffione 
3228253b2ec1SVincenzo Maffione 	next:
32292ff91c17SVincenzo Maffione 		/* move to next option */
32302ff91c17SVincenzo Maffione 		next = (struct nmreq_option **)&opt->nro_next;
32312ff91c17SVincenzo Maffione 		src = *next;
32322ff91c17SVincenzo Maffione 	}
3233253b2ec1SVincenzo Maffione 	if (error)
3234253b2ec1SVincenzo Maffione 		nmreq_copyout(hdr, error);
3235253b2ec1SVincenzo Maffione 	return error;
32362ff91c17SVincenzo Maffione 
32372ff91c17SVincenzo Maffione out_restore:
32382ff91c17SVincenzo Maffione 	ptrs = (uint64_t *)ker;
32392ff91c17SVincenzo Maffione 	hdr->nr_body = *ptrs++;
32402ff91c17SVincenzo Maffione 	hdr->nr_options = *ptrs++;
32412ff91c17SVincenzo Maffione 	hdr->nr_reserved = 0;
32422ff91c17SVincenzo Maffione 	nm_os_free(ker);
32432ff91c17SVincenzo Maffione out_err:
32442ff91c17SVincenzo Maffione 	return error;
32452ff91c17SVincenzo Maffione }
32462ff91c17SVincenzo Maffione 
/*
 * nmreq_copyout: mirror of nmreq_copyin. Copy the in-kernel request
 * body and option list back to user space, restore the original user
 * pointers that nmreq_copyin() stashed in the buffer, and free the
 * kernel buffer. 'rerror' is the error to report to the caller; any
 * copyout failure encountered here replaces it. Returns the final
 * error code.
 */
static int
nmreq_copyout(struct nmreq_header *hdr, int rerror)
{
	struct nmreq_option *src, *dst;
	/* hdr->nr_body currently points into the kernel buffer */
	void *ker = (void *)(uintptr_t)hdr->nr_body, *bufstart;
	uint64_t *ptrs;
	size_t bodysz;
	int error;

	/* nr_reserved != 0 marks a header processed by nmreq_copyin();
	 * otherwise there is nothing to copy back or free */
	if (!hdr->nr_reserved)
		return rerror;

	/* restore the user pointers in the header */
	ptrs = (uint64_t *)ker - 2;	/* the two stashed user pointers precede the body */
	bufstart = ptrs;		/* also the start of the allocation to free */
	hdr->nr_body = *ptrs++;
	/* keep the in-kernel option list head before overwriting it */
	src = (struct nmreq_option *)(uintptr_t)hdr->nr_options;
	hdr->nr_options = *ptrs;

	if (!rerror) {
		/* copy the body (only on success; on error the user copy
		 * is left untouched) */
		bodysz = nmreq_size_by_type(hdr->nr_reqtype);
		error = copyout(ker, (void *)(uintptr_t)hdr->nr_body, bodysz);
		if (error) {
			rerror = error;
			goto out;
		}
	}

	/* copy the options: walk the kernel copies (src) and the original
	 * user-space option list (dst) in lockstep */
	dst = (struct nmreq_option *)(uintptr_t)hdr->nr_options;
	while (src) {
		size_t optsz;
		uint64_t next;

		/* restore the user pointer */
		next = src->nro_next;		/* in-kernel next, saved before overwrite */
		ptrs = (uint64_t *)src - 1;	/* stashed user next pointer */
		src->nro_next = *ptrs;

		/* always copy the option header (so the user sees each
		 * option's nro_status even on failure) */
		error = copyout(src, dst, sizeof(*src));
		if (error) {
			rerror = error;
			goto out;
		}

		/* copy the option body only if there was no error */
		if (!rerror && !src->nro_status) {
			optsz = nmreq_opt_size_by_type(src->nro_reqtype,
							src->nro_size);
			if (optsz) {
				error = copyout(src + 1, dst + 1, optsz);
				if (error) {
					rerror = error;
					goto out;
				}
			}
		}
		/* advance both walks: kernel copy and user-space list */
		src = (struct nmreq_option *)(uintptr_t)next;
		dst = (struct nmreq_option *)(uintptr_t)*ptrs;
	}


out:
	hdr->nr_reserved = 0;
	nm_os_free(bufstart);
	return rerror;
}
33162ff91c17SVincenzo Maffione 
33172ff91c17SVincenzo Maffione struct nmreq_option *
3318253b2ec1SVincenzo Maffione nmreq_getoption(struct nmreq_header *hdr, uint16_t reqtype)
33192ff91c17SVincenzo Maffione {
3320253b2ec1SVincenzo Maffione 	struct nmreq_option **opt_tab;
3321253b2ec1SVincenzo Maffione 
3322253b2ec1SVincenzo Maffione 	if (!hdr->nr_options)
33232ff91c17SVincenzo Maffione 		return NULL;
33242ff91c17SVincenzo Maffione 
3325760fa2abSVincenzo Maffione 	opt_tab = (struct nmreq_option **)((uintptr_t)hdr->nr_options) -
3326760fa2abSVincenzo Maffione 	    (NETMAP_REQ_OPT_MAX + 1);
3327253b2ec1SVincenzo Maffione 	return opt_tab[reqtype];
33282ff91c17SVincenzo Maffione }
33292ff91c17SVincenzo Maffione 
33302ff91c17SVincenzo Maffione static int
33312ff91c17SVincenzo Maffione nmreq_checkoptions(struct nmreq_header *hdr)
33322ff91c17SVincenzo Maffione {
33332ff91c17SVincenzo Maffione 	struct nmreq_option *opt;
33342ff91c17SVincenzo Maffione 	/* return error if there is still any option
33352ff91c17SVincenzo Maffione 	 * marked as not supported
33362ff91c17SVincenzo Maffione 	 */
33372ff91c17SVincenzo Maffione 
3338cfa866f6SMatt Macy 	for (opt = (struct nmreq_option *)(uintptr_t)hdr->nr_options; opt;
3339cfa866f6SMatt Macy 	     opt = (struct nmreq_option *)(uintptr_t)opt->nro_next)
33402ff91c17SVincenzo Maffione 		if (opt->nro_status == EOPNOTSUPP)
33412ff91c17SVincenzo Maffione 			return EOPNOTSUPP;
33422ff91c17SVincenzo Maffione 
33432ff91c17SVincenzo Maffione 	return 0;
33442ff91c17SVincenzo Maffione }
334568b8534bSLuigi Rizzo 
334668b8534bSLuigi Rizzo /*
334768b8534bSLuigi Rizzo  * select(2) and poll(2) handlers for the "netmap" device.
334868b8534bSLuigi Rizzo  *
334968b8534bSLuigi Rizzo  * Can be called for one or more queues.
335068b8534bSLuigi Rizzo  * Return the event mask corresponding to ready events.
33518c9874f5SVincenzo Maffione  * If there are no ready events (and 'sr' is not NULL), do a
33528c9874f5SVincenzo Maffione  * selrecord on either individual selinfo or on the global one.
335368b8534bSLuigi Rizzo  * Device-dependent parts (locking and sync of tx/rx rings)
335468b8534bSLuigi Rizzo  * are done through callbacks.
3355f196ce38SLuigi Rizzo  *
335601c7d25fSLuigi Rizzo  * On linux, arguments are really pwait, the poll table, and 'td' is struct file *
335701c7d25fSLuigi Rizzo  * The first one is remapped to pwait as selrecord() uses the name as an
335801c7d25fSLuigi Rizzo  * hidden argument.
335968b8534bSLuigi Rizzo  */
int
netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
{
	struct netmap_adapter *na;
	struct netmap_kring *kring;
	struct netmap_ring *ring;
	u_int i, want[NR_TXRX], revents = 0;
	NM_SELINFO_T *si[NR_TXRX];
#define want_tx want[NR_TX]
#define want_rx want[NR_RX]
	struct mbq q;	/* packets from RX hw queues to host stack */

	/*
	 * In order to avoid nested locks, we need to "double check"
	 * txsync and rxsync if we decide to do a selrecord().
	 * retry_tx (and retry_rx, later) prevent looping forever.
	 */
	int retry_tx = 1, retry_rx = 1;

	/* Transparent mode: send_down is 1 if we have found some
	 * packets to forward (host RX ring --> NIC) during the rx
	 * scan and we have not sent them down to the NIC yet.
	 * Transparent mode requires to bind all rings to a single
	 * file descriptor.
	 */
	int send_down = 0;
	int sync_flags = priv->np_sync_flags;

	mbq_init(&q);

	/* No netmap interface pointer yet: registration not complete. */
	if (unlikely(priv->np_nifp == NULL)) {
		return POLLERR;
	}
	mb(); /* make sure following reads are not from cache */

	na = priv->np_na;

	if (unlikely(!nm_netmap_on(na)))
		return POLLERR;

	/* CSB (kernel-loop) mode does not use poll for synchronization. */
	if (unlikely(priv->np_csb_atok_base)) {
		nm_prerr("Invalid poll in CSB mode");
		return POLLERR;
	}

	if (netmap_debug & NM_DEBUG_ON)
		nm_prinf("device %s events 0x%x", na->name, events);
	want_tx = events & (POLLOUT | POLLWRNORM);
	want_rx = events & (POLLIN | POLLRDNORM);

	/*
	 * If the card has more than one queue AND the file descriptor is
	 * bound to all of them, we sleep on the "global" selinfo, otherwise
	 * we sleep on individual selinfo (FreeBSD only allows two selinfo's
	 * per file descriptor).
	 * The interrupt routine in the driver wakes one or the other
	 * (or both) depending on which clients are active.
	 *
	 * rxsync() is only called if we run out of buffers on a POLLIN.
	 * txsync() is called if we run out of buffers on POLLOUT, or
	 * there are pending packets to send. The latter can be disabled
	 * passing NETMAP_NO_TX_POLL in the NIOCREG call.
	 */
	si[NR_RX] = priv->np_si[NR_RX];
	si[NR_TX] = priv->np_si[NR_TX];

#ifdef __FreeBSD__
	/*
	 * We start with a lock free round which is cheap if we have
	 * slots available. If this fails, then lock and call the sync
	 * routines. We can't do this on Linux, as the contract says
	 * that we must call nm_os_selrecord() unconditionally.
	 */
	if (want_tx) {
		const enum txrx t = NR_TX;
		for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
			kring = NMR(na, t)[i];
			if (kring->ring->cur != kring->ring->tail) {
				/* Some unseen TX space is available, so
				 * we don't need to run txsync. */
				revents |= want[t];
				want[t] = 0;
				break;
			}
		}
	}
	if (want_rx) {
		const enum txrx t = NR_RX;
		int rxsync_needed = 0;

		for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
			kring = NMR(na, t)[i];
			if (kring->ring->cur == kring->ring->tail
				|| kring->rhead != kring->ring->head) {
				/* There are no unseen packets on this ring,
				 * or there are some buffers to be returned
				 * to the netmap port. We therefore go ahead
				 * and run rxsync. */
				rxsync_needed = 1;
				break;
			}
		}
		if (!rxsync_needed) {
			/* All bound RX rings have unseen packets:
			 * report readiness without syncing. */
			revents |= want_rx;
			want_rx = 0;
		}
	}
#endif

#ifdef linux
	/* The selrecord must be unconditional on linux. */
	nm_os_selrecord(sr, si[NR_RX]);
	nm_os_selrecord(sr, si[NR_TX]);
#endif /* linux */

	/*
	 * If we want to push packets out (priv->np_txpoll) or
	 * want_tx is still set, we must issue txsync calls
	 * (on all rings, to avoid that the tx rings stall).
	 * Fortunately, normal tx mode has np_txpoll set.
	 */
	if (priv->np_txpoll || want_tx) {
		/*
		 * The first round checks if anyone is ready, if not
		 * do a selrecord and another round to handle races.
		 * want_tx goes to 0 if any space is found, and is
		 * used to skip rings with no pending transmissions.
		 */
flush_tx:
		for (i = priv->np_qfirst[NR_TX]; i < priv->np_qlast[NR_TX]; i++) {
			int found = 0;

			kring = na->tx_rings[i];
			ring = kring->ring;

			/*
			 * Don't try to txsync this TX ring if we already found some
			 * space in some of the TX rings (want_tx == 0) and there are no
			 * TX slots in this ring that need to be flushed to the NIC
			 * (head == hwcur).
			 */
			if (!send_down && !want_tx && ring->head == kring->nr_hwcur)
				continue;

			/* Ring busy or stopped: skip it this round. */
			if (nm_kr_tryget(kring, 1, &revents))
				continue;

			if (nm_txsync_prologue(kring, ring) >= kring->nkr_num_slots) {
				/* User-visible ring pointers are corrupted:
				 * reset them and report the error. */
				netmap_ring_reinit(kring);
				revents |= POLLERR;
			} else {
				if (kring->nm_sync(kring, sync_flags))
					revents |= POLLERR;
				else
					nm_sync_finalize(kring);
			}

			/*
			 * If we found new slots, notify potential
			 * listeners on the same ring.
			 * Since we just did a txsync, look at the copies
			 * of cur,tail in the kring.
			 */
			found = kring->rcur != kring->rtail;
			nm_kr_put(kring);
			if (found) { /* notify other listeners */
				revents |= want_tx;
				want_tx = 0;
#ifndef linux
				kring->nm_notify(kring, 0);
#endif /* !linux */
			}
		}
		/* if there were any packet to forward we must have handled them by now */
		send_down = 0;
		if (want_tx && retry_tx && sr) {
			/* No space found yet: record ourselves for wakeup
			 * and do one more round to close the race window. */
#ifndef linux
			nm_os_selrecord(sr, si[NR_TX]);
#endif /* !linux */
			retry_tx = 0;
			goto flush_tx;
		}
	}

	/*
	 * If want_rx is still set scan receive rings.
	 * Do it on all rings because otherwise we starve.
	 */
	if (want_rx) {
		/* two rounds here for race avoidance */
do_retry_rx:
		for (i = priv->np_qfirst[NR_RX]; i < priv->np_qlast[NR_RX]; i++) {
			int found = 0;

			kring = na->rx_rings[i];
			ring = kring->ring;

			if (unlikely(nm_kr_tryget(kring, 1, &revents)))
				continue;

			if (nm_rxsync_prologue(kring, ring) >= kring->nkr_num_slots) {
				netmap_ring_reinit(kring);
				revents |= POLLERR;
			}
			/* now we can use kring->rcur, rtail */

			/*
			 * transparent mode support: collect packets from
			 * hw rxring(s) that have been released by the user
			 */
			if (nm_may_forward_up(kring)) {
				netmap_grab_packets(kring, &q, netmap_fwd);
			}

			/* Clear the NR_FORWARD flag anyway, it may be set by
			 * the nm_sync() below only for the host RX ring (see
			 * netmap_rxsync_from_host()). */
			kring->nr_kflags &= ~NR_FORWARD;
			if (kring->nm_sync(kring, sync_flags))
				revents |= POLLERR;
			else
				nm_sync_finalize(kring);
			/* Remember whether the host ring asked us to push
			 * packets down to the NIC (transparent mode). */
			send_down |= (kring->nr_kflags & NR_FORWARD);
			ring_timestamp_set(ring);
			found = kring->rcur != kring->rtail;
			nm_kr_put(kring);
			if (found) {
				revents |= want_rx;
				retry_rx = 0;
#ifndef linux
				kring->nm_notify(kring, 0);
#endif /* !linux */
			}
		}

#ifndef linux
		if (retry_rx && sr) {
			nm_os_selrecord(sr, si[NR_RX]);
		}
#endif /* !linux */
		if (send_down || retry_rx) {
			retry_rx = 0;
			if (send_down)
				goto flush_tx; /* and retry_rx */
			else
				goto do_retry_rx;
		}
	}

	/*
	 * Transparent mode: released bufs (i.e. between kring->nr_hwcur and
	 * ring->head) marked with NS_FORWARD on hw rx rings are passed up
	 * to the host stack.
	 */

	if (mbq_peek(&q)) {
		netmap_send_up(na->ifp, &q);
	}

	return (revents);
#undef want_tx
#undef want_rx
}
362368b8534bSLuigi Rizzo 
36244f80b14cSVincenzo Maffione int
36254f80b14cSVincenzo Maffione nma_intr_enable(struct netmap_adapter *na, int onoff)
36264f80b14cSVincenzo Maffione {
36274f80b14cSVincenzo Maffione 	bool changed = false;
36284f80b14cSVincenzo Maffione 	enum txrx t;
36294f80b14cSVincenzo Maffione 	int i;
36304f80b14cSVincenzo Maffione 
36314f80b14cSVincenzo Maffione 	for_rx_tx(t) {
36324f80b14cSVincenzo Maffione 		for (i = 0; i < nma_get_nrings(na, t); i++) {
36332ff91c17SVincenzo Maffione 			struct netmap_kring *kring = NMR(na, t)[i];
36344f80b14cSVincenzo Maffione 			int on = !(kring->nr_kflags & NKR_NOINTR);
36354f80b14cSVincenzo Maffione 
36364f80b14cSVincenzo Maffione 			if (!!onoff != !!on) {
36374f80b14cSVincenzo Maffione 				changed = true;
36384f80b14cSVincenzo Maffione 			}
36394f80b14cSVincenzo Maffione 			if (onoff) {
36404f80b14cSVincenzo Maffione 				kring->nr_kflags &= ~NKR_NOINTR;
36414f80b14cSVincenzo Maffione 			} else {
36424f80b14cSVincenzo Maffione 				kring->nr_kflags |= NKR_NOINTR;
36434f80b14cSVincenzo Maffione 			}
36444f80b14cSVincenzo Maffione 		}
36454f80b14cSVincenzo Maffione 	}
36464f80b14cSVincenzo Maffione 
36474f80b14cSVincenzo Maffione 	if (!changed) {
36484f80b14cSVincenzo Maffione 		return 0; /* nothing to do */
36494f80b14cSVincenzo Maffione 	}
36504f80b14cSVincenzo Maffione 
36514f80b14cSVincenzo Maffione 	if (!na->nm_intr) {
3652b6e66be2SVincenzo Maffione 		nm_prerr("Cannot %s interrupts for %s", onoff ? "enable" : "disable",
36534f80b14cSVincenzo Maffione 		  na->name);
36544f80b14cSVincenzo Maffione 		return -1;
36554f80b14cSVincenzo Maffione 	}
36564f80b14cSVincenzo Maffione 
36574f80b14cSVincenzo Maffione 	na->nm_intr(na, onoff);
36584f80b14cSVincenzo Maffione 
36594f80b14cSVincenzo Maffione 	return 0;
36604f80b14cSVincenzo Maffione }
36614f80b14cSVincenzo Maffione 
366217885a7bSLuigi Rizzo 
366317885a7bSLuigi Rizzo /*-------------------- driver support routines -------------------*/
366468b8534bSLuigi Rizzo 
366589cc2556SLuigi Rizzo /* default notify callback */
3666f9790aebSLuigi Rizzo static int
3667847bf383SLuigi Rizzo netmap_notify(struct netmap_kring *kring, int flags)
3668f9790aebSLuigi Rizzo {
36692ff91c17SVincenzo Maffione 	struct netmap_adapter *na = kring->notify_na;
3670847bf383SLuigi Rizzo 	enum txrx t = kring->tx;
3671f9790aebSLuigi Rizzo 
367237e3a6d3SLuigi Rizzo 	nm_os_selwakeup(&kring->si);
367389cc2556SLuigi Rizzo 	/* optimization: avoid a wake up on the global
367489cc2556SLuigi Rizzo 	 * queue if nobody has registered for more
367589cc2556SLuigi Rizzo 	 * than one ring
367689cc2556SLuigi Rizzo 	 */
3677847bf383SLuigi Rizzo 	if (na->si_users[t] > 0)
367837e3a6d3SLuigi Rizzo 		nm_os_selwakeup(&na->si[t]);
3679847bf383SLuigi Rizzo 
368037e3a6d3SLuigi Rizzo 	return NM_IRQ_COMPLETED;
3681f9790aebSLuigi Rizzo }
3682f9790aebSLuigi Rizzo 
368389cc2556SLuigi Rizzo /* called by all routines that create netmap_adapters.
368437e3a6d3SLuigi Rizzo  * provide some defaults and get a reference to the
368537e3a6d3SLuigi Rizzo  * memory allocator
368689cc2556SLuigi Rizzo  */
int
netmap_attach_common(struct netmap_adapter *na)
{
	if (!na->rx_buf_maxsize) {
		/* Set a conservative default (larger is safer). */
		na->rx_buf_maxsize = PAGE_SIZE;
	}

#ifdef __FreeBSD__
	if (na->na_flags & NAF_HOST_RINGS && na->ifp) {
		na->if_input = na->ifp->if_input; /* for netmap_send_up */
	}
	na->pdev = na; /* make sure netmap_mem_map() is called */
#endif /* __FreeBSD__ */
	/* Ports with host rings get at least one host ring per
	 * direction if the driver did not ask for more. */
	if (na->na_flags & NAF_HOST_RINGS) {
		if (na->num_host_rx_rings == 0)
			na->num_host_rx_rings = 1;
		if (na->num_host_tx_rings == 0)
			na->num_host_tx_rings = 1;
	}
	if (na->nm_krings_create == NULL) {
		/* we assume that we have been called by a driver,
		 * since other port types all provide their own
		 * nm_krings_create
		 */
		na->nm_krings_create = netmap_hw_krings_create;
		na->nm_krings_delete = netmap_hw_krings_delete;
	}
	if (na->nm_notify == NULL)
		na->nm_notify = netmap_notify;
	na->active_fds = 0;

	if (na->nm_mem == NULL) {
		/* use the global allocator */
		na->nm_mem = netmap_mem_get(&nm_mem);
	}
#ifdef WITH_VALE
	if (na->nm_bdg_attach == NULL)
		/* no special nm_bdg_attach callback. On VALE
		 * attach, we need to interpose a bwrap
		 */
		na->nm_bdg_attach = netmap_default_bdg_attach;
#endif

	return 0;
}
3733f9790aebSLuigi Rizzo 
373437e3a6d3SLuigi Rizzo /* Wrapper for the register callback provided netmap-enabled
373537e3a6d3SLuigi Rizzo  * hardware drivers.
373637e3a6d3SLuigi Rizzo  * nm_iszombie(na) means that the driver module has been
37374bf50f18SLuigi Rizzo  * unloaded, so we cannot call into it.
373837e3a6d3SLuigi Rizzo  * nm_os_ifnet_lock() must guarantee mutual exclusion with
373937e3a6d3SLuigi Rizzo  * module unloading.
37404bf50f18SLuigi Rizzo  */
37414bf50f18SLuigi Rizzo static int
374237e3a6d3SLuigi Rizzo netmap_hw_reg(struct netmap_adapter *na, int onoff)
37434bf50f18SLuigi Rizzo {
37444bf50f18SLuigi Rizzo 	struct netmap_hw_adapter *hwna =
37454bf50f18SLuigi Rizzo 		(struct netmap_hw_adapter*)na;
374637e3a6d3SLuigi Rizzo 	int error = 0;
37474bf50f18SLuigi Rizzo 
374837e3a6d3SLuigi Rizzo 	nm_os_ifnet_lock();
37494bf50f18SLuigi Rizzo 
375037e3a6d3SLuigi Rizzo 	if (nm_iszombie(na)) {
375137e3a6d3SLuigi Rizzo 		if (onoff) {
375237e3a6d3SLuigi Rizzo 			error = ENXIO;
375337e3a6d3SLuigi Rizzo 		} else if (na != NULL) {
375437e3a6d3SLuigi Rizzo 			na->na_flags &= ~NAF_NETMAP_ON;
375537e3a6d3SLuigi Rizzo 		}
375637e3a6d3SLuigi Rizzo 		goto out;
375737e3a6d3SLuigi Rizzo 	}
375837e3a6d3SLuigi Rizzo 
375937e3a6d3SLuigi Rizzo 	error = hwna->nm_hw_register(na, onoff);
376037e3a6d3SLuigi Rizzo 
376137e3a6d3SLuigi Rizzo out:
376237e3a6d3SLuigi Rizzo 	nm_os_ifnet_unlock();
376337e3a6d3SLuigi Rizzo 
376437e3a6d3SLuigi Rizzo 	return error;
376537e3a6d3SLuigi Rizzo }
376637e3a6d3SLuigi Rizzo 
376737e3a6d3SLuigi Rizzo static void
376837e3a6d3SLuigi Rizzo netmap_hw_dtor(struct netmap_adapter *na)
376937e3a6d3SLuigi Rizzo {
37702a7db7a6SVincenzo Maffione 	if (na->ifp == NULL)
377137e3a6d3SLuigi Rizzo 		return;
377237e3a6d3SLuigi Rizzo 
37732a7db7a6SVincenzo Maffione 	NM_DETACH_NA(na->ifp);
37744bf50f18SLuigi Rizzo }
37754bf50f18SLuigi Rizzo 
3776f18be576SLuigi Rizzo 
377768b8534bSLuigi Rizzo /*
3778c3e9b4dbSLuiz Otavio O Souza  * Allocate a netmap_adapter object, and initialize it from the
377937e3a6d3SLuigi Rizzo  * 'arg' passed by the driver on attach.
3780c3e9b4dbSLuiz Otavio O Souza  * We allocate a block of memory of 'size' bytes, which has room
3781c3e9b4dbSLuiz Otavio O Souza  * for struct netmap_adapter plus additional room private to
3782c3e9b4dbSLuiz Otavio O Souza  * the caller.
378368b8534bSLuigi Rizzo  * Return 0 on success, ENOMEM otherwise.
378468b8534bSLuigi Rizzo  */
int
netmap_attach_ext(struct netmap_adapter *arg, size_t size, int override_reg)
{
	struct netmap_hw_adapter *hwna = NULL;
	struct ifnet *ifp = NULL;

	/* The allocation must be large enough for the hw adapter
	 * wrapper; callers may ask for extra private room beyond it. */
	if (size < sizeof(struct netmap_hw_adapter)) {
		if (netmap_debug & NM_DEBUG_ON)
			nm_prerr("Invalid netmap adapter size %d", (int)size);
		return EINVAL;
	}

	if (arg == NULL || arg->ifp == NULL) {
		if (netmap_debug & NM_DEBUG_ON)
			nm_prerr("either arg or arg->ifp is NULL");
		return EINVAL;
	}

	if (arg->num_tx_rings == 0 || arg->num_rx_rings == 0) {
		if (netmap_debug & NM_DEBUG_ON)
			nm_prerr("%s: invalid rings tx %d rx %d",
				arg->name, arg->num_tx_rings, arg->num_rx_rings);
		return EINVAL;
	}

	ifp = arg->ifp;
	if (NM_NA_CLASH(ifp)) {
		/* If NA(ifp) is not null but there is no valid netmap
		 * adapter it means that someone else is using the same
		 * pointer (e.g. ax25_ptr on linux). This happens for
		 * instance when also PF_RING is in use. */
		nm_prerr("Error: netmap adapter hook is busy");
		return EBUSY;
	}

	hwna = nm_os_malloc(size);
	if (hwna == NULL)
		goto fail;
	/* Copy the caller-provided template, then take ownership. */
	hwna->up = *arg;
	hwna->up.na_flags |= NAF_HOST_RINGS | NAF_NATIVE;
	strlcpy(hwna->up.name, ifp->if_xname, sizeof(hwna->up.name));
	if (override_reg) {
		/* Interpose netmap_hw_reg so that mode changes are
		 * serialized against driver module unload. */
		hwna->nm_hw_register = hwna->up.nm_register;
		hwna->up.nm_register = netmap_hw_reg;
	}
	if (netmap_attach_common(&hwna->up)) {
		nm_os_free(hwna);
		goto fail;
	}
	netmap_adapter_get(&hwna->up);

	NM_ATTACH_NA(ifp, &hwna->up);

	nm_os_onattach(ifp);

	if (arg->nm_dtor == NULL) {
		hwna->up.nm_dtor = netmap_hw_dtor;
	}

	if_printf(ifp, "netmap queues/slots: TX %d/%d, RX %d/%d\n",
	    hwna->up.num_tx_rings, hwna->up.num_tx_desc,
	    hwna->up.num_rx_rings, hwna->up.num_rx_desc);
	return 0;

fail:
	/* hwna == NULL means the allocation itself failed (ENOMEM);
	 * otherwise netmap_attach_common() rejected the adapter (EINVAL).
	 * Note: hwna may point to freed memory here; only its value is
	 * tested, it is never dereferenced. */
	nm_prerr("fail, arg %p ifp %p na %p", arg, ifp, hwna);
	return (hwna ? EINVAL : ENOMEM);
}
385368b8534bSLuigi Rizzo 
385468b8534bSLuigi Rizzo 
385537e3a6d3SLuigi Rizzo int
385637e3a6d3SLuigi Rizzo netmap_attach(struct netmap_adapter *arg)
385737e3a6d3SLuigi Rizzo {
38584f80b14cSVincenzo Maffione 	return netmap_attach_ext(arg, sizeof(struct netmap_hw_adapter),
38594f80b14cSVincenzo Maffione 			1 /* override nm_reg */);
386037e3a6d3SLuigi Rizzo }
386137e3a6d3SLuigi Rizzo 
386237e3a6d3SLuigi Rizzo 
3863f9790aebSLuigi Rizzo void
3864f9790aebSLuigi Rizzo NM_DBG(netmap_adapter_get)(struct netmap_adapter *na)
3865f9790aebSLuigi Rizzo {
3866f9790aebSLuigi Rizzo 	if (!na) {
3867f9790aebSLuigi Rizzo 		return;
3868f9790aebSLuigi Rizzo 	}
3869f9790aebSLuigi Rizzo 
3870f9790aebSLuigi Rizzo 	refcount_acquire(&na->na_refcount);
3871f9790aebSLuigi Rizzo }
3872f9790aebSLuigi Rizzo 
3873f9790aebSLuigi Rizzo 
3874f9790aebSLuigi Rizzo /* returns 1 iff the netmap_adapter is destroyed */
int
NM_DBG(netmap_adapter_put)(struct netmap_adapter *na)
{
	/* A NULL adapter is treated as already destroyed. */
	if (!na)
		return 1;

	/* Drop one reference; bail out if others remain. */
	if (!refcount_release(&na->na_refcount))
		return 0;

	/* Last reference gone: run the type-specific destructor first,
	 * then release the remaining resources in order. */
	if (na->nm_dtor)
		na->nm_dtor(na);

	if (na->tx_rings) { /* XXX should not happen */
		if (netmap_debug & NM_DEBUG_ON)
			nm_prerr("freeing leftover tx_rings");
		na->nm_krings_delete(na);
	}
	netmap_pipe_dealloc(na);
	if (na->nm_mem)
		netmap_mem_put(na->nm_mem);
	/* Poison the struct before freeing to catch use-after-free. */
	bzero(na, sizeof(*na));
	nm_os_free(na);

	return 1;
}
3900f9790aebSLuigi Rizzo 
390189cc2556SLuigi Rizzo /* nm_krings_create callback for all hardware native adapters */
3902f9790aebSLuigi Rizzo int
3903f9790aebSLuigi Rizzo netmap_hw_krings_create(struct netmap_adapter *na)
3904f9790aebSLuigi Rizzo {
3905f0ea3689SLuigi Rizzo 	int ret = netmap_krings_create(na, 0);
390617885a7bSLuigi Rizzo 	if (ret == 0) {
390717885a7bSLuigi Rizzo 		/* initialize the mbq for the sw rx ring */
39082a7db7a6SVincenzo Maffione 		u_int lim = netmap_real_rings(na, NR_RX), i;
39092a7db7a6SVincenzo Maffione 		for (i = na->num_rx_rings; i < lim; i++) {
39102a7db7a6SVincenzo Maffione 			mbq_safe_init(&NMR(na, NR_RX)[i]->rx_queue);
39112a7db7a6SVincenzo Maffione 		}
391275f4f3edSVincenzo Maffione 		nm_prdis("initialized sw rx queue %d", na->num_rx_rings);
391317885a7bSLuigi Rizzo 	}
391417885a7bSLuigi Rizzo 	return ret;
3915f9790aebSLuigi Rizzo }
3916f9790aebSLuigi Rizzo 
3917f9790aebSLuigi Rizzo 
3918f9790aebSLuigi Rizzo 
391968b8534bSLuigi Rizzo /*
392089cc2556SLuigi Rizzo  * Called on module unload by the netmap-enabled drivers
392168b8534bSLuigi Rizzo  */
void
netmap_detach(struct ifnet *ifp)
{
	struct netmap_adapter *na = NA(ifp);

	/* Nothing to do if the interface has no netmap adapter. */
	if (!na)
		return;

	NMG_LOCK();
	/* Stop all rings so nobody is inside a sync while we tear down. */
	netmap_set_all_rings(na, NM_KR_LOCKED);
	/*
	 * if the netmap adapter is not native, somebody
	 * changed it, so we can not release it here.
	 * The NAF_ZOMBIE flag will notify the new owner that
	 * the driver is gone.
	 */
	if (!(na->na_flags & NAF_NATIVE) || !netmap_adapter_put(na)) {
		na->na_flags |= NAF_ZOMBIE;
	}
	/* give active users a chance to notice that NAF_ZOMBIE has been
	 * turned on, so that they can stop and return an error to userspace.
	 * Note that this becomes a NOP if there are no active users and,
	 * therefore, the put() above has deleted the na, since now NA(ifp) is
	 * NULL.
	 */
	netmap_enable_all_rings(ifp);
	NMG_UNLOCK();
}
3950f18be576SLuigi Rizzo 
3951f18be576SLuigi Rizzo 
395268b8534bSLuigi Rizzo /*
395302ad4083SLuigi Rizzo  * Intercept packets from the network stack and pass them
395402ad4083SLuigi Rizzo  * to netmap as incoming packets on the 'software' ring.
395517885a7bSLuigi Rizzo  *
395617885a7bSLuigi Rizzo  * We only store packets in a bounded mbq and then copy them
395717885a7bSLuigi Rizzo  * in the relevant rxsync routine.
395817885a7bSLuigi Rizzo  *
3959ce3ee1e7SLuigi Rizzo  * We rely on the OS to make sure that the ifp and na do not go
3960ce3ee1e7SLuigi Rizzo  * away (typically the caller checks for IFF_DRV_RUNNING or the like).
3961ce3ee1e7SLuigi Rizzo  * In nm_register() or whenever there is a reinitialization,
3962f9790aebSLuigi Rizzo  * we make sure to make the mode change visible here.
396368b8534bSLuigi Rizzo  */
396468b8534bSLuigi Rizzo int
3965ce3ee1e7SLuigi Rizzo netmap_transmit(struct ifnet *ifp, struct mbuf *m)
396668b8534bSLuigi Rizzo {
396768b8534bSLuigi Rizzo 	struct netmap_adapter *na = NA(ifp);
396837e3a6d3SLuigi Rizzo 	struct netmap_kring *kring, *tx_kring;
396917885a7bSLuigi Rizzo 	u_int len = MBUF_LEN(m);
397017885a7bSLuigi Rizzo 	u_int error = ENOBUFS;
397137e3a6d3SLuigi Rizzo 	unsigned int txr;
397217885a7bSLuigi Rizzo 	struct mbq *q;
3973c3e9b4dbSLuiz Otavio O Souza 	int busy;
39742a7db7a6SVincenzo Maffione 	u_int i;
397568b8534bSLuigi Rizzo 
39762a7db7a6SVincenzo Maffione 	i = MBUF_TXQ(m);
39772a7db7a6SVincenzo Maffione 	if (i >= na->num_host_rx_rings) {
39782a7db7a6SVincenzo Maffione 		i = i % na->num_host_rx_rings;
39792a7db7a6SVincenzo Maffione 	}
39802a7db7a6SVincenzo Maffione 	kring = NMR(na, NR_RX)[nma_get_nrings(na, NR_RX) + i];
39812a7db7a6SVincenzo Maffione 
3982ce3ee1e7SLuigi Rizzo 	// XXX [Linux] we do not need this lock
3983ce3ee1e7SLuigi Rizzo 	// if we follow the down/configure/up protocol -gl
3984ce3ee1e7SLuigi Rizzo 	// mtx_lock(&na->core_lock);
398517885a7bSLuigi Rizzo 
39864bf50f18SLuigi Rizzo 	if (!nm_netmap_on(na)) {
3987b6e66be2SVincenzo Maffione 		nm_prerr("%s not in netmap mode anymore", na->name);
3988ce3ee1e7SLuigi Rizzo 		error = ENXIO;
3989ce3ee1e7SLuigi Rizzo 		goto done;
3990ce3ee1e7SLuigi Rizzo 	}
3991ce3ee1e7SLuigi Rizzo 
399237e3a6d3SLuigi Rizzo 	txr = MBUF_TXQ(m);
399337e3a6d3SLuigi Rizzo 	if (txr >= na->num_tx_rings) {
399437e3a6d3SLuigi Rizzo 		txr %= na->num_tx_rings;
399537e3a6d3SLuigi Rizzo 	}
39962ff91c17SVincenzo Maffione 	tx_kring = NMR(na, NR_TX)[txr];
399737e3a6d3SLuigi Rizzo 
399837e3a6d3SLuigi Rizzo 	if (tx_kring->nr_mode == NKR_NETMAP_OFF) {
399937e3a6d3SLuigi Rizzo 		return MBUF_TRANSMIT(na, ifp, m);
400037e3a6d3SLuigi Rizzo 	}
400137e3a6d3SLuigi Rizzo 
400217885a7bSLuigi Rizzo 	q = &kring->rx_queue;
400317885a7bSLuigi Rizzo 
4004ce3ee1e7SLuigi Rizzo 	// XXX reconsider long packets if we handle fragments
40054bf50f18SLuigi Rizzo 	if (len > NETMAP_BUF_SIZE(na)) { /* too long for us */
4006b6e66be2SVincenzo Maffione 		nm_prerr("%s from_host, drop packet size %d > %d", na->name,
40074bf50f18SLuigi Rizzo 			len, NETMAP_BUF_SIZE(na));
4008ce3ee1e7SLuigi Rizzo 		goto done;
4009849bec0eSLuigi Rizzo 	}
401017885a7bSLuigi Rizzo 
40112a7db7a6SVincenzo Maffione 	if (!netmap_generic_hwcsum) {
40122a7db7a6SVincenzo Maffione 		if (nm_os_mbuf_has_csum_offld(m)) {
401375f4f3edSVincenzo Maffione 			nm_prlim(1, "%s drop mbuf that needs checksum offload", na->name);
40142a7db7a6SVincenzo Maffione 			goto done;
40152a7db7a6SVincenzo Maffione 		}
40162a7db7a6SVincenzo Maffione 	}
40172a7db7a6SVincenzo Maffione 
40182a7db7a6SVincenzo Maffione 	if (nm_os_mbuf_has_seg_offld(m)) {
401975f4f3edSVincenzo Maffione 		nm_prlim(1, "%s drop mbuf that needs generic segmentation offload", na->name);
402037e3a6d3SLuigi Rizzo 		goto done;
402137e3a6d3SLuigi Rizzo 	}
402237e3a6d3SLuigi Rizzo 
402389a9a5b5SVincenzo Maffione #ifdef __FreeBSD__
402489a9a5b5SVincenzo Maffione 	ETHER_BPF_MTAP(ifp, m);
402589a9a5b5SVincenzo Maffione #endif /* __FreeBSD__ */
402689a9a5b5SVincenzo Maffione 
4027c3e9b4dbSLuiz Otavio O Souza 	/* protect against netmap_rxsync_from_host(), netmap_sw_to_nic()
402817885a7bSLuigi Rizzo 	 * and maybe other instances of netmap_transmit (the latter
402917885a7bSLuigi Rizzo 	 * not possible on Linux).
4030c3e9b4dbSLuiz Otavio O Souza 	 * We enqueue the mbuf only if we are sure there is going to be
4031c3e9b4dbSLuiz Otavio O Souza 	 * enough room in the host RX ring, otherwise we drop it.
4032ce3ee1e7SLuigi Rizzo 	 */
4033997b054cSLuigi Rizzo 	mbq_lock(q);
403417885a7bSLuigi Rizzo 
4035c3e9b4dbSLuiz Otavio O Souza 	busy = kring->nr_hwtail - kring->nr_hwcur;
4036c3e9b4dbSLuiz Otavio O Souza 	if (busy < 0)
4037c3e9b4dbSLuiz Otavio O Souza 		busy += kring->nkr_num_slots;
4038c3e9b4dbSLuiz Otavio O Souza 	if (busy + mbq_len(q) >= kring->nkr_num_slots - 1) {
403975f4f3edSVincenzo Maffione 		nm_prlim(2, "%s full hwcur %d hwtail %d qlen %d", na->name,
4040c3e9b4dbSLuiz Otavio O Souza 			kring->nr_hwcur, kring->nr_hwtail, mbq_len(q));
4041ce3ee1e7SLuigi Rizzo 	} else {
404217885a7bSLuigi Rizzo 		mbq_enqueue(q, m);
404375f4f3edSVincenzo Maffione 		nm_prdis(2, "%s %d bufs in queue", na->name, mbq_len(q));
404417885a7bSLuigi Rizzo 		/* notify outside the lock */
404517885a7bSLuigi Rizzo 		m = NULL;
404668b8534bSLuigi Rizzo 		error = 0;
4047ce3ee1e7SLuigi Rizzo 	}
4048997b054cSLuigi Rizzo 	mbq_unlock(q);
4049ce3ee1e7SLuigi Rizzo 
405068b8534bSLuigi Rizzo done:
405117885a7bSLuigi Rizzo 	if (m)
405268b8534bSLuigi Rizzo 		m_freem(m);
405317885a7bSLuigi Rizzo 	/* unconditionally wake up listeners */
4054847bf383SLuigi Rizzo 	kring->nm_notify(kring, 0);
405589cc2556SLuigi Rizzo 	/* this is normally netmap_notify(), but for nics
405689cc2556SLuigi Rizzo 	 * connected to a bridge it is netmap_bwrap_intr_notify(),
405789cc2556SLuigi Rizzo 	 * that possibly forwards the frames through the switch
405889cc2556SLuigi Rizzo 	 */
405968b8534bSLuigi Rizzo 
406068b8534bSLuigi Rizzo 	return (error);
406168b8534bSLuigi Rizzo }
406268b8534bSLuigi Rizzo 
406368b8534bSLuigi Rizzo 
406468b8534bSLuigi Rizzo /*
406568b8534bSLuigi Rizzo  * netmap_reset() is called by the driver routines when reinitializing
406668b8534bSLuigi Rizzo  * a ring. The driver is in charge of locking to protect the kring.
4067f9790aebSLuigi Rizzo  * If native netmap mode is not set just return NULL.
406837e3a6d3SLuigi Rizzo  * If native netmap mode is set, in particular, we have to set nr_mode to
406937e3a6d3SLuigi Rizzo  * NKR_NETMAP_ON.
407068b8534bSLuigi Rizzo  */
struct netmap_slot *
netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
	u_int new_cur)
{
	struct netmap_kring *kring;
	int new_hwofs, lim;

	/* Only meaningful in native netmap mode; in emulated mode the
	 * driver keeps ownership of the rings and there is nothing to do. */
	if (!nm_native_on(na)) {
		nm_prdis("interface not in native netmap mode");
		return NULL;	/* nothing to reinitialize */
	}

	/* XXX note- in the new scheme, we are not guaranteed to be
	 * under lock (e.g. when called on a device reset).
	 * In this case, we should set a flag and do not trust too
	 * much the values. In practice: TODO
	 * - set a RESET flag somewhere in the kring
	 * - do the processing in a conservative way
	 * - let the *sync() fixup at the end.
	 */
	if (tx == NR_TX) {
		if (n >= na->num_tx_rings)
			return NULL;	/* out-of-range ring index */

		kring = na->tx_rings[n];

		/* The ring is scheduled to leave netmap mode: commit the
		 * pending state now and tell the driver not to use it. */
		if (kring->nr_pending_mode == NKR_NETMAP_OFF) {
			kring->nr_mode = NKR_NETMAP_OFF;
			return NULL;
		}

		// XXX check whether we should use hwcur or rcur
		new_hwofs = kring->nr_hwcur - new_cur;
	} else {
		if (n >= na->num_rx_rings)
			return NULL;	/* out-of-range ring index */
		kring = na->rx_rings[n];

		/* Same pending-off commit as on the TX side. */
		if (kring->nr_pending_mode == NKR_NETMAP_OFF) {
			kring->nr_mode = NKR_NETMAP_OFF;
			return NULL;
		}

		new_hwofs = kring->nr_hwtail - new_cur;
	}
	lim = kring->nkr_num_slots - 1;
	/* Wrap the new offset into the valid range [.., lim]. */
	if (new_hwofs > lim)
		new_hwofs -= lim + 1;

	/* Always set the new offset value and realign the ring. */
	if (netmap_debug & NM_DEBUG_ON)
	    nm_prinf("%s %s%d hwofs %d -> %d, hwtail %d -> %d",
		na->name,
		tx == NR_TX ? "TX" : "RX", n,
		kring->nkr_hwofs, new_hwofs,
		kring->nr_hwtail,
		tx == NR_TX ? lim : kring->nr_hwtail);
	kring->nkr_hwofs = new_hwofs;
	if (tx == NR_TX) {
		/* After a reset the whole TX ring is again available:
		 * place hwtail one slot behind hwcur, modulo ring size. */
		kring->nr_hwtail = kring->nr_hwcur + lim;
		if (kring->nr_hwtail > lim)
			kring->nr_hwtail -= lim + 1;
	}

	/*
	 * Wakeup on the individual and global selwait
	 * We do the wakeup here, but the ring is not yet reconfigured.
	 * However, we are under lock so there are no races.
	 */
	kring->nr_mode = NKR_NETMAP_ON;
	kring->nm_notify(kring, 0);
	return kring->ring->slot;
}
414468b8534bSLuigi Rizzo 
414568b8534bSLuigi Rizzo 
4146ce3ee1e7SLuigi Rizzo /*
4147f9790aebSLuigi Rizzo  * Dispatch rx/tx interrupts to the netmap rings.
4148ce3ee1e7SLuigi Rizzo  *
4149ce3ee1e7SLuigi Rizzo  * "work_done" is non-null on the RX path, NULL for the TX path.
4150ce3ee1e7SLuigi Rizzo  * We rely on the OS to make sure that there is only one active
4151ce3ee1e7SLuigi Rizzo  * instance per queue, and that there is appropriate locking.
4152849bec0eSLuigi Rizzo  *
4153f9790aebSLuigi Rizzo  * The 'notify' routine depends on what the ring is attached to.
4154f9790aebSLuigi Rizzo  * - for a netmap file descriptor, do a selwakeup on the individual
4155f9790aebSLuigi Rizzo  *   waitqueue, plus one on the global one if needed
41564bf50f18SLuigi Rizzo  *   (see netmap_notify)
41574bf50f18SLuigi Rizzo  * - for a nic connected to a switch, call the proper forwarding routine
41584bf50f18SLuigi Rizzo  *   (see netmap_bwrap_intr_notify)
4159f9790aebSLuigi Rizzo  */
int
netmap_common_irq(struct netmap_adapter *na, u_int q, u_int *work_done)
{
	struct netmap_kring *kring;
	/* work_done is non-NULL only on the RX path (see header comment). */
	enum txrx t = (work_done ? NR_RX : NR_TX);

	/* Strip driver-private flag bits from the queue id. */
	q &= NETMAP_RING_MASK;

	if (netmap_debug & (NM_DEBUG_RXINTR|NM_DEBUG_TXINTR)) {
	        nm_prlim(5, "received %s queue %d", work_done ? "RX" : "TX" , q);
	}

	if (q >= nma_get_nrings(na, t))
		return NM_IRQ_PASS; // not a physical queue

	kring = NMR(na, t)[q];

	/* Ring not (or no longer) in netmap mode: let the driver
	 * handle the interrupt through its regular path. */
	if (kring->nr_mode == NKR_NETMAP_OFF) {
		return NM_IRQ_PASS;
	}

	if (t == NR_RX) {
		/* Record the interrupt before notifying, so the rxsync
		 * that the notify wakes up can observe it. */
		kring->nr_kflags |= NKR_PENDINTR;	// XXX atomic ?
		*work_done = 1; /* do not fire napi again */
	}

	/* netmap_notify() for file descriptors, or
	 * netmap_bwrap_intr_notify() for rings attached to a switch. */
	return kring->nm_notify(kring, 0);
}
4188f9790aebSLuigi Rizzo 
418917885a7bSLuigi Rizzo 
4190f9790aebSLuigi Rizzo /*
4191f9790aebSLuigi Rizzo  * Default functions to handle rx/tx interrupts from a physical device.
4192f9790aebSLuigi Rizzo  * "work_done" is non-null on the RX path, NULL for the TX path.
4193f9790aebSLuigi Rizzo  *
419437e3a6d3SLuigi Rizzo  * If the card is not in netmap mode, simply return NM_IRQ_PASS,
4195ce3ee1e7SLuigi Rizzo  * so that the caller proceeds with regular processing.
419637e3a6d3SLuigi Rizzo  * Otherwise call netmap_common_irq().
4197ce3ee1e7SLuigi Rizzo  *
4198ce3ee1e7SLuigi Rizzo  * If the card is connected to a netmap file descriptor,
4199ce3ee1e7SLuigi Rizzo  * do a selwakeup on the individual queue, plus one on the global one
4200ce3ee1e7SLuigi Rizzo  * if needed (multiqueue card _and_ there are multiqueue listeners),
420137e3a6d3SLuigi Rizzo  * and return NR_IRQ_COMPLETED.
4202ce3ee1e7SLuigi Rizzo  *
4203ce3ee1e7SLuigi Rizzo  * Finally, if called on rx from an interface connected to a switch,
420437e3a6d3SLuigi Rizzo  * calls the proper forwarding routine.
42051a26580eSLuigi Rizzo  */
4206babc7c12SLuigi Rizzo int
4207ce3ee1e7SLuigi Rizzo netmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_done)
42081a26580eSLuigi Rizzo {
42094bf50f18SLuigi Rizzo 	struct netmap_adapter *na = NA(ifp);
42104bf50f18SLuigi Rizzo 
42114bf50f18SLuigi Rizzo 	/*
42124bf50f18SLuigi Rizzo 	 * XXX emulated netmap mode sets NAF_SKIP_INTR so
42134bf50f18SLuigi Rizzo 	 * we still use the regular driver even though the previous
42144bf50f18SLuigi Rizzo 	 * check fails. It is unclear whether we should use
42154bf50f18SLuigi Rizzo 	 * nm_native_on() here.
42164bf50f18SLuigi Rizzo 	 */
42174bf50f18SLuigi Rizzo 	if (!nm_netmap_on(na))
421837e3a6d3SLuigi Rizzo 		return NM_IRQ_PASS;
4219849bec0eSLuigi Rizzo 
42204bf50f18SLuigi Rizzo 	if (na->na_flags & NAF_SKIP_INTR) {
422175f4f3edSVincenzo Maffione 		nm_prdis("use regular interrupt");
422237e3a6d3SLuigi Rizzo 		return NM_IRQ_PASS;
42238241616dSLuigi Rizzo 	}
42248241616dSLuigi Rizzo 
422537e3a6d3SLuigi Rizzo 	return netmap_common_irq(na, q, work_done);
42261a26580eSLuigi Rizzo }
42271a26580eSLuigi Rizzo 
42282a7db7a6SVincenzo Maffione /* set/clear native flags and if_transmit/netdev_ops */
42292a7db7a6SVincenzo Maffione void
42302a7db7a6SVincenzo Maffione nm_set_native_flags(struct netmap_adapter *na)
42312a7db7a6SVincenzo Maffione {
42322a7db7a6SVincenzo Maffione 	struct ifnet *ifp = na->ifp;
42332a7db7a6SVincenzo Maffione 
42342a7db7a6SVincenzo Maffione 	/* We do the setup for intercepting packets only if we are the
42352a7db7a6SVincenzo Maffione 	 * first user of this adapapter. */
42362a7db7a6SVincenzo Maffione 	if (na->active_fds > 0) {
42372a7db7a6SVincenzo Maffione 		return;
42382a7db7a6SVincenzo Maffione 	}
42392a7db7a6SVincenzo Maffione 
42402a7db7a6SVincenzo Maffione 	na->na_flags |= NAF_NETMAP_ON;
42412a7db7a6SVincenzo Maffione 	nm_os_onenter(ifp);
42422a7db7a6SVincenzo Maffione 	nm_update_hostrings_mode(na);
42432a7db7a6SVincenzo Maffione }
42442a7db7a6SVincenzo Maffione 
42452a7db7a6SVincenzo Maffione void
42462a7db7a6SVincenzo Maffione nm_clear_native_flags(struct netmap_adapter *na)
42472a7db7a6SVincenzo Maffione {
42482a7db7a6SVincenzo Maffione 	struct ifnet *ifp = na->ifp;
42492a7db7a6SVincenzo Maffione 
42502a7db7a6SVincenzo Maffione 	/* We undo the setup for intercepting packets only if we are the
4251b6e66be2SVincenzo Maffione 	 * last user of this adapter. */
42522a7db7a6SVincenzo Maffione 	if (na->active_fds > 0) {
42532a7db7a6SVincenzo Maffione 		return;
42542a7db7a6SVincenzo Maffione 	}
42552a7db7a6SVincenzo Maffione 
42562a7db7a6SVincenzo Maffione 	nm_update_hostrings_mode(na);
42572a7db7a6SVincenzo Maffione 	nm_os_onexit(ifp);
42582a7db7a6SVincenzo Maffione 
42592a7db7a6SVincenzo Maffione 	na->na_flags &= ~NAF_NETMAP_ON;
42602a7db7a6SVincenzo Maffione }
42612a7db7a6SVincenzo Maffione 
426275f4f3edSVincenzo Maffione void
426375f4f3edSVincenzo Maffione netmap_krings_mode_commit(struct netmap_adapter *na, int onoff)
426475f4f3edSVincenzo Maffione {
426575f4f3edSVincenzo Maffione 	enum txrx t;
426675f4f3edSVincenzo Maffione 
426775f4f3edSVincenzo Maffione 	for_rx_tx(t) {
426875f4f3edSVincenzo Maffione 		int i;
426975f4f3edSVincenzo Maffione 
427075f4f3edSVincenzo Maffione 		for (i = 0; i < netmap_real_rings(na, t); i++) {
427175f4f3edSVincenzo Maffione 			struct netmap_kring *kring = NMR(na, t)[i];
427275f4f3edSVincenzo Maffione 
427375f4f3edSVincenzo Maffione 			if (onoff && nm_kring_pending_on(kring))
427475f4f3edSVincenzo Maffione 				kring->nr_mode = NKR_NETMAP_ON;
427575f4f3edSVincenzo Maffione 			else if (!onoff && nm_kring_pending_off(kring))
427675f4f3edSVincenzo Maffione 				kring->nr_mode = NKR_NETMAP_OFF;
427775f4f3edSVincenzo Maffione 		}
427875f4f3edSVincenzo Maffione 	}
427975f4f3edSVincenzo Maffione }
428075f4f3edSVincenzo Maffione 
428101c7d25fSLuigi Rizzo /*
4282f9790aebSLuigi Rizzo  * Module loader and unloader
4283f196ce38SLuigi Rizzo  *
4284f9790aebSLuigi Rizzo  * netmap_init() creates the /dev/netmap device and initializes
4285f9790aebSLuigi Rizzo  * all global variables. Returns 0 on success, errno on failure
4286f9790aebSLuigi Rizzo  * (but there is no chance)
4287f9790aebSLuigi Rizzo  *
4288f9790aebSLuigi Rizzo  * netmap_fini() destroys everything.
4289f196ce38SLuigi Rizzo  */
4290babc7c12SLuigi Rizzo 
4291babc7c12SLuigi Rizzo static struct cdev *netmap_dev; /* /dev/netmap character device. */
4292f9790aebSLuigi Rizzo extern struct cdevsw netmap_cdevsw;
4293babc7c12SLuigi Rizzo 
429417885a7bSLuigi Rizzo 
4295f9790aebSLuigi Rizzo void
429668b8534bSLuigi Rizzo netmap_fini(void)
429768b8534bSLuigi Rizzo {
4298f9790aebSLuigi Rizzo 	if (netmap_dev)
429968b8534bSLuigi Rizzo 		destroy_dev(netmap_dev);
430037e3a6d3SLuigi Rizzo 	/* we assume that there are no longer netmap users */
430137e3a6d3SLuigi Rizzo 	nm_os_ifnet_fini();
430237e3a6d3SLuigi Rizzo 	netmap_uninit_bridges();
4303ce3ee1e7SLuigi Rizzo 	netmap_mem_fini();
4304ce3ee1e7SLuigi Rizzo 	NMG_LOCK_DESTROY();
4305b6e66be2SVincenzo Maffione 	nm_prinf("netmap: unloaded module.");
430668b8534bSLuigi Rizzo }
430768b8534bSLuigi Rizzo 
430817885a7bSLuigi Rizzo 
4309f9790aebSLuigi Rizzo int
4310f9790aebSLuigi Rizzo netmap_init(void)
431168b8534bSLuigi Rizzo {
4312f9790aebSLuigi Rizzo 	int error;
431368b8534bSLuigi Rizzo 
4314f9790aebSLuigi Rizzo 	NMG_LOCK_INIT();
431568b8534bSLuigi Rizzo 
4316f9790aebSLuigi Rizzo 	error = netmap_mem_init();
4317f9790aebSLuigi Rizzo 	if (error != 0)
4318f9790aebSLuigi Rizzo 		goto fail;
4319c929ca72SLuigi Rizzo 	/*
4320c929ca72SLuigi Rizzo 	 * MAKEDEV_ETERNAL_KLD avoids an expensive check on syscalls
4321c929ca72SLuigi Rizzo 	 * when the module is compiled in.
4322c929ca72SLuigi Rizzo 	 * XXX could use make_dev_credv() to get error number
4323c929ca72SLuigi Rizzo 	 */
43240e73f29aSLuigi Rizzo 	netmap_dev = make_dev_credf(MAKEDEV_ETERNAL_KLD,
432511c0b69cSAdrian Chadd 		&netmap_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0600,
43260e73f29aSLuigi Rizzo 			      "netmap");
4327f9790aebSLuigi Rizzo 	if (!netmap_dev)
4328f9790aebSLuigi Rizzo 		goto fail;
4329f9790aebSLuigi Rizzo 
4330847bf383SLuigi Rizzo 	error = netmap_init_bridges();
4331847bf383SLuigi Rizzo 	if (error)
4332847bf383SLuigi Rizzo 		goto fail;
4333847bf383SLuigi Rizzo 
43344bf50f18SLuigi Rizzo #ifdef __FreeBSD__
433537e3a6d3SLuigi Rizzo 	nm_os_vi_init_index();
43364bf50f18SLuigi Rizzo #endif
4337847bf383SLuigi Rizzo 
433837e3a6d3SLuigi Rizzo 	error = nm_os_ifnet_init();
433937e3a6d3SLuigi Rizzo 	if (error)
434037e3a6d3SLuigi Rizzo 		goto fail;
434137e3a6d3SLuigi Rizzo 
4342b6e66be2SVincenzo Maffione 	nm_prinf("netmap: loaded module");
4343f9790aebSLuigi Rizzo 	return (0);
4344f9790aebSLuigi Rizzo fail:
434568b8534bSLuigi Rizzo 	netmap_fini();
4346f9790aebSLuigi Rizzo 	return (EINVAL); /* may be incorrect */
434768b8534bSLuigi Rizzo }
4348