xref: /freebsd-14.2/sys/dev/netmap/netmap.c (revision a2a74091)
168b8534bSLuigi Rizzo /*
237e3a6d3SLuigi Rizzo  * Copyright (C) 2011-2014 Matteo Landi
337e3a6d3SLuigi Rizzo  * Copyright (C) 2011-2016 Luigi Rizzo
437e3a6d3SLuigi Rizzo  * Copyright (C) 2011-2016 Giuseppe Lettieri
537e3a6d3SLuigi Rizzo  * Copyright (C) 2011-2016 Vincenzo Maffione
637e3a6d3SLuigi Rizzo  * All rights reserved.
768b8534bSLuigi Rizzo  *
868b8534bSLuigi Rizzo  * Redistribution and use in source and binary forms, with or without
968b8534bSLuigi Rizzo  * modification, are permitted provided that the following conditions
1068b8534bSLuigi Rizzo  * are met:
1168b8534bSLuigi Rizzo  *   1. Redistributions of source code must retain the above copyright
1268b8534bSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer.
1368b8534bSLuigi Rizzo  *   2. Redistributions in binary form must reproduce the above copyright
1468b8534bSLuigi Rizzo  *      notice, this list of conditions and the following disclaimer in the
1568b8534bSLuigi Rizzo  *      documentation and/or other materials provided with the distribution.
1668b8534bSLuigi Rizzo  *
1768b8534bSLuigi Rizzo  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1868b8534bSLuigi Rizzo  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1968b8534bSLuigi Rizzo  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2068b8534bSLuigi Rizzo  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2168b8534bSLuigi Rizzo  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2268b8534bSLuigi Rizzo  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2368b8534bSLuigi Rizzo  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2468b8534bSLuigi Rizzo  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2568b8534bSLuigi Rizzo  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2668b8534bSLuigi Rizzo  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2768b8534bSLuigi Rizzo  * SUCH DAMAGE.
2868b8534bSLuigi Rizzo  */
2968b8534bSLuigi Rizzo 
30ce3ee1e7SLuigi Rizzo 
3168b8534bSLuigi Rizzo /*
32f9790aebSLuigi Rizzo  * $FreeBSD$
33f9790aebSLuigi Rizzo  *
3468b8534bSLuigi Rizzo  * This module supports memory mapped access to network devices,
3568b8534bSLuigi Rizzo  * see netmap(4).
3668b8534bSLuigi Rizzo  *
3768b8534bSLuigi Rizzo  * The module uses a large, memory pool allocated by the kernel
3868b8534bSLuigi Rizzo  * and accessible as mmapped memory by multiple userspace threads/processes.
3968b8534bSLuigi Rizzo  * The memory pool contains packet buffers and "netmap rings",
4068b8534bSLuigi Rizzo  * i.e. user-accessible copies of the interface's queues.
4168b8534bSLuigi Rizzo  *
4268b8534bSLuigi Rizzo  * Access to the network card works like this:
4368b8534bSLuigi Rizzo  * 1. a process/thread issues one or more open() on /dev/netmap, to create
4468b8534bSLuigi Rizzo  *    select()able file descriptor on which events are reported.
4568b8534bSLuigi Rizzo  * 2. on each descriptor, the process issues an ioctl() to identify
4668b8534bSLuigi Rizzo  *    the interface that should report events to the file descriptor.
4768b8534bSLuigi Rizzo  * 3. on each descriptor, the process issues an mmap() request to
4868b8534bSLuigi Rizzo  *    map the shared memory region within the process' address space.
4968b8534bSLuigi Rizzo  *    The list of interesting queues is indicated by a location in
5068b8534bSLuigi Rizzo  *    the shared memory region.
5168b8534bSLuigi Rizzo  * 4. using the functions in the netmap(4) userspace API, a process
5268b8534bSLuigi Rizzo  *    can look up the occupation state of a queue, access memory buffers,
5368b8534bSLuigi Rizzo  *    and retrieve received packets or enqueue packets to transmit.
5468b8534bSLuigi Rizzo  * 5. using some ioctl()s the process can synchronize the userspace view
5568b8534bSLuigi Rizzo  *    of the queue with the actual status in the kernel. This includes both
5668b8534bSLuigi Rizzo  *    receiving the notification of new packets, and transmitting new
5768b8534bSLuigi Rizzo  *    packets on the output interface.
5868b8534bSLuigi Rizzo  * 6. select() or poll() can be used to wait for events on individual
5968b8534bSLuigi Rizzo  *    transmit or receive queues (or all queues for a given interface).
60ce3ee1e7SLuigi Rizzo  *
61ce3ee1e7SLuigi Rizzo 
62ce3ee1e7SLuigi Rizzo 		SYNCHRONIZATION (USER)
63ce3ee1e7SLuigi Rizzo 
64ce3ee1e7SLuigi Rizzo The netmap rings and data structures may be shared among multiple
65ce3ee1e7SLuigi Rizzo user threads or even independent processes.
66ce3ee1e7SLuigi Rizzo Any synchronization among those threads/processes is delegated
67ce3ee1e7SLuigi Rizzo to the threads themselves. Only one thread at a time can be in
68ce3ee1e7SLuigi Rizzo a system call on the same netmap ring. The OS does not enforce
69ce3ee1e7SLuigi Rizzo this and only guarantees against system crashes in case of
70ce3ee1e7SLuigi Rizzo invalid usage.
71ce3ee1e7SLuigi Rizzo 
72ce3ee1e7SLuigi Rizzo 		LOCKING (INTERNAL)
73ce3ee1e7SLuigi Rizzo 
74ce3ee1e7SLuigi Rizzo Within the kernel, access to the netmap rings is protected as follows:
75ce3ee1e7SLuigi Rizzo 
76ce3ee1e7SLuigi Rizzo - a spinlock on each ring, to handle producer/consumer races on
77ce3ee1e7SLuigi Rizzo   RX rings attached to the host stack (against multiple host
78ce3ee1e7SLuigi Rizzo   threads writing from the host stack to the same ring),
79ce3ee1e7SLuigi Rizzo   and on 'destination' rings attached to a VALE switch
80ce3ee1e7SLuigi Rizzo   (i.e. RX rings in VALE ports, and TX rings in NIC/host ports)
81ce3ee1e7SLuigi Rizzo   protecting multiple active senders for the same destination
82ce3ee1e7SLuigi Rizzo 
83ce3ee1e7SLuigi Rizzo - an atomic variable to guarantee that there is at most one
84ce3ee1e7SLuigi Rizzo   instance of *_*xsync() on the ring at any time.
85ce3ee1e7SLuigi Rizzo   For rings connected to user file
86ce3ee1e7SLuigi Rizzo   descriptors, an atomic_test_and_set() protects this, and the
87ce3ee1e7SLuigi Rizzo   lock on the ring is not actually used.
88ce3ee1e7SLuigi Rizzo   For NIC RX rings connected to a VALE switch, an atomic_test_and_set()
89ce3ee1e7SLuigi Rizzo   is also used to prevent multiple executions (the driver might indeed
90ce3ee1e7SLuigi Rizzo   already guarantee this).
91ce3ee1e7SLuigi Rizzo   For NIC TX rings connected to a VALE switch, the lock arbitrates
92ce3ee1e7SLuigi Rizzo   access to the queue (both when allocating buffers and when pushing
93ce3ee1e7SLuigi Rizzo   them out).
94ce3ee1e7SLuigi Rizzo 
95ce3ee1e7SLuigi Rizzo - *xsync() should be protected against initializations of the card.
96ce3ee1e7SLuigi Rizzo   On FreeBSD most devices have the reset routine protected by
97ce3ee1e7SLuigi Rizzo   a RING lock (ixgbe, igb, em) or core lock (re). lem is missing
98ce3ee1e7SLuigi Rizzo   the RING protection on rx_reset(), this should be added.
99ce3ee1e7SLuigi Rizzo 
100ce3ee1e7SLuigi Rizzo   On linux there is an external lock on the tx path, which probably
101ce3ee1e7SLuigi Rizzo   also arbitrates access to the reset routine. XXX to be revised
102ce3ee1e7SLuigi Rizzo 
103ce3ee1e7SLuigi Rizzo - a per-interface core_lock protecting access from the host stack
104ce3ee1e7SLuigi Rizzo   while interfaces may be detached from netmap mode.
105ce3ee1e7SLuigi Rizzo   XXX there should be no need for this lock if we detach the interfaces
106ce3ee1e7SLuigi Rizzo   only while they are down.
107ce3ee1e7SLuigi Rizzo 
108ce3ee1e7SLuigi Rizzo 
109ce3ee1e7SLuigi Rizzo --- VALE SWITCH ---
110ce3ee1e7SLuigi Rizzo 
111ce3ee1e7SLuigi Rizzo NMG_LOCK() serializes all modifications to switches and ports.
112ce3ee1e7SLuigi Rizzo A switch cannot be deleted until all ports are gone.
113ce3ee1e7SLuigi Rizzo 
114ce3ee1e7SLuigi Rizzo For each switch, an SX lock (RWlock on linux) protects
115ce3ee1e7SLuigi Rizzo deletion of ports. When configuring or deleting a new port, the
116ce3ee1e7SLuigi Rizzo lock is acquired in exclusive mode (after holding NMG_LOCK).
117ce3ee1e7SLuigi Rizzo When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
118ce3ee1e7SLuigi Rizzo The lock is held throughout the entire forwarding cycle,
119ce3ee1e7SLuigi Rizzo during which the thread may incur a page fault.
120ce3ee1e7SLuigi Rizzo Hence it is important that sleepable shared locks are used.
121ce3ee1e7SLuigi Rizzo 
122ce3ee1e7SLuigi Rizzo On the rx ring, the per-port lock is grabbed initially to reserve
123ce3ee1e7SLuigi Rizzo a number of slot in the ring, then the lock is released,
124ce3ee1e7SLuigi Rizzo packets are copied from source to destination, and then
125ce3ee1e7SLuigi Rizzo the lock is acquired again and the receive ring is updated.
126ce3ee1e7SLuigi Rizzo (A similar thing is done on the tx ring for NIC and host stack
127ce3ee1e7SLuigi Rizzo ports attached to the switch)
128ce3ee1e7SLuigi Rizzo 
12968b8534bSLuigi Rizzo  */
13068b8534bSLuigi Rizzo 
1314bf50f18SLuigi Rizzo 
1324bf50f18SLuigi Rizzo /* --- internals ----
1334bf50f18SLuigi Rizzo  *
1344bf50f18SLuigi Rizzo  * Roadmap to the code that implements the above.
1354bf50f18SLuigi Rizzo  *
1364bf50f18SLuigi Rizzo  * > 1. a process/thread issues one or more open() on /dev/netmap, to create
1374bf50f18SLuigi Rizzo  * >    select()able file descriptor on which events are reported.
1384bf50f18SLuigi Rizzo  *
1394bf50f18SLuigi Rizzo  *  	Internally, we allocate a netmap_priv_d structure, that will be
14037e3a6d3SLuigi Rizzo  *  	initialized on ioctl(NIOCREGIF). There is one netmap_priv_d
14137e3a6d3SLuigi Rizzo  *  	structure for each open().
1424bf50f18SLuigi Rizzo  *
1434bf50f18SLuigi Rizzo  *      os-specific:
14437e3a6d3SLuigi Rizzo  *  	    FreeBSD: see netmap_open() (netmap_freebsd.c)
14537e3a6d3SLuigi Rizzo  *  	    linux:   see linux_netmap_open() (netmap_linux.c)
1464bf50f18SLuigi Rizzo  *
1474bf50f18SLuigi Rizzo  * > 2. on each descriptor, the process issues an ioctl() to identify
1484bf50f18SLuigi Rizzo  * >    the interface that should report events to the file descriptor.
1494bf50f18SLuigi Rizzo  *
1504bf50f18SLuigi Rizzo  * 	Implemented by netmap_ioctl(), NIOCREGIF case, with nmr->nr_cmd==0.
1514bf50f18SLuigi Rizzo  * 	Most important things happen in netmap_get_na() and
1524bf50f18SLuigi Rizzo  * 	netmap_do_regif(), called from there. Additional details can be
1534bf50f18SLuigi Rizzo  * 	found in the comments above those functions.
1544bf50f18SLuigi Rizzo  *
1554bf50f18SLuigi Rizzo  * 	In all cases, this action creates/takes-a-reference-to a
1564bf50f18SLuigi Rizzo  * 	netmap_*_adapter describing the port, and allocates a netmap_if
1574bf50f18SLuigi Rizzo  * 	and all necessary netmap rings, filling them with netmap buffers.
1584bf50f18SLuigi Rizzo  *
1594bf50f18SLuigi Rizzo  *      In this phase, the sync callbacks for each ring are set (these are used
1604bf50f18SLuigi Rizzo  *      in steps 5 and 6 below).  The callbacks depend on the type of adapter.
1614bf50f18SLuigi Rizzo  *      The adapter creation/initialization code puts them in the
1624bf50f18SLuigi Rizzo  * 	netmap_adapter (fields na->nm_txsync and na->nm_rxsync).  Then, they
1634bf50f18SLuigi Rizzo  * 	are copied from there to the netmap_kring's during netmap_do_regif(), by
1644bf50f18SLuigi Rizzo  * 	the nm_krings_create() callback.  All the nm_krings_create callbacks
1654bf50f18SLuigi Rizzo  * 	actually call netmap_krings_create() to perform this and the other
1664bf50f18SLuigi Rizzo  * 	common stuff. netmap_krings_create() also takes care of the host rings,
1674bf50f18SLuigi Rizzo  * 	if needed, by setting their sync callbacks appropriately.
1684bf50f18SLuigi Rizzo  *
1694bf50f18SLuigi Rizzo  * 	Additional actions depend on the kind of netmap_adapter that has been
1704bf50f18SLuigi Rizzo  * 	registered:
1714bf50f18SLuigi Rizzo  *
1724bf50f18SLuigi Rizzo  * 	- netmap_hw_adapter:  	     [netmap.c]
1734bf50f18SLuigi Rizzo  * 	     This is a system netdev/ifp with native netmap support.
1744bf50f18SLuigi Rizzo  * 	     The ifp is detached from the host stack by redirecting:
1754bf50f18SLuigi Rizzo  * 	       - transmissions (from the network stack) to netmap_transmit()
1764bf50f18SLuigi Rizzo  * 	       - receive notifications to the nm_notify() callback for
1774bf50f18SLuigi Rizzo  * 	         this adapter. The callback is normally netmap_notify(), unless
1784bf50f18SLuigi Rizzo  * 	         the ifp is attached to a bridge using bwrap, in which case it
1794bf50f18SLuigi Rizzo  * 	         is netmap_bwrap_intr_notify().
1804bf50f18SLuigi Rizzo  *
1814bf50f18SLuigi Rizzo  * 	- netmap_generic_adapter:      [netmap_generic.c]
1824bf50f18SLuigi Rizzo  * 	      A system netdev/ifp without native netmap support.
1834bf50f18SLuigi Rizzo  *
1844bf50f18SLuigi Rizzo  * 	(the decision about native/non native support is taken in
1854bf50f18SLuigi Rizzo  * 	 netmap_get_hw_na(), called by netmap_get_na())
1864bf50f18SLuigi Rizzo  *
1874bf50f18SLuigi Rizzo  * 	- netmap_vp_adapter 		[netmap_vale.c]
1884bf50f18SLuigi Rizzo  * 	      Returned by netmap_get_bdg_na().
1894bf50f18SLuigi Rizzo  * 	      This is a persistent or ephemeral VALE port. Ephemeral ports
1904bf50f18SLuigi Rizzo  * 	      are created on the fly if they don't already exist, and are
1914bf50f18SLuigi Rizzo  * 	      always attached to a bridge.
192453130d9SPedro F. Giffuni  * 	      Persistent VALE ports must be created separately, and
1934bf50f18SLuigi Rizzo  * 	      then attached like normal NICs. The NIOCREGIF we are examining
1944bf50f18SLuigi Rizzo  * 	      will find them only if they had previously been created and
1954bf50f18SLuigi Rizzo  * 	      attached (see VALE_CTL below).
1964bf50f18SLuigi Rizzo  *
1974bf50f18SLuigi Rizzo  * 	- netmap_pipe_adapter 	      [netmap_pipe.c]
1984bf50f18SLuigi Rizzo  * 	      Returned by netmap_get_pipe_na().
1994bf50f18SLuigi Rizzo  * 	      Both pipe ends are created, if they didn't already exist.
2004bf50f18SLuigi Rizzo  *
2014bf50f18SLuigi Rizzo  * 	- netmap_monitor_adapter      [netmap_monitor.c]
2024bf50f18SLuigi Rizzo  * 	      Returned by netmap_get_monitor_na().
2034bf50f18SLuigi Rizzo  * 	      If successful, the nm_sync callbacks of the monitored adapter
2044bf50f18SLuigi Rizzo  * 	      will be intercepted by the returned monitor.
2054bf50f18SLuigi Rizzo  *
2064bf50f18SLuigi Rizzo  * 	- netmap_bwrap_adapter	      [netmap_vale.c]
2074bf50f18SLuigi Rizzo  * 	      Cannot be obtained in this way, see VALE_CTL below
2084bf50f18SLuigi Rizzo  *
2094bf50f18SLuigi Rizzo  *
2104bf50f18SLuigi Rizzo  * 	os-specific:
2114bf50f18SLuigi Rizzo  * 	    linux: we first go through linux_netmap_ioctl() to
2124bf50f18SLuigi Rizzo  * 	           adapt the FreeBSD interface to the linux one.
2134bf50f18SLuigi Rizzo  *
2144bf50f18SLuigi Rizzo  *
2154bf50f18SLuigi Rizzo  * > 3. on each descriptor, the process issues an mmap() request to
2164bf50f18SLuigi Rizzo  * >    map the shared memory region within the process' address space.
2174bf50f18SLuigi Rizzo  * >    The list of interesting queues is indicated by a location in
2184bf50f18SLuigi Rizzo  * >    the shared memory region.
2194bf50f18SLuigi Rizzo  *
2204bf50f18SLuigi Rizzo  *      os-specific:
2214bf50f18SLuigi Rizzo  *  	    FreeBSD: netmap_mmap_single (netmap_freebsd.c).
2224bf50f18SLuigi Rizzo  *  	    linux:   linux_netmap_mmap (netmap_linux.c).
2234bf50f18SLuigi Rizzo  *
2244bf50f18SLuigi Rizzo  * > 4. using the functions in the netmap(4) userspace API, a process
2254bf50f18SLuigi Rizzo  * >    can look up the occupation state of a queue, access memory buffers,
2264bf50f18SLuigi Rizzo  * >    and retrieve received packets or enqueue packets to transmit.
2274bf50f18SLuigi Rizzo  *
2284bf50f18SLuigi Rizzo  * 	these actions do not involve the kernel.
2294bf50f18SLuigi Rizzo  *
2304bf50f18SLuigi Rizzo  * > 5. using some ioctl()s the process can synchronize the userspace view
2314bf50f18SLuigi Rizzo  * >    of the queue with the actual status in the kernel. This includes both
2324bf50f18SLuigi Rizzo  * >    receiving the notification of new packets, and transmitting new
2334bf50f18SLuigi Rizzo  * >    packets on the output interface.
2344bf50f18SLuigi Rizzo  *
2354bf50f18SLuigi Rizzo  * 	These are implemented in netmap_ioctl(), NIOCTXSYNC and NIOCRXSYNC
2364bf50f18SLuigi Rizzo  * 	cases. They invoke the nm_sync callbacks on the netmap_kring
2374bf50f18SLuigi Rizzo  * 	structures, as initialized in step 2 and maybe later modified
2384bf50f18SLuigi Rizzo  * 	by a monitor. Monitors, however, will always call the original
2394bf50f18SLuigi Rizzo  * 	callback before doing anything else.
2404bf50f18SLuigi Rizzo  *
2414bf50f18SLuigi Rizzo  *
2424bf50f18SLuigi Rizzo  * > 6. select() or poll() can be used to wait for events on individual
2434bf50f18SLuigi Rizzo  * >    transmit or receive queues (or all queues for a given interface).
2444bf50f18SLuigi Rizzo  *
2454bf50f18SLuigi Rizzo  * 	Implemented in netmap_poll(). This will call the same nm_sync()
2464bf50f18SLuigi Rizzo  * 	callbacks as in step 5 above.
2474bf50f18SLuigi Rizzo  *
2484bf50f18SLuigi Rizzo  * 	os-specific:
2494bf50f18SLuigi Rizzo  * 		linux: we first go through linux_netmap_poll() to adapt
2504bf50f18SLuigi Rizzo  * 		       the FreeBSD interface to the linux one.
2514bf50f18SLuigi Rizzo  *
2524bf50f18SLuigi Rizzo  *
2534bf50f18SLuigi Rizzo  *  ----  VALE_CTL -----
2544bf50f18SLuigi Rizzo  *
2554bf50f18SLuigi Rizzo  *  VALE switches are controlled by issuing a NIOCREGIF with a non-null
2564bf50f18SLuigi Rizzo  *  nr_cmd in the nmreq structure. These subcommands are handled by
2574bf50f18SLuigi Rizzo  *  netmap_bdg_ctl() in netmap_vale.c. Persistent VALE ports are created
2584bf50f18SLuigi Rizzo  *  and destroyed by issuing the NETMAP_BDG_NEWIF and NETMAP_BDG_DELIF
2594bf50f18SLuigi Rizzo  *  subcommands, respectively.
2604bf50f18SLuigi Rizzo  *
2614bf50f18SLuigi Rizzo  *  Any network interface known to the system (including a persistent VALE
2624bf50f18SLuigi Rizzo  *  port) can be attached to a VALE switch by issuing the
2634bf50f18SLuigi Rizzo  *  NETMAP_BDG_ATTACH subcommand. After the attachment, persistent VALE ports
2644bf50f18SLuigi Rizzo  *  look exactly like ephemeral VALE ports (as created in step 2 above).  The
2654bf50f18SLuigi Rizzo  *  attachment of other interfaces, instead, requires the creation of a
2664bf50f18SLuigi Rizzo  *  netmap_bwrap_adapter.  Moreover, the attached interface must be put in
2674bf50f18SLuigi Rizzo  *  netmap mode. This may require the creation of a netmap_generic_adapter if
2684bf50f18SLuigi Rizzo  *  we have no native support for the interface, or if generic adapters have
2694bf50f18SLuigi Rizzo  *  been forced by sysctl.
2704bf50f18SLuigi Rizzo  *
2714bf50f18SLuigi Rizzo  *  Both persistent VALE ports and bwraps are handled by netmap_get_bdg_na(),
2724bf50f18SLuigi Rizzo  *  called by nm_bdg_ctl_attach(), and discriminated by the nm_bdg_attach()
2734bf50f18SLuigi Rizzo  *  callback.  In the case of the bwrap, the callback creates the
2744bf50f18SLuigi Rizzo  *  netmap_bwrap_adapter.  The initialization of the bwrap is then
2754bf50f18SLuigi Rizzo  *  completed by calling netmap_do_regif() on it, in the nm_bdg_ctl()
2764bf50f18SLuigi Rizzo  *  callback (netmap_bwrap_bdg_ctl in netmap_vale.c).
2774bf50f18SLuigi Rizzo  *  A generic adapter for the wrapped ifp will be created if needed, when
2784bf50f18SLuigi Rizzo  *  netmap_get_bdg_na() calls netmap_get_hw_na().
2794bf50f18SLuigi Rizzo  *
2804bf50f18SLuigi Rizzo  *
2814bf50f18SLuigi Rizzo  *  ---- DATAPATHS -----
2824bf50f18SLuigi Rizzo  *
2834bf50f18SLuigi Rizzo  *              -= SYSTEM DEVICE WITH NATIVE SUPPORT =-
2844bf50f18SLuigi Rizzo  *
2854bf50f18SLuigi Rizzo  *    na == NA(ifp) == netmap_hw_adapter created in DEVICE_netmap_attach()
2864bf50f18SLuigi Rizzo  *
2874bf50f18SLuigi Rizzo  *    - tx from netmap userspace:
2884bf50f18SLuigi Rizzo  *	 concurrently:
2894bf50f18SLuigi Rizzo  *           1) ioctl(NIOCTXSYNC)/netmap_poll() in process context
2904bf50f18SLuigi Rizzo  *                kring->nm_sync() == DEVICE_netmap_txsync()
2914bf50f18SLuigi Rizzo  *           2) device interrupt handler
2924bf50f18SLuigi Rizzo  *                na->nm_notify()  == netmap_notify()
2934bf50f18SLuigi Rizzo  *    - rx from netmap userspace:
2944bf50f18SLuigi Rizzo  *       concurrently:
2954bf50f18SLuigi Rizzo  *           1) ioctl(NIOCRXSYNC)/netmap_poll() in process context
2964bf50f18SLuigi Rizzo  *                kring->nm_sync() == DEVICE_netmap_rxsync()
2974bf50f18SLuigi Rizzo  *           2) device interrupt handler
2984bf50f18SLuigi Rizzo  *                na->nm_notify()  == netmap_notify()
299847bf383SLuigi Rizzo  *    - rx from host stack
3004bf50f18SLuigi Rizzo  *       concurrently:
3014bf50f18SLuigi Rizzo  *           1) host stack
3024bf50f18SLuigi Rizzo  *                netmap_transmit()
3034bf50f18SLuigi Rizzo  *                  na->nm_notify  == netmap_notify()
3044bf50f18SLuigi Rizzo  *           2) ioctl(NIOCRXSYNC)/netmap_poll() in process context
30537e3a6d3SLuigi Rizzo  *                kring->nm_sync() == netmap_rxsync_from_host
3064bf50f18SLuigi Rizzo  *                  netmap_rxsync_from_host(na, NULL, NULL)
3074bf50f18SLuigi Rizzo  *    - tx to host stack
3084bf50f18SLuigi Rizzo  *           ioctl(NIOCTXSYNC)/netmap_poll() in process context
30937e3a6d3SLuigi Rizzo  *             kring->nm_sync() == netmap_txsync_to_host
3104bf50f18SLuigi Rizzo  *               netmap_txsync_to_host(na)
31137e3a6d3SLuigi Rizzo  *                 nm_os_send_up()
31237e3a6d3SLuigi Rizzo  *                   FreeBSD: na->if_input() == ether_input()
3134bf50f18SLuigi Rizzo  *                   linux: netif_rx() with NM_MAGIC_PRIORITY_RX
3144bf50f18SLuigi Rizzo  *
3154bf50f18SLuigi Rizzo  *
3164bf50f18SLuigi Rizzo  *               -= SYSTEM DEVICE WITH GENERIC SUPPORT =-
3174bf50f18SLuigi Rizzo  *
318847bf383SLuigi Rizzo  *    na == NA(ifp) == generic_netmap_adapter created in generic_netmap_attach()
319847bf383SLuigi Rizzo  *
320847bf383SLuigi Rizzo  *    - tx from netmap userspace:
321847bf383SLuigi Rizzo  *       concurrently:
322847bf383SLuigi Rizzo  *           1) ioctl(NIOCTXSYNC)/netmap_poll() in process context
323847bf383SLuigi Rizzo  *               kring->nm_sync() == generic_netmap_txsync()
32437e3a6d3SLuigi Rizzo  *                   nm_os_generic_xmit_frame()
325847bf383SLuigi Rizzo  *                       linux:   dev_queue_xmit() with NM_MAGIC_PRIORITY_TX
32637e3a6d3SLuigi Rizzo  *                           ifp->ndo_start_xmit == generic_ndo_start_xmit()
32737e3a6d3SLuigi Rizzo  *                               gna->save_start_xmit == orig. dev. start_xmit
328847bf383SLuigi Rizzo  *                       FreeBSD: na->if_transmit() == orig. dev if_transmit
329847bf383SLuigi Rizzo  *           2) generic_mbuf_destructor()
330847bf383SLuigi Rizzo  *                   na->nm_notify() == netmap_notify()
331847bf383SLuigi Rizzo  *    - rx from netmap userspace:
332847bf383SLuigi Rizzo  *           1) ioctl(NIOCRXSYNC)/netmap_poll() in process context
333847bf383SLuigi Rizzo  *               kring->nm_sync() == generic_netmap_rxsync()
334847bf383SLuigi Rizzo  *                   mbq_safe_dequeue()
335847bf383SLuigi Rizzo  *           2) device driver
336847bf383SLuigi Rizzo  *               generic_rx_handler()
337847bf383SLuigi Rizzo  *                   mbq_safe_enqueue()
338847bf383SLuigi Rizzo  *                   na->nm_notify() == netmap_notify()
33937e3a6d3SLuigi Rizzo  *    - rx from host stack
34037e3a6d3SLuigi Rizzo  *        FreeBSD: same as native
34137e3a6d3SLuigi Rizzo  *        Linux: same as native except:
342847bf383SLuigi Rizzo  *           1) host stack
34337e3a6d3SLuigi Rizzo  *               dev_queue_xmit() without NM_MAGIC_PRIORITY_TX
34437e3a6d3SLuigi Rizzo  *                   ifp->ndo_start_xmit == generic_ndo_start_xmit()
345847bf383SLuigi Rizzo  *                       netmap_transmit()
346847bf383SLuigi Rizzo  *                           na->nm_notify() == netmap_notify()
34737e3a6d3SLuigi Rizzo  *    - tx to host stack (same as native):
3484bf50f18SLuigi Rizzo  *
3494bf50f18SLuigi Rizzo  *
350847bf383SLuigi Rizzo  *                           -= VALE =-
3514bf50f18SLuigi Rizzo  *
352847bf383SLuigi Rizzo  *   INCOMING:
3534bf50f18SLuigi Rizzo  *
354847bf383SLuigi Rizzo  *      - VALE ports:
355847bf383SLuigi Rizzo  *          ioctl(NIOCTXSYNC)/netmap_poll() in process context
356847bf383SLuigi Rizzo  *              kring->nm_sync() == netmap_vp_txsync()
3574bf50f18SLuigi Rizzo  *
358847bf383SLuigi Rizzo  *      - system device with native support:
359847bf383SLuigi Rizzo  *         from cable:
360847bf383SLuigi Rizzo  *             interrupt
361847bf383SLuigi Rizzo  *                na->nm_notify() == netmap_bwrap_intr_notify(ring_nr != host ring)
362847bf383SLuigi Rizzo  *                     kring->nm_sync() == DEVICE_netmap_rxsync()
363847bf383SLuigi Rizzo  *                     netmap_vp_txsync()
364847bf383SLuigi Rizzo  *                     kring->nm_sync() == DEVICE_netmap_rxsync()
365847bf383SLuigi Rizzo  *         from host stack:
366847bf383SLuigi Rizzo  *             netmap_transmit()
367847bf383SLuigi Rizzo  *                na->nm_notify() == netmap_bwrap_intr_notify(ring_nr == host ring)
36837e3a6d3SLuigi Rizzo  *                     kring->nm_sync() == netmap_rxsync_from_host()
369847bf383SLuigi Rizzo  *                     netmap_vp_txsync()
3704bf50f18SLuigi Rizzo  *
371847bf383SLuigi Rizzo  *      - system device with generic support:
372847bf383SLuigi Rizzo  *         from device driver:
373847bf383SLuigi Rizzo  *            generic_rx_handler()
374847bf383SLuigi Rizzo  *                na->nm_notify() == netmap_bwrap_intr_notify(ring_nr != host ring)
375847bf383SLuigi Rizzo  *                     kring->nm_sync() == generic_netmap_rxsync()
376847bf383SLuigi Rizzo  *                     netmap_vp_txsync()
377847bf383SLuigi Rizzo  *                     kring->nm_sync() == generic_netmap_rxsync()
378847bf383SLuigi Rizzo  *         from host stack:
379847bf383SLuigi Rizzo  *            netmap_transmit()
380847bf383SLuigi Rizzo  *                na->nm_notify() == netmap_bwrap_intr_notify(ring_nr == host ring)
38137e3a6d3SLuigi Rizzo  *                     kring->nm_sync() == netmap_rxsync_from_host()
382847bf383SLuigi Rizzo  *                     netmap_vp_txsync()
3834bf50f18SLuigi Rizzo  *
384847bf383SLuigi Rizzo  *   (all cases) --> nm_bdg_flush()
385847bf383SLuigi Rizzo  *                      dest_na->nm_notify() == (see below)
3864bf50f18SLuigi Rizzo  *
387847bf383SLuigi Rizzo  *   OUTGOING:
3884bf50f18SLuigi Rizzo  *
389847bf383SLuigi Rizzo  *      - VALE ports:
390847bf383SLuigi Rizzo  *         concurrently:
391847bf383SLuigi Rizzo  *             1) ioctl(NIOCRXSYNC)/netmap_poll() in process context
392847bf383SLuigi Rizzo  *                    kring->nm_sync() == netmap_vp_rxsync()
393847bf383SLuigi Rizzo  *             2) from nm_bdg_flush()
394847bf383SLuigi Rizzo  *                    na->nm_notify() == netmap_notify()
3954bf50f18SLuigi Rizzo  *
396847bf383SLuigi Rizzo  *      - system device with native support:
397847bf383SLuigi Rizzo  *          to cable:
398847bf383SLuigi Rizzo  *             na->nm_notify() == netmap_bwrap_notify()
399847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
400847bf383SLuigi Rizzo  *                 kring->nm_sync() == DEVICE_netmap_txsync()
401847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
402847bf383SLuigi Rizzo  *          to host stack:
403847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
40437e3a6d3SLuigi Rizzo  *                 kring->nm_sync() == netmap_txsync_to_host
405847bf383SLuigi Rizzo  *                 netmap_vp_rxsync_locked()
4064bf50f18SLuigi Rizzo  *
407847bf383SLuigi Rizzo  *      - system device with generic adapter:
408847bf383SLuigi Rizzo  *          to device driver:
409847bf383SLuigi Rizzo  *             na->nm_notify() == netmap_bwrap_notify()
410847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
411847bf383SLuigi Rizzo  *                 kring->nm_sync() == generic_netmap_txsync()
412847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
413847bf383SLuigi Rizzo  *          to host stack:
414847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
41537e3a6d3SLuigi Rizzo  *                 kring->nm_sync() == netmap_txsync_to_host
416847bf383SLuigi Rizzo  *                 netmap_vp_rxsync()
4174bf50f18SLuigi Rizzo  *
4184bf50f18SLuigi Rizzo  */
4194bf50f18SLuigi Rizzo 
420ce3ee1e7SLuigi Rizzo /*
421ce3ee1e7SLuigi Rizzo  * OS-specific code that is used only within this file.
422ce3ee1e7SLuigi Rizzo  * Other OS-specific code that must be accessed by drivers
423ce3ee1e7SLuigi Rizzo  * is present in netmap_kern.h
424ce3ee1e7SLuigi Rizzo  */
42501c7d25fSLuigi Rizzo 
426ce3ee1e7SLuigi Rizzo #if defined(__FreeBSD__)
42768b8534bSLuigi Rizzo #include <sys/cdefs.h> /* prerequisite */
42868b8534bSLuigi Rizzo #include <sys/types.h>
42968b8534bSLuigi Rizzo #include <sys/errno.h>
43068b8534bSLuigi Rizzo #include <sys/param.h>	/* defines used in kernel.h */
43168b8534bSLuigi Rizzo #include <sys/kernel.h>	/* types used in module initialization */
432f9790aebSLuigi Rizzo #include <sys/conf.h>	/* cdevsw struct, UID, GID */
43389e3fd52SLuigi Rizzo #include <sys/filio.h>	/* FIONBIO */
43468b8534bSLuigi Rizzo #include <sys/sockio.h>
43568b8534bSLuigi Rizzo #include <sys/socketvar.h>	/* struct socket */
43668b8534bSLuigi Rizzo #include <sys/malloc.h>
43768b8534bSLuigi Rizzo #include <sys/poll.h>
43889f6b863SAttilio Rao #include <sys/rwlock.h>
43968b8534bSLuigi Rizzo #include <sys/socket.h> /* sockaddrs */
44068b8534bSLuigi Rizzo #include <sys/selinfo.h>
44168b8534bSLuigi Rizzo #include <sys/sysctl.h>
442339f59c0SGleb Smirnoff #include <sys/jail.h>
443339f59c0SGleb Smirnoff #include <net/vnet.h>
44468b8534bSLuigi Rizzo #include <net/if.h>
44576039bc8SGleb Smirnoff #include <net/if_var.h>
44668b8534bSLuigi Rizzo #include <net/bpf.h>		/* BIOCIMMEDIATE */
44768b8534bSLuigi Rizzo #include <machine/bus.h>	/* bus_dmamap_* */
448ce3ee1e7SLuigi Rizzo #include <sys/endian.h>
449ce3ee1e7SLuigi Rizzo #include <sys/refcount.h>
45068b8534bSLuigi Rizzo 
45168b8534bSLuigi Rizzo 
452ce3ee1e7SLuigi Rizzo #elif defined(linux)
453ce3ee1e7SLuigi Rizzo 
454ce3ee1e7SLuigi Rizzo #include "bsd_glue.h"
455ce3ee1e7SLuigi Rizzo 
456ce3ee1e7SLuigi Rizzo #elif defined(__APPLE__)
457ce3ee1e7SLuigi Rizzo 
458ce3ee1e7SLuigi Rizzo #warning OSX support is only partial
459ce3ee1e7SLuigi Rizzo #include "osx_glue.h"
460ce3ee1e7SLuigi Rizzo 
46137e3a6d3SLuigi Rizzo #elif defined (_WIN32)
46237e3a6d3SLuigi Rizzo 
46337e3a6d3SLuigi Rizzo #include "win_glue.h"
46437e3a6d3SLuigi Rizzo 
465ce3ee1e7SLuigi Rizzo #else
466ce3ee1e7SLuigi Rizzo 
467ce3ee1e7SLuigi Rizzo #error	Unsupported platform
468ce3ee1e7SLuigi Rizzo 
469ce3ee1e7SLuigi Rizzo #endif /* unsupported */
470ce3ee1e7SLuigi Rizzo 
471ce3ee1e7SLuigi Rizzo /*
472ce3ee1e7SLuigi Rizzo  * common headers
473ce3ee1e7SLuigi Rizzo  */
4740b8ed8e0SLuigi Rizzo #include <net/netmap.h>
4750b8ed8e0SLuigi Rizzo #include <dev/netmap/netmap_kern.h>
476ce3ee1e7SLuigi Rizzo #include <dev/netmap/netmap_mem2.h>
4770b8ed8e0SLuigi Rizzo 
478ce3ee1e7SLuigi Rizzo 
/* user-controlled variables */
int netmap_verbose;		/* debug verbosity, sysctl dev.netmap.verbose */

static int netmap_no_timestamp; /* don't timestamp on rxsync */
int netmap_mitigate = 1;	/* sysctl dev.netmap.mitigate; presumably enables
				 * notification mitigation — TODO confirm users */
int netmap_no_pendintr = 1;	/* sysctl dev.netmap.no_pendintr: always look
				 * for new received packets */
int netmap_txsync_retry = 2;	/* sysctl dev.netmap.txsync_retry: number of
				 * txsync loops in the bridge's flush */
int netmap_flags = 0;	/* debug flags */
static int netmap_fwd = 0;	/* force transparent mode */

/*
 * netmap_admode selects the netmap mode to use.
 * Invalid values are reset to NETMAP_ADMODE_BEST
 */
enum {	NETMAP_ADMODE_BEST = 0,	/* use native, fallback to generic */
	NETMAP_ADMODE_NATIVE,	/* either native or none */
	NETMAP_ADMODE_GENERIC,	/* force generic */
	NETMAP_ADMODE_LAST };
static int netmap_admode = NETMAP_ADMODE_BEST;

/* netmap_generic_mit controls mitigation of RX notifications for
 * the generic netmap adapter. The value is a time interval in
 * nanoseconds. */
int netmap_generic_mit = 100*1000;

/* We use by default netmap-aware qdiscs with generic netmap adapters,
 * even if there can be a little performance hit with hardware NICs.
 * However, using the qdisc is the safer approach, for two reasons:
 * 1) it prevents non-fifo qdiscs to break the TX notification
 *    scheme, which is based on mbuf destructors when txqdisc is
 *    not used.
 * 2) it makes it possible to transmit over software devices that
 *    change skb->dev, like bridge, veth, ...
 *
 * Anyway users looking for the best performance should
 * use native adapters.
 */
int netmap_generic_txqdisc = 1;

/* Default number of slots and queues for generic adapters. */
int netmap_generic_ringsize = 1024;
int netmap_generic_rings = 1;

/* Non-zero if ptnet devices are allowed to use virtio-net headers. */
int ptnet_vnet_hdr = 1;
52437e3a6d3SLuigi Rizzo 
/*
 * SYSCTL calls are grouped between SYSBEGIN and SYSEND to be emulated
 * in some other operating systems
 */
SYSBEGIN(main_init);

/* expose the tunables above under the dev.netmap sysctl tree */
SYSCTL_DECL(_dev_netmap);
SYSCTL_NODE(_dev, OID_AUTO, netmap, CTLFLAG_RW, 0, "Netmap args");
SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,
    CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
    CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr,
    CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets.");
SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW,
    &netmap_txsync_retry, 0 , "Number of txsync loops in bridge's flush.");

SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW, &netmap_generic_ringsize, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW, &netmap_generic_rings, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_txqdisc, CTLFLAG_RW, &netmap_generic_txqdisc, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, ptnet_vnet_hdr, CTLFLAG_RW, &ptnet_vnet_hdr, 0 , "");

SYSEND;

/* global lock serializing netmap control operations (see NMG_LOCK/NMG_UNLOCK) */
NMG_LOCK_T	netmap_global_lock;
555ce3ee1e7SLuigi Rizzo 
/*
 * mark the ring as stopped, and run through the locks
 * to make sure other users get to see it.
 * stopped must be either NM_KR_STOPPED (for unbounded stop)
 * or NM_KR_LOCKED (brief stop for mutual exclusion purposes)
 */
static void
netmap_disable_ring(struct netmap_kring *kr, int stopped)
{
	/* mark the kring stopped and wait for any user already inside */
	nm_kr_stop(kr, stopped);
	// XXX check if nm_kr_stop is sufficient
	/* cycle through q_lock so that a holder of the queue lock
	 * observes the stopped state before we return */
	mtx_lock(&kr->q_lock);
	mtx_unlock(&kr->q_lock);
	/* drop the busy reference taken by nm_kr_stop */
	nm_kr_put(kr);
}
571ce3ee1e7SLuigi Rizzo 
572847bf383SLuigi Rizzo /* stop or enable a single ring */
5734bf50f18SLuigi Rizzo void
574847bf383SLuigi Rizzo netmap_set_ring(struct netmap_adapter *na, u_int ring_id, enum txrx t, int stopped)
5754bf50f18SLuigi Rizzo {
5764bf50f18SLuigi Rizzo 	if (stopped)
57737e3a6d3SLuigi Rizzo 		netmap_disable_ring(NMR(na, t) + ring_id, stopped);
5784bf50f18SLuigi Rizzo 	else
579847bf383SLuigi Rizzo 		NMR(na, t)[ring_id].nkr_stopped = 0;
5804bf50f18SLuigi Rizzo }
5814bf50f18SLuigi Rizzo 
582f9790aebSLuigi Rizzo 
58389cc2556SLuigi Rizzo /* stop or enable all the rings of na */
5844bf50f18SLuigi Rizzo void
5854bf50f18SLuigi Rizzo netmap_set_all_rings(struct netmap_adapter *na, int stopped)
586ce3ee1e7SLuigi Rizzo {
587ce3ee1e7SLuigi Rizzo 	int i;
588847bf383SLuigi Rizzo 	enum txrx t;
589ce3ee1e7SLuigi Rizzo 
5904bf50f18SLuigi Rizzo 	if (!nm_netmap_on(na))
591ce3ee1e7SLuigi Rizzo 		return;
592ce3ee1e7SLuigi Rizzo 
593847bf383SLuigi Rizzo 	for_rx_tx(t) {
594847bf383SLuigi Rizzo 		for (i = 0; i < netmap_real_rings(na, t); i++) {
595847bf383SLuigi Rizzo 			netmap_set_ring(na, i, t, stopped);
596ce3ee1e7SLuigi Rizzo 		}
597ce3ee1e7SLuigi Rizzo 	}
598ce3ee1e7SLuigi Rizzo }
599ce3ee1e7SLuigi Rizzo 
/*
 * Convenience function used in drivers.  Waits for current txsync()s/rxsync()s
 * to finish and prevents any new one from starting.  Call this before turning
 * netmap mode off, or before removing the hardware rings (e.g., on module
 * unload).
 */
606f9790aebSLuigi Rizzo void
607f9790aebSLuigi Rizzo netmap_disable_all_rings(struct ifnet *ifp)
608f9790aebSLuigi Rizzo {
60937e3a6d3SLuigi Rizzo 	if (NM_NA_VALID(ifp)) {
61037e3a6d3SLuigi Rizzo 		netmap_set_all_rings(NA(ifp), NM_KR_STOPPED);
61137e3a6d3SLuigi Rizzo 	}
612f9790aebSLuigi Rizzo }
613f9790aebSLuigi Rizzo 
/*
 * Convenience function used in drivers.  Re-enables rxsync and txsync on the
 * adapter's rings.  In Linux drivers, this should be placed near each
 * napi_enable().
 */
void
netmap_enable_all_rings(struct ifnet *ifp)
{
	/* nothing to do unless a netmap adapter is attached */
	if (!NM_NA_VALID(ifp))
		return;
	netmap_set_all_rings(NA(ifp), 0 /* enabled */);
}
626f9790aebSLuigi Rizzo 
62737e3a6d3SLuigi Rizzo void
62837e3a6d3SLuigi Rizzo netmap_make_zombie(struct ifnet *ifp)
62937e3a6d3SLuigi Rizzo {
63037e3a6d3SLuigi Rizzo 	if (NM_NA_VALID(ifp)) {
63137e3a6d3SLuigi Rizzo 		struct netmap_adapter *na = NA(ifp);
63237e3a6d3SLuigi Rizzo 		netmap_set_all_rings(na, NM_KR_LOCKED);
63337e3a6d3SLuigi Rizzo 		na->na_flags |= NAF_ZOMBIE;
63437e3a6d3SLuigi Rizzo 		netmap_set_all_rings(na, 0);
63537e3a6d3SLuigi Rizzo 	}
63637e3a6d3SLuigi Rizzo }
63737e3a6d3SLuigi Rizzo 
63837e3a6d3SLuigi Rizzo void
63937e3a6d3SLuigi Rizzo netmap_undo_zombie(struct ifnet *ifp)
64037e3a6d3SLuigi Rizzo {
64137e3a6d3SLuigi Rizzo 	if (NM_NA_VALID(ifp)) {
64237e3a6d3SLuigi Rizzo 		struct netmap_adapter *na = NA(ifp);
64337e3a6d3SLuigi Rizzo 		if (na->na_flags & NAF_ZOMBIE) {
64437e3a6d3SLuigi Rizzo 			netmap_set_all_rings(na, NM_KR_LOCKED);
64537e3a6d3SLuigi Rizzo 			na->na_flags &= ~NAF_ZOMBIE;
64637e3a6d3SLuigi Rizzo 			netmap_set_all_rings(na, 0);
64737e3a6d3SLuigi Rizzo 		}
64837e3a6d3SLuigi Rizzo 	}
64937e3a6d3SLuigi Rizzo }
650f9790aebSLuigi Rizzo 
651ce3ee1e7SLuigi Rizzo /*
652ce3ee1e7SLuigi Rizzo  * generic bound_checking function
653ce3ee1e7SLuigi Rizzo  */
654ce3ee1e7SLuigi Rizzo u_int
655ce3ee1e7SLuigi Rizzo nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg)
656ce3ee1e7SLuigi Rizzo {
657ce3ee1e7SLuigi Rizzo 	u_int oldv = *v;
658ce3ee1e7SLuigi Rizzo 	const char *op = NULL;
659ce3ee1e7SLuigi Rizzo 
660ce3ee1e7SLuigi Rizzo 	if (dflt < lo)
661ce3ee1e7SLuigi Rizzo 		dflt = lo;
662ce3ee1e7SLuigi Rizzo 	if (dflt > hi)
663ce3ee1e7SLuigi Rizzo 		dflt = hi;
664ce3ee1e7SLuigi Rizzo 	if (oldv < lo) {
665ce3ee1e7SLuigi Rizzo 		*v = dflt;
666ce3ee1e7SLuigi Rizzo 		op = "Bump";
667ce3ee1e7SLuigi Rizzo 	} else if (oldv > hi) {
668ce3ee1e7SLuigi Rizzo 		*v = hi;
669ce3ee1e7SLuigi Rizzo 		op = "Clamp";
670ce3ee1e7SLuigi Rizzo 	}
671ce3ee1e7SLuigi Rizzo 	if (op && msg)
672ce3ee1e7SLuigi Rizzo 		printf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
673ce3ee1e7SLuigi Rizzo 	return *v;
674ce3ee1e7SLuigi Rizzo }
675ce3ee1e7SLuigi Rizzo 
676f9790aebSLuigi Rizzo 
/*
 * packet-dump function, user-supplied or static buffer.
 * The destination buffer must be at least 30+4*len.
 * If dst == NULL a static 8KB buffer is used instead, and the dump
 * is truncated so that it cannot overflow that buffer.
 */
const char *
nm_dump_buf(char *p, int len, int lim, char *dst)
{
	static char _dst[8192];
	int i, j, i0;
	static char hex[] ="0123456789abcdef";
	char *o;	/* output position */

#define P_HI(x)	hex[((x) & 0xf0)>>4]
#define P_LO(x)	hex[((x) & 0xf)]
#define P_C(x)	((x) >= 0x20 && (x) <= 0x7e ? (x) : '.')
	if (!dst)
		dst = _dst;
	if (lim <= 0 || lim > len)
		lim = len;
	if (dst == _dst) {
		/* each 16-byte row takes at most 72 output bytes
		 * ("%5d: " + 48 hex columns + 16 ascii + newline);
		 * clamp lim so the dump fits in the static buffer,
		 * with room for the header line and the final NUL */
		int max = (int)((sizeof(_dst) - 64) / 72) * 16;
		if (lim > max)
			lim = max;
	}
	o = dst;
	/* %p already prints the 0x prefix; the old "0x%p" doubled it */
	sprintf(o, "buf %p len %d lim %d\n", p, len, lim);
	o += strlen(o);
	/* hexdump routine: one row per 16 input bytes */
	for (i = 0; i < lim; ) {
		sprintf(o, "%5d: ", i);
		o += strlen(o);
		memset(o, ' ', 48);
		i0 = i;
		/* hex columns */
		for (j=0; j < 16 && i < lim; i++, j++) {
			o[j*3] = P_HI(p[i]);
			o[j*3+1] = P_LO(p[i]);
		}
		i = i0;
		/* printable-ascii columns, starting at offset 48 */
		for (j=0; j < 16 && i < lim; i++, j++)
			o[j + 48] = P_C(p[i]);
		o[j+48] = '\n';
		o += j+49;
	}
	*o = '\0';
#undef P_HI
#undef P_LO
#undef P_C
	return dst;
}
721f196ce38SLuigi Rizzo 
722f18be576SLuigi Rizzo 
723ae10d1afSLuigi Rizzo /*
724ae10d1afSLuigi Rizzo  * Fetch configuration from the device, to cope with dynamic
725ae10d1afSLuigi Rizzo  * reconfigurations after loading the module.
726ae10d1afSLuigi Rizzo  */
72789cc2556SLuigi Rizzo /* call with NMG_LOCK held */
728f9790aebSLuigi Rizzo int
729ae10d1afSLuigi Rizzo netmap_update_config(struct netmap_adapter *na)
730ae10d1afSLuigi Rizzo {
731ae10d1afSLuigi Rizzo 	u_int txr, txd, rxr, rxd;
732ae10d1afSLuigi Rizzo 
733ae10d1afSLuigi Rizzo 	txr = txd = rxr = rxd = 0;
7346641c68bSLuigi Rizzo 	if (na->nm_config == NULL ||
735847bf383SLuigi Rizzo 	    na->nm_config(na, &txr, &txd, &rxr, &rxd))
736847bf383SLuigi Rizzo 	{
737ae10d1afSLuigi Rizzo 		/* take whatever we had at init time */
738ae10d1afSLuigi Rizzo 		txr = na->num_tx_rings;
739ae10d1afSLuigi Rizzo 		txd = na->num_tx_desc;
740ae10d1afSLuigi Rizzo 		rxr = na->num_rx_rings;
741ae10d1afSLuigi Rizzo 		rxd = na->num_rx_desc;
742ae10d1afSLuigi Rizzo 	}
743ae10d1afSLuigi Rizzo 
744ae10d1afSLuigi Rizzo 	if (na->num_tx_rings == txr && na->num_tx_desc == txd &&
745ae10d1afSLuigi Rizzo 	    na->num_rx_rings == rxr && na->num_rx_desc == rxd)
746ae10d1afSLuigi Rizzo 		return 0; /* nothing changed */
747f9790aebSLuigi Rizzo 	if (netmap_verbose || na->active_fds > 0) {
748ae10d1afSLuigi Rizzo 		D("stored config %s: txring %d x %d, rxring %d x %d",
7494bf50f18SLuigi Rizzo 			na->name,
750ae10d1afSLuigi Rizzo 			na->num_tx_rings, na->num_tx_desc,
751ae10d1afSLuigi Rizzo 			na->num_rx_rings, na->num_rx_desc);
752ae10d1afSLuigi Rizzo 		D("new config %s: txring %d x %d, rxring %d x %d",
7534bf50f18SLuigi Rizzo 			na->name, txr, txd, rxr, rxd);
754ae10d1afSLuigi Rizzo 	}
755f9790aebSLuigi Rizzo 	if (na->active_fds == 0) {
756ae10d1afSLuigi Rizzo 		D("configuration changed (but fine)");
757ae10d1afSLuigi Rizzo 		na->num_tx_rings = txr;
758ae10d1afSLuigi Rizzo 		na->num_tx_desc = txd;
759ae10d1afSLuigi Rizzo 		na->num_rx_rings = rxr;
760ae10d1afSLuigi Rizzo 		na->num_rx_desc = rxd;
761ae10d1afSLuigi Rizzo 		return 0;
762ae10d1afSLuigi Rizzo 	}
763ae10d1afSLuigi Rizzo 	D("configuration changed while active, this is bad...");
764ae10d1afSLuigi Rizzo 	return 1;
765ae10d1afSLuigi Rizzo }
766ae10d1afSLuigi Rizzo 
76737e3a6d3SLuigi Rizzo /* nm_sync callbacks for the host rings */
76837e3a6d3SLuigi Rizzo static int netmap_txsync_to_host(struct netmap_kring *kring, int flags);
76937e3a6d3SLuigi Rizzo static int netmap_rxsync_from_host(struct netmap_kring *kring, int flags);
770f0ea3689SLuigi Rizzo 
771f0ea3689SLuigi Rizzo /* create the krings array and initialize the fields common to all adapters.
772f0ea3689SLuigi Rizzo  * The array layout is this:
773f0ea3689SLuigi Rizzo  *
774f0ea3689SLuigi Rizzo  *                    +----------+
775f0ea3689SLuigi Rizzo  * na->tx_rings ----->|          | \
776f0ea3689SLuigi Rizzo  *                    |          |  } na->num_tx_ring
777f0ea3689SLuigi Rizzo  *                    |          | /
778f0ea3689SLuigi Rizzo  *                    +----------+
779f0ea3689SLuigi Rizzo  *                    |          |    host tx kring
780f0ea3689SLuigi Rizzo  * na->rx_rings ----> +----------+
781f0ea3689SLuigi Rizzo  *                    |          | \
782f0ea3689SLuigi Rizzo  *                    |          |  } na->num_rx_rings
783f0ea3689SLuigi Rizzo  *                    |          | /
784f0ea3689SLuigi Rizzo  *                    +----------+
785f0ea3689SLuigi Rizzo  *                    |          |    host rx kring
786f0ea3689SLuigi Rizzo  *                    +----------+
787f0ea3689SLuigi Rizzo  * na->tailroom ----->|          | \
788f0ea3689SLuigi Rizzo  *                    |          |  } tailroom bytes
789f0ea3689SLuigi Rizzo  *                    |          | /
790f0ea3689SLuigi Rizzo  *                    +----------+
791f0ea3689SLuigi Rizzo  *
792f0ea3689SLuigi Rizzo  * Note: for compatibility, host krings are created even when not needed.
793f0ea3689SLuigi Rizzo  * The tailroom space is currently used by vale ports for allocating leases.
794f0ea3689SLuigi Rizzo  */
79589cc2556SLuigi Rizzo /* call with NMG_LOCK held */
int
netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
{
	u_int i, len, ndesc;
	struct netmap_kring *kring;
	u_int n[NR_TXRX];	/* rings per direction, host ring included */
	enum txrx t;

	/* account for the (possibly fake) host rings */
	n[NR_TX] = na->num_tx_rings + 1;
	n[NR_RX] = na->num_rx_rings + 1;

	/* single allocation: tx krings, rx krings, then tailroom
	 * (see the layout diagram above) */
	len = (n[NR_TX] + n[NR_RX]) * sizeof(struct netmap_kring) + tailroom;

	na->tx_rings = malloc((size_t)len, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (na->tx_rings == NULL) {
		D("Cannot allocate krings");
		return ENOMEM;
	}
	na->rx_rings = na->tx_rings + n[NR_TX];

	/*
	 * All fields in krings are 0 except the one initialized below.
	 * but better be explicit on important kring fields.
	 */
	for_rx_tx(t) {
		ndesc = nma_get_ndesc(na, t);
		for (i = 0; i < n[t]; i++) {
			kring = &NMR(na, t)[i];
			bzero(kring, sizeof(*kring));
			kring->na = na;
			kring->ring_id = i;
			kring->tx = t;
			kring->nkr_num_slots = ndesc;
			kring->nr_mode = NKR_NETMAP_OFF;
			kring->nr_pending_mode = NKR_NETMAP_OFF;
			/* hardware rings use the adapter sync callbacks;
			 * the last ring in each direction is the host ring
			 * and syncs against the host stack instead */
			if (i < nma_get_nrings(na, t)) {
				kring->nm_sync = (t == NR_TX ? na->nm_txsync : na->nm_rxsync);
			} else {
				kring->nm_sync = (t == NR_TX ?
						netmap_txsync_to_host:
						netmap_rxsync_from_host);
			}
			kring->nm_notify = na->nm_notify;
			kring->rhead = kring->rcur = kring->nr_hwcur = 0;
			/*
			 * IMPORTANT: Always keep one slot empty.
			 */
			kring->rtail = kring->nr_hwtail = (t == NR_TX ? ndesc - 1 : 0);
			snprintf(kring->name, sizeof(kring->name) - 1, "%s %s%d", na->name,
					nm_txrx2str(t), i);
			ND("ktx %s h %d c %d t %d",
				kring->name, kring->rhead, kring->rcur, kring->rtail);
			mtx_init(&kring->q_lock, (t == NR_TX ? "nm_txq_lock" : "nm_rxq_lock"), NULL, MTX_DEF);
			nm_os_selinfo_init(&kring->si);
		}
		/* adapter-wide wait queue for this direction */
		nm_os_selinfo_init(&na->si[t]);
	}

	/* tailroom starts right after the last rx kring */
	na->tailroom = na->rx_rings + n[NR_RX];

	return 0;
}
859f9790aebSLuigi Rizzo 
860f9790aebSLuigi Rizzo 
861f0ea3689SLuigi Rizzo /* undo the actions performed by netmap_krings_create */
86289cc2556SLuigi Rizzo /* call with NMG_LOCK held */
863f9790aebSLuigi Rizzo void
864f9790aebSLuigi Rizzo netmap_krings_delete(struct netmap_adapter *na)
865f9790aebSLuigi Rizzo {
866f0ea3689SLuigi Rizzo 	struct netmap_kring *kring = na->tx_rings;
867847bf383SLuigi Rizzo 	enum txrx t;
868847bf383SLuigi Rizzo 
869847bf383SLuigi Rizzo 	for_rx_tx(t)
87037e3a6d3SLuigi Rizzo 		nm_os_selinfo_uninit(&na->si[t]);
871f9790aebSLuigi Rizzo 
872f0ea3689SLuigi Rizzo 	/* we rely on the krings layout described above */
873f0ea3689SLuigi Rizzo 	for ( ; kring != na->tailroom; kring++) {
874f0ea3689SLuigi Rizzo 		mtx_destroy(&kring->q_lock);
87537e3a6d3SLuigi Rizzo 		nm_os_selinfo_uninit(&kring->si);
876f9790aebSLuigi Rizzo 	}
877f9790aebSLuigi Rizzo 	free(na->tx_rings, M_DEVBUF);
878f9790aebSLuigi Rizzo 	na->tx_rings = na->rx_rings = na->tailroom = NULL;
879f9790aebSLuigi Rizzo }
880f9790aebSLuigi Rizzo 
881f9790aebSLuigi Rizzo 
88217885a7bSLuigi Rizzo /*
88317885a7bSLuigi Rizzo  * Destructor for NIC ports. They also have an mbuf queue
88417885a7bSLuigi Rizzo  * on the rings connected to the host so we need to purge
88517885a7bSLuigi Rizzo  * them first.
88617885a7bSLuigi Rizzo  */
88789cc2556SLuigi Rizzo /* call with NMG_LOCK held */
88837e3a6d3SLuigi Rizzo void
88917885a7bSLuigi Rizzo netmap_hw_krings_delete(struct netmap_adapter *na)
89017885a7bSLuigi Rizzo {
89117885a7bSLuigi Rizzo 	struct mbq *q = &na->rx_rings[na->num_rx_rings].rx_queue;
89217885a7bSLuigi Rizzo 
89317885a7bSLuigi Rizzo 	ND("destroy sw mbq with len %d", mbq_len(q));
89417885a7bSLuigi Rizzo 	mbq_purge(q);
89537e3a6d3SLuigi Rizzo 	mbq_safe_fini(q);
89617885a7bSLuigi Rizzo 	netmap_krings_delete(na);
89717885a7bSLuigi Rizzo }
89817885a7bSLuigi Rizzo 
89917885a7bSLuigi Rizzo 
900f9790aebSLuigi Rizzo 
90168b8534bSLuigi Rizzo /*
902847bf383SLuigi Rizzo  * Undo everything that was done in netmap_do_regif(). In particular,
903847bf383SLuigi Rizzo  * call nm_register(ifp,0) to stop netmap mode on the interface and
9044bf50f18SLuigi Rizzo  * revert to normal operation.
90568b8534bSLuigi Rizzo  */
906ce3ee1e7SLuigi Rizzo /* call with NMG_LOCK held */
907847bf383SLuigi Rizzo static void netmap_unset_ringid(struct netmap_priv_d *);
90837e3a6d3SLuigi Rizzo static void netmap_krings_put(struct netmap_priv_d *);
void
netmap_do_unregif(struct netmap_priv_d *priv)
{
	struct netmap_adapter *na = priv->np_na;

	NMG_LOCK_ASSERT();
	na->active_fds--;
	/* unset nr_pending_mode and possibly release exclusive mode */
	netmap_krings_put(priv);

#ifdef	WITH_MONITOR
	/* XXX check whether we have to do something with monitor
	 * when rings change nr_mode. */
	if (na->active_fds <= 0) {
		/* walk through all the rings and tell any monitor
		 * that the port is going to exit netmap mode
		 */
		netmap_monitor_stop(na);
	}
#endif

	/* turn off the adapter if this was the last fd, or if some
	 * ring still has a mode change pending */
	if (na->active_fds <= 0 || nm_kring_pending(priv)) {
		na->nm_register(na, 0);
	}

	/* delete rings and buffers that are no longer needed */
	netmap_mem_rings_delete(na);

	if (na->active_fds <= 0) {	/* last instance */
		/*
		 * (TO CHECK) We enter here
		 * when the last reference to this file descriptor goes
		 * away. This means we cannot have any pending poll()
		 * or interrupt routine operating on the structure.
		 * XXX The file may be closed in a thread while
		 * another thread is using it.
		 * Linux keeps the file opened until the last reference
		 * by any outstanding ioctl/poll or mmap is gone.
		 * FreeBSD does not track mmap()s (but we do) and
		 * wakes up any sleeping poll(). Need to check what
		 * happens if the close() occurs while a concurrent
		 * syscall is running.
		 */
		if (netmap_verbose)
			D("deleting last instance for %s", na->name);

                if (nm_netmap_on(na)) {
                    D("BUG: netmap on while going to delete the krings");
                }

		na->nm_krings_delete(na);
	}

	/* possibly decrement counter of tx_si/rx_si users */
	netmap_unset_ringid(priv);
	/* delete the nifp */
	netmap_mem_if_delete(na, priv->np_nifp);
	/* drop the allocator */
	netmap_mem_deref(na->nm_mem, na);
	/* mark the priv as unregistered */
	priv->np_na = NULL;
	priv->np_nifp = NULL;
}
97268b8534bSLuigi Rizzo 
97389cc2556SLuigi Rizzo /* call with NMG_LOCK held */
974f0ea3689SLuigi Rizzo static __inline int
975847bf383SLuigi Rizzo nm_si_user(struct netmap_priv_d *priv, enum txrx t)
976f0ea3689SLuigi Rizzo {
977f0ea3689SLuigi Rizzo 	return (priv->np_na != NULL &&
978847bf383SLuigi Rizzo 		(priv->np_qlast[t] - priv->np_qfirst[t] > 1));
979f0ea3689SLuigi Rizzo }
980f0ea3689SLuigi Rizzo 
98137e3a6d3SLuigi Rizzo struct netmap_priv_d*
98237e3a6d3SLuigi Rizzo netmap_priv_new(void)
98337e3a6d3SLuigi Rizzo {
98437e3a6d3SLuigi Rizzo 	struct netmap_priv_d *priv;
98537e3a6d3SLuigi Rizzo 
98637e3a6d3SLuigi Rizzo 	priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
98737e3a6d3SLuigi Rizzo 			      M_NOWAIT | M_ZERO);
98837e3a6d3SLuigi Rizzo 	if (priv == NULL)
98937e3a6d3SLuigi Rizzo 		return NULL;
99037e3a6d3SLuigi Rizzo 	priv->np_refs = 1;
99137e3a6d3SLuigi Rizzo 	nm_os_get_module();
99237e3a6d3SLuigi Rizzo 	return priv;
99337e3a6d3SLuigi Rizzo }
99437e3a6d3SLuigi Rizzo 
995ce3ee1e7SLuigi Rizzo /*
9968fd44c93SLuigi Rizzo  * Destructor of the netmap_priv_d, called when the fd is closed
9978fd44c93SLuigi Rizzo  * Action: undo all the things done by NIOCREGIF,
9988fd44c93SLuigi Rizzo  * On FreeBSD we need to track whether there are active mmap()s,
9998fd44c93SLuigi Rizzo  * and we use np_active_mmaps for that. On linux, the field is always 0.
10008fd44c93SLuigi Rizzo  * Return: 1 if we can free priv, 0 otherwise.
100189cc2556SLuigi Rizzo  *
1002ce3ee1e7SLuigi Rizzo  */
100389cc2556SLuigi Rizzo /* call with NMG_LOCK held */
void
netmap_priv_delete(struct netmap_priv_d *priv)
{
	struct netmap_adapter *na = priv->np_na;

	/* number of active references to this fd */
	if (--priv->np_refs > 0) {
		return;
	}
	/* last reference gone: release the module ref taken in
	 * netmap_priv_new() and undo any NIOCREGIF */
	nm_os_put_module();
	if (na) {
		netmap_do_unregif(priv);
	}
	/* release the adapter/ifp references (na may be NULL here) */
	netmap_unget_na(na, priv->np_ifp);
	bzero(priv, sizeof(*priv));	/* for safety */
	free(priv, M_DEVBUF);
}
10215819da83SLuigi Rizzo 
1022f9790aebSLuigi Rizzo 
/* call with NMG_LOCK *not* held.
 * fd destructor: drop one reference on the private descriptor under
 * the global netmap lock. */
void
netmap_dtor(void *data)
{
	struct netmap_priv_d *priv = (struct netmap_priv_d *)data;

	NMG_LOCK();
	netmap_priv_delete(priv);
	NMG_UNLOCK();
}
103368b8534bSLuigi Rizzo 
1034f18be576SLuigi Rizzo 
103568b8534bSLuigi Rizzo 
103668b8534bSLuigi Rizzo 
103768b8534bSLuigi Rizzo /*
103802ad4083SLuigi Rizzo  * Handlers for synchronization of the queues from/to the host.
1039091fd0abSLuigi Rizzo  * Netmap has two operating modes:
1040091fd0abSLuigi Rizzo  * - in the default mode, the rings connected to the host stack are
1041091fd0abSLuigi Rizzo  *   just another ring pair managed by userspace;
1042091fd0abSLuigi Rizzo  * - in transparent mode (XXX to be defined) incoming packets
1043091fd0abSLuigi Rizzo  *   (from the host or the NIC) are marked as NS_FORWARD upon
1044091fd0abSLuigi Rizzo  *   arrival, and the user application has a chance to reset the
1045091fd0abSLuigi Rizzo  *   flag for packets that should be dropped.
1046091fd0abSLuigi Rizzo  *   On the RXSYNC or poll(), packets in RX rings between
 *   kring->nr_hwcur and ring->cur with NS_FORWARD still set are moved
1048091fd0abSLuigi Rizzo  *   to the other side.
1049091fd0abSLuigi Rizzo  * The transfer NIC --> host is relatively easy, just encapsulate
1050091fd0abSLuigi Rizzo  * into mbufs and we are done. The host --> NIC side is slightly
1051091fd0abSLuigi Rizzo  * harder because there might not be room in the tx ring so it
1052091fd0abSLuigi Rizzo  * might take a while before releasing the buffer.
1053091fd0abSLuigi Rizzo  */
1054091fd0abSLuigi Rizzo 
1055f18be576SLuigi Rizzo 
1056091fd0abSLuigi Rizzo /*
1057091fd0abSLuigi Rizzo  * pass a chain of buffers to the host stack as coming from 'dst'
105817885a7bSLuigi Rizzo  * We do not need to lock because the queue is private.
1059091fd0abSLuigi Rizzo  */
1060091fd0abSLuigi Rizzo static void
1061f9790aebSLuigi Rizzo netmap_send_up(struct ifnet *dst, struct mbq *q)
1062091fd0abSLuigi Rizzo {
1063091fd0abSLuigi Rizzo 	struct mbuf *m;
106437e3a6d3SLuigi Rizzo 	struct mbuf *head = NULL, *prev = NULL;
1065091fd0abSLuigi Rizzo 
1066091fd0abSLuigi Rizzo 	/* send packets up, outside the lock */
1067f9790aebSLuigi Rizzo 	while ((m = mbq_dequeue(q)) != NULL) {
1068091fd0abSLuigi Rizzo 		if (netmap_verbose & NM_VERB_HOST)
1069091fd0abSLuigi Rizzo 			D("sending up pkt %p size %d", m, MBUF_LEN(m));
107037e3a6d3SLuigi Rizzo 		prev = nm_os_send_up(dst, m, prev);
107137e3a6d3SLuigi Rizzo 		if (head == NULL)
107237e3a6d3SLuigi Rizzo 			head = prev;
1073091fd0abSLuigi Rizzo 	}
107437e3a6d3SLuigi Rizzo 	if (head)
107537e3a6d3SLuigi Rizzo 		nm_os_send_up(dst, NULL, head);
107637e3a6d3SLuigi Rizzo 	mbq_fini(q);
1077091fd0abSLuigi Rizzo }
1078091fd0abSLuigi Rizzo 
1079f18be576SLuigi Rizzo 
1080091fd0abSLuigi Rizzo /*
1081091fd0abSLuigi Rizzo  * put a copy of the buffers marked NS_FORWARD into an mbuf chain.
108217885a7bSLuigi Rizzo  * Take packets from hwcur to ring->head marked NS_FORWARD (or forced)
108317885a7bSLuigi Rizzo  * and pass them up. Drop remaining packets in the unlikely event
108417885a7bSLuigi Rizzo  * of an mbuf shortage.
1085091fd0abSLuigi Rizzo  */
1086091fd0abSLuigi Rizzo static void
1087091fd0abSLuigi Rizzo netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
1088091fd0abSLuigi Rizzo {
108917885a7bSLuigi Rizzo 	u_int const lim = kring->nkr_num_slots - 1;
1090847bf383SLuigi Rizzo 	u_int const head = kring->rhead;
109117885a7bSLuigi Rizzo 	u_int n;
1092f9790aebSLuigi Rizzo 	struct netmap_adapter *na = kring->na;
1093091fd0abSLuigi Rizzo 
109417885a7bSLuigi Rizzo 	for (n = kring->nr_hwcur; n != head; n = nm_next(n, lim)) {
109517885a7bSLuigi Rizzo 		struct mbuf *m;
1096091fd0abSLuigi Rizzo 		struct netmap_slot *slot = &kring->ring->slot[n];
1097091fd0abSLuigi Rizzo 
1098091fd0abSLuigi Rizzo 		if ((slot->flags & NS_FORWARD) == 0 && !force)
1099091fd0abSLuigi Rizzo 			continue;
11004bf50f18SLuigi Rizzo 		if (slot->len < 14 || slot->len > NETMAP_BUF_SIZE(na)) {
110117885a7bSLuigi Rizzo 			RD(5, "bad pkt at %d len %d", n, slot->len);
1102091fd0abSLuigi Rizzo 			continue;
1103091fd0abSLuigi Rizzo 		}
1104091fd0abSLuigi Rizzo 		slot->flags &= ~NS_FORWARD; // XXX needed ?
110517885a7bSLuigi Rizzo 		/* XXX TODO: adapt to the case of a multisegment packet */
11064bf50f18SLuigi Rizzo 		m = m_devget(NMB(na, slot), slot->len, 0, na->ifp, NULL);
1107091fd0abSLuigi Rizzo 
1108091fd0abSLuigi Rizzo 		if (m == NULL)
1109091fd0abSLuigi Rizzo 			break;
1110f9790aebSLuigi Rizzo 		mbq_enqueue(q, m);
1111091fd0abSLuigi Rizzo 	}
1112091fd0abSLuigi Rizzo }
1113091fd0abSLuigi Rizzo 
111437e3a6d3SLuigi Rizzo static inline int
111537e3a6d3SLuigi Rizzo _nm_may_forward(struct netmap_kring *kring)
111637e3a6d3SLuigi Rizzo {
111737e3a6d3SLuigi Rizzo 	return	((netmap_fwd || kring->ring->flags & NR_FORWARD) &&
111837e3a6d3SLuigi Rizzo 		 kring->na->na_flags & NAF_HOST_RINGS &&
111937e3a6d3SLuigi Rizzo 		 kring->tx == NR_RX);
112037e3a6d3SLuigi Rizzo }
112137e3a6d3SLuigi Rizzo 
112237e3a6d3SLuigi Rizzo static inline int
112337e3a6d3SLuigi Rizzo nm_may_forward_up(struct netmap_kring *kring)
112437e3a6d3SLuigi Rizzo {
112537e3a6d3SLuigi Rizzo 	return	_nm_may_forward(kring) &&
112637e3a6d3SLuigi Rizzo 		 kring->ring_id != kring->na->num_rx_rings;
112737e3a6d3SLuigi Rizzo }
112837e3a6d3SLuigi Rizzo 
112937e3a6d3SLuigi Rizzo static inline int
113037e3a6d3SLuigi Rizzo nm_may_forward_down(struct netmap_kring *kring)
113137e3a6d3SLuigi Rizzo {
113237e3a6d3SLuigi Rizzo 	return	_nm_may_forward(kring) &&
113337e3a6d3SLuigi Rizzo 		 kring->ring_id == kring->na->num_rx_rings;
113437e3a6d3SLuigi Rizzo }
1135f18be576SLuigi Rizzo 
1136091fd0abSLuigi Rizzo /*
113717885a7bSLuigi Rizzo  * Send to the NIC rings packets marked NS_FORWARD between
113817885a7bSLuigi Rizzo  * kring->nr_hwcur and kring->rhead
113917885a7bSLuigi Rizzo  * Called under kring->rx_queue.lock on the sw rx ring,
1140091fd0abSLuigi Rizzo  */
static u_int
netmap_sw_to_nic(struct netmap_adapter *na)
{
	/* the host rx kring is stored right after the hardware ones */
	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
	struct netmap_slot *rxslot = kring->ring->slot;
	u_int i, rxcur = kring->nr_hwcur;
	u_int const head = kring->rhead;
	u_int const src_lim = kring->nkr_num_slots - 1;
	u_int sent = 0;	/* slots forwarded to NIC tx rings */

	/* scan rings to find space, then fill as much as possible */
	for (i = 0; i < na->num_tx_rings; i++) {
		struct netmap_kring *kdst = &na->tx_rings[i];
		struct netmap_ring *rdst = kdst->ring;
		u_int const dst_lim = kdst->nkr_num_slots - 1;

		/* XXX do we trust ring or kring->rcur,rtail ? */
		for (; rxcur != head && !nm_ring_empty(rdst);
		     rxcur = nm_next(rxcur, src_lim) ) {
			struct netmap_slot *src, *dst, tmp;
			u_int dst_head = rdst->head;

			src = &rxslot[rxcur];
			/* forward only marked slots, unless forwarding
			 * is enabled globally via netmap_fwd */
			if ((src->flags & NS_FORWARD) == 0 && !netmap_fwd)
				continue;

			sent++;

			dst = &rdst->slot[dst_head];

			tmp = *src;

			/* swap buffers (zero-copy): the rx slot inherits
			 * the free tx buffer, the tx slot gets the
			 * received data; both sides must be told the
			 * buffer changed */
			src->buf_idx = dst->buf_idx;
			src->flags = NS_BUF_CHANGED;

			dst->buf_idx = tmp.buf_idx;
			dst->len = tmp.len;
			dst->flags = NS_BUF_CHANGED;

			/* advance the destination ring as if userspace
			 * had pushed the slot */
			rdst->head = rdst->cur = nm_next(dst_head, dst_lim);
		}
		/* if (sent) XXX txsync ? */
	}
	return sent;
}
1186091fd0abSLuigi Rizzo 
1187f18be576SLuigi Rizzo 
1188091fd0abSLuigi Rizzo /*
1189ce3ee1e7SLuigi Rizzo  * netmap_txsync_to_host() passes packets up. We are called from a
119002ad4083SLuigi Rizzo  * system call in user process context, and the only contention
119102ad4083SLuigi Rizzo  * can be among multiple user threads erroneously calling
1192091fd0abSLuigi Rizzo  * this routine concurrently.
119368b8534bSLuigi Rizzo  */
/* txsync backend for the host tx ring: deliver the slots between
 * nr_hwcur and rhead to the host stack as mbufs.  Always returns 0. */
static int
netmap_txsync_to_host(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	struct mbq q;

	/* Take packets from hwcur to head and pass them up.
	 * force head = cur since netmap_grab_packets() stops at head
	 * In case of no buffers we give up. At the end of the loop,
	 * the queue is drained in all cases.
	 */
	mbq_init(&q);
	/* must run before nr_hwcur is updated: it reads nr_hwcur */
	netmap_grab_packets(kring, &q, 1 /* force */);
	ND("have %d pkts in queue", mbq_len(&q));
	kring->nr_hwcur = head;
	/* new hwtail is one slot behind head, modulo ring size */
	kring->nr_hwtail = head + lim;
	if (kring->nr_hwtail > lim)
		kring->nr_hwtail -= lim + 1;

	netmap_send_up(na->ifp, &q);
	return 0;
}
1218f18be576SLuigi Rizzo 
1219f18be576SLuigi Rizzo 
122068b8534bSLuigi Rizzo /*
122102ad4083SLuigi Rizzo  * rxsync backend for packets coming from the host stack.
122217885a7bSLuigi Rizzo  * They have been put in kring->rx_queue by netmap_transmit().
122317885a7bSLuigi Rizzo  * We protect access to the kring using kring->rx_queue.lock
122402ad4083SLuigi Rizzo  *
12254bf50f18SLuigi Rizzo  * This routine also does the selrecord if called from the poll handler
122637e3a6d3SLuigi Rizzo  * (we know because sr != NULL).
12274bf50f18SLuigi Rizzo  *
122817885a7bSLuigi Rizzo  * returns the number of packets delivered to tx queues in
122917885a7bSLuigi Rizzo  * transparent mode, or a negative value if error
123068b8534bSLuigi Rizzo  */
static int
netmap_rxsync_from_host(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i, n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	int ret = 0;	/* 0 on success, becomes >0 only transiently below */
	struct mbq *q = &kring->rx_queue, fq;

	mbq_init(&fq); /* fq holds packets to be freed */

	/* everything up to mbq_unlock() runs with the rx_queue lock held */
	mbq_lock(q);

	/* First part: import newly received packets */
	n = mbq_len(q);
	if (n) { /* grab packets from the queue */
		struct mbuf *m;
		uint32_t stop_i;

		nm_i = kring->nr_hwtail;
		/* stop one slot short so hwtail never catches up with
		 * hwcur (ring-full condition) */
		stop_i = nm_prev(nm_i, lim);
		while ( nm_i != stop_i && (m = mbq_dequeue(q)) != NULL ) {
			int len = MBUF_LEN(m);
			struct netmap_slot *slot = &ring->slot[nm_i];

			/* copy the mbuf payload into the netmap buffer */
			m_copydata(m, 0, len, NMB(na, slot));
			ND("nm %d len %d", nm_i, len);
			if (netmap_verbose)
                                D("%s", nm_dump_buf(NMB(na, slot),len, 128, NULL));

			slot->len = len;
			slot->flags = kring->nkr_slot_flags;
			nm_i = nm_next(nm_i, lim);
			/* defer the free until after we drop the lock */
			mbq_enqueue(&fq, m);
		}
		kring->nr_hwtail = nm_i;
	}

	/*
	 * Second part: skip past packets that userspace has released.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) { /* something was released */
		if (nm_may_forward_down(kring)) {
			/* transparent mode: push released slots to the
			 * NIC tx rings; a positive count only flags
			 * NR_FORWARD, it is not an error */
			ret = netmap_sw_to_nic(na);
			if (ret > 0) {
				kring->nr_kflags |= NR_FORWARD;
				ret = 0;
			}
		}
		kring->nr_hwcur = head;
	}

	mbq_unlock(q);

	/* free the consumed mbufs outside the lock */
	mbq_purge(&fq);
	mbq_fini(&fq);

	return ret;
}
129368b8534bSLuigi Rizzo 
129468b8534bSLuigi Rizzo 
1295f9790aebSLuigi Rizzo /* Get a netmap adapter for the port.
1296f9790aebSLuigi Rizzo  *
1297f9790aebSLuigi Rizzo  * If it is possible to satisfy the request, return 0
1298f9790aebSLuigi Rizzo  * with *na containing the netmap adapter found.
1299f9790aebSLuigi Rizzo  * Otherwise return an error code, with *na containing NULL.
1300f9790aebSLuigi Rizzo  *
1301f9790aebSLuigi Rizzo  * When the port is attached to a bridge, we always return
1302f9790aebSLuigi Rizzo  * EBUSY.
1303f9790aebSLuigi Rizzo  * Otherwise, if the port is already bound to a file descriptor,
1304f9790aebSLuigi Rizzo  * then we unconditionally return the existing adapter into *na.
1305f9790aebSLuigi Rizzo  * In all the other cases, we return (into *na) either native,
1306f9790aebSLuigi Rizzo  * generic or NULL, according to the following table:
1307f9790aebSLuigi Rizzo  *
1308f9790aebSLuigi Rizzo  *					native_support
1309f9790aebSLuigi Rizzo  * active_fds   dev.netmap.admode         YES     NO
1310f9790aebSLuigi Rizzo  * -------------------------------------------------------
1311f9790aebSLuigi Rizzo  *    >0              *                 NA(ifp) NA(ifp)
1312f9790aebSLuigi Rizzo  *
1313f9790aebSLuigi Rizzo  *     0        NETMAP_ADMODE_BEST      NATIVE  GENERIC
1314f9790aebSLuigi Rizzo  *     0        NETMAP_ADMODE_NATIVE    NATIVE   NULL
1315f9790aebSLuigi Rizzo  *     0        NETMAP_ADMODE_GENERIC   GENERIC GENERIC
1316f9790aebSLuigi Rizzo  *
1317f9790aebSLuigi Rizzo  */
131837e3a6d3SLuigi Rizzo static void netmap_hw_dtor(struct netmap_adapter *); /* needed by NM_IS_NATIVE() */
int
netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
{
	/* generic support */
	int i = netmap_admode;	/* Take a snapshot. */
	struct netmap_adapter *prev_na;
	int error = 0;

	*na = NULL; /* default */

	/* reset in case of invalid value */
	if (i < NETMAP_ADMODE_BEST || i >= NETMAP_ADMODE_LAST)
		i = netmap_admode = NETMAP_ADMODE_BEST;

	if (NM_NA_VALID(ifp)) {
		prev_na = NA(ifp);
		/* If an adapter already exists, return it if
		 * there are active file descriptors or if
		 * netmap is not forced to use generic
		 * adapters.
		 */
		if (NETMAP_OWNED_BY_ANY(prev_na)
			|| i != NETMAP_ADMODE_GENERIC
			|| prev_na->na_flags & NAF_FORCE_NATIVE
#ifdef WITH_PIPES
			/* ugly, but we cannot allow an adapter switch
			 * if some pipe is referring to this one
			 */
			|| prev_na->na_next_pipe > 0
#endif
		) {
			*na = prev_na;
			return 0;
		}
	}

	/* If there isn't native support and netmap is not allowed
	 * to use generic adapters, we cannot satisfy the request.
	 */
	if (!NM_IS_NATIVE(ifp) && i == NETMAP_ADMODE_NATIVE)
		return EOPNOTSUPP;

	/* Otherwise, create a generic adapter and return it,
	 * saving the previously used netmap adapter, if any.
	 *
	 * Note that here 'prev_na', if not NULL, MUST be a
	 * native adapter, and CANNOT be a generic one. This is
	 * true because generic adapters are created on demand, and
	 * destroyed when not used anymore. Therefore, if the adapter
	 * currently attached to an interface 'ifp' is generic, it
	 * must be that
	 * (NA(ifp)->active_fds > 0 || NETMAP_OWNED_BY_KERN(NA(ifp))).
	 * Consequently, if NA(ifp) is generic, we will enter one of
	 * the branches above. This ensures that we never override
	 * a generic adapter with another generic adapter.
	 */
	error = generic_netmap_attach(ifp);
	if (error)
		return error;

	/* generic_netmap_attach() installed the new adapter on ifp */
	*na = NA(ifp);
	return 0;
}
1382f9790aebSLuigi Rizzo 
1383f9790aebSLuigi Rizzo 
138468b8534bSLuigi Rizzo /*
1385ce3ee1e7SLuigi Rizzo  * MUST BE CALLED UNDER NMG_LOCK()
1386ce3ee1e7SLuigi Rizzo  *
1387f2637526SLuigi Rizzo  * Get a refcounted reference to a netmap adapter attached
1388f2637526SLuigi Rizzo  * to the interface specified by nmr.
1389ce3ee1e7SLuigi Rizzo  * This is always called in the execution of an ioctl().
1390ce3ee1e7SLuigi Rizzo  *
1391f2637526SLuigi Rizzo  * Return ENXIO if the interface specified by the request does
1392f2637526SLuigi Rizzo  * not exist, ENOTSUP if netmap is not supported by the interface,
1393f2637526SLuigi Rizzo  * EBUSY if the interface is already attached to a bridge,
1394f2637526SLuigi Rizzo  * EINVAL if parameters are invalid, ENOMEM if needed resources
1395f2637526SLuigi Rizzo  * could not be allocated.
1396f2637526SLuigi Rizzo  * If successful, hold a reference to the netmap adapter.
1397f18be576SLuigi Rizzo  *
139837e3a6d3SLuigi Rizzo  * If the interface specified by nmr is a system one, also keep
139937e3a6d3SLuigi Rizzo  * a reference to it and return a valid *ifp.
140068b8534bSLuigi Rizzo  */
int
netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na,
	      struct ifnet **ifp, int create)
{
	int error = 0;
	struct netmap_adapter *ret = NULL;

	*na = NULL;     /* default return value */
	*ifp = NULL;

	NMG_LOCK_ASSERT();

	/* We cascade through all possible types of netmap adapter.
	 * All netmap_get_*_na() functions return an error and an na,
	 * with the following combinations:
	 *
	 * error    na
	 *   0	   NULL		type doesn't match
	 *  !0	   NULL		type matches, but na creation/lookup failed
	 *   0	  !NULL		type matches and na created/found
	 *  !0    !NULL		impossible
	 */

	/* try to see if this is a ptnetmap port */
	error = netmap_get_pt_host_na(nmr, na, create);
	if (error || *na != NULL)
		return error;

	/* try to see if this is a monitor port */
	error = netmap_get_monitor_na(nmr, na, create);
	if (error || *na != NULL)
		return error;

	/* try to see if this is a pipe port */
	error = netmap_get_pipe_na(nmr, na, create);
	if (error || *na != NULL)
		return error;

	/* try to see if this is a bridge port */
	error = netmap_get_bdg_na(nmr, na, create);
	if (error)
		return error;

	if (*na != NULL) /* valid match in netmap_get_bdg_na() */
		goto out;

	/*
	 * This must be a hardware na, lookup the name in the system.
	 * Note that by hardware we actually mean "it shows up in ifconfig".
	 * This may still be a tap, a veth/epair, or even a
	 * persistent VALE port.
	 */
	*ifp = ifunit_ref(nmr->nr_name);	/* takes an ifnet reference */
	if (*ifp == NULL) {
	        return ENXIO;
	}

	error = netmap_get_hw_na(*ifp, &ret);
	if (error)
		goto out;

	*na = ret;
	netmap_adapter_get(ret);

out:
	/* on failure, undo both the adapter and the ifnet references
	 * so the caller sees *na == NULL, *ifp == NULL */
	if (error) {
		if (ret)
			netmap_adapter_put(ret);
		if (*ifp) {
			if_rele(*ifp);
			*ifp = NULL;
		}
	}

	return error;
}
1477ce3ee1e7SLuigi Rizzo 
147837e3a6d3SLuigi Rizzo /* undo netmap_get_na() */
147937e3a6d3SLuigi Rizzo void
148037e3a6d3SLuigi Rizzo netmap_unget_na(struct netmap_adapter *na, struct ifnet *ifp)
148137e3a6d3SLuigi Rizzo {
148237e3a6d3SLuigi Rizzo 	if (ifp)
148337e3a6d3SLuigi Rizzo 		if_rele(ifp);
148437e3a6d3SLuigi Rizzo 	if (na)
148537e3a6d3SLuigi Rizzo 		netmap_adapter_put(na);
148637e3a6d3SLuigi Rizzo }
148737e3a6d3SLuigi Rizzo 
148837e3a6d3SLuigi Rizzo 
/* Validation helper for the *sync prologues below: if condition t holds,
 * log (rate-limited) the offending condition together with the ring and
 * kring state, and return kring->nkr_num_slots from the enclosing
 * function, which callers treat as the error value.
 * NB: references 'kring', 'ring', 'head' and 'cur', which must all be
 * in scope at the expansion site. */
#define NM_FAIL_ON(t) do {						\
	if (unlikely(t)) {						\
		RD(5, "%s: fail '" #t "' "				\
			"h %d c %d t %d "				\
			"rh %d rc %d rt %d "				\
			"hc %d ht %d",					\
			kring->name,					\
			head, cur, ring->tail,				\
			kring->rhead, kring->rcur, kring->rtail,	\
			kring->nr_hwcur, kring->nr_hwtail);		\
		return kring->nkr_num_slots;				\
	}								\
} while (0)
1502ce3ee1e7SLuigi Rizzo 
1503f9790aebSLuigi Rizzo /*
1504f9790aebSLuigi Rizzo  * validate parameters on entry for *_txsync()
1505f9790aebSLuigi Rizzo  * Returns ring->cur if ok, or something >= kring->nkr_num_slots
150617885a7bSLuigi Rizzo  * in case of error.
1507f9790aebSLuigi Rizzo  *
150817885a7bSLuigi Rizzo  * rhead, rcur and rtail=hwtail are stored from previous round.
150917885a7bSLuigi Rizzo  * hwcur is the next packet to send to the ring.
1510f9790aebSLuigi Rizzo  *
151117885a7bSLuigi Rizzo  * We want
151217885a7bSLuigi Rizzo  *    hwcur <= *rhead <= head <= cur <= tail = *rtail <= hwtail
1513f9790aebSLuigi Rizzo  *
151417885a7bSLuigi Rizzo  * hwcur, rhead, rtail and hwtail are reliable
1515f9790aebSLuigi Rizzo  */
u_int
nm_txsync_prologue(struct netmap_kring *kring, struct netmap_ring *ring)
{
	u_int head = ring->head; /* read only once */
	u_int cur = ring->cur; /* read only once */
	u_int n = kring->nkr_num_slots;

	ND(5, "%s kcur %d ktail %d head %d cur %d tail %d",
		kring->name,
		kring->nr_hwcur, kring->nr_hwtail,
		ring->head, ring->cur, ring->tail);
#if 1 /* kernel sanity checks; but we can trust the kring. */
	NM_FAIL_ON(kring->nr_hwcur >= n || kring->rhead >= n ||
	    kring->rtail >= n ||  kring->nr_hwtail >= n);
#endif /* kernel sanity checks */
	/*
	 * user sanity checks. We only use head,
	 * A, B, ... are possible positions for head:
	 *
	 *  0    A  rhead   B  rtail   C  n-1
	 *  0    D  rtail   E  rhead   F  n-1
	 *
	 * B, F, D are valid. A, C, E are wrong
	 */
	if (kring->rtail >= kring->rhead) {
		/* want rhead <= head <= rtail */
		NM_FAIL_ON(head < kring->rhead || head > kring->rtail);
		/* and also head <= cur <= rtail */
		NM_FAIL_ON(cur < head || cur > kring->rtail);
	} else { /* here rtail < rhead */
		/* we need head outside rtail .. rhead */
		NM_FAIL_ON(head > kring->rtail && head < kring->rhead);

		/* two cases now: head <= rtail or head >= rhead  */
		if (head <= kring->rtail) {
			/* want head <= cur <= rtail */
			NM_FAIL_ON(cur < head || cur > kring->rtail);
		} else { /* head >= rhead */
			/* cur must be outside rtail..head */
			NM_FAIL_ON(cur > kring->rtail && cur < head);
		}
	}
	/* userspace is not supposed to touch tail: restore it */
	if (ring->tail != kring->rtail) {
		RD(5, "%s tail overwritten was %d need %d", kring->name,
			ring->tail, kring->rtail);
		ring->tail = kring->rtail;
	}
	/* commit the validated values into the kring */
	kring->rhead = head;
	kring->rcur = cur;
	return head;
}
156768b8534bSLuigi Rizzo 
156868b8534bSLuigi Rizzo 
156968b8534bSLuigi Rizzo /*
1570f9790aebSLuigi Rizzo  * validate parameters on entry for *_rxsync()
157117885a7bSLuigi Rizzo  * Returns ring->head if ok, kring->nkr_num_slots on error.
1572f9790aebSLuigi Rizzo  *
157317885a7bSLuigi Rizzo  * For a valid configuration,
157417885a7bSLuigi Rizzo  * hwcur <= head <= cur <= tail <= hwtail
1575f9790aebSLuigi Rizzo  *
157617885a7bSLuigi Rizzo  * We only consider head and cur.
157717885a7bSLuigi Rizzo  * hwcur and hwtail are reliable.
1578f9790aebSLuigi Rizzo  *
1579f9790aebSLuigi Rizzo  */
u_int
nm_rxsync_prologue(struct netmap_kring *kring, struct netmap_ring *ring)
{
	uint32_t const n = kring->nkr_num_slots;
	uint32_t head, cur;

	ND(5,"%s kc %d kt %d h %d c %d t %d",
		kring->name,
		kring->nr_hwcur, kring->nr_hwtail,
		ring->head, ring->cur, ring->tail);
	/*
	 * Before storing the new values, we should check they do not
	 * move backwards. However:
	 * - head is not an issue because the previous value is hwcur;
	 * - cur could in principle go back, however it does not matter
	 *   because we are processing a brand new rxsync()
	 */
	cur = kring->rcur = ring->cur;	/* read only once */
	head = kring->rhead = ring->head;	/* read only once */
#if 1 /* kernel sanity checks */
	NM_FAIL_ON(kring->nr_hwcur >= n || kring->nr_hwtail >= n);
#endif /* kernel sanity checks */
	/* user sanity checks */
	if (kring->nr_hwtail >= kring->nr_hwcur) {
		/* want hwcur <= rhead <= hwtail */
		NM_FAIL_ON(head < kring->nr_hwcur || head > kring->nr_hwtail);
		/* and also rhead <= rcur <= hwtail */
		NM_FAIL_ON(cur < head || cur > kring->nr_hwtail);
	} else {
		/* wrapped case: hwtail < hwcur */
		/* we need rhead outside hwtail..hwcur */
		NM_FAIL_ON(head < kring->nr_hwcur && head > kring->nr_hwtail);
		/* two cases now: head <= hwtail or head >= hwcur  */
		if (head <= kring->nr_hwtail) {
			/* want head <= cur <= hwtail */
			NM_FAIL_ON(cur < head || cur > kring->nr_hwtail);
		} else {
			/* cur must be outside hwtail..head */
			NM_FAIL_ON(cur < head && cur > kring->nr_hwtail);
		}
	}
	/* userspace is not supposed to touch tail: restore it */
	if (ring->tail != kring->rtail) {
		RD(5, "%s tail overwritten was %d need %d",
			kring->name,
			ring->tail, kring->rtail);
		ring->tail = kring->rtail;
	}
	return head;
}
1628f9790aebSLuigi Rizzo 
162917885a7bSLuigi Rizzo 
1630f9790aebSLuigi Rizzo /*
163168b8534bSLuigi Rizzo  * Error routine called when txsync/rxsync detects an error.
163217885a7bSLuigi Rizzo  * Can't do much more than resetting head =cur = hwcur, tail = hwtail
163368b8534bSLuigi Rizzo  * Return 1 on reinit.
1634506cc70cSLuigi Rizzo  *
1635506cc70cSLuigi Rizzo  * This routine is only called by the upper half of the kernel.
1636506cc70cSLuigi Rizzo  * It only reads hwcur (which is changed only by the upper half, too)
163717885a7bSLuigi Rizzo  * and hwtail (which may be changed by the lower half, but only on
1638506cc70cSLuigi Rizzo  * a tx ring and only to increase it, so any error will be recovered
1639506cc70cSLuigi Rizzo  * on the next call). For the above, we don't strictly need to call
1640506cc70cSLuigi Rizzo  * it under lock.
164168b8534bSLuigi Rizzo  */
164268b8534bSLuigi Rizzo int
164368b8534bSLuigi Rizzo netmap_ring_reinit(struct netmap_kring *kring)
164468b8534bSLuigi Rizzo {
164568b8534bSLuigi Rizzo 	struct netmap_ring *ring = kring->ring;
164668b8534bSLuigi Rizzo 	u_int i, lim = kring->nkr_num_slots - 1;
164768b8534bSLuigi Rizzo 	int errors = 0;
164868b8534bSLuigi Rizzo 
1649ce3ee1e7SLuigi Rizzo 	// XXX KASSERT nm_kr_tryget
16504bf50f18SLuigi Rizzo 	RD(10, "called for %s", kring->name);
165117885a7bSLuigi Rizzo 	// XXX probably wrong to trust userspace
165217885a7bSLuigi Rizzo 	kring->rhead = ring->head;
165317885a7bSLuigi Rizzo 	kring->rcur  = ring->cur;
165417885a7bSLuigi Rizzo 	kring->rtail = ring->tail;
165517885a7bSLuigi Rizzo 
165668b8534bSLuigi Rizzo 	if (ring->cur > lim)
165768b8534bSLuigi Rizzo 		errors++;
165817885a7bSLuigi Rizzo 	if (ring->head > lim)
165917885a7bSLuigi Rizzo 		errors++;
166017885a7bSLuigi Rizzo 	if (ring->tail > lim)
166117885a7bSLuigi Rizzo 		errors++;
166268b8534bSLuigi Rizzo 	for (i = 0; i <= lim; i++) {
166368b8534bSLuigi Rizzo 		u_int idx = ring->slot[i].buf_idx;
166468b8534bSLuigi Rizzo 		u_int len = ring->slot[i].len;
1665847bf383SLuigi Rizzo 		if (idx < 2 || idx >= kring->na->na_lut.objtotal) {
166617885a7bSLuigi Rizzo 			RD(5, "bad index at slot %d idx %d len %d ", i, idx, len);
166768b8534bSLuigi Rizzo 			ring->slot[i].buf_idx = 0;
166868b8534bSLuigi Rizzo 			ring->slot[i].len = 0;
16694bf50f18SLuigi Rizzo 		} else if (len > NETMAP_BUF_SIZE(kring->na)) {
167068b8534bSLuigi Rizzo 			ring->slot[i].len = 0;
167117885a7bSLuigi Rizzo 			RD(5, "bad len at slot %d idx %d len %d", i, idx, len);
167268b8534bSLuigi Rizzo 		}
167368b8534bSLuigi Rizzo 	}
167468b8534bSLuigi Rizzo 	if (errors) {
16758241616dSLuigi Rizzo 		RD(10, "total %d errors", errors);
167617885a7bSLuigi Rizzo 		RD(10, "%s reinit, cur %d -> %d tail %d -> %d",
167717885a7bSLuigi Rizzo 			kring->name,
167868b8534bSLuigi Rizzo 			ring->cur, kring->nr_hwcur,
167917885a7bSLuigi Rizzo 			ring->tail, kring->nr_hwtail);
168017885a7bSLuigi Rizzo 		ring->head = kring->rhead = kring->nr_hwcur;
168117885a7bSLuigi Rizzo 		ring->cur  = kring->rcur  = kring->nr_hwcur;
168217885a7bSLuigi Rizzo 		ring->tail = kring->rtail = kring->nr_hwtail;
168368b8534bSLuigi Rizzo 	}
168468b8534bSLuigi Rizzo 	return (errors ? 1 : 0);
168568b8534bSLuigi Rizzo }
168668b8534bSLuigi Rizzo 
/* interpret the ringid and flags fields of an nmreq, by translating them
 * into a pair of intervals of ring indices:
 *
 * [priv->np_txqfirst, priv->np_txqlast) and
 * [priv->np_rxqfirst, priv->np_rxqlast)
 *
 * Returns 0 on success, EINVAL if the request is inconsistent
 * (unknown register mode, host rings requested on an adapter
 * without them, out-of-range ring id, or an unsupported
 * combination with netmap passthrough).
 */
int
netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags)
{
	struct netmap_adapter *na = priv->np_na;
	u_int j, i = ringid & NETMAP_RING_MASK;
	u_int reg = flags & NR_REG_MASK;
	/* indexed by enum txrx: the flag that excludes that direction */
	int excluded_direction[] = { NR_TX_RINGS_ONLY, NR_RX_RINGS_ONLY };
	enum txrx t;

	if (reg == NR_REG_DEFAULT) {
		/* convert from old ringid to flags */
		if (ringid & NETMAP_SW_RING) {
			reg = NR_REG_SW;
		} else if (ringid & NETMAP_HW_RING) {
			reg = NR_REG_ONE_NIC;
		} else {
			reg = NR_REG_ALL_NIC;
		}
		D("deprecated API, old ringid 0x%x -> ringid %x reg %d", ringid, i, reg);
	}

	/* passthrough mode only works when all rings of the NIC are bound */
	if ((flags & NR_PTNETMAP_HOST) && (reg != NR_REG_ALL_NIC ||
			flags & (NR_RX_RINGS_ONLY|NR_TX_RINGS_ONLY))) {
		D("Error: only NR_REG_ALL_NIC supported with netmap passthrough");
		return EINVAL;
	}

	for_rx_tx(t) {
		/* a direction excluded by the flags gets an empty interval */
		if (flags & excluded_direction[t]) {
			priv->np_qfirst[t] = priv->np_qlast[t] = 0;
			continue;
		}
		switch (reg) {
		case NR_REG_ALL_NIC:
		case NR_REG_PIPE_MASTER:
		case NR_REG_PIPE_SLAVE:
			/* all hardware rings of this direction */
			priv->np_qfirst[t] = 0;
			priv->np_qlast[t] = nma_get_nrings(na, t);
			ND("ALL/PIPE: %s %d %d", nm_txrx2str(t),
				priv->np_qfirst[t], priv->np_qlast[t]);
			break;
		case NR_REG_SW:
		case NR_REG_NIC_SW:
			if (!(na->na_flags & NAF_HOST_RINGS)) {
				D("host rings not supported");
				return EINVAL;
			}
			/* the host ring lives at index nma_get_nrings();
			 * NR_REG_SW binds only that ring, NR_REG_NIC_SW
			 * binds all hardware rings plus the host ring */
			priv->np_qfirst[t] = (reg == NR_REG_SW ?
				nma_get_nrings(na, t) : 0);
			priv->np_qlast[t] = nma_get_nrings(na, t) + 1;
			ND("%s: %s %d %d", reg == NR_REG_SW ? "SW" : "NIC+SW",
				nm_txrx2str(t),
				priv->np_qfirst[t], priv->np_qlast[t]);
			break;
		case NR_REG_ONE_NIC:
			if (i >= na->num_tx_rings && i >= na->num_rx_rings) {
				D("invalid ring id %d", i);
				return EINVAL;
			}
			/* if not enough rings, use the first one */
			j = i;
			if (j >= nma_get_nrings(na, t))
				j = 0;
			priv->np_qfirst[t] = j;
			priv->np_qlast[t] = j + 1;
			ND("ONE_NIC: %s %d %d", nm_txrx2str(t),
				priv->np_qfirst[t], priv->np_qlast[t]);
			break;
		default:
			D("invalid regif type %d", reg);
			return EINVAL;
		}
	}
	/* remember the (possibly converted) register mode */
	priv->np_flags = (flags & ~NR_REG_MASK) | reg;

	if (netmap_verbose) {
		D("%s: tx [%d,%d) rx [%d,%d) id %d",
			na->name,
			priv->np_qfirst[NR_TX],
			priv->np_qlast[NR_TX],
			priv->np_qfirst[NR_RX],
			priv->np_qlast[NR_RX],
			i);
	}
	return 0;
}
178068b8534bSLuigi Rizzo 
17814bf50f18SLuigi Rizzo 
17824bf50f18SLuigi Rizzo /*
17834bf50f18SLuigi Rizzo  * Set the ring ID. For devices with a single queue, a request
17844bf50f18SLuigi Rizzo  * for all rings is the same as a single ring.
17854bf50f18SLuigi Rizzo  */
17864bf50f18SLuigi Rizzo static int
17874bf50f18SLuigi Rizzo netmap_set_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags)
17884bf50f18SLuigi Rizzo {
17894bf50f18SLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
17904bf50f18SLuigi Rizzo 	int error;
1791847bf383SLuigi Rizzo 	enum txrx t;
17924bf50f18SLuigi Rizzo 
17934bf50f18SLuigi Rizzo 	error = netmap_interp_ringid(priv, ringid, flags);
17944bf50f18SLuigi Rizzo 	if (error) {
17954bf50f18SLuigi Rizzo 		return error;
17964bf50f18SLuigi Rizzo 	}
17974bf50f18SLuigi Rizzo 
17984bf50f18SLuigi Rizzo 	priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1;
17994bf50f18SLuigi Rizzo 
18004bf50f18SLuigi Rizzo 	/* optimization: count the users registered for more than
18014bf50f18SLuigi Rizzo 	 * one ring, which are the ones sleeping on the global queue.
18024bf50f18SLuigi Rizzo 	 * The default netmap_notify() callback will then
18034bf50f18SLuigi Rizzo 	 * avoid signaling the global queue if nobody is using it
18044bf50f18SLuigi Rizzo 	 */
1805847bf383SLuigi Rizzo 	for_rx_tx(t) {
1806847bf383SLuigi Rizzo 		if (nm_si_user(priv, t))
1807847bf383SLuigi Rizzo 			na->si_users[t]++;
1808847bf383SLuigi Rizzo 	}
18094bf50f18SLuigi Rizzo 	return 0;
18104bf50f18SLuigi Rizzo }
18114bf50f18SLuigi Rizzo 
1812847bf383SLuigi Rizzo static void
1813847bf383SLuigi Rizzo netmap_unset_ringid(struct netmap_priv_d *priv)
1814847bf383SLuigi Rizzo {
1815847bf383SLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
1816847bf383SLuigi Rizzo 	enum txrx t;
1817847bf383SLuigi Rizzo 
1818847bf383SLuigi Rizzo 	for_rx_tx(t) {
1819847bf383SLuigi Rizzo 		if (nm_si_user(priv, t))
1820847bf383SLuigi Rizzo 			na->si_users[t]--;
1821847bf383SLuigi Rizzo 		priv->np_qfirst[t] = priv->np_qlast[t] = 0;
1822847bf383SLuigi Rizzo 	}
1823847bf383SLuigi Rizzo 	priv->np_flags = 0;
1824847bf383SLuigi Rizzo 	priv->np_txpoll = 0;
1825847bf383SLuigi Rizzo }
1826847bf383SLuigi Rizzo 
1827847bf383SLuigi Rizzo 
182837e3a6d3SLuigi Rizzo /* Set the nr_pending_mode for the requested rings.
182937e3a6d3SLuigi Rizzo  * If requested, also try to get exclusive access to the rings, provided
183037e3a6d3SLuigi Rizzo  * the rings we want to bind are not exclusively owned by a previous bind.
1831847bf383SLuigi Rizzo  */
1832847bf383SLuigi Rizzo static int
183337e3a6d3SLuigi Rizzo netmap_krings_get(struct netmap_priv_d *priv)
1834847bf383SLuigi Rizzo {
1835847bf383SLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
1836847bf383SLuigi Rizzo 	u_int i;
1837847bf383SLuigi Rizzo 	struct netmap_kring *kring;
1838847bf383SLuigi Rizzo 	int excl = (priv->np_flags & NR_EXCLUSIVE);
1839847bf383SLuigi Rizzo 	enum txrx t;
1840847bf383SLuigi Rizzo 
1841847bf383SLuigi Rizzo 	ND("%s: grabbing tx [%d, %d) rx [%d, %d)",
1842847bf383SLuigi Rizzo 			na->name,
1843847bf383SLuigi Rizzo 			priv->np_qfirst[NR_TX],
1844847bf383SLuigi Rizzo 			priv->np_qlast[NR_TX],
1845847bf383SLuigi Rizzo 			priv->np_qfirst[NR_RX],
1846847bf383SLuigi Rizzo 			priv->np_qlast[NR_RX]);
1847847bf383SLuigi Rizzo 
1848847bf383SLuigi Rizzo 	/* first round: check that all the requested rings
1849847bf383SLuigi Rizzo 	 * are neither alread exclusively owned, nor we
1850847bf383SLuigi Rizzo 	 * want exclusive ownership when they are already in use
1851847bf383SLuigi Rizzo 	 */
1852847bf383SLuigi Rizzo 	for_rx_tx(t) {
1853847bf383SLuigi Rizzo 		for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
1854847bf383SLuigi Rizzo 			kring = &NMR(na, t)[i];
1855847bf383SLuigi Rizzo 			if ((kring->nr_kflags & NKR_EXCLUSIVE) ||
1856847bf383SLuigi Rizzo 			    (kring->users && excl))
1857847bf383SLuigi Rizzo 			{
1858847bf383SLuigi Rizzo 				ND("ring %s busy", kring->name);
1859847bf383SLuigi Rizzo 				return EBUSY;
1860847bf383SLuigi Rizzo 			}
1861847bf383SLuigi Rizzo 		}
1862847bf383SLuigi Rizzo 	}
1863847bf383SLuigi Rizzo 
186437e3a6d3SLuigi Rizzo 	/* second round: increment usage count (possibly marking them
186537e3a6d3SLuigi Rizzo 	 * as exclusive) and set the nr_pending_mode
1866847bf383SLuigi Rizzo 	 */
1867847bf383SLuigi Rizzo 	for_rx_tx(t) {
1868847bf383SLuigi Rizzo 		for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
1869847bf383SLuigi Rizzo 			kring = &NMR(na, t)[i];
1870847bf383SLuigi Rizzo 			kring->users++;
1871847bf383SLuigi Rizzo 			if (excl)
1872847bf383SLuigi Rizzo 				kring->nr_kflags |= NKR_EXCLUSIVE;
187337e3a6d3SLuigi Rizzo 	                kring->nr_pending_mode = NKR_NETMAP_ON;
1874847bf383SLuigi Rizzo 		}
1875847bf383SLuigi Rizzo 	}
1876847bf383SLuigi Rizzo 
1877847bf383SLuigi Rizzo 	return 0;
1878847bf383SLuigi Rizzo 
1879847bf383SLuigi Rizzo }
1880847bf383SLuigi Rizzo 
188137e3a6d3SLuigi Rizzo /* Undo netmap_krings_get(). This is done by clearing the exclusive mode
188237e3a6d3SLuigi Rizzo  * if was asked on regif, and unset the nr_pending_mode if we are the
188337e3a6d3SLuigi Rizzo  * last users of the involved rings. */
1884847bf383SLuigi Rizzo static void
188537e3a6d3SLuigi Rizzo netmap_krings_put(struct netmap_priv_d *priv)
1886847bf383SLuigi Rizzo {
1887847bf383SLuigi Rizzo 	struct netmap_adapter *na = priv->np_na;
1888847bf383SLuigi Rizzo 	u_int i;
1889847bf383SLuigi Rizzo 	struct netmap_kring *kring;
1890847bf383SLuigi Rizzo 	int excl = (priv->np_flags & NR_EXCLUSIVE);
1891847bf383SLuigi Rizzo 	enum txrx t;
1892847bf383SLuigi Rizzo 
1893847bf383SLuigi Rizzo 	ND("%s: releasing tx [%d, %d) rx [%d, %d)",
1894847bf383SLuigi Rizzo 			na->name,
1895847bf383SLuigi Rizzo 			priv->np_qfirst[NR_TX],
1896847bf383SLuigi Rizzo 			priv->np_qlast[NR_TX],
1897847bf383SLuigi Rizzo 			priv->np_qfirst[NR_RX],
1898847bf383SLuigi Rizzo 			priv->np_qlast[MR_RX]);
1899847bf383SLuigi Rizzo 
1900847bf383SLuigi Rizzo 
1901847bf383SLuigi Rizzo 	for_rx_tx(t) {
1902847bf383SLuigi Rizzo 		for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
1903847bf383SLuigi Rizzo 			kring = &NMR(na, t)[i];
1904847bf383SLuigi Rizzo 			if (excl)
1905847bf383SLuigi Rizzo 				kring->nr_kflags &= ~NKR_EXCLUSIVE;
1906847bf383SLuigi Rizzo 			kring->users--;
190737e3a6d3SLuigi Rizzo 			if (kring->users == 0)
190837e3a6d3SLuigi Rizzo 				kring->nr_pending_mode = NKR_NETMAP_OFF;
1909847bf383SLuigi Rizzo 		}
1910847bf383SLuigi Rizzo 	}
1911847bf383SLuigi Rizzo }
1912847bf383SLuigi Rizzo 
/*
 * possibly move the interface to netmap-mode.
 * If success it returns a pointer to netmap_if, otherwise NULL.
 * This must be called with NMG_LOCK held.
 *
 * The following na callbacks are called in the process:
 *
 * na->nm_config()			[by netmap_update_config]
 * (get current number and size of rings)
 *
 *  	We have a generic one for linux (netmap_linux_config).
 *  	The bwrap has to override this, since it has to forward
 *  	the request to the wrapped adapter (netmap_bwrap_config).
 *
 *
 * na->nm_krings_create()
 * (create and init the krings array)
 *
 * 	One of the following:
 *
 *	* netmap_hw_krings_create, 			(hw ports)
 *		creates the standard layout for the krings
 * 		and adds the mbq (used for the host rings).
 *
 * 	* netmap_vp_krings_create			(VALE ports)
 * 		add leases and scratchpads
 *
 * 	* netmap_pipe_krings_create			(pipes)
 * 		create the krings and rings of both ends and
 * 		cross-link them
 *
 *      * netmap_monitor_krings_create 			(monitors)
 *      	avoid allocating the mbq
 *
 *      * netmap_bwrap_krings_create			(bwraps)
 *      	create both the bwrap krings array,
 *      	the krings array of the wrapped adapter, and
 *      	(if needed) the fake array for the host adapter
 *
 * na->nm_register(, 1)
 * (put the adapter in netmap mode)
 *
 * 	This may be one of the following:
 *
 * 	* netmap_hw_reg				        (hw ports)
 * 		checks that the ifp is still there, then calls
 * 		the hardware specific callback;
 *
 * 	* netmap_vp_reg					(VALE ports)
 *		If the port is connected to a bridge,
 *		set the NAF_NETMAP_ON flag under the
 *		bridge write lock.
 *
 *	* netmap_pipe_reg				(pipes)
 *		inform the other pipe end that it is no
 *		longer responsible for the lifetime of this
 *		pipe end
 *
 *	* netmap_monitor_reg				(monitors)
 *		intercept the sync callbacks of the monitored
 *		rings
 *
 *	* netmap_bwrap_reg				(bwraps)
 *		cross-link the bwrap and hwna rings,
 *		forward the request to the hwna, override
 *		the hwna notify callback (to get the frames
 *		coming from outside go through the bridge).
 *
 *
 */
int
netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
	uint16_t ringid, uint32_t flags)
{
	struct netmap_if *nifp = NULL;
	int error;

	NMG_LOCK_ASSERT();
	/* ring configuration may have changed, fetch from the card */
	netmap_update_config(na);
	priv->np_na = na;     /* store the reference */
	error = netmap_set_ringid(priv, ringid, flags);
	if (error)
		goto err;
	error = netmap_mem_finalize(na->nm_mem, na);
	if (error)
		goto err;

	if (na->active_fds == 0) {
		/*
		 * If this is the first registration of the adapter,
		 * create the  in-kernel view of the netmap rings,
		 * the netmap krings.
		 */

		/*
		 * Depending on the adapter, this may also create
		 * the netmap rings themselves
		 */
		error = na->nm_krings_create(na);
		if (error)
			goto err_drop_mem;

	}

	/* now the krings must exist and we can check whether some
	 * previous bind has exclusive ownership on them, and set
	 * nr_pending_mode
	 */
	error = netmap_krings_get(priv);
	if (error)
		goto err_del_krings;

	/* create all needed missing netmap rings */
	error = netmap_mem_rings_create(na);
	if (error)
		goto err_rel_excl;

	/* in all cases, create a new netmap if */
	/* NOTE(review): on nifp allocation failure we jump to
	 * err_del_rings, which skips netmap_krings_put() (label
	 * err_rel_excl sits above it) — the kring->users references
	 * taken by netmap_krings_get() above would not be dropped.
	 * Looks unintended; confirm against upstream label ordering. */
	nifp = netmap_mem_if_new(na);
	if (nifp == NULL) {
		error = ENOMEM;
		goto err_del_rings;
	}

	if (na->active_fds == 0) {
		/* cache the allocator info in the na */
		error = netmap_mem_get_lut(na->nm_mem, &na->na_lut);
		if (error)
			goto err_del_if;
		ND("lut %p bufs %u size %u", na->na_lut.lut, na->na_lut.objtotal,
					    na->na_lut.objsize);
	}

	if (nm_kring_pending(priv)) {
		/* Some kring is switching mode, tell the adapter to
		 * react on this. */
		error = na->nm_register(na, 1);
		if (error)
			goto err_put_lut;
	}

	/* Commit the reference. */
	na->active_fds++;

	/*
	 * advertise that the interface is ready by setting np_nifp.
	 * The barrier is needed because readers (poll, *SYNC and mmap)
	 * check for priv->np_nifp != NULL without locking
	 */
	mb(); /* make sure previous writes are visible to all CPUs */
	priv->np_nifp = nifp;

	return 0;

	/* error path: unwind in reverse order of construction */
err_put_lut:
	if (na->active_fds == 0)
		memset(&na->na_lut, 0, sizeof(na->na_lut));
err_del_if:
	netmap_mem_if_delete(na, nifp);
err_rel_excl:
	netmap_krings_put(priv);
err_del_rings:
	netmap_mem_rings_delete(na);
err_del_krings:
	if (na->active_fds == 0)
		na->nm_krings_delete(na);
err_drop_mem:
	netmap_mem_deref(na->nm_mem, na);
err:
	priv->np_na = NULL;
	return error;
}
2086847bf383SLuigi Rizzo 
2087847bf383SLuigi Rizzo 
2088847bf383SLuigi Rizzo /*
208937e3a6d3SLuigi Rizzo  * update kring and ring at the end of rxsync/txsync.
2090847bf383SLuigi Rizzo  */
2091847bf383SLuigi Rizzo static inline void
209237e3a6d3SLuigi Rizzo nm_sync_finalize(struct netmap_kring *kring)
2093847bf383SLuigi Rizzo {
209437e3a6d3SLuigi Rizzo 	/*
209537e3a6d3SLuigi Rizzo 	 * Update ring tail to what the kernel knows
209637e3a6d3SLuigi Rizzo 	 * After txsync: head/rhead/hwcur might be behind cur/rcur
209737e3a6d3SLuigi Rizzo 	 * if no carrier.
209837e3a6d3SLuigi Rizzo 	 */
2099847bf383SLuigi Rizzo 	kring->ring->tail = kring->rtail = kring->nr_hwtail;
2100847bf383SLuigi Rizzo 
2101847bf383SLuigi Rizzo 	ND(5, "%s now hwcur %d hwtail %d head %d cur %d tail %d",
2102847bf383SLuigi Rizzo 		kring->name, kring->nr_hwcur, kring->nr_hwtail,
2103847bf383SLuigi Rizzo 		kring->rhead, kring->rcur, kring->rtail);
2104847bf383SLuigi Rizzo }
2105847bf383SLuigi Rizzo 
/*
 * ioctl(2) support for the "netmap" device.
 *
 * Following a list of accepted commands:
 * - NIOCGINFO
 * - SIOCGIFADDR	just for convenience
 * - NIOCREGIF
 * - NIOCTXSYNC
 * - NIOCRXSYNC
 *
 * Return 0 on success, errno otherwise.
 */
int
netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread *td)
{
	struct nmreq *nmr = (struct nmreq *) data;
	struct netmap_adapter *na = NULL;
	struct ifnet *ifp = NULL;
	int error = 0;
	u_int i, qfirst, qlast;
	struct netmap_if *nifp;
	struct netmap_kring *krings;
	enum txrx t;

	if (cmd == NIOCGINFO || cmd == NIOCREGIF) {
		/* truncate name */
		nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0';
		/* a mismatched API version is reported back to the caller
		 * through nmr->nr_version before failing below */
		if (nmr->nr_version != NETMAP_API) {
			D("API mismatch for %s got %d need %d",
				nmr->nr_name,
				nmr->nr_version, NETMAP_API);
			nmr->nr_version = NETMAP_API;
		}
		if (nmr->nr_version < NETMAP_MIN_API ||
		    nmr->nr_version > NETMAP_MAX_API) {
			return EINVAL;
		}
	}

	switch (cmd) {
	case NIOCGINFO:		/* return capabilities etc */
		if (nmr->nr_cmd == NETMAP_BDG_LIST) {
			error = netmap_bdg_ctl(nmr, NULL);
			break;
		}

		NMG_LOCK();
		do {
			/* memsize is always valid */
			struct netmap_mem_d *nmd = &nm_mem;
			u_int memflags;

			if (nmr->nr_name[0] != '\0') {

				/* get a refcount */
				error = netmap_get_na(nmr, &na, &ifp, 1 /* create */);
				if (error) {
					na = NULL;
					ifp = NULL;
					break;
				}
				nmd = na->nm_mem; /* get memory allocator */
			}

			error = netmap_mem_get_info(nmd, &nmr->nr_memsize, &memflags,
				&nmr->nr_arg2);
			if (error)
				break;
			if (na == NULL) /* only memory info */
				break;
			/* fill in the current ring geometry of the adapter */
			nmr->nr_offset = 0;
			nmr->nr_rx_slots = nmr->nr_tx_slots = 0;
			netmap_update_config(na);
			nmr->nr_rx_rings = na->num_rx_rings;
			nmr->nr_tx_rings = na->num_tx_rings;
			nmr->nr_rx_slots = na->num_rx_desc;
			nmr->nr_tx_slots = na->num_tx_desc;
		} while (0);
		/* drop the reference taken above (no-op if na/ifp are NULL) */
		netmap_unget_na(na, ifp);
		NMG_UNLOCK();
		break;

	case NIOCREGIF:
		/* possibly attach/detach NIC and VALE switch */
		i = nmr->nr_cmd;
		if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH
				|| i == NETMAP_BDG_VNET_HDR
				|| i == NETMAP_BDG_NEWIF
				|| i == NETMAP_BDG_DELIF
				|| i == NETMAP_BDG_POLLING_ON
				|| i == NETMAP_BDG_POLLING_OFF) {
			error = netmap_bdg_ctl(nmr, NULL);
			break;
		} else if (i == NETMAP_PT_HOST_CREATE || i == NETMAP_PT_HOST_DELETE) {
			error = ptnetmap_ctl(nmr, priv->np_na);
			break;
		} else if (i == NETMAP_VNET_HDR_GET) {
			/* local ifp intentionally shadows the outer one */
			struct ifnet *ifp;

			NMG_LOCK();
			error = netmap_get_na(nmr, &na, &ifp, 0);
			if (na && !error) {
				nmr->nr_arg1 = na->virt_hdr_len;
			}
			netmap_unget_na(na, ifp);
			NMG_UNLOCK();
			break;
		} else if (i != 0) {
			D("nr_cmd must be 0 not %d", i);
			error = EINVAL;
			break;
		}

		/* protect access to priv from concurrent NIOCREGIF */
		NMG_LOCK();
		do {
			u_int memflags;
			struct ifnet *ifp;

			if (priv->np_nifp != NULL) {	/* thread already registered */
				error = EBUSY;
				break;
			}
			/* find the interface and a reference */
			error = netmap_get_na(nmr, &na, &ifp,
					      1 /* create */); /* keep reference */
			if (error)
				break;
			if (NETMAP_OWNED_BY_KERN(na)) {
				netmap_unget_na(na, ifp);
				error = EBUSY;
				break;
			}

			/* adapters that prepend a virtio-net header can only be
			 * bound by callers that declared they can handle it */
			if (na->virt_hdr_len && !(nmr->nr_flags & NR_ACCEPT_VNET_HDR)) {
				netmap_unget_na(na, ifp);
				error = EIO;
				break;
			}

			error = netmap_do_regif(priv, na, nmr->nr_ringid, nmr->nr_flags);
			if (error) {    /* reg. failed, release priv and ref */
				netmap_unget_na(na, ifp);
				break;
			}
			nifp = priv->np_nifp;
			priv->np_td = td; // XXX kqueue, debugging only

			/* return the offset of the netmap_if object */
			nmr->nr_rx_rings = na->num_rx_rings;
			nmr->nr_tx_rings = na->num_tx_rings;
			nmr->nr_rx_slots = na->num_rx_desc;
			nmr->nr_tx_slots = na->num_tx_desc;
			error = netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, &memflags,
				&nmr->nr_arg2);
			if (error) {
				/* roll back the registration done above */
				netmap_do_unregif(priv);
				netmap_unget_na(na, ifp);
				break;
			}
			if (memflags & NETMAP_MEM_PRIVATE) {
				/* cast drops const-ness of the shared nifp field */
				*(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM;
			}
			/* pick the wait queue: global if bound to many rings,
			 * per-ring otherwise */
			for_rx_tx(t) {
				priv->np_si[t] = nm_si_user(priv, t) ?
					&na->si[t] : &NMR(na, t)[priv->np_qfirst[t]].si;
			}

			if (nmr->nr_arg3) {
				if (netmap_verbose)
					D("requested %d extra buffers", nmr->nr_arg3);
				/* nr_arg3 is updated to the number actually granted */
				nmr->nr_arg3 = netmap_extra_alloc(na,
					&nifp->ni_bufs_head, nmr->nr_arg3);
				if (netmap_verbose)
					D("got %d extra buffers", nmr->nr_arg3);
			}
			nmr->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp);

			/* store ifp reference so that priv destructor may release it */
			priv->np_ifp = ifp;
		} while (0);
		NMG_UNLOCK();
		break;

	case NIOCTXSYNC:
	case NIOCRXSYNC:
		nifp = priv->np_nifp;

		if (nifp == NULL) {
			error = ENXIO;
			break;
		}
		mb(); /* make sure following reads are not from cache */

		na = priv->np_na;      /* we have a reference */

		if (na == NULL) {
			D("Internal error: nifp != NULL && na == NULL");
			error = ENXIO;
			break;
		}

		/* sync every ring in the interval bound by this priv */
		t = (cmd == NIOCTXSYNC ? NR_TX : NR_RX);
		krings = NMR(na, t);
		qfirst = priv->np_qfirst[t];
		qlast = priv->np_qlast[t];

		for (i = qfirst; i < qlast; i++) {
			struct netmap_kring *kring = krings + i;
			struct netmap_ring *ring = kring->ring;

			/* skip rings we cannot lock; a hard failure
			 * (error set by nm_kr_tryget) becomes EIO */
			if (unlikely(nm_kr_tryget(kring, 1, &error))) {
				error = (error ? EIO : 0);
				continue;
			}

			if (cmd == NIOCTXSYNC) {
				if (netmap_verbose & NM_VERB_TXSYNC)
					D("pre txsync ring %d cur %d hwcur %d",
					    i, ring->cur,
					    kring->nr_hwcur);
				/* an out-of-range prologue result means the
				 * user ring is corrupted: reinit instead of sync */
				if (nm_txsync_prologue(kring, ring) >= kring->nkr_num_slots) {
					netmap_ring_reinit(kring);
				} else if (kring->nm_sync(kring, NAF_FORCE_RECLAIM) == 0) {
					nm_sync_finalize(kring);
				}
				if (netmap_verbose & NM_VERB_TXSYNC)
					D("post txsync ring %d cur %d hwcur %d",
					    i, ring->cur,
					    kring->nr_hwcur);
			} else {
				if (nm_rxsync_prologue(kring, ring) >= kring->nkr_num_slots) {
					netmap_ring_reinit(kring);
				} else if (kring->nm_sync(kring, NAF_FORCE_READ) == 0) {
					nm_sync_finalize(kring);
				}
				microtime(&ring->ts);
			}
			nm_kr_put(kring);
		}

		break;

#ifdef WITH_VALE
	case NIOCCONFIG:
		error = netmap_bdg_config(nmr);
		break;
#endif
#ifdef __FreeBSD__
	case FIONBIO:
	case FIOASYNC:
		ND("FIONBIO/FIOASYNC are no-ops");
		break;

	case BIOCIMMEDIATE:
	case BIOCGHDRCMPLT:
	case BIOCSHDRCMPLT:
	case BIOCSSEESENT:
		D("ignore BIOCIMMEDIATE/BIOCSHDRCMPLT/BIOCSHDRCMPLT/BIOCSSEESENT");
		break;

	default:	/* allow device-specific ioctls */
	    {
		struct ifnet *ifp = ifunit_ref(nmr->nr_name);
		if (ifp == NULL) {
			error = ENXIO;
		} else {
			/* forward to the interface via a dummy socket */
			struct socket so;

			bzero(&so, sizeof(so));
			so.so_vnet = ifp->if_vnet;
			// so->so_proto not null.
			error = ifioctl(&so, cmd, data, td);
			if_rele(ifp);
		}
		break;
	    }

#else /* linux */
	default:
		error = EOPNOTSUPP;
#endif /* linux */
	}

	return (error);
}
239268b8534bSLuigi Rizzo 
239368b8534bSLuigi Rizzo 
/*
 * select(2) and poll(2) handlers for the "netmap" device.
 *
 * Can be called for one or more queues.
 * Return the event mask corresponding to ready events.
 * If there are no ready events, do a selrecord on either individual
 * selinfo or on the global one.
 * Device-dependent parts (locking and sync of tx/rx rings)
 * are done through callbacks.
 *
 * On linux, arguments are really pwait, the poll table, and 'td' is struct file *
 * The first one is remapped to pwait as selrecord() uses the name as a
 * hidden argument.
 */
int
netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
{
	struct netmap_adapter *na;
	struct netmap_kring *kring;
	struct netmap_ring *ring;
	u_int i, check_all_tx, check_all_rx, want[NR_TXRX], revents = 0;
	/* want[] holds the still-unsatisfied POLLOUT/POLLIN bits; the two
	 * macros below let the code keep using the historical names. */
#define want_tx want[NR_TX]
#define want_rx want[NR_RX]
	struct mbq q;		/* packets from hw queues to host stack */
	enum txrx t;

	/*
	 * In order to avoid nested locks, we need to "double check"
	 * txsync and rxsync if we decide to do a selrecord().
	 * retry_tx (and retry_rx, later) prevent looping forever.
	 */
	int retry_tx = 1, retry_rx = 1;

	/* transparent mode: send_down is 1 if we have found some
	 * packets to forward during the rx scan and we have not
	 * sent them down to the nic yet
	 */
	int send_down = 0;

	mbq_init(&q);

	/* np_nifp is set by a successful NIOCREGIF; without it the fd is
	 * not bound to any interface and polling makes no sense. */
	if (priv->np_nifp == NULL) {
		D("No if registered");
		return POLLERR;
	}
	mb(); /* make sure following reads are not from cache */

	na = priv->np_na;

	if (!nm_netmap_on(na))
		return POLLERR;

	if (netmap_verbose & 0x8000)
		D("device %s events 0x%x", na->name, events);
	want_tx = events & (POLLOUT | POLLWRNORM);
	want_rx = events & (POLLIN | POLLRDNORM);

	/*
	 * check_all_{tx|rx} are set if the card has more than one queue AND
	 * the file descriptor is bound to all of them. If so, we sleep on
	 * the "global" selinfo, otherwise we sleep on individual selinfo
	 * (FreeBSD only allows two selinfo's per file descriptor).
	 * The interrupt routine in the driver wakes one or the other
	 * (or both) depending on which clients are active.
	 *
	 * rxsync() is only called if we run out of buffers on a POLLIN.
	 * txsync() is called if we run out of buffers on POLLOUT, or
	 * there are pending packets to send. The latter can be disabled
	 * passing NETMAP_NO_TX_POLL in the NIOCREG call.
	 */
	check_all_tx = nm_si_user(priv, NR_TX);
	check_all_rx = nm_si_user(priv, NR_RX);

	/*
	 * We start with a lock free round which is cheap if we have
	 * slots available. If this fails, then lock and call the sync
	 * routines.
	 */
#if 1 /* new code- call rx if any of the ring needs to release or read buffers */
	if (want_tx) {
		t = NR_TX;
		for (i = priv->np_qfirst[t]; want[t] && i < priv->np_qlast[t]; i++) {
			kring = &NMR(na, t)[i];
			/* XXX compare ring->cur and kring->tail */
			if (!nm_ring_empty(kring->ring)) {
				revents |= want[t];
				want[t] = 0;	/* also breaks the loop */
			}
		}
	}
	if (want_rx) {
		want_rx = 0; /* look for a reason to run the handlers */
		t = NR_RX;
		for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
			kring = &NMR(na, t)[i];
			if (kring->ring->cur == kring->ring->tail /* try fetch new buffers */
			    || kring->rhead != kring->ring->head /* release buffers */) {
				want_rx = 1;
			}
		}
		if (!want_rx)
			revents |= events & (POLLIN | POLLRDNORM); /* we have data */
	}
#else /* old code */
	for_rx_tx(t) {
		for (i = priv->np_qfirst[t]; want[t] && i < priv->np_qlast[t]; i++) {
			kring = &NMR(na, t)[i];
			/* XXX compare ring->cur and kring->tail */
			if (!nm_ring_empty(kring->ring)) {
				revents |= want[t];
				want[t] = 0;	/* also breaks the loop */
			}
		}
	}
#endif /* old code */

	/*
	 * If we want to push packets out (priv->np_txpoll) or
	 * want_tx is still set, we must issue txsync calls
	 * (on all rings, to avoid that the tx rings stall).
	 * XXX should also check cur != hwcur on the tx rings.
	 * Fortunately, normal tx mode has np_txpoll set.
	 */
	if (priv->np_txpoll || want_tx) {
		/*
		 * The first round checks if anyone is ready, if not
		 * do a selrecord and another round to handle races.
		 * want_tx goes to 0 if any space is found, and is
		 * used to skip rings with no pending transmissions.
		 */
flush_tx:
		for (i = priv->np_qfirst[NR_TX]; i < priv->np_qlast[NR_TX]; i++) {
			int found = 0;

			kring = &na->tx_rings[i];
			ring = kring->ring;

			/* skip rings with nothing to do, unless we must push
			 * forwarded packets down (send_down) */
			if (!send_down && !want_tx && ring->cur == kring->nr_hwcur)
				continue;

			/* a busy/stopped ring is simply skipped; revents may
			 * collect POLLERR from nm_kr_tryget() */
			if (nm_kr_tryget(kring, 1, &revents))
				continue;

			if (nm_txsync_prologue(kring, ring) >= kring->nkr_num_slots) {
				netmap_ring_reinit(kring);
				revents |= POLLERR;
			} else {
				if (kring->nm_sync(kring, 0))
					revents |= POLLERR;
				else
					nm_sync_finalize(kring);
			}

			/*
			 * If we found new slots, notify potential
			 * listeners on the same ring.
			 * Since we just did a txsync, look at the copies
			 * of cur,tail in the kring.
			 */
			found = kring->rcur != kring->rtail;
			nm_kr_put(kring);
			if (found) { /* notify other listeners */
				revents |= want_tx;
				want_tx = 0;
				kring->nm_notify(kring, 0);
			}
		}
		/* if there were any packets to forward we must have handled them by now */
		send_down = 0;
		if (want_tx && retry_tx && sr) {
			nm_os_selrecord(sr, check_all_tx ?
			    &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]].si);
			retry_tx = 0;
			goto flush_tx;
		}
	}

	/*
	 * If want_rx is still set scan receive rings.
	 * Do it on all rings because otherwise we starve.
	 */
	if (want_rx) {
		/* two rounds here for race avoidance */
do_retry_rx:
		for (i = priv->np_qfirst[NR_RX]; i < priv->np_qlast[NR_RX]; i++) {
			int found = 0;

			kring = &na->rx_rings[i];
			ring = kring->ring;

			if (unlikely(nm_kr_tryget(kring, 1, &revents)))
				continue;

			if (nm_rxsync_prologue(kring, ring) >= kring->nkr_num_slots) {
				/* NOTE(review): after reinit the code still falls
				 * through to the sync below — presumably safe on a
				 * freshly reinitialized ring; confirm. */
				netmap_ring_reinit(kring);
				revents |= POLLERR;
			}
			/* now we can use kring->rcur, rtail */

			/*
			 * transparent mode support: collect packets
			 * from the rxring(s).
			 */
			if (nm_may_forward_up(kring)) {
				ND(10, "forwarding some buffers up %d to %d",
				    kring->nr_hwcur, ring->cur);
				netmap_grab_packets(kring, &q, netmap_fwd);
			}

			/* clear NR_FORWARD; the rxsync below may set it again */
			kring->nr_kflags &= ~NR_FORWARD;
			if (kring->nm_sync(kring, 0))
				revents |= POLLERR;
			else
				nm_sync_finalize(kring);
			send_down |= (kring->nr_kflags & NR_FORWARD); /* host ring only */
			if (netmap_no_timestamp == 0 ||
					ring->flags & NR_TIMESTAMP) {
				microtime(&ring->ts);
			}
			found = kring->rcur != kring->rtail;
			nm_kr_put(kring);
			if (found) {
				revents |= want_rx;
				retry_rx = 0;
				kring->nm_notify(kring, 0);
			}
		}

		/* record interest before the second round, to close the
		 * race between the check above and going to sleep */
		if (retry_rx && sr) {
			nm_os_selrecord(sr, check_all_rx ?
			    &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si);
		}
		if (send_down > 0 || retry_rx) {
			retry_rx = 0;
			if (send_down)
				goto flush_tx; /* and retry_rx */
			else
				goto do_retry_rx;
		}
	}

	/*
	 * Transparent mode: marked bufs on rx rings between
	 * kring->nr_hwcur and ring->head
	 * are passed to the other endpoint.
	 *
	 * Transparent mode requires to bind all
 	 * rings to a single file descriptor.
	 */

	if (q.head && !nm_kr_tryget(&na->tx_rings[na->num_tx_rings], 1, &revents)) {
		netmap_send_up(na->ifp, &q);
		nm_kr_put(&na->tx_rings[na->num_tx_rings]);
	}

	return (revents);
#undef want_tx
#undef want_rx
}
265368b8534bSLuigi Rizzo 
265417885a7bSLuigi Rizzo 
265517885a7bSLuigi Rizzo /*-------------------- driver support routines -------------------*/
265668b8534bSLuigi Rizzo 
265789cc2556SLuigi Rizzo /* default notify callback */
2658f9790aebSLuigi Rizzo static int
2659847bf383SLuigi Rizzo netmap_notify(struct netmap_kring *kring, int flags)
2660f9790aebSLuigi Rizzo {
2661847bf383SLuigi Rizzo 	struct netmap_adapter *na = kring->na;
2662847bf383SLuigi Rizzo 	enum txrx t = kring->tx;
2663f9790aebSLuigi Rizzo 
266437e3a6d3SLuigi Rizzo 	nm_os_selwakeup(&kring->si);
266589cc2556SLuigi Rizzo 	/* optimization: avoid a wake up on the global
266689cc2556SLuigi Rizzo 	 * queue if nobody has registered for more
266789cc2556SLuigi Rizzo 	 * than one ring
266889cc2556SLuigi Rizzo 	 */
2669847bf383SLuigi Rizzo 	if (na->si_users[t] > 0)
267037e3a6d3SLuigi Rizzo 		nm_os_selwakeup(&na->si[t]);
2671847bf383SLuigi Rizzo 
267237e3a6d3SLuigi Rizzo 	return NM_IRQ_COMPLETED;
2673f9790aebSLuigi Rizzo }
2674f9790aebSLuigi Rizzo 
#if 0
/* Dead code: an alternative (Windows event-based) notify implementation.
 * It is compiled out, uses the pre-kring callback signature, and
 * references an undefined 'notes' variable — kept only as a leftover. */
static int
netmap_notify(struct netmap_adapter *na, u_int n_ring,
enum txrx tx, int flags)
{
	if (tx == NR_TX) {
		KeSetEvent(notes->TX_EVENT, 0, FALSE);
	}
	else
	{
		KeSetEvent(notes->RX_EVENT, 0, FALSE);
	}
	return 0;
}
#endif
2690f9790aebSLuigi Rizzo 
/* called by all routines that create netmap_adapters.
 * provide some defaults and get a reference to the
 * memory allocator.
 * Returns 0 on success, EINVAL if the ring counts are not set.
 */
int
netmap_attach_common(struct netmap_adapter *na)
{
	/* the caller must have filled in the ring counts already */
	if (na->num_tx_rings == 0 || na->num_rx_rings == 0) {
		D("%s: invalid rings tx %d rx %d",
			na->name, na->num_tx_rings, na->num_rx_rings);
		return EINVAL;
	}

#ifdef __FreeBSD__
	/* save the original input routine so that packets forwarded
	 * from netmap can be re-injected into the host stack */
	if (na->na_flags & NAF_HOST_RINGS && na->ifp) {
		na->if_input = na->ifp->if_input; /* for netmap_send_up */
	}
#endif /* __FreeBSD__ */
	if (na->nm_krings_create == NULL) {
		/* we assume that we have been called by a driver,
		 * since other port types all provide their own
		 * nm_krings_create
		 */
		na->nm_krings_create = netmap_hw_krings_create;
		na->nm_krings_delete = netmap_hw_krings_delete;
	}
	if (na->nm_notify == NULL)
		na->nm_notify = netmap_notify;
	na->active_fds = 0;

	if (na->nm_mem == NULL)
		/* use the global allocator */
		na->nm_mem = &nm_mem;
	/* take a reference on the allocator; released in netmap_detach_common() */
	netmap_mem_get(na->nm_mem);
#ifdef WITH_VALE
	if (na->nm_bdg_attach == NULL)
		/* no special nm_bdg_attach callback. On VALE
		 * attach, we need to interpose a bwrap
		 */
		na->nm_bdg_attach = netmap_bwrap_attach;
#endif

	return 0;
}
2735f9790aebSLuigi Rizzo 
2736f9790aebSLuigi Rizzo 
/* standard cleanup, called by all destructors.
 * Releases krings (if any are left over), pipe state and the
 * memory-allocator reference, then frees the adapter itself.
 * The bzero before free() helps catch use-after-free of 'na'. */
void
netmap_detach_common(struct netmap_adapter *na)
{
	if (na->tx_rings) { /* XXX should not happen */
		D("freeing leftover tx_rings");
		na->nm_krings_delete(na);
	}
	netmap_pipe_dealloc(na);
	/* drop the reference taken in netmap_attach_common() */
	if (na->nm_mem)
		netmap_mem_put(na->nm_mem);
	bzero(na, sizeof(*na));
	free(na, M_DEVBUF);
}
2751f9790aebSLuigi Rizzo 
275237e3a6d3SLuigi Rizzo /* Wrapper for the register callback provided netmap-enabled
275337e3a6d3SLuigi Rizzo  * hardware drivers.
275437e3a6d3SLuigi Rizzo  * nm_iszombie(na) means that the driver module has been
27554bf50f18SLuigi Rizzo  * unloaded, so we cannot call into it.
275637e3a6d3SLuigi Rizzo  * nm_os_ifnet_lock() must guarantee mutual exclusion with
275737e3a6d3SLuigi Rizzo  * module unloading.
27584bf50f18SLuigi Rizzo  */
27594bf50f18SLuigi Rizzo static int
276037e3a6d3SLuigi Rizzo netmap_hw_reg(struct netmap_adapter *na, int onoff)
27614bf50f18SLuigi Rizzo {
27624bf50f18SLuigi Rizzo 	struct netmap_hw_adapter *hwna =
27634bf50f18SLuigi Rizzo 		(struct netmap_hw_adapter*)na;
276437e3a6d3SLuigi Rizzo 	int error = 0;
27654bf50f18SLuigi Rizzo 
276637e3a6d3SLuigi Rizzo 	nm_os_ifnet_lock();
27674bf50f18SLuigi Rizzo 
276837e3a6d3SLuigi Rizzo 	if (nm_iszombie(na)) {
276937e3a6d3SLuigi Rizzo 		if (onoff) {
277037e3a6d3SLuigi Rizzo 			error = ENXIO;
277137e3a6d3SLuigi Rizzo 		} else if (na != NULL) {
277237e3a6d3SLuigi Rizzo 			na->na_flags &= ~NAF_NETMAP_ON;
277337e3a6d3SLuigi Rizzo 		}
277437e3a6d3SLuigi Rizzo 		goto out;
277537e3a6d3SLuigi Rizzo 	}
277637e3a6d3SLuigi Rizzo 
277737e3a6d3SLuigi Rizzo 	error = hwna->nm_hw_register(na, onoff);
277837e3a6d3SLuigi Rizzo 
277937e3a6d3SLuigi Rizzo out:
278037e3a6d3SLuigi Rizzo 	nm_os_ifnet_unlock();
278137e3a6d3SLuigi Rizzo 
278237e3a6d3SLuigi Rizzo 	return error;
278337e3a6d3SLuigi Rizzo }
278437e3a6d3SLuigi Rizzo 
278537e3a6d3SLuigi Rizzo static void
278637e3a6d3SLuigi Rizzo netmap_hw_dtor(struct netmap_adapter *na)
278737e3a6d3SLuigi Rizzo {
278837e3a6d3SLuigi Rizzo 	if (nm_iszombie(na) || na->ifp == NULL)
278937e3a6d3SLuigi Rizzo 		return;
279037e3a6d3SLuigi Rizzo 
279137e3a6d3SLuigi Rizzo 	WNA(na->ifp) = NULL;
27924bf50f18SLuigi Rizzo }
27934bf50f18SLuigi Rizzo 
2794f18be576SLuigi Rizzo 
279568b8534bSLuigi Rizzo /*
279637e3a6d3SLuigi Rizzo  * Allocate a ``netmap_adapter`` object, and initialize it from the
279737e3a6d3SLuigi Rizzo  * 'arg' passed by the driver on attach.
279868b8534bSLuigi Rizzo  * We allocate a block of memory with room for a struct netmap_adapter
279968b8534bSLuigi Rizzo  * plus two sets of N+2 struct netmap_kring (where N is the number
280068b8534bSLuigi Rizzo  * of hardware rings):
280168b8534bSLuigi Rizzo  * krings	0..N-1	are for the hardware queues.
280268b8534bSLuigi Rizzo  * kring	N	is for the host stack queue
280317885a7bSLuigi Rizzo  * kring	N+1	is only used for the selinfo for all queues. // XXX still true ?
280468b8534bSLuigi Rizzo  * Return 0 on success, ENOMEM otherwise.
280568b8534bSLuigi Rizzo  */
280637e3a6d3SLuigi Rizzo static int
280737e3a6d3SLuigi Rizzo _netmap_attach(struct netmap_adapter *arg, size_t size)
280868b8534bSLuigi Rizzo {
2809f9790aebSLuigi Rizzo 	struct netmap_hw_adapter *hwna = NULL;
281037e3a6d3SLuigi Rizzo 	struct ifnet *ifp = NULL;
281168b8534bSLuigi Rizzo 
281237e3a6d3SLuigi Rizzo 	if (arg == NULL || arg->ifp == NULL)
2813ae10d1afSLuigi Rizzo 		goto fail;
281437e3a6d3SLuigi Rizzo 	ifp = arg->ifp;
281537e3a6d3SLuigi Rizzo 	hwna = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
2816f9790aebSLuigi Rizzo 	if (hwna == NULL)
2817ae10d1afSLuigi Rizzo 		goto fail;
2818f9790aebSLuigi Rizzo 	hwna->up = *arg;
2819847bf383SLuigi Rizzo 	hwna->up.na_flags |= NAF_HOST_RINGS | NAF_NATIVE;
28204bf50f18SLuigi Rizzo 	strncpy(hwna->up.name, ifp->if_xname, sizeof(hwna->up.name));
28214bf50f18SLuigi Rizzo 	hwna->nm_hw_register = hwna->up.nm_register;
282237e3a6d3SLuigi Rizzo 	hwna->up.nm_register = netmap_hw_reg;
2823f9790aebSLuigi Rizzo 	if (netmap_attach_common(&hwna->up)) {
2824f9790aebSLuigi Rizzo 		free(hwna, M_DEVBUF);
2825f9790aebSLuigi Rizzo 		goto fail;
2826f9790aebSLuigi Rizzo 	}
2827f9790aebSLuigi Rizzo 	netmap_adapter_get(&hwna->up);
2828f9790aebSLuigi Rizzo 
282937e3a6d3SLuigi Rizzo 	NM_ATTACH_NA(ifp, &hwna->up);
283037e3a6d3SLuigi Rizzo 
283164ae02c3SLuigi Rizzo #ifdef linux
2832f18be576SLuigi Rizzo 	if (ifp->netdev_ops) {
2833f18be576SLuigi Rizzo 		/* prepare a clone of the netdev ops */
2834847bf383SLuigi Rizzo #ifndef NETMAP_LINUX_HAVE_NETDEV_OPS
2835f9790aebSLuigi Rizzo 		hwna->nm_ndo.ndo_start_xmit = ifp->netdev_ops;
2836f18be576SLuigi Rizzo #else
2837f9790aebSLuigi Rizzo 		hwna->nm_ndo = *ifp->netdev_ops;
283837e3a6d3SLuigi Rizzo #endif /* NETMAP_LINUX_HAVE_NETDEV_OPS */
2839f18be576SLuigi Rizzo 	}
2840f9790aebSLuigi Rizzo 	hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit;
28414bf50f18SLuigi Rizzo 	if (ifp->ethtool_ops) {
28424bf50f18SLuigi Rizzo 		hwna->nm_eto = *ifp->ethtool_ops;
28434bf50f18SLuigi Rizzo 	}
28444bf50f18SLuigi Rizzo 	hwna->nm_eto.set_ringparam = linux_netmap_set_ringparam;
2845847bf383SLuigi Rizzo #ifdef NETMAP_LINUX_HAVE_SET_CHANNELS
28464bf50f18SLuigi Rizzo 	hwna->nm_eto.set_channels = linux_netmap_set_channels;
284737e3a6d3SLuigi Rizzo #endif /* NETMAP_LINUX_HAVE_SET_CHANNELS */
28484bf50f18SLuigi Rizzo 	if (arg->nm_config == NULL) {
28494bf50f18SLuigi Rizzo 		hwna->up.nm_config = netmap_linux_config;
28504bf50f18SLuigi Rizzo 	}
2851ce3ee1e7SLuigi Rizzo #endif /* linux */
285237e3a6d3SLuigi Rizzo 	if (arg->nm_dtor == NULL) {
285337e3a6d3SLuigi Rizzo 		hwna->up.nm_dtor = netmap_hw_dtor;
285437e3a6d3SLuigi Rizzo 	}
2855f9790aebSLuigi Rizzo 
2856d82f9014SRui Paulo 	if_printf(ifp, "netmap queues/slots: TX %d/%d, RX %d/%d\n",
2857d82f9014SRui Paulo 	    hwna->up.num_tx_rings, hwna->up.num_tx_desc,
2858d82f9014SRui Paulo 	    hwna->up.num_rx_rings, hwna->up.num_rx_desc);
2859ae10d1afSLuigi Rizzo 	return 0;
286068b8534bSLuigi Rizzo 
2861ae10d1afSLuigi Rizzo fail:
2862f9790aebSLuigi Rizzo 	D("fail, arg %p ifp %p na %p", arg, ifp, hwna);
2863f9790aebSLuigi Rizzo 	return (hwna ? EINVAL : ENOMEM);
286468b8534bSLuigi Rizzo }
286568b8534bSLuigi Rizzo 
286668b8534bSLuigi Rizzo 
/* Public attach entry point for hardware drivers: allocates a plain
 * netmap_hw_adapter (no extra room) around the driver-provided 'arg'. */
int
netmap_attach(struct netmap_adapter *arg)
{
	return _netmap_attach(arg, sizeof(struct netmap_hw_adapter));
}
287237e3a6d3SLuigi Rizzo 
287337e3a6d3SLuigi Rizzo 
#ifdef WITH_PTNETMAP_GUEST
/* Attach routine for ptnetmap (pass-through) guest adapters.
 * Creates a guest memory allocator bound to the host through 'ptctl',
 * attaches a hardware adapter around 'arg', and initializes a second,
 * driver-private adapter (ptna->dr).
 * NOTE(review): 'arg' is tested for NULL when computing 'ifp' but is
 * dereferenced unconditionally right below — confirm callers never
 * pass NULL, or the guard is pointless.
 * NOTE(review): if _netmap_attach() fails, the reference returned by
 * netmap_mem_pt_guest_new() appears not to be released here — possible
 * leak on the error path; verify against the allocator's refcounting. */
int
netmap_pt_guest_attach(struct netmap_adapter *arg,
		       void *csb,
		       unsigned int nifp_offset,
		       nm_pt_guest_ptctl_t ptctl)
{
	struct netmap_pt_guest_adapter *ptna;
	struct ifnet *ifp = arg ? arg->ifp : NULL;
	int error;

	/* get allocator */
	arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, ptctl);
	if (arg->nm_mem == NULL)
		return ENOMEM;
	arg->na_flags |= NAF_MEM_OWNER;
	error = _netmap_attach(arg, sizeof(struct netmap_pt_guest_adapter));
	if (error)
		return error;

	/* get the netmap_pt_guest_adapter */
	ptna = (struct netmap_pt_guest_adapter *) NA(ifp);
	ptna->csb = csb;

	/* Initialize a separate pass-through netmap adapter that is going to
	 * be used by the ptnet driver only, and so never exposed to netmap
         * applications. We only need a subset of the available fields. */
	memset(&ptna->dr, 0, sizeof(ptna->dr));
	ptna->dr.up.ifp = ifp;
	ptna->dr.up.nm_mem = ptna->hwup.up.nm_mem;
	netmap_mem_get(ptna->dr.up.nm_mem);
        ptna->dr.up.nm_config = ptna->hwup.up.nm_config;

	ptna->backend_regifs = 0;

	return 0;
}
#endif /* WITH_PTNETMAP_GUEST */
291237e3a6d3SLuigi Rizzo 
291337e3a6d3SLuigi Rizzo 
2914f9790aebSLuigi Rizzo void
2915f9790aebSLuigi Rizzo NM_DBG(netmap_adapter_get)(struct netmap_adapter *na)
2916f9790aebSLuigi Rizzo {
2917f9790aebSLuigi Rizzo 	if (!na) {
2918f9790aebSLuigi Rizzo 		return;
2919f9790aebSLuigi Rizzo 	}
2920f9790aebSLuigi Rizzo 
2921f9790aebSLuigi Rizzo 	refcount_acquire(&na->na_refcount);
2922f9790aebSLuigi Rizzo }
2923f9790aebSLuigi Rizzo 
2924f9790aebSLuigi Rizzo 
2925f9790aebSLuigi Rizzo /* returns 1 iff the netmap_adapter is destroyed */
2926f9790aebSLuigi Rizzo int
2927f9790aebSLuigi Rizzo NM_DBG(netmap_adapter_put)(struct netmap_adapter *na)
2928f9790aebSLuigi Rizzo {
2929f9790aebSLuigi Rizzo 	if (!na)
2930f9790aebSLuigi Rizzo 		return 1;
2931f9790aebSLuigi Rizzo 
2932f9790aebSLuigi Rizzo 	if (!refcount_release(&na->na_refcount))
2933f9790aebSLuigi Rizzo 		return 0;
2934f9790aebSLuigi Rizzo 
2935f9790aebSLuigi Rizzo 	if (na->nm_dtor)
2936f9790aebSLuigi Rizzo 		na->nm_dtor(na);
2937f9790aebSLuigi Rizzo 
2938f9790aebSLuigi Rizzo 	netmap_detach_common(na);
2939f9790aebSLuigi Rizzo 
2940f9790aebSLuigi Rizzo 	return 1;
2941f9790aebSLuigi Rizzo }
2942f9790aebSLuigi Rizzo 
294389cc2556SLuigi Rizzo /* nm_krings_create callback for all hardware native adapters */
2944f9790aebSLuigi Rizzo int
2945f9790aebSLuigi Rizzo netmap_hw_krings_create(struct netmap_adapter *na)
2946f9790aebSLuigi Rizzo {
2947f0ea3689SLuigi Rizzo 	int ret = netmap_krings_create(na, 0);
294817885a7bSLuigi Rizzo 	if (ret == 0) {
294917885a7bSLuigi Rizzo 		/* initialize the mbq for the sw rx ring */
295017885a7bSLuigi Rizzo 		mbq_safe_init(&na->rx_rings[na->num_rx_rings].rx_queue);
295117885a7bSLuigi Rizzo 		ND("initialized sw rx queue %d", na->num_rx_rings);
295217885a7bSLuigi Rizzo 	}
295317885a7bSLuigi Rizzo 	return ret;
2954f9790aebSLuigi Rizzo }
2955f9790aebSLuigi Rizzo 
2956f9790aebSLuigi Rizzo 
2957f9790aebSLuigi Rizzo 
/*
 * Called on module unload by the netmap-enabled drivers.
 * Marks the adapter as a zombie so active users can bail out,
 * and drops the driver's reference when the adapter is native.
 */
void
netmap_detach(struct ifnet *ifp)
{
	struct netmap_adapter *na = NA(ifp);

	if (!na)
		return;

	NMG_LOCK();
	/* stop all rings before flipping flags, so no sync is running */
	netmap_set_all_rings(na, NM_KR_LOCKED);
	na->na_flags |= NAF_ZOMBIE;
	/*
	 * if the netmap adapter is not native, somebody
	 * changed it, so we can not release it here.
	 * The NAF_ZOMBIE flag will notify the new owner that
	 * the driver is gone.
	 */
	if (na->na_flags & NAF_NATIVE) {
	        netmap_adapter_put(na);
	}
	/* give active users a chance to notice that NAF_ZOMBIE has been
	 * turned on, so that they can stop and return an error to userspace.
	 * Note that this becomes a NOP if there are no active users and,
	 * therefore, the put() above has deleted the na, since now NA(ifp) is
	 * NULL.
	 */
	netmap_enable_all_rings(ifp);
	NMG_UNLOCK();
}
2990f18be576SLuigi Rizzo 
2991f18be576SLuigi Rizzo 
/*
 * Intercept packets from the network stack and pass them
 * to netmap as incoming packets on the 'software' ring.
 *
 * We only store packets in a bounded mbq and then copy them
 * in the relevant rxsync routine.
 *
 * We rely on the OS to make sure that the ifp and na do not go
 * away (typically the caller checks for IFF_DRV_RUNNING or the like).
 * In nm_register() or whenever there is a reinitialization,
 * we make sure to make the mode change visible here.
 *
 * The mbuf is always consumed here: either queued (return 0),
 * freed on drop (return ENOBUFS/ENXIO), or handed back to the
 * driver when the destination tx ring is not in netmap mode.
 */
int
netmap_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct netmap_adapter *na = NA(ifp);
	struct netmap_kring *kring, *tx_kring;
	u_int len = MBUF_LEN(m);
	u_int error = ENOBUFS;	/* default: drop */
	unsigned int txr;
	struct mbq *q;
	int space;

	/* the host ('software') rx ring sits right after the hw rx rings */
	kring = &na->rx_rings[na->num_rx_rings];
	// XXX [Linux] we do not need this lock
	// if we follow the down/configure/up protocol -gl
	// mtx_lock(&na->core_lock);

	if (!nm_netmap_on(na)) {
		D("%s not in netmap mode anymore", na->name);
		error = ENXIO;
		goto done;
	}

	/* clamp the mbuf's tx queue to a valid hw tx ring index */
	txr = MBUF_TXQ(m);
	if (txr >= na->num_tx_rings) {
		txr %= na->num_tx_rings;
	}
	tx_kring = &NMR(na, NR_TX)[txr];

	/* tx ring not captured by netmap: let the driver transmit as usual */
	if (tx_kring->nr_mode == NKR_NETMAP_OFF) {
		return MBUF_TRANSMIT(na, ifp, m);
	}

	q = &kring->rx_queue;

	// XXX reconsider long packets if we handle fragments
	if (len > NETMAP_BUF_SIZE(na)) { /* too long for us */
		D("%s from_host, drop packet size %d > %d", na->name,
			len, NETMAP_BUF_SIZE(na));
		goto done;
	}

	/* we cannot reproduce checksum/TSO offloads, so drop such mbufs */
	if (nm_os_mbuf_has_offld(m)) {
		RD(1, "%s drop mbuf requiring offloadings", na->name);
		goto done;
	}

	/* protect against rxsync_from_host(), netmap_sw_to_nic()
	 * and maybe other instances of netmap_transmit (the latter
	 * not possible on Linux).
	 * Also avoid overflowing the queue.
	 */
	mbq_lock(q);

	/* slots already committed to the ring ... */
        space = kring->nr_hwtail - kring->nr_hwcur;
        if (space < 0)
                space += kring->nkr_num_slots;
	/* ... plus the queued mbufs must fit in the ring */
	if (space + mbq_len(q) >= kring->nkr_num_slots - 1) { // XXX
		RD(10, "%s full hwcur %d hwtail %d qlen %d len %d m %p",
			na->name, kring->nr_hwcur, kring->nr_hwtail, mbq_len(q),
			len, m);
	} else {
		mbq_enqueue(q, m);
		ND(10, "%s %d bufs in queue len %d m %p",
			na->name, mbq_len(q), len, m);
		/* notify outside the lock */
		m = NULL;
		error = 0;
	}
	mbq_unlock(q);

done:
	if (m)
		m_freem(m);
	/* unconditionally wake up listeners */
	kring->nm_notify(kring, 0);
	/* this is normally netmap_notify(), but for nics
	 * connected to a bridge it is netmap_bwrap_intr_notify(),
	 * that possibly forwards the frames through the switch
	 */

	return (error);
}
308668b8534bSLuigi Rizzo 
308768b8534bSLuigi Rizzo 
/*
 * netmap_reset() is called by the driver routines when reinitializing
 * a ring. The driver is in charge of locking to protect the kring.
 * If native netmap mode is not set just return NULL.
 * If native netmap mode is set, in particular, we have to set nr_mode to
 * NKR_NETMAP_ON.
 *
 * 'tx' selects NR_TX or NR_RX, 'n' is the ring index, and 'new_cur' is
 * the slot index the driver restarts from. On success, returns the slot
 * array of the selected ring; returns NULL when the ring index is out of
 * range or the ring is not (or no longer) meant to be in netmap mode.
 */
struct netmap_slot *
netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
	u_int new_cur)
{
	struct netmap_kring *kring;
	int new_hwofs, lim;

	if (!nm_native_on(na)) {
		ND("interface not in native netmap mode");
		return NULL;	/* nothing to reinitialize */
	}

	/* XXX note- in the new scheme, we are not guaranteed to be
	 * under lock (e.g. when called on a device reset).
	 * In this case, we should set a flag and do not trust too
	 * much the values. In practice: TODO
	 * - set a RESET flag somewhere in the kring
	 * - do the processing in a conservative way
	 * - let the *sync() fixup at the end.
	 */
	if (tx == NR_TX) {
		if (n >= na->num_tx_rings)
			return NULL;

		kring = na->tx_rings + n;

		/* a mode switch to OFF is pending: make it effective */
		if (kring->nr_pending_mode == NKR_NETMAP_OFF) {
			kring->nr_mode = NKR_NETMAP_OFF;
			return NULL;
		}

		// XXX check whether we should use hwcur or rcur
		new_hwofs = kring->nr_hwcur - new_cur;
	} else {
		if (n >= na->num_rx_rings)
			return NULL;
		kring = na->rx_rings + n;

		/* a mode switch to OFF is pending: make it effective */
		if (kring->nr_pending_mode == NKR_NETMAP_OFF) {
			kring->nr_mode = NKR_NETMAP_OFF;
			return NULL;
		}

		new_hwofs = kring->nr_hwtail - new_cur;
	}
	lim = kring->nkr_num_slots - 1;
	/* keep the offset within [.. lim] (ring-size modular arithmetic) */
	if (new_hwofs > lim)
		new_hwofs -= lim + 1;

	/* Always set the new offset value and realign the ring. */
	if (netmap_verbose)
	    D("%s %s%d hwofs %d -> %d, hwtail %d -> %d",
		na->name,
		tx == NR_TX ? "TX" : "RX", n,
		kring->nkr_hwofs, new_hwofs,
		kring->nr_hwtail,
		tx == NR_TX ? lim : kring->nr_hwtail);
	kring->nkr_hwofs = new_hwofs;
	/* a freshly reset TX ring is all available: tail = cur + lim (mod) */
	if (tx == NR_TX) {
		kring->nr_hwtail = kring->nr_hwcur + lim;
		if (kring->nr_hwtail > lim)
			kring->nr_hwtail -= lim + 1;
	}

#if 0 // def linux
	/* XXX check that the mappings are correct */
	/* need ring_nr, adapter->pdev, direction */
	buffer_info->dma = dma_map_single(&pdev->dev, addr, adapter->rx_buffer_len, DMA_FROM_DEVICE);
	if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) {
		D("error mapping rx netmap buffer %d", i);
		// XXX fix error handling
	}

#endif /* linux */
	/*
	 * Wakeup on the individual and global selwait
	 * We do the wakeup here, but the ring is not yet reconfigured.
	 * However, we are under lock so there are no races.
	 */
	kring->nr_mode = NKR_NETMAP_ON;
	kring->nm_notify(kring, 0);
	return kring->ring->slot;
}
317868b8534bSLuigi Rizzo 
317968b8534bSLuigi Rizzo 
3180ce3ee1e7SLuigi Rizzo /*
3181f9790aebSLuigi Rizzo  * Dispatch rx/tx interrupts to the netmap rings.
3182ce3ee1e7SLuigi Rizzo  *
3183ce3ee1e7SLuigi Rizzo  * "work_done" is non-null on the RX path, NULL for the TX path.
3184ce3ee1e7SLuigi Rizzo  * We rely on the OS to make sure that there is only one active
3185ce3ee1e7SLuigi Rizzo  * instance per queue, and that there is appropriate locking.
3186849bec0eSLuigi Rizzo  *
3187f9790aebSLuigi Rizzo  * The 'notify' routine depends on what the ring is attached to.
3188f9790aebSLuigi Rizzo  * - for a netmap file descriptor, do a selwakeup on the individual
3189f9790aebSLuigi Rizzo  *   waitqueue, plus one on the global one if needed
31904bf50f18SLuigi Rizzo  *   (see netmap_notify)
31914bf50f18SLuigi Rizzo  * - for a nic connected to a switch, call the proper forwarding routine
31924bf50f18SLuigi Rizzo  *   (see netmap_bwrap_intr_notify)
3193f9790aebSLuigi Rizzo  */
319437e3a6d3SLuigi Rizzo int
319537e3a6d3SLuigi Rizzo netmap_common_irq(struct netmap_adapter *na, u_int q, u_int *work_done)
3196f9790aebSLuigi Rizzo {
3197f9790aebSLuigi Rizzo 	struct netmap_kring *kring;
3198847bf383SLuigi Rizzo 	enum txrx t = (work_done ? NR_RX : NR_TX);
3199f9790aebSLuigi Rizzo 
3200f9790aebSLuigi Rizzo 	q &= NETMAP_RING_MASK;
3201f9790aebSLuigi Rizzo 
3202f9790aebSLuigi Rizzo 	if (netmap_verbose) {
3203f9790aebSLuigi Rizzo 	        RD(5, "received %s queue %d", work_done ? "RX" : "TX" , q);
3204f9790aebSLuigi Rizzo 	}
3205f9790aebSLuigi Rizzo 
3206847bf383SLuigi Rizzo 	if (q >= nma_get_nrings(na, t))
320737e3a6d3SLuigi Rizzo 		return NM_IRQ_PASS; // not a physical queue
3208847bf383SLuigi Rizzo 
3209847bf383SLuigi Rizzo 	kring = NMR(na, t) + q;
3210847bf383SLuigi Rizzo 
321137e3a6d3SLuigi Rizzo 	if (kring->nr_mode == NKR_NETMAP_OFF) {
321237e3a6d3SLuigi Rizzo 		return NM_IRQ_PASS;
321337e3a6d3SLuigi Rizzo 	}
321437e3a6d3SLuigi Rizzo 
3215847bf383SLuigi Rizzo 	if (t == NR_RX) {
3216f9790aebSLuigi Rizzo 		kring->nr_kflags |= NKR_PENDINTR;	// XXX atomic ?
3217f9790aebSLuigi Rizzo 		*work_done = 1; /* do not fire napi again */
3218f9790aebSLuigi Rizzo 	}
321937e3a6d3SLuigi Rizzo 
322037e3a6d3SLuigi Rizzo 	return kring->nm_notify(kring, 0);
3221f9790aebSLuigi Rizzo }
3222f9790aebSLuigi Rizzo 
322317885a7bSLuigi Rizzo 
3224f9790aebSLuigi Rizzo /*
3225f9790aebSLuigi Rizzo  * Default functions to handle rx/tx interrupts from a physical device.
3226f9790aebSLuigi Rizzo  * "work_done" is non-null on the RX path, NULL for the TX path.
3227f9790aebSLuigi Rizzo  *
322837e3a6d3SLuigi Rizzo  * If the card is not in netmap mode, simply return NM_IRQ_PASS,
3229ce3ee1e7SLuigi Rizzo  * so that the caller proceeds with regular processing.
323037e3a6d3SLuigi Rizzo  * Otherwise call netmap_common_irq().
3231ce3ee1e7SLuigi Rizzo  *
3232ce3ee1e7SLuigi Rizzo  * If the card is connected to a netmap file descriptor,
3233ce3ee1e7SLuigi Rizzo  * do a selwakeup on the individual queue, plus one on the global one
3234ce3ee1e7SLuigi Rizzo  * if needed (multiqueue card _and_ there are multiqueue listeners),
323537e3a6d3SLuigi Rizzo  * and return NR_IRQ_COMPLETED.
3236ce3ee1e7SLuigi Rizzo  *
3237ce3ee1e7SLuigi Rizzo  * Finally, if called on rx from an interface connected to a switch,
323837e3a6d3SLuigi Rizzo  * calls the proper forwarding routine.
32391a26580eSLuigi Rizzo  */
3240babc7c12SLuigi Rizzo int
3241ce3ee1e7SLuigi Rizzo netmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_done)
32421a26580eSLuigi Rizzo {
32434bf50f18SLuigi Rizzo 	struct netmap_adapter *na = NA(ifp);
32444bf50f18SLuigi Rizzo 
32454bf50f18SLuigi Rizzo 	/*
32464bf50f18SLuigi Rizzo 	 * XXX emulated netmap mode sets NAF_SKIP_INTR so
32474bf50f18SLuigi Rizzo 	 * we still use the regular driver even though the previous
32484bf50f18SLuigi Rizzo 	 * check fails. It is unclear whether we should use
32494bf50f18SLuigi Rizzo 	 * nm_native_on() here.
32504bf50f18SLuigi Rizzo 	 */
32514bf50f18SLuigi Rizzo 	if (!nm_netmap_on(na))
325237e3a6d3SLuigi Rizzo 		return NM_IRQ_PASS;
3253849bec0eSLuigi Rizzo 
32544bf50f18SLuigi Rizzo 	if (na->na_flags & NAF_SKIP_INTR) {
32558241616dSLuigi Rizzo 		ND("use regular interrupt");
325637e3a6d3SLuigi Rizzo 		return NM_IRQ_PASS;
32578241616dSLuigi Rizzo 	}
32588241616dSLuigi Rizzo 
325937e3a6d3SLuigi Rizzo 	return netmap_common_irq(na, q, work_done);
32601a26580eSLuigi Rizzo }
32611a26580eSLuigi Rizzo 
326264ae02c3SLuigi Rizzo 
/*
 * Module loader and unloader
 *
 * netmap_init() creates the /dev/netmap device and initializes
 * all global variables. Returns 0 on success, errno on failure
 * (but there is no chance)
 *
 * netmap_fini() destroys everything.
 */

static struct cdev *netmap_dev; /* /dev/netmap character device. */
extern struct cdevsw netmap_cdevsw; /* its switch, defined elsewhere */
3275babc7c12SLuigi Rizzo 
327617885a7bSLuigi Rizzo 
/*
 * Tear down everything set up by netmap_init(), in reverse order:
 * device node, os hooks, bridges, memory allocator, global lock.
 * Safe to call from the netmap_init() failure path, where only a
 * subset of the steps may have completed.
 */
void
netmap_fini(void)
{
	if (netmap_dev)
		destroy_dev(netmap_dev);
	/* we assume that there are no longer netmap users */
	nm_os_ifnet_fini();
	netmap_uninit_bridges();
	netmap_mem_fini();
	NMG_LOCK_DESTROY();
	printf("netmap: unloaded module.\n");
}
328968b8534bSLuigi Rizzo 
329017885a7bSLuigi Rizzo 
3291f9790aebSLuigi Rizzo int
3292f9790aebSLuigi Rizzo netmap_init(void)
329368b8534bSLuigi Rizzo {
3294f9790aebSLuigi Rizzo 	int error;
329568b8534bSLuigi Rizzo 
3296f9790aebSLuigi Rizzo 	NMG_LOCK_INIT();
329768b8534bSLuigi Rizzo 
3298f9790aebSLuigi Rizzo 	error = netmap_mem_init();
3299f9790aebSLuigi Rizzo 	if (error != 0)
3300f9790aebSLuigi Rizzo 		goto fail;
3301c929ca72SLuigi Rizzo 	/*
3302c929ca72SLuigi Rizzo 	 * MAKEDEV_ETERNAL_KLD avoids an expensive check on syscalls
3303c929ca72SLuigi Rizzo 	 * when the module is compiled in.
3304c929ca72SLuigi Rizzo 	 * XXX could use make_dev_credv() to get error number
3305c929ca72SLuigi Rizzo 	 */
33060e73f29aSLuigi Rizzo 	netmap_dev = make_dev_credf(MAKEDEV_ETERNAL_KLD,
330711c0b69cSAdrian Chadd 		&netmap_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0600,
33080e73f29aSLuigi Rizzo 			      "netmap");
3309f9790aebSLuigi Rizzo 	if (!netmap_dev)
3310f9790aebSLuigi Rizzo 		goto fail;
3311f9790aebSLuigi Rizzo 
3312847bf383SLuigi Rizzo 	error = netmap_init_bridges();
3313847bf383SLuigi Rizzo 	if (error)
3314847bf383SLuigi Rizzo 		goto fail;
3315847bf383SLuigi Rizzo 
33164bf50f18SLuigi Rizzo #ifdef __FreeBSD__
331737e3a6d3SLuigi Rizzo 	nm_os_vi_init_index();
33184bf50f18SLuigi Rizzo #endif
3319847bf383SLuigi Rizzo 
332037e3a6d3SLuigi Rizzo 	error = nm_os_ifnet_init();
332137e3a6d3SLuigi Rizzo 	if (error)
332237e3a6d3SLuigi Rizzo 		goto fail;
332337e3a6d3SLuigi Rizzo 
3324f9790aebSLuigi Rizzo 	printf("netmap: loaded module\n");
3325f9790aebSLuigi Rizzo 	return (0);
3326f9790aebSLuigi Rizzo fail:
332768b8534bSLuigi Rizzo 	netmap_fini();
3328f9790aebSLuigi Rizzo 	return (EINVAL); /* may be incorrect */
332968b8534bSLuigi Rizzo }
3330