1 /*
2 * Copyright (C) 2013-2016 Universita` di Pisa
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27
28 /*
29 * This module implements the VALE switch for netmap
30
31 --- VALE SWITCH ---
32
33 NMG_LOCK() serializes all modifications to switches and ports.
34 A switch cannot be deleted until all ports are gone.
35
36 For each switch, an SX lock (RWlock on linux) protects
37 deletion of ports. When configuring a new port or deleting an existing
38 one, the lock is acquired in exclusive mode (after holding NMG_LOCK).
39 When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
40 The lock is held throughout the entire forwarding cycle,
41 during which the thread may incur a page fault.
42 Hence it is important that sleepable shared locks are used.
43
44 On the rx ring, the per-port lock is grabbed initially to reserve
45 a number of slots in the ring, then the lock is released,
46 packets are copied from source to destination, and then
47 the lock is acquired again and the receive ring is updated.
48 (A similar thing is done on the tx ring for NIC and host stack
49 ports attached to the switch)
50
51 */
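/*
 * Illustrative sketch (not compiled) of the forwarding-side locking
 * pattern described above.  BDG_RLOCK()/BDG_RUNLOCK() and the kring
 * q_lock are the real primitives used in this file; the helpers
 * reserve_rx_slots(), copy_packets() and publish_rx_slots() are
 * hypothetical placeholders standing in for the VALE forwarding code.
 */
#if 0
static void
example_forwarding_cycle(struct nm_bridge *b, struct netmap_kring *rxkring)
{
	u_int first, n;

	BDG_RLOCK(b);		/* shared, sleepable: a page fault is allowed */

	mtx_lock(&rxkring->q_lock);
	n = reserve_rx_slots(rxkring, &first);	/* hypothetical: reserve slots */
	mtx_unlock(&rxkring->q_lock);

	copy_packets(rxkring, first, n);	/* hypothetical: copy with no ring lock held */

	mtx_lock(&rxkring->q_lock);
	publish_rx_slots(rxkring, first, n);	/* hypothetical: update the rx ring */
	mtx_unlock(&rxkring->q_lock);

	BDG_RUNLOCK(b);
}
#endif /* illustrative sketch */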
52
53 /*
54 * OS-specific code that is used only within this file.
55 * Other OS-specific code that must be accessed by drivers
56 * is present in netmap_kern.h
57 */
58
59 #if defined(__FreeBSD__)
60 #include <sys/cdefs.h> /* prerequisite */
61 __FBSDID("$FreeBSD$");
62
63 #include <sys/types.h>
64 #include <sys/errno.h>
65 #include <sys/param.h> /* defines used in kernel.h */
66 #include <sys/kernel.h> /* types used in module initialization */
67 #include <sys/conf.h> /* cdevsw struct, UID, GID */
68 #include <sys/sockio.h>
69 #include <sys/socketvar.h> /* struct socket */
70 #include <sys/malloc.h>
71 #include <sys/poll.h>
72 #include <sys/rwlock.h>
73 #include <sys/socket.h> /* sockaddrs */
74 #include <sys/selinfo.h>
75 #include <sys/sysctl.h>
76 #include <net/if.h>
77 #include <net/if_var.h>
78 #include <net/bpf.h> /* BIOCIMMEDIATE */
79 #include <machine/bus.h> /* bus_dmamap_* */
80 #include <sys/endian.h>
81 #include <sys/refcount.h>
82 #include <sys/smp.h>
83
84
85 #elif defined(linux)
86
87 #include "bsd_glue.h"
88
89 #elif defined(__APPLE__)
90
91 #warning OSX support is only partial
92 #include "osx_glue.h"
93
94 #elif defined(_WIN32)
95 #include "win_glue.h"
96
97 #else
98
99 #error Unsupported platform
100
101 #endif /* unsupported */
102
103 /*
104 * common headers
105 */
106
107 #include <net/netmap.h>
108 #include <dev/netmap/netmap_kern.h>
109 #include <dev/netmap/netmap_mem2.h>
110
111 #include <dev/netmap/netmap_bdg.h>
112
113 const char*
114 netmap_bdg_name(struct netmap_vp_adapter *vp)
115 {
116 struct nm_bridge *b = vp->na_bdg;
117 if (b == NULL)
118 return NULL;
119 return b->bdg_basename;
120 }
121
122
123 #ifndef CONFIG_NET_NS
124 /*
125 * XXX in principle nm_bridges could be created dynamically
126 * Right now we have a static array and deletions are protected
127 * by an exclusive lock.
128 */
129 struct nm_bridge *nm_bridges;
130 #endif /* !CONFIG_NET_NS */
131
132
133 static int
134 nm_is_id_char(const char c)
135 {
136 return (c >= 'a' && c <= 'z') ||
137 (c >= 'A' && c <= 'Z') ||
138 (c >= '0' && c <= '9') ||
139 (c == '_');
140 }
141
142 /* Validate the name of a bdg port and return the
143 * position of the ":" character. */
144 static int
145 nm_bdg_name_validate(const char *name, size_t prefixlen)
146 {
147 int colon_pos = -1;
148 int i;
149
150 if (!name || strlen(name) < prefixlen) {
151 return -1;
152 }
153
154 for (i = 0; i < NM_BDG_IFNAMSIZ && name[i]; i++) {
155 if (name[i] == ':') {
156 colon_pos = i;
157 break;
158 } else if (!nm_is_id_char(name[i])) {
159 return -1;
160 }
161 }
162
163 if (strlen(name) - colon_pos > IFNAMSIZ) {
164 /* interface name too long */
165 return -1;
166 }
167
168 return colon_pos;
169 }
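/*
 * Example (illustrative only): with a "vale" prefix (prefixlen 4),
 * nm_bdg_name_validate("vale1:eth0", 4) returns 5, the index of the ':'
 * separating the bridge name "vale1" from the port name "eth0".
 * A character outside [A-Za-z0-9_] before the ':' makes it return -1.
 */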
170
171 /*
172 * locate a bridge among the existing ones.
173 * MUST BE CALLED WITH NMG_LOCK()
174 *
175 * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
176 * We assume that this is called with a name of at least NM_NAME chars.
177 */
178 struct nm_bridge *
179 nm_find_bridge(const char *name, int create, struct netmap_bdg_ops *ops)
180 {
181 int i, namelen;
182 struct nm_bridge *b = NULL, *bridges;
183 u_int num_bridges;
184
185 NMG_LOCK_ASSERT();
186
187 netmap_bns_getbridges(&bridges, &num_bridges);
188
189 namelen = nm_bdg_name_validate(name,
190 (ops != NULL ? strlen(ops->name) : 0));
191 if (namelen < 0) {
192 nm_prerr("invalid bridge name %s", name ? name : NULL);
193 return NULL;
194 }
195
196 /* lookup the name, remember empty slot if there is one */
197 for (i = 0; i < num_bridges; i++) {
198 struct nm_bridge *x = bridges + i;
199
200 if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) {
201 if (create && b == NULL)
202 b = x; /* record empty slot */
203 } else if (x->bdg_namelen != namelen) {
204 continue;
205 } else if (strncmp(name, x->bdg_basename, namelen) == 0) {
206 nm_prdis("found '%.*s' at %d", namelen, name, i);
207 b = x;
208 break;
209 }
210 }
211 if (i == num_bridges && b) { /* name not found, can create entry */
212 /* initialize the bridge */
213 nm_prdis("create new bridge %s with ports %d", b->bdg_basename,
214 b->bdg_active_ports);
215 b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH);
216 if (b->ht == NULL) {
217 nm_prerr("failed to allocate hash table");
218 return NULL;
219 }
220 strncpy(b->bdg_basename, name, namelen);
221 b->bdg_namelen = namelen;
222 b->bdg_active_ports = 0;
223 for (i = 0; i < NM_BDG_MAXPORTS; i++)
224 b->bdg_port_index[i] = i;
225 /* set the default function */
226 b->bdg_ops = b->bdg_saved_ops = *ops;
227 b->private_data = b->ht;
228 b->bdg_flags = 0;
229 NM_BNS_GET(b);
230 }
231 return b;
232 }
233
234
235 int
236 netmap_bdg_free(struct nm_bridge *b)
237 {
238 if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports != 0) {
239 return EBUSY;
240 }
241
242 nm_prdis("marking bridge %s as free", b->bdg_basename);
243 nm_os_free(b->ht);
244 memset(&b->bdg_ops, 0, sizeof(b->bdg_ops));
245 memset(&b->bdg_saved_ops, 0, sizeof(b->bdg_saved_ops));
246 b->bdg_flags = 0;
247 NM_BNS_PUT(b);
248 return 0;
249 }
250
251 /* Called by external kernel modules (e.g., Openvswitch)
252 * to modify the private data previously given to regops().
253 * 'name' may be just the bridge's name (including ':' if it
254 * is not just NM_BDG_NAME).
255 * Called without NMG_LOCK.
256 */
257 int
258 netmap_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
259 void *callback_data, void *auth_token)
260 {
261 void *private_data = NULL;
262 struct nm_bridge *b;
263 int error = 0;
264
265 NMG_LOCK();
266 b = nm_find_bridge(name, 0 /* don't create */, NULL);
267 if (!b) {
268 error = EINVAL;
269 goto unlock_update_priv;
270 }
271 if (!nm_bdg_valid_auth_token(b, auth_token)) {
272 error = EACCES;
273 goto unlock_update_priv;
274 }
275 BDG_WLOCK(b);
276 private_data = callback(b->private_data, callback_data, &error);
277 b->private_data = private_data;
278 BDG_WUNLOCK(b);
279
280 unlock_update_priv:
281 NMG_UNLOCK();
282 return error;
283 }
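/*
 * Illustrative sketch (not compiled): how an external module might use
 * netmap_bdg_update_private_data().  The callback, the my_ctx structure
 * and the way the authentication token was obtained are assumptions of
 * the example; only the function signatures match this file.
 */
#if 0
struct my_ctx { int dummy; };			/* hypothetical module state */

static void *
example_update_cb(void *private_data, void *callback_data, int *error)
{
	struct my_ctx *next = callback_data;	/* hypothetical new state */

	(void)private_data;			/* previous value, if any */
	*error = 0;
	return next;				/* becomes the bridge's private_data */
}

static int
example_install_ctx(struct my_ctx *next, void *auth_token)
{
	return netmap_bdg_update_private_data("vale0:", example_update_cb,
			next, auth_token);
}
#endif /* illustrative sketch */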
284
285
286
287 /* remove from bridge b the ports in slots hw and sw
288 * (sw can be -1 if not needed)
289 */
290 void
291 netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
292 {
293 int s_hw = hw, s_sw = sw;
294 int i, lim = b->bdg_active_ports;
295 uint32_t *tmp = b->tmp_bdg_port_index;
296
297 /*
298 New algorithm:
299 make a copy of bdg_port_index;
300 lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
301 in the array of bdg_port_index, replacing them with
302 entries from the bottom of the array;
303 decrement bdg_active_ports;
304 acquire BDG_WLOCK() and copy back the array.
305 */
306
307 if (netmap_debug & NM_DEBUG_BDG)
308 nm_prinf("detach %d and %d (lim %d)", hw, sw, lim);
309 /* make a copy of the list of active ports, update it,
310 * and then copy back within BDG_WLOCK().
311 */
312 memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index));
313 for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
314 if (hw >= 0 && tmp[i] == hw) {
315 nm_prdis("detach hw %d at %d", hw, i);
316 lim--; /* point to last active port */
317 tmp[i] = tmp[lim]; /* swap with i */
318 tmp[lim] = hw; /* now this is inactive */
319 hw = -1;
320 } else if (sw >= 0 && tmp[i] == sw) {
321 nm_prdis("detach sw %d at %d", sw, i);
322 lim--;
323 tmp[i] = tmp[lim];
324 tmp[lim] = sw;
325 sw = -1;
326 } else {
327 i++;
328 }
329 }
330 if (hw >= 0 || sw >= 0) {
331 nm_prerr("delete failed hw %d sw %d, should panic...", hw, sw);
332 }
333
334 BDG_WLOCK(b);
335 if (b->bdg_ops.dtor)
336 b->bdg_ops.dtor(b->bdg_ports[s_hw]);
337 b->bdg_ports[s_hw] = NULL;
338 if (s_sw >= 0) {
339 b->bdg_ports[s_sw] = NULL;
340 }
341 memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index));
342 b->bdg_active_ports = lim;
343 BDG_WUNLOCK(b);
344
345 nm_prdis("now %d active ports", lim);
346 netmap_bdg_free(b);
347 }
348
349
350 /* nm_bdg_ctl callback for VALE ports */
351 int
352 netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
353 {
354 struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
355 struct nm_bridge *b = vpna->na_bdg;
356
357 if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
358 return 0; /* nothing to do */
359 }
360 if (b) {
361 netmap_set_all_rings(na, 0 /* disable */);
362 netmap_bdg_detach_common(b, vpna->bdg_port, -1);
363 vpna->na_bdg = NULL;
364 netmap_set_all_rings(na, 1 /* enable */);
365 }
366 /* we have taken a reference just for the attach */
367 netmap_adapter_put(na);
368 return 0;
369 }
370
371 int
372 netmap_default_bdg_attach(const char *name, struct netmap_adapter *na,
373 struct nm_bridge *b)
374 {
375 return NM_NEED_BWRAP;
376 }
377
378 /* Try to get a reference to a netmap adapter attached to a VALE switch.
379 * If the adapter is found (or is created), this function returns 0, a
380 * non NULL pointer is returned into *na, and the caller holds a
381 * reference to the adapter.
382 * If an adapter is not found, then no reference is grabbed and the
383 * function returns an error code, or 0 if there is just a VALE prefix
384 * mismatch. Therefore the caller holds a reference when
385 * (*na != NULL && return == 0).
386 */
387 int
388 netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
389 struct netmap_mem_d *nmd, int create, struct netmap_bdg_ops *ops)
390 {
391 char *nr_name = hdr->nr_name;
392 const char *ifname;
393 struct ifnet *ifp = NULL;
394 int error = 0;
395 struct netmap_vp_adapter *vpna, *hostna = NULL;
396 struct nm_bridge *b;
397 uint32_t i, j;
398 uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT;
399 int needed;
400
401 *na = NULL; /* default return value */
402
403 /* first try to see if this is a bridge port. */
404 NMG_LOCK_ASSERT();
405 if (strncmp(nr_name, ops->name, strlen(ops->name) - 1)) {
406 return 0; /* no error, but no VALE prefix */
407 }
408
409 b = nm_find_bridge(nr_name, create, ops);
410 if (b == NULL) {
411 nm_prdis("no bridges available for '%s'", nr_name);
412 return (create ? ENOMEM : ENXIO);
413 }
414 if (strlen(nr_name) < b->bdg_namelen) /* impossible */
415 panic("x");
416
417 /* Now we are sure that name starts with the bridge's name,
418 * lookup the port in the bridge. We need to scan the entire
419 * list. It is not important to hold a WLOCK on the bridge
420 * during the search because NMG_LOCK already guarantees
421 * that there are no other possible writers.
422 */
423
424 /* lookup in the local list of ports */
425 for (j = 0; j < b->bdg_active_ports; j++) {
426 i = b->bdg_port_index[j];
427 vpna = b->bdg_ports[i];
428 nm_prdis("checking %s", vpna->up.name);
429 if (!strcmp(vpna->up.name, nr_name)) {
430 netmap_adapter_get(&vpna->up);
431 nm_prdis("found existing if %s refs %d", nr_name)
432 *na = &vpna->up;
433 return 0;
434 }
435 }
436 /* not found, should we create it? */
437 if (!create)
438 return ENXIO;
439 /* yes we should, see if we have space to attach entries */
440 needed = 2; /* in some cases we only need 1 */
441 if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
442 nm_prerr("bridge full %d, cannot create new port", b->bdg_active_ports);
443 return ENOMEM;
444 }
445 /* record the next two ports available, but do not allocate yet */
446 cand = b->bdg_port_index[b->bdg_active_ports];
447 cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
448 nm_prdis("+++ bridge %s port %s used %d avail %d %d",
449 b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);
450
451 /*
452 * try to see if there is a matching NIC with this name
453 * (after the bridge's name)
454 */
455 ifname = nr_name + b->bdg_namelen + 1;
456 ifp = ifunit_ref(ifname);
457 if (!ifp) {
458 /* Create an ephemeral virtual port.
459 * This block contains all the ephemeral-specific logic.
460 */
461
462 if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
463 error = EINVAL;
464 goto out;
465 }
466
467 /* bdg_netmap_attach creates a struct netmap_adapter */
468 error = b->bdg_ops.vp_create(hdr, NULL, nmd, &vpna);
469 if (error) {
470 if (netmap_debug & NM_DEBUG_BDG)
471 nm_prerr("error %d", error);
472 goto out;
473 }
474 /* shortcut - we can skip get_hw_na(),
475 * ownership check and nm_bdg_attach()
476 */
477
478 } else {
479 struct netmap_adapter *hw;
480
481 /* the vale:nic syntax is only valid for some commands */
482 switch (hdr->nr_reqtype) {
483 case NETMAP_REQ_VALE_ATTACH:
484 case NETMAP_REQ_VALE_DETACH:
485 case NETMAP_REQ_VALE_POLLING_ENABLE:
486 case NETMAP_REQ_VALE_POLLING_DISABLE:
487 break; /* ok */
488 default:
489 error = EINVAL;
490 goto out;
491 }
492
493 error = netmap_get_hw_na(ifp, nmd, &hw);
494 if (error || hw == NULL)
495 goto out;
496
497 /* host adapter might not be created */
498 error = hw->nm_bdg_attach(nr_name, hw, b);
499 if (error == NM_NEED_BWRAP) {
500 error = b->bdg_ops.bwrap_attach(nr_name, hw);
501 }
502 if (error)
503 goto out;
504 vpna = hw->na_vp;
505 hostna = hw->na_hostvp;
506 if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
507 /* Check if we need to skip the host rings. */
508 struct nmreq_vale_attach *areq =
509 (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
510 if (areq->reg.nr_mode != NR_REG_NIC_SW) {
511 hostna = NULL;
512 }
513 }
514 }
515
516 BDG_WLOCK(b);
517 vpna->bdg_port = cand;
518 nm_prdis("NIC %p to bridge port %d", vpna, cand);
519 /* bind the port to the bridge (virtual ports are not active) */
520 b->bdg_ports[cand] = vpna;
521 vpna->na_bdg = b;
522 b->bdg_active_ports++;
523 if (hostna != NULL) {
524 /* also bind the host stack to the bridge */
525 b->bdg_ports[cand2] = hostna;
526 hostna->bdg_port = cand2;
527 hostna->na_bdg = b;
528 b->bdg_active_ports++;
529 nm_prdis("host %p to bridge port %d", hostna, cand2);
530 }
531 nm_prdis("if %s refs %d", ifname, vpna->up.na_refcount);
532 BDG_WUNLOCK(b);
533 *na = &vpna->up;
534 netmap_adapter_get(*na);
535
536 out:
537 if (ifp)
538 if_rele(ifp);
539
540 return error;
541 }
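/*
 * Illustrative sketch (not compiled) of the reference-counting contract
 * documented above netmap_get_bdg_na(): the caller owns a reference only
 * when the function returns 0 and *na has been set.  The request header,
 * memory allocator and ops are assumed to be already prepared.
 */
#if 0
static int
example_lookup(struct nmreq_header *hdr, struct netmap_mem_d *nmd,
		struct netmap_bdg_ops *ops)
{
	struct netmap_adapter *na = NULL;
	int error;

	NMG_LOCK();
	error = netmap_get_bdg_na(hdr, &na, nmd, 1 /* create */, ops);
	if (error == 0 && na != NULL) {
		/* we hold a reference here; drop it when done */
		netmap_adapter_put(na);
	}
	/* error != 0, or na == NULL (no VALE prefix): no reference taken */
	NMG_UNLOCK();
	return error;
}
#endif /* illustrative sketch */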
542
543
544 int
545 nm_is_bwrap(struct netmap_adapter *na)
546 {
547 return na->nm_register == netmap_bwrap_reg;
548 }
549
550
551 struct nm_bdg_polling_state;
552 struct
553 nm_bdg_kthread {
554 struct nm_kctx *nmk;
555 u_int qfirst;
556 u_int qlast;
557 struct nm_bdg_polling_state *bps;
558 };
559
560 struct nm_bdg_polling_state {
561 bool configured;
562 bool stopped;
563 struct netmap_bwrap_adapter *bna;
564 uint32_t mode;
565 u_int qfirst;
566 u_int qlast;
567 u_int cpu_from;
568 u_int ncpus;
569 struct nm_bdg_kthread *kthreads;
570 };
571
572 static void
573 netmap_bwrap_polling(void *data)
574 {
575 struct nm_bdg_kthread *nbk = data;
576 struct netmap_bwrap_adapter *bna;
577 u_int qfirst, qlast, i;
578 struct netmap_kring **kring0, *kring;
579
580 if (!nbk)
581 return;
582 qfirst = nbk->qfirst;
583 qlast = nbk->qlast;
584 bna = nbk->bps->bna;
585 kring0 = NMR(bna->hwna, NR_RX);
586
587 for (i = qfirst; i < qlast; i++) {
588 kring = kring0[i];
589 kring->nm_notify(kring, 0);
590 }
591 }
592
593 static int
594 nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
595 {
596 struct nm_kctx_cfg kcfg;
597 int i, j;
598
599 bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus);
600 if (bps->kthreads == NULL)
601 return ENOMEM;
602
603 bzero(&kcfg, sizeof(kcfg));
604 kcfg.worker_fn = netmap_bwrap_polling;
605 for (i = 0; i < bps->ncpus; i++) {
606 struct nm_bdg_kthread *t = bps->kthreads + i;
607 int all = (bps->ncpus == 1 &&
608 bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU);
609 int affinity = bps->cpu_from + i;
610
611 t->bps = bps;
612 t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
613 t->qlast = all ? bps->qlast : t->qfirst + 1;
614 if (netmap_verbose)
615 nm_prinf("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
616 t->qlast);
617
618 kcfg.type = i;
619 kcfg.worker_private = t;
620 t->nmk = nm_os_kctx_create(&kcfg, NULL);
621 if (t->nmk == NULL) {
622 goto cleanup;
623 }
624 nm_os_kctx_worker_setaff(t->nmk, affinity);
625 }
626 return 0;
627
628 cleanup:
629 for (j = 0; j < i; j++) {
630 struct nm_bdg_kthread *t = bps->kthreads + j;
631 nm_os_kctx_destroy(t->nmk);
632 }
633 nm_os_free(bps->kthreads);
634 return EFAULT;
635 }
636
637 /* A variant of ptnetmap_start_kthreads() */
638 static int
639 nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
640 {
641 int error, i, j;
642
643 if (!bps) {
644 nm_prerr("polling is not configured");
645 return EFAULT;
646 }
647 bps->stopped = false;
648
649 for (i = 0; i < bps->ncpus; i++) {
650 struct nm_bdg_kthread *t = bps->kthreads + i;
651 error = nm_os_kctx_worker_start(t->nmk);
652 if (error) {
653 nm_prerr("error in nm_kthread_start(): %d", error);
654 goto cleanup;
655 }
656 }
657 return 0;
658
659 cleanup:
660 for (j = 0; j < i; j++) {
661 struct nm_bdg_kthread *t = bps->kthreads + j;
662 nm_os_kctx_worker_stop(t->nmk);
663 }
664 bps->stopped = true;
665 return error;
666 }
667
668 static void
669 nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps)
670 {
671 int i;
672
673 if (!bps)
674 return;
675
676 for (i = 0; i < bps->ncpus; i++) {
677 struct nm_bdg_kthread *t = bps->kthreads + i;
678 nm_os_kctx_worker_stop(t->nmk);
679 nm_os_kctx_destroy(t->nmk);
680 }
681 bps->stopped = true;
682 }
683
684 static int
685 get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na,
686 struct nm_bdg_polling_state *bps)
687 {
688 unsigned int avail_cpus, core_from;
689 unsigned int qfirst, qlast;
690 uint32_t i = req->nr_first_cpu_id;
691 uint32_t req_cpus = req->nr_num_polling_cpus;
692
693 avail_cpus = nm_os_ncpus();
694
695 if (req_cpus == 0) {
696 nm_prerr("req_cpus must be > 0");
697 return EINVAL;
698 } else if (req_cpus >= avail_cpus) {
699 nm_prerr("Cannot use all the CPUs in the system");
700 return EINVAL;
701 }
702
703 if (req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU) {
704 /* Use a separate core for each ring. If nr_num_polling_cpus>1
705 * more consecutive rings are polled.
706 * For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2,
707 * ring 2 and 3 are polled by core 2 and 3, respectively. */
708 if (i + req_cpus > nma_get_nrings(na, NR_RX)) {
709 nm_prerr("Rings %u-%u not in range (have %d rings)",
710 i, i + req_cpus, nma_get_nrings(na, NR_RX));
711 return EINVAL;
712 }
713 qfirst = i;
714 qlast = qfirst + req_cpus;
715 core_from = qfirst;
716
717 } else if (req->nr_mode == NETMAP_POLLING_MODE_SINGLE_CPU) {
718 /* Poll all the rings using a core specified by nr_first_cpu_id.
719 * the number of cores must be 1. */
720 if (req_cpus != 1) {
721 nm_prerr("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU "
722 "(was %d)", req_cpus);
723 return EINVAL;
724 }
725 qfirst = 0;
726 qlast = nma_get_nrings(na, NR_RX);
727 core_from = i;
728 } else {
729 nm_prerr("Invalid polling mode");
730 return EINVAL;
731 }
732
733 bps->mode = req->nr_mode;
734 bps->qfirst = qfirst;
735 bps->qlast = qlast;
736 bps->cpu_from = core_from;
737 bps->ncpus = req_cpus;
738 nm_prinf("%s qfirst %u qlast %u cpu_from %u ncpus %u",
739 req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ?
740 "MULTI" : "SINGLE",
741 qfirst, qlast, core_from, req_cpus);
742 return 0;
743 }
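/*
 * Worked example (illustrative): on an adapter with 4 RX rings,
 * NETMAP_POLLING_MODE_MULTI_CPU with nr_first_cpu_id=2 and
 * nr_num_polling_cpus=2 yields qfirst=2, qlast=4, cpu_from=2,
 * i.e. rings 2 and 3 polled by cores 2 and 3.
 * NETMAP_POLLING_MODE_SINGLE_CPU with nr_first_cpu_id=1 yields
 * qfirst=0, qlast=4, cpu_from=1, i.e. a single kthread on core 1
 * polling all four rings.
 */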
744
745 static int
746 nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *na)
747 {
748 struct nm_bdg_polling_state *bps;
749 struct netmap_bwrap_adapter *bna;
750 int error;
751
752 bna = (struct netmap_bwrap_adapter *)na;
753 if (bna->na_polling_state) {
754 nm_prerr("ERROR adapter already in polling mode");
755 return EFAULT;
756 }
757
758 bps = nm_os_malloc(sizeof(*bps));
759 if (!bps)
760 return ENOMEM;
761 bps->configured = false;
762 bps->stopped = true;
763
764 if (get_polling_cfg(req, na, bps)) {
765 nm_os_free(bps);
766 return EINVAL;
767 }
768
769 if (nm_bdg_create_kthreads(bps)) {
770 nm_os_free(bps);
771 return EFAULT;
772 }
773
774 bps->configured = true;
775 bna->na_polling_state = bps;
776 bps->bna = bna;
777
778 /* disable interrupts if possible */
779 nma_intr_enable(bna->hwna, 0);
780 /* start kthread now */
781 error = nm_bdg_polling_start_kthreads(bps);
782 if (error) {
783 nm_prerr("ERROR nm_bdg_polling_start_kthread()");
784 nm_os_free(bps->kthreads);
785 nm_os_free(bps);
786 bna->na_polling_state = NULL;
787 nma_intr_enable(bna->hwna, 1);
788 }
789 return error;
790 }
791
792 static int
793 nm_bdg_ctl_polling_stop(struct netmap_adapter *na)
794 {
795 struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na;
796 struct nm_bdg_polling_state *bps;
797
798 if (!bna->na_polling_state) {
799 nm_prerr("ERROR adapter is not in polling mode");
800 return EFAULT;
801 }
802 bps = bna->na_polling_state;
803 nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state);
804 bps->configured = false;
805 nm_os_free(bps);
806 bna->na_polling_state = NULL;
807 /* reenable interrupts */
808 nma_intr_enable(bna->hwna, 1);
809 return 0;
810 }
811
812 int
813 nm_bdg_polling(struct nmreq_header *hdr)
814 {
815 struct nmreq_vale_polling *req =
816 (struct nmreq_vale_polling *)(uintptr_t)hdr->nr_body;
817 struct netmap_adapter *na = NULL;
818 int error = 0;
819
820 NMG_LOCK();
821 error = netmap_get_vale_na(hdr, &na, NULL, /*create=*/0);
822 if (na && !error) {
823 if (!nm_is_bwrap(na)) {
824 error = EOPNOTSUPP;
825 } else if (hdr->nr_reqtype == NETMAP_BDG_POLLING_ON) {
826 error = nm_bdg_ctl_polling_start(req, na);
827 if (!error)
828 netmap_adapter_get(na);
829 } else {
830 error = nm_bdg_ctl_polling_stop(na);
831 if (!error)
832 netmap_adapter_put(na);
833 }
834 netmap_adapter_put(na);
835 } else if (!na && !error) {
836 /* Not VALE port. */
837 error = EINVAL;
838 }
839 NMG_UNLOCK();
840
841 return error;
842 }
843
844 /* Called by external kernel modules (e.g., Openvswitch)
845 * to set the configure/lookup/dtor functions of a VALE instance.
846 * Register callbacks to the given bridge. 'name' may be just the
847 * bridge's name (including ':' if it is not just NM_BDG_NAME).
848 *
849 * Called without NMG_LOCK.
850 */
851
852 int
853 netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token)
854 {
855 struct nm_bridge *b;
856 int error = 0;
857
858 NMG_LOCK();
859 b = nm_find_bridge(name, 0 /* don't create */, NULL);
860 if (!b) {
861 error = ENXIO;
862 goto unlock_regops;
863 }
864 if (!nm_bdg_valid_auth_token(b, auth_token)) {
865 error = EACCES;
866 goto unlock_regops;
867 }
868
869 BDG_WLOCK(b);
870 if (!bdg_ops) {
871 /* resetting the bridge */
872 bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
873 b->bdg_ops = b->bdg_saved_ops;
874 b->private_data = b->ht;
875 } else {
876 /* modifying the bridge */
877 b->private_data = private_data;
878 #define nm_bdg_override(m) if (bdg_ops->m) b->bdg_ops.m = bdg_ops->m
879 nm_bdg_override(lookup);
880 nm_bdg_override(config);
881 nm_bdg_override(dtor);
882 nm_bdg_override(vp_create);
883 nm_bdg_override(bwrap_attach);
884 #undef nm_bdg_override
885
886 }
887 BDG_WUNLOCK(b);
888
889 unlock_regops:
890 NMG_UNLOCK();
891 return error;
892 }
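/*
 * Illustrative sketch (not compiled): how an external module could
 * override the config callback of an existing bridge through
 * netmap_bdg_regops().  example_config() is an assumption of the
 * example, and the authentication token is assumed to have been
 * obtained when the bridge was created.  Only non-NULL members of the
 * ops structure are installed (see nm_bdg_override above).
 */
#if 0
static int
example_config(struct nm_ifreq *nr)
{
	/* interpret the request in a module-specific way */
	return 0;
}

static int
example_regops(void *auth_token)
{
	struct netmap_bdg_ops ops;

	bzero(&ops, sizeof(ops));
	ops.config = example_config;
	return netmap_bdg_regops("vale0:", &ops, NULL /* private_data */,
			auth_token);
}
#endif /* illustrative sketch */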
893
894
895 int
896 netmap_bdg_config(struct nm_ifreq *nr)
897 {
898 struct nm_bridge *b;
899 int error = EINVAL;
900
901 NMG_LOCK();
902 b = nm_find_bridge(nr->nifr_name, 0, NULL);
903 if (!b) {
904 NMG_UNLOCK();
905 return error;
906 }
907 NMG_UNLOCK();
908 /* Don't call config() with NMG_LOCK() held */
909 BDG_RLOCK(b);
910 if (b->bdg_ops.config != NULL)
911 error = b->bdg_ops.config(nr);
912 BDG_RUNLOCK(b);
913 return error;
914 }
915
916
917 /* nm_register callback for VALE ports */
918 int
919 netmap_vp_reg(struct netmap_adapter *na, int onoff)
920 {
921 struct netmap_vp_adapter *vpna =
922 (struct netmap_vp_adapter*)na;
923
924 /* persistent ports may be put in netmap mode
925 * before being attached to a bridge
926 */
927 if (vpna->na_bdg)
928 BDG_WLOCK(vpna->na_bdg);
929 if (onoff) {
930 netmap_krings_mode_commit(na, onoff);
931 if (na->active_fds == 0)
932 na->na_flags |= NAF_NETMAP_ON;
933 /* XXX on FreeBSD, persistent VALE ports should also
934 * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
935 */
936 } else {
937 if (na->active_fds == 0)
938 na->na_flags &= ~NAF_NETMAP_ON;
939 netmap_krings_mode_commit(na, onoff);
940 }
941 if (vpna->na_bdg)
942 BDG_WUNLOCK(vpna->na_bdg);
943 return 0;
944 }
945
946
947 /* rxsync code used by the VALE ports' nm_rxsync callback and also
948 * internally by the bwrap
949 */
950 static int
951 netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
952 {
953 struct netmap_adapter *na = kring->na;
954 struct netmap_ring *ring = kring->ring;
955 u_int nm_i, lim = kring->nkr_num_slots - 1;
956 u_int head = kring->rhead;
957 int n;
958
959 if (head > lim) {
960 nm_prerr("ouch dangerous reset!!!");
961 n = netmap_ring_reinit(kring);
962 goto done;
963 }
964
965 /* First part, import newly received packets. */
966 /* actually nothing to do here, they are already in the kring */
967
968 /* Second part, skip past packets that userspace has released. */
969 nm_i = kring->nr_hwcur;
970 if (nm_i != head) {
971 /* consistency check, but nothing really important here */
972 for (n = 0; likely(nm_i != head); n++) {
973 struct netmap_slot *slot = &ring->slot[nm_i];
974 void *addr = NMB(na, slot);
975
976 if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
977 nm_prerr("bad buffer index %d, ignore ?",
978 slot->buf_idx);
979 }
980 slot->flags &= ~NS_BUF_CHANGED;
981 nm_i = nm_next(nm_i, lim);
982 }
983 kring->nr_hwcur = head;
984 }
985
986 n = 0;
987 done:
988 return n;
989 }
990
991 /*
992 * nm_rxsync callback for VALE ports
993 * user process reading from a VALE switch.
994 * Already protected against concurrent calls from userspace,
995 * but we must acquire the queue's lock to protect against
996 * writers on the same queue.
997 */
998 int
999 netmap_vp_rxsync(struct netmap_kring *kring, int flags)
1000 {
1001 int n;
1002
1003 mtx_lock(&kring->q_lock);
1004 n = netmap_vp_rxsync_locked(kring, flags);
1005 mtx_unlock(&kring->q_lock);
1006 return n;
1007 }
1008
1009 int
1010 netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna,
1011 struct netmap_bdg_ops *ops)
1012 {
1013 return ops->bwrap_attach(nr_name, hwna);
1014 }
1015
1016
1017 /* Bridge wrapper code (bwrap).
1018 * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
1019 * VALE switch.
1020 * The main task is to swap the meaning of tx and rx rings to match the
1021 * expectations of the VALE switch code (see nm_bdg_flush).
1022 *
1023 * The bwrap works by interposing a netmap_bwrap_adapter between the
1024 * rest of the system and the hwna. The netmap_bwrap_adapter looks like
1025 * a netmap_vp_adapter to the rest of the system, but, internally, it
1026 * translates all callbacks to what the hwna expects.
1027 *
1028 * Note that we have to intercept callbacks coming from two sides:
1029 *
1030 * - callbacks coming from the netmap module are intercepted by
1031 * passing around the netmap_bwrap_adapter instead of the hwna
1032 *
1033 * - callbacks coming from outside of the netmap module only know
1034 * about the hwna. This, however, only happens in interrupt
1035 * handlers, where only the hwna->nm_notify callback is called.
1036 * What the bwrap does is to overwrite the hwna->nm_notify callback
1037 * with its own netmap_bwrap_intr_notify.
1038 * XXX This assumes that the hwna->nm_notify callback was the
1039 * standard netmap_notify(), as is the case for NIC adapters.
1040 * Any additional action performed by hwna->nm_notify will not be
1041 * performed by netmap_bwrap_intr_notify.
1042 *
1043 * Additionally, the bwrap can optionally attach the host rings pair
1044 * of the wrapped adapter to a different port of the switch.
1045 */
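/*
 * Minimal sketch (not compiled) of the interception pattern described
 * above; the actual swap is performed on each hw RX kring in
 * netmap_bwrap_reg() below.
 */
#if 0
	/* on register: divert hw RX notifications into the bridge */
	kring->save_notify = kring->nm_notify;
	kring->nm_notify = netmap_bwrap_intr_notify;
	/* on unregister: restore the original callback */
	kring->nm_notify = kring->save_notify;
	kring->save_notify = NULL;
#endif /* illustrative sketch */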
1046
1047
1048 static void
1049 netmap_bwrap_dtor(struct netmap_adapter *na)
1050 {
1051 struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1052 struct netmap_adapter *hwna = bna->hwna;
1053 struct nm_bridge *b = bna->up.na_bdg,
1054 *bh = bna->host.na_bdg;
1055
1056 if (bna->host.up.nm_mem)
1057 netmap_mem_put(bna->host.up.nm_mem);
1058
1059 if (b) {
1060 netmap_bdg_detach_common(b, bna->up.bdg_port,
1061 (bh ? bna->host.bdg_port : -1));
1062 }
1063
1064 nm_prdis("na %p", na);
1065 na->ifp = NULL;
1066 bna->host.up.ifp = NULL;
1067 hwna->na_vp = bna->saved_na_vp;
1068 hwna->na_hostvp = NULL;
1069 hwna->na_private = NULL;
1070 hwna->na_flags &= ~NAF_BUSY;
1071 netmap_adapter_put(hwna);
1072
1073 }
1074
1075
1076 /*
1077 * Intr callback for NICs connected to a bridge.
1078 * Simply ignore tx interrupts (maybe we could try to recover space ?)
1079 * and pass received packets from nic to the bridge.
1080 *
1081 * XXX TODO check locking: this is called from the interrupt
1082 * handler so we should make sure that the interface is not
1083 * disconnected while passing down an interrupt.
1084 *
1085 * Note, no user process can access this NIC or the host stack.
1086 * The only part of the ring that is significant are the slots,
1087 * and head/cur/tail are set from the kring as needed
1088 * (part as a receive ring, part as a transmit ring).
1089 *
1090 * callback that overwrites the hwna notify callback.
1091 * Packets come from the outside or from the host stack and are put on an
1092 * hwna rx ring.
1093 * The bridge wrapper then sends the packets through the bridge.
1094 */
1095 static int
1096 netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
1097 {
1098 struct netmap_adapter *na = kring->na;
1099 struct netmap_bwrap_adapter *bna = na->na_private;
1100 struct netmap_kring *bkring;
1101 struct netmap_vp_adapter *vpna = &bna->up;
1102 u_int ring_nr = kring->ring_id;
1103 int ret = NM_IRQ_COMPLETED;
1104 int error;
1105
1106 if (netmap_debug & NM_DEBUG_RXINTR)
1107 nm_prinf("%s %s 0x%x", na->name, kring->name, flags);
1108
1109 bkring = vpna->up.tx_rings[ring_nr];
1110
1111 /* make sure the ring is not disabled */
1112 if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) {
1113 return EIO;
1114 }
1115
1116 if (netmap_debug & NM_DEBUG_RXINTR)
1117 nm_prinf("%s head %d cur %d tail %d", na->name,
1118 kring->rhead, kring->rcur, kring->rtail);
1119
1120 /* simulate a user wakeup on the rx ring
1121 * fetch packets that have arrived.
1122 */
1123 error = kring->nm_sync(kring, 0);
1124 if (error)
1125 goto put_out;
1126 if (kring->nr_hwcur == kring->nr_hwtail) {
1127 if (netmap_verbose)
1128 nm_prlim(1, "interrupt with no packets on %s",
1129 kring->name);
1130 goto put_out;
1131 }
1132
1133 /* new packets are kring->rcur to kring->nr_hwtail, and the bkring
1134 * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail
1135 * to push all packets out.
1136 */
1137 bkring->rhead = bkring->rcur = kring->nr_hwtail;
1138
1139 bkring->nm_sync(bkring, flags);
1140
1141 /* mark all buffers as released on this ring */
1142 kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail;
1143 /* another call to actually release the buffers */
1144 error = kring->nm_sync(kring, 0);
1145
1146 /* The second rxsync may have further advanced hwtail. If this happens,
1147 * return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */
1148 if (kring->rcur != kring->nr_hwtail) {
1149 ret = NM_IRQ_RESCHED;
1150 }
1151 put_out:
1152 nm_kr_put(kring);
1153
1154 return error ? error : ret;
1155 }
1156
1157
1158 /* nm_register callback for bwrap */
1159 int
1160 netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
1161 {
1162 struct netmap_bwrap_adapter *bna =
1163 (struct netmap_bwrap_adapter *)na;
1164 struct netmap_adapter *hwna = bna->hwna;
1165 struct netmap_vp_adapter *hostna = &bna->host;
1166 int error, i;
1167 enum txrx t;
1168
1169 nm_prdis("%s %s", na->name, onoff ? "on" : "off");
1170
1171 if (onoff) {
1172 /* netmap_do_regif has been called on the bwrap na.
1173 * We need to pass the information about the
1174 * memory allocator down to the hwna before
1175 * putting it in netmap mode
1176 */
1177 hwna->na_lut = na->na_lut;
1178
1179 if (hostna->na_bdg) {
1180 /* if the host rings have been attached to switch,
1181 * we need to copy the memory allocator information
1182 * in the hostna also
1183 */
1184 hostna->up.na_lut = na->na_lut;
1185 }
1186
1187 }
1188
1189 /* pass down the pending ring state information */
1190 for_rx_tx(t) {
1191 for (i = 0; i < netmap_all_rings(na, t); i++) {
1192 NMR(hwna, nm_txrx_swap(t))[i]->nr_pending_mode =
1193 NMR(na, t)[i]->nr_pending_mode;
1194 }
1195 }
1196
1197 /* forward the request to the hwna */
1198 error = hwna->nm_register(hwna, onoff);
1199 if (error)
1200 return error;
1201
1202 /* copy up the current ring state information */
1203 for_rx_tx(t) {
1204 for (i = 0; i < netmap_all_rings(na, t); i++) {
1205 struct netmap_kring *kring = NMR(hwna, nm_txrx_swap(t))[i];
1206 NMR(na, t)[i]->nr_mode = kring->nr_mode;
1207 }
1208 }
1209
1210 /* impersonate a netmap_vp_adapter */
1211 netmap_vp_reg(na, onoff);
1212 if (hostna->na_bdg)
1213 netmap_vp_reg(&hostna->up, onoff);
1214
1215 if (onoff) {
1216 u_int i;
1217 /* intercept the hwna nm_notify callback on the hw rings */
1218 for (i = 0; i < hwna->num_rx_rings; i++) {
1219 hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify;
1220 hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify;
1221 }
1222 i = hwna->num_rx_rings; /* for safety */
1223 /* save the host ring notify unconditionally */
1224 for (; i < netmap_real_rings(hwna, NR_RX); i++) {
1225 hwna->rx_rings[i]->save_notify =
1226 hwna->rx_rings[i]->nm_notify;
1227 if (hostna->na_bdg) {
1228 /* also intercept the host ring notify */
1229 hwna->rx_rings[i]->nm_notify =
1230 netmap_bwrap_intr_notify;
1231 na->tx_rings[i]->nm_sync = na->nm_txsync;
1232 }
1233 }
1234 if (na->active_fds == 0)
1235 na->na_flags |= NAF_NETMAP_ON;
1236 } else {
1237 u_int i;
1238
1239 if (na->active_fds == 0)
1240 na->na_flags &= ~NAF_NETMAP_ON;
1241
1242 /* reset all notify callbacks (including host ring) */
1243 for (i = 0; i < netmap_all_rings(hwna, NR_RX); i++) {
1244 hwna->rx_rings[i]->nm_notify =
1245 hwna->rx_rings[i]->save_notify;
1246 hwna->rx_rings[i]->save_notify = NULL;
1247 }
1248 hwna->na_lut.lut = NULL;
1249 hwna->na_lut.plut = NULL;
1250 hwna->na_lut.objtotal = 0;
1251 hwna->na_lut.objsize = 0;
1252
1253 /* pass ownership of the netmap rings to the hwna */
1254 for_rx_tx(t) {
1255 for (i = 0; i < netmap_all_rings(na, t); i++) {
1256 NMR(na, t)[i]->ring = NULL;
1257 }
1258 }
1259 /* reset the number of host rings to default */
1260 for_rx_tx(t) {
1261 nma_set_host_nrings(hwna, t, 1);
1262 }
1263
1264 }
1265
1266 return 0;
1267 }
1268
1269 /* nm_config callback for bwrap */
1270 static int
1271 netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info)
1272 {
1273 struct netmap_bwrap_adapter *bna =
1274 (struct netmap_bwrap_adapter *)na;
1275 struct netmap_adapter *hwna = bna->hwna;
1276 int error;
1277
1278 /* Forward the request to the hwna. It may happen that nobody
1279 * registered hwna yet, so netmap_mem_get_lut() may have not
1280 * been called yet. */
1281 error = netmap_mem_get_lut(hwna->nm_mem, &hwna->na_lut);
1282 if (error)
1283 return error;
1284 netmap_update_config(hwna);
1285 /* swap the results and propagate */
1286 info->num_tx_rings = hwna->num_rx_rings;
1287 info->num_tx_descs = hwna->num_rx_desc;
1288 info->num_rx_rings = hwna->num_tx_rings;
1289 info->num_rx_descs = hwna->num_tx_desc;
1290 info->rx_buf_maxsize = hwna->rx_buf_maxsize;
1291
1292 return 0;
1293 }
1294
1295
1296 /* nm_krings_create callback for bwrap */
1297 int
1298 netmap_bwrap_krings_create_common(struct netmap_adapter *na)
1299 {
1300 struct netmap_bwrap_adapter *bna =
1301 (struct netmap_bwrap_adapter *)na;
1302 struct netmap_adapter *hwna = bna->hwna;
1303 struct netmap_adapter *hostna = &bna->host.up;
1304 int i, error = 0;
1305 enum txrx t;
1306
1307 /* also create the hwna krings */
1308 error = hwna->nm_krings_create(hwna);
1309 if (error) {
1310 return error;
1311 }
1312
1313 /* increment the usage counter for all the hwna krings */
1314 for_rx_tx(t) {
1315 for (i = 0; i < netmap_all_rings(hwna, t); i++) {
1316 NMR(hwna, t)[i]->users++;
1317 }
1318 }
1319
1320 /* now create the actual rings */
1321 error = netmap_mem_rings_create(hwna);
1322 if (error) {
1323 goto err_dec_users;
1324 }
1325
1326 /* cross-link the netmap rings
1327 * The original number of rings comes from hwna,
1328 * rx rings on one side equals tx rings on the other.
1329 */
1330 for_rx_tx(t) {
1331 enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
1332 for (i = 0; i < netmap_all_rings(hwna, r); i++) {
1333 NMR(na, t)[i]->nkr_num_slots = NMR(hwna, r)[i]->nkr_num_slots;
1334 NMR(na, t)[i]->ring = NMR(hwna, r)[i]->ring;
1335 }
1336 }
1337
1338 if (na->na_flags & NAF_HOST_RINGS) {
1339 /* the hostna rings are the host rings of the bwrap.
1340 * The corresponding krings must point back to the
1341 * hostna
1342 */
1343 hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
1344 hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
1345 for_rx_tx(t) {
1346 for (i = 0; i < nma_get_nrings(hostna, t); i++) {
1347 NMR(hostna, t)[i]->na = hostna;
1348 }
1349 }
1350 }
1351
1352 return 0;
1353
1354 err_dec_users:
1355 for_rx_tx(t) {
1356 for (i = 0; i < netmap_all_rings(hwna, t); i++) {
1357 NMR(hwna, t)[i]->users--;
1358 }
1359 }
1360 hwna->nm_krings_delete(hwna);
1361 return error;
1362 }
1363
1364
1365 void
1366 netmap_bwrap_krings_delete_common(struct netmap_adapter *na)
1367 {
1368 struct netmap_bwrap_adapter *bna =
1369 (struct netmap_bwrap_adapter *)na;
1370 struct netmap_adapter *hwna = bna->hwna;
1371 enum txrx t;
1372 int i;
1373
1374 nm_prdis("%s", na->name);
1375
1376 /* decrement the usage counter for all the hwna krings */
1377 for_rx_tx(t) {
1378 for (i = 0; i < netmap_all_rings(hwna, t); i++) {
1379 NMR(hwna, t)[i]->users--;
1380 }
1381 }
1382
1383 /* delete any netmap rings that are no longer needed */
1384 netmap_mem_rings_delete(hwna);
1385 hwna->nm_krings_delete(hwna);
1386 }
1387
1388
1389 /* notify method for the bridge-->hwna direction */
1390 int
1391 netmap_bwrap_notify(struct netmap_kring *kring, int flags)
1392 {
1393 struct netmap_adapter *na = kring->na;
1394 struct netmap_bwrap_adapter *bna = na->na_private;
1395 struct netmap_adapter *hwna = bna->hwna;
1396 u_int ring_n = kring->ring_id;
1397 u_int lim = kring->nkr_num_slots - 1;
1398 struct netmap_kring *hw_kring;
1399 int error;
1400
1401 nm_prdis("%s: na %s hwna %s",
1402 (kring ? kring->name : "NULL!"),
1403 (na ? na->name : "NULL!"),
1404 (hwna ? hwna->name : "NULL!"));
1405 hw_kring = hwna->tx_rings[ring_n];
1406
1407 if (nm_kr_tryget(hw_kring, 0, NULL)) {
1408 return ENXIO;
1409 }
1410
1411 /* first step: simulate a user wakeup on the rx ring */
1412 netmap_vp_rxsync(kring, flags);
1413 nm_prdis("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
1414 na->name, ring_n,
1415 kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
1416 kring->rhead, kring->rcur, kring->rtail,
1417 hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
1418 /* second step: the new packets are sent on the tx ring
1419 * (which is actually the same ring)
1420 */
1421 hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
1422 error = hw_kring->nm_sync(hw_kring, flags);
1423 if (error)
1424 goto put_out;
1425
1426 /* third step: now we are back the rx ring */
1427 /* claim ownership on all hw owned bufs */
1428 kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */
1429
1430 /* fourth step: the user goes to sleep again, causing another rxsync */
1431 netmap_vp_rxsync(kring, flags);
1432 nm_prdis("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
1433 na->name, ring_n,
1434 kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
1435 kring->rhead, kring->rcur, kring->rtail,
1436 hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
1437 put_out:
1438 nm_kr_put(hw_kring);
1439
1440 return error ? error : NM_IRQ_COMPLETED;
1441 }
1442
1443
1444 /* nm_bdg_ctl callback for the bwrap.
1445 * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
1446 * On attach, it needs to provide a fake netmap_priv_d structure and
1447 * perform a netmap_do_regif() on the bwrap. This will put both the
1448 * bwrap and the hwna in netmap mode, with the netmap rings shared
1449 * and cross-linked. Moreover, it will start intercepting interrupts
1450 * directed to hwna.
1451 */
1452 static int
1453 netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
1454 {
1455 struct netmap_priv_d *npriv;
1456 struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1457 int error = 0;
1458
1459 if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
1460 struct nmreq_vale_attach *req =
1461 (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
1462 if (req->reg.nr_ringid != 0 ||
1463 (req->reg.nr_mode != NR_REG_ALL_NIC &&
1464 req->reg.nr_mode != NR_REG_NIC_SW)) {
1465 /* We only support attaching all the NIC rings
1466 * and/or the host stack. */
1467 return EINVAL;
1468 }
1469 if (NETMAP_OWNED_BY_ANY(na)) {
1470 return EBUSY;
1471 }
1472 if (bna->na_kpriv) {
1473 /* nothing to do */
1474 return 0;
1475 }
1476 npriv = netmap_priv_new();
1477 if (npriv == NULL)
1478 return ENOMEM;
1479 npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */
1480 error = netmap_do_regif(npriv, na, req->reg.nr_mode,
1481 req->reg.nr_ringid, req->reg.nr_flags);
1482 if (error) {
1483 netmap_priv_delete(npriv);
1484 return error;
1485 }
1486 bna->na_kpriv = npriv;
1487 na->na_flags |= NAF_BUSY;
1488 } else {
1489 if (na->active_fds == 0) /* not registered */
1490 return EINVAL;
1491 netmap_priv_delete(bna->na_kpriv);
1492 bna->na_kpriv = NULL;
1493 na->na_flags &= ~NAF_BUSY;
1494 }
1495
1496 return error;
1497 }
1498
1499 /* attach a bridge wrapper to the 'real' device */
1500 int
1501 netmap_bwrap_attach_common(struct netmap_adapter *na,
1502 struct netmap_adapter *hwna)
1503 {
1504 struct netmap_bwrap_adapter *bna;
1505 struct netmap_adapter *hostna = NULL;
1506 int error = 0;
1507 enum txrx t;
1508
1509 /* make sure the NIC is not already in use */
1510 if (NETMAP_OWNED_BY_ANY(hwna)) {
1511 nm_prerr("NIC %s busy, cannot attach to bridge", hwna->name);
1512 return EBUSY;
1513 }
1514
1515 bna = (struct netmap_bwrap_adapter *)na;
1516 /* make bwrap ifp point to the real ifp */
1517 na->ifp = hwna->ifp;
1518 if_ref(na->ifp);
1519 na->na_private = bna;
1520 /* fill the ring data for the bwrap adapter with rx/tx meanings
1521 * swapped. The real cross-linking will be done during register,
1522 * when all the krings will have been created.
1523 */
1524 for_rx_tx(t) {
1525 enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
1526 nma_set_nrings(na, t, nma_get_nrings(hwna, r));
1527 nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
1528 }
1529 na->nm_dtor = netmap_bwrap_dtor;
1530 na->nm_config = netmap_bwrap_config;
1531 na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
1532 na->pdev = hwna->pdev;
1533 na->nm_mem = netmap_mem_get(hwna->nm_mem);
1534 na->virt_hdr_len = hwna->virt_hdr_len;
1535 na->rx_buf_maxsize = hwna->rx_buf_maxsize;
1536
1537 bna->hwna = hwna;
1538 netmap_adapter_get(hwna);
1539 hwna->na_private = bna; /* weak reference */
1540 bna->saved_na_vp = hwna->na_vp;
1541 hwna->na_vp = &bna->up;
1542 bna->up.up.na_vp = &(bna->up);
1543
1544 if (hwna->na_flags & NAF_HOST_RINGS) {
1545 if (hwna->na_flags & NAF_SW_ONLY)
1546 na->na_flags |= NAF_SW_ONLY;
1547 na->na_flags |= NAF_HOST_RINGS;
1548 hostna = &bna->host.up;
1549
1550 /* limit the number of host rings to that of hw */
1551 nm_bound_var(&hostna->num_tx_rings, 1, 1,
1552 nma_get_nrings(hwna, NR_TX), NULL);
1553 nm_bound_var(&hostna->num_rx_rings, 1, 1,
1554 nma_get_nrings(hwna, NR_RX), NULL);
1555
1556 snprintf(hostna->name, sizeof(hostna->name), "%s^", na->name);
1557 hostna->ifp = hwna->ifp;
1558 for_rx_tx(t) {
1559 enum txrx r = nm_txrx_swap(t);
1560 u_int nr = nma_get_nrings(hostna, t);
1561
1562 nma_set_nrings(hostna, t, nr);
1563 nma_set_host_nrings(na, t, nr);
1564 if (nma_get_host_nrings(hwna, t) < nr) {
1565 nma_set_host_nrings(hwna, t, nr);
1566 }
1567 nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
1568 }
1569 // hostna->nm_txsync = netmap_bwrap_host_txsync;
1570 // hostna->nm_rxsync = netmap_bwrap_host_rxsync;
1571 hostna->nm_mem = netmap_mem_get(na->nm_mem);
1572 hostna->na_private = bna;
1573 hostna->na_vp = &bna->up;
1574 na->na_hostvp = hwna->na_hostvp =
1575 hostna->na_hostvp = &bna->host;
1576 hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
1577 hostna->rx_buf_maxsize = hwna->rx_buf_maxsize;
1578 }
1579 if (hwna->na_flags & NAF_MOREFRAG)
1580 na->na_flags |= NAF_MOREFRAG;
1581
1582 nm_prdis("%s<->%s txr %d txd %d rxr %d rxd %d",
1583 na->name, na->ifp->if_xname,
1584 na->num_tx_rings, na->num_tx_desc,
1585 na->num_rx_rings, na->num_rx_desc);
1586
1587 error = netmap_attach_common(na);
1588 if (error) {
1589 goto err_put;
1590 }
1591 hwna->na_flags |= NAF_BUSY;
1592 return 0;
1593
1594 err_put:
1595 hwna->na_vp = hwna->na_hostvp = NULL;
1596 netmap_adapter_put(hwna);
1597 return error;
1598
1599 }
1600
1601 struct nm_bridge *
1602 netmap_init_bridges2(u_int n)
1603 {
1604 int i;
1605 struct nm_bridge *b;
1606
1607 b = nm_os_malloc(sizeof(struct nm_bridge) * n);
1608 if (b == NULL)
1609 return NULL;
1610 for (i = 0; i < n; i++)
1611 BDG_RWINIT(&b[i]);
1612 return b;
1613 }
1614
1615 void
1616 netmap_uninit_bridges2(struct nm_bridge *b, u_int n)
1617 {
1618 int i;
1619
1620 if (b == NULL)
1621 return;
1622
1623 for (i = 0; i < n; i++)
1624 BDG_RWDESTROY(&b[i]);
1625 nm_os_free(b);
1626 }
1627
1628 int
1629 netmap_init_bridges(void)
1630 {
1631 #ifdef CONFIG_NET_NS
1632 return netmap_bns_register();
1633 #else
1634 nm_bridges = netmap_init_bridges2(NM_BRIDGES);
1635 if (nm_bridges == NULL)
1636 return ENOMEM;
1637 return 0;
1638 #endif
1639 }
1640
1641 void
1642 netmap_uninit_bridges(void)
1643 {
1644 #ifdef CONFIG_NET_NS
1645 netmap_bns_unregister();
1646 #else
1647 netmap_uninit_bridges2(nm_bridges, NM_BRIDGES);
1648 #endif
1649 }
1650